First version

This commit is contained in:
2022-04-09 23:24:47 +03:00
parent 2fc2ff1d7f
commit 559f6878f7
18 changed files with 425 additions and 1 deletions

3
.gitignore vendored
View File

@@ -8,5 +8,8 @@
coverage.txt coverage.txt
*.pyc *.pyc
.coverage
.pytest_cache
.idea .idea
venv venv

22
.pre-commit-config.yaml Normal file
View File

@@ -0,0 +1,22 @@
# pre-commit hooks: formatting (black, isort), linting (flake8) and basic
# file hygiene checks.
repos:
  - repo: https://github.com/psf/black
    rev: 21.12b0
    hooks:
      - id: black
        args: [--line-length=80, --target-version=py38]
  # flake8 is hosted on GitHub; the former GitLab location is no longer
  # available, so pre-commit cannot clone the hook from there.
  - repo: https://github.com/pycqa/flake8
    rev: 3.9.2
    hooks:
      - id: flake8
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.2.0
    hooks:
      - id: check-yaml
      - id: end-of-file-fixer
      - id: trailing-whitespace
      - id: check-json
  - repo: https://github.com/pycqa/isort
    rev: 5.10.1
    hooks:
      - id: isort
        args: ["--filter-files"]

4
CONTRIBUTING.md Normal file
View File

@@ -0,0 +1,4 @@
# Contributing
Any issues and PRs are welcome.
Development environment recommendations are in Readme.md

92
Readme.md Normal file
View File

@@ -0,0 +1,92 @@
# Gitea Mirror
The key idea of this project is to provide the simplest way
to back up all Gitea repositories on a daily basis with a single command.
## How to use
This application requires only an API key for Gitea.
Unfortunately, Gitea only allows creating root-level API keys.
You can generate one here:
```
https://YOUR_INSTANCE/user/settings/applications
```
Other methods are not supported:
1. User/password is not safe and is hard to use with 2FA enabled.
2. With SSH, only public repositories can be discovered,
which is not acceptable for full account mirroring.
**Security notice.**
This application uses SSH as the git transport layer.
It is safe enough when used correctly,
and correct use requires saving the
git server's SSH host key (in the ~/.ssh/known_hosts file).
To do this, you just need to clone any repository over SSH first.
**Config**. We use a single config file for this application.
It is a slightly old-fashioned solution for modern Docker/Kubernetes backends,
but it keeps the configuration in one place, and that place is _secure enough_ to store the token.
Example config:
```ini
[main]
endpoint=https://example.com
token=XXXXX
format={owner}/{name}
out_dir=/home/user/repositories
ssh_key=/home/user/id_rsa
```
### Native
Not recommended, but more efficient in space
and does not require docker.
removing the ability to specify a user
1. Clone this repository (`git clone ...`)
2. Install dependencies (`pip3 install -r requirements.txt`).
Venv-level is recommended.
3. Install git (`sudo apt install git`)
4. Run it with the path to the INI config.
```bash
python gitea-mirror.py config.ini
```
### Docker
The simplest way.
**TBD**
## How to develop
We use [pre-commit](https://pre-commit.com/) for basic
style fixes and checks.
Also, pytest is used for testing.
It can be installed with `pip install -r requirements.dev.txt`.
To run tests:
```bash
pytest --cov=src tests
```
## FAQ
- **Q:** Is it possible to specify user?
- **A:** This tool should be as simple as possible.
A token as the only identifier is _good enough_ for 95% of cases.
- **Q:** Why can't I just use Gitea's own `backup` command?
- **A:** For many personal instances or instances for small teams, only repositories are important
(not users, wiki, issues, etc.).
It _does not_ solve backup problem in general,
but gives possibility to back up all personal repositories with ease.
(And without access to root-level of Gitea instance)
- **Q:** Why Python with dependencies for such a small application?
- **A:** Using libraries for specific cases is a good practice in industry.
And it keeps code simple and easy to verify (for bugs or malicious actions).
Which is much more important than one-time venv or Docker setup.

52
gitea-mirror.py Normal file
View File

@@ -0,0 +1,52 @@
import os.path
import sys
from src.gitea_api import GiteaApi
from src.repository_name import get_repository_name, is_valid_repository_names
from src.config import Config, read_ini_config
from src.models import GiteaRepository
from src.git import git_pull, git_clone
from os import makedirs
# NOTE(review): neither constant is referenced by the code visible in this
# script (the output directory and name format are taken from Config) —
# confirm whether they are still needed.
BASE_PATH = "out"
FORMAT = "{owner}/{name}"
def process_repo(config: Config, repo: GiteaRepository):
    """Mirror a single repository below ``config.out_dir``.

    The local directory name is derived from ``config.repository_format``.
    A directory that already contains a ``.git`` entry is updated with
    ``git pull``; otherwise the repository is cloned from ``repo.ssh_url``.

    Args:
        config: Application settings (output directory, name format,
            SSH key path).
        repo: Repository description returned by the Gitea API.
    """
    path = get_repository_name(name_format=config.repository_format, r=repo)
    out_path = os.path.join(config.out_dir, path)
    makedirs(out_path, exist_ok=True)

    # Hand over the configured key path instead of a hard-coded placeholder,
    # so the git helpers receive (and report) the real value.
    ssh_key = config.ssh_key_path

    if os.path.exists(os.path.join(out_path, ".git")):
        git_pull(out_path, ssh_key=ssh_key)
        return

    print(f"New repository: {path}")
    git_clone(ssh_url=repo.ssh_url, repository=out_path, ssh_key=ssh_key)
def main():
    """Command-line entry point: mirror every repository the token can see.

    Expects the path to the INI config as the first command-line argument.
    Exits with status 1 when the argument is missing, when the config cannot
    be loaded, or when the configured name format maps two repositories to
    the same local name.
    """
    if len(sys.argv) < 2:
        print("Usage: python gitea-mirror.py CONFIG_PATH")
        sys.exit(1)

    try:
        config = read_ini_config(sys.argv[1])
    except (RuntimeError, ValueError) as err:
        # ValueError covers pydantic's ValidationError (e.g. an endpoint
        # that is not a valid URL), which would otherwise end in a traceback
        # instead of the friendly message below.
        print(f"Invalid config: {err}")
        sys.exit(1)

    api = GiteaApi(
        endpoint=config.endpoint,
        token=config.token,
    )
    repos = api.get_repositories()
    print(f"total {len(repos)} repositories")

    # Two repositories sharing one local name would overwrite each other.
    if not is_valid_repository_names(
        name_format=config.repository_format, repos=repos
    ):
        print("Format string is not valid, duplicates are not allowed")
        sys.exit(1)

    for repo in repos:
        process_repo(config=config, repo=repo)


if __name__ == "__main__":
    main()

10
pyproject.toml Normal file
View File

@@ -0,0 +1,10 @@
[tool.black]
line-length = 80
target-version = ['py38']
# The dot is escaped so that only real ".py" / ".pyi" suffixes match;
# an unescaped "." would match any character before "py".
include = '\.pyi?$'
[tool.isort]
profile = "black"
py_version = "auto"
sections = "FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER"
known_local_folder = "src"

2
requirements.dev.txt Normal file
View File

@@ -0,0 +1,2 @@
pytest~=7.1.1
pytest-cov~=3.0.0

View File

@@ -1,2 +1,2 @@
pydantic~=1.9.0 pydantic~=1.9.0
requests~=2.27.1 requests~=2.27.1

0
src/__init__.py Normal file
View File

43
src/config.py Normal file
View File

@@ -0,0 +1,43 @@
"""
Read the application settings from an INI file.

The token should be treated as a password;
files are in general more secure than command-line arguments.

.ini config example:

[main]
endpoint=https://example.com/gitea
token=something
format={owner}/{name}
out_dir=/home/user/repositories
ssh_key=/home/user/.ssh/id_rsa
"""
import configparser
import os
from .models import Config
# Name of the only INI section that read_ini_config() reads options from.
MAIN_SECTION = "main"
def read_ini_config(path: str) -> Config:
    """Load the application settings from the INI file at ``path``.

    All options are read from the ``[main]`` section: ``endpoint``,
    ``token``, ``format``, ``out_dir`` and ``ssh_key``.

    Args:
        path: Location of the INI file.

    Returns:
        A ``Config`` built from the values found in the file.

    Raises:
        RuntimeError: If the path does not exist, the file cannot be parsed
            as INI, or the section or one of its options is missing.
    """
    if not os.path.exists(path):
        raise RuntimeError("INI config path is not exists")

    parser = configparser.ConfigParser()
    try:
        parser.read(path)
        section = parser[MAIN_SECTION]
        endpoint = section["endpoint"]
        token = section["token"]
        repository_format = section["format"]
        out_dir = section["out_dir"]
        ssh_key_path = section["ssh_key"]
    except KeyError as err:
        raise RuntimeError(f"No value for section: {err}") from err
    except configparser.Error as err:
        # Malformed files (e.g. no section header) and broken "%"
        # interpolation are reported the same way as any other bad config.
        raise RuntimeError(f"Unable to parse INI config: {err}") from err

    return Config(
        repository_format=repository_format,
        endpoint=endpoint,
        token=token,
        out_dir=out_dir,
        ssh_key_path=ssh_key_path,
    )

23
src/git.py Normal file
View File

@@ -0,0 +1,23 @@
import subprocess
from os import makedirs
def git_clone(ssh_url: str, repository: str, ssh_key: str) -> bool:
    """Clone ``ssh_url`` into the directory ``repository``.

    The directory is created when it does not exist yet and git is run
    inside it, cloning into ``.``.

    Args:
        ssh_url: Clone URL of the remote repository.
        repository: Local directory that receives the checkout.
        ssh_key: Only included in the failure message; it is not passed
            to git by this function.

    Returns:
        True when the clone succeeded. False when git exited with an error,
        git could not be started, or the directory could not be created.
    """
    try:
        makedirs(repository, exist_ok=True)
        # "--" ends option parsing, so a URL starting with "-" can never be
        # interpreted by git as a command-line option.
        subprocess.check_call(
            ["git", "clone", "--", ssh_url, "."], cwd=repository
        )
    except subprocess.CalledProcessError:
        print(f"Unable to clone repository {repository} with key {ssh_key} from {ssh_url}")
        return False
    except OSError as err:
        # Missing git executable or an unusable target directory: report a
        # failure like any other instead of aborting the whole run.
        print(f"Unable to run git clone for repository {repository}: {err}")
        return False
    return True
def git_pull(repository: str, ssh_key: str) -> bool:
    """Run ``git pull`` inside the existing checkout ``repository``.

    Args:
        repository: Local directory containing the checkout.
        ssh_key: Only included in the failure message; it is not passed
            to git by this function.

    Returns:
        True when the pull succeeded. False when git exited with an error
        or could not be started (e.g. git or the directory is missing).
    """
    try:
        subprocess.check_call(["git", "pull"], cwd=repository)
    except subprocess.CalledProcessError:
        print(f"Unable to pull repository {repository} with key {ssh_key}")
        return False
    except OSError as err:
        # Missing git executable or missing directory: report a failure
        # like any other instead of aborting the whole run.
        print(f"Unable to run git pull for repository {repository}: {err}")
        return False
    return True

42
src/gitea_api.py Normal file
View File

@@ -0,0 +1,42 @@
from typing import List
from .models import GiteaRepository
from urllib.parse import urljoin
from pydantic import parse_obj_as
import requests
class GiteaApi:
    """Small client for the parts of the Gitea REST API (v1) used here."""

    def __init__(self, endpoint: str, token: str):
        """Remember the base URL of the instance and the API token.

        Args:
            endpoint: Base URL of the Gitea instance; it may contain a path
                prefix (e.g. ``https://example.com/gitea``).
            token: API token sent in the ``Authorization`` header.
        """
        self._endpoint = endpoint
        self._token = token

    def get_repositories(self, page_size=10) -> List[GiteaRepository]:
        """Return all repositories listed by ``/api/v1/user/repos``.

        Pages are requested one after another until an empty page is
        returned. For mirroring, the requesting user is not important.

        Args:
            page_size: Value of the ``limit`` query parameter per request.

        Returns:
            The repositories, unique by repository id. An empty list when
            any request answers with a status code other than 200.
        """
        # Join relative to the endpoint (with a trailing slash) so that a
        # path prefix such as "/gitea" is kept; joining an absolute
        # "/api/..." path would silently drop it.
        url = urljoin(self._endpoint.rstrip("/") + "/", "api/v1/user/repos")

        all_repos = {}  # keyed by repository id: keeps the result unique
        page_id = 1
        with requests.session() as session:
            session.headers.update({"Authorization": "token " + self._token})
            while True:
                r = session.get(
                    url,
                    params={"limit": page_size, "page": page_id},
                )
                if r.status_code != 200:
                    print(f"Failed request, code {r.status_code}")
                    return []

                repos_data = r.json()
                if not repos_data:
                    break  # an empty page marks the end of the listing
                page_id += 1

                cur_repos = parse_obj_as(List[GiteaRepository], repos_data)
                for repo in cur_repos:
                    all_repos[repo.repo_id] = repo

        return list(all_repos.values())

24
src/models.py Normal file
View File

@@ -0,0 +1,24 @@
from pydantic import BaseModel, Field, HttpUrl
import datetime
class Config(BaseModel):
    """Application settings, validated by pydantic on construction."""

    # Format string used to build the local directory name of a repository.
    repository_format: str
    # Value of the ``ssh_key`` option of the INI config.
    ssh_key_path: str
    # Base URL of the Gitea instance; must be a valid HTTP(S) URL.
    endpoint: HttpUrl
    # API token sent to Gitea in the Authorization header.
    token: str
    # Directory below which the repositories are stored.
    out_dir: str
class GiteaUser(BaseModel):
    """Owner of a repository, as described by the Gitea API."""

    # Populated from the "id" key of the input data; required.
    user_id: int = Field(..., alias="id")
    login: str
    email: str
class GiteaRepository(BaseModel):
    """Repository entry, as described by the Gitea API."""

    ssh_url: str
    name: str
    # Populated from the "id" key of the input data; required.
    repo_id: int = Field(..., alias="id")
    updated_at: datetime.datetime
    owner: GiteaUser

40
src/repository_name.py Normal file
View File

@@ -0,0 +1,40 @@
from .models import GiteaRepository, GiteaUser
import datetime
from typing import List
def _get_test_repository() -> GiteaRepository:
    """Build a sample repository object used to probe format strings."""
    sample_owner = GiteaUser(
        id=23,
        login="test_user",
        email="test_user@example.com",
    )
    sample_repo = GiteaRepository(
        ssh_url="ssh://git@example.com/project/name",
        name="test name",
        id=42,
        updated_at=datetime.datetime.now(),
        owner=sample_owner,
    )
    return sample_repo
def is_valid_format(name_format: str) -> bool:
    """Check whether ``name_format`` can be rendered for a repository.

    The format string is applied to a sample repository; it is valid when
    that formatting succeeds.

    Args:
        name_format: ``str.format`` template for the local repository name.

    Returns:
        True when the template renders, False when it refers to an unknown
        field or is malformed.
    """
    try:
        get_repository_name(name_format, _get_test_repository())
    except (AttributeError, IndexError, KeyError, TypeError, ValueError):
        # KeyError: unknown field name ("{blabla}")
        # IndexError: positional field ("{0}")
        # ValueError: malformed template or format spec ("{", "{name:d}")
        # AttributeError / TypeError: bad attribute or index access
        #   ("{name.missing}", "{name[x]}")
        return False
    return True
def get_repository_name(name_format: str, r: GiteaRepository) -> str:
    """Render ``name_format`` with the fields of repository ``r``.

    Available placeholders: ``{name}``, ``{repository_id}``, ``{owner}``
    (the owner's login) and ``{owner_id}``.
    """
    placeholders = {
        "name": r.name,
        "repository_id": r.repo_id,
        "owner": r.owner.login,
        "owner_id": r.owner.user_id,
    }
    return name_format.format(**placeholders)
def is_valid_repository_names(name_format: str, repos: List[GiteaRepository]):
    """Return True when ``name_format`` gives every repository its own name."""
    rendered = set()
    for repo in repos:
        rendered.add(get_repository_name(name_format, repo))
    # A smaller set means at least two repositories share a name.
    return len(repos) == len(rendered)

11
src/sync.py Normal file
View File

@@ -0,0 +1,11 @@
from .models import GiteaRepository
from typing import List
class SyncProcessor:
    """Placeholder for the synchronisation logic; nothing is implemented."""

    def __init__(self):
        """Create the processor; no state is stored."""

    def sync(self, path, repos: List[GiteaRepository]):
        """Do nothing (stub); both arguments are currently ignored."""
        return None

0
tests/__init__.py Normal file
View File

42
tests/test_config.py Normal file
View File

@@ -0,0 +1,42 @@
from tempfile import NamedTemporaryFile
from src.config import read_ini_config, Config
import pytest
@pytest.mark.parametrize(
    "config_data, expected",
    [
        (
            "[main]\ntoken=something\n"
            "format={owner}/{name}\n"
            "ssh_key=/tmp/no_key\n"
            "endpoint=https://example.com\n"
            "out_dir=/home/user/repositories",
            Config(
                token="something",
                repository_format="{owner}/{name}",
                out_dir="/home/user/repositories",
                endpoint="https://example.com",
                ssh_key_path="/tmp/no_key",
            ),
        ),
        ("[main]", None),
    ],
)
def test_ini_config(config_data, expected):
    """Parse an INI file written to disk and compare with the expectation.

    ``expected`` is the resulting Config, or None when reading the file
    must fail with RuntimeError.
    """
    with NamedTemporaryFile() as ini_file:
        if config_data:
            ini_file.write(config_data.encode("utf-8"))
            # Make sure the content is on disk before it is read by name.
            ini_file.flush()

        if not expected:
            with pytest.raises(RuntimeError):
                read_ini_config(ini_file.name)
            return

        assert read_ini_config(ini_file.name) == expected
def test_ini_config_not_exists():
    """A config path that does not exist is reported as RuntimeError."""
    missing_path = "not_existing_file"
    with pytest.raises(RuntimeError):
        read_ini_config(missing_path)

View File

@@ -0,0 +1,14 @@
import pytest
from src.repository_name import is_valid_format
@pytest.mark.parametrize(
    "name_format, expected",
    [
        ("{blabla}", False),
        ("", True),
        ("{owner}/{name}", True),
    ],
)
def test_name_formatting(name_format, expected):
    """Unknown placeholders are rejected; known or absent ones are accepted."""
    verdict = is_valid_format(name_format)
    assert verdict == expected