diff --git a/.gitignore b/.gitignore index dc16922..64a977d 100644 --- a/.gitignore +++ b/.gitignore @@ -8,5 +8,8 @@ coverage.txt *.pyc +.coverage +.pytest_cache + .idea venv diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..13b007d --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,22 @@ +repos: + - repo: https://github.com/psf/black + rev: 21.12b0 + hooks: + - id: black + args: [--line-length=80, --target-version=py38] + - repo: https://gitlab.com/pycqa/flake8 + rev: 3.9.2 + hooks: + - id: flake8 + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v4.2.0 + hooks: + - id: check-yaml + - id: end-of-file-fixer + - id: trailing-whitespace + - id: check-json + - repo: https://github.com/pycqa/isort + rev: 5.10.1 + hooks: + - id: isort + args: ["--filter-files" ] \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..b2429b0 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,4 @@ +# Contributing +Any issues and PRs are welcome. + +Development environment recommendations are in Readme.md \ No newline at end of file diff --git a/Readme.md b/Readme.md new file mode 100644 index 0000000..7bc100e --- /dev/null +++ b/Readme.md @@ -0,0 +1,92 @@ +# Gitea Mirror +The key idea of this project is to provide the simplest solution +to back up all Gitea repositories on a daily basis with a simple command. + +## How to use +This application requires only an API key for Gitea. +Unfortunately, Gitea only allows creating root-level API keys. + +You can generate one here: +``` +https://YOUR_INSTANCE/user/settings/applications +``` + +Other methods are not supported: +1. User/password is not safe and is hard to use with 2FA enabled. +2. With SSH alone, only public repositories can be discovered, + which is not acceptable for full account mirroring. + +**Security notice.** +This application uses SSH as the git transport layer. 
+It is safe enough when used correctly, +and correct use requires saving the +git server's SSH host key (~/.ssh/known_hosts file). +To do this, you just need to clone any repository over SSH once first. + +**Config**. We use a single config file for this application. +It is a slightly old-fashioned solution for modern Docker/Kubernetes backends, +but it keeps the configuration in one place and is a _secure enough_ place to store the token. + + +Example config: +```ini +[main] +endpoint=https://example.com +token=XXXXX +format={owner}/{name} +out_dir=/home/user/repositories +ssh_key=/home/user/id_rsa +``` + + +### Native +Not recommended, but more efficient in space +and does not require Docker. +removing the ability to specify a user +1. Clone this repository (`git clone ...`) +2. Install dependencies (`pip3 install -r requirements.txt`). + Venv-level is recommended. +3. Install git (`sudo apt install git`) +4. Run it with the path to the ini config. +```bash +python gitea-mirror.py config.ini +``` + + +### Docker +The simplest way. + +**TBD** + +## How to develop +We use [pre-commit](https://pre-commit.com/) for basic +style fixes and checks. + +Also, pytest is used for testing. +It can be installed with `pip install -r requirements.dev.txt`. + +To run tests: +```bash +pytest --cov=src tests +``` + +## FAQ +- **Q:** Is it possible to specify a user? + +- **A:** This tool should be as simple as possible. +A token as the only identifier is _good enough_ for 95% of cases. + + +- **Q:** Why can't I just use Gitea's own `backup` command? + +- **A:** For many personal instances or instances for small teams, only repositories are important + (not users, wiki, issues, etc). + It _does not_ solve the backup problem in general, + but makes it possible to back up all personal repositories with ease. + (And without root-level access to the Gitea instance.) + +- **Q:** Why Python with dependencies for such a small application? + +- **A:** Using libraries for specific cases is a good practice in industry. 
def process_repo(config: "Config", repo: "GiteaRepository") -> bool:
    """Mirror one repository: pull it if already cloned, otherwise clone it.

    The working copy lives in ``config.out_dir`` joined with the name
    rendered from ``config.repository_format``.

    :param config: application settings (output dir, name format, SSH key).
    :param repo: repository description obtained from the Gitea API.
    :return: whatever ``git_pull``/``git_clone`` report for the operation.
    """
    path = get_repository_name(name_format=config.repository_format, r=repo)
    out_path = os.path.join(config.out_dir, path)
    makedirs(out_path, exist_ok=True)
    # A ".git" entry means a previous run already cloned this repository.
    if os.path.exists(os.path.join(out_path, ".git")):
        # Use the configured key instead of the former "fake" placeholder.
        return git_pull(out_path, ssh_key=config.ssh_key_path)
    print(f"New repository: {path}")
    return git_clone(
        ssh_url=repo.ssh_url,
        repository=out_path,
        ssh_key=config.ssh_key_path,
    )


def main():
    """Mirror every repository visible to the configured Gitea token.

    Reads the INI config named by the first command-line argument, lists
    the repositories through the API and clones or pulls each of them.
    Exits with status 1 on a usage error, an invalid config, a name format
    that maps several repositories to the same path, or when at least one
    repository could not be synchronised.
    """
    if len(sys.argv) < 2:
        print("Usage: python gitea-mirror.py CONFIG_PATH")
        sys.exit(1)
    try:
        config = read_ini_config(sys.argv[1])
    except RuntimeError as err:
        print(f"Invalid config: {err}")
        sys.exit(1)

    api = GiteaApi(
        endpoint=config.endpoint,
        token=config.token,
    )
    repos = api.get_repositories()
    print(f"total {len(repos)} repositories")

    if not is_valid_repository_names(
        name_format=config.repository_format, repos=repos
    ):
        print("Format string is not valid, duplicates are not allowed")
        sys.exit(1)

    failed = 0
    for repo in repos:
        # Keep going after a failure so one broken repository does not
        # prevent the remaining ones from being backed up.
        if process_repo(config=config, repo=repo) is False:
            failed += 1
    if failed:
        # A backup job must not report success when something was skipped.
        print(f"Failed to mirror {failed} of {len(repos)} repositories")
        sys.exit(1)
MAIN_SECTION = "main"


def read_ini_config(path: str) -> "Config":
    """Load application settings from the ``[main]`` section of an INI file.

    Every failure is reported as ``RuntimeError`` so that callers have to
    handle a single exception type.

    :param path: location of the INI file.
    :return: ``Config`` built from the ``endpoint``, ``token``, ``format``,
        ``out_dir`` and ``ssh_key`` options.
    :raises RuntimeError: if the file is missing or unreadable, is not
        valid INI, lacks the section or one of the options, or if
        ``Config`` rejects a value.
    """
    parser = configparser.ConfigParser()
    try:
        # read_file() surfaces I/O problems; read() would skip an
        # unreadable path silently and fail later with a misleading error.
        with open(path) as config_file:
            parser.read_file(config_file)
    except FileNotFoundError as err:
        raise RuntimeError("INI config path is not exists") from err
    except OSError as err:
        raise RuntimeError(f"Unable to read INI config: {err}") from err
    except (configparser.Error, UnicodeDecodeError) as err:
        raise RuntimeError(f"Malformed INI config: {err}") from err

    try:
        section = parser[MAIN_SECTION]
        endpoint = section["endpoint"]
        token = section["token"]
        repository_format = section["format"]
        out_dir = section["out_dir"]
        ssh_key_path = section["ssh_key"]
    except KeyError as err:
        raise RuntimeError(f"No value for section: {err}") from err
    except configparser.Error as err:
        # Raised on value access, e.g. a stray "%" breaking interpolation.
        raise RuntimeError(f"Malformed INI config: {err}") from err

    try:
        return Config(
            repository_format=repository_format,
            endpoint=endpoint,
            token=token,
            out_dir=out_dir,
            ssh_key_path=ssh_key_path,
        )
    except ValueError as err:
        # pydantic validation errors subclass ValueError (e.g. bad URL).
        raise RuntimeError(f"Invalid config value: {err}") from err
def _git_env(ssh_key: str):
    """Return the environment for a git child process.

    When ``ssh_key`` names an existing file, git is told through
    ``GIT_SSH_COMMAND`` to authenticate with that key only.  Otherwise
    ``None`` is returned, which makes ``subprocess`` inherit the current
    environment, so placeholder values keep the previous behaviour.
    """
    # Imported locally so the helper needs no new module-level imports.
    import os
    import shlex

    if not ssh_key or not os.path.isfile(ssh_key):
        return None
    env = dict(os.environ)
    # The value is interpreted by a shell, hence the quoting of the path.
    env["GIT_SSH_COMMAND"] = (
        f"ssh -i {shlex.quote(ssh_key)} -o IdentitiesOnly=yes"
    )
    return env


def _run_git(args, cwd: str, ssh_key: str) -> bool:
    """Run ``git <args>`` inside ``cwd``; return ``True`` on exit status 0."""
    try:
        subprocess.check_call(["git", *args], cwd=cwd, env=_git_env(ssh_key))
    except (subprocess.CalledProcessError, OSError):
        # OSError covers a missing git executable or working directory.
        return False
    return True


def git_clone(ssh_url: str, repository: str, ssh_key: str) -> bool:
    """Clone ``ssh_url`` into the directory ``repository``.

    The directory is created when needed and the clone is made directly
    into it (``git clone <url> .``).

    :param ssh_url: remote URL of the repository.
    :param repository: local target directory.
    :param ssh_key: path of the private key to authenticate with.
    :return: ``True`` on success; ``False`` (after printing a message) when
        the directory cannot be created or git fails.
    """
    try:
        makedirs(repository, exist_ok=True)
        cloned = _run_git(["clone", ssh_url, "."], repository, ssh_key)
    except OSError:
        cloned = False
    if not cloned:
        print(
            f"Unable to clone repository {repository} "
            f"with key {ssh_key} from {ssh_url}"
        )
    return cloned


def git_pull(repository: str, ssh_key: str) -> bool:
    """Run ``git pull`` inside the working copy ``repository``.

    :param repository: local directory holding the clone.
    :param ssh_key: path of the private key to authenticate with.
    :return: ``True`` on success; ``False`` (after printing a message) when
        git fails or cannot be started.
    """
    pulled = _run_git(["pull"], repository, ssh_key)
    if not pulled:
        print(f"Unable to pull repository {repository} with key {ssh_key}")
    return pulled
def _get_test_repository() -> "GiteaRepository":
    """Build a sample repository whose fields mirror the validation data.

    ``is_valid_format`` no longer needs a model instance; the helper is
    kept so that existing imports of it continue to work.
    """
    return GiteaRepository(
        ssh_url="ssh://git@example.com/project/name",
        name="test name",
        id=42,
        updated_at=datetime.datetime.now(),
        owner=GiteaUser(
            id=23,
            login="test_user",
            email="test_user@example.com",
        ),
    )


def _render_name(
    name_format: str, *, name, repository_id, owner, owner_id
) -> str:
    """Fill ``name_format`` with the fields a name template may use."""
    return name_format.format(
        name=name,
        repository_id=repository_id,
        owner=owner,
        owner_id=owner_id,
    )


def is_valid_format(name_format: str) -> bool:
    """Tell whether ``name_format`` can be rendered for a repository.

    The template is rendered once with sample values of the same types as
    real repository fields.

    :param name_format: ``str.format`` template; the available fields are
        ``name``, ``repository_id``, ``owner`` and ``owner_id``.
    :return: ``False`` when rendering fails, ``True`` otherwise.
    """
    try:
        _render_name(
            name_format,
            name="test name",
            repository_id=42,
            owner="test_user",
            owner_id=23,
        )
    except (KeyError, IndexError, ValueError, AttributeError, TypeError):
        # KeyError: unknown field; IndexError: positional "{}"/"{0}";
        # ValueError: unbalanced brace or bad format spec;
        # AttributeError/TypeError: "{name.x}" or "{owner_id[0]}".
        return False
    return True


def get_repository_name(name_format: str, r: "GiteaRepository") -> str:
    """Render the local directory name of repository ``r``.

    :param name_format: ``str.format`` template, e.g. ``{owner}/{name}``.
    :param r: repository providing ``name``, ``repo_id`` and the owner's
        ``login`` and ``user_id``.
    :return: the rendered name.
    """
    return _render_name(
        name_format,
        name=r.name,
        repository_id=r.repo_id,
        owner=r.owner.login,
        owner_id=r.owner.user_id,
    )


def is_valid_repository_names(
    name_format: str, repos: "List[GiteaRepository]"
) -> bool:
    """Check that ``name_format`` gives every repository a distinct name.

    :param name_format: ``str.format`` template used for directory names.
    :param repos: repositories that are going to be mirrored.
    :return: ``True`` when no two repositories render to the same name.
    """
    names = {get_repository_name(name_format, r) for r in repos}
    return len(names) == len(repos)  # all names must be unique
@pytest.mark.parametrize(
    ("name_format", "expected"),
    (
        ("{blabla}", False),
        ("", True),
        ("{owner}/{name}", True),
    ),
)
def test_name_formatting(name_format, expected):
    """Check ``is_valid_format`` for unknown, absent and known fields."""
    is_valid = is_valid_format(name_format)
    assert is_valid == expected