Compare commits


16 Commits

SHA1        Message  (Date)  [CI: Push to local registry / Build and push image (push)]
cd6e11fe48  refactor[auto]: pre-commit  (2024-10-13 01:06:41 +03:00)  [CI: successful in 2m8s]
1de35afed2  ci: Update pre-commit  (2024-10-13 01:06:10 +03:00)
d23d6e0b42  ci: QEMU build  (2024-07-21 22:31:54 +03:00)  [CI: successful in 1m58s]
3389c0a35a  ci: Multiple docker platforms  (2024-07-21 22:28:48 +03:00)  [CI: failing after 13s]
4b4624afc8  ci: Local tag  (2024-07-21 22:21:17 +03:00)  [CI: successful in 30s]
19628bc774  ci: Single tag  (2024-07-21 22:19:07 +03:00)  [CI: failing after 21s]
f02154bfd3  ci: New local docker  (2024-07-21 22:18:31 +03:00)
baa8c6b9a4  ci: Add push tag  (2024-07-21 22:16:10 +03:00)
615e3989fb  ci: gitea fix  (2024-07-21 22:09:08 +03:00)  [CI: failing after 15s]
10295e2f21  ci: Add docker and registry integration  (2024-07-21 22:07:01 +03:00)
349e4d43be  fix: fixes source bucket instead of target bucket for removing  (2023-06-04 23:16:48 +03:00)
22b1f374af  fix: Correct bucket for removing  (2023-06-04 23:15:30 +03:00)
cd553879a2  feat: Use part_size with fixed big size for better ETag  (2023-02-18 23:29:43 +03:00)
c22f84b7a4  feat: Improve error logging  (2023-02-18 20:54:53 +03:00)
894d0b24c5  feat: tqdm for S3-list  (2023-02-18 20:54:35 +03:00)
07c8bea489  fix: correct calculation of different_files  (2023-02-11 23:11:57 +03:00)
6 changed files with 80 additions and 15 deletions

New Gitea Actions workflow "Push to local registry" (new file, 34 lines)

@@ -0,0 +1,34 @@
---
name: Push to local registry
run-name: ${{ gitea.actor }} is pushing -> local Docker
on:
  - push
jobs:
  build:
    name: Build and push image
    runs-on: ubuntu-latest
    container: catthehacker/ubuntu:act-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Login to Docker Registry
        uses: docker/login-action@v3
        with:
          registry: gitea.likemath.ru
          username: ${{ secrets.REGISTRY_USERNAME }}
          password: ${{ secrets.REGISTRY_TOKEN }}
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Build and push
        uses: docker/build-push-action@v6
        with:
          context: .
          push: true
          tags: gitea.likemath.ru/alex/s3-mirror:latest
          platforms: linux/amd64,linux/arm64

.gitignore (1 changed line)

@@ -159,4 +159,3 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/

.pre-commit-config.yaml

@@ -1,11 +1,11 @@
 repos:
   - repo: https://github.com/psf/black
-    rev: 22.12.0
+    rev: 24.10.0
     hooks:
       - id: black
         args: [--line-length=88, --target-version=py38]
-  - repo: https://gitlab.com/pycqa/flake8
-    rev: 3.9.2
+  - repo: https://github.com/PyCQA/flake8
+    rev: 7.1.1
     hooks:
       - id: flake8
         args: # arguments to configure flake8
@@ -20,7 +20,7 @@ repos:
       - id: trailing-whitespace
       - id: check-json
   - repo: https://github.com/pycqa/isort
-    rev: 5.10.1
+    rev: 5.13.2
     hooks:
       - id: isort
         args: ["--filter-files" ]

Dockerfile (new file, 14 lines)

@@ -0,0 +1,14 @@
FROM docker.io/python:3.12-slim as builder
WORKDIR /app
ENV PYTHONDONTWRITEBYTECODE 1
ENV PYTHONUNBUFFERED 1
COPY requirements.txt .
RUN pip install -r requirements.txt
COPY *.py /app
ENTRYPOINT ["python3", "main.py"]

README.md

@@ -1,3 +1,9 @@
 # s3-mirror
 Full mirroring between two s3-targets with redundant files removing.
+## Example with Docker
+```
+docker build . -t s3-mirror
+podman run -v ~/.mcli/config.json:/root/.mc/config.json:ro --rm s3-mirror
+```
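
The run example above mounts an mc-style config.json into the container, and the main.py hunk header further down shows the script reading its S3 aliases from ~/.mc/config.json. As a rough illustration of that setup, and not code from this repository, the sketch below builds minio.Minio clients from such an aliases section; the helper and alias names are made up, and the url/accessKey/secretKey layout assumed here is the MinIO client's usual config.json format.

```
# Hedged sketch (not the repo's actual code): turn an mc-style aliases entry
# into a minio.Minio client. Helper and alias names are illustrative.
import json
from os.path import expanduser
from urllib.parse import urlparse

import minio


def client_from_alias(alias: str, config_path: str = "~/.mc/config.json") -> minio.Minio:
    # mc stores credentials per alias under the "aliases" key of config.json.
    with open(expanduser(config_path)) as f:
        aliases = json.load(f)["aliases"]
    entry = aliases[alias]
    url = urlparse(entry["url"])
    return minio.Minio(
        url.netloc,
        access_key=entry["accessKey"],
        secret_key=entry["secretKey"],
        secure=(url.scheme == "https"),
    )


# Example usage with placeholder alias names:
# source_s3 = client_from_alias("source")
# target_s3 = client_from_alias("target")
```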

main.py (30 changed lines)

@@ -22,7 +22,9 @@ s3_config_data = json.loads(open(expanduser("~/.mc/config.json")).read())["alias
 def get_files(s3, bucket, prefix) -> Mapping[str, Tuple[int, str]]:
     res = {}
     prefix_len = len(prefix)
-    for obj in s3.list_objects(bucket, prefix=prefix, recursive=True):
+    for obj in tqdm.tqdm(
+        s3.list_objects(bucket, prefix=prefix, recursive=True), desc="S3 list objects"
+    ):
         if obj.is_dir:
             continue
         res[obj.object_name[prefix_len:].lstrip("/")] = (obj.size, obj.etag)
@@ -94,8 +96,10 @@ def put_object_data(s3: minio.Minio, bucket: str, object_name: str, data: bytes)
     s3.put_object(
         bucket,
         object_name,
-        io.BytesIO(data),
-        len(data),
+        data=io.BytesIO(data),
+        length=len(data),
+        num_parallel_uploads=1,
+        part_size=150 * 1024 * 1024,
     )
@@ -149,20 +153,28 @@ def main():
     for file_to_remove in redundant:
         object_name = os.path.join(target_prefix, file_to_remove)
         logging.info(f"Removing redundant {target_bucket}:{object_name}")
-        target_s3.remove_object(bucket_name="backups", object_name=object_name)
+        try:
+            target_s3.remove_object(
+                bucket_name=target_bucket, object_name=object_name
+            )
+        except Exception as err:
+            print(
+                f"Unable to remove {target_bucket}/{object_name}: erorr {err}"
+            )
         del target_files[file_to_remove]
     print(f"Removed {len(redundant)} files")
     print(f"Target after removing redundant {get_file_metrics(target_files)}")
     new_files = get_redundant_files(target_files, source_files)
     print(f"New {len(new_files)} files")
-    different_files = get_redundant_files(source_files, target_files)
+    different_files = get_different_files(source_files, target_files)
     print(f"Different {len(different_files)} files")
     for key in tqdm.tqdm(new_files.union(different_files)):
+        source_object = os.path.join(source_prefix, key)
+        target_object = os.path.join(target_prefix, key)
         try:
-            source_object = os.path.join(source_prefix, key)
-            target_object = os.path.join(target_prefix, key)
             logging.info(
                 f"Moving {source_bucket}:{source_object} to "
                 f"{target_bucket}:{target_object}"
@@ -177,8 +189,8 @@ def main():
         except RetryError:
             logging.warning(
                 "Retry on moving"
-                "{source_bucket}:{source_object} to "
-                "{target_bucket}:{target_object}"
+                f"{source_bucket}:{source_object} to "
+                f"{target_bucket}:{target_object}"
             )
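
The part_size=150 * 1024 * 1024 argument added in put_object_data matches the commit "feat: Use part_size with fixed big size for better ETag": for multipart uploads, S3-compatible stores generally report the ETag as the MD5 of the concatenated per-part MD5 digests plus a part count, so the same bytes uploaded with different part sizes end up with different ETags, and the (size, etag) comparison built in get_files would then flag identical objects as different. The sketch below is illustrative and not taken from this repository; the helper name is hypothetical, and it assumes single-part objects keep a plain MD5 ETag.

```
# Hedged sketch: expected multipart ETag of a local file for a given part size.
# With the same fixed part_size on both sides, identical objects yield identical
# ETags, which keeps a (size, etag) comparison meaningful.
import hashlib


def multipart_etag(path: str, part_size: int = 150 * 1024 * 1024) -> str:
    part_digests = []
    with open(path, "rb") as f:
        # Hash each part exactly as it would be uploaded.
        while chunk := f.read(part_size):
            part_digests.append(hashlib.md5(chunk).digest())
    if len(part_digests) == 1:
        # Objects uploaded in a single part usually get a plain MD5 ETag.
        return part_digests[0].hex()
    # Multipart ETag: MD5 over the concatenated part digests, plus the part count.
    return hashlib.md5(b"".join(part_digests)).hexdigest() + f"-{len(part_digests)}"
```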