Compare commits: 82e5b05c23...master

16 Commits (SHA1):

- cd6e11fe48
- 1de35afed2
- d23d6e0b42
- 3389c0a35a
- 4b4624afc8
- 19628bc774
- f02154bfd3
- baa8c6b9a4
- 615e3989fb
- 10295e2f21
- 349e4d43be
- 22b1f374af
- cd553879a2
- c22f84b7a4
- 894d0b24c5
- 07c8bea489
.gitea/workflows/local-docker.yaml (new file, +34)

```diff
@@ -0,0 +1,34 @@
+---
+name: Push to local registry
+run-name: ${{ gitea.actor }} is pushing -> local Docker
+on:
+  - push
+jobs:
+  build:
+    name: Build and push image
+    runs-on: ubuntu-latest
+    container: catthehacker/ubuntu:act-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+
+      - name: Login to Docker Registry
+        uses: docker/login-action@v3
+        with:
+          registry: gitea.likemath.ru
+          username: ${{ secrets.REGISTRY_USERNAME }}
+          password: ${{ secrets.REGISTRY_TOKEN }}
+
+      - name: Set up QEMU
+        uses: docker/setup-qemu-action@v3
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build and push
+        uses: docker/build-push-action@v6
+        with:
+          context: .
+          push: true
+          tags: gitea.likemath.ru/alex/s3-mirror:latest
+          platforms: linux/amd64,linux/arm64
```
.gitignore (vendored, 1 line changed)

```diff
@@ -159,4 +159,3 @@ cython_debug/
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
-
```
.pre-commit-config.yaml

```diff
@@ -1,11 +1,11 @@
 repos:
 - repo: https://github.com/psf/black
-  rev: 22.12.0
+  rev: 24.10.0
   hooks:
   - id: black
     args: [--line-length=88, --target-version=py38]
-- repo: https://gitlab.com/pycqa/flake8
-  rev: 3.9.2
+- repo: https://github.com/PyCQA/flake8
+  rev: 7.1.1
   hooks:
   - id: flake8
     args: # arguments to configure flake8
@@ -20,7 +20,7 @@ repos:
   - id: trailing-whitespace
   - id: check-json
 - repo: https://github.com/pycqa/isort
-  rev: 5.10.1
+  rev: 5.13.2
   hooks:
   - id: isort
     args: ["--filter-files" ]
```
Dockerfile (new file, +14)

```diff
@@ -0,0 +1,14 @@
+FROM docker.io/python:3.12-slim as builder
+
+WORKDIR /app
+
+ENV PYTHONDONTWRITEBYTECODE 1
+ENV PYTHONUNBUFFERED 1
+
+COPY requirements.txt .
+RUN pip install -r requirements.txt
+
+
+COPY *.py /app
+
+ENTRYPOINT ["python3", "main.py"]
```
README.md

````diff
@@ -1,3 +1,9 @@
 # s3-mirror
 
 Full mirroring between two s3-targets with redundant files removing.
+
+## Example with Docker
+```
+docker build . -t s3-mirror
+podman run -v ~/.mcli/config.json:/root/.mc/config.json:ro --rm s3-mirror
+```
````
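The README's run example bind-mounts an mc alias file to /root/.mc/config.json, which is the same file main.py loads (see the s3_config_data context line in the next hunk). As a minimal sketch, assuming the usual mc config layout with url, accessKey, and secretKey fields per alias, and with the helper name invented for illustration, a client could be built from one alias like this:

```python
import json
from os.path import expanduser
from urllib.parse import urlparse

import minio


def client_from_alias(alias: str) -> minio.Minio:
    # Assumed mc config layout:
    # {"aliases": {"<alias>": {"url": ..., "accessKey": ..., "secretKey": ...}}}
    aliases = json.loads(open(expanduser("~/.mc/config.json")).read())["aliases"]
    entry = aliases[alias]
    url = urlparse(entry["url"])
    return minio.Minio(
        url.netloc,
        access_key=entry["accessKey"],
        secret_key=entry["secretKey"],
        secure=(url.scheme == "https"),
    )
```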
main.py (28 lines changed)

```diff
@@ -22,7 +22,9 @@ s3_config_data = json.loads(open(expanduser("~/.mc/config.json")).read())["alias
 def get_files(s3, bucket, prefix) -> Mapping[str, Tuple[int, str]]:
     res = {}
     prefix_len = len(prefix)
-    for obj in s3.list_objects(bucket, prefix=prefix, recursive=True):
+    for obj in tqdm.tqdm(
+        s3.list_objects(bucket, prefix=prefix, recursive=True), desc="S3 list objects"
+    ):
         if obj.is_dir:
             continue
         res[obj.object_name[prefix_len:].lstrip("/")] = (obj.size, obj.etag)
```
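get_files maps each object key (relative to the prefix) to a (size, etag) pair. A later hunk prints get_file_metrics(target_files), which is not shown in this diff; a plausible sketch of such a helper over that mapping, with the body and output format assumed rather than taken from the repository:

```python
from typing import Mapping, Tuple


def get_file_metrics(files: Mapping[str, Tuple[int, str]]) -> str:
    """Summarize a get_files() mapping as an object count and total size.

    Hypothetical helper: only its name and call site appear in the diff,
    so this body is an assumption.
    """
    total_bytes = sum(size for size, _etag in files.values())
    return f"{len(files)} objects, {total_bytes / (1024 ** 3):.2f} GiB"
```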
```diff
@@ -94,8 +96,10 @@ def put_object_data(s3: minio.Minio, bucket: str, object_name: str, data: bytes)
     s3.put_object(
         bucket,
         object_name,
-        io.BytesIO(data),
-        len(data),
+        data=io.BytesIO(data),
+        length=len(data),
+        num_parallel_uploads=1,
         part_size=150 * 1024 * 1024,
     )
 
+
```
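put_object_data now passes data, length, and num_parallel_uploads to minio's put_object by keyword. A minimal sketch of how a source-to-target copy step could use the same call, assuming helper and variable names not shown in this diff (the real main.py may work differently, e.g. stream rather than buffer in memory):

```python
import io

import minio


def copy_object(
    source_s3: minio.Minio,
    target_s3: minio.Minio,
    source_bucket: str,
    source_object: str,
    target_bucket: str,
    target_object: str,
) -> None:
    # Read the whole object from the source side.
    response = source_s3.get_object(source_bucket, source_object)
    try:
        data = response.read()
    finally:
        response.close()
        response.release_conn()

    # Upload it to the target with the same keyword arguments as the diff.
    target_s3.put_object(
        target_bucket,
        target_object,
        data=io.BytesIO(data),
        length=len(data),
        num_parallel_uploads=1,
        part_size=150 * 1024 * 1024,
    )
```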
```diff
@@ -149,20 +153,28 @@ def main():
     for file_to_remove in redundant:
         object_name = os.path.join(target_prefix, file_to_remove)
         logging.info(f"Removing redundant {target_bucket}:{object_name}")
-        target_s3.remove_object(bucket_name="backups", object_name=object_name)
+        try:
+            target_s3.remove_object(
+                bucket_name=target_bucket, object_name=object_name
+            )
+        except Exception as err:
+            print(
+                f"Unable to remove {target_bucket}/{object_name}: erorr {err}"
+            )
         del target_files[file_to_remove]
     print(f"Removed {len(redundant)} files")
     print(f"Target after removing redundant {get_file_metrics(target_files)}")
 
     new_files = get_redundant_files(target_files, source_files)
     print(f"New {len(new_files)} files")
-    different_files = get_redundant_files(source_files, target_files)
+
+    different_files = get_different_files(source_files, target_files)
     print(f"Different {len(different_files)} files")
 
     for key in tqdm.tqdm(new_files.union(different_files)):
-        try:
         source_object = os.path.join(source_prefix, key)
         target_object = os.path.join(target_prefix, key)
+        try:
             logging.info(
                 f"Moving {source_bucket}:{source_object} to "
                 f"{target_bucket}:{target_object}"
```
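Neither get_redundant_files nor the new get_different_files is defined in the visible hunks. One plausible reading, consistent with how they are called here (set-valued results keyed by relative object name, compared on the (size, etag) pairs from get_files); the bodies below are assumptions, not code from the repository:

```python
from typing import Mapping, Set, Tuple

# key -> (size, etag), as built by get_files
FileMap = Mapping[str, Tuple[int, str]]


def get_redundant_files(reference: FileMap, candidate: FileMap) -> Set[str]:
    # Keys present in `candidate` but absent from `reference`.
    # Called as get_redundant_files(target_files, source_files), this yields
    # objects that exist on the source but not yet on the target ("new" files);
    # with the arguments swapped it yields objects to delete from the target.
    return set(candidate) - set(reference)


def get_different_files(source: FileMap, target: FileMap) -> Set[str]:
    # Keys present on both sides whose (size, etag) metadata no longer match.
    return {
        key for key, meta in source.items() if key in target and target[key] != meta
    }
```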
```diff
@@ -177,8 +189,8 @@ def main():
         except RetryError:
             logging.warning(
                 "Retry on moving"
-                "{source_bucket}:{source_object} to "
-                "{target_bucket}:{target_object}"
+                f"{source_bucket}:{source_object} to "
+                f"{target_bucket}:{target_object}"
             )
 
 
```
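The except RetryError branch implies the per-object move is wrapped by a retry library; tenacity is one library that raises RetryError once its attempts are exhausted, but the diff does not show which one this project uses. A minimal sketch under that assumption, with the decorator parameters chosen arbitrarily:

```python
from tenacity import RetryError, retry, stop_after_attempt, wait_exponential


@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, max=30))
def move_object(source_s3, target_s3, source_bucket, source_object,
                target_bucket, target_object) -> None:
    # Hypothetical wrapper: copy one object from source to target here
    # (e.g. the copy_object sketch above). tenacity re-runs the function on
    # any exception and raises RetryError after the final attempt, which is
    # what the `except RetryError:` branch in the diff catches.
    ...


# Usage mirroring the loop in main():
# try:
#     move_object(source_s3, target_s3, source_bucket, source_object,
#                 target_bucket, target_object)
# except RetryError:
#     logging.warning(f"Retry on moving {source_bucket}:{source_object} ...")
```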