feat: Added progress bar total for jsl (JSON Lines) importing

This commit is contained in:
2021-01-16 21:51:14 +03:00
parent d0741baf44
commit 97fb017e7e

View File

@@ -2,6 +2,7 @@ import sys
import json import json
import logging import logging
import time import time
import subprocess
from sqlalchemy.exc import IntegrityError from sqlalchemy.exc import IntegrityError
import tqdm import tqdm
@@ -13,6 +14,11 @@ from src.settings import init_logging
BATCH_SIZE = 1000 BATCH_SIZE = 1000
def get_lines_count(path: str) -> int:
    """Return the number of lines that iterating over the file at *path* yields.

    The count is computed in-process instead of parsing ``wc -l`` output:
    some ``wc`` implementations left-pad the number with spaces (which made
    ``split(" ")[0]`` an empty string), ``wc`` is not available on every
    platform, and ``wc -l`` counts newline characters, so it misses a final
    line that has no trailing newline.

    Args:
        path: Path of the file to inspect.

    Returns:
        The number of ``\\n``-terminated lines, plus one if the file ends
        with an unterminated line. An empty file has zero lines.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    chunk_size = 1 << 20  # 1 MiB keeps memory flat regardless of file size
    line_count = 0
    last_byte = b""
    with open(path, "rb") as input_file:
        for chunk in iter(lambda: input_file.read(chunk_size), b""):
            line_count += chunk.count(b"\n")
            last_byte = chunk[-1:]
    # A trailing line without "\n" is still produced by line iteration,
    # so it has to be counted for the progress total to match.
    if last_byte and last_byte != b"\n":
        line_count += 1
    return line_count
def get_user_id(session, name: str): def get_user_id(session, name: str):
user = session.query(User).filter(User.name == name).first() user = session.query(User).filter(User.name == name).first()
if user: if user:
@@ -29,7 +35,9 @@ def main():
added_count = duplicate_count = 0 added_count = duplicate_count = 0
begin_at = time.monotonic() begin_at = time.monotonic()
for input_path in sys.argv[1:]: for input_path in sys.argv[1:]:
for line_ind, line in tqdm.tqdm(enumerate(open(input_path)), desc=input_path): for line_ind, line in tqdm.tqdm(
enumerate(open(input_path)), desc=input_path, total=get_lines_count(input_path)
):
data = json.loads(line) data = json.loads(line)
if session.query(Repository).filter(Repository.id == data["id"]).first(): if session.query(Repository).filter(Repository.id == data["id"]).first():
duplicate_count += 1 duplicate_count += 1