From 97fb017e7e1d8066071e5abe81551715bed099c5 Mon Sep 17 00:00:00 2001 From: Aleksey Lobanov Date: Sat, 16 Jan 2021 21:51:14 +0300 Subject: [PATCH] feat: Added progress for jsl importing --- import_jsl.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/import_jsl.py b/import_jsl.py index b8788bc..5de4301 100644 --- a/import_jsl.py +++ b/import_jsl.py @@ -2,6 +2,7 @@ import sys import json import logging import time +import subprocess from sqlalchemy.exc import IntegrityError import tqdm @@ -13,6 +14,11 @@ from src.settings import init_logging BATCH_SIZE = 1000 +def get_lines_count(path: str) -> int: + wc_output = subprocess.check_output(["wc", "-l", path]).decode("utf-8") + return int(wc_output.split(" ")[0]) + + def get_user_id(session, name: str): user = session.query(User).filter(User.name == name).first() if user: @@ -29,7 +35,9 @@ def main(): added_count = duplicate_count = 0 begin_at = time.monotonic() for input_path in sys.argv[1:]: - for line_ind, line in tqdm.tqdm(enumerate(open(input_path)), desc=input_path): + for line_ind, line in tqdm.tqdm( + enumerate(open(input_path)), desc=input_path, total=get_lines_count(input_path) + ): data = json.loads(line) if session.query(Repository).filter(Repository.id == data["id"]).first(): duplicate_count += 1