feat: Added progress for jsl importing
This commit is contained in:
@@ -2,6 +2,7 @@ import sys
|
|||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
|
import subprocess
|
||||||
|
|
||||||
from sqlalchemy.exc import IntegrityError
|
from sqlalchemy.exc import IntegrityError
|
||||||
import tqdm
|
import tqdm
|
||||||
@@ -13,6 +14,11 @@ from src.settings import init_logging
|
|||||||
BATCH_SIZE = 1000
|
BATCH_SIZE = 1000
|
||||||
|
|
||||||
|
|
||||||
|
def get_lines_count(path: str) -> int:
    """Return the number of lines in the file at *path*.

    The result is meant to be used as the ``total`` of a progress bar that
    iterates over the file line by line, so it counts every line such an
    iteration yields: each newline-terminated line, plus a final line that
    is not terminated by a newline (which ``wc -l`` would not count).

    The file is read in binary chunks rather than by shelling out to ``wc``:
    that avoids depending on an external binary and on the exact formatting
    of its output (some ``wc`` implementations left-pad the count with
    spaces, which broke ``split(" ")[0]``).

    Args:
        path: Path of the file to inspect.

    Returns:
        The number of lines; ``0`` for an empty file.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    chunk_size = 1024 * 1024  # read in 1 MiB pieces to keep memory use flat
    newline_count = 0
    last_byte = b""
    with open(path, "rb") as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            newline_count += chunk.count(b"\n")
            last_byte = chunk[-1:]
    # A non-empty file whose last byte is not a newline still ends with one
    # more (unterminated) line.
    if last_byte and last_byte != b"\n":
        newline_count += 1
    return newline_count
|
||||||
|
|
||||||
|
|
||||||
def get_user_id(session, name: str):
|
def get_user_id(session, name: str):
|
||||||
user = session.query(User).filter(User.name == name).first()
|
user = session.query(User).filter(User.name == name).first()
|
||||||
if user:
|
if user:
|
||||||
@@ -29,7 +35,9 @@ def main():
|
|||||||
added_count = duplicate_count = 0
|
added_count = duplicate_count = 0
|
||||||
begin_at = time.monotonic()
|
begin_at = time.monotonic()
|
||||||
for input_path in sys.argv[1:]:
|
for input_path in sys.argv[1:]:
|
||||||
for line_ind, line in tqdm.tqdm(enumerate(open(input_path)), desc=input_path):
|
for line_ind, line in tqdm.tqdm(
|
||||||
|
enumerate(open(input_path)), desc=input_path, total=get_lines_count(input_path)
|
||||||
|
):
|
||||||
data = json.loads(line)
|
data = json.loads(line)
|
||||||
if session.query(Repository).filter(Repository.id == data["id"]).first():
|
if session.query(Repository).filter(Repository.id == data["id"]).first():
|
||||||
duplicate_count += 1
|
duplicate_count += 1
|
||||||
|
|||||||
Reference in New Issue
Block a user