From d127c7b87b0d760ea323f1749605f81804966066 Mon Sep 17 00:00:00 2001 From: Aleksey Lobanov Date: Sun, 28 Feb 2016 02:39:34 +0300 Subject: [PATCH] Tabs replaced with spaces in several files --- audio-analyzer.py | 48 ++++++++++++++++++------------------- audio-fetcher.py | 60 +++++++++++++++++++++++----------------------- status-analyzer.py | 44 +++++++++++++++++----------------- 3 files changed, 76 insertions(+), 76 deletions(-) diff --git a/audio-analyzer.py b/audio-analyzer.py index 6d7940f..97f5189 100644 --- a/audio-analyzer.py +++ b/audio-analyzer.py @@ -8,34 +8,34 @@ from nltk import RegexpTokenizer, OrderedDict from nltk.stem.snowball import RussianStemmer genres = { - 1: "Rock", - 2: "Pop", - 3: "Rap & Hip - Hop", - 4: "Easy Listening", - 5: "Dance & House", - 6: "Instrumental", - 7: "Metal", - 21: "Alternative", - 8: "Dubstep", - 9: "Jazz & Blues", - 10: "Drum & Bass", - 11: "Trance", - 12: "Chanson", - 13: "Ethnic", - 14: "Acoustic & Vocal", - 15: "Reggae", - 16: "Classical", - 17: "Indie Pop", - 19: "Speech", - 22: "Electropop & Disco", - 18: "Other" + 1: "Rock", + 2: "Pop", + 3: "Rap & Hip - Hop", + 4: "Easy Listening", + 5: "Dance & House", + 6: "Instrumental", + 7: "Metal", + 21: "Alternative", + 8: "Dubstep", + 9: "Jazz & Blues", + 10: "Drum & Bass", + 11: "Trance", + 12: "Chanson", + 13: "Ethnic", + 14: "Acoustic & Vocal", + 15: "Reggae", + 16: "Classical", + 17: "Indie Pop", + 19: "Speech", + 22: "Electropop & Disco", + 18: "Other" } def dictWithoutOneKey(d, key): - new_d = d.copy() - new_d.pop(key) - return new_d + new_d = d.copy() + new_d.pop(key) + return new_d if __name__ == '__main__': musicFileName = sys.argv[1] diff --git a/audio-fetcher.py b/audio-fetcher.py index 6109c20..c078b44 100644 --- a/audio-fetcher.py +++ b/audio-fetcher.py @@ -9,43 +9,43 @@ import vk_api def captcha_handler(captcha): - key = input("Enter Captcha {0}: ".format(captcha.get_url())).strip() - return captcha.try_again(key) + key = input("Enter Captcha {0}: ".format(captcha.get_url())).strip() + return captcha.try_again(key) # getting pazans pazanIds = None pazansFileName = sys.argv[1] with open(pazansFileName, "r") as file: - jsonData = json.loads(file.read()) - pazanIds = [item[0] for item in sorted(jsonData.items(), key=lambda item: len(item[1]), reverse=True)] + jsonData = json.loads(file.read()) + pazanIds = [item[0] for item in sorted(jsonData.items(), key=lambda item: len(item[1]), reverse=True)] vk = vk_api.VkApi(token=sys.argv[3], app_id=sys.argv[4], captcha_handler=captcha_handler) for index, pazanId in enumerate(pazanIds, start=(int(sys.argv[5]) if len(sys.argv) > 5 else 0)): - done = False - while not done: - try: + done = False + while not done: + try: pazanSongs = [] - - print(index, pazanId) - - jsonData = vk.method("execute.getMusic", {"id": pazanId}) - for audio in jsonData["items"]: - pazanSong = { - "artist" : audio["artist"], - "title" : audio["title"], - "genre_id": audio.get("genre_id", None), - "url" : audio["url"], - } - pazanSongs.append(pazanSong) - with open(sys.argv[2], "a", encoding="utf-8") as file: - file.write(json.dumps({pazanId: pazanSongs}, ensure_ascii=False) + "\n") - done = True - except vk_api.ApiError as e: - if e.code == 9: - print("waiting") - time.sleep(60) - elif e.code == 201 or e.code == 15: - done = True - else: - raise e + + print(index, pazanId) + + jsonData = vk.method("execute.getMusic", {"id": pazanId}) + for audio in jsonData["items"]: + pazanSong = { + "artist" : audio["artist"], + "title" : audio["title"], + "genre_id": audio.get("genre_id", None), + "url" : audio["url"], + } + pazanSongs.append(pazanSong) + with open(sys.argv[2], "a", encoding="utf-8") as file: + file.write(json.dumps({pazanId: pazanSongs}, ensure_ascii=False) + "\n") + done = True + except vk_api.ApiError as e: + if e.code == 9: + print("waiting") + time.sleep(60) + elif e.code == 201 or e.code == 15: + done = True + else: + raise e diff --git a/status-analyzer.py b/status-analyzer.py index b6901eb..42ef166 100644 --- a/status-analyzer.py +++ b/status-analyzer.py @@ -10,9 +10,9 @@ from nltk.tokenize import RegexpTokenizer def dictWithoutOneKey(d, key): - new_d = d.copy() - new_d.pop(key) - return new_d + new_d = d.copy() + new_d.pop(key) + return new_d # load pazans @@ -20,7 +20,7 @@ pazans_groups = None pazans_file_name = sys.argv[1] with open(pazans_file_name, "r") as file: - pazans_groups = json.loads(file.read()) + pazans_groups = json.loads(file.read()) # analyze statues status_stats = dict() @@ -30,29 +30,29 @@ stemmer = RussianStemmer() users_file_name = sys.argv[2] with open(users_file_name, "r") as file: - for line in file: - user = json.loads(line) - uid = str(user["_id"]) - if uid in pazans_groups: - pazan_groups = pazans_groups[uid] - status_text = user.get("status", "") - filtered_status_text = "".join([stemmer.stem(token).lower() for token in tokenizer.tokenize(status_text)]) - if len(filtered_status_text) > 1: - status_stats_item = status_stats.get(filtered_status_text, { - "full": status_text, - "count-boys": 0, - "count-girls": 0, - }) + for line in file: + user = json.loads(line) + uid = str(user["_id"]) + if uid in pazans_groups: + pazan_groups = pazans_groups[uid] + status_text = user.get("status", "") + filtered_status_text = "".join([stemmer.stem(token).lower() for token in tokenizer.tokenize(status_text)]) + if len(filtered_status_text) > 1: + status_stats_item = status_stats.get(filtered_status_text, { + "full": status_text, + "count-boys": 0, + "count-girls": 0, + }) if user["sex"] == 2: status_stats_item["count-boys"] += len(pazan_groups) if user["sex"] == 1: status_stats_item["count-girls"] += len(pazan_groups) - status_stats[filteredstatus_text] = status_stats_item + status_stats[filteredstatus_text] = status_stats_item # print result dest_file_name = sys.argv[3] with open(dest_file_name, "w", encoding="utf-8") as file: - sortKeyGetter = lambda item: item[1]["count-boys"] + item[1]["count-girls"] - sortedStatues = [item[1] for item in sorted(status_stats.items(), key=sortKeyGetter, reverse=True)] - data = OrderedDict([(item["full"], dictWithoutOneKey(item, "full")) for item in sortedStatues]) - file.write(json.dumps(data, ensure_ascii=False, indent=4)) + sortKeyGetter = lambda item: item[1]["count-boys"] + item[1]["count-girls"] + sortedStatues = [item[1] for item in sorted(status_stats.items(), key=sortKeyGetter, reverse=True)] + data = OrderedDict([(item["full"], dictWithoutOneKey(item, "full")) for item in sortedStatues]) + file.write(json.dumps(data, ensure_ascii=False, indent=4))