Tabs replaced with spaces in several files

This commit is contained in:
2016-02-28 02:39:34 +03:00
parent c44e707cec
commit d127c7b87b
3 changed files with 76 additions and 76 deletions

View File

@@ -8,34 +8,34 @@ from nltk import RegexpTokenizer, OrderedDict
from nltk.stem.snowball import RussianStemmer from nltk.stem.snowball import RussianStemmer
# Lookup table from a numeric genre id to its human-readable name.
# NOTE(review): presumably these are VK audio genre ids - confirm against the API docs.
# Entry order is kept exactly as in the original (21 follows 7, and 18 comes last).
genres = {
    1: "Rock",
    2: "Pop",
    3: "Rap & Hip - Hop",
    4: "Easy Listening",
    5: "Dance & House",
    6: "Instrumental",
    7: "Metal",
    21: "Alternative",
    8: "Dubstep",
    9: "Jazz & Blues",
    10: "Drum & Bass",
    11: "Trance",
    12: "Chanson",
    13: "Ethnic",
    14: "Acoustic & Vocal",
    15: "Reggae",
    16: "Classical",
    17: "Indie Pop",
    19: "Speech",
    22: "Electropop & Disco",
    18: "Other",
}
def dictWithoutOneKey(d, key):
    """Return a shallow copy of ``d`` with ``key`` removed.

    The mapping passed in is not modified.

    Args:
        d: Source dictionary.
        key: Key to leave out of the copy.

    Returns:
        A new dictionary holding every entry of ``d`` except ``key``.

    Raises:
        KeyError: If ``key`` is not present in ``d``.
    """
    new_d = d.copy()
    # pop() without a default raises KeyError when the key is absent.
    new_d.pop(key)
    return new_d
if __name__ == '__main__': if __name__ == '__main__':
musicFileName = sys.argv[1] musicFileName = sys.argv[1]

View File

@@ -9,43 +9,43 @@ import vk_api
def captcha_handler(captcha):
    """Ask the operator to solve a captcha and retry the blocked request.

    Prints a prompt containing ``captcha.get_url()``, reads the answer from
    standard input, strips surrounding whitespace, and returns whatever
    ``captcha.try_again(answer)`` returns.

    Args:
        captcha: Object exposing ``get_url()`` and ``try_again(key)``.
            NOTE(review): presumably the captcha object vk_api hands to its
            ``captcha_handler`` callback - confirm against the library docs.
    """
    key = input("Enter Captcha {0}: ".format(captcha.get_url())).strip()
    return captcha.try_again(key)
# getting pazans
# argv[1] is a JSON object; its keys are used as ids, ordered by the length
# of their values, longest first.
pazanIds = None
pazansFileName = sys.argv[1]
with open(pazansFileName, "r") as file:
    jsonData = json.loads(file.read())
pazanIds = [item[0] for item in sorted(jsonData.items(), key=lambda item: len(item[1]), reverse=True)]

vk = vk_api.VkApi(token=sys.argv[3], app_id=sys.argv[4], captcha_handler=captcha_handler)

# Optional argv[5]: index to resume from. The original passed it only as
# enumerate()'s `start`, which relabels the printed index but still begins at
# the first id. The output file is opened in append mode, so skipping the ids
# already processed is what a resume needs.
startIndex = int(sys.argv[5]) if len(sys.argv) > 5 else 0
for index, pazanId in enumerate(pazanIds[startIndex:], start=startIndex):
    done = False
    # Retry the same id until it has been written or deliberately skipped.
    while not done:
        try:
            pazanSongs = []
            print(index, pazanId)
            jsonData = vk.method("execute.getMusic", {"id": pazanId})
            for audio in jsonData["items"]:
                pazanSong = {
                    "artist" : audio["artist"],
                    "title" : audio["title"],
                    "genre_id": audio.get("genre_id", None),
                    "url" : audio["url"],
                }
                pazanSongs.append(pazanSong)
            # One JSON object per line, appended so earlier runs are kept.
            with open(sys.argv[2], "a", encoding="utf-8") as file:
                file.write(json.dumps({pazanId: pazanSongs}, ensure_ascii=False) + "\n")
            done = True
        except vk_api.ApiError as e:
            if e.code == 9:
                # NOTE(review): presumably a rate-limit error - confirm the
                # code's meaning. Wait a minute, then retry the same id.
                print("waiting")
                time.sleep(60)
            elif e.code == 201 or e.code == 15:
                # NOTE(review): presumably access-denied errors - confirm.
                # Skip this id without writing anything.
                done = True
            else:
                # Bare raise re-raises the active exception unchanged.
                raise

View File

@@ -10,9 +10,9 @@ from nltk.tokenize import RegexpTokenizer
def dictWithoutOneKey(d, key):
    """Return a shallow copy of ``d`` with ``key`` removed.

    The mapping passed in is not modified.

    Args:
        d: Source dictionary.
        key: Key to leave out of the copy.

    Returns:
        A new dictionary holding every entry of ``d`` except ``key``.

    Raises:
        KeyError: If ``key`` is not present in ``d``.
    """
    new_d = d.copy()
    # pop() without a default raises KeyError when the key is absent.
    new_d.pop(key)
    return new_d
# load pazans
@@ -20,7 +20,7 @@ pazans_groups = None
# argv[1]: JSON file whose top-level object is later looked up by user id
# (as a string); len() of each value is used as a weight.
pazans_file_name = sys.argv[1]
with open(pazans_file_name, "r") as file:
    pazans_groups = json.loads(file.read())

# analyze statuses
# Filled in below: normalized status text -> counters for that status.
status_stats = dict()
@@ -30,29 +30,29 @@ stemmer = RussianStemmer()
# argv[2]: file with one JSON user object per line.
users_file_name = sys.argv[2]
with open(users_file_name, "r") as file:
    for line in file:
        user = json.loads(line)
        uid = str(user["_id"])
        # Only users present in pazans_groups are counted.
        if uid in pazans_groups:
            pazan_groups = pazans_groups[uid]
            status_text = user.get("status", "")
            # Normalized key: stemmed, lower-cased tokens joined with no separator.
            filtered_status_text = "".join([stemmer.stem(token).lower() for token in tokenizer.tokenize(status_text)])
            if len(filtered_status_text) > 1:
                # The first status text seen for a key is kept as its "full" form.
                status_stats_item = status_stats.get(filtered_status_text, {
                    "full": status_text,
                    "count-boys": 0,
                    "count-girls": 0,
                })
                # Each user is weighted by the number of entries in pazans_groups[uid].
                # NOTE(review): presumably sex == 2 is male and 1 is female,
                # going by the counter names - confirm against the data source.
                if user["sex"] == 2:
                    status_stats_item["count-boys"] += len(pazan_groups)
                if user["sex"] == 1:
                    status_stats_item["count-girls"] += len(pazan_groups)
                # Fixed: the original indexed with the undefined name
                # `filteredstatus_text`, which raises NameError on first use.
                status_stats[filtered_status_text] = status_stats_item
# print result
# argv[3]: destination for the report, written as indented UTF-8 JSON.
dest_file_name = sys.argv[3]


def sortKeyGetter(item):
    """Return the combined boys + girls count of a ``(key, stats)`` pair."""
    return item[1]["count-boys"] + item[1]["count-girls"]


with open(dest_file_name, "w", encoding="utf-8") as file:
    # Most frequent statuses first.
    sortedStatues = [item[1] for item in sorted(status_stats.items(), key=sortKeyGetter, reverse=True)]
    # Re-key by the original status text; the remaining fields are the counters.
    data = OrderedDict([(item["full"], dictWithoutOneKey(item, "full")) for item in sortedStatues])
    file.write(json.dumps(data, ensure_ascii=False, indent=4))