Tabs replaced with spaces in several files
This commit is contained in:
@@ -8,34 +8,34 @@ from nltk import RegexpTokenizer, OrderedDict
|
||||
from nltk.stem.snowball import RussianStemmer
|
||||
|
||||
# Maps a numeric genre id to its display name.
# Ids are kept in the original listing order, which is not numeric order.
genres = {
    1: "Rock",
    2: "Pop",
    3: "Rap & Hip - Hop",
    4: "Easy Listening",
    5: "Dance & House",
    6: "Instrumental",
    7: "Metal",
    21: "Alternative",
    8: "Dubstep",
    9: "Jazz & Blues",
    10: "Drum & Bass",
    11: "Trance",
    12: "Chanson",
    13: "Ethnic",
    14: "Acoustic & Vocal",
    15: "Reggae",
    16: "Classical",
    17: "Indie Pop",
    19: "Speech",
    22: "Electropop & Disco",
    18: "Other",
}
|
||||
|
||||
|
||||
def dictWithoutOneKey(d, key):
    """Return a shallow copy of *d* with *key* removed.

    The input dictionary is left untouched.

    Raises:
        KeyError: if *key* is not present in *d*.
    """
    new_d = d.copy()
    # pop() without a default raises KeyError for a missing key.
    new_d.pop(key)
    return new_d
|
||||
|
||||
if __name__ == '__main__':
|
||||
musicFileName = sys.argv[1]
|
||||
|
||||
@@ -9,43 +9,43 @@ import vk_api
|
||||
|
||||
|
||||
def captcha_handler(captcha):
    """Ask the operator to solve a captcha and retry the blocked request.

    Prints the captcha URL obtained from ``captcha.get_url()`` as the input
    prompt, strips surrounding whitespace from the typed answer, and returns
    whatever ``captcha.try_again(key)`` returns.
    """
    key = input("Enter Captcha {0}: ".format(captcha.get_url())).strip()
    return captcha.try_again(key)
|
||||
|
||||
# Load the user ids to process from the JSON file given as the first argument.
# The file is expected to hold one JSON object; its keys are used as ids and
# are ordered by the length of their values, longest first.
pazanIds = None
pazansFileName = sys.argv[1]
with open(pazansFileName, "r") as file:
    jsonData = json.loads(file.read())
    pazanIds = [item[0] for item in sorted(jsonData.items(), key=lambda item: len(item[1]), reverse=True)]
|
||||
|
||||
# API client: token and app id come from the 3rd and 4th command-line arguments.
vk = vk_api.VkApi(token=sys.argv[3], app_id=sys.argv[4], captcha_handler=captcha_handler)

# NOTE(review): the optional 5th argument only offsets the printed index;
# enumerate(start=...) does not skip any ids. If the intent is to resume a
# previous run, the id list itself needs slicing - confirm before changing.
for index, pazanId in enumerate(pazanIds, start=(int(sys.argv[5]) if len(sys.argv) > 5 else 0)):
    done = False
    # Repeat the request for this id until it succeeds or is deliberately skipped.
    while not done:
        try:
            pazanSongs = []

            print(index, pazanId)

            jsonData = vk.method("execute.getMusic", {"id": pazanId})
            for audio in jsonData["items"]:
                pazanSong = {
                    "artist": audio["artist"],
                    "title": audio["title"],
                    "genre_id": audio.get("genre_id", None),
                    "url": audio["url"],
                }
                pazanSongs.append(pazanSong)

            # Append one JSON object per id, one per line, to the output file.
            with open(sys.argv[2], "a", encoding="utf-8") as file:
                file.write(json.dumps({pazanId: pazanSongs}, ensure_ascii=False) + "\n")
            done = True
        except vk_api.ApiError as e:
            if e.code == 9:
                # Presumably a rate-limit style error: wait a minute and retry.
                print("waiting")
                time.sleep(60)
            elif e.code == 201 or e.code == 15:
                # Presumably access-denied style errors: skip this id.
                done = True
            else:
                # Bare raise keeps the original traceback.
                raise
|
||||
|
||||
@@ -10,9 +10,9 @@ from nltk.tokenize import RegexpTokenizer
|
||||
|
||||
|
||||
def dictWithoutOneKey(d, key):
    """Return a shallow copy of *d* with *key* removed.

    The input dictionary is left untouched.

    Raises:
        KeyError: if *key* is not present in *d*.
    """
    new_d = d.copy()
    # pop() without a default raises KeyError for a missing key.
    new_d.pop(key)
    return new_d
|
||||
|
||||
|
||||
# load pazans
|
||||
@@ -20,7 +20,7 @@ pazans_groups = None
|
||||
|
||||
# Read the JSON object stored in the file named by the first argument.
# Later code looks entries up by user id (as a string) and takes len() of
# the value.
pazans_file_name = sys.argv[1]
with open(pazans_file_name, "r") as file:
    pazans_groups = json.loads(file.read())
|
||||
|
||||
# analyze statuses
|
||||
status_stats = dict()
|
||||
@@ -30,29 +30,29 @@ stemmer = RussianStemmer()
|
||||
|
||||
# Aggregate status texts from the users file (one JSON object per line),
# considering only users whose id is present in pazans_groups.
users_file_name = sys.argv[2]
with open(users_file_name, "r") as file:
    for line in file:
        user = json.loads(line)
        uid = str(user["_id"])
        if uid in pazans_groups:
            pazan_groups = pazans_groups[uid]
            status_text = user.get("status", "")
            # Normalised key: stemmed, lower-cased tokens concatenated with no
            # separator, so differently worded variants can share one entry.
            filtered_status_text = "".join([stemmer.stem(token).lower() for token in tokenizer.tokenize(status_text)])
            if len(filtered_status_text) > 1:
                # The first status seen for a key supplies the "full" text.
                status_stats_item = status_stats.get(filtered_status_text, {
                    "full": status_text,
                    "count-boys": 0,
                    "count-girls": 0,
                })
                # Each user is weighted by the number of groups listed for them.
                if user["sex"] == 2:
                    status_stats_item["count-boys"] += len(pazan_groups)
                if user["sex"] == 1:
                    status_stats_item["count-girls"] += len(pazan_groups)
                # Fixed: the original stored under the undefined name
                # `filteredstatus_text`, which raised NameError.
                status_stats[filtered_status_text] = status_stats_item
|
||||
|
||||
# Write the aggregated statuses as pretty-printed JSON to the file named by
# the third argument, ordered by total count (boys + girls), highest first.
dest_file_name = sys.argv[3]
with open(dest_file_name, "w", encoding="utf-8") as file:
    sortKeyGetter = lambda item: item[1]["count-boys"] + item[1]["count-girls"]
    sortedStatues = [item[1] for item in sorted(status_stats.items(), key=sortKeyGetter, reverse=True)]
    # Output is keyed by the full status text; the remaining fields are the value.
    # Entries sharing the same "full" text collapse into one key (last one wins).
    data = OrderedDict([(item["full"], dictWithoutOneKey(item, "full")) for item in sortedStatues])
    file.write(json.dumps(data, ensure_ascii=False, indent=4))
|
||||
|
||||
Reference in New Issue
Block a user