Tabs replaced with spaces in several files
This commit is contained in:
@@ -8,34 +8,34 @@ from nltk import RegexpTokenizer, OrderedDict
|
|||||||
from nltk.stem.snowball import RussianStemmer
|
from nltk.stem.snowball import RussianStemmer
|
||||||
|
|
||||||
# Lookup table from numeric genre id to its human-readable genre name.
# The ids are not contiguous (there is no 20) and the entries keep their
# original order.
genres = {
    1: "Rock",
    2: "Pop",
    3: "Rap & Hip - Hop",
    4: "Easy Listening",
    5: "Dance & House",
    6: "Instrumental",
    7: "Metal",
    21: "Alternative",
    8: "Dubstep",
    9: "Jazz & Blues",
    10: "Drum & Bass",
    11: "Trance",
    12: "Chanson",
    13: "Ethnic",
    14: "Acoustic & Vocal",
    15: "Reggae",
    16: "Classical",
    17: "Indie Pop",
    19: "Speech",
    22: "Electropop & Disco",
    18: "Other",
}
|
||||||
|
|
||||||
|
|
||||||
def dictWithoutOneKey(d, key):
    """Return a shallow copy of ``d`` with ``key`` removed.

    The dictionary passed in is not modified. ``key`` must be present:
    ``pop`` is called without a default, so a missing key raises
    ``KeyError``.
    """
    new_d = d.copy()
    new_d.pop(key)
    return new_d
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
musicFileName = sys.argv[1]
|
musicFileName = sys.argv[1]
|
||||||
|
|||||||
@@ -9,43 +9,43 @@ import vk_api
|
|||||||
|
|
||||||
|
|
||||||
def captcha_handler(captcha):
    """Ask the operator to solve a captcha and retry with the answer.

    Shows the URL returned by ``captcha.get_url()`` in an interactive
    prompt, reads the typed answer from stdin, strips surrounding
    whitespace and hands it to ``captcha.try_again``.

    Returns whatever ``captcha.try_again(key)`` returns.
    """
    key = input("Enter Captcha {0}: ".format(captcha.get_url())).strip()
    return captcha.try_again(key)
|
||||||
|
|
||||||
# getting pazans
# NOTE(review): the block nesting below was reconstructed from a listing whose
# indentation had been stripped -- confirm against the repository copy.
pazanIds = None
pazansFileName = sys.argv[1]
with open(pazansFileName, "r") as file:
    jsonData = json.loads(file.read())
    # Order the ids so that those with the longest associated value come first.
    pazanIds = [item[0] for item in sorted(jsonData.items(), key=lambda item: len(item[1]), reverse=True)]

vk = vk_api.VkApi(token=sys.argv[3], app_id=sys.argv[4], captcha_handler=captcha_handler)

# Optional 5th argument: position in pazanIds to start from.
startIndex = int(sys.argv[5]) if len(sys.argv) > 5 else 0

# enumerate(start=...) on its own only relabels the counter; the ids before
# startIndex must also be sliced off, otherwise a restarted run would fetch
# everything again from the first id and append duplicates to the output.
for index, pazanId in enumerate(pazanIds[startIndex:], start=startIndex):
    done = False
    # Retry the same id until it is either written out or deliberately skipped.
    while not done:
        try:
            pazanSongs = []

            print(index, pazanId)

            jsonData = vk.method("execute.getMusic", {"id": pazanId})
            for audio in jsonData["items"]:
                pazanSong = {
                    "artist": audio["artist"],
                    "title": audio["title"],
                    # genre_id is optional in the response; store None if absent.
                    "genre_id": audio.get("genre_id", None),
                    "url": audio["url"],
                }
                pazanSongs.append(pazanSong)
            # One JSON object per line, appended so earlier results are kept.
            with open(sys.argv[2], "a", encoding="utf-8") as file:
                file.write(json.dumps({pazanId: pazanSongs}, ensure_ascii=False) + "\n")
            done = True
        except vk_api.ApiError as e:
            if e.code == 9:
                # Presumably a rate-limit response: wait a minute, then retry.
                print("waiting")
                time.sleep(60)
            elif e.code == 201 or e.code == 15:
                # Presumably access-denied responses: skip this id.
                done = True
            else:
                # Unknown API error: propagate with the original traceback.
                raise
|
||||||
|
|||||||
@@ -10,9 +10,9 @@ from nltk.tokenize import RegexpTokenizer
|
|||||||
|
|
||||||
|
|
||||||
def dictWithoutOneKey(d, key):
    """Return a shallow copy of ``d`` with ``key`` removed.

    The dictionary passed in is not modified. ``key`` must be present:
    ``pop`` is called without a default, so a missing key raises
    ``KeyError``.
    """
    new_d = d.copy()
    new_d.pop(key)
    return new_d
|
||||||
|
|
||||||
|
|
||||||
# load pazans
|
# load pazans
|
||||||
@@ -20,7 +20,7 @@ pazans_groups = None
|
|||||||
|
|
||||||
# First CLI argument: JSON file whose top-level object is keyed by user id
# (the keys are later compared against str(user["_id"])).
pazans_file_name = sys.argv[1]
with open(pazans_file_name, "r") as file:
    pazans_groups = json.loads(file.read())

# analyze statuses
# Accumulator keyed by the normalised status text.
status_stats = dict()
|
||||||
@@ -30,29 +30,29 @@ stemmer = RussianStemmer()
|
|||||||
|
|
||||||
# Second CLI argument: file with one JSON user object per line.
# NOTE(review): the block nesting below was reconstructed from a listing whose
# indentation had been stripped -- confirm against the repository copy.
users_file_name = sys.argv[2]
with open(users_file_name, "r") as file:
    for line in file:
        user = json.loads(line)
        uid = str(user["_id"])
        # Only users present in pazans_groups contribute to the statistics.
        if uid in pazans_groups:
            pazan_groups = pazans_groups[uid]
            status_text = user.get("status", "")
            # Normalised key: stemmed, lower-cased tokens joined with no separator.
            filtered_status_text = "".join([stemmer.stem(token).lower() for token in tokenizer.tokenize(status_text)])
            if len(filtered_status_text) > 1:
                # The first status text seen for a key is kept as its "full" form.
                status_stats_item = status_stats.get(filtered_status_text, {
                    "full": status_text,
                    "count-boys": 0,
                    "count-girls": 0,
                })
                # Each user is weighted by the size of their pazans_groups entry.
                if user["sex"] == 2:
                    status_stats_item["count-boys"] += len(pazan_groups)
                if user["sex"] == 1:
                    status_stats_item["count-girls"] += len(pazan_groups)
                # Store under the same key used for the lookup above; the
                # previous spelling "filteredstatus_text" was an undefined name.
                status_stats[filtered_status_text] = status_stats_item
|
||||||
|
|
||||||
# print result
# Third CLI argument: destination file, written as UTF-8 JSON.
# NOTE(review): the block nesting below was reconstructed from a listing whose
# indentation had been stripped -- confirm against the repository copy.
dest_file_name = sys.argv[3]
with open(dest_file_name, "w", encoding="utf-8") as file:
    # Rank entries by combined boys + girls count, largest first.
    sortKeyGetter = lambda item: item[1]["count-boys"] + item[1]["count-girls"]
    sorted_statuses = [item[1] for item in sorted(status_stats.items(), key=sortKeyGetter, reverse=True)]
    # Re-key by the full status text and drop that field from each value.
    data = OrderedDict([(item["full"], dictWithoutOneKey(item, "full")) for item in sorted_statuses])
    file.write(json.dumps(data, ensure_ascii=False, indent=4))
|
||||||
|
|||||||
Reference in New Issue
Block a user