Tabs replaced with spaces in several files

This commit is contained in:
2016-02-28 02:39:34 +03:00
parent c44e707cec
commit d127c7b87b
3 changed files with 76 additions and 76 deletions

View File

@@ -8,34 +8,34 @@ from nltk import RegexpTokenizer, OrderedDict
from nltk.stem.snowball import RussianStemmer
# VK audio genre ids -> human-readable genre names.
# NOTE: the diff artifact had every entry duplicated (and a missing comma),
# which made the literal invalid; this is the single, deduplicated table.
# Ids are non-contiguous (no 20) — presumably matching the VK API's own
# genre enumeration; confirm against the VK audio API docs.
genres = {
    1: "Rock",
    2: "Pop",
    3: "Rap & Hip - Hop",
    4: "Easy Listening",
    5: "Dance & House",
    6: "Instrumental",
    7: "Metal",
    21: "Alternative",
    8: "Dubstep",
    9: "Jazz & Blues",
    10: "Drum & Bass",
    11: "Trance",
    12: "Chanson",
    13: "Ethnic",
    14: "Acoustic & Vocal",
    15: "Reggae",
    16: "Classical",
    17: "Indie Pop",
    19: "Speech",
    22: "Electropop & Disco",
    18: "Other",
}
def dictWithoutOneKey(d, key):
    """Return a shallow copy of dict *d* with *key* removed.

    The original dict is left untouched.  Raises KeyError when *key*
    is absent, mirroring ``dict.pop`` called without a default.
    """
    # (diff artifact removed: the body was duplicated after the return)
    new_d = d.copy()
    new_d.pop(key)
    return new_d
if __name__ == '__main__':
musicFileName = sys.argv[1]

View File

@@ -9,43 +9,43 @@ import vk_api
def captcha_handler(captcha):
    """Resolve a VK captcha interactively.

    Prints the captcha image URL, reads the solution from stdin, and
    retries the failed API request with that key via ``captcha.try_again``.
    (Diff artifact removed: the two body lines were duplicated.)
    """
    key = input("Enter Captcha {0}: ".format(captcha.get_url())).strip()
    return captcha.try_again(key)
# getting pazans
# Load the pazan-id -> groups mapping and order ids by how many groups each
# pazan belongs to (most groups first).
pazanIds = None
pazansFileName = sys.argv[1]
with open(pazansFileName, "r") as file:
    jsonData = json.loads(file.read())
    pazanIds = [item[0] for item in sorted(jsonData.items(), key=lambda item: len(item[1]), reverse=True)]

# argv[3] = access token, argv[4] = app id; argv[5] (optional) = start index
# used to resume an interrupted run.
vk = vk_api.VkApi(token=sys.argv[3], app_id=sys.argv[4], captcha_handler=captcha_handler)
for index, pazanId in enumerate(pazanIds, start=(int(sys.argv[5]) if len(sys.argv) > 5 else 0)):
    done = False
    while not done:
        try:
            pazanSongs = []
            print(index, pazanId)
            jsonData = vk.method("execute.getMusic", {"id": pazanId})
            for audio in jsonData["items"]:
                pazanSong = {
                    "artist" : audio["artist"],
                    "title" : audio["title"],
                    "genre_id": audio.get("genre_id", None),
                    "url" : audio["url"],
                }
                pazanSongs.append(pazanSong)
            # Append one JSON line per pazan so progress survives a crash.
            with open(sys.argv[2], "a", encoding="utf-8") as file:
                file.write(json.dumps({pazanId: pazanSongs}, ensure_ascii=False) + "\n")
            done = True
        except vk_api.ApiError as e:
            if e.code == 9:
                # rate limited — back off and retry the same pazan
                print("waiting")
                time.sleep(60)
            elif e.code == 201 or e.code == 15:
                # access denied / content unavailable — skip this pazan
                done = True
            else:
                raise e

View File

@@ -10,9 +10,9 @@ from nltk.tokenize import RegexpTokenizer
def dictWithoutOneKey(d, key):
    """Return a shallow copy of dict *d* with *key* removed.

    The original dict is left untouched.  Raises KeyError when *key*
    is absent, mirroring ``dict.pop`` called without a default.
    (Diff artifact removed: the body lines were duplicated after the return.)
    """
    new_d = d.copy()
    new_d.pop(key)
    return new_d
# load pazans
# Read the pazan-id -> groups mapping from the JSON file named by argv[1].
# (Diff artifact removed: the json.loads line was duplicated; the
# `pazans_groups = None` initializer comes from the hunk's context line.)
pazans_groups = None
pazans_file_name = sys.argv[1]
with open(pazans_file_name, "r") as file:
    pazans_groups = json.loads(file.read())
# analyze statuses: aggregate user status texts, keyed by a normalized
# (stemmed, lowercased) form so trivially different spellings collapse
# into one entry.  `stemmer` and `tokenizer` are defined earlier in this
# file (outside this hunk).
status_stats = dict()
users_file_name = sys.argv[2]
with open(users_file_name, "r") as file:
    for line in file:
        user = json.loads(line)
        uid = str(user["_id"])
        if uid in pazans_groups:
            pazan_groups = pazans_groups[uid]
            status_text = user.get("status", "")
            # normalization key: lowercase stem of each token, concatenated
            filtered_status_text = "".join([stemmer.stem(token).lower() for token in tokenizer.tokenize(status_text)])
            if len(filtered_status_text) > 1:
                status_stats_item = status_stats.get(filtered_status_text, {
                    "full": status_text,
                    "count-boys": 0,
                    "count-girls": 0,
                })
                # sex == 2 counted as boys, == 1 as girls (presumably the
                # VK sex codes — confirm), weighted by group membership.
                if user["sex"] == 2:
                    status_stats_item["count-boys"] += len(pazan_groups)
                if user["sex"] == 1:
                    status_stats_item["count-girls"] += len(pazan_groups)
                # BUG FIX: original wrote `status_stats[filteredstatus_text]`
                # (missing underscore) — a NameError on every aggregated status.
                status_stats[filtered_status_text] = status_stats_item
# print result
# Dump statuses to argv[3] as pretty-printed JSON, most-counted first
# (boys + girls combined).  (Diff artifact removed: the four body lines
# were duplicated.)
dest_file_name = sys.argv[3]
with open(dest_file_name, "w", encoding="utf-8") as file:
    sortKeyGetter = lambda item: item[1]["count-boys"] + item[1]["count-girls"]
    sortedStatues = [item[1] for item in sorted(status_stats.items(), key=sortKeyGetter, reverse=True)]
    # key the output by the original (un-stemmed) status text; drop the
    # redundant "full" field from each value via dictWithoutOneKey
    data = OrderedDict([(item["full"], dictWithoutOneKey(item, "full")) for item in sortedStatues])
    file.write(json.dumps(data, ensure_ascii=False, indent=4))