Some refactoring to status-analyzer.py (also PEP-8 changes)
@@ -1,3 +1,6 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
 import json
 import sys
 from collections import OrderedDict
@@ -13,41 +16,43 @@ def dictWithoutOneKey(d, key):
 
 
 # load pazans
-pazansGroups = None
+pazans_groups = None
 
-pazansFileName = sys.argv[1]
-with open(pazansFileName, "r") as file:
-    pazansGroups = json.loads(file.read())
+pazans_file_name = sys.argv[1]
+with open(pazans_file_name, "r") as file:
+    pazans_groups = json.loads(file.read())
 
 # analyze statues
-statusStats = dict()
+status_stats = dict()
 
 tokenizer = RegexpTokenizer(r"[A-Za-zА-Яа-я]+")
 stemmer = RussianStemmer()
 
-usersFileName = sys.argv[2]
-with open(usersFileName, "r") as file:
+users_file_name = sys.argv[2]
+with open(users_file_name, "r") as file:
     for line in file:
         user = json.loads(line)
-        id = str(user["_id"])
-        if id in pazansGroups:
-            pazanGroups = pazansGroups[id]
-            statusText = user.get("status", "")
-            filteredStatusText = "".join([stemmer.stem(token).lower() for token in tokenizer.tokenize(statusText)])
-            if len(filteredStatusText) > 1:
-                statusStatsItem = statusStats.get(filteredStatusText, {
-                    "full": statusText,
+        uid = str(user["_id"])
+        if uid in pazans_groups:
+            pazan_groups = pazans_groups[uid]
+            status_text = user.get("status", "")
+            filtered_status_text = "".join([stemmer.stem(token).lower() for token in tokenizer.tokenize(status_text)])
+            if len(filtered_status_text) > 1:
+                status_stats_item = status_stats.get(filtered_status_text, {
+                    "full": status_text,
                     "count-boys": 0,
                     "count-girls": 0,
                 })
-                statusStatsItem["count-boys"] += len(pazanGroups) * (1 if user["sex"] == 2 else 0)
-                statusStatsItem["count-girls"] += len(pazanGroups) * (1 if user["sex"] == 1 else 0)
-                statusStats[filteredStatusText] = statusStatsItem
+                if user["sex"] == 2:
+                    status_stats_item["count-boys"] += len(pazan_groups)
+                if user["sex"] == 1:
+                    status_stats_item["count-girls"] += len(pazan_groups)
+                status_stats[filtered_status_text] = status_stats_item
 
 # print result
-destFileName = sys.argv[3]
-with open(destFileName, "w", encoding="utf8") as file:
+dest_file_name = sys.argv[3]
+with open(dest_file_name, "w", encoding="utf-8") as file:
     sortKeyGetter = lambda item: item[1]["count-boys"] + item[1]["count-girls"]
-    sortedStatues = [item[1] for item in sorted(statusStats.items(), key=sortKeyGetter, reverse=True)]
+    sortedStatues = [item[1] for item in sorted(status_stats.items(), key=sortKeyGetter, reverse=True)]
     data = OrderedDict([(item["full"], dictWithoutOneKey(item, "full")) for item in sortedStatues])
     file.write(json.dumps(data, ensure_ascii=False, indent=4))
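
For context, this hunk relies on imports and a helper defined elsewhere in status-analyzer.py that the diff does not show. A minimal sketch of what that surrounding code presumably looks like follows; the NLTK import paths are real, but the exact form used in the file and the body of dictWithoutOneKey are assumptions:

# Sketch only: assumed context from elsewhere in status-analyzer.py.
from nltk.tokenize import RegexpTokenizer        # provides the tokenizer used in the hunk
from nltk.stem.snowball import RussianStemmer    # provides the stemmer used in the hunk

def dictWithoutOneKey(d, key):
    # Hypothetical body: return a copy of d without the given key,
    # matching how the result dict drops the "full" field before output.
    return {k: v for k, v in d.items() if k != key}

The script reads three positional arguments (sys.argv[1] through sys.argv[3]): the pazans-groups JSON file, a file with one user JSON object per line, and the output path. An example invocation, with purely illustrative file names:

python status-analyzer.py pazans.json users.jsonl status-stats.json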