Some refactoring to status-analyzer.py (also PEP-8 changes)

2016-02-24 00:42:17 +03:00
parent 5bef3ecc01
commit 7274aa80c7
1 changed file with 27 additions and 22 deletions
@@ -1,3 +1,6 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*- 
+
 import json
 import sys
 from collections import OrderedDict
@@ -13,41 +16,43 @@ def dictWithoutOneKey(d, key):


 # load pazans
-pazansGroups = None
+pazans_groups = None

-pazansFileName = sys.argv[1]
-with open(pazansFileName, "r") as file:
-	pazansGroups = json.loads(file.read())
+pazans_file_name = sys.argv[1]
+with open(pazans_file_name, "r") as file:
+	pazans_groups = json.loads(file.read())

 # analyze statues
-statusStats = dict()
+status_stats = dict()

 tokenizer = RegexpTokenizer(r"[A-Za-zА-Яа-я]+")
-stemmer = RussianStemmer()
+stemmer   = RussianStemmer()

-usersFileName = sys.argv[2]
-with open(usersFileName, "r") as file:
+users_file_name = sys.argv[2]
+with open(users_file_name, "r") as file:
 	for line in file:
 		user = json.loads(line)
-		id = str(user["_id"])
-		if id in pazansGroups:
-			pazanGroups = pazansGroups[id]
-			statusText = user.get("status", "")
-			filteredStatusText = "".join([stemmer.stem(token).lower() for token in tokenizer.tokenize(statusText)])
-			if len(filteredStatusText) > 1:
-				statusStatsItem = statusStats.get(filteredStatusText, {
-					"full": statusText,
+		uid = str(user["_id"])
+		if uid in pazans_groups:
+			pazan_groups = pazans_groups[uid]
+			status_text  = user.get("status", "")
+			filtered_status_text = "".join([stemmer.stem(token).lower() for token in tokenizer.tokenize(status_text)])
+			if len(filtered_status_text) > 1:
+				status_stats_item = status_stats.get(filtered_status_text, {
+					"full": status_text,
 					"count-boys": 0,
 					"count-girls": 0,
 				})
-				statusStatsItem["count-boys"] += len(pazanGroups) * (1 if user["sex"] == 2 else 0)
-				statusStatsItem["count-girls"] += len(pazanGroups) * (1 if user["sex"] == 1 else 0)
-				statusStats[filteredStatusText] = statusStatsItem
+				if user["sex"] == 2:  # sex code 2 is tallied under "count-boys"
+					status_stats_item["count-boys"] += len(pazan_groups)
+				if user["sex"] == 1:  # sex code 1 is tallied under "count-girls"
+					status_stats_item["count-girls"] += len(pazan_groups)
+				status_stats[filtered_status_text] = status_stats_item

 # print result
-destFileName = sys.argv[3]
-with open(destFileName, "w", encoding="utf8") as file:
+dest_file_name = sys.argv[3]
+with open(dest_file_name, "w", encoding="utf-8") as file:
 	sortKeyGetter = lambda item: item[1]["count-boys"] + item[1]["count-girls"]
-	sortedStatues = [item[1] for item in sorted(statusStats.items(), key=sortKeyGetter, reverse=True)]
+	sortedStatues = [item[1] for item in sorted(status_stats.items(), key=sortKeyGetter, reverse=True)]
 	data = OrderedDict([(item["full"], dictWithoutOneKey(item, "full")) for item in sortedStatues])
 	file.write(json.dumps(data, ensure_ascii=False, indent=4))