From 68ab5418cf570f12d107de2f47040257ca86c967 Mon Sep 17 00:00:00 2001
From: Oleg Morozenkov <omorozenkov@gmail.com>
Date: Sun, 21 Feb 2016 20:53:43 +0300
Subject: [PATCH] Audio analyzer

---
 audio-analyzer.py  | 28 ++++++++++++++++++++++++++++
 status-analyzer.py |  4 ++--
 2 files changed, 30 insertions(+), 2 deletions(-)
 create mode 100644 audio-analyzer.py

diff --git a/audio-analyzer.py b/audio-analyzer.py
new file mode 100644
index 0000000..376cefa
--- /dev/null
+++ b/audio-analyzer.py
@@ -0,0 +1,28 @@
+"""Count how often each track occurs among the vk.audio documents of the given owners.
+
+Usage: audio-analyzer.py PAZANS_JSON OUTPUT_FILE
+PAZANS_JSON is a JSON object whose keys are the owner ids to look up; OUTPUT_FILE
+receives one entry per track (name, count, url), most frequent first.
+"""
+import json
+import sys
+import pymongo
+
+with open(sys.argv[1], encoding="utf-8") as file:
+	# NOTE(review): JSON keys are always str; confirm "owner_id" is stored as str, not int.
+	pazanIds = json.load(file).keys()
+
+audioStats = dict()  # "artist - title" -> {"url": first url seen, "count": occurrences}
+
+audioCollection = pymongo.MongoClient("goto.reproducible.work")["vk"]["audio"]
+for pazanId in pazanIds:
+	for audio in audioCollection.find({"owner_id": pazanId}, {"artist": 1, "title": 1, "url": 1}):
+		# The separator keeps names readable and stops "ab" + "c" colliding with "a" + "bc".
+		audioName = audio["artist"] + " - " + audio["title"]
+		audioStats.setdefault(audioName, {"url": audio["url"], "count": 0})["count"] += 1
+
+with open(sys.argv[2], "w", encoding="utf-8") as file:
+	for audioName, stats in sorted(audioStats.items(), key=lambda item: item[1]["count"], reverse=True):
+		file.write(audioName + "\n")
+		file.write("\tcount: " + str(stats["count"]) + "\n")
+		file.write("\turl: " + str(stats["url"]) + "\n")
diff --git a/status-analyzer.py b/status-analyzer.py
index 6e6fbff..c103485 100644
--- a/status-analyzer.py
+++ b/status-analyzer.py
@@ -6,7 +6,7 @@ from nltk.tokenize import RegexpTokenizer
 # load pazans
 pazansGroups = None
 
-pazansFileName = sys.argv[2]
+pazansFileName = sys.argv[1]
 with open(pazansFileName) as file:
 	pazansGroups = json.loads(file.read())
 
@@ -16,7 +16,7 @@ statusStats = dict()
 tokenizer = RegexpTokenizer(r"[A-Za-zА-Яа-я]+")
 stemmer = RussianStemmer()
 
-usersFileName = sys.argv[1]
+usersFileName = sys.argv[2]
 with open(usersFileName) as file:
 	for line in file:
 		user = json.loads(line)