Audio analyzer

2016-02-22 11:25:57 +03:00
parent ea02c9f3f3
commit 92aa431791
1 changed files with 21 additions and 0 deletions
--- a/audio-analyzer.py
+++ b/audio-analyzer.py
@@ -0,0 +1,21 @@
+import json
+import sys
+from collections import Counter
+
+from nltk import RegexpTokenizer
+from nltk.stem.snowball import RussianStemmer
+
+# Tally how often each normalized "artist title" key occurs in a JSON-lines dump.
+counter = Counter()
+tokenizer = RegexpTokenizer(r"[A-Za-zА-Яа-яЁё]+")  # Ё/ё lie outside the А-я range
+stemmer = RussianStemmer()
+musicFileName = sys.argv[1]  # argv[0] is this script's own path, not the input file
+with open(musicFileName, encoding="utf8") as file:  # decode explicitly, not per locale
+	for line in file:
+		jsonData = json.loads(line)  # the `encoding` kwarg was removed in Python 3.9
+		# dict views are not indexable in Python 3; take the first value via an iterator.
+		for song in next(iter(jsonData.values())):
+			key = "".join(stemmer.stem(token).lower() for token in tokenizer.tokenize("{} {}".format(song["artist"], song["title"])))
+			counter[key] += 1
+for item in counter.most_common():  # (key, count) pairs, most frequent first
+	print(item)