Audio analyzer

2016-02-22 11:25:57 +03:00
parent ea02c9f3f3
commit 92aa431791
1 changed files with 21 additions and 0 deletions
--- a/audio-analyzer.py
+++ b/audio-analyzer.py
@@ -0,0 +1,21 @@
 import json
 import sys
 from collections import Counter
 from nltk import RegexpTokenizer
 from nltk.stem.snowball import RussianStemmer
 # Script body: count how often each normalized "artist title" key occurs in
 # a JSON-lines music dump and print the keys from most to least frequent.
 #
 # Usage: audio-analyzer.py MUSIC_FILE

 # Frequency table: normalized song key -> number of occurrences.
 counter = Counter()
 # Tokens are runs of Latin or Cyrillic letters. "Ё"/"ё" sit outside the
 # contiguous "А-я" code-point range, so they are listed explicitly; without
 # them a word such as "Алёна" would be split into two tokens.
 tokenizer = RegexpTokenizer(r"[A-Za-zА-Яа-яЁё]+")
 stemmer = RussianStemmer()

 # sys.argv[0] is this script's own path; the input file is the first real
 # command-line argument.
 if len(sys.argv) < 2:
     sys.exit("usage: {} MUSIC_FILE".format(sys.argv[0]))
 musicFileName = sys.argv[1]

 # The input is read as one JSON object per line.
 # NOTE(review): each object is assumed to map a single key to a list of
 # songs carrying "artist" and "title" fields -- confirm against the producer.
 with open(musicFileName, encoding="utf8") as file:
     for line in file:
         # Tolerate blank lines (e.g. a trailing newline at end of file).
         if not line.strip():
             continue
         # json.loads() takes already-decoded text; its old `encoding`
         # keyword is rejected by current Python 3 releases.
         jsonData = json.loads(line)
         # Dict views cannot be indexed on Python 3, so take the first value
         # through an iterator; an empty object contributes no songs.
         for song in next(iter(jsonData.values()), []):
             text = "{} {}".format(song["artist"], song["title"])
             # The key is the lower-cased stems of every token, concatenated
             # with no separator (kept as in the original normalization).
             key = "".join(
                 stemmer.stem(token).lower()
                 for token in tokenizer.tokenize(text)
             )
             counter[key] += 1

 # most_common() without an argument yields every (key, count) pair,
 # ordered from the highest count to the lowest.
 for item in counter.most_common():
     print(item)