Audio analyzer

This commit is contained in:
Oleg Morozenkov
2016-02-22 11:25:57 +03:00
parent ea02c9f3f3
commit 92aa431791

21
audio-analyzer.py Normal file
View File

@@ -0,0 +1,21 @@
import json
import sys
from collections import Counter
from nltk import RegexpTokenizer
from nltk.stem.snowball import RussianStemmer
def song_key(song, tokenize, stem):
    """Build the normalized grouping key for one song.

    The song's "artist" and "title" values are joined with a space,
    split into tokens, and every token is stemmed and lower-cased.  The
    resulting stems are concatenated without a separator, so spelling
    variants that differ only in case, punctuation or spacing collapse
    into the same key.

    Args:
        song: Mapping with "artist" and "title" entries.
        tokenize: Callable that splits a string into a sequence of tokens.
        stem: Callable that maps a single token to its stem.

    Returns:
        The concatenated, lower-cased stems as one string.

    Raises:
        KeyError: If "artist" or "title" is missing from ``song``.
    """
    text = "{} {}".format(song["artist"], song["title"])
    return "".join(stem(token).lower() for token in tokenize(text))


def count_songs(lines, tokenize, stem):
    """Count how often each normalized song key occurs in ``lines``.

    Args:
        lines: Iterable of strings, each holding one JSON object.  Lines
            that contain only whitespace are skipped.
        tokenize: Callable passed through to ``song_key``.
        stem: Callable passed through to ``song_key``.

    Returns:
        A ``Counter`` that maps each song key to its number of occurrences.

    Raises:
        ValueError: If a non-blank line is not valid JSON.
    """
    counter = Counter()
    for line in lines:
        if not line.strip():
            continue
        json_data = json.loads(line)
        # Only the first value of each object is read; presumably every
        # line holds a single key mapped to a list of songs -- confirm
        # against the producer of the file.  An empty object yields no songs.
        songs = next(iter(json_data.values()), ())
        for song in songs:
            counter[song_key(song, tokenize, stem)] += 1
    return counter


def main(argv=None):
    """Print song keys of a JSON-lines music file, most frequent first.

    Args:
        argv: Argument vector; defaults to ``sys.argv``.  ``argv[1]`` is
            the path of the input file, read as UTF-8.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        sys.exit("usage: audio-analyzer.py MUSIC_FILE")

    # NOTE(review): the ranges А-Я and а-я do not include Ё/ё, so words are
    # split at those letters -- confirm whether that is intended.
    tokenizer = RegexpTokenizer(r"[A-Za-zА-Яа-я]+")
    stemmer = RussianStemmer()

    # argv[0] is the script itself; the input file is the first real argument.
    with open(args[1], encoding="utf-8") as music_file:
        counter = count_songs(music_file, tokenizer.tokenize, stemmer.stem)

    for item in counter.most_common():
        print(item)


if __name__ == "__main__":
    main()