Audio analyzer
This commit is contained in:
21
audio-analyzer.py
Normal file
21
audio-analyzer.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
import json
|
||||||
|
import sys
|
||||||
|
from collections import Counter
|
||||||
|
|
||||||
|
from nltk import RegexpTokenizer
|
||||||
|
from nltk.stem.snowball import RussianStemmer
|
||||||
|
|
||||||
|
counter = Counter()
|
||||||
|
tokenizer = RegexpTokenizer(r"[A-Za-zА-Яа-я]+")
|
||||||
|
stemmer = RussianStemmer()
|
||||||
|
|
||||||
|
musicFileName = sys.argv[0]
|
||||||
|
with open(musicFileName) as file:
|
||||||
|
for line in file:
|
||||||
|
jsonData = json.loads(line, encoding="utf8")
|
||||||
|
for song in jsonData.values()[0]:
|
||||||
|
key = "".join([stemmer.stem(token).lower() for token in tokenizer.tokenize("{} {}".format(song["artist"], song["title"]))])
|
||||||
|
counter[key] += 1
|
||||||
|
|
||||||
|
for item in counter.most_common():
|
||||||
|
print(item)
|
||||||
Reference in New Issue
Block a user