From 95bdf54ad02efdb3ad3a5e3d1e170531064d6501 Mon Sep 17 00:00:00 2001
From: Aleksey Lobanov
Date: Sun, 21 Feb 2016 02:45:43 +0300
Subject: [PATCH 1/4] Added group_ids_downloader

---
 group_ids_downloader.py | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)
 create mode 100644 group_ids_downloader.py

diff --git a/group_ids_downloader.py b/group_ids_downloader.py
new file mode 100644
index 0000000..cda13fe
--- /dev/null
+++ b/group_ids_downloader.py
@@ -0,0 +1,33 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+'''
+group_ids_downloader.py VK_LOGIN VK_PASSWORD APP_ID INPUT_FILENAME
+where INPUT_FILENAME is file with one id of group per line
+'''
+import sys
+
+import vk_api
+
+def getIdsByGroup(group_id):
+    ids = []
+    res = vk.method("groups.getMembers", {"group_id":group_id,"count":1000})
+    count = res['count'] - 1000
+    ids += res['items']
+    cur_offset = 1000
+    while count > 0:
+        res = vk.method("groups.getMembers", {"group_id":group_id,
+            "count":1000,"offset":cur_offset, "sort":"id_asc"})
+        count = count - 1000
+        cur_offset += 1000
+        ids += res['items']
+    return ids
+
+vk_login, vk_password = sys.argv[1], sys.argv[2]
+vk = vk_api.VkApi(vk_login, vk_password, app_id=sys.argv[3])
+
+vk.authorization()
+
+for group_id in open(sys.argv[4]):
+    group_id = group_id.strip()
+    good_ids = getIdsByGroup(group_id)
+    open('out/' + str(group_id),'w').write('\n'.join([str(i) for i in good_ids]))

From 676f110b481766459dd10b3b967c495f755a408e Mon Sep 17 00:00:00 2001
From: Aleksey Lobanov
Date: Sun, 21 Feb 2016 02:54:15 +0300
Subject: [PATCH 2/4] Added list with group ids

---
 full_group_list | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)
 create mode 100644 full_group_list

diff --git a/full_group_list b/full_group_list
new file mode 100644
index 0000000..cf672d8
--- /dev/null
+++ b/full_group_list
@@ -0,0 +1,39 @@
+district_kents
+moi_raen_18
+po_rayonu
+public35574958
+nas.rayon
+chanson_best
+club41978736
+publicvor
+ceny_brat
+truephilosophy
+public42291448
+bratva_vsegda_ryadom
+bratzasestry
+justformens
+ceny_life
+philosophy_brother
+oo_brat_oo
+public53664903
+myzhskoi_style
+public102976872
+brat_feed
+pa_canskye
+ygapaem
+zalipaay
+brat_bratka777
+club52395272
+noooool
+yavolna
+duha.pacana
+pacanskie2015
+po.pacansky
+tyt_co_cmislom
+pa.cany
+parnipy
+public41324287
+taz.reshaet
+heartoftheman
+tazysosnooly
+vaz.club
\ No newline at end of file

From d015935a8fcefea588f2a4c921c4325acf035d9e Mon Sep 17 00:00:00 2001
From: Aleksey Lobanov
Date: Sun, 21 Feb 2016 02:56:12 +0300
Subject: [PATCH 3/4] Added script for union of groups

---
 all_good_ids.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 all_good_ids.py

diff --git a/all_good_ids.py b/all_good_ids.py
new file mode 100644
index 0000000..ddcca1f
--- /dev/null
+++ b/all_good_ids.py
@@ -0,0 +1,18 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+import sys
+import os
+from os.path import join
+
+all_ids = set()
+
+for dirpath, dirnames, filenames in os.walk(sys.argv[1]):
+    for f in filenames:
+        fp = os.path.join(dirpath, f)
+        for uid in open(fp):
+            all_ids.add(int(uid))
+
+f_out = open(sys.argv[2],'w')
+for uid in all_ids:
+    f_out.write(str(uid) + '\n')

From 591478a2c008eef9a8777568867afe7c5fd416f1 Mon Sep 17 00:00:00 2001
From: Aleksey Lobanov
Date: Sun, 21 Feb 2016 13:06:52 +0300
Subject: [PATCH 4/4] We build json with freqs of ids

---
 all_good_ids.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/all_good_ids.py b/all_good_ids.py
index ddcca1f..0f937ff 100644
--- a/all_good_ids.py
+++ b/all_good_ids.py
@@ -2,17 +2,21 @@
 # -*- coding: utf-8 -*-
 
 import sys
+import json
 import os
 from os.path import join
 
-all_ids = set()
+all_ids = {}
 
 for dirpath, dirnames, filenames in os.walk(sys.argv[1]):
     for f in filenames:
         fp = os.path.join(dirpath, f)
         for uid in open(fp):
-            all_ids.add(int(uid))
+            uid = int(uid)
+            if uid in all_ids:
+                all_ids[uid] += 1
+            else:
+                all_ids[uid] = 1
 
 f_out = open(sys.argv[2],'w')
-for uid in all_ids:
-    f_out.write(str(uid) + '\n')
+f_out.write(json.dumps(all_ids))