diff --git a/all-good-ids.py b/all-good-ids.py
new file mode 100644
index 0000000..0f937ff
--- /dev/null
+++ b/all-good-ids.py
@@ -0,0 +1,42 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+"""
+all-good-ids.py IDS_DIR OUTPUT_FILE
+
+Count how many times each user id occurs in the files under IDS_DIR
+(one integer id per line) and write the counts to OUTPUT_FILE as a JSON
+object mapping id -> number of occurrences.
+"""
+
+import sys
+import json
+import os
+from os.path import join
+
+
+def count_ids(root):
+    """Return a dict mapping each integer id found under *root* to its count.
+
+    Every file below *root* is read line by line; blank lines are skipped so
+    a trailing newline does not abort the run with a ValueError.
+    """
+    counts = {}
+    for dirpath, _dirnames, filenames in os.walk(root):
+        for name in filenames:
+            # The context manager closes each file promptly instead of
+            # leaking one handle per input file.
+            with open(join(dirpath, name)) as ids_file:
+                for line in ids_file:
+                    line = line.strip()
+                    if not line:
+                        continue
+                    uid = int(line)
+                    counts[uid] = counts.get(uid, 0) + 1
+    return counts
+
+
+if __name__ == '__main__':
+    all_ids = count_ids(sys.argv[1])
+    # Closing the output explicitly guarantees the JSON is flushed to disk.
+    with open(sys.argv[2], 'w') as f_out:
+        f_out.write(json.dumps(all_ids))
diff --git a/get-users-addresses.py b/get-users-addresses.py
new file mode 100644
index 0000000..36b415f
--- /dev/null
+++ b/get-users-addresses.py
@@ -0,0 +1,110 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+"""
+get_users_addresses.py SCHOOL_IDS_FILE ADDRESSES_FILE USERS_IDS_JSON FULL_BASE_FILE OUTPUT_FILE
+
+Attach a school address to every listed user and dump the result as JSON.
+
+SCHOOL_IDS_FILE  CSV with a header row; column 0 is the school name and
+                 column 3 its numeric id.
+ADDRESSES_FILE   CSV with a header row; column 0 is the school name,
+                 column 1 the address, columns 3 and 2 the coordinates.
+USERS_IDS_JSON   JSON object whose keys are the user ids of interest.
+FULL_BASE_FILE   one JSON document per line with '_id' and 'schools' keys.
+OUTPUT_FILE      receives a JSON object mapping user id -> school entry.
+"""
+import sys
+import csv
+import json
+from urllib.parse import quote_plus
+
+
+import requests
+
+GEOCODER_URL = 'https://geocode-maps.yandex.ru/1.x/?geocode='
+# Seconds to wait for the geocoder before giving up on a request.
+GEOCODER_TIMEOUT = 30
+
+
+def getCoords(name):
+    """Geocode *name* and return its position as a tuple of two floats.
+
+    The 'pos' string of the first geocoder result is split on whitespace
+    and reversed before conversion.
+
+    Raises requests.HTTPError on an error status and IndexError when the
+    geocoder returns no result for *name*.
+    """
+    r = requests.get(GEOCODER_URL + quote_plus(name) + '&format=json',
+                     timeout=GEOCODER_TIMEOUT)
+    r.raise_for_status()
+    members = r.json()['response']['GeoObjectCollection']['featureMember']
+    if not members:
+        raise IndexError('no geocoder result for {!r}'.format(name))
+    pos = members[0]['GeoObject']['Point']['pos'].split()[::-1]
+    return tuple(float(x) for x in pos)
+
+
+def read_rows(path):
+    """Yield the non-empty data rows of the CSV at *path*, minus the header."""
+    # newline='' lets the csv module handle line endings itself, as its
+    # documentation requires for files passed to csv.reader.
+    with open(path, 'r', newline='') as f:
+        reader = csv.reader(f)
+        next(reader, None)
+        for row in reader:
+            if row:
+                yield row
+
+
+def main(argv):
+    """Run the script with command-line arguments *argv* (see module doc)."""
+    school_ids = {}
+    for row in read_rows(argv[1]):
+        if not row[0].strip() or not row[3].strip():
+            continue
+        school_ids[int(row[3])] = row[0]
+
+    addresses = {}
+    for row in read_rows(argv[2]):
+        if (row[3] + row[2]).strip():
+            # Kept as the CSV strings; getCoords() below yields floats.
+            coord = (row[3], row[2])
+        else:
+            # No coordinates in the file: ask the geocoder instead.
+            coord = getCoords('Москва ' + row[1])
+        addresses[row[0]] = {"coord": coord, "address": row[1]}
+
+    with open(argv[3]) as f:
+        pazan_ids = {int(i) for i in json.load(f)}
+
+    pazan_schools = {}
+
+    print(argv[4])
+    with open(argv[4]) as f:
+        for line in f:
+            if not line.strip():
+                continue
+            json_line = json.loads(line)
+            uid = int(json_line['_id'])
+            if uid not in pazan_ids:
+                continue
+            schools = json_line.get('schools')
+            if not schools:
+                continue
+            # Only the last listed school is considered.
+            school_id = int(schools[-1]['id'])
+            if school_id not in school_ids:
+                continue
+            school_name = school_ids[school_id]
+            if school_name not in addresses:
+                continue
+            pazan_schools[uid] = addresses[school_name]
+
+    with open(argv[5], 'w') as f_out:
+        f_out.write(json.dumps(pazan_schools))
+    print("There are {} pazans with school".format(len(pazan_schools)))
+
+
+if __name__ == '__main__':
+    main(sys.argv)
diff --git a/group-ids-downloader.py b/group-ids-downloader.py
new file mode 100644
index 0000000..cda13fe
--- /dev/null
+++ b/group-ids-downloader.py
@@ -0,0 +1,60 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+'''
+group_ids_downloader.py VK_LOGIN VK_PASSWORD APP_ID INPUT_FILENAME
+where INPUT_FILENAME is a file with one group id per line
+
+The member ids of each group are written, one per line, to out/<group id>.
+'''
+import os
+import sys
+
+import vk_api
+
+PAGE_SIZE = 1000
+OUT_DIR = 'out'
+
+
+def getIdsByGroup(group_id):
+    """Return the ids of all members of *group_id*.
+
+    Members are fetched with groups.getMembers, PAGE_SIZE ids per request;
+    the 'count' field of the first response bounds the paging.
+    """
+    ids = []
+    offset = 0
+    total = None
+    while total is None or offset < total:
+        # Every page, including the first, uses the same explicit sort so
+        # that consecutive offsets refer to one consistent ordering.
+        res = vk.method("groups.getMembers", {
+            "group_id": group_id,
+            "count": PAGE_SIZE,
+            "offset": offset,
+            "sort": "id_asc",
+        })
+        if total is None:
+            total = res['count']
+        ids += res['items']
+        offset += PAGE_SIZE
+    return ids
+
+
+if __name__ == '__main__':
+    vk_login, vk_password = sys.argv[1], sys.argv[2]
+    vk = vk_api.VkApi(vk_login, vk_password, app_id=sys.argv[3])
+
+    vk.authorization()
+
+    # The output directory may not exist yet.
+    os.makedirs(OUT_DIR, exist_ok=True)
+
+    with open(sys.argv[4]) as groups_file:
+        for group_id in groups_file:
+            group_id = group_id.strip()
+            if not group_id:
+                # A blank line is not a group id; skip it.
+                continue
+            good_ids = getIdsByGroup(group_id)
+            with open(os.path.join(OUT_DIR, group_id), 'w') as f_out:
+                f_out.write('\n'.join(str(i) for i in good_ids))