Redirect + trackers + php upload api + binary tree search + uuids
This commit is contained in:
@@ -2,22 +2,24 @@
|
||||
|
||||
import sys
|
||||
import common_lib as mylib
|
||||
import tracker_download as tracker
|
||||
|
||||
|
||||
# Cache for the 3rd-level domain list. It starts out empty and is filled on
# first use by dom_in_3rd_domain(), so importing this module does not read
# '3rd-domains.txt'. (The former eager read built a set that was overwritten
# with None by the very next statement.)
level3_doms = None
|
||||
|
||||
|
||||
def dom_in_3rd_domain(needle):
    """Report whether *needle* is listed in the 3rd-level domain list.

    The list is loaded through ``mylib.read_list('3rd-domains.txt')`` the
    first time it is needed and kept in the module-level ``level3_doms``.

    Args:
        needle: Domain string to look up.

    Returns:
        Whatever ``mylib.bintree_lookup(level3_doms, needle)`` returns.
    """
    # The declaration must come before any use of the name inside the
    # function; using `level3_doms` first is a SyntaxError.
    global level3_doms
    if not level3_doms:
        # NOTE(review): presumably read_list() yields entries in the sorted
        # order bintree_lookup() expects -- confirm in common_lib.
        level3_doms = mylib.read_list('3rd-domains.txt')
    return mylib.bintree_lookup(level3_doms, needle)
|
||||
|
||||
|
||||
def get_parent_domain(subdomain):
    """Reduce a dot-separated host name to its parent domain.

    Args:
        subdomain: Host name such as ``'a.b.example.com'``.

    Returns:
        * ``subdomain`` unchanged when it has fewer than three labels, or
          when its last two labels (joined in order) are found by
          ``dom_in_3rd_domain``;
        * the last three labels when the last two labels joined in reversed
          order (``tld.sld``) are found by ``dom_in_3rd_domain``;
        * the last two labels otherwise.
    """
    parts = subdomain.split('.')
    if len(parts) < 3:
        # Nothing to strip. This used to return the undefined name `x`,
        # raising NameError for every one- or two-label host.
        return subdomain
    # NOTE(review): the two lookups below use opposite label orders
    # ('sld.tld' vs 'tld.sld'). Which of them can match depends on how
    # 3rd-domains.txt stores its entries -- confirm both are intended.
    if dom_in_3rd_domain('.'.join(parts[-2:])):
        return subdomain
    if dom_in_3rd_domain(parts[-1] + '.' + parts[-2]):
        return '.'.join(parts[-3:])
    return '.'.join(parts[-2:])
|
||||
@@ -46,6 +48,7 @@ def json_combine(bundle_id):
|
||||
uniq_par = set()
|
||||
for subdomain in logs:
|
||||
occurs = len(logs[subdomain])
|
||||
sub_tracker = tracker.is_tracker(subdomain)
|
||||
dict_increment(res, '#logs', occurs)
|
||||
dict_increment(domA, subdomain, 1)
|
||||
dict_increment(domB, subdomain, occurs)
|
||||
@@ -60,17 +63,40 @@ def json_combine(bundle_id):
|
||||
res['uniq_pardom'] = domC
|
||||
res['total_subdom'] = domB
|
||||
res['total_pardom'] = domD
|
||||
sub_tracker = dict()
|
||||
par_tracker = dict()
|
||||
for x in domA:
|
||||
sub_tracker[x] = tracker.is_tracker(x)
|
||||
for x in domC:
|
||||
par_tracker[x] = tracker.is_tracker(x)
|
||||
res['tracker_subdom'] = sub_tracker
|
||||
res['tracker_pardom'] = par_tracker
|
||||
return res
|
||||
|
||||
|
||||
def process(bundle_ids, where=None):
    """Recompute the combined json of each bundle and write it out.

    Args:
        bundle_ids: List of bundle ids to process. The exact value ``['*']``
            is replaced by everything ``mylib.enum_data_appids()`` yields.
        where: Optional iterable of domain strings. When given and
            non-empty, a bundle is only written if
            ``mylib.bintree_lookup`` reports a match for at least one entry
            of its ``'uniq_subdom'`` data. When omitted or empty, every
            bundle is written.

    Returns:
        List of the bundle ids whose combined json was written.
    """
    print('writing combined json ...')
    if bundle_ids == ['*']:
        bundle_ids = list(mylib.enum_data_appids())

    affected_ids = []
    # Both the haystack and each probe are character-reversed before the
    # lookup; the haystack is sorted once, outside the loop.
    haystack = sorted(x[::-1] for x in where) if where else None
    for bid in bundle_ids:
        # Combine once and reuse the result for both the filter and the
        # write (the old unconditional write combined a second time).
        obj = json_combine(bid)
        should_update = not haystack or any(
            mylib.bintree_lookup(haystack, x[::-1])
            for x in obj['uniq_subdom'])
        if should_update:
            print(' ' + bid)
            mylib.json_write_combined(bid, obj)
            affected_ids.append(bid)
    print('')
    return affected_ids
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
Reference in New Issue
Block a user