Redirect + trackers + php upload api + binary tree search + uuids

This commit is contained in:
relikd
2020-08-29 14:44:01 +02:00
parent ec6e4b5a90
commit 1d731e709f
17 changed files with 16565 additions and 99 deletions

View File

@@ -2,22 +2,24 @@
import sys
import common_lib as mylib
import tracker_download as tracker
# Cache for the contents of '3rd-domains.txt'. It starts out as None so that
# importing this module performs no file I/O; dom_in_3rd_domain() fills it
# on first use.
level3_doms = None
def dom_in_3rd_domain(needle):
    """Look up `needle` in the list loaded from '3rd-domains.txt'.

    The list is read once through mylib.read_list() and cached in the
    module-level `level3_doms`; later calls reuse the cached value.

    Returns whatever mylib.bintree_lookup() returns for the cached list and
    `needle`.
    """
    global level3_doms
    # Compare against None (not truthiness) so that an empty list is still
    # treated as "already loaded" instead of being re-read on every call.
    if level3_doms is None:
        level3_doms = mylib.read_list('3rd-domains.txt')
    # NOTE(review): presumably read_list() yields a sorted sequence, as a
    # binary search requires -- confirm in common_lib.
    return mylib.bintree_lookup(level3_doms, needle)
def get_parent_domain(subdomain):
    """Reduce a host name to its parent (registrable) domain.

    The name is split on '.', then:
      * fewer than three labels: the name is returned unchanged;
      * the last two labels (in order) are a known third-level suffix:
        the name is returned unchanged;
      * the last two labels (in reverse order) are a known third-level
        suffix: the last three labels are returned;
      * otherwise: the last two labels are returned.
    """
    parts = subdomain.split('.')
    if len(parts) < 3:
        # Nothing to strip. (Previously returned an undefined name `x`,
        # which raised NameError for every short host name.)
        return subdomain
    # NOTE(review): the two lookups below test the same two labels in
    # opposite order; only one ordering can match the format of
    # '3rd-domains.txt' -- confirm which one is intended.
    elif dom_in_3rd_domain('.'.join(parts[-2:])):
        return subdomain
    elif dom_in_3rd_domain(parts[-1] + '.' + parts[-2]):
        return '.'.join(parts[-3:])
    else:
        return '.'.join(parts[-2:])
@@ -46,6 +48,7 @@ def json_combine(bundle_id):
uniq_par = set()
for subdomain in logs:
occurs = len(logs[subdomain])
sub_tracker = tracker.is_tracker(subdomain)
dict_increment(res, '#logs', occurs)
dict_increment(domA, subdomain, 1)
dict_increment(domB, subdomain, occurs)
@@ -60,17 +63,40 @@ def json_combine(bundle_id):
res['uniq_pardom'] = domC
res['total_subdom'] = domB
res['total_pardom'] = domD
sub_tracker = dict()
par_tracker = dict()
for x in domA:
sub_tracker[x] = tracker.is_tracker(x)
for x in domC:
par_tracker[x] = tracker.is_tracker(x)
res['tracker_subdom'] = sub_tracker
res['tracker_pardom'] = par_tracker
return res
def process(bundle_ids, where=None):
    """Rebuild and write the combined json for the given bundle ids.

    bundle_ids -- list of bundle ids; the single-element list ['*'] is
                  replaced by everything mylib.enum_data_appids() yields.
    where      -- optional iterable of domain strings. When given (and not
                  empty), a bundle is only written if at least one key of
                  its combined 'uniq_subdom' entry is found among them.

    Returns the list of bundle ids whose combined json was written.
    """
    print('writing combined json ...')
    if bundle_ids == ['*']:
        bundle_ids = list(mylib.enum_data_appids())

    # Domains are stored reversed and sorted so they can be handed to
    # mylib.bintree_lookup(); candidates are reversed the same way below.
    haystack = sorted(x[::-1] for x in where) if where else None

    affected_ids = []
    for bid in bundle_ids:
        obj = json_combine(bid)
        # Without a filter every bundle is written; with one, stop at the
        # first matching subdomain.
        should_update = not haystack or any(
            mylib.bintree_lookup(haystack, x[::-1])
            for x in obj['uniq_subdom'])
        if should_update:
            print(' ' + bid)
            mylib.json_write_combined(bid, obj)
            affected_ids.append(bid)
    print('')
    return affected_ids
if __name__ == '__main__':