Refactor tracker update

This commit is contained in:
relikd
2020-09-21 14:30:12 +02:00
parent fd821d6743
commit 09917ba1ee
3 changed files with 33 additions and 37 deletions

View File

@@ -5,6 +5,7 @@ import re
import sys
import common_lib as mylib
import download_tracker # is_tracker
import index_domains # load
THRESHOLD_PERCENT_OF_LOGS = 0.33 # domain appears in % recordings
@@ -110,31 +111,18 @@ def json_evaluate_inplace(obj):
obj['avg_logs_pm'] = float3(s_c / (obj['avg_time'] or 1) * 60)
def process(bundle_ids, where=None):
def process(bundle_ids):
print('writing combined json ...')
if bundle_ids == ['*']:
bundle_ids = list(mylib.enum_data_appids())
affected_ids = []
haystack = sorted([x[::-1] for x in where]) if where else None
for bid in bundle_ids:
print(' ' + bid)
obj = json_combine(bid)
should_update = False
if not haystack:
should_update = True
else:
for x in obj['subdom']:
if mylib.bintree_lookup(haystack, x[::-1]):
should_update = True
break
if should_update:
print(' ' + bid)
mylib.json_write(fname_combined(bid), obj, pretty=False)
json_evaluate_inplace(obj)
mylib.json_write(fname_evaluated(bid), obj, pretty=False)
affected_ids.append(bid)
mylib.json_write(fname_combined(bid), obj, pretty=False)
json_evaluate_inplace(obj)
mylib.json_write(fname_evaluated(bid), obj, pretty=False)
print('')
return affected_ids
if __name__ == '__main__':

View File

@@ -97,6 +97,17 @@ def enrich_with_bundle_ids(index):
index[key][dom] = [index['bundle'][i] for i in ids]
def all_bundles_containing(list_of_domains):
affected = set()
json = load_json_from_disk(fname_all())
haystack = sorted([x[::-1] for x in list_of_domains])
for key in ['pardom', 'subdom']:
for dom, ids in json[key].items():
if mylib.bintree_lookup(haystack, dom[::-1]):
affected.update(ids)
return [json['bundle'][i] for i in affected]
def process(bundle_ids, deleteOnly=False):
print('writing index: domains ...')
fname = fname_all()

View File

@@ -6,10 +6,10 @@ import common_lib as mylib
import bundle_combine
import download_itunes
import download_tracker
import html_root
import html_index_apps
import html_bundle
import html_index_apps
import html_index_domains
import html_root
import index_app_names
import index_domains
import index_meta
@@ -58,26 +58,20 @@ def del_id(bundle_ids):
rebuild_app_index_html(inclRoot=True)
def combine_and_update(bundle_ids, where=None):
def star_reset(ids):
# special case needed. '*' will force a rebuild of the index
return ['*'] if not where and bundle_ids == ['*'] else ids
def combine_and_update(bundle_ids):
# 1. download meta data from iTunes store, incl. app icons
new_ids = download_itunes.process(bundle_ids)
new_ids = star_reset(new_ids)
# 2. if new apps, update bundle name index
if bundle_ids == ['*']:
new_ids = ['*'] # special case needed to force rebuilt index
if len(new_ids) > 0:
index_app_names.process(new_ids) # after download_itunes
# 3. re-calculate combined.json and evaluated.json files
affected = bundle_combine.process(bundle_ids, where=where)
affected = star_reset(affected)
# 4. was any json updated? if so, make html and update domain index
if len(affected) > 0:
index_meta.process(bundle_ids) # after bundle_combine
html_bundle.process(affected) # after index_app_names
rebuild_domain_index(affected) # after bundle_combine
else:
print('no bundle affected by tracker, not generating bundle html')
bundle_combine.process(bundle_ids)
# 4. make html and update domain index
index_meta.process(bundle_ids) # after bundle_combine
html_bundle.process(bundle_ids) # after index_app_names
rebuild_domain_index(bundle_ids) # after bundle_combine
# 5. make all apps index
if len(new_ids) > 0:
rebuild_app_index_html() # after bundle_combine
@@ -112,8 +106,11 @@ def import_update():
def tracker_update():
new_trackers = download_tracker.process()
if new_trackers:
combine_and_update(['*'], where=new_trackers)
affected = index_domains.all_bundles_containing(new_trackers)
if len(affected) > 0:
combine_and_update(affected)
else:
print('no bundle affected by tracker, not generating bundle html')
try:
@@ -138,7 +135,7 @@ try:
elif cmd == 'run':
if len(params) == 0:
print_usage_and_exit()
combine_and_update(params) # ['*'], where=['test.com']
combine_and_update(params)
elif cmd == 'del':
if len(params) == 0:
print_usage_and_exit()