From 09917ba1ee70de9109bb6a2dce9dbc045b35095c Mon Sep 17 00:00:00 2001
From: relikd
Date: Mon, 21 Sep 2020 14:30:12 +0200
Subject: [PATCH] Refactor tracker update

---
 src/bundle_combine.py | 24 ++++++------------------
 src/index_domains.py  | 11 +++++++++++
 src/main.py           | 35 ++++++++++++++++-------------------
 3 files changed, 33 insertions(+), 37 deletions(-)

diff --git a/src/bundle_combine.py b/src/bundle_combine.py
index 22f70c6..41674fa 100755
--- a/src/bundle_combine.py
+++ b/src/bundle_combine.py
@@ -5,6 +5,7 @@ import re
 import sys
 import common_lib as mylib
 import download_tracker  # is_tracker
+import index_domains  # load
 
 THRESHOLD_PERCENT_OF_LOGS = 0.33  # domain appears in % recordings
 
@@ -110,31 +111,18 @@ def json_evaluate_inplace(obj):
     obj['avg_logs_pm'] = float3(s_c / (obj['avg_time'] or 1) * 60)
 
 
-def process(bundle_ids, where=None):
+def process(bundle_ids):
     print('writing combined json ...')
     if bundle_ids == ['*']:
         bundle_ids = list(mylib.enum_data_appids())
-    affected_ids = []
-    haystack = sorted([x[::-1] for x in where]) if where else None
 
     for bid in bundle_ids:
+        print(' ' + bid)
         obj = json_combine(bid)
-        should_update = False
-        if not haystack:
-            should_update = True
-        else:
-            for x in obj['subdom']:
-                if mylib.bintree_lookup(haystack, x[::-1]):
-                    should_update = True
-                    break
-        if should_update:
-            print(' ' + bid)
-            mylib.json_write(fname_combined(bid), obj, pretty=False)
-            json_evaluate_inplace(obj)
-            mylib.json_write(fname_evaluated(bid), obj, pretty=False)
-            affected_ids.append(bid)
+        mylib.json_write(fname_combined(bid), obj, pretty=False)
+        json_evaluate_inplace(obj)
+        mylib.json_write(fname_evaluated(bid), obj, pretty=False)
     print('')
-    return affected_ids
 
 
 if __name__ == '__main__':
diff --git a/src/index_domains.py b/src/index_domains.py
index 12c0509..547c604 100755
--- a/src/index_domains.py
+++ b/src/index_domains.py
@@ -97,6 +97,17 @@ def enrich_with_bundle_ids(index):
             index[key][dom] = [index['bundle'][i] for i in ids]
 
 
+def all_bundles_containing(list_of_domains):
+    affected = set()
+    json = load_json_from_disk(fname_all())
+    haystack = sorted([x[::-1] for x in list_of_domains])
+    for key in ['pardom', 'subdom']:
+        for dom, ids in json[key].items():
+            if mylib.bintree_lookup(haystack, dom[::-1]):
+                affected.update(ids)
+    return [json['bundle'][i] for i in affected]
+
+
 def process(bundle_ids, deleteOnly=False):
     print('writing index: domains ...')
     fname = fname_all()
diff --git a/src/main.py b/src/main.py
index ae07730..c19db99 100755
--- a/src/main.py
+++ b/src/main.py
@@ -6,10 +6,10 @@ import common_lib as mylib
 import bundle_combine
 import download_itunes
 import download_tracker
-import html_root
-import html_index_apps
 import html_bundle
+import html_index_apps
 import html_index_domains
+import html_root
 import index_app_names
 import index_domains
 import index_meta
@@ -58,26 +58,20 @@ def del_id(bundle_ids):
     rebuild_app_index_html(inclRoot=True)
 
 
-def combine_and_update(bundle_ids, where=None):
-    def star_reset(ids):
-        # special case needed. '*' will force rebuilt index
-        return ['*'] if not where and bundle_ids == ['*'] else ids
+def combine_and_update(bundle_ids):
     # 1. download meta data from iTunes store, incl. app icons
     new_ids = download_itunes.process(bundle_ids)
-    new_ids = star_reset(new_ids)
     # 2. if new apps, update bundle name index
+    if bundle_ids == ['*']:
+        new_ids = ['*']  # special case needed to force rebuilt index
     if len(new_ids) > 0:
         index_app_names.process(new_ids)  # after download_itunes
     # 3. re-calculate combined.json and evaluated.json files
-    affected = bundle_combine.process(bundle_ids, where=where)
-    affected = star_reset(affected)
-    # 4. was any json updated? if so, make html and update domain index
-    if len(affected) > 0:
-        index_meta.process(bundle_ids)  # after bundle_combine
-        html_bundle.process(affected)  # after index_app_names
-        rebuild_domain_index(affected)  # after bundle_combine
-    else:
-        print('no bundle affected by tracker, not generating bundle html')
+    bundle_combine.process(bundle_ids)
+    # 4. make html and update domain index
+    index_meta.process(bundle_ids)  # after bundle_combine
+    html_bundle.process(bundle_ids)  # after index_app_names
+    rebuild_domain_index(bundle_ids)  # after bundle_combine
     # 5. make all apps index
     if len(new_ids) > 0:
         rebuild_app_index_html()  # after bundle_combine
@@ -112,8 +106,11 @@ def import_update():
 
 def tracker_update():
     new_trackers = download_tracker.process()
-    if new_trackers:
-        combine_and_update(['*'], where=new_trackers)
+    affected = index_domains.all_bundles_containing(new_trackers)
+    if len(affected) > 0:
+        combine_and_update(affected)
+    else:
+        print('no bundle affected by tracker, not generating bundle html')
 
 
 try:
@@ -138,7 +135,7 @@ try:
     elif cmd == 'run':
         if len(params) == 0:
             print_usage_and_exit()
-        combine_and_update(params)  # ['*'], where=['test.com']
+        combine_and_update(params)
     elif cmd == 'del':
         if len(params) == 0:
             print_usage_and_exit()
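
Note on the reversed-domain lookup used above (illustration only, not part of
the patch): all_bundles_containing() and the old bundle_combine code both store
domains reversed ('tracker.com' -> 'moc.rekcart'), which keeps every subdomain
of a parent adjacent in a sorted index and lets a match be found by binary
search. The actual semantics of mylib.bintree_lookup are not visible in this
diff; the sketch below is a bisect-based stand-in that checks a domain and each
of its parent domains against a sorted list of reversed tracker domains.

    # Sketch only, not the repo's implementation.
    from bisect import bisect_left

    def hits_tracker(sorted_reversed_trackers, domain):
        # try the domain itself, then each parent domain (skip the bare TLD)
        labels = domain.split('.')
        for start in range(len(labels) - 1):
            needle = '.'.join(labels[start:])[::-1]
            i = bisect_left(sorted_reversed_trackers, needle)
            if i < len(sorted_reversed_trackers) and sorted_reversed_trackers[i] == needle:
                return True
        return False

    trackers = sorted(d[::-1] for d in ['tracker.com', 'ads.example.org'])
    print(hits_tracker(trackers, 'cdn.tracker.com'))  # True
    print(hits_tracker(trackers, 'example.org'))      # False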