Bundle name index

This commit is contained in:
relikd
2020-09-16 00:28:54 +02:00
parent eb4d104575
commit 6d33baa0db
8 changed files with 115 additions and 35 deletions

View File

@@ -38,18 +38,17 @@ def json_combine(bundle_id):
except KeyError:
ddic[key] = (tracker.is_tracker(key), [num])
res = dict({'rec_len': [], 'name': mylib.app_name(bundle_id)})
pardom = dict()
subdom = dict()
res = {'rec_len': []}
pardom = {}
subdom = {}
latest = 0
for fname, jdata in mylib.enum_jsons(bundle_id):
# TODO: load combined and append newest only, then update evaluated
latest = max(latest, os.path.getmtime(fname)) # or getctime
# if not res['name']:
# res['name'] = jdata['app-name']
res['rec_len'].append(jdata['duration'])
try:
logs = jdata['logs']
uniq_par = dict()
uniq_par = {}
for subdomain in logs:
occurs = len(logs[subdomain])
inc_dic(subdom, subdomain, occurs)
@@ -69,8 +68,6 @@ def json_combine(bundle_id):
def json_evaluate_inplace(obj):
if not obj['name']:
obj['name'] = '< App-Name >'
rec_count = len(obj['rec_len'])
time_total = sum(obj['rec_len'])
del(obj['rec_len'])
@@ -81,7 +78,7 @@ def json_evaluate_inplace(obj):
obj['avg_time'] = time_total / rec_count
def transform(ddic):
res = list()
res = []
c_sum = 0
c_trkr = 0
for name, (is_tracker, counts) in ddic.items():

View File

@@ -32,6 +32,12 @@ def path_data_app(bundle_id, filename=None):
return path_add(pth, filename) if filename else pth
def path_data_index(filename):
    """Return the path of `filename` inside the index directory.

    The index directory is whatever path_root('data', '_eval') yields; it is
    handed to mkdir() before the file path is assembled with path_add().
    """
    index_dir = path_root('data', '_eval')
    mkdir(index_dir)
    index_path = path_add(index_dir, filename)
    return index_path
def path_out(*path_components):
    """Return path_root() applied to 'out' followed by `path_components`."""
    components = ('out',) + path_components
    return path_root(*components)
@@ -76,17 +82,18 @@ def valid_bundle_id(bundle_id):
return regex_bundle_id.match(bundle_id)
def app_name(bundle_id, fallback=None):
def app_names(bundle_id):
def name_for(lang):
try:
return json_read_meta(bundle_id, lang)['trackCensoredName']
except Exception:
return None
ret = {}
for lang in ['us', 'de']:
name = name_for(lang)
if name:
return name
return fallback
ret[lang] = name
return ret
def err(scope, msg, logOnly=False):

View File

@@ -5,6 +5,7 @@ import sys
import time
import math
import common_lib as mylib
import index_bundle_names
def seconds_to_time(seconds):
@@ -80,9 +81,10 @@ def gen_dom_tags(sorted_arr, onlyTrackers=False):
def gen_html(bundle_id, obj):
name = index_bundle_names.get_name(bundle_id)
obj['tracker'] = list(filter(lambda x: x[2], obj['subdom']))
return mylib.template_with_base(f'''
<h2 class="title">{obj['name']}</h2>
<h2 class="title">{name}</h2>
<p class="subtitle snd"><i class="mg_lr">Bundle-id:</i>{ bundle_id }</p>
<div id="meta">
<div class="icons">
@@ -122,7 +124,7 @@ def gen_html(bundle_id, obj):
{ gen_dotgraph(obj['subdom']) }
{ gen_dom_tags(obj['subdom']) }
</div>
<p class="right snd">Download: <a href="data.json" download="{bundle_id}.json">json</a></p>''', title=obj['name'])
<p class="right snd">Download: <a href="data.json" download="{bundle_id}.json">json</a></p>''', title=name)
def process(bundle_ids):

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
import common_lib as mylib
import index_bundle_names
def gen_obj(bundle_id):
@@ -10,7 +11,7 @@ def gen_obj(bundle_id):
icon = '/static/app-template.svg'
return {
'id': bundle_id,
'name': mylib.app_name(bundle_id, '&lt; App-Name &gt;'),
'name': index_bundle_names.get_name(bundle_id),
'img': icon
}

66
src/index_bundle_names.py Executable file
View File

@@ -0,0 +1,66 @@
#!/usr/bin/env python3
import sys
import common_lib as mylib
# Module-level cache of the bundle-name index. Stays None until
# load_json_if_not_already() fills it; get_name() then indexes it as
# _bundle_name_dict[bundle_id][lang].
_bundle_name_dict = None
def index_fname():
    """Return the path of the bundle-name index file ('bundle_names.json')."""
    fname = 'bundle_names.json'
    return mylib.path_data_index(fname)
def load_json_if_not_already():
    """Populate the module-level bundle-name cache on first use.

    If the cache has not been loaded yet, the index file is read when it
    exists; otherwise the cache starts out as an empty dict. Later calls are
    no-ops.
    """
    global _bundle_name_dict
    # Compare against None rather than using truthiness: an empty index is a
    # valid loaded state and must not trigger a re-read on every call.
    if _bundle_name_dict is not None:
        return
    index_file = index_fname()
    if mylib.file_exists(index_file):
        _bundle_name_dict = mylib.json_read(index_file)
    else:
        _bundle_name_dict = {}
def write_json_to_disk():
    """Write the cached bundle-name index to the index file (pretty=True)."""
    target = index_fname()
    mylib.json_write(target, _bundle_name_dict, pretty=True)
def get_name(bundle_id, langs=('us', 'de')):
    """Return the indexed app name for `bundle_id`.

    Args:
        bundle_id: Key to look up in the bundle-name index.
        langs: Language codes to try, in order of preference. The default is
            an immutable tuple so it cannot be shared and mutated across calls.

    Returns:
        The name stored for the first language in `langs` that has an entry,
        or the HTML-escaped placeholder '&lt; App-Name &gt;' if the bundle or
        all requested languages are missing from the index.
    """
    load_json_if_not_already()
    # Resolve the per-bundle entry once; an unknown bundle behaves like one
    # without any language entries.
    names = _bundle_name_dict.get(bundle_id, {})
    for lang in langs:
        try:
            return names[lang]
        except KeyError:
            continue
    return '&lt; App-Name &gt;'  # None
def process(bundle_ids):
    """Update the bundle-name index for the given bundle ids.

    Args:
        bundle_ids: List of bundle ids to (re-)index. The special value
            ['*'] deletes the existing index and rebuilds it for every id
            returned by mylib.enum_data_appids().

    For each id, the names from mylib.app_names() replace the stored entry.
    Ids for which no name is found are reported via mylib.err() and skipped.
    The index file is only written if at least one entry was updated.
    """
    global _bundle_name_dict
    print('writing index: bundle name ...')
    if bundle_ids == ['*']:
        bundle_ids = list(mylib.enum_data_appids())
        print(' full reset')
        mylib.rm_file(index_fname())  # rebuild from ground up
        # Also drop the in-memory cache; otherwise entries loaded earlier in
        # this process would survive the reset and be written back to disk.
        _bundle_name_dict = None
    load_json_if_not_already()
    did_change = False
    for bid in bundle_ids:
        names = mylib.app_names(bid)
        if not names:
            mylib.err('index-bundle-names', 'could not load: {}'.format(bid))
            continue
        _bundle_name_dict[bid] = names
        did_change = True
    if did_change:
        write_json_to_disk()
    else:
        print(' no change')
    print('')
# Script entry point: index the bundle ids given on the command line, or
# print the usage text when none are given.
if __name__ == '__main__':
    args = sys.argv[1:]
    if not args:
        # process(['*'])
        mylib.usage(__file__, '[bundle_id] [...]')
    else:
        process(args)

View File

@@ -4,12 +4,6 @@ import sys
import common_lib as mylib
def get_index_path():
pth = mylib.path_root('data', '_eval')
mylib.mkdir(pth)
return mylib.path_add(pth, 'reverse_index.json')
def load_index_json(file_path):
if mylib.file_exists(file_path):
json = mylib.json_read(file_path)
@@ -52,7 +46,10 @@ def insert_in_index(index, bundle_ids):
except ValueError: # index not found
i = len(index['bundle'])
index['bundle'].append(bid)
json, _ = mylib.json_read_evaluated(bid)
try:
json, _ = mylib.json_read_evaluated(bid)
except FileNotFoundError:
continue
for key in ['pardom', 'subdom']: # assuming keys are identical
for domain, _, _ in json[key]:
try:
@@ -64,10 +61,11 @@ def insert_in_index(index, bundle_ids):
def process(bundle_ids, deleteOnly=False):
print('writing reverse index ...')
index_file = get_index_path()
print('writing index: reverse domains ...')
index_file = mylib.path_data_index('reverse_domains.json')
if bundle_ids == ['*']:
bundle_ids = list(mylib.enum_data_appids())
print(' full reset')
mylib.rm_file(index_file) # rebuild from ground up
# load previous index
json = load_index_json(index_file)

View File

@@ -8,7 +8,8 @@ import bundle_download
import html_root
import html_index
import html_bundle
import json_reverse_index
import index_bundle_names
import index_reverse_domains
import tracker_download
@@ -44,23 +45,31 @@ def del_id(bundle_ids):
mylib.rm_dir(dest)
update_index = True
print('')
json_reverse_index.process(bundle_ids, deleteOnly=True)
index_reverse_domains.process(bundle_ids, deleteOnly=True)
if update_index:
rebuild_index()
def combine_and_update(bundle_ids, where=None):
# 1. download meta data from iTunes store, incl. app icons
new_ids = bundle_download.process(bundle_ids)
# 2. if new apps, update bundle name index
if len(new_ids) > 0:
index_bundle_names.process(new_ids)
# 3. re-calculate combined.json and evaluated.json files
affected = bundle_combine.process(bundle_ids, where=where)
# special case needed for reverse index. '*' will force rebuilt index
if not where and bundle_ids == ['*']:
affected = ['*']
# 4. was any json updated? if so, make html and update reverse index
if len(affected) > 0:
json_reverse_index.process(affected)
index_reverse_domains.process(affected)
html_bundle.process(affected)
else:
print('no bundle affected by tracker, not generating bundle html')
# 5. make all apps index
if len(new_ids) > 0:
rebuild_index()
rebuild_index() # must be called after bundle_combine
else:
print('no new bundle, not rebuilding index')

View File

@@ -6,6 +6,13 @@ import common_lib as mylib
known_trackers = None
def is_tracker(domain):
    """Look up `domain` in the known-tracker list.

    The list is read from 'tracker_all.txt' via mylib.read_list() whenever
    the module-level cache is still empty. The lookup is done on the reversed
    domain string with mylib.bintree_lookup().
    """
    global known_trackers
    if not known_trackers:
        known_trackers = mylib.read_list('tracker_all.txt')
    reversed_domain = domain[::-1]
    return mylib.bintree_lookup(known_trackers, reversed_domain)
# def md5(fname):
# hash_md5 = hashlib.md5()
# with open(fname, 'rb') as f:
@@ -110,13 +117,6 @@ def exodus(fname):
return save_list(res, fname, binary=False)
def is_tracker(domain):
global known_trackers
if not known_trackers:
known_trackers = mylib.read_list('tracker_all.txt')
return mylib.bintree_lookup(known_trackers, domain[::-1])
def combine_all(changes):
final = mylib.path_root('src', 'lists', 'tracker_all.txt')
if changes or not mylib.file_exists(final):