Refactor combined.json

This commit is contained in:
relikd
2020-09-07 17:11:34 +02:00
parent f2b9e87f01
commit 5f1f6e4124
7 changed files with 158 additions and 129 deletions

View File

@@ -138,16 +138,16 @@ p.trckr { font-size: 0.8em; }
margin: 1px 1px;
}
/* color-blind friendly color palette */
.c0{color:#63ACBE} .cb0{background:#63ACBE}
.c1{color:#601A4A} .cb1{background:#601A4A}
.c2{color:#09F4EC} .cb2{background:#09F4EC}
.c3{color:#1F77B4} .cb3{background:#1F77B4}
.c4{color:#EE442F} .cb4{background:#EE442F}
.c5{color:#7F7F7F} .cb5{background:#7F7F7F}
.c6{color:#0F2080} .cb6{background:#0F2080}
.c7{color:#3b9f35} .cb7{background:#3b9f35}
.c8{color:#F5793A} .cb8{background:#F5793A}
.c9{color:#AC66FB} .cb9{background:#AC66FB}
.cb0{background:#63ACBE}
.cb1{background:#601A4A}
.cb2{background:#09F4EC}
.cb3{background:#1F77B4}
.cb4{background:#EE442F}
.cb5{background:#7F7F7F}
.cb6{background:#0F2080}
.cb7{background:#3b9f35}
.cb8{background:#F5793A}
.cb9{background:#AC66FB}
.cs0{stroke:#3AE48C}
.cs1{stroke:#D11}

View File

@@ -26,54 +26,39 @@ def get_parent_domain(subdomain):
return '.'.join(parts[-2:])
def dict_increment(ddic, key, num):
    """Add ``num`` to the counter stored under ``key`` in ``ddic``.

    A key that is not present yet is treated as if its count were 0.
    The mapping is modified in place; nothing is returned.
    """
    # dict.get replaces the former "probe with a bare lookup, catch KeyError,
    # seed with 0" sequence with a single expression.
    ddic[key] = ddic.get(key, 0) + num
def json_combine(bundle_id):
res = dict({'#rec': 0, '#logs': 0})
domA = dict() # unique sub domains
domB = dict() # total sub domains
domC = dict() # unique parent domains
domD = dict() # total parent domains
def inc_dic(ddic, key, num):
try:
ddic[key][1].append(num)
except KeyError:
ddic[key] = (tracker.is_tracker(key), [num])
res = dict({'rec_len': [], 'name': mylib.app_name(bundle_id)})
pardom = dict()
subdom = dict()
latest = 0
for fname, jdata in mylib.enum_jsons(bundle_id):
latest = max(latest, os.path.getmtime(fname)) # or getctime
res['name'] = jdata['app-name']
res['#rec'] += 1
dict_increment(res, 'rec-total', jdata['duration'])
# if not res['name']:
# res['name'] = jdata['app-name']
res['rec_len'].append(jdata['duration'])
try:
logs = jdata['logs']
uniq_par = set()
uniq_par = dict()
for subdomain in logs:
occurs = len(logs[subdomain])
sub_tracker = tracker.is_tracker(subdomain)
res['#logs'] += 1
dict_increment(domA, subdomain, 1)
dict_increment(domB, subdomain, occurs)
inc_dic(subdom, subdomain, occurs)
par_dom = get_parent_domain(subdomain)
uniq_par.add(par_dom)
dict_increment(domD, par_dom, occurs)
for par in uniq_par:
dict_increment(domC, par, 1)
try:
uniq_par[par_dom] += occurs
except KeyError:
uniq_par[par_dom] = occurs
for name, val in uniq_par.items():
inc_dic(pardom, name, val)
except KeyError:
mylib.err('bundle-combine', 'skip: ' + fname)
res['uniq_subdom'] = domA
res['uniq_pardom'] = domC
res['total_subdom'] = domB
res['total_pardom'] = domD
sub_tracker = dict()
par_tracker = dict()
for x in domA:
sub_tracker[x] = tracker.is_tracker(x)
for x in domC:
par_tracker[x] = tracker.is_tracker(x)
res['tracker_subdom'] = sub_tracker
res['tracker_pardom'] = par_tracker
res['pardom'] = pardom
res['subdom'] = subdom
res['last_date'] = latest
return res
@@ -91,7 +76,7 @@ def process(bundle_ids, where=None):
if not haystack:
should_update = True
else:
for x in obj['uniq_subdom']:
for x in obj['subdom']:
if mylib.bintree_lookup(haystack, x[::-1]):
should_update = True
break

View File

@@ -20,10 +20,14 @@ def download_info(bundle_id, lang, force=False):
mylib.json_write_meta(bundle_id, json, lang)
def download_icon(bundle_id, force=False, langs=['us', 'de']):
# icon_file = mylib.path_data_app(bundle_id, 'icon.png')
def needs_icon_path(bundle_id):
icon_file = mylib.path_out_app(bundle_id, 'icon.png')
if force or not mylib.file_exists(icon_file):
return (mylib.file_exists(icon_file), icon_file)
def download_icon(bundle_id, force=False, langs=['us', 'de']):
exists, icon_file = needs_icon_path(bundle_id)
if force or not exists:
json = None
for lang in langs:
if not json:
@@ -31,13 +35,18 @@ def download_icon(bundle_id, force=False, langs=['us', 'de']):
json = mylib.json_read_meta(bundle_id, lang)
except Exception:
continue
mylib.download_file(json['artworkUrl100'], icon_file)
image_url = json['artworkUrl100'] # fail early on KeyError
is_new = mylib.mkdir_out_app(bundle_id)
mylib.download_file(image_url, icon_file)
return is_new
return False
def download_missing_icons(force=False, langs=['us', 'de']):
didAny = False
for bid in mylib.enum_appids():
if not mylib.file_exists(mylib.path_out_app(bid, 'icon.png')):
exists, _ = needs_icon_path(bid)
if not exists:
if not didAny:
print('downloading missing icons ...')
didAny = True
@@ -51,7 +60,11 @@ def download_missing_icons(force=False, langs=['us', 'de']):
def download(bundle_id, force=False):
if not mylib.valid_bundle_id(bundle_id):
mylib.err('apple-download', 'invalid id: ' + bundle_id)
return
return False
exists, _ = needs_icon_path(bundle_id)
if exists and not force:
return False
mylib.printf(' {} => '.format(bundle_id))
for lang in ['us', 'de']:
@@ -65,12 +78,14 @@ def download(bundle_id, force=False):
lang.upper(), bundle_id), logOnly=True)
try:
mylib.printf('icon')
download_icon(bundle_id, force=force)
index_needs_update = download_icon(bundle_id, force=force)
mylib.printf('[✔] ')
except Exception:
index_needs_update = False
mylib.printf('[✘] ')
mylib.err('apple-download', 'img for ' + bundle_id, logOnly=True)
print('') # end printf line
return index_needs_update
def process(bundle_ids, force=False):
@@ -78,9 +93,12 @@ def process(bundle_ids, force=False):
if bundle_ids == ['*']:
bundle_ids = list(mylib.enum_data_appids())
newly_created = set()
for bid in bundle_ids:
download(bid, force=force)
if download(bid, force=force):
newly_created.add(bid)
print('')
return newly_created
if __name__ == "__main__":

View File

@@ -77,6 +77,19 @@ def valid_bundle_id(bundle_id):
return regex_bundle_id.match(bundle_id)
def app_name(bundle_id, fallback=None):
    """Return the app's display name taken from its stored metadata.

    The 'us' metadata is consulted first, then 'de'; the first non-empty
    'trackCensoredName' wins. Any error while reading one language's
    metadata counts as "no name for that language". If neither language
    yields a name, ``fallback`` is returned.
    """
    for lang in ['us', 'de']:
        try:
            candidate = json_read_meta(bundle_id, lang)['trackCensoredName']
        except Exception:
            # best effort: unreadable or incomplete metadata, try next language
            candidate = None
        if candidate:
            return candidate
    return fallback
def err(scope, msg, logOnly=False):
if isinstance(msg, Exception):
msg = traceback.format_exc()
@@ -146,6 +159,14 @@ def meta_json_exists(bundle_id, lang):
return file_exists(path_data_app(bundle_id, 'info_{}.json'.format(lang)))
def mkdir_out_app(bundle_id):
    """Make sure the output directory for ``bundle_id`` exists.

    Returns True when the directory had to be created by this call and
    False when it was already present.
    """
    target = path_out_app(bundle_id)
    if dir_exists(target):
        return False
    mkdir(target)
    return True
def next_path(path_pattern):
i = 1
while os.path.exists(path_pattern % i):
@@ -249,7 +270,7 @@ def json_write(path, obj, pretty=False):
def json_write_combined(bundle_id, obj):
fname = path_data_app(bundle_id, 'combined.json')
json_write(fname, obj, pretty=True)
json_write(fname, obj, pretty=False)
def json_write_meta(bundle_id, obj, lang):

View File

@@ -12,31 +12,15 @@ def seconds_to_time(seconds):
return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds)
def gen_dom_tags(unsorted_dict, trackers=None, additionalClasses=None):
sorted_arr = sorted(unsorted_dict, key=lambda x: (-x[1], x[0]))
def gen_dotgraph(sorted_arr):
txt = ''
anyMark = False
for i, (x, y) in enumerate(sorted_arr):
mark = trackers[x] if trackers else True
title = x if y == 1 else '{} ({})'.format(x, y)
txt += '<i{}>{}</i> '.format(' class="trckr"' if mark else '', title)
anyMark |= mark
if txt:
note = '<p class="trckr">known tracker</p>'
return '<div class="tags{}">{}{}</div>'.format(
additionalClasses or '', txt, note if anyMark else '')
else:
return '<i> None </i>'
def gen_dotgraph(count_dict):
txt = ''
sorted_count = sorted(count_dict.items(), key=lambda x: (-x[1], x[0]))
for i, (name, count) in enumerate(sorted_count):
# TODO: use average not total count
txt += '<span title="{0} ({1})"><p>{0} ({1})</p>'.format(name, count)
for x in range(count):
txt += '<i class="cb{}"></i>'.format(i % 10)
for i, (name, count, mark) in enumerate(sorted_arr):
title = '{} ({})'.format(name, count) if count > 1 else name
clss = 'cb{}'.format(i % 10)
if mark:
clss += ' trckr'
txt += '<span class="{0}" title="{1}"><p>{1}</p>'.format(clss, title)
txt += '<i></i>' * count
txt += '</span>'
return '<div class="dot-graph">{}</div>'.format(txt)
@@ -71,20 +55,53 @@ def gen_pie_chart(parts, classes, stroke=0.6):
def gen_radial_graph(obj):
total = 0
tracker = 0
for name, count in obj['total_subdom'].items():
total += count
if obj['tracker_subdom'][name]:
tracker += count
percent = tracker / total
percent = obj['#logs_tracker'] / obj['#logs_total']
return '<div class="pie-chart">{}</div>'.format(
gen_pie_chart([1 - percent, percent], ['cs0', 'cs1']))
def gen_dom_tags(sorted_arr, onlyTrackers=False):
    """Render domain entries as an HTML tag list.

    ``sorted_arr`` is an iterable of ``(name, count, mark)`` triples; they
    are emitted in the order given. Each entry becomes an ``<i>`` element
    labelled ``name``, or ``name (count)`` when ``count`` is greater than 1.
    Marked entries get ``class="trckr"`` unless ``onlyTrackers`` is set, in
    which case the wrapping ``<div>`` carries the ``trckr`` class instead.
    A "known tracker" note is appended when at least one entry is marked.

    Returns the HTML string, or ``'<i> None </i>'`` for empty input.
    """
    tags = []
    any_mark = False
    for name, count, mark in sorted_arr:
        title = '{} ({})'.format(name, count) if count > 1 else name
        clss = ' class="trckr"' if mark and not onlyTrackers else ''
        # the trailing space separates adjacent tags in the rendered output
        tags.append('<i{}>{}</i> '.format(clss, title))
        # truthiness test instead of `|=` so a non-bool mark cannot raise
        any_mark = any_mark or bool(mark)
    if not tags:
        return '<i> None </i>'
    note = '<p class="trckr">known tracker</p>' if any_mark else ''
    return '<div class="{}tags">{}{}</div>'.format(
        'trckr ' if onlyTrackers else '', ''.join(tags), note)
def prepare_json(obj):
    """Convert a combined-recordings dict into render-ready values, in place.

    Reads ``obj['name']``, ``obj['rec_len']`` (a list of numbers) and
    ``obj['pardom']`` / ``obj['subdom']`` (mappings of
    ``name -> (is_tracker, counts)``). After the call:

    - ``name`` is replaced by a placeholder when it was empty,
    - ``#rec`` holds the length of the former ``rec_len`` list and
      ``rec_len`` holds its sum,
    - ``pardom`` / ``subdom`` are lists of ``[name, total, is_tracker]``
      sorted by total descending, then by name,
    - ``tracker`` holds the ``subdom`` entries whose flag is truthy,
    - ``#logs_total`` / ``#logs_tracker`` hold the summed totals of
      ``pardom`` and ``tracker`` respectively.

    Returns None. Not idempotent: ``rec_len`` changes from a list to a
    number, so a second call on the same dict fails.
    """
    def calc_sum(arr):
        # TODO: use average or median, not total count
        return sum(arr)

    def transform(ddic):
        rows = [[name, calc_sum(counts), is_tracker]
                for name, (is_tracker, counts) in ddic.items()]
        rows.sort(key=lambda row: (-row[1], row[0]))  # count desc, then name
        return rows

    if not obj['name']:
        obj['name'] = '&lt; App-Name &gt;'
    obj['#rec'] = len(obj['rec_len'])
    obj['rec_len'] = sum(obj['rec_len'])
    obj['pardom'] = transform(obj['pardom'])
    obj['subdom'] = transform(obj['subdom'])
    # the derived values below rely on the transformed list layout
    obj['tracker'] = [row for row in obj['subdom'] if row[2]]
    obj['#logs_total'] = sum(row[1] for row in obj['pardom'])
    obj['#logs_tracker'] = sum(row[1] for row in obj['tracker'])
def gen_html(bundle_id, obj):
track_dom = [(dom, obj['total_subdom'][dom])
for dom, known in obj['tracker_subdom'].items() if known]
prepare_json(obj)
return mylib.template_with_base(f'''
<h2>{obj['name']}</h2>
<div id="meta">
@@ -100,13 +117,13 @@ def gen_html(bundle_id, obj):
obj['#rec']
}</td></tr>
<tr><td>Total number of logs:</td><td>{
obj['#logs']
obj['#logs_total']
}</td></tr>
<tr><td>Cumulative recording time:</td><td>{
seconds_to_time(obj['rec-total'])
seconds_to_time(obj['rec_len'])
}</td></tr>
<tr><td>Average recording time:</td><td>{
round(obj['rec-total'] / obj['#rec'], 1)
round(obj['rec_len'] / obj['#rec'], 1)
} s</td></tr>
<tr><td>Last updated:</td><td><time datetime="{
time.strftime('%Y-%m-%d %H:%M', time.gmtime(obj['last_date']))
@@ -117,44 +134,32 @@ def gen_html(bundle_id, obj):
</div>
<h3>Connections</h3>
<div>
<h4>Known Trackers ({ len(track_dom) }):</h4>
{ gen_dom_tags(track_dom, additionalClasses=' trckr') }
<h4>Known Trackers ({ len(obj['tracker']) }):</h4>
{ gen_dom_tags(obj['tracker'], onlyTrackers=True) }
<p></p>
<h4>Domains:</h4>
{ gen_dotgraph(obj['total_pardom']) }
{ gen_dom_tags(obj['total_pardom'].items(), obj['tracker_pardom']) }
<h4>Domains ({ len(obj['pardom']) }):</h4>
{ gen_dotgraph(obj['pardom']) }
{ gen_dom_tags(obj['pardom']) }
<h4>Subdomains:</h4>
{ gen_dotgraph(obj['total_subdom']) }
{ gen_dom_tags(obj['total_subdom'].items(), obj['tracker_subdom']) }
<h4>Subdomains ({ len(obj['subdom']) }):</h4>
{ gen_dotgraph(obj['subdom']) }
{ gen_dom_tags(obj['subdom']) }
</div>''', title=obj['name'])
def make_bundle_out(bundle_id):
    """Write ``index.html`` for one bundle into its output directory.

    The output directory is created when it does not exist yet. Returns
    True if the directory was created by this call, otherwise False.
    """
    combined = mylib.json_read_combined(bundle_id)
    target_dir = mylib.path_out_app(bundle_id)
    created = not mylib.dir_exists(target_dir)
    if created:
        mylib.mkdir(target_dir)
    index_path = mylib.path_add(target_dir, 'index.html')
    with open(index_path, 'w') as fp:
        # the page is rendered only after the file has been opened
        fp.write(gen_html(bundle_id, combined))
    return created
def process(bundle_ids):
print('generating html pages ...')
if bundle_ids == ['*']:
bundle_ids = list(mylib.enum_appids())
ids_new_in_index = set()
for bid in bundle_ids:
print(' ' + bid)
if make_bundle_out(bid):
ids_new_in_index.add(bid)
json = mylib.json_read_combined(bid)
mylib.mkdir_out_app(bid)
with open(mylib.path_out_app(bid, 'index.html'), 'w') as fp:
fp.write(gen_html(bid, json))
print('')
return ids_new_in_index
if __name__ == '__main__':

View File

@@ -8,11 +8,11 @@ def gen_obj(bundle_id):
icon = '/app/{0}/icon.png'.format(bundle_id)
else:
icon = '/static/app-template.svg'
try:
name = mylib.json_read_meta(bundle_id, 'de')['trackCensoredName']
except Exception:
name = '&lt; App-Name &gt;'
return {'id': bundle_id, 'name': name, 'img': icon}
return {
'id': bundle_id,
'name': mylib.app_name(bundle_id, '&lt; App-Name &gt;'),
'img': icon
}
def gen_entry(obj):

View File

@@ -48,16 +48,16 @@ def del_id(bundle_ids):
def combine_and_update(bundle_ids, where=None):
new_ids = bundle_download.process(bundle_ids)
affected = bundle_combine.process(bundle_ids, where=where)
if len(affected) == 0:
if len(affected) > 0:
html_bundle.process(affected)
else:
print('no bundle affected by tracker, not generating bundle html')
return
new_ids = html_bundle.process(affected)
if len(new_ids) == 0:
if len(new_ids) > 0:
rebuild_index()
else:
print('no new bundle, not rebuilding index')
return
bundle_download.process(new_ids)
rebuild_index()
def import_update():