Refactor combined.json
This commit is contained in:
@@ -138,16 +138,16 @@ p.trckr { font-size: 0.8em; }
|
||||
margin: 1px 1px;
|
||||
}
|
||||
/* color-blind friendly color palette */
|
||||
.c0{color:#63ACBE} .cb0{background:#63ACBE}
|
||||
.c1{color:#601A4A} .cb1{background:#601A4A}
|
||||
.c2{color:#09F4EC} .cb2{background:#09F4EC}
|
||||
.c3{color:#1F77B4} .cb3{background:#1F77B4}
|
||||
.c4{color:#EE442F} .cb4{background:#EE442F}
|
||||
.c5{color:#7F7F7F} .cb5{background:#7F7F7F}
|
||||
.c6{color:#0F2080} .cb6{background:#0F2080}
|
||||
.c7{color:#3b9f35} .cb7{background:#3b9f35}
|
||||
.c8{color:#F5793A} .cb8{background:#F5793A}
|
||||
.c9{color:#AC66FB} .cb9{background:#AC66FB}
|
||||
.cb0{background:#63ACBE}
|
||||
.cb1{background:#601A4A}
|
||||
.cb2{background:#09F4EC}
|
||||
.cb3{background:#1F77B4}
|
||||
.cb4{background:#EE442F}
|
||||
.cb5{background:#7F7F7F}
|
||||
.cb6{background:#0F2080}
|
||||
.cb7{background:#3b9f35}
|
||||
.cb8{background:#F5793A}
|
||||
.cb9{background:#AC66FB}
|
||||
|
||||
.cs0{stroke:#3AE48C}
|
||||
.cs1{stroke:#D11}
|
||||
|
||||
@@ -26,54 +26,39 @@ def get_parent_domain(subdomain):
|
||||
return '.'.join(parts[-2:])
|
||||
|
||||
|
||||
def dict_increment(ddic, key, num):
    """Add ``num`` to ``ddic[key]`` in place.

    A key that is not yet present is treated as if it held 0, so the first
    call for a key stores ``num`` itself.

    Args:
        ddic: Mutable mapping that accumulates the totals.
        key: Entry to increment.
        num: Amount to add.
    """
    # dict.get replaces the former "probe with a bare lookup and catch
    # KeyError" dance with a single, explicit default.
    ddic[key] = ddic.get(key, 0) + num
|
||||
|
||||
|
||||
def json_combine(bundle_id):
|
||||
res = dict({'#rec': 0, '#logs': 0})
|
||||
domA = dict() # unique sub domains
|
||||
domB = dict() # total sub domains
|
||||
domC = dict() # unique parent domains
|
||||
domD = dict() # total parent domains
|
||||
def inc_dic(ddic, key, num):
|
||||
try:
|
||||
ddic[key][1].append(num)
|
||||
except KeyError:
|
||||
ddic[key] = (tracker.is_tracker(key), [num])
|
||||
|
||||
res = dict({'rec_len': [], 'name': mylib.app_name(bundle_id)})
|
||||
pardom = dict()
|
||||
subdom = dict()
|
||||
latest = 0
|
||||
for fname, jdata in mylib.enum_jsons(bundle_id):
|
||||
latest = max(latest, os.path.getmtime(fname)) # or getctime
|
||||
res['name'] = jdata['app-name']
|
||||
res['#rec'] += 1
|
||||
dict_increment(res, 'rec-total', jdata['duration'])
|
||||
# if not res['name']:
|
||||
# res['name'] = jdata['app-name']
|
||||
res['rec_len'].append(jdata['duration'])
|
||||
try:
|
||||
logs = jdata['logs']
|
||||
uniq_par = set()
|
||||
uniq_par = dict()
|
||||
for subdomain in logs:
|
||||
occurs = len(logs[subdomain])
|
||||
sub_tracker = tracker.is_tracker(subdomain)
|
||||
res['#logs'] += 1
|
||||
dict_increment(domA, subdomain, 1)
|
||||
dict_increment(domB, subdomain, occurs)
|
||||
inc_dic(subdom, subdomain, occurs)
|
||||
par_dom = get_parent_domain(subdomain)
|
||||
uniq_par.add(par_dom)
|
||||
dict_increment(domD, par_dom, occurs)
|
||||
for par in uniq_par:
|
||||
dict_increment(domC, par, 1)
|
||||
try:
|
||||
uniq_par[par_dom] += occurs
|
||||
except KeyError:
|
||||
uniq_par[par_dom] = occurs
|
||||
for name, val in uniq_par.items():
|
||||
inc_dic(pardom, name, val)
|
||||
except KeyError:
|
||||
mylib.err('bundle-combine', 'skip: ' + fname)
|
||||
res['uniq_subdom'] = domA
|
||||
res['uniq_pardom'] = domC
|
||||
res['total_subdom'] = domB
|
||||
res['total_pardom'] = domD
|
||||
sub_tracker = dict()
|
||||
par_tracker = dict()
|
||||
for x in domA:
|
||||
sub_tracker[x] = tracker.is_tracker(x)
|
||||
for x in domC:
|
||||
par_tracker[x] = tracker.is_tracker(x)
|
||||
res['tracker_subdom'] = sub_tracker
|
||||
res['tracker_pardom'] = par_tracker
|
||||
res['pardom'] = pardom
|
||||
res['subdom'] = subdom
|
||||
res['last_date'] = latest
|
||||
return res
|
||||
|
||||
@@ -91,7 +76,7 @@ def process(bundle_ids, where=None):
|
||||
if not haystack:
|
||||
should_update = True
|
||||
else:
|
||||
for x in obj['uniq_subdom']:
|
||||
for x in obj['subdom']:
|
||||
if mylib.bintree_lookup(haystack, x[::-1]):
|
||||
should_update = True
|
||||
break
|
||||
|
||||
@@ -20,10 +20,14 @@ def download_info(bundle_id, lang, force=False):
|
||||
mylib.json_write_meta(bundle_id, json, lang)
|
||||
|
||||
|
||||
def download_icon(bundle_id, force=False, langs=['us', 'de']):
|
||||
# icon_file = mylib.path_data_app(bundle_id, 'icon.png')
|
||||
def needs_icon_path(bundle_id):
|
||||
icon_file = mylib.path_out_app(bundle_id, 'icon.png')
|
||||
if force or not mylib.file_exists(icon_file):
|
||||
return (mylib.file_exists(icon_file), icon_file)
|
||||
|
||||
|
||||
def download_icon(bundle_id, force=False, langs=['us', 'de']):
|
||||
exists, icon_file = needs_icon_path(bundle_id)
|
||||
if force or not exists:
|
||||
json = None
|
||||
for lang in langs:
|
||||
if not json:
|
||||
@@ -31,13 +35,18 @@ def download_icon(bundle_id, force=False, langs=['us', 'de']):
|
||||
json = mylib.json_read_meta(bundle_id, lang)
|
||||
except Exception:
|
||||
continue
|
||||
mylib.download_file(json['artworkUrl100'], icon_file)
|
||||
image_url = json['artworkUrl100'] # fail early on KeyError
|
||||
is_new = mylib.mkdir_out_app(bundle_id)
|
||||
mylib.download_file(image_url, icon_file)
|
||||
return is_new
|
||||
return False
|
||||
|
||||
|
||||
def download_missing_icons(force=False, langs=['us', 'de']):
|
||||
didAny = False
|
||||
for bid in mylib.enum_appids():
|
||||
if not mylib.file_exists(mylib.path_out_app(bid, 'icon.png')):
|
||||
exists, _ = needs_icon_path(bid)
|
||||
if not exists:
|
||||
if not didAny:
|
||||
print('downloading missing icons ...')
|
||||
didAny = True
|
||||
@@ -51,7 +60,11 @@ def download_missing_icons(force=False, langs=['us', 'de']):
|
||||
def download(bundle_id, force=False):
|
||||
if not mylib.valid_bundle_id(bundle_id):
|
||||
mylib.err('apple-download', 'invalid id: ' + bundle_id)
|
||||
return
|
||||
return False
|
||||
|
||||
exists, _ = needs_icon_path(bundle_id)
|
||||
if exists and not force:
|
||||
return False
|
||||
|
||||
mylib.printf(' {} => '.format(bundle_id))
|
||||
for lang in ['us', 'de']:
|
||||
@@ -65,12 +78,14 @@ def download(bundle_id, force=False):
|
||||
lang.upper(), bundle_id), logOnly=True)
|
||||
try:
|
||||
mylib.printf('icon')
|
||||
download_icon(bundle_id, force=force)
|
||||
index_needs_update = download_icon(bundle_id, force=force)
|
||||
mylib.printf('[✔] ')
|
||||
except Exception:
|
||||
index_needs_update = False
|
||||
mylib.printf('[✘] ')
|
||||
mylib.err('apple-download', 'img for ' + bundle_id, logOnly=True)
|
||||
print('') # end printf line
|
||||
return index_needs_update
|
||||
|
||||
|
||||
def process(bundle_ids, force=False):
|
||||
@@ -78,9 +93,12 @@ def process(bundle_ids, force=False):
|
||||
if bundle_ids == ['*']:
|
||||
bundle_ids = list(mylib.enum_data_appids())
|
||||
|
||||
newly_created = set()
|
||||
for bid in bundle_ids:
|
||||
download(bid, force=force)
|
||||
if download(bid, force=force):
|
||||
newly_created.add(bid)
|
||||
print('')
|
||||
return newly_created
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
@@ -77,6 +77,19 @@ def valid_bundle_id(bundle_id):
|
||||
return regex_bundle_id.match(bundle_id)
|
||||
|
||||
|
||||
def app_name(bundle_id, fallback=None, langs=('us', 'de')):
    """Return the first non-empty ``trackCensoredName`` found for an app.

    The metadata of each language in ``langs`` is consulted in order via
    ``json_read_meta(bundle_id, lang)``; the first truthy name wins.

    Args:
        bundle_id: Identifier of the app whose name is looked up.
        fallback: Value returned when no language yields a name.
        langs: Language codes to try, in priority order. Defaults to the
            previously hard-coded ``us`` then ``de``.

    Returns:
        The app name, or ``fallback`` if every lookup failed or was empty.
    """
    for lang in langs:
        try:
            name = json_read_meta(bundle_id, lang)['trackCensoredName']
        except Exception:
            # Deliberate best-effort: any failure while reading or indexing
            # the metadata for this language just means "try the next one".
            continue
        if name:
            return name
    return fallback
|
||||
|
||||
|
||||
def err(scope, msg, logOnly=False):
|
||||
if isinstance(msg, Exception):
|
||||
msg = traceback.format_exc()
|
||||
@@ -146,6 +159,14 @@ def meta_json_exists(bundle_id, lang):
|
||||
return file_exists(path_data_app(bundle_id, 'info_{}.json'.format(lang)))
|
||||
|
||||
|
||||
def mkdir_out_app(bundle_id):
    """Make sure the output directory for ``bundle_id`` exists.

    Returns:
        True if the directory was missing and has just been created,
        False if it was already there.
    """
    target = path_out_app(bundle_id)
    if dir_exists(target):
        return False
    mkdir(target)
    return True
|
||||
|
||||
|
||||
def next_path(path_pattern):
|
||||
i = 1
|
||||
while os.path.exists(path_pattern % i):
|
||||
@@ -249,7 +270,7 @@ def json_write(path, obj, pretty=False):
|
||||
|
||||
def json_write_combined(bundle_id, obj):
|
||||
fname = path_data_app(bundle_id, 'combined.json')
|
||||
json_write(fname, obj, pretty=True)
|
||||
json_write(fname, obj, pretty=False)
|
||||
|
||||
|
||||
def json_write_meta(bundle_id, obj, lang):
|
||||
|
||||
@@ -12,31 +12,15 @@ def seconds_to_time(seconds):
|
||||
return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds)
|
||||
|
||||
|
||||
def gen_dom_tags(unsorted_dict, trackers=None, additionalClasses=None):
|
||||
sorted_arr = sorted(unsorted_dict, key=lambda x: (-x[1], x[0]))
|
||||
def gen_dotgraph(sorted_arr):
|
||||
txt = ''
|
||||
anyMark = False
|
||||
for i, (x, y) in enumerate(sorted_arr):
|
||||
mark = trackers[x] if trackers else True
|
||||
title = x if y == 1 else '{} ({})'.format(x, y)
|
||||
txt += '<i{}>{}</i> '.format(' class="trckr"' if mark else '', title)
|
||||
anyMark |= mark
|
||||
if txt:
|
||||
note = '<p class="trckr">known tracker</p>'
|
||||
return '<div class="tags{}">{}{}</div>'.format(
|
||||
additionalClasses or '', txt, note if anyMark else '')
|
||||
else:
|
||||
return '<i>– None –</i>'
|
||||
|
||||
|
||||
def gen_dotgraph(count_dict):
|
||||
txt = ''
|
||||
sorted_count = sorted(count_dict.items(), key=lambda x: (-x[1], x[0]))
|
||||
for i, (name, count) in enumerate(sorted_count):
|
||||
# TODO: use average not total count
|
||||
txt += '<span title="{0} ({1})"><p>{0} ({1})</p>'.format(name, count)
|
||||
for x in range(count):
|
||||
txt += '<i class="cb{}"></i>'.format(i % 10)
|
||||
for i, (name, count, mark) in enumerate(sorted_arr):
|
||||
title = '{} ({})'.format(name, count) if count > 1 else name
|
||||
clss = 'cb{}'.format(i % 10)
|
||||
if mark:
|
||||
clss += ' trckr'
|
||||
txt += '<span class="{0}" title="{1}"><p>{1}</p>'.format(clss, title)
|
||||
txt += '<i></i>' * count
|
||||
txt += '</span>'
|
||||
return '<div class="dot-graph">{}</div>'.format(txt)
|
||||
|
||||
@@ -71,20 +55,53 @@ def gen_pie_chart(parts, classes, stroke=0.6):
|
||||
|
||||
|
||||
def gen_radial_graph(obj):
|
||||
total = 0
|
||||
tracker = 0
|
||||
for name, count in obj['total_subdom'].items():
|
||||
total += count
|
||||
if obj['tracker_subdom'][name]:
|
||||
tracker += count
|
||||
percent = tracker / total
|
||||
percent = obj['#logs_tracker'] / obj['#logs_total']
|
||||
return '<div class="pie-chart">{}</div>'.format(
|
||||
gen_pie_chart([1 - percent, percent], ['cs0', 'cs1']))
|
||||
|
||||
|
||||
def gen_dom_tags(sorted_arr, onlyTrackers=False):
    """Render domain entries as an HTML tag list.

    Args:
        sorted_arr: Iterable of ``(name, count, mark)`` triples, already in
            display order. ``mark`` is evaluated for truthiness only.
        onlyTrackers: When true, the wrapper div gets the ``trckr`` class and
            the individual tags are not marked separately.

    Returns:
        A ``<div>`` containing one ``<i>`` per entry (plus a "known tracker"
        note if any entry is marked), or a placeholder ``<i>`` element when
        there are no entries.
    """
    tags = []
    any_mark = False
    for name, count, mark in sorted_arr:
        # The count is only shown when the domain occurred more than once.
        title = '{} ({})'.format(name, count) if count > 1 else name
        clss = ' class="trckr"' if mark and not onlyTrackers else ''
        tags.append('<i{}>{}</i> '.format(clss, title))
        # Truthiness test instead of `|=` so that None or other non-bool
        # marks cannot raise a TypeError.
        if mark:
            any_mark = True

    if not tags:
        return '<i>– None –</i>'

    note = '<p class="trckr">known tracker</p>'
    return '<div class="{}tags">{}{}</div>'.format(
        'trckr ' if onlyTrackers else '',
        ''.join(tags),
        note if any_mark else '')
|
||||
|
||||
|
||||
def prepare_json(obj):
    """Convert a combined-data dict, in place, into the shape used for display.

    Reads ``name``, ``rec_len``, ``pardom`` and ``subdom`` from ``obj`` and
    overwrites / adds:

    * ``name``: replaced by ``'< App-Name >'`` when falsy
    * ``#rec``: number of entries in the original ``rec_len`` list
    * ``rec_len``: sum of the original ``rec_len`` list
    * ``pardom`` / ``subdom``: lists of ``[name, count, is_tracker]`` sorted
      by count descending, then by name
    * ``tracker``: the ``subdom`` rows whose tracker flag is truthy
    * ``#logs_total`` / ``#logs_tracker``: summed counts of ``pardom`` and
      ``tracker`` respectively

    Because ``rec_len`` is replaced by a number, the function must not be
    applied twice to the same object.
    """
    def calc_sum(arr):
        # TODO: use average or median, not total count
        return sum(arr)

    def transform(ddic):
        # ddic maps name -> (is_tracker, [count, ...])
        rows = [[name, calc_sum(counts), is_tracker]
                for name, (is_tracker, counts) in ddic.items()]
        rows.sort(key=lambda x: (-x[1], x[0]))  # count desc, then name
        return rows

    if not obj['name']:
        obj['name'] = '< App-Name >'
    obj['#rec'] = len(obj['rec_len'])
    obj['rec_len'] = sum(obj['rec_len'])
    obj['pardom'] = transform(obj['pardom'])
    obj['subdom'] = transform(obj['subdom'])
    # The following depend on the transformed row layout above.
    obj['tracker'] = [row for row in obj['subdom'] if row[2]]
    obj['#logs_total'] = sum(row[1] for row in obj['pardom'])
    obj['#logs_tracker'] = sum(row[1] for row in obj['tracker'])
|
||||
|
||||
|
||||
def gen_html(bundle_id, obj):
|
||||
track_dom = [(dom, obj['total_subdom'][dom])
|
||||
for dom, known in obj['tracker_subdom'].items() if known]
|
||||
prepare_json(obj)
|
||||
return mylib.template_with_base(f'''
|
||||
<h2>{obj['name']}</h2>
|
||||
<div id="meta">
|
||||
@@ -100,13 +117,13 @@ def gen_html(bundle_id, obj):
|
||||
obj['#rec']
|
||||
}</td></tr>
|
||||
<tr><td>Total number of logs:</td><td>{
|
||||
obj['#logs']
|
||||
obj['#logs_total']
|
||||
}</td></tr>
|
||||
<tr><td>Cumulative recording time:</td><td>{
|
||||
seconds_to_time(obj['rec-total'])
|
||||
seconds_to_time(obj['rec_len'])
|
||||
}</td></tr>
|
||||
<tr><td>Average recording time:</td><td>{
|
||||
round(obj['rec-total'] / obj['#rec'], 1)
|
||||
round(obj['rec_len'] / obj['#rec'], 1)
|
||||
} s</td></tr>
|
||||
<tr><td>Last updated:</td><td><time datetime="{
|
||||
time.strftime('%Y-%m-%d %H:%M', time.gmtime(obj['last_date']))
|
||||
@@ -117,44 +134,32 @@ def gen_html(bundle_id, obj):
|
||||
</div>
|
||||
<h3>Connections</h3>
|
||||
<div>
|
||||
<h4>Known Trackers ({ len(track_dom) }):</h4>
|
||||
{ gen_dom_tags(track_dom, additionalClasses=' trckr') }
|
||||
<h4>Known Trackers ({ len(obj['tracker']) }):</h4>
|
||||
{ gen_dom_tags(obj['tracker'], onlyTrackers=True) }
|
||||
<p></p>
|
||||
|
||||
<h4>Domains:</h4>
|
||||
{ gen_dotgraph(obj['total_pardom']) }
|
||||
{ gen_dom_tags(obj['total_pardom'].items(), obj['tracker_pardom']) }
|
||||
<h4>Domains ({ len(obj['pardom']) }):</h4>
|
||||
{ gen_dotgraph(obj['pardom']) }
|
||||
{ gen_dom_tags(obj['pardom']) }
|
||||
|
||||
<h4>Subdomains:</h4>
|
||||
{ gen_dotgraph(obj['total_subdom']) }
|
||||
{ gen_dom_tags(obj['total_subdom'].items(), obj['tracker_subdom']) }
|
||||
<h4>Subdomains ({ len(obj['subdom']) }):</h4>
|
||||
{ gen_dotgraph(obj['subdom']) }
|
||||
{ gen_dom_tags(obj['subdom']) }
|
||||
</div>''', title=obj['name'])
|
||||
|
||||
|
||||
def make_bundle_out(bundle_id):
|
||||
json = mylib.json_read_combined(bundle_id)
|
||||
out_dir = mylib.path_out_app(bundle_id)
|
||||
needs_update_index = False
|
||||
if not mylib.dir_exists(out_dir):
|
||||
needs_update_index = True
|
||||
mylib.mkdir(out_dir)
|
||||
with open(mylib.path_add(out_dir, 'index.html'), 'w') as fp:
|
||||
fp.write(gen_html(bundle_id, json))
|
||||
return needs_update_index
|
||||
|
||||
|
||||
def process(bundle_ids):
|
||||
print('generating html pages ...')
|
||||
if bundle_ids == ['*']:
|
||||
bundle_ids = list(mylib.enum_appids())
|
||||
|
||||
ids_new_in_index = set()
|
||||
for bid in bundle_ids:
|
||||
print(' ' + bid)
|
||||
if make_bundle_out(bid):
|
||||
ids_new_in_index.add(bid)
|
||||
json = mylib.json_read_combined(bid)
|
||||
mylib.mkdir_out_app(bid)
|
||||
with open(mylib.path_out_app(bid, 'index.html'), 'w') as fp:
|
||||
fp.write(gen_html(bid, json))
|
||||
print('')
|
||||
return ids_new_in_index
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -8,11 +8,11 @@ def gen_obj(bundle_id):
|
||||
icon = '/app/{0}/icon.png'.format(bundle_id)
|
||||
else:
|
||||
icon = '/static/app-template.svg'
|
||||
try:
|
||||
name = mylib.json_read_meta(bundle_id, 'de')['trackCensoredName']
|
||||
except Exception:
|
||||
name = '< App-Name >'
|
||||
return {'id': bundle_id, 'name': name, 'img': icon}
|
||||
return {
|
||||
'id': bundle_id,
|
||||
'name': mylib.app_name(bundle_id, '< App-Name >'),
|
||||
'img': icon
|
||||
}
|
||||
|
||||
|
||||
def gen_entry(obj):
|
||||
|
||||
14
src/main.py
14
src/main.py
@@ -48,16 +48,16 @@ def del_id(bundle_ids):
|
||||
|
||||
|
||||
def combine_and_update(bundle_ids, where=None):
|
||||
new_ids = bundle_download.process(bundle_ids)
|
||||
affected = bundle_combine.process(bundle_ids, where=where)
|
||||
if len(affected) == 0:
|
||||
if len(affected) > 0:
|
||||
html_bundle.process(affected)
|
||||
else:
|
||||
print('no bundle affected by tracker, not generating bundle html')
|
||||
return
|
||||
new_ids = html_bundle.process(affected)
|
||||
if len(new_ids) == 0:
|
||||
print('no new bundle, not rebuilding index')
|
||||
return
|
||||
bundle_download.process(new_ids)
|
||||
if len(new_ids) > 0:
|
||||
rebuild_index()
|
||||
else:
|
||||
print('no new bundle, not rebuilding index')
|
||||
|
||||
|
||||
def import_update():
|
||||
|
||||
Reference in New Issue
Block a user