From a1b319890039df45a95427324bb57dac5a38c7f4 Mon Sep 17 00:00:00 2001 From: relikd Date: Mon, 21 Sep 2020 11:55:39 +0200 Subject: [PATCH] Percentile graphs --- out/static/style.css | 59 ++++++++++++---- src/bundle_download.py | 51 ++++++++++++-- src/common_lib.py | 28 +------- src/html_bundle.py | 93 +++++++++++++++++--------- src/index_meta.py | 148 +++++++++++++++++++++++++++++++---------- 5 files changed, 263 insertions(+), 116 deletions(-) diff --git a/out/static/style.css b/out/static/style.css index 5fe176f..add9759 100644 --- a/out/static/style.css +++ b/out/static/style.css @@ -61,9 +61,9 @@ footer .links { /* dropdown */ .dropdown button { - padding: .5em 1em; - font-size: 16px; - cursor: pointer; + padding: .5em 1em; + font-size: 16px; + cursor: pointer; } .dropdown { display: inline-block; position: relative; } .dropdown nav { @@ -100,7 +100,7 @@ footer .links { margin: .5em auto 1em; display: block; } -#app-toc img, #get-appcheck img, #meta img { +#app-toc img, img.app-icon { border-radius: 21.5%; border: .7px solid #ccc; } @@ -151,11 +151,42 @@ p.subtitle { margin-top: .2em; } .center { text-align: center; } .bg1 { background: #eee; } .border { border: 1pt solid #ccc; } -td { padding: .2em 1em .2em .1em; } + +/*#meta { margin-bottom: 2em; }*/ +#meta .icons { margin-bottom: 2em; } +#meta .icons > *:first-child { margin-right: 1em; } +#meta td { padding: .2em 1em .2em .1em; } #meta td:nth-child(2) { font-weight: bold } +/* percentile */ +#stats { + display: grid; + grid-template-columns: repeat(3, max-content); + grid-gap: 0.7em 2em; + margin: 2em 0; +} +#stats .col1 { grid-column-start: 1; } +#stats>div>h4 { margin: 0 0 .7em; } +#stats>div>p { margin-top: .5em; } +.percentile { + display: inline-block; + background: #EEE; + border: 1px solid #000; + width: 150px; + height: 1.2em; + padding-right: 3px; + vertical-align: top; +} +.percentile div { + position: relative; + background: #000; + width: 3px; + height: 100%; +} +.percentile.b div { background: 
#CA0D3A; } +.percentile.g div { background: #6AC45C; } + /* app bundle: domain tags */ -.tags { margin: 2em 0; } .tags a { font-size: .9em; font-style: normal; @@ -172,6 +203,7 @@ p.trckr { font-size: .9em; margin-left: .5em; } /* app bundle: graphs */ .dot-graph { + margin-bottom: 1.5em; touch-action: manipulation; user-select: none; -webkit-user-select: none; @@ -212,14 +244,15 @@ p.trckr { font-size: .9em; margin-left: .5em; } #help-links .done { color: #52C840; } /* responsive */ -@media(max-width: 647px) { +@media(max-width: 900px) { + #stats { grid-template-columns: repeat(2, max-content); } +} +@media(max-width: 650px) { header img { padding-right: 0; } header h1 { font-size: 1em; } header h1 span { display: none; } /* header subtitle */ - main { padding-left: .5em; padding-right: .5em; } + main { padding-left: 1em; padding-right: 1em; } footer .col3 div { width: 100%; padding: 0; } /* 3 columns */ - #meta .icons { margin-bottom: 1em; } /* icons beside each other */ - .pie-chart { float: right; } #app-toc a { text-align: left; } #app-toc div { display: inline-block; @@ -233,10 +266,10 @@ p.trckr { font-size: .9em; margin-left: .5em; } #app-toc img { float: left; width: 44px; height: 44px; margin: 0 .5em; } + #stats { grid-template-columns: max-content; } } -@media(min-width: 648px) { - #meta .icons { float: right; } /* icons below each other */ - .pie-chart { margin-top: 1em; } +@media(min-width: 651px) { + #meta .icons { float: right; } #dom-toc h3 a { display: none; } #dom-toc div:nth-child(1) { display: inline-block; diff --git a/src/bundle_download.py b/src/bundle_download.py index 3f66ab7..fa37097 100755 --- a/src/bundle_download.py +++ b/src/bundle_download.py @@ -3,21 +3,58 @@ import sys import common_lib as mylib +AVAILABLE_LANGS = ['us', 'de'] # order matters + + +def fname_for(bundle_id, lang): + return mylib.path_data_app(bundle_id, 'info_{}.json'.format(lang)) + + +def read_from_disk(bundle_id, lang): + return mylib.json_read(fname_for(bundle_id, 
lang)) + + +def read_first_from_disk(bundle_id, langs=AVAILABLE_LANGS): + for lang in langs: + if mylib.file_exists(fname_for(bundle_id, lang)): + return read_from_disk(bundle_id, lang) + return None + + +def app_names(bundle_id): + def name_for(lang): + try: + return read_from_disk(bundle_id, lang)['trackCensoredName'] + except Exception: + return None + ret = {} + for lang in AVAILABLE_LANGS: + name = name_for(lang) + if name: + ret[lang] = name + return ret + + +def get_genres(bundle_id, langs=AVAILABLE_LANGS): + json = read_first_from_disk(bundle_id, langs=langs) + return list(zip(json['genreIds'], json['genres'])) if json else [] + def download_info(bundle_id, lang, force=False): - if force or not mylib.meta_json_exists(bundle_id, lang): + fname = fname_for(bundle_id, lang) + if force or not mylib.file_exists(fname): url = 'https://itunes.apple.com/lookup?bundleId={}&country={}'.format( bundle_id, lang.upper()) json = mylib.download(url, isJSON=True) json = json['results'][0] # delete unused keys to save on storage for key in ['supportedDevices', 'releaseNotes', 'description', - 'screenshotUrls']: + 'screenshotUrls', 'ipadScreenshotUrls']: try: del(json[key]) except KeyError: continue - mylib.json_write_meta(bundle_id, json, lang) + mylib.json_write(fname, json, pretty=True) def needs_icon_path(bundle_id): @@ -25,14 +62,14 @@ def needs_icon_path(bundle_id): return (mylib.file_exists(icon_file), icon_file) -def download_icon(bundle_id, force=False, langs=['us', 'de']): +def download_icon(bundle_id, force=False, langs=AVAILABLE_LANGS): exists, icon_file = needs_icon_path(bundle_id) if force or not exists: json = None for lang in langs: if not json: try: - json = mylib.json_read_meta(bundle_id, lang) + json = read_from_disk(bundle_id, lang) except Exception: continue image_url = json['artworkUrl100'] # fail early on KeyError @@ -42,7 +79,7 @@ def download_icon(bundle_id, force=False, langs=['us', 'de']): return False -def download_missing_icons(force=False, 
langs=['us', 'de']): +def download_missing_icons(force=False, langs=AVAILABLE_LANGS): didAny = False for bid in mylib.enum_appids(): exists, _ = needs_icon_path(bid) @@ -67,7 +104,7 @@ def download(bundle_id, force=False): return False mylib.printf(' {} => '.format(bundle_id)) - for lang in ['us', 'de']: + for lang in AVAILABLE_LANGS: try: mylib.printf(lang) download_info(bundle_id, lang, force=force) diff --git a/src/common_lib.py b/src/common_lib.py index a3465ca..47f2479 100755 --- a/src/common_lib.py +++ b/src/common_lib.py @@ -82,20 +82,6 @@ def valid_bundle_id(bundle_id): return regex_bundle_id.match(bundle_id) -def app_names(bundle_id): - def name_for(lang): - try: - return json_read_meta(bundle_id, lang)['trackCensoredName'] - except Exception: - return None - ret = {} - for lang in ['us', 'de']: - name = name_for(lang) - if name: - ret[lang] = name - return ret - - def err(scope, msg, logOnly=False): logger.error('[{}] {}'.format(scope, msg)) if not logOnly: @@ -168,13 +154,10 @@ def file_exists(path): def symlink(source, target): if not file_exists(target): + rm_file(target) # file_exists is false if symlink cant be followed os.symlink(source, target) -def meta_json_exists(bundle_id, lang): - return file_exists(path_data_app(bundle_id, 'info_{}.json'.format(lang))) - - def mkdir_out_app(bundle_id): out_dir = path_out_app(bundle_id) if not dir_exists(out_dir): @@ -278,10 +261,6 @@ def json_read_evaluated(bundle_id): return json_read(pth), pth -def json_read_meta(bundle_id, lang): - return json_read(path_data_app(bundle_id, 'info_{}.json'.format(lang))) - - # JSON write def json_write(path, obj, pretty=False): @@ -297,8 +276,3 @@ def json_write_combined(bundle_id, obj): def json_write_evaluated(bundle_id, obj): fname = path_data_app(bundle_id, 'evaluated.json') json_write(fname, obj, pretty=False) - - -def json_write_meta(bundle_id, obj, lang): - fname = path_data_app(bundle_id, 'info_{}.json'.format(lang)) - json_write(fname, obj, pretty=True) diff --git 
a/src/html_bundle.py b/src/html_bundle.py index d7e8696..e28c500 100755 --- a/src/html_bundle.py +++ b/src/html_bundle.py @@ -4,18 +4,9 @@ import sys import time import math import common_lib as mylib +import bundle_download import index_app_names - - -def seconds_to_time(seconds): - seconds = int(seconds) - minutes, seconds = divmod(seconds, 60) - hours, minutes = divmod(minutes, 60) - return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds) - - -def round_num(num): - return format(num, '.1f') # .rstrip('0').rstrip('.') +import index_meta def gen_dotgraph(sorted_arr): @@ -59,8 +50,7 @@ def gen_pie_chart(parts, classes, stroke=0.6): def gen_radial_graph(percent): - return '
{}
'.format( - gen_pie_chart([1 - percent, percent], ['cs0', 'cs1'])) + return gen_pie_chart([1 - percent, percent], ['cs0', 'cs1']) def gen_dom_tags(sorted_arr, isSub, onlyTrackers=False): @@ -81,45 +71,82 @@ def gen_dom_tags(sorted_arr, isSub, onlyTrackers=False): def gen_html(bundle_id, obj): + + def round_num(num): + return format(num, '.1f') # .rstrip('0').rstrip('.') + + def as_pm(value): + return round_num(value) + '/min' + + def as_percent(value): + return round_num(value * 100) + '%' + + def as_date(value): + return ''.format( + time.strftime('%Y-%m-%d %H:%M', time.gmtime(value)), + time.strftime('%Y-%m-%d, %H:%M', time.gmtime(value)) + ) + + def seconds_to_time(seconds): + seconds = int(seconds) + minutes, seconds = divmod(seconds, 60) + hours, minutes = divmod(minutes, 60) + return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds) + + def stat(col, title, rank, value, optional=None, fmt=str, fmt2=None): + # percent = int(rank[0] / max_rank * 100) + r = rank[0] / max_rank + detail = fmt2(value) if fmt2 else fmt(value) + if optional: + x = fmt(optional) if fmt2 else optional + detail += '({})'.format(x) + return f''' +
+

{title}

+
+ {detail} +

+ Rank: {rank[0]}, + best: {fmt(rank[1])}, + worst: {fmt(rank[2])}

+
''' + name = index_app_names.get_name(bundle_id) + gernes = bundle_download.get_genres(bundle_id) + rank, max_rank = index_meta.get_rank(bundle_id) obj['tracker'] = list(filter(lambda x: x[2], obj['subdom'])) return mylib.template_with_base(f'''

{name}

Bundle-id:{ bundle_id }

- { gen_radial_graph(obj['tracker_percent']) } + app-icon
- - - - - - + +
Last update:
Number of recordings:{ obj['sum_rec'] }
Total number of requests:{ - obj['sum_logs'] }({ - round_num(obj['sum_logs_pm'])} / min)
Average number of requests:{ - round_num(obj['avg_logs'])}({ - round_num(obj['avg_logs_pm'])} / min)
Average recording time:{ - seconds_to_time(obj['avg_time']) }
Cumulative recording time:{ - seconds_to_time(obj['sum_time']) }
App Categories:{ + ', '.join([name for i, name in gernes]) + }
Last Update:{as_date(obj['last_date'])}
+
+ { stat(1, 'Number of recordings:', rank['sum_rec'], obj['sum_rec']) } + { stat(1, 'Average recording time:', rank['avg_time'], obj['avg_time'], fmt=seconds_to_time) } + { stat(2, 'Cumulative recording time:', rank['sum_time'], obj['sum_time'], fmt=seconds_to_time) } + { stat(1, 'Average number of requests:', rank['avg_logs_pm'], obj['avg_logs'], obj['avg_logs_pm'], fmt=as_pm, fmt2=round_num) } + { stat(2, 'Total number of requests:', rank['sum_logs_pm'], obj['sum_logs'], obj['sum_logs_pm'], fmt=as_pm, fmt2=str) } + { stat(1, 'Number of domains:', rank['pardom'], len(obj['pardom'])) } + { stat(2, 'Number of subdomains:', rank['subdom'], len(obj['subdom'])) } + { stat(3, 'Tracker percentage:', rank['tracker_percent'], obj['tracker_percent'], fmt=as_percent) } +

Connections

Potential Trackers ({ len(obj['tracker']) }):

{ gen_dom_tags(obj['tracker'], isSub=True, onlyTrackers=True) } -

-

Domains ({ len(obj['pardom']) }):

{ gen_dotgraph(obj['pardom']) } { gen_dom_tags(obj['pardom'], isSub=False) } -

Subdomains ({ len(obj['subdom']) }):

{ gen_dotgraph(obj['subdom']) } { gen_dom_tags(obj['subdom'], isSub=True) }
diff --git a/src/index_meta.py b/src/index_meta.py
index 088916a..9175873 100755
--- a/src/index_meta.py
+++ b/src/index_meta.py
@@ -3,61 +3,151 @@
 import sys
 import common_lib as mylib
 
+_rank_dict = None
 
-def index_file():
-    return mylib.path_data_index('meta.json')
+
+def fname_app_summary():
+    return mylib.path_data_index('app_summary.json')
+
+
+def fname_app_rank():
+    return mylib.path_data_index('app_rank.json')
 
 
 def load_json_from_disk(fname):
     return mylib.json_read(fname) if mylib.file_exists(fname) else {}
 
 
-def load():
-    return load_json_from_disk(index_file())
-
-
-def get_total_counts():
-    try:
-        return load_json_from_disk(index_file())['_']
-    except KeyError:
-        return [0, 0]
-
-
-def process(bundle_ids, deleteOnly=False):
-    print('writing index: meta ...')
-    fname = index_file()
-    if bundle_ids == ['*']:
-        bundle_ids = list(mylib.enum_data_appids())
-        print(' full reset')
-        mylib.rm_file(fname)  # rebuild from ground up
-
-    # json format: `bundle-id : [#recordings, #logs, #domains, #subdomains]`
-    index = load_json_from_disk(fname)
-    for bid in bundle_ids:
-        # delete old value
+def try_del(index, keys):
+    for x in keys:
         try:
-            del(index[bid])
+            del(index[x])
         except KeyError:
             pass
+
+
+def json_to_list(json):
+    return [
+        json['sum_rec'],
+        json['sum_logs'],
+        json['sum_logs_pm'],
+        json['sum_time'],
+        json['avg_logs'],
+        json['avg_logs_pm'],
+        json['avg_time'],
+        json['last_date'],
+        len(json['pardom']),
+        len(json['subdom']),
+        json['tracker_percent']
+    ]
+
+
+def list_to_json(list):
+    return {
+        'sum_rec': list[0],
+        'sum_logs': list[1],
+        'sum_logs_pm': list[2],
+        'sum_time': list[3],
+        'avg_logs': list[4],
+        'avg_logs_pm': list[5],
+        'avg_time': list[6],
+        'last_date': list[7],
+        'pardom': list[8],
+        'subdom': list[9],
+        'tracker_percent': list[10]
+    }
+
+
+def write_summary_index(index, bundle_ids, deleteOnly=False):
+    for bid in bundle_ids:
+        # delete old value
+        try_del(index, [bid])
         if deleteOnly:
             continue
         # set new value
-        json, _ = mylib.json_read_evaluated(bid)
-        index[bid] = [json['sum_rec'], json['sum_logs'],
-                      len(json['pardom']), len(json['subdom'])]
+        evaluated_json, _ = mylib.json_read_evaluated(bid)
+        index[bid] = json_to_list(evaluated_json)
+
     # sum of counts
-    try:
-        del(index['_'])
-    except KeyError:
-        pass
+    try_del(index, ['_sum'])
     total = [0, 0]
     for val in index.values():
         total[0] += val[0]
         total[1] += val[1]
-    index['_'] = total
+    index['_sum'] = total
+    mylib.json_write(fname_app_summary(), index, pretty=False)
 
-    # write json
-    mylib.json_write(fname, index, pretty=False)
+
+def write_rank_index(index):
+    ''' Replace every per-app value in `index` with its rank and persist.
+
+    `index` is modified in place. Ranks start at 1 and apps with equal
+    values share a rank. `_min` / `_max` hold the values of the first and
+    last rank per column, `_ranks` holds the number of ranked apps.
+    '''
+    try_del(index, ['_sum', '_ranks', '_min', '_max'])
+    # count apps now, before the `_min` / `_max` meta keys are added below
+    app_count = len(index)
+    mins = []
+    maxs = []
+    for i in range(11):  # equal to number of array entries
+        tmp = {}
+        # make temporary reverse index
+        for bid, val in index.items():
+            try:
+                tmp[val[i]].append(bid)
+            except KeyError:
+                tmp[val[i]] = [bid]
+        # read index position from temp reverse index
+        r = 1
+        # columns 0, 3, 6, 7 are ranked descending (largest value first)
+        ordered = sorted(tmp.items(), reverse=i in [0, 3, 6, 7])
+        for _, ids in ordered:
+            for bid in ids:
+                index[bid][i] = r
+            r += len(ids)
+        # an empty index has no values to report
+        mins.append(ordered[0][0] if ordered else None)
+        maxs.append(ordered[-1][0] if ordered else None)
+    index['_min'] = mins
+    index['_max'] = maxs
+    index['_ranks'] = app_count
+    mylib.json_write(fname_app_rank(), index, pretty=False)
+
+
+def get_total_counts():
+    try:
+        return load_json_from_disk(fname_app_summary())['_sum']
+    except KeyError:
+        return [0, 0]
+
+
+def get_rank(bundle_id):
+    ''' Return `(ranks, max_rank)` for `bundle_id`.
+
+    `ranks` maps each summary key to a tuple `(rank, value of first rank,
+    value of last rank)`; `max_rank` is the stored number of ranked apps.
+    '''
+    global _rank_dict
+    if not _rank_dict:
+        _rank_dict = load_json_from_disk(fname_app_rank())
+    return list_to_json(list(zip(
+        _rank_dict[bundle_id],
+        _rank_dict['_min'],
+        _rank_dict['_max'],
+    ))), _rank_dict['_ranks']
+
+
+def process(bundle_ids, deleteOnly=False):
+    print('writing index: meta ...')
+    if bundle_ids == ['*']:
+        bundle_ids = list(mylib.enum_data_appids())
+        print(' full reset')
+        mylib.rm_file(fname_app_summary())  # rebuild from ground up
+
+    index = load_json_from_disk(fname_app_summary())
+    write_summary_index(index, bundle_ids, deleteOnly=deleteOnly)
+    write_rank_index(index)
     print('')