From 8fef0cb33fd60d7281f5d74cbf35c5babbcbedbc Mon Sep 17 00:00:00 2001 From: relikd Date: Wed, 23 Sep 2020 17:13:33 +0200 Subject: [PATCH] Refactoring html generation + categories --- out/static/lookup-rank.js | 2 +- out/static/style.css | 75 ++++++++------- src/download_itunes.py | 35 +++---- src/html_bundle.py | 129 ++++++++------------------ src/html_categories.py | 31 +++++++ src/html_index_apps.py | 80 ++-------------- src/html_index_domains.py | 190 ++++++++++++++++---------------------- src/html_root.py | 56 +++++------ src/index_app_names.py | 2 +- src/index_categories.py | 135 +++++++++++++++++++++++++++ src/index_meta.py | 2 - src/lib_graphs.py | 66 +++++++++++++ src/lib_html.py | 183 ++++++++++++++++++++++++++++++++++++ templates/base.html | 6 +- 14 files changed, 628 insertions(+), 364 deletions(-) create mode 100755 src/html_categories.py create mode 100755 src/index_categories.py create mode 100755 src/lib_graphs.py create mode 100755 src/lib_html.py diff --git a/out/static/lookup-rank.js b/out/static/lookup-rank.js index c462b74..f92b45c 100644 --- a/out/static/lookup-rank.js +++ b/out/static/lookup-rank.js @@ -12,7 +12,7 @@ function lookup_rank_js(bundle_id) { function update(i, id, fmt=String) { let r = (rank[i] - 1) / (rank_max - 1); let target = document.getElementById(id); - let bar = target.querySelector('.percentile'); + let bar = target.querySelector('.pcbar'); bar.classList.add(r < 0.5 ? 'g' : 'b'); bar.firstChild.style.left = r * 100 + '%'; let meta = target.lastElementChild.children; diff --git a/out/static/style.css b/out/static/style.css index add9759..f25bf64 100644 --- a/out/static/style.css +++ b/out/static/style.css @@ -15,7 +15,7 @@ a.no-ul, a.no-ul:hover, .no-ul-all a, .no-ul-all a:hover { main, footer { padding: 0 1em; } header, main, footer > div { margin: 0 auto; - max-width: 1120px; /*1307px*/ + max-width: 1118px; } header { height: 50px; } header img { vertical-align: top; padding: 0 7px; } @@ -75,21 +75,21 @@ footer .links { } .dropdown:hover nav { display: block; } .dropdown a { display: block; padding: .5em 1em; } -.dropdown a:hover { background-color: #eee; } #app-toc div:hover, .dropdown:hover button, .dropdown a:hover { - background: #BBC6CA; + background: #DDD; } /* app index */ +#app-toc { + display: grid; + grid-gap: 10px; + grid-template-columns: repeat(auto-fill, minmax(178px, 1fr)); +} #app-toc a { text-align: center; } #app-toc div { - display: inline-block; - width: 140px; height: 12em; - margin: 5px; padding: 16px; - vertical-align: top; overflow: hidden; word-wrap: break-word; background: #eee; @@ -107,22 +107,18 @@ footer .links { #app-toc span.name { font-size: .8em; font-weight: bold; } #app-toc span.detail { font-size: .7em; } -#pagination { text-align: center; margin-top: 2em; } -#pagination a { margin: .5em; padding: .2em; } -#pagination a.active { border: 1pt solid black; border-radius: .2em; } +.pagination { text-align: center; margin-top: 2em; } +.pagination a { margin: .5em; padding: .2em; } +.pagination a.active { border: 1pt solid black; border-radius: .2em; } /* domain index */ -#dom-toc h3 { - position: sticky; - top: 0; - background: #fff; - padding-bottom: 4px; -} -#dom-toc a, #dom-top10 a { word-wrap: break-word; } +#dom-top10 { text-align: right; } +#dom-top10>div { margin: .4em; } +#dom-top10 a, #dom-toc a { word-wrap: break-word; } #dom-toc span { display: table; } .found-in span, .snd { color: #586472; font-size: .85em; } -.loadbar { +.fillbar { display: block; background: #DDD; width: 200px; @@ -130,14 +126,15 @@ footer .links { border-radius: 4px; text-align: left; } -.loadbar span { +.fillbar>i { + font-style: normal; display: inline-block; border-radius: 4px 0 0 4px; background: #AC2B4A; font-size: .8em; - padding: 2px 0 2px 0; text-align: center; color: #FFF; + line-height: 1.8em; } @@ -149,8 +146,10 @@ p.subtitle { margin-top: .2em; } .mg_top { margin-top: 2em; } .right { text-align: right; } .center { text-align: center; } -.bg1 { background: #eee; } -.border { border: 1pt solid #ccc; } +.bg1 { background: #EEE; } +.border { border: 1pt solid #CCC; } +.large { font-size: 1.2em; } +.stick-top { top: 0; position: sticky; padding: .8em 0 .5em; background: #FFF; } /*#meta { margin-bottom: 2em; }*/ #meta .icons { margin-bottom: 2em; } @@ -166,9 +165,9 @@ p.subtitle { margin-top: .2em; } margin: 2em 0; } #stats .col1 { grid-column-start: 1; } -#stats>div>h4 { margin: 0 0 .7em; } -#stats>div>p { margin-top: .5em; } -.percentile { +.rank h4 { margin: 0 0 .7em; } +.rank p { margin-top: .5em; } +.pcbar { display: inline-block; background: #EEE; border: 1px solid #000; @@ -177,14 +176,15 @@ p.subtitle { margin-top: .2em; } padding-right: 3px; vertical-align: top; } -.percentile div { +.pcbar>i { + display: block; position: relative; background: #000; width: 3px; height: 100%; } -.percentile.b div { background: #CA0D3A; } -.percentile.g div { background: #6AC45C; } +.pcbar.b>i { background: #CA0D3A; } +.pcbar.g>i { background: #6AC45C; } /* app bundle: domain tags */ .tags a { @@ -198,7 +198,14 @@ p.subtitle { margin-top: .2em; } display: inline-block; margin: .12em; } -.tags a.trckr, .tags.trckr a { background: #F9A7A7;; border-color: #B06363; } +.tags a:hover { background: #DDD; } +.tags.large > * { + border-radius: .4em; + padding: 6pt 12pt; + margin: .36em; +} +.tags a.trckr, .tags.trckr a { background: #F9A7A7; border-color: #B06363; } +.tags a.trckr:hover, .tags.trckr a:hover { background: #F99494; } p.trckr { font-size: .9em; margin-left: .5em; } /* app bundle: graphs */ @@ -253,10 +260,9 @@ p.trckr { font-size: .9em; margin-left: .5em; } header h1 span { display: none; } /* header subtitle */ main { padding-left: 1em; padding-right: 1em; } footer .col3 div { width: 100%; padding: 0; } /* 3 columns */ + #app-toc { grid-template-columns: repeat(auto-fill, minmax(250px, 1fr)); } #app-toc a { text-align: left; } #app-toc div { - display: inline-block; - width: 100%; margin: 0; padding: .7em 0; height: unset; @@ -267,6 +273,9 @@ p.trckr { font-size: .9em; margin-left: .5em; } float: left; width: 44px; height: 44px; margin: 0 .5em; } #stats { grid-template-columns: max-content; } + #dom-top10 { text-align: unset; } + .fillbar { width: 100%; } + .fillbar>i { line-height: 2.5em; } } @media(min-width: 651px) { #meta .icons { float: right; } @@ -282,8 +291,6 @@ p.trckr { font-size: .9em; margin-left: .5em; } width: 40%; margin-left: 1%; } - #dom-top10 { text-align: right; } - #dom-top10 p { margin: .4em; } .div-center { margin: 0 auto; width: max-content; max-width: 100%; } - .loadbar { display: inline-block; } + .fillbar { display: inline-block; } } diff --git a/src/download_itunes.py b/src/download_itunes.py index 9b4a9bf..d5ad19e 100755 --- a/src/download_itunes.py +++ b/src/download_itunes.py @@ -14,30 +14,23 @@ def read_from_disk(bundle_id, lang): return mylib.json_read(fname_for(bundle_id, lang)) -def read_first_from_disk(bundle_id, langs=AVAILABLE_LANGS): - for lang in langs: - if mylib.file_exists(fname_for(bundle_id, lang)): - return read_from_disk(bundle_id, lang) - return None - - -def app_names(bundle_id): - def name_for(lang): - try: - return read_from_disk(bundle_id, lang)['trackCensoredName'] - except Exception: - return None - ret = {} +def enum_all_from_disk(bundle_id): for lang in AVAILABLE_LANGS: - name = name_for(lang) - if name: - ret[lang] = name - return ret + try: + yield lang, read_from_disk(bundle_id, lang) + except Exception: + pass -def get_genres(bundle_id, langs=AVAILABLE_LANGS): - json = read_first_from_disk(bundle_id, langs=langs) - return list(zip(json['genreIds'], json['genres'])) if json else [] +def get_app_names(bundle_id): + return {lang: json['trackCensoredName'] + for lang, json in enum_all_from_disk(bundle_id)} + + +def enum_genres(bundle_id): + for lang, json in enum_all_from_disk(bundle_id): + for gid, name in zip(json['genreIds'], json['genres']): + yield lang, gid, name def download_info(bundle_id, lang, force=False): diff --git a/src/html_bundle.py b/src/html_bundle.py index 2ab7ec1..4904af2 100755 --- a/src/html_bundle.py +++ b/src/html_bundle.py @@ -1,76 +1,51 @@ #!/usr/bin/env python3 import sys -import time -import math import common_lib as mylib -import download_itunes # get_genres +import lib_graphs as Graph +import lib_html as HTML import bundle_combine # get_evaluated, fname_evaluated import index_app_names # get_name +import index_categories # get_categories -def gen_dotgraph(sorted_arr): - txt = '' - for name, count, mark in sorted_arr: - title = '{} ({})'.format(name, count) if count > 1 else name - clss = ' class="trckr"' if mark else '' - txt += '

{1}

'.format(clss, title) - txt += '' * count - txt += '' - return '
{}
'.format(txt) +def trkr_if(flag): + return ' class="trckr"' if flag else '' -def gen_pie_chart(parts, classes, stroke=0.6): - size = 1000 - stroke *= size * 0.5 - stroke_p = '{:.0f}'.format(stroke) - r = (0.99 * size - stroke) / 2 - r_p = '{:.0f},{:.0f}'.format(r, r) - mid = '{:.0f}'.format(size / 2) - - def arc(deg): - deg -= 90 - x = r * math.cos(math.pi * deg / 180) - y = r * math.sin(math.pi * deg / 180) - return '{:.0f},{:.0f}'.format(size / 2 + x, size / 2 + y) - - txt = '' - total = 0 - for i, x in enumerate(parts): - clss = classes[i % len(classes)] - deg = x * 360 - if x == 0: - continue - elif x == 1: - txt += f'' - else: - txt += f'' - total += deg - return '{1}'.format(size, txt) +def domain_w_count(domain, count): + if count > 1: + return '{} ({})'.format(domain, count) + return domain -def gen_radial_graph(percent): - return gen_pie_chart([1 - percent, percent], ['cs0', 'cs1']) - - -def gen_dom_tags(sorted_arr, isSub, onlyTrackers=False): - txt = '' +def gen_dom_tags(sorted_arr, fn_a_html, onlyTrackers=False): + src = '' anyMark = False - for i, (name, count, mark) in enumerate(sorted_arr): - title = '{} ({})'.format(name, count) if count > 1 else name - clss = ' class="trckr"' if mark and not onlyTrackers else '' - txt += '{} '.format( - clss, 'subdomain' if isSub else 'domain', name, title) + for name, count, mark in sorted_arr: anyMark |= mark - if txt: - note = '

* Potential trackers are highlighted

' - return '
{}{}
'.format( - 'trckr ' if onlyTrackers else '', txt, note if anyMark else '') + src += fn_a_html(name, domain_w_count(name, count), + attr_str=trkr_if(mark and not onlyTrackers)) + ' ' + if src: + if anyMark: + src += '

* Potential trackers are highlighted

' + clss = ' trckr' if onlyTrackers else '' + return f'
{src}
' else: return '– None –' -def gen_html(bundle_id, obj): +def gen_dotgraph(arr): + return Graph.dotgraph([(domain_w_count(title, num), num, trkr_if(f)) + for title, num, f in arr]) + + +def stat(col, title, ident, value, optional=None): + return Graph.rank_tile(title, value, optional, { + 'id': ident, 'class': 'col' + str(col)}) + + +def gen_page(bundle_id, obj): def round_num(num): return format(num, '.1f') # .rstrip('0').rstrip('.') @@ -81,48 +56,29 @@ def gen_html(bundle_id, obj): def as_percent(value): return round_num(value * 100) + '%' - def as_date(value): - return ''.format( - time.strftime('%Y-%m-%d %H:%M', time.gmtime(value)), - time.strftime('%Y-%m-%d, %H:%M', time.gmtime(value)) - ) - def seconds_to_time(seconds): seconds = int(seconds) minutes, seconds = divmod(seconds, 60) hours, minutes = divmod(minutes, 60) return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds) - def stat(col, title, ident, value, optional=None): - if optional: - value += '({})'.format(optional) - return ''' -
-

{}

-
- {} -

- Rank: ?, - best: ?, - worst: ?

-
'''.format(ident, col, title, value) - name = index_app_names.get_name(bundle_id) - gernes = download_itunes.get_genres(bundle_id) + gernes = index_categories.get_categories(bundle_id) obj['tracker'] = list(filter(lambda x: x[2], obj['subdom'])) - return mylib.template_with_base(f''' + + HTML.write(mylib.path_out_app(bundle_id), f'''

{name}

Bundle-id:{ bundle_id }

- { gen_radial_graph(obj['tracker_percent']) } + { Graph.pie_chart_tracker(obj['tracker_percent']) } app-icon
- +
App Categories:{ - ', '.join([name for i, name in gernes]) + ', '.join([HTML.a_category(i, name) for i, name in gernes]) }
Last Update:{as_date(obj['last_date'])}
Last Update:{HTML.date_utc(obj['last_date'])}
@@ -138,13 +94,13 @@ def gen_html(bundle_id, obj):

Connections

Potential Trackers ({ len(obj['tracker']) }):

- { gen_dom_tags(obj['tracker'], isSub=True, onlyTrackers=True) } + { gen_dom_tags(obj['tracker'], HTML.a_subdomain, onlyTrackers=True) }

Domains ({ len(obj['pardom']) }):

{ gen_dotgraph(obj['pardom']) } - { gen_dom_tags(obj['pardom'], isSub=False) } + { gen_dom_tags(obj['pardom'], HTML.a_domain) }

Subdomains ({ len(obj['subdom']) }):

{ gen_dotgraph(obj['subdom']) } - { gen_dom_tags(obj['subdom'], isSub=True) } + { gen_dom_tags(obj['subdom'], HTML.a_subdomain) }

Download: json

@@ -156,11 +112,8 @@ def gen_html(bundle_id, obj): def process(bundle_ids): print('generating html: apps ...') for bid in mylib.appids_in_out(bundle_ids): - print(' ' + bid) - mylib.mkdir_out_app(bid) - json = bundle_combine.get_evaluated(bid) - with open(mylib.path_out_app(bid, 'index.html'), 'w') as fp: - fp.write(gen_html(bid, json)) + # print(' ' + bid) + gen_page(bid, bundle_combine.get_evaluated(bid)) mylib.symlink(bundle_combine.fname_evaluated(bid), mylib.path_out_app(bid, 'data.json')) print('') diff --git a/src/html_categories.py b/src/html_categories.py new file mode 100755 index 0000000..291ec6e --- /dev/null +++ b/src/html_categories.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 + +import common_lib as mylib +import lib_html as HTML +import index_categories # enum_all_categories + + +def process(per_page=60): + print('generating html: category-index ...') + base = mylib.path_out('category') + parent = 'All Categories' + arr = [] + for cid, cat, apps in sorted(index_categories.enum_all_categories(), + key=lambda x: x[1].lower()): + arr.append((cid, cat)) + pre = HTML.h2(HTML.a_path([(parent, '../')], cat)) + _, a = HTML.write_app_pages(mylib.path_add(base, cid), apps, cat, + per_page, pre=pre) + print(' {} ({})'.format(cat, a)) + + src = ''.join([HTML.a(n, '{}/'.format(cid)) for cid, n in arr]) + HTML.write(base, ''' +

{}

+
+ {} +
'''.format(parent, src), parent) + print('') + + +if __name__ == '__main__': + process() diff --git a/src/html_index_apps.py b/src/html_index_apps.py index 406481c..d3e8b73 100755 --- a/src/html_index_apps.py +++ b/src/html_index_apps.py @@ -1,83 +1,17 @@ #!/usr/bin/env python3 import common_lib as mylib -import index_app_names # get_name - - -def gen_obj(bundle_id): - if mylib.file_exists(mylib.path_out_app(bundle_id, 'icon.png')): - icon = '/app/{0}/icon.png'.format(bundle_id) - else: - icon = '/static/app-template.svg' - return { - 'id': bundle_id, - 'name': index_app_names.get_name(bundle_id), - 'img': icon - } - - -def gen_entry(obj): - return ''' - -
- - {name}
- {id} -
-
'''.format(**obj) - - -def gen_pager(current, total): - - def mklink(i, name, active=False): - clss = ' class="active"' if active else '' - return '{}'.format(i, clss, name) - - links = '' - # if current > 1: - # links += mklink(current - 1, 'Previous') - start = max(1, current - 5) - for i in range(start, min(total, start + 10) + 1): - links += mklink(i, i, active=i == current) - # if current < total: - # links += mklink(current + 1, 'Next') - return ''.format(links) - - -def gen_page(arr, base, page_id=1, total=1): - title = 'Apps (A–Z)' - path = mylib.path_add(base, str(page_id)) - mylib.mkdir(path) - with open(mylib.path_add(path, 'index.html'), 'w') as fp: - content = ''.join([gen_entry(x) for x in arr]) - pagination = gen_pager(page_id, total) # if total > 1 else '' - fp.write(mylib.template_with_base(''' -

{}

-
- {} -
-{}'''.format(title, content, pagination), title=title)) +import lib_html as HTML def process(per_page=60): print('generating html: app-index ...') - index_dir = mylib.path_out('index', 'apps') - mylib.rm_dir(index_dir) - mylib.mkdir(index_dir) - - apps = [gen_obj(x) for x in mylib.appids_in_out()] - apps_total = len(apps) - pages_total, rest = divmod(apps_total, per_page) - if rest > 0: - pages_total += 1 - print(' {} apps'.format(apps_total)) - print(' {} pages'.format(pages_total)) - - apps_sorted = sorted(apps, key=lambda x: (x['name'].lower(), x['id'])) - for x in range(1, pages_total + 1): - start = (x - 1) * per_page - batch = apps_sorted[start:start + per_page] - gen_page(batch, index_dir, x, pages_total) + title = 'Apps (A–Z)' + p, a = HTML.write_app_pages(mylib.path_out('index', 'apps'), + mylib.appids_in_out(), title, + per_page=per_page, pre=HTML.h2(title)) + print(' {} apps'.format(a)) + print(' {} pages'.format(p)) print('') diff --git a/src/html_index_domains.py b/src/html_index_domains.py index 539dfda..8209f5d 100755 --- a/src/html_index_domains.py +++ b/src/html_index_domains.py @@ -1,25 +1,13 @@ #!/usr/bin/env python3 import common_lib as mylib +import lib_graphs as Graph +import lib_html as HTML import index_app_names # get_name import index_domains import index_meta # get_total_counts -def a_app(bundle_id): - return '{}'.format( - bundle_id, index_app_names.get_name(bundle_id)) - - -def a_dom(domain, key): - return '{1}'.format(key, domain) - - -def div_dom(domain, count, key): - return '{} found in {} {}'.format( - a_dom(domain, key), count, 'apps' if count > 1 else 'app') - - def dropdown_choose(button): return f''' @@ -33,156 +21,138 @@ def dropdown_choose(button):
''' +def div_dom(fn_a_html, domain, count): + return '{} found in {} {}'.format( + fn_a_html(domain), count, 'apps' if count > 1 else 'app') + + def duo_list(list1, list2): - txt1 = '
\n'.join([div_dom(dom, len(ids), 'subdomain') for dom, ids in list1]) - txt2 = '
\n'.join([div_dom(dom, len(ids), 'domain') for dom, ids in list2]) - return ''' + def full(fn_a_html, arr): + return '
\n'.join([div_dom(fn_a_html, domain, count) + for domain, count in arr]) + return f'''
-

Subdomains ({}) go to Domains

- {} +

Subdomains ({len(list1)}) + go to Domains

+ { full(HTML.a_subdomain, list1) }
-

Domains ({}) go to Subdomains

- {} +

Domains ({len(list2)}) + go to Subdomains

+ { full(HTML.a_domain, list2) }
-
'''.format(len(list1), txt1, len(list2), txt2) +''' -def gen_html_index(l1, l2, fname, title, button): - with open(fname, 'w') as fp: - fp.write(mylib.template_with_base( - f'

{title}

' + dropdown_choose(button) + duo_list(l1, l2), - title=title)) +def gen_html_top_10(path, subset, total, title): + src = '' + for dom, count in subset: + src += '\n
{} {}
'.format( + div_dom(HTML.a_domain, dom, count), Graph.fill_bar(count / total)) - -def gen_html_top_10(subset, fname, total, title): - - def div_loadbar(percent): - return '{0}%'.format(percent) - - with open(fname, 'w') as fp: - txt = f''' -
+ HTML.write(path, f'''

{ title }

-
''' - for dom, ids in subset: - dom_str = div_dom(dom, len(ids), 'domain') - pct_bar = div_loadbar(round(len(ids) / total * 100)) - txt += f'\n

{dom_str} {pct_bar}

' - fp.write(mylib.template_with_base(txt + ''' -
-

Get full list -sorted by Occurrence frequency -or in Alphabetical order.

+
+
+ { src } +
+

Get full list sorted by + Occurrence frequency or in + Alphabetical order. +

Download: json

-''', title=title)) +''', title=title) -def gen_html_trinity(json, idx_dir, app_count, title): +def gen_html_trinity(idx_dir, app_count, json, title, symlink): + list1 = [(dom, len(ids)) for dom, ids in json['subdom'].items()] + list2 = [(dom, len(ids)) for dom, ids in json['pardom'].items()] + + def write_index(fname, title, button): + HTML.write(idx_dir, '

{}

{}{}'.format( + title, dropdown_choose(button), duo_list(list1, list2) + ), title=title, fname=fname) + # Full list (A–Z) - list1 = sorted(json['subdom'].items(), key=lambda x: x[0]) - list2 = sorted(json['pardom'].items(), key=lambda x: x[0]) - gen_html_index(list1, list2, mylib.path_add(idx_dir, 'by_name.html'), - title='{} (A–Z)'.format(title), - button='Full list (A–Z)') + list1.sort(key=lambda x: x[0]) + list2.sort(key=lambda x: x[0]) + write_index('by_name.html', title='{} (A–Z)'.format(title), + button='Full list (A–Z)') # Full list (by count) - list1.sort(key=lambda x: -len(x[1])) - list2.sort(key=lambda x: -len(x[1])) - gen_html_index(list1, list2, mylib.path_add(idx_dir, 'by_count.html'), - title='{} (most apps)'.format(title), - button='Full list (by count)') + list1.sort(key=lambda x: -x[1]) + list2.sort(key=lambda x: -x[1]) + write_index('by_count.html', title='{} (most apps)'.format(title), + button='Full list (by count)') # Top 10 - gen_html_top_10(list2[:25], mylib.path_add(idx_dir, 'index.html'), - app_count, title='Top 25 {}'.format(title)) + gen_html_top_10(idx_dir, list2[:25], app_count, 'Top 25 {}'.format(title)) + mylib.symlink(symlink, mylib.path_out(idx_dir, 'data.json')) -def gen_html_lookup(html_dir, json, key, title): - mylib.mkdir(html_dir) - names = [[x, index_app_names.get_name(x)] for x in json['bundle']] - mylib.json_write(mylib.path_add(html_dir, 'apps.json'), names) - mylib.json_write(mylib.path_add(html_dir, 'doms.json'), json[key]) - with open(mylib.path_add(html_dir, 'index.html'), 'w') as fp: - fp.write(mylib.template_with_base(f''' +def gen_lookup(html_dir, doms_dict, names_dict, title): + HTML.write(html_dir, '''

Present in: … applications

Apps containing this domain:

-''', title=title)) +'''.format(HTML.app_tile_template()), title=title) + # after html write which will create the dir + mylib.json_write(mylib.path_add(html_dir, 'apps.json'), names_dict) + mylib.json_write(mylib.path_add(html_dir, 'doms.json'), doms_dict) -def gen_html_stats(c_apps, c_domains): +def gen_stats(c_apps, c_domains, title): [c_recordings, c_logs] = index_meta.get_total_counts() print(' {} apps'.format(c_apps)) print(' {} domains'.format(c_domains)) print(' {} recordings'.format(c_recordings)) print(' {} logs'.format(c_logs)) - title = 'Statistics' - mylib.mkdir(mylib.path_out('stats')) - with open(mylib.path_out('stats', 'index.html'), 'w') as fp: - fp.write(mylib.template_with_base(''' + HTML.write(mylib.path_out('stats'), '''

{}

-

- The AppCheck database currently contains {:,} apps with a total of {:,} unique domains. -

-

- Collected through {:,} recordings with {:,} individual requests. -

+

The AppCheck database currently contains {:,} apps with a total of {:,} unique domains.

+

Collected through {:,} recordings with {:,} individual requests.

'''.format(title, c_apps, c_domains, c_recordings, c_logs), title=title)) +'''.format(title, c_apps, c_domains, c_recordings, c_logs), title=title) def process(): # bundle_combine assures domain name is [a-zA-Z0-9.-] print('generating html: domain-index ...') - # Data export - all_dom_dir = mylib.path_out('index', 'domains', 'all') - trkr_dir = mylib.path_out('index', 'domains', 'tracker') - mylib.mkdir(all_dom_dir) - mylib.mkdir(trkr_dir) - mylib.symlink(index_domains.fname_all(), - mylib.path_out_app(all_dom_dir, 'data.json')) - mylib.symlink(index_domains.fname_tracker(), - mylib.path_out_app(trkr_dir, 'data.json')) - json = index_domains.load() app_count = index_domains.number_of_apps(json) dom_count = len(json['subdom']) print(' Lookup') - gen_html_lookup(mylib.path_out('domain'), json, 'pardom', - title='Domain Lookup') - gen_html_lookup(mylib.path_out('subdomain'), json, 'subdom', - title='Subdomain Lookup') + names = [[x, index_app_names.get_name(x)] for x in json['bundle']] + gen_lookup(mylib.path_out('domain'), json['pardom'], names, + title='Domain Lookup') + gen_lookup(mylib.path_out('subdomain'), json['subdom'], names, + title='Subdomain Lookup') + names = None print(' All Domains') - index_domains.enrich_with_bundle_ids(json) - gen_html_trinity(json, all_dom_dir, app_count, - title='Requested Domains') + gen_html_trinity(mylib.path_out('index', 'domains', 'all'), app_count, + json=json, title='Requested Domains', + symlink=index_domains.fname_all()) + json = None print(' Trackers Only') - json = index_domains.load(tracker=True) - index_domains.enrich_with_bundle_ids(json) - gen_html_trinity(json, trkr_dir, app_count, - title='Tracker') + gen_html_trinity(mylib.path_out('index', 'domains', 'tracker'), app_count, + json=index_domains.load(tracker=True), title='Tracker', + symlink=index_domains.fname_tracker()) # Stats print(' Stats') - gen_html_stats(app_count, dom_count) + gen_stats(app_count, dom_count, title='Statistics') print('') diff --git a/src/html_root.py b/src/html_root.py index 91b17d1..6b234d3 100755 --- a/src/html_root.py +++ b/src/html_root.py @@ -2,11 +2,11 @@ import os import common_lib as mylib +import lib_html as HTML def gen_root(): - with open(mylib.path_out('index.html'), 'w') as fp: - fp.write(mylib.template_with_base(''' + HTML.write(mylib.path_out(), '''

About

@@ -24,7 +24,7 @@ def gen_root():

Results

- If you're just interested in the results, go ahead to see all apps. + If you're just interested in the results, go ahead to see all apps.

Current research

@@ -32,10 +32,11 @@ def gen_root(): For mor infos follow this link.

-''')) +''') def gen_help(): + many = 7 txt = '''

Help needed!

With the release of iOS 14 some Privacy features are put into the spotlight. @@ -53,58 +54,51 @@ def gen_help():

' - mylib.mkdir(mylib.path_out('help')) - with open(mylib.path_out('help', 'index.html'), 'w') as fp: - fp.write(mylib.template_with_base(txt)) + HTML.write(mylib.path_out('help'), txt) def gen_search(): - with open(mylib.path_out('redirect.html'), 'w') as fp: - fp.write(mylib.template_with_base(''' + HTML.write(mylib.path_out(), '''

Redirecting …

''')) +''', fname='redirect.html') def gen_404(): - with open(mylib.path_out('404.html'), 'w') as fp: - fp.write(mylib.template_with_base(''' + HTML.write(mylib.path_out(), '''

404 – Not Found

-

Go back to start page

''')) +

Go back to start page

''', fname='404.html') def process(): diff --git a/src/index_app_names.py b/src/index_app_names.py index 67ffc35..9da7439 100755 --- a/src/index_app_names.py +++ b/src/index_app_names.py @@ -48,7 +48,7 @@ def process(bundle_ids): load_json_if_not_already() did_change = False for bid in mylib.appids_in_data(bundle_ids): - names = download_itunes.app_names(bid) + names = download_itunes.get_app_names(bid) if not names: mylib.err('index-app-names', 'could not load: {}'.format(bid)) continue diff --git a/src/index_categories.py b/src/index_categories.py new file mode 100755 index 0000000..4bc347a --- /dev/null +++ b/src/index_categories.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 + +import sys +import common_lib as mylib +import download_itunes # get_genres + +_dict_apps = None +_dict_names = None + + +def fname_app_categories(): + return mylib.path_data_index('app_categories.json') + + +def fname_category_names(): + return mylib.path_data_index('category_names.json') + + +def load_json_if_not_already(): + def load_json_from_disk(fname): + return mylib.json_read(fname) if mylib.file_exists(fname) else {} + + global _dict_apps, _dict_names + if not _dict_apps: + _dict_apps = load_json_from_disk(fname_app_categories()) + if not _dict_names: + _dict_names = load_json_from_disk(fname_category_names()) + + +def try_update_app(bid, genre_ids): + try: + if _dict_apps[bid] == genre_ids: + return False + except KeyError: + pass + _dict_apps[bid] = genre_ids + return True + + +def try_update_name(gid, lang, name): + try: + _dict_names[gid] + except KeyError: + _dict_names[gid] = {} + try: + if _dict_names[gid][lang]: + return False # key already exists + except KeyError: + pass + _dict_names[gid][lang] = name + return True # updated, need to persist changes + + +def reset_index(): + global _dict_apps + print(' full reset') + mylib.rm_file(fname_app_categories()) # rebuild from ground up + _dict_apps = None + + +def try_persist_changes(flag_apps, flag_names): + if flag_apps: + print(' write app-index') + mylib.json_write(fname_app_categories(), _dict_apps, pretty=False) + if flag_names: + print(' write name-index') + mylib.json_write(fname_category_names(), _dict_names, pretty=False) + + +def get_categories(bundle_id): + load_json_if_not_already() + try: + genres = _dict_apps[bundle_id] + except KeyError: + return [] + res = [] + for gid in genres: + for lang in ['us', 'de']: + try: + name = _dict_names[gid][lang] + except KeyError: + continue + res.append((gid, name)) + break + return res + + +def enum_all_categories(): + load_json_if_not_already() + reverse_index = {} + for bid, genre_ids in _dict_apps.items(): + for gid in genre_ids: + try: + reverse_index[gid].append(bid) + except KeyError: + reverse_index[gid] = [bid] + for gid, lang_dict in _dict_names.items(): + for lang in ['us', 'de']: + try: + name = lang_dict[lang] + except KeyError: + continue + yield gid, name, reverse_index[gid] + break + + +def process(bundle_ids, force=False): + print('writing index: categories ...') + if force and bundle_ids == ['*']: + reset_index() + + load_json_if_not_already() + write_app_index = False + write_name_index = False + for bid in mylib.appids_in_data(bundle_ids): + genre_ids = [] + for lang, gid, gname in download_itunes.enum_genres(bid): + if gid not in genre_ids: + genre_ids.append(gid) + if try_update_name(gid, lang, gname): + write_name_index = True + if try_update_app(bid, genre_ids): + write_app_index = True + + try_persist_changes(write_app_index, write_name_index) + print('') + + +if __name__ == '__main__': + args = sys.argv[1:] + if len(args) > 0: + process(args) + else: + # process(['*']) + mylib.usage(__file__, '[bundle_id] [...]') diff --git a/src/index_meta.py b/src/index_meta.py index 8fbbdce..dca5978 100755 --- a/src/index_meta.py +++ b/src/index_meta.py @@ -4,8 +4,6 @@ import sys import common_lib as mylib import bundle_combine # get_evaluated -_rank_dict = None - def fname_app_summary(): return mylib.path_data_index('app_summary.json') diff --git a/src/lib_graphs.py b/src/lib_graphs.py new file mode 100755 index 0000000..902f7f8 --- /dev/null +++ b/src/lib_graphs.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python3 + +import math +import common_lib as mylib +import lib_html as HTML + + +def fill_bar(percent): + return '
{0}%
'.format(round(percent * 100)) + + +def percent_bar(percent): + return '
'.format(round(percent * 100)) + + +def rank_tile(title, value, additional=None, attr={}, + percent=0.5, rank='?', best='?', worst='?'): + if additional: + value += '({})'.format(additional) + attr = HTML.attr_and(attr, {'class': 'rank'}) + return HTML.div(''' +

{}

+{} {} +

Rank: {}, best: {}, worst: {}

+'''.format(title, percent_bar(percent), value, rank, best, worst), attr) + + +def dotgraph(arr): + ''' Needs list of (title, count, attr_str) tuples ''' + def D(title, count, attr_str=''): + return '

{1}

{2}'.format( + attr_str, title, '' * count) + return '
' + ''.join([D(*x) for x in arr]) + '
' + + +def pie_chart(parts, stroke=0.6): + ''' Needs list of (percent, css_class) tuples ''' + size = 1000 + stroke *= size * 0.5 + stroke_p = '{:.0f}'.format(stroke) + r = (0.99 * size - stroke) / 2 + r_p = '{:.0f},{:.0f}'.format(r, r) + mid = '{:.0f}'.format(size / 2) + + def arc(deg): + deg -= 90 + x = r * math.cos(math.pi * deg / 180) + y = r * math.sin(math.pi * deg / 180) + return '{:.0f},{:.0f}'.format(size / 2 + x, size / 2 + y) + + src = '' + total = 0 + for percent, clss in parts: + deg = percent * 360 + if percent == 0: + continue + elif percent == 1: + src += f'' + else: + src += f'' + total += deg + return '{1}'.format(size, src) + + +def pie_chart_tracker(percent): + return pie_chart([(1 - percent, 'cs0'), (percent, 'cs1')]) diff --git a/src/lib_html.py b/src/lib_html.py new file mode 100755 index 0000000..42a726e --- /dev/null +++ b/src/lib_html.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python3 + +import math # ceil +import time # strftime, gmtime +import common_lib as mylib +import index_app_names # get_name + + +# REFS + +def a_app(bundle_id, inner, attr_str=''): + return '{}'.format(attr_str, bundle_id, inner) + + +def a_category(cat_id, inner, attr_str=''): + return '{}'.format(attr_str, cat_id, inner) + + +def a_domain(x, inner=None, attr_str=''): + return '{}'.format(attr_str, x, inner or x) + + +def a_subdomain(x, inner=None, attr_str=''): + return '{}'.format(attr_str, x, inner or x) + + +# Data object preparation + +def apps_sorted_batch(bundle_ids, batch_size=60): + apps = [(x, index_app_names.get_name(x)) for x in bundle_ids] + apps.sort(key=lambda x: (x[1].lower(), x[0])) + for i in range(0, len(apps), batch_size): + yield int(i / batch_size), apps[i:i + batch_size] + + +def attr_and(a, b): + res = {} + for d in [a, b]: + for key, val in d.items(): + try: + res[key] += ' ' + val + except KeyError: + res[key] = val + return res + + +# Basic building blocks + +def xml(tag, inner, attr={}): + src = '' + for key, val in attr.items(): + if val: + src += ' {}="{}"'.format(key, val) + return '<{0}{1}>{2}'.format(tag, src, inner) + + +def div(inner, attr={}): + return xml('div', inner, attr) + + +def h2(inner, attr={}): + return xml('h2', inner, attr) + + +def a(inner, href, attr={}): + return xml('a', inner, attr_and(attr, {'href': href})) + + +def a_path(parts, suffix): + ''' expects (name, url) tuples ''' + return ' / '.join([a(*x) for x in parts] + [suffix]) + + +# Simple constructs + +def tr(columns, tag='td'): + return f''' +{''.join(['<{0}>{1}'.format(tag, c) for c in columns])}''' + + +def date_utc(ctime): + return ''.format( + time.strftime('%Y-%m-%d %H:%M', time.gmtime(ctime)), + time.strftime('%Y-%m-%d, %H:%M', time.gmtime(ctime))) + + +# Higher level constructs + +def pagination(current, total): + if total == 1: + return '' + + def _lnk(i, name, active=False): + C = ' class="active"' if active else '' + if i == current: + link = './' + elif current == 1: + link = f'./{i}/' + else: + link = '../' if i == 1 else f'../{i}/' + return f'{name}' + + links = '' + # if current > 1: + # links += _lnk(current - 1, 'Previous') + start = max(1, current - 5) + for i in range(start, min(total, start + 10) + 1): + links += _lnk(i, i, active=i == current) + # if current < total: + # links += _lnk(current + 1, 'Next') + return ''.format(links) + + +def url_for_icon(bundle_id): + if mylib.file_exists(mylib.path_out_app(bundle_id, 'icon.png')): + return '/app/{0}/icon.png'.format(bundle_id) + else: + return '/static/app-template.svg' + + +def app_tile(bundle_id, name): + return f''' + +
+ + {name}
+ {bundle_id} +
+
''' + + +def app_tile_template(): + return f'''
+ +
+ +
''' + + +def app_tiles_all(bundle_ids, per_page=60, attr={}): + attr = attr_and(attr, {'id': 'app-toc', 'class': 'no-ul-all'}) + c_apps = len(bundle_ids) + c_pages = int(math.ceil(c_apps / per_page)) + for i, apps in apps_sorted_batch(bundle_ids, batch_size=per_page): + i += 1 + src = '' + for x in apps: + src += app_tile(x[0], x[1]) + yield i, len(apps), div(src, attr) + pagination(i, c_pages) + + +# Write html to disk + +_base_template = None + + +def base_template(content, title=None): + global _base_template + if not _base_template: + with open(mylib.path_root('templates', 'base.html'), 'r') as fp: + _base_template = fp.read() + return _base_template.replace( + '#_TITLE_#', title + ' – ' if title else '').replace( + '#_CONTENT_#', content) + + +def write(path, content, title=None, fname='index.html'): + mylib.mkdir(path) + with open(mylib.path_add(path, fname), 'w') as fp: + fp.write(base_template(content, title=title)) + + +def write_app_pages(base, bundle_ids, title, per_page=60, attr={}, pre=''): + pages = 0 + entries = 0 + mylib.rm_dir(base) + for i, count, src in app_tiles_all(bundle_ids, per_page, attr): + pages += 1 + entries += count + pth = base if i == 1 else mylib.path_add(base, str(i)) + mylib.mkdir(pth) + write(pth, pre + '\n' + src, title=title) + return pages, entries diff --git a/templates/base.html b/templates/base.html index f8d7e07..689eedc 100644 --- a/templates/base.html +++ b/templates/base.html @@ -3,9 +3,9 @@ - + #_TITLE_#AppCheck: Privacy Monitor - + @@ -21,7 +21,7 @@