Refactoring html generation + categories

This commit is contained in:
relikd
2020-09-23 17:13:33 +02:00
parent 46a196c3c4
commit 8fef0cb33f
14 changed files with 628 additions and 364 deletions

View File

@@ -14,30 +14,23 @@ def read_from_disk(bundle_id, lang):
return mylib.json_read(fname_for(bundle_id, lang))
def read_first_from_disk(bundle_id, langs=AVAILABLE_LANGS):
for lang in langs:
if mylib.file_exists(fname_for(bundle_id, lang)):
return read_from_disk(bundle_id, lang)
return None
def app_names(bundle_id):
def name_for(lang):
try:
return read_from_disk(bundle_id, lang)['trackCensoredName']
except Exception:
return None
ret = {}
def enum_all_from_disk(bundle_id):
for lang in AVAILABLE_LANGS:
name = name_for(lang)
if name:
ret[lang] = name
return ret
try:
yield lang, read_from_disk(bundle_id, lang)
except Exception:
pass
def get_genres(bundle_id, langs=AVAILABLE_LANGS):
json = read_first_from_disk(bundle_id, langs=langs)
return list(zip(json['genreIds'], json['genres'])) if json else []
def get_app_names(bundle_id):
return {lang: json['trackCensoredName']
for lang, json in enum_all_from_disk(bundle_id)}
def enum_genres(bundle_id):
for lang, json in enum_all_from_disk(bundle_id):
for gid, name in zip(json['genreIds'], json['genres']):
yield lang, gid, name
def download_info(bundle_id, lang, force=False):

View File

@@ -1,76 +1,51 @@
#!/usr/bin/env python3
import sys
import time
import math
import common_lib as mylib
import download_itunes # get_genres
import lib_graphs as Graph
import lib_html as HTML
import bundle_combine # get_evaluated, fname_evaluated
import index_app_names # get_name
import index_categories # get_categories
def gen_dotgraph(sorted_arr):
txt = ''
for name, count, mark in sorted_arr:
title = '{} ({})'.format(name, count) if count > 1 else name
clss = ' class="trckr"' if mark else ''
txt += '<span{0} title="{1}"><p>{1}</p>'.format(clss, title)
txt += '<i></i>' * count
txt += '</span>'
return '<div class="dot-graph">{}</div>'.format(txt)
def trkr_if(flag):
return ' class="trckr"' if flag else ''
def gen_pie_chart(parts, classes, stroke=0.6):
size = 1000
stroke *= size * 0.5
stroke_p = '{:.0f}'.format(stroke)
r = (0.99 * size - stroke) / 2
r_p = '{:.0f},{:.0f}'.format(r, r)
mid = '{:.0f}'.format(size / 2)
def arc(deg):
deg -= 90
x = r * math.cos(math.pi * deg / 180)
y = r * math.sin(math.pi * deg / 180)
return '{:.0f},{:.0f}'.format(size / 2 + x, size / 2 + y)
txt = ''
total = 0
for i, x in enumerate(parts):
clss = classes[i % len(classes)]
deg = x * 360
if x == 0:
continue
elif x == 1:
txt += f'<circle fill="transparent" class="{clss}" stroke-width="{stroke_p}" cx="{mid}" cy="{mid}" r="{r}"/>'
else:
txt += f'<path fill="transparent" class="{clss}" stroke-width="{stroke_p}" d="M{arc(total)}A{r_p},0,{1 if deg > 180 else 0},1,{arc(total + deg)}" />'
total += deg
return '<svg viewBox="0 0 {0} {0}" width="100" height="100">{1}</svg>'.format(size, txt)
def domain_w_count(domain, count):
if count > 1:
return '{} ({})'.format(domain, count)
return domain
def gen_radial_graph(percent):
return gen_pie_chart([1 - percent, percent], ['cs0', 'cs1'])
def gen_dom_tags(sorted_arr, isSub, onlyTrackers=False):
txt = ''
def gen_dom_tags(sorted_arr, fn_a_html, onlyTrackers=False):
src = ''
anyMark = False
for i, (name, count, mark) in enumerate(sorted_arr):
title = '{} ({})'.format(name, count) if count > 1 else name
clss = ' class="trckr"' if mark and not onlyTrackers else ''
txt += '<a{} href="/{}/#{}">{}</a> '.format(
clss, 'subdomain' if isSub else 'domain', name, title)
for name, count, mark in sorted_arr:
anyMark |= mark
if txt:
note = '<p class="trckr">* Potential trackers are highlighted</p>'
return '<div class="{}tags">{}{}</div>'.format(
'trckr ' if onlyTrackers else '', txt, note if anyMark else '')
src += fn_a_html(name, domain_w_count(name, count),
attr_str=trkr_if(mark and not onlyTrackers)) + ' '
if src:
if anyMark:
src += '<p class="trckr">* Potential trackers are highlighted</p>'
clss = ' trckr' if onlyTrackers else ''
return f'<div class="tags{clss}">{src}</div>'
else:
return '<i> None </i>'
def gen_html(bundle_id, obj):
def gen_dotgraph(arr):
return Graph.dotgraph([(domain_w_count(title, num), num, trkr_if(f))
for title, num, f in arr])
def stat(col, title, ident, value, optional=None):
return Graph.rank_tile(title, value, optional, {
'id': ident, 'class': 'col' + str(col)})
def gen_page(bundle_id, obj):
def round_num(num):
return format(num, '.1f') # .rstrip('0').rstrip('.')
@@ -81,48 +56,29 @@ def gen_html(bundle_id, obj):
def as_percent(value):
return round_num(value * 100) + '%'
def as_date(value):
return '<time datetime="{}">{} UTC</time>'.format(
time.strftime('%Y-%m-%d %H:%M', time.gmtime(value)),
time.strftime('%Y-%m-%d, %H:%M', time.gmtime(value))
)
def seconds_to_time(seconds):
seconds = int(seconds)
minutes, seconds = divmod(seconds, 60)
hours, minutes = divmod(minutes, 60)
return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds)
def stat(col, title, ident, value, optional=None):
if optional:
value += '<i class="snd mg_lr">({})</i>'.format(optional)
return '''
<div id="{}" class="col{}">
<h4>{}</h4>
<div class="percentile"><div style="left: 50%"></div></div>
<b class="mg_lr">{}</b>
<p class="snd">
Rank:&nbsp;<b>?</b>,
best:&nbsp;<i>?</i>,
worst:&nbsp;<i>?</i></p>
</div>'''.format(ident, col, title, value)
name = index_app_names.get_name(bundle_id)
gernes = download_itunes.get_genres(bundle_id)
gernes = index_categories.get_categories(bundle_id)
obj['tracker'] = list(filter(lambda x: x[2], obj['subdom']))
return mylib.template_with_base(f'''
HTML.write(mylib.path_out_app(bundle_id), f'''
<h2 class="title">{name}</h2>
<p class="subtitle snd"><i class="mg_lr">Bundle-id:</i>{ bundle_id }</p>
<div id="meta">
<div class="icons">
{ gen_radial_graph(obj['tracker_percent']) }
{ Graph.pie_chart_tracker(obj['tracker_percent']) }
<img class="app-icon" src="icon.png" alt="app-icon" width="100" height="100">
</div>
<table>
<tr><td>App Categories:</td><td>{
', '.join([name for i, name in gernes])
', '.join([HTML.a_category(i, name) for i, name in gernes])
}</td></tr>
<tr><td>Last Update:</td><td>{as_date(obj['last_date'])}</td></tr>
<tr><td>Last Update:</td><td>{HTML.date_utc(obj['last_date'])}</td></tr>
</table>
</div>
<div id="stats">
@@ -138,13 +94,13 @@ def gen_html(bundle_id, obj):
<h3>Connections</h3>
<div>
<h4>Potential Trackers ({ len(obj['tracker']) }):</h4>
{ gen_dom_tags(obj['tracker'], isSub=True, onlyTrackers=True) }
{ gen_dom_tags(obj['tracker'], HTML.a_subdomain, onlyTrackers=True) }
<h4>Domains ({ len(obj['pardom']) }):</h4>
{ gen_dotgraph(obj['pardom']) }
{ gen_dom_tags(obj['pardom'], isSub=False) }
{ gen_dom_tags(obj['pardom'], HTML.a_domain) }
<h4>Subdomains ({ len(obj['subdom']) }):</h4>
{ gen_dotgraph(obj['subdom']) }
{ gen_dom_tags(obj['subdom'], isSub=True) }
{ gen_dom_tags(obj['subdom'], HTML.a_subdomain) }
</div>
<p class="right snd">Download: <a href="data.json" download="{bundle_id}.json">json</a></p>
<script type="text/javascript" src="/static/lookup-rank.js"></script>
@@ -156,11 +112,8 @@ def gen_html(bundle_id, obj):
def process(bundle_ids):
print('generating html: apps ...')
for bid in mylib.appids_in_out(bundle_ids):
print(' ' + bid)
mylib.mkdir_out_app(bid)
json = bundle_combine.get_evaluated(bid)
with open(mylib.path_out_app(bid, 'index.html'), 'w') as fp:
fp.write(gen_html(bid, json))
# print(' ' + bid)
gen_page(bid, bundle_combine.get_evaluated(bid))
mylib.symlink(bundle_combine.fname_evaluated(bid),
mylib.path_out_app(bid, 'data.json'))
print('')

31
src/html_categories.py Executable file
View File

@@ -0,0 +1,31 @@
#!/usr/bin/env python3
import common_lib as mylib
import lib_html as HTML
import index_categories # enum_all_categories
def process(per_page=60):
    ''' Write one paginated app listing per category plus the category index.

    Categories come from index_categories.enum_all_categories() and are
    processed in case-insensitive alphabetical order of their name.
    per_page is handed through to HTML.write_app_pages.
    '''
    print('generating html: category-index ...')
    base = mylib.path_out('category')
    parent = 'All Categories'
    categories = sorted(index_categories.enum_all_categories(),
                        key=lambda entry: entry[1].lower())
    listed = []
    for cid, cat, apps in categories:
        listed.append((cid, cat))
        # breadcrumb headline: "All Categories / <category name>"
        headline = HTML.h2(HTML.a_path([(parent, '../')], cat))
        _, app_count = HTML.write_app_pages(
            mylib.path_add(base, cid), apps, cat, per_page, pre=headline)
        print(' {} ({})'.format(cat, app_count))

    links = [HTML.a(label, '{}/'.format(cid)) for cid, label in listed]
    HTML.write(base, '''
<h2>{}</h2>
<div class="tags large center">
{}
</div>'''.format(parent, ''.join(links)), parent)
    print('')
# Script entry point: generate all category pages with the default page size.
if __name__ == '__main__':
    process()

View File

@@ -1,83 +1,17 @@
#!/usr/bin/env python3
import common_lib as mylib
import index_app_names # get_name
def gen_obj(bundle_id):
if mylib.file_exists(mylib.path_out_app(bundle_id, 'icon.png')):
icon = '/app/{0}/icon.png'.format(bundle_id)
else:
icon = '/static/app-template.svg'
return {
'id': bundle_id,
'name': index_app_names.get_name(bundle_id),
'img': icon
}
def gen_entry(obj):
return '''
<a href="/app/{id}/">
<div>
<img src="{img}" width="100" height="100">
<span class="name">{name}</span><br />
<span class="detail">{id}</span>
</div>
</a>'''.format(**obj)
def gen_pager(current, total):
def mklink(i, name, active=False):
clss = ' class="active"' if active else ''
return '<a href="../{}/"{}>{}</a>'.format(i, clss, name)
links = ''
# if current > 1:
# links += mklink(current - 1, 'Previous')
start = max(1, current - 5)
for i in range(start, min(total, start + 10) + 1):
links += mklink(i, i, active=i == current)
# if current < total:
# links += mklink(current + 1, 'Next')
return '<div id="pagination">{}</div>'.format(links)
def gen_page(arr, base, page_id=1, total=1):
title = 'Apps (AZ)'
path = mylib.path_add(base, str(page_id))
mylib.mkdir(path)
with open(mylib.path_add(path, 'index.html'), 'w') as fp:
content = ''.join([gen_entry(x) for x in arr])
pagination = gen_pager(page_id, total) # if total > 1 else ''
fp.write(mylib.template_with_base('''
<h2 class="center">{}</h2>
<div id="app-toc" class="center no-ul-all">
{}
</div>
{}'''.format(title, content, pagination), title=title))
import lib_html as HTML
def process(per_page=60):
print('generating html: app-index ...')
index_dir = mylib.path_out('index', 'apps')
mylib.rm_dir(index_dir)
mylib.mkdir(index_dir)
apps = [gen_obj(x) for x in mylib.appids_in_out()]
apps_total = len(apps)
pages_total, rest = divmod(apps_total, per_page)
if rest > 0:
pages_total += 1
print(' {} apps'.format(apps_total))
print(' {} pages'.format(pages_total))
apps_sorted = sorted(apps, key=lambda x: (x['name'].lower(), x['id']))
for x in range(1, pages_total + 1):
start = (x - 1) * per_page
batch = apps_sorted[start:start + per_page]
gen_page(batch, index_dir, x, pages_total)
title = 'Apps (AZ)'
p, a = HTML.write_app_pages(mylib.path_out('index', 'apps'),
mylib.appids_in_out(), title,
per_page=per_page, pre=HTML.h2(title))
print(' {} apps'.format(a))
print(' {} pages'.format(p))
print('')

View File

@@ -1,25 +1,13 @@
#!/usr/bin/env python3
import common_lib as mylib
import lib_graphs as Graph
import lib_html as HTML
import index_app_names # get_name
import index_domains
import index_meta # get_total_counts
def a_app(bundle_id):
return '<a href="/app/{}/">{}</a>'.format(
bundle_id, index_app_names.get_name(bundle_id))
def a_dom(domain, key):
return '<a href="/{0}/#{1}">{1}</a>'.format(key, domain)
def div_dom(domain, count, key):
return '{} <span>found in {} {}</span>'.format(
a_dom(domain, key), count, 'apps' if count > 1 else 'app')
def dropdown_choose(button):
return f'''
<label for="dropdown">Choose list:</label>
@@ -33,156 +21,138 @@ def dropdown_choose(button):
</div>'''
def div_dom(fn_a_html, domain, count):
return '{} <span>found in {} {}</span>'.format(
fn_a_html(domain), count, 'apps' if count > 1 else 'app')
def duo_list(list1, list2):
txt1 = '<br>\n'.join([div_dom(dom, len(ids), 'subdomain') for dom, ids in list1])
txt2 = '<br>\n'.join([div_dom(dom, len(ids), 'domain') for dom, ids in list2])
return '''
def full(fn_a_html, arr):
return '<br>\n'.join([div_dom(fn_a_html, domain, count)
for domain, count in arr])
return f'''
<div id="dom-toc" class="found-in">
<div id="subdomains">
<h3>Subdomains ({}) <a class="snd mg_lr" href="#domains">go to Domains</a></h3>
{}
<h3 class="stick-top">Subdomains ({len(list1)})
<a class="snd mg_lr" href="#domains">go to Domains</a></h3>
{ full(HTML.a_subdomain, list1) }
</div><div id="domains">
<h3>Domains ({}) <a class="snd mg_lr" href="#subdomains">go to Subdomains</a></h3>
{}
<h3 class="stick-top">Domains ({len(list2)})
<a class="snd mg_lr" href="#subdomains">go to Subdomains</a></h3>
{ full(HTML.a_domain, list2) }
</div>
</div>'''.format(len(list1), txt1, len(list2), txt2)
</div>'''
def gen_html_index(l1, l2, fname, title, button):
with open(fname, 'w') as fp:
fp.write(mylib.template_with_base(
f'<h2>{title}</h2>' + dropdown_choose(button) + duo_list(l1, l2),
title=title))
def gen_html_top_10(path, subset, total, title):
src = ''
for dom, count in subset:
src += '\n<div>{} {}</div>'.format(
div_dom(HTML.a_domain, dom, count), Graph.fill_bar(count / total))
def gen_html_top_10(subset, fname, total, title):
def div_loadbar(percent):
return '<span class="loadbar"><span style="width: {0}%">{0}%</span></span>'.format(percent)
with open(fname, 'w') as fp:
txt = f'''
<div class="div-center">
HTML.write(path, f'''
<h2 class="center">{ title }</h2>
<div id="dom-top10" class="found-in">'''
for dom, ids in subset:
dom_str = div_dom(dom, len(ids), 'domain')
pct_bar = div_loadbar(round(len(ids) / total * 100))
txt += f'\n<p>{dom_str} {pct_bar}</p>'
fp.write(mylib.template_with_base(txt + '''
</div>
<p class="mg_top">Get full list
sorted by <a class="snd" href="by_count.html">Occurrence frequency</a>
or in <a class="snd" href="by_name.html">Alphabetical order</a>.</p>
<div class="div-center">
<div id="dom-top10" class="found-in">
{ src }
</div>
<p class="mg_top">Get full list sorted by
<a class="snd" href="by_count.html">Occurrence frequency</a> or in
<a class="snd" href="by_name.html">Alphabetical order</a>.
</p>
</div>
<p class="right snd">Download: <a href="data.json" download="domains.json">json</a></p>
''', title=title))
''', title=title)
def gen_html_trinity(json, idx_dir, app_count, title):
def gen_html_trinity(idx_dir, app_count, json, title, symlink):
list1 = [(dom, len(ids)) for dom, ids in json['subdom'].items()]
list2 = [(dom, len(ids)) for dom, ids in json['pardom'].items()]
def write_index(fname, title, button):
HTML.write(idx_dir, '<h2>{}</h2>{}{}'.format(
title, dropdown_choose(button), duo_list(list1, list2)
), title=title, fname=fname)
# Full list (AZ)
list1 = sorted(json['subdom'].items(), key=lambda x: x[0])
list2 = sorted(json['pardom'].items(), key=lambda x: x[0])
gen_html_index(list1, list2, mylib.path_add(idx_dir, 'by_name.html'),
title='{} (AZ)'.format(title),
button='Full list (AZ)')
list1.sort(key=lambda x: x[0])
list2.sort(key=lambda x: x[0])
write_index('by_name.html', title='{} (AZ)'.format(title),
button='Full list (AZ)')
# Full list (by count)
list1.sort(key=lambda x: -len(x[1]))
list2.sort(key=lambda x: -len(x[1]))
gen_html_index(list1, list2, mylib.path_add(idx_dir, 'by_count.html'),
title='{} (most apps)'.format(title),
button='Full list (by count)')
list1.sort(key=lambda x: -x[1])
list2.sort(key=lambda x: -x[1])
write_index('by_count.html', title='{} (most apps)'.format(title),
button='Full list (by count)')
# Top 10
gen_html_top_10(list2[:25], mylib.path_add(idx_dir, 'index.html'),
app_count, title='Top 25 {}'.format(title))
gen_html_top_10(idx_dir, list2[:25], app_count, 'Top 25 {}'.format(title))
mylib.symlink(symlink, mylib.path_out(idx_dir, 'data.json'))
def gen_html_lookup(html_dir, json, key, title):
mylib.mkdir(html_dir)
names = [[x, index_app_names.get_name(x)] for x in json['bundle']]
mylib.json_write(mylib.path_add(html_dir, 'apps.json'), names)
mylib.json_write(mylib.path_add(html_dir, 'doms.json'), json[key])
with open(mylib.path_add(html_dir, 'index.html'), 'w') as fp:
fp.write(mylib.template_with_base(f'''
def gen_lookup(html_dir, doms_dict, names_dict, title):
HTML.write(html_dir, '''
<h2 id="name"></h2>
<p>Present in: <b id="num-apps">… applications</b></p>
<h3>Apps containing this domain:</h3>
<div id="app-toc" class="no-ul-all">
<a>
<div>
<img width="100" height="100">
<span class="name"></span><br />
<span class="detail"></span>
</div>
</a>
{}
</div>
<script type="text/javascript" src="/static/lookup-domain.js"></script>
<script type="text/javascript">
lookup_domain_js('doms.json', 'apps.json', 'name', 'num-apps', 'app-toc');
</script>
''', title=title))
'''.format(HTML.app_tile_template()), title=title)
# after html write which will create the dir
mylib.json_write(mylib.path_add(html_dir, 'apps.json'), names_dict)
mylib.json_write(mylib.path_add(html_dir, 'doms.json'), doms_dict)
def gen_html_stats(c_apps, c_domains):
def gen_stats(c_apps, c_domains, title):
[c_recordings, c_logs] = index_meta.get_total_counts()
print(' {} apps'.format(c_apps))
print(' {} domains'.format(c_domains))
print(' {} recordings'.format(c_recordings))
print(' {} logs'.format(c_logs))
title = 'Statistics'
mylib.mkdir(mylib.path_out('stats'))
with open(mylib.path_out('stats', 'index.html'), 'w') as fp:
fp.write(mylib.template_with_base('''
HTML.write(mylib.path_out('stats'), '''
<h2>{}</h2>
<p>
The AppCheck database currently contains <b>{:,}&nbsp;apps</b> with a total of <b>{:,} unique domains</b>.
</p>
<p>
Collected through <b>{:,}&nbsp;recordings</b> with <b>{:,} individual requests</b>.
</p>
<p>The AppCheck database currently contains <b>{:,}&nbsp;apps</b> with a total of <b>{:,} unique domains</b>.</p>
<p>Collected through <b>{:,}&nbsp;recordings</b> with <b>{:,} individual requests</b>.</p>
<ul>
<li>List of <a href="/index/apps/1/">Apps</a></li>
<li>List of <a href="/index/apps/">Apps</a></li>
<li>List of <a href="/category/">All Categories</a></li>
<li>List of <a href="/index/domains/all/">Requested Domains</a></li>
<li>List of <a href="/index/domains/tracker/">Trackers</a></li>
</ul>'''.format(title, c_apps, c_domains, c_recordings, c_logs), title=title))
</ul>'''.format(title, c_apps, c_domains, c_recordings, c_logs), title=title)
def process():
# bundle_combine assures domain name is [a-zA-Z0-9.-]
print('generating html: domain-index ...')
# Data export
all_dom_dir = mylib.path_out('index', 'domains', 'all')
trkr_dir = mylib.path_out('index', 'domains', 'tracker')
mylib.mkdir(all_dom_dir)
mylib.mkdir(trkr_dir)
mylib.symlink(index_domains.fname_all(),
mylib.path_out_app(all_dom_dir, 'data.json'))
mylib.symlink(index_domains.fname_tracker(),
mylib.path_out_app(trkr_dir, 'data.json'))
json = index_domains.load()
app_count = index_domains.number_of_apps(json)
dom_count = len(json['subdom'])
print(' Lookup')
gen_html_lookup(mylib.path_out('domain'), json, 'pardom',
title='Domain Lookup')
gen_html_lookup(mylib.path_out('subdomain'), json, 'subdom',
title='Subdomain Lookup')
names = [[x, index_app_names.get_name(x)] for x in json['bundle']]
gen_lookup(mylib.path_out('domain'), json['pardom'], names,
title='Domain Lookup')
gen_lookup(mylib.path_out('subdomain'), json['subdom'], names,
title='Subdomain Lookup')
names = None
print(' All Domains')
index_domains.enrich_with_bundle_ids(json)
gen_html_trinity(json, all_dom_dir, app_count,
title='Requested Domains')
gen_html_trinity(mylib.path_out('index', 'domains', 'all'), app_count,
json=json, title='Requested Domains',
symlink=index_domains.fname_all())
json = None
print(' Trackers Only')
json = index_domains.load(tracker=True)
index_domains.enrich_with_bundle_ids(json)
gen_html_trinity(json, trkr_dir, app_count,
title='Tracker')
gen_html_trinity(mylib.path_out('index', 'domains', 'tracker'), app_count,
json=index_domains.load(tracker=True), title='Tracker',
symlink=index_domains.fname_tracker())
# Stats
print(' Stats')
gen_html_stats(app_count, dom_count)
gen_stats(app_count, dom_count, title='Statistics')
print('')

View File

@@ -2,11 +2,11 @@
import os
import common_lib as mylib
import lib_html as HTML
def gen_root():
with open(mylib.path_out('index.html'), 'w') as fp:
fp.write(mylib.template_with_base('''
HTML.write(mylib.path_out(), '''
<h2>About</h2>
<div class="squeeze">
<p>
@@ -24,7 +24,7 @@ def gen_root():
</p>
<h2>Results</h2>
<p>
If you're just interested in the results, go ahead to see <a href="/index/apps/1/">all apps</a>.
If you're just interested in the results, go ahead to see <a href="/index/apps/">all apps</a>.
</p>
<h2>Current research</h2>
<p>
@@ -32,10 +32,11 @@ def gen_root():
For mor infos follow <a href="/help/">this link</a>.
</p>
</div>
'''))
''')
def gen_help():
many = 7
txt = '''<h2>Help needed!</h2>
<div class="squeeze"><p>
With the release of iOS 14 some <a href="https://www.apple.com/ios/ios-14/features/#Privacy" target="_blank">Privacy</a> features are put into the spotlight.
@@ -53,58 +54,51 @@ def gen_help():
</p>
</div>
<div id="help-links">'''
many = 7
def app(bundle_id, name, appstore_id):
    ''' Return the html for the "App Name" table cell of one app.

    Combines a link to the local app page (bundle_id, name) with a
    "Download from AppStore" link built from appstore_id.
    '''
    iurl = 'https://apps.apple.com/de/app/id{}'.format(appstore_id)
    aref = '<a href="{}" target="_blank">AppStore</a>'.format(iurl)
    # Use the parameter, not the loop variable `bid` of the enclosing
    # function; the helper must not depend on the caller's local state.
    return '{} <span class="snd">Download from {}</span>'.format(
        HTML.a_app(bundle_id, name), aref)
def rec(count):
return '<span class="{}"><b>{}</b>/{}</span> recordings'.format(
'done' if count >= many else 'notyet', count, many)
obj = mylib.json_read(mylib.path_root('src', 'help.json'))
for land in sorted(obj.keys()):
txt += '\n<h3>{}:</h3>\n<table>'.format(land)
txt += '\n<tr><th></th><th>App Name</th><th>pre iOS 14</th><th>post iOS 14</th></tr>'
txt += HTML.tr(['', 'App Name', 'pre iOS 14', 'post iOS 14'], 'th')
for i, x in enumerate(obj[land]):
bid = x[2]
asurl = 'https://apps.apple.com/de/app/id{}'.format(x[1])
count = [0, 0]
c = [0, 0]
for fname, json in mylib.enum_jsons(bid):
try:
ios14 = int(json['ios'].split('.')[0]) >= 14
except KeyError:
# assume everything submitted after release date is iOS14
ios14 = os.path.getmtime(fname) > 1600258000
count[1 if ios14 else 0] += 1
s1 = '<span class="{}"><b>{}</b>/{}</span> recordings'.format(
'done' if count[0] >= many else 'notyet', count[0], many)
s2 = '<span class="{}"><b>{}</b>/{}</span> recordings'.format(
'done' if count[1] >= many else 'notyet', count[1], many)
txt += '''
<tr><td>{}</td>
<td><a href="/app/{}/">{}</a> <span class="snd">Download from <a href="{}" target="_blank">AppStore</a></span></td>
<td>{}</td>
<td>{}</td>
</tr>'''.format(i + 1, bid, x[0], asurl, s1, s2)
c[1 if ios14 else 0] += 1
txt += HTML.tr([i + 1, app(bid, x[0], x[1]), rec(c[0]), rec(c[1])])
txt += '</table>'
txt += '</div>'
mylib.mkdir(mylib.path_out('help'))
with open(mylib.path_out('help', 'index.html'), 'w') as fp:
fp.write(mylib.template_with_base(txt))
HTML.write(mylib.path_out('help'), txt)
def gen_search():
with open(mylib.path_out('redirect.html'), 'w') as fp:
fp.write(mylib.template_with_base('''
HTML.write(mylib.path_out(), '''
<h2>Redirecting …</h2>
<script type="text/javascript">
var GET={};
window.location.search.substr(1).split("&").forEach(function(x){GET[x.split("=")[0]]=x.split("=")[1]});
if (GET["id"]) { window.location = "/app/" + GET["id"] + "/"; }
</script>'''))
</script>''', fname='redirect.html')
def gen_404():
with open(mylib.path_out('404.html'), 'w') as fp:
fp.write(mylib.template_with_base('''
HTML.write(mylib.path_out(), '''
<h2>404 Not Found</h2>
<p>Go back to <a href="/">start page</a></p>'''))
<p>Go back to <a href="/">start page</a></p>''', fname='404.html')
def process():

View File

@@ -48,7 +48,7 @@ def process(bundle_ids):
load_json_if_not_already()
did_change = False
for bid in mylib.appids_in_data(bundle_ids):
names = download_itunes.app_names(bid)
names = download_itunes.get_app_names(bid)
if not names:
mylib.err('index-app-names', 'could not load: {}'.format(bid))
continue

135
src/index_categories.py Executable file
View File

@@ -0,0 +1,135 @@
#!/usr/bin/env python3
import sys
import common_lib as mylib
import download_itunes # get_genres
# In-memory copy of app_categories.json (bundle id -> list of genre ids).
# Stays None until load_json_if_not_already() fills it.
_dict_apps = None
# In-memory copy of category_names.json (genre id -> {lang: name}).
# Stays None until load_json_if_not_already() fills it.
_dict_names = None
def fname_app_categories():
    ''' Return the path of the index file backing the app -> genre-ids map. '''
    index_file = mylib.path_data_index('app_categories.json')
    return index_file
def fname_category_names():
    ''' Return the path of the index file backing the genre-id -> names map. '''
    index_file = mylib.path_data_index('category_names.json')
    return index_file
def load_json_if_not_already():
    ''' Fill the module-level caches from disk unless they hold data.

    A cache that is None or empty is (re)read from its index file; a
    missing file results in an empty dict.
    '''
    global _dict_apps, _dict_names

    def read_or_empty(fname):
        if mylib.file_exists(fname):
            return mylib.json_read(fname)
        return {}

    if not _dict_apps:
        _dict_apps = read_or_empty(fname_app_categories())
    if not _dict_names:
        _dict_names = read_or_empty(fname_category_names())
def try_update_app(bid, genre_ids):
    ''' Store genre_ids for bid in the app cache.

    Returns False if the cache already holds an equal value for bid,
    True if the entry was added or replaced (caller should persist).
    '''
    unchanged = bid in _dict_apps and _dict_apps[bid] == genre_ids
    if unchanged:
        return False
    _dict_apps[bid] = genre_ids
    return True
def try_update_name(gid, lang, name):
    ''' Remember name as the label of genre gid in language lang.

    An existing non-empty label is never overwritten (returns False).
    Returns True if the label was stored and needs to be persisted.
    '''
    labels = _dict_names.setdefault(gid, {})
    if labels.get(lang):
        return False  # key already exists
    labels[lang] = name
    return True  # updated, need to persist changes
def reset_index():
    ''' Drop the app -> genre-ids index, on disk and in memory.

    Only the app index is reset; the category-name index is left alone.
    '''
    global _dict_apps
    print(' full reset')
    index_file = fname_app_categories()
    mylib.rm_file(index_file)  # rebuild from ground up
    _dict_apps = None
def try_persist_changes(flag_apps, flag_names):
    ''' Write the caches back to their index files where a flag is set.

    flag_apps guards the app index, flag_names the category-name index.
    '''
    pending = (
        (flag_apps, ' write app-index', fname_app_categories, _dict_apps),
        (flag_names, ' write name-index', fname_category_names, _dict_names),
    )
    for dirty, message, fname_fn, data in pending:
        if dirty:
            print(message)
            mylib.json_write(fname_fn(), data, pretty=False)
def get_categories(bundle_id):
    ''' Return a list of (genre_id, name) tuples for one app.

    For each genre the 'us' name is preferred, 'de' is the fallback.
    Genres without a name in either language are left out. An unknown
    bundle_id yields an empty list.
    '''
    load_json_if_not_already()
    if bundle_id not in _dict_apps:
        return []
    found = []
    for gid in _dict_apps[bundle_id]:
        for lang in ['us', 'de']:
            try:
                found.append((gid, _dict_names[gid][lang]))
            except KeyError:
                continue  # try next language
            break  # first available language wins
    return found
def enum_all_categories():
    ''' Yield (genre_id, name, [bundle_id, ...]) for every used category.

    The name is taken from the 'us' entry if present, else from 'de';
    categories that have neither are skipped. Categories that are known
    by name but not referenced by any app are skipped as well.
    '''
    load_json_if_not_already()
    # reverse index: genre id -> list of bundle ids
    apps_by_category = {}
    for bid, genre_ids in _dict_apps.items():
        for gid in genre_ids:
            apps_by_category.setdefault(gid, []).append(bid)

    for gid, lang_dict in _dict_names.items():
        apps = apps_by_category.get(gid)
        if not apps:
            # The name index outlives the app index (e.g. after a reset or
            # when an app changes its genres), so a named category may have
            # no apps. Skip it instead of raising KeyError.
            continue
        for lang in ['us', 'de']:
            if lang in lang_dict:
                yield gid, lang_dict[lang], apps
                break
def process(bundle_ids, force=False):
    ''' Update the category indices for the given apps and persist changes.

    With force=True and bundle_ids == ['*'] the app index is reset first.
    For each app the distinct genre ids (in order of first appearance) are
    collected from download_itunes.enum_genres and stored; every
    (genre, language) name seen is offered to the name index.
    '''
    print('writing index: categories ...')
    if force and bundle_ids == ['*']:
        reset_index()

    load_json_if_not_already()
    apps_changed = False
    names_changed = False
    for bid in mylib.appids_in_data(bundle_ids):
        seen_ids = []
        for lang, gid, gname in download_itunes.enum_genres(bid):
            if gid not in seen_ids:
                seen_ids.append(gid)
            names_changed |= try_update_name(gid, lang, gname)
        apps_changed |= try_update_app(bid, seen_ids)

    try_persist_changes(apps_changed, names_changed)
    print('')
# Script entry point: index the bundle ids given on the command line,
# or print the usage text when none are given.
if __name__ == '__main__':
    cli_args = sys.argv[1:]
    if cli_args:
        process(cli_args)
    else:
        mylib.usage(__file__, '[bundle_id] [...]')

View File

@@ -4,8 +4,6 @@ import sys
import common_lib as mylib
import bundle_combine # get_evaluated
_rank_dict = None
def fname_app_summary():
return mylib.path_data_index('app_summary.json')

66
src/lib_graphs.py Executable file
View File

@@ -0,0 +1,66 @@
#!/usr/bin/env python3
import math
import common_lib as mylib
import lib_html as HTML
def fill_bar(percent):
    ''' Return a "fillbar" div filled (and labelled) with percent * 100,
    rounded to a whole number. '''
    pct = round(percent * 100)
    return f'<div class="fillbar"><i style="width: {pct}%">{pct}%</i></div>'
def percent_bar(percent):
    ''' Return a "pcbar" div whose marker sits at percent * 100 (rounded)
    percent from the left. '''
    pct = round(percent * 100)
    return f'<div class="pcbar"><i style="left: {pct}%"></i></div>'
def rank_tile(title, value, additional=None, attr={},
              percent=0.5, rank='?', best='?', worst='?'):
    ''' Return a div (class "rank") showing one ranked statistic.

    title is the headline, value the highlighted text. If additional is
    given it is appended to value in parentheses. attr holds extra html
    attributes for the div; percent positions the percent bar. rank,
    best and worst fill the line below the value.
    '''
    if additional:
        value += '<i class="snd mg_lr">({})</i>'.format(additional)
    tile_attr = HTML.attr_and(attr, {'class': 'rank'})
    inner = '''
<h4>{title}</h4>
{bar} <b class="mg_lr">{value}</b>
<p class="snd">Rank:&nbsp;<b>{rank}</b>, best:&nbsp;<i>{best}</i>, worst:&nbsp;<i>{worst}</i></p>
'''.format(title=title, bar=percent_bar(percent), value=value,
           rank=rank, best=best, worst=worst)
    return HTML.div(inner, tile_attr)
def dotgraph(arr):
    ''' Needs list of (title, count, attr_str) tuples.

    Returns a "dot-graph" div with one span per tuple. Each span carries
    the title (as attribute and as text) followed by count empty <i> dots;
    attr_str is inserted verbatim into the opening span tag.
    '''
    def span(title, count, attr_str=''):
        dots = '<i></i>' * count
        return f'<span{attr_str} title="{title}"><p>{title}</p>{dots}</span>'

    spans = ''.join(span(*entry) for entry in arr)
    return '<div class="dot-graph">{}</div>'.format(spans)
def pie_chart(parts, stroke=0.6):
    ''' Needs list of (percent, css_class) tuples.

    Returns an svg (viewBox 1000x1000, displayed at 100x100) in which each
    part is drawn as a stroked arc, starting at the top and going
    clockwise. percent is a fraction (1 == full circle); parts with
    percent 0 are omitted and a part with percent 1 is drawn as a circle.
    stroke is the ring thickness relative to half the svg size.
    '''
    size = 1000
    center = size / 2
    stroke *= size * 0.5
    radius = (0.99 * size - stroke) / 2
    stroke_txt = '{:.0f}'.format(stroke)
    radii_txt = '{:.0f},{:.0f}'.format(radius, radius)
    center_txt = '{:.0f}'.format(center)

    def arc(deg):
        # shift by 90 degrees so that 0 points to the top of the circle
        deg -= 90
        dx = radius * math.cos(math.pi * deg / 180)
        dy = radius * math.sin(math.pi * deg / 180)
        return '{:.0f},{:.0f}'.format(center + dx, center + dy)

    shapes = []
    angle = 0  # degrees already covered by previous parts
    for percent, clss in parts:
        sweep = percent * 360
        if percent == 0:
            continue
        if percent == 1:
            shapes.append(
                f'<circle fill="transparent" class="{clss}" stroke-width="{stroke_txt}" cx="{center_txt}" cy="{center_txt}" r="{radius}"/>')
        else:
            large_arc = 1 if sweep > 180 else 0
            shapes.append(
                f'<path fill="transparent" class="{clss}" stroke-width="{stroke_txt}" d="M{arc(angle)}A{radii_txt},0,{large_arc},1,{arc(angle + sweep)}" />')
        angle += sweep
    return '<svg viewBox="0 0 {0} {0}" width="100" height="100">{1}</svg>'.format(
        size, ''.join(shapes))
def pie_chart_tracker(percent):
    ''' Return a two-part pie chart: the remainder (class "cs0") first,
    followed by the given fraction (class "cs1"). '''
    remainder = (1 - percent, 'cs0')
    share = (percent, 'cs1')
    return pie_chart([remainder, share])

183
src/lib_html.py Executable file
View File

@@ -0,0 +1,183 @@
#!/usr/bin/env python3
import math # ceil
import time # strftime, gmtime
import common_lib as mylib
import index_app_names # get_name
# REFS
def a_app(bundle_id, inner, attr_str=''):
    ''' Return an anchor to /app/<bundle_id>/ with inner as content.
    attr_str is inserted verbatim after the tag name. '''
    return f'<a{attr_str} href="/app/{bundle_id}/">{inner}</a>'
def a_category(cat_id, inner, attr_str=''):
    ''' Return an anchor to /category/<cat_id>/ with inner as content.
    attr_str is inserted verbatim after the tag name. '''
    return f'<a{attr_str} href="/category/{cat_id}/">{inner}</a>'
def a_domain(x, inner=None, attr_str=''):
    ''' Return an anchor to /domain/#<x>.
    The link text is inner, or x itself if inner is empty or None. '''
    label = inner or x
    return f'<a{attr_str} href="/domain/#{x}">{label}</a>'
def a_subdomain(x, inner=None, attr_str=''):
    ''' Return an anchor to /subdomain/#<x>.
    The link text is inner, or x itself if inner is empty or None. '''
    label = inner or x
    return f'<a{attr_str} href="/subdomain/#{x}">{label}</a>'
# Data object preparation
def apps_sorted_batch(bundle_ids, batch_size=60):
    ''' Yield (batch_index, [(bundle_id, name), ...]) in sorted order.

    Apps are sorted by lowercased name, ties broken by bundle id, and cut
    into chunks of at most batch_size entries. batch_index starts at 0.
    '''
    named = sorted(
        ((bid, index_app_names.get_name(bid)) for bid in bundle_ids),
        key=lambda pair: (pair[1].lower(), pair[0]))
    offsets = range(0, len(named), batch_size)
    for batch_no, start in enumerate(offsets):
        yield batch_no, named[start:start + batch_size]
def attr_and(a, b):
    ''' Merge two attribute dicts into a new dict.

    Keys present in both dicts get their values joined with a single
    space (value of a first). Neither input dict is modified.
    '''
    merged = {}
    for source in (a, b):
        for key, val in source.items():
            if key in merged:
                merged[key] += ' ' + val
            else:
                merged[key] = val
    return merged
# Basic building blocks
def xml(tag, inner, attr={}):
    ''' Return <tag ...>inner</tag>.

    attr maps attribute names to values; attributes with a falsy value
    are omitted. Nothing is escaped.
    '''
    attr_src = ''.join(
        ' {}="{}"'.format(key, val) for key, val in attr.items() if val)
    return '<{0}{1}>{2}</{0}>'.format(tag, attr_src, inner)
def div(inner, attr={}):
    ''' Return inner wrapped in a <div> carrying the attributes of attr. '''
    return xml(tag='div', inner=inner, attr=attr)
def h2(inner, attr={}):
    ''' Return inner wrapped in an <h2> carrying the attributes of attr. '''
    return xml(tag='h2', inner=inner, attr=attr)
def a(inner, href, attr={}):
    ''' Return an anchor around inner that points to href.
    attr is merged with the href attribute via attr_and. '''
    link_attr = attr_and(attr, {'href': href})
    return xml('a', inner, link_attr)
def a_path(parts, suffix):
    ''' expects (name, url) tuples

    Returns a breadcrumb: one anchor per tuple, then suffix as the last
    (unlinked) element, all joined by " / ".
    '''
    crumbs = [a(*part) for part in parts]
    crumbs.append(suffix)
    return ' / '.join(crumbs)
# Simple constructs
def tr(columns, tag='td'):
    ''' Return a table row (preceded by a newline) with one cell per column.
    tag selects the cell element, e.g. 'td' or 'th'. '''
    cells = ''.join('<{0}>{1}</{0}>'.format(tag, col) for col in columns)
    return '\n<tr>' + cells + '</tr>'
def date_utc(ctime):
    ''' Return a <time> element for ctime (seconds since the epoch).
    Both the datetime attribute and the visible text are given in UTC. '''
    moment = time.gmtime(ctime)
    machine = time.strftime('%Y-%m-%d %H:%M', moment)
    human = time.strftime('%Y-%m-%d, %H:%M', moment)
    return f'<time datetime="{machine}">{human} UTC</time>'
# Higher level constructs
def pagination(current, total):
    ''' Return the page-link bar for page `current` out of `total` pages.

    Returns '' if there is exactly one page. Links are relative: page 1
    lives in the base directory, page N in the sub-directory "N/". The bar
    starts at max(1, current - 5) and shows up to 11 links.
    '''
    if total == 1:
        return ''

    def href_for(page):
        if page == current:
            return './'
        if current == 1:
            # we are in the base dir, other pages are sub-directories
            return f'./{page}/'
        # we are in a sub-directory, page 1 is the parent dir
        return '../' if page == 1 else f'../{page}/'

    first = max(1, current - 5)
    last = min(total, first + 10)
    anchors = []
    for page in range(first, last + 1):
        css = ' class="active"' if page == current else ''
        anchors.append(f'<a href="{href_for(page)}"{css}>{page}</a>')
    return '<div class="pagination">{}</div>'.format(''.join(anchors))
def url_for_icon(bundle_id):
    ''' Return the url of the app icon, or of the generic template icon
    if no icon.png exists in the output directory of the app. '''
    has_icon = mylib.file_exists(mylib.path_out_app(bundle_id, 'icon.png'))
    if not has_icon:
        return '/static/app-template.svg'
    return '/app/{0}/icon.png'.format(bundle_id)
def app_tile(bundle_id, name):
    ''' Return the html tile of one app: a link to its page containing
    the icon, the name and the bundle id. '''
    icon = url_for_icon(bundle_id)
    return '''
<a href="/app/{bid}/">
<div>
<img src="{icon}" width="100" height="100">
<span class="name">{name}</span><br />
<span class="detail">{bid}</span>
</div>
</a>'''.format(bid=bundle_id, icon=icon, name=name)
def app_tile_template():
    ''' Return an empty app tile (no link target, icon source or texts). '''
    lines = [
        '<a><div>',
        '<img width="100" height="100">',
        '<span class="name"></span><br />',
        '<span class="detail"></span>',
        '</div></a>',
    ]
    return '\n'.join(lines)
def app_tiles_all(bundle_ids, per_page=60, attr={}):
    ''' Yield (page_number, tile_count, html) for each page of app tiles.

    Apps are sorted and split by apps_sorted_batch. page_number starts
    at 1. html is the tile container div (id "app-toc", merged with attr)
    followed by the pagination bar for that page.
    '''
    container_attr = attr_and(attr, {'id': 'app-toc', 'class': 'no-ul-all'})
    page_count = int(math.ceil(len(bundle_ids) / per_page))
    for index, batch in apps_sorted_batch(bundle_ids, batch_size=per_page):
        page_no = index + 1
        tiles = ''.join(app_tile(entry[0], entry[1]) for entry in batch)
        page_src = div(tiles, container_attr) + pagination(page_no, page_count)
        yield page_no, len(batch), page_src
# Write html to disk
# Content of templates/base.html, read on first use by base_template().
_base_template = None


def base_template(content, title=None):
    ''' Return the base template with title and content filled in.

    The template file is read once and kept in _base_template. The
    placeholder #_TITLE_# becomes title plus a trailing space (or '' if no
    title is given), #_CONTENT_# becomes content.
    '''
    global _base_template
    if not _base_template:
        with open(mylib.path_root('templates', 'base.html'), 'r') as fp:
            _base_template = fp.read()
    title_text = title + ' ' if title else ''
    page = _base_template.replace('#_TITLE_#', title_text)
    return page.replace('#_CONTENT_#', content)
def write(path, content, title=None, fname='index.html'):
    ''' Render content into the base template and save it as path/fname.
    The directory path is created via mylib.mkdir before writing. '''
    mylib.mkdir(path)
    target = mylib.path_add(path, fname)
    with open(target, 'w') as fp:
        fp.write(base_template(content, title=title))
def write_app_pages(base, bundle_ids, title, per_page=60, attr={}, pre=''):
    ''' Write a paginated app listing below base.

    base is removed first. Page 1 is written to base itself, page N to
    the sub-directory base/N. pre is put in front of the tiles on every
    page. Returns a tuple (number of pages, number of app tiles).
    '''
    mylib.rm_dir(base)
    page_count = 0
    entry_count = 0
    for page_no, tile_count, tiles_src in app_tiles_all(
            bundle_ids, per_page, attr):
        page_count += 1
        entry_count += tile_count
        if page_no == 1:
            target = base
        else:
            target = mylib.path_add(base, str(page_no))
        mylib.mkdir(target)
        write(target, pre + '\n' + tiles_src, title=title)
    return page_count, entry_count