#!/usr/bin/env python3
import common_lib as mylib
import index_app_names
import index_domains
def a_app(bundle_id):
    """Format a reference to the app identified by ``bundle_id``.

    The display name is looked up via ``index_app_names.get_name``.

    NOTE(review): the template below has a single ``{}`` field, so only
    ``bundle_id`` appears in the result and the looked-up name is ignored
    by ``str.format``. Presumably the template is incomplete -- confirm.
    """
    app_name = index_app_names.get_name(bundle_id)
    return '{}'.format(bundle_id, app_name)
def a_dom(domain, key):
    """Format a reference to ``domain``.

    ``key`` is supplied as positional argument 0 and ``domain`` as
    positional argument 1; the template as written only uses argument 1.
    """
    template = '{1}'
    return template.format(key, domain)
def div_dom(domain, count, key):
    """Describe in how many apps a domain was found.

    Args:
        domain: Domain name, forwarded to ``a_dom``.
        count: Number of apps; selects between "app" and "apps".
        key: Forwarded to ``a_dom`` together with ``domain``.

    Returns:
        The formatted domain followed by ``found in <count> app(s)``.
    """
    label = a_dom(domain, key)
    # Plural only for counts above one; 0 and 1 both use the singular.
    if count > 1:
        noun = 'apps'
    else:
        noun = 'app'
    return '{} found in {} {}'.format(label, count, noun)
def dropdown_choose(button):
    """Return the text fragment for the list-selection dropdown.

    NOTE(review): as written, the f-string below contains only a newline
    and never interpolates ``button``. Presumably markup is missing from
    the literal -- confirm against the intended template.
    """
    return f'''
'''
def duo_list(list1, list2):
    """Render two lists of ``(domain, ids)`` pairs as one text fragment.

    Every entry of ``list1`` is formatted by ``div_dom`` with the key
    ``'subdomain'`` and every entry of ``list2`` with the key ``'domain'``;
    ``len(ids)`` is passed as the count in both cases.

    NOTE(review): the single-quoted separators below are split across two
    lines, and the final template has no ``{}`` fields for the four values
    given to ``format``. It looks like the original markup is missing from
    these literals -- confirm before relying on the output.
    """
    txt1 = '
\n'.join([div_dom(dom, len(ids), 'subdomain') for dom, ids in list1])
    txt2 = '
\n'.join([div_dom(dom, len(ids), 'domain') for dom, ids in list2])
    return '''
'''.format(len(list1), txt1, len(list2), txt2)
def gen_html_index(l1, l2, fname, title, button):
    """Write a full-list index page to ``fname``.

    The page body is the title followed by ``dropdown_choose(button)`` and
    ``duo_list(l1, l2)``; it is passed through ``mylib.template_with_base``
    together with ``title`` before being written.

    NOTE(review): the f-string holding ``{title}`` is split across two
    lines as written; presumably surrounding markup is missing -- confirm.
    """
    with open(fname, 'w') as fp:
        fp.write(mylib.template_with_base(
            f'{title}
' + dropdown_choose(button) + duo_list(l1, l2),
            title=title))
def gen_html_top_10(subset, fname, total, title):
    """Write a ranking page for the ``(domain, ids)`` pairs in ``subset``.

    For each pair the page gets the ``div_dom`` text for the domain plus
    ``len(ids)`` expressed as a rounded percentage of ``total``. The result
    is passed through ``mylib.template_with_base`` and written to ``fname``.

    NOTE(review): ``total == 0`` raises ZeroDivisionError for a non-empty
    ``subset``. The f-string appended inside the loop is split across lines
    as written; presumably markup is missing from it -- confirm.
    """
    def div_loadbar(percent):
        # Formats the given percentage followed by a '%' sign.
        return '{0}%'.format(percent)
    with open(fname, 'w') as fp:
        txt = f'''
{ title }
'''
        for dom, ids in subset:
            dom_str = div_dom(dom, len(ids), 'domain')
            # len(ids) as a whole-number percentage of `total`.
            pct_bar = div_loadbar(round(len(ids) / total * 100))
            txt += f'\n
{dom_str} {pct_bar}
'
        fp.write(mylib.template_with_base(txt + '''
Get full list
sorted by Occurrence frequency
or in Alphabetical order.
Download: json
''', title=title))
def gen_html_trinity(json, idx_dir, app_count, title):
    """Generate the three index pages for one domain data set.

    Pages written into ``idx_dir``:
      * ``by_name.html``  -- all entries, sorted by name.
      * ``by_count.html`` -- all entries, sorted by number of ids,
        largest first (ties keep their by-name order).
      * ``index.html``    -- the first 25 ``'pardom'`` entries of the
        by-count ordering.

    Args:
        json: Mapping whose ``'subdom'`` and ``'pardom'`` values are
            mappings from a name to a sized collection of ids.
        idx_dir: Directory the pages are written to.
        app_count: Passed to ``gen_html_top_10`` as the total.
        title: Base title used for all three pages.
    """
    def by_name(entry):
        return entry[0]

    def by_size(entry):
        return len(entry[1])

    # Full list (A–Z)
    subdomains = sorted(json['subdom'].items(), key=by_name)
    domains = sorted(json['pardom'].items(), key=by_name)
    name_page = mylib.path_add(idx_dir, 'by_name.html')
    gen_html_index(subdomains, domains, name_page,
                   title='{} (A–Z)'.format(title),
                   button='Full list (A–Z)')

    # Full list (by count); the stable descending sort keeps A–Z order
    # among entries of equal size.
    subdomains.sort(key=by_size, reverse=True)
    domains.sort(key=by_size, reverse=True)
    count_page = mylib.path_add(idx_dir, 'by_count.html')
    gen_html_index(subdomains, domains, count_page,
                   title='{} (most apps)'.format(title),
                   button='Full list (by count)')

    # Top 25 parent domains
    top_page = mylib.path_add(idx_dir, 'index.html')
    gen_html_top_10(domains[:25], top_page, app_count,
                    title='Top 25 {}'.format(title))
def gen_html_lookup(html_dir, json, key, title):
    """Create a lookup page and its two JSON data files in ``html_dir``.

    Files written:
      * ``apps.json``  -- a list of ``[bundle_id, name]`` pairs, one per
        entry of ``json['bundle']``; names come from
        ``index_app_names.get_name``.
      * ``doms.json``  -- ``json[key]`` as-is.
      * ``index.html`` -- the lookup page, wrapped by
        ``mylib.template_with_base``.

    Args:
        html_dir: Output directory; created via ``mylib.mkdir``.
        json: Mapping providing ``'bundle'`` and ``key``.
        key: Name of the entry in ``json`` exported as ``doms.json``.
        title: Page title handed to the base template.
    """
    mylib.mkdir(html_dir)
    names = [[x, index_app_names.get_name(x)] for x in json['bundle']]
    mylib.json_write(mylib.path_add(html_dir, 'apps.json'), names)
    mylib.json_write(mylib.path_add(html_dir, 'doms.json'), json[key])
    # The page text contains non-ASCII characters ('…'), so the encoding is
    # pinned to UTF-8 instead of depending on the platform default.
    index_path = mylib.path_add(html_dir, 'index.html')
    with open(index_path, 'w', encoding='utf-8') as fp:
        fp.write(mylib.template_with_base(f'''
Present in: … applications
Apps containing this domain:
loading…
''', title=title))
def gen_html_stats(c_apps, c_domains):
    """Write the statistics page to ``stats/index.html`` in the output tree.

    Args:
        c_apps: Number of apps reported on the page.
        c_domains: Number of unique domains reported on the page.
    """
    heading = 'Statistics'
    mylib.mkdir(mylib.path_out('stats'))
    with open(mylib.path_out('stats', 'index.html'), 'w') as fp:
        body = '''
{}
The AppCheck database currently contains {} apps with a total of {} unique domains.
'''.format(heading, c_apps, c_domains)
        fp.write(mylib.template_with_base(body, title=heading))
def process():
    """Generate every domain-index page and its data links.

    Steps, in order: create the two index directories and symlink their
    ``data.json`` files, write the domain and subdomain lookup pages, write
    the index pages for all domains and for trackers only, and finally
    write the statistics page. Progress is printed to stdout.
    """
    # bundle_combine assures domain name is [a-zA-Z0-9.-]
    print('generating domain-index ...')

    # Data export
    dir_all = mylib.path_out('index', 'domains', 'all')
    dir_tracker = mylib.path_out('index', 'domains', 'tracker')
    for out_dir in (dir_all, dir_tracker):
        mylib.mkdir(out_dir)
    exports = (
        (index_domains.fname_all, dir_all),
        (index_domains.fname_tracker, dir_tracker),
    )
    for source_of, out_dir in exports:
        mylib.symlink(source_of(), mylib.path_out_app(out_dir, 'data.json'))

    index = index_domains.load()
    # Both counts are taken from the full index, before it is enriched
    # and before the tracker-only index replaces it.
    app_count = index_domains.number_of_apps(index)
    dom_count = len(index['subdom'])

    print(' Lookup')
    lookups = (
        ('domain', 'pardom', 'Domain Lookup'),
        ('subdomain', 'subdom', 'Subdomain Lookup'),
    )
    for subdir, key, heading in lookups:
        gen_html_lookup(mylib.path_out(subdir), index, key, title=heading)

    print(' All Domains')
    index_domains.enrich_with_bundle_ids(index)
    gen_html_trinity(index, dir_all, app_count, title='Requested Domains')

    print(' Trackers Only')
    index = index_domains.load(tracker=True)
    index_domains.enrich_with_bundle_ids(index)
    gen_html_trinity(index, dir_tracker, app_count, title='Tracker')

    # Stats
    print(' Stats')
    gen_html_stats(app_count, dom_count)
    print('')
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    process()