#!/usr/bin/env python3
"""Generate the domain-index pages and the JSON files they look up.

NOTE(review): the text templates in this module consist of plain text and
blank lines only. If they are expected to carry markup, it is not present
in this revision -- confirm against the rendered output.
"""
import lib_common as mylib
import lib_graphs as Graph
import lib_html as HTML
import index_app_names  # get_name
import index_domains


def dropdown_choose(button):
    """Return the snippet placed between the page path and the lists.

    NOTE(review): `button` is accepted but never used, and the result is a
    single space -- presumably a template is missing here; confirm.
    """
    return ' '


def div_dom(fn_a_html, domain, count):
    """Return one list entry of the form "<link> found in <count> app(s)".

    fn_a_html -- callable applied to `domain`; its result leads the entry
    domain    -- domain name handed to `fn_a_html`
    count     -- number shown in the entry; "app" is used only for exactly
                 1, so that 0 reads "0 apps" rather than "0 app"
    """
    noun = 'app' if count == 1 else 'apps'
    return '{} found in {} {}'.format(fn_a_html(domain), count, noun)


def duo_list(list1, list2):
    """Return the subdomain listing followed by the domain listing.

    list1 -- (subdomain, count) pairs, rendered through HTML.a_subdomain
    list2 -- (domain, count) pairs, rendered through HTML.a_domain
    """
    def full(fn_a_html, arr):
        # One entry per pair, separated by a blank line.
        return '\n\n'.join(
            div_dom(fn_a_html, dom, cnt) for dom, cnt in arr)

    return f'''

Subdomains ({len(list1)}) go to Domains

{ full(HTML.a_subdomain, list1) }

Domains ({len(list2)}) go to Subdomains

{ full(HTML.a_domain, list2) }
'''


def gen_html_top_10(path, subset, total, title):
    """Write a page listing `subset`, each entry followed by a fill bar.

    path   -- destination passed as first argument to HTML.write
    subset -- (domain, count) pairs, listed in the given order
    total  -- denominator of the bar ratio (count / total)
    title  -- page title; also repeated at the top of the page body

    Despite the name, the number of entries is whatever the caller passes
    in `subset`.
    """
    src = ''.join(
        '\n\n{} {}\n'.format(
            div_dom(HTML.a_domain, dom, count),
            Graph.fill_bar(count / total))
        for dom, count in subset)
    HTML.write(path, f'''

{ title }

{ src }

Get full list sorted by Occurrence frequency or in Alphabetical order.

{ HTML.p_download_json('data.json', 'domains.json') } ''', title=title)


def gen_html_trinity(idx_dir, app_count, json, title, symlink):
    """Write the A–Z page, the by-count page and the top-25 page.

    idx_dir   -- destination passed to HTML.write for every page
    app_count -- total used to scale the bars on the top-25 page
    json      -- mapping with 'subdom' and 'pardom' entries, each mapping a
                 name to a sized collection
    title     -- base title; each page derives its own title from it
    symlink   -- first argument of the final mylib.symlink call, whose
                 second argument is the 'data.json' path under `idx_dir`
    """
    # NOTE(review): the displayed count is len(ids) - 1; presumably one
    # element of each collection is not an app id -- confirm with the
    # index_domains data layout.
    list1 = [(dom, len(ids) - 1) for dom, ids in json['subdom'].items()]
    list2 = [(dom, len(ids) - 1) for dom, ids in json['pardom'].items()]

    def write_index(fname, title, button):
        # Reads list1/list2 at call time, i.e. in their current sort order.
        HTML.write(idx_dir, '\n\n{}\n\n{}{}'.format(
            HTML.a_path([('Results', '/results/')], title),
            dropdown_choose(button),
            duo_list(list1, list2)
        ), title=title, fname=fname)

    # Full list (A–Z)
    list1.sort(key=lambda x: x[0])
    list2.sort(key=lambda x: x[0])
    write_index('by_name.html', title='{} (A–Z)'.format(title),
                button='Full list (A–Z)')
    # Full list (by count); the sort is stable, so equal counts keep the
    # alphabetical order established above.
    list1.sort(key=lambda x: -x[1])
    list2.sort(key=lambda x: -x[1])
    write_index('by_count.html', title='{} (by count)'.format(title),
                button='Full list (by count)')
    # Top 25 (list2 is still sorted by descending count)
    gen_html_top_10(idx_dir, list2[:25], app_count,
                    'Top 25 {}'.format(title))
    # NOTE(review): gen_lookup joins onto its directory with path_add,
    # whereas path_out is used here -- confirm this is intended.
    mylib.symlink(symlink, mylib.path_out(idx_dir, 'data.json'))


def gen_lookup(html_dir, doms_dict, flag, title):
    """Write a lookup page and store `doms_dict` next to it as doms.json.

    html_dir  -- destination passed to HTML.write; also the base path of
                 the 'doms.json' file
    doms_dict -- data written unchanged via mylib.json_write
    flag      -- when true, the page includes the "Subdomains:" section
    title     -- page title
    """
    # Built outside the f-string: a quoted literal containing line breaks
    # cannot be written inside a replacement field.
    subdomains_part = '\n\nSubdomains:\n\n' if flag else ''
    HTML.write(html_dir, f'''

{ HTML.a_path([('All Domains', '/index/domains/all/')], '') }

Known Tracker: ?

Present in: … applications

{ subdomains_part }

Apps containing this domain:

{ HTML.app_tile_template() }
''', title=title)
    mylib.json_write(mylib.path_add(html_dir, 'doms.json'), doms_dict)


def process():
    """Generate every domain-index page and lookup file.

    Returns a tuple (app_count, dom_count): the value reported by
    index_domains.number_of_apps and the number of 'subdom' entries in the
    full index.
    """
    # bundle_combine assures domain name is [a-zA-Z0-9.-]
    print('generating html: domain-index ...')
    data = index_domains.loadAll()
    app_count = index_domains.number_of_apps(data)
    dom_count = len(data['subdom'])

    # Prepare for lookup
    names = [[x, index_app_names.get_name(x)] for x in data['bundle']]
    dest_dir = mylib.path_out('results')
    mylib.mkdir(dest_dir)
    mylib.json_write(mylib.path_add(dest_dir, 'lookup-apps.json'), names)
    mylib.symlink(index_domains.fname_dom_subdoms(),
                  mylib.path_add(dest_dir, 'subdoms.json'))
    del names  # no longer needed; drop the reference

    print(' Lookup')
    gen_lookup(mylib.path_out('domain'), data['pardom'], True,
               title='Domain Lookup')
    gen_lookup(mylib.path_out('subdomain'), data['subdom'], False,
               title='Subdomain Lookup')

    print(' All Domains')
    gen_html_trinity(mylib.path_out('index', 'domains', 'all'), app_count,
                     json=data, title='Requested Domains',
                     symlink=index_domains.fname_all())
    del data  # drop the full index before the next ones are loaded

    print(' Trackers Only')
    gen_html_trinity(mylib.path_out('index', 'domains', 'tracker'),
                     app_count, json=index_domains.loadTracker(),
                     title='Tracker',
                     symlink=index_domains.fname_tracker())

    print(' Highly Used')
    gen_html_trinity(mylib.path_out('index', 'domains', 'highly-used'),
                     app_count, json=index_domains.loadNonTracker(),
                     title='Highly Used Domains',
                     symlink=index_domains.fname_no_tracker())
    print('')
    return app_count, dom_count


if __name__ == '__main__':
    process()