Files
appchk-web/src/html_index_domains.py
2020-09-28 16:22:25 +02:00

156 lines
5.4 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
import lib_common as mylib
import lib_graphs as Graph
import lib_html as HTML
import index_app_names # get_name
import index_domains
def dropdown_choose(button):
    """Return the HTML for the "Choose list" dropdown.

    Args:
        button: Text interpolated into the dropdown's <button> element.

    Returns:
        A string that starts with a newline and holds a label, the button
        and a <nav> with links to index.html, by_name.html and by_count.html.
    """
    markup = (
        '',  # keeps the leading newline of the rendered snippet
        '<label for="dropdown">Choose list:</label>',
        '<div class="dropdown" name="dropdown">',
        '<button class="bg1 border">{}</button>'.format(button),
        '<nav class="bg1 no-ul-all">',
        '<a href="index.html">Most frequent</a>',
        '<a href="by_name.html">Full list (AZ)</a>',
        '<a href="by_count.html">Full list (by count)</a>',
        '</nav>',
        '</div>',
    )
    return '\n'.join(markup)
def div_dom(fn_a_html, domain, count):
    """Render one list entry: a domain link followed by its app count.

    Args:
        fn_a_html: Callable that is given ``domain`` and returns the HTML
            to show for it (callers in this file pass HTML.a_domain or
            HTML.a_subdomain).
        domain: Value handed to ``fn_a_html``.
        count: Number inserted into the "found in N app(s)" text.

    Returns:
        ``'<link> <span>found in <count> app|apps</span>'`` as a string.
    """
    # Only exactly one app is singular; every other count (including 0)
    # takes the plural form. The previous `count > 1` check yielded "0 app".
    noun = 'app' if count == 1 else 'apps'
    return '{} <span>found in {} {}</span>'.format(
        fn_a_html(domain), count, noun)
def duo_list(list1, list2):
    """Build the two-section listing of subdomains and domains.

    Args:
        list1: Sized iterable of (subdomain, count) pairs; entries are
            linked with HTML.a_subdomain.
        list2: Sized iterable of (domain, count) pairs; entries are linked
            with HTML.a_domain.

    Returns:
        An HTML string with a "Subdomains (n)" and a "Domains (n)" section,
        each showing the length of its list and one row per pair, with rows
        separated by '<br>' plus a newline.
    """
    def render_rows(link_fn, pairs):
        # One div_dom row per (name, count) pair.
        return '<br>\n'.join(
            div_dom(link_fn, name, hits) for name, hits in pairs)

    subdomain_rows = render_rows(HTML.a_subdomain, list1)
    domain_rows = render_rows(HTML.a_domain, list2)
    return f'''
<div id="dom-toc" class="found-in">
<div id="subdomains">
<h3 class="stick-top">Subdomains ({len(list1)})
<a class="snd mg_lr" href="#domains">go to Domains</a></h3>
{subdomain_rows}
</div><div id="domains">
<h3 class="stick-top">Domains ({len(list2)})
<a class="snd mg_lr" href="#subdomains">go to Subdomains</a></h3>
{domain_rows}
</div>
</div>'''
def gen_html_top_10(path, subset, total, title):
    """Write the "most frequent" page for a subset of domains.

    Args:
        path: Destination passed through to HTML.write.
        subset: Iterable of (domain, count) pairs to list.
        total: Divisor for each count; the quotient goes to Graph.fill_bar.
        title: Used both as the page heading and as the ``title`` keyword
            of HTML.write.

    Each pair becomes a <div> holding the div_dom row and a fill bar. The
    page also links to by_count.html and by_name.html and appends the
    output of HTML.p_download_json('data.json', 'domains.json').
    """
    rows = ''.join(
        '\n<div>{} {}</div>'.format(
            div_dom(HTML.a_domain, name, hits),
            Graph.fill_bar(hits / total))
        for name, hits in subset)
    download_link = HTML.p_download_json('data.json', 'domains.json')
    HTML.write(path, f'''
<h2 class="center">{title}</h2>
<div class="div-center">
<div id="dom-top10" class="found-in">
{rows}
</div>
<p class="mg_top">Get full list sorted by
<a class="snd" href="by_count.html">Occurrence frequency</a> or in
<a class="snd" href="by_name.html">Alphabetical order</a>.
</p>
</div>
{download_link}
''', title=title)
def gen_html_trinity(idx_dir, app_count, json, title, symlink):
    """Generate the three index pages for one domain data set.

    Args:
        idx_dir: Output location handed to HTML.write and mylib.path_out.
        app_count: Divisor used for the fill bars on the top-list page.
        json: Mapping with 'subdom' and 'pardom' entries, each mapping a
            name to a sized collection (only its length is used here).
        title: Base page title.
        symlink: First argument of mylib.symlink; the second is the
            'data.json' path inside ``idx_dir``.

    Writes by_name.html (sorted by name), by_count.html (sorted by
    descending count) and the top-list page, then creates the symlink.
    """
    subdomains = [(name, len(apps)) for name, apps in json['subdom'].items()]
    domains = [(name, len(apps)) for name, apps in json['pardom'].items()]

    def write_full_list(fname, page_title, button):
        # Reads `subdomains` / `domains` in whatever order they currently have.
        breadcrumb = HTML.a_path([('Results', '/results/')], page_title)
        body = '<h2>{}</h2>{}{}'.format(
            breadcrumb, dropdown_choose(button), duo_list(subdomains, domains))
        HTML.write(idx_dir, body, title=page_title, fname=fname)

    # Full list, alphabetical.
    for pairs in (subdomains, domains):
        pairs.sort(key=lambda pair: pair[0])
    write_full_list('by_name.html', '{} (AZ)'.format(title), 'Full list (AZ)')

    # Full list, highest count first. The sort is stable, so entries with
    # equal counts keep the alphabetical order established above.
    for pairs in (subdomains, domains):
        pairs.sort(key=lambda pair: pair[1], reverse=True)
    write_full_list('by_count.html', '{} (by count)'.format(title),
                    'Full list (by count)')

    # Top list: the 25 domains with the highest count.
    gen_html_top_10(idx_dir, domains[:25], app_count,
                    'Top 25 {}'.format(title))
    mylib.symlink(symlink, mylib.path_out(idx_dir, 'data.json'))
def gen_lookup(html_dir, doms_dict, flag, title):
    """Write a lookup page and its accompanying doms.json.

    Args:
        html_dir: Output location for HTML.write; doms.json is written to
            mylib.path_add(html_dir, 'doms.json').
        doms_dict: Data serialised to doms.json via mylib.json_write.
        flag: When truthy, the page includes the "Subdomains:" heading and
            its empty ``#subdoms`` container.
        title: Page title passed to HTML.write.

    The page contains only placeholders (name, known tracker, app count,
    app tiles) and loads /static/lookup-domain.js and /static/lozad.js;
    presumably that script fills the placeholders in the browser.
    """
    breadcrumb = HTML.a_path([('All Domains', '/index/domains/all/')],
                             '<span id="name"></span>')
    if flag:
        subdomain_section = (
            '<h3>Subdomains:</h3><div id="subdoms" class="tags"></div>')
    else:
        subdomain_section = ''
    tile_template = HTML.app_tile_template()
    page = f'''
<h2>{breadcrumb}</h2>
<p>Known Tracker: <b id="known">?</b></p>
<p>Present in: <b id="num-apps">… applications</b></p>
{subdomain_section}
<h3>Apps containing this domain:</h3>
<div id="app-toc" class="no-ul-all">
{tile_template}
</div>
<script type="text/javascript" src="/static/lookup-domain.js?2"></script>
<script type="text/javascript" src="/static/lozad.js"></script>
<script type="text/javascript">
lookup_domain_js('doms.json', '/results/lookup-apps.json', '/results/subdoms.json');
</script>
'''
    HTML.write(html_dir, page, title=title)
    mylib.json_write(mylib.path_add(html_dir, 'doms.json'), doms_dict)
def process():
    """Generate all domain index and lookup pages.

    Loads the combined domain index, writes the shared lookup files into
    the 'results' output directory, then generates the domain and subdomain
    lookup pages and the three index sets (all, tracker, highly-used).

    Returns:
        Tuple ``(app_count, dom_count)``: the value of
        index_domains.number_of_apps for the combined index, and the
        number of entries under its 'subdom' key.
    """
    # bundle_combine assures domain name is [a-zA-Z0-9.-]
    print('generating html: domain-index ...')
    all_domains = index_domains.loadAll()
    app_count = index_domains.number_of_apps(all_domains)
    dom_count = len(all_domains['subdom'])

    # Prepare for lookup
    app_names = [[bundle, index_app_names.get_name(bundle)]
                 for bundle in all_domains['bundle']]
    results_dir = mylib.path_out('results')
    mylib.mkdir(results_dir)
    mylib.json_write(mylib.path_add(results_dir, 'lookup-apps.json'),
                     app_names)
    mylib.symlink(index_domains.fname_dom_subdoms(),
                  mylib.path_add(results_dir, 'subdoms.json'))
    del app_names  # no longer needed; drop the reference

    print(' Lookup')
    lookup_pages = (
        ('domain', 'pardom', True, 'Domain Lookup'),
        ('subdomain', 'subdom', False, 'Subdomain Lookup'),
    )
    for out_name, key, with_subdomains, page_title in lookup_pages:
        gen_lookup(mylib.path_out(out_name), all_domains[key],
                   with_subdomains, title=page_title)

    print(' All Domains')
    gen_html_trinity(mylib.path_out('index', 'domains', 'all'), app_count,
                     json=all_domains, title='Requested Domains',
                     symlink=index_domains.fname_all())
    del all_domains  # drop the combined index before loading the next sets

    print(' Trackers Only')
    gen_html_trinity(mylib.path_out('index', 'domains', 'tracker'),
                     app_count, json=index_domains.loadTracker(),
                     title='Tracker',
                     symlink=index_domains.fname_tracker())

    print(' Highly Used')
    gen_html_trinity(mylib.path_out('index', 'domains', 'highly-used'),
                     app_count, json=index_domains.loadNonTracker(),
                     title='Highly Used Domains',
                     symlink=index_domains.fname_no_tracker())
    print('')
    return app_count, dom_count
# Run the full generation when this file is executed as a script.
if __name__ == '__main__':
    process()