diff --git a/out/static/lookup-domain.js b/out/static/lookup-domain.js
new file mode 100644
index 0000000..eb4f28b
--- /dev/null
+++ b/out/static/lookup-domain.js
@@ -0,0 +1,39 @@
+function lookup_domain_fragment(fname_a, fname_b, id1, id2, id3) {
+ let dom = window.location.hash.substr(1);
+ document.getElementById(id1).innerHTML = dom;
+
+ // load reverse domains json
+ loadJSON(fname_a, function(response) {
+ let elem = JSON.parse(response)[dom];
+ if (!elem || elem.length == 0) {
+ document.getElementById(id2).innerHTML = '0 applications';
+ document.getElementById(id3).innerHTML = '– None –';
+ return;
+ }
+ document.getElementById(id2).innerHTML = elem.length + ' applications';
+
+ // load app name json
+ loadJSON(fname_b, function(response) {
+ let name_list = JSON.parse(response);
+ var apps = [];
+ for (var i = elem.length - 1; i >= 0; i--) {
+ let bndl = name_list[elem[i]];
+ if (!bndl) { continue; }
+ apps.push([bndl[0], bndl[1], bndl[1].toLowerCase()]);
+ }
+ apps.sort(function(a, b){return a[2] < b[2] ? -1 : a[2] > b[2] ? 1 : 0});
+ var content = '';
+ for (var i = 0; i < apps.length; i++) {
+ content += `
+
+
+

+
` + apps[i][1] + `
+
` + apps[i][0] + `
+
+ `;
+ }
+ document.getElementById(id3).innerHTML = '
+
{}
{}'''.format(content, pagination), title="Index"))
diff --git a/src/html_reverse_domains.py b/src/html_reverse_domains.py
new file mode 100755
index 0000000..da1559b
--- /dev/null
+++ b/src/html_reverse_domains.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+
+import common_lib as mylib
+import index_bundle_names
+import index_reverse_domains
+
+
+def a_app(bundle_id):  # Formats bundle_id and its name from index_bundle_names.get_name() into a string; NOTE(review): the literal's markup is not visible in this view.
+ return '
{}'.format(
+ bundle_id, index_bundle_names.get_name(bundle_id))
+
+
+def a_dom(domain, key):  # Formats a string with positional args (key, domain), so {1} is the domain; NOTE(review): the rest of the literal is not visible in this view.
+ return '
{1}'.format(key, domain)
+
+
+def div_dom(domain, count, key):  # Renders a_dom(domain, key) followed by "found in <count> app(s)"; the plural 'apps' is used only when count > 1.
+ return '{}
found in {} {}'.format(
+ a_dom(domain, key), count, 'apps' if count > 1 else 'app')
+
+
+def dropdown_choose(button):  # Returns an f-string snippet; NOTE(review): the template body is not visible in this view - presumably it interpolates `button`, confirm.
+ return f'''
+
+
'''
+
+
+def duo_list(list1, list2):  # list1/list2 are iterated as (domain, ids) pairs; list1 rows use key 'subdomain', list2 rows use key 'domain'; both lengths and joined texts fill the returned template.
+ txt1 = '
\n'.join([div_dom(dom, len(ids), 'subdomain') for dom, ids in list1])
+ txt2 = '
\n'.join([div_dom(dom, len(ids), 'domain') for dom, ids in list2])
+ return '''
+
'''.format(len(list1), txt1, len(list2), txt2)
+
+
+def gen_html_index(l1, l2, fname, title, button):  # Writes one index page to fname: title, dropdown_choose(button) and duo_list(l1, l2), wrapped by mylib.template_with_base().
+ with open(fname, 'w') as fp:  # mode 'w': an existing file at fname is overwritten
+ fp.write(mylib.template_with_base(
+ f'
{title}
' + dropdown_choose(button) + duo_list(l1, l2),
+ title=title))
+
+
+def gen_html_top_domains(subset, fname, total, title):  # Writes a page to fname with one row per (domain, ids) pair in subset, each followed by a percentage bar relative to total.
+
+ def div_loadbar(percent):  # Local helper: formats `percent` followed by '%'; surrounding markup is not visible in this view.
+ return '
{0}%'.format(percent)
+
+ with open(fname, 'w') as fp:
+ txt = f'''
+
+
{ title }
'''
+ for dom, ids in subset:
+ dom_str = div_dom(dom, len(ids), 'subdomain')
+ pct_bar = div_loadbar(round(len(ids) / total * 100))  # len(ids) as a percentage of total, rounded to an integer
+ txt += f'\n
{dom_str} {pct_bar}
'
+ fp.write(mylib.template_with_base(txt + '''
+
Get full list
+sorted by Occurrence frequency
+or in Alphabetical order.
+
+
Download: json
+''', title=title))
+
+
+def gen_html_lookup(html_dir, json, key, title):  # Creates html_dir and writes apps.json, doms.json and index.html into it; `json` is the index dict (it is a parameter here, not the json module).
+ mylib.mkdir(html_dir)
+ names = [[x, index_bundle_names.get_name(x)] for x in json['bundle']]  # [bundle id, name] pairs, in the order of json['bundle']
+ mylib.json_write(mylib.path_add(html_dir, 'apps.json'), names)
+ mylib.json_write(mylib.path_add(html_dir, 'doms.json'), json[key])  # only the mapping stored under `key` is exported
+ with open(mylib.path_add(html_dir, 'index.html'), 'w') as fp:
+ fp.write(mylib.template_with_base(f'''
+
+
Present in: … applications
+
Apps containing this domain:
+
loading…
+
+
+''', title=title))
+
+
+def process():  # Generates the domain index pages (A-Z, by count, top 20) under index/domains and the 'domain' / 'subdomain' lookup directories.
+ # bundle_combine assures domain name is [a-zA-Z0-9.-]
+ print('generating reverse-domain-index ...')
+ idx_dir = mylib.path_out('index', 'domains')
+ mylib.mkdir(idx_dir)
+
+ # Data export
+ mylib.symlink(mylib.path_data_index('reverse_domains.json'),
+ mylib.path_out_app(idx_dir, 'data.json'))
+
+ par_arr = list(index_reverse_domains.enumerate('pardom'))
+ sub_arr = list(index_reverse_domains.enumerate('subdom'))
+
+ # Full list (A–Z)
+ sub_arr.sort(key=lambda x: x[0])
+ par_arr.sort(key=lambda x: x[0])
+ gen_html_index(sub_arr, par_arr, mylib.path_add(idx_dir, 'by_name.html'),
+ title='Requested Domains (A–Z)',
+ button='Full list (A–Z)')
+
+ # Full list (by count)
+ sub_arr.sort(key=lambda x: -len(x[1]))
+ par_arr.sort(key=lambda x: -len(x[1]))
+ gen_html_index(sub_arr, par_arr, mylib.path_add(idx_dir, 'by_count.html'),
+ title='Requested Domains (most apps)',
+ button='Full list (by count)')
+
+ # Top 20 (sub_arr is still sorted by descending app count from the step above)
+ del(sub_arr[20:])
+ del(par_arr)
+ total = index_reverse_domains.number_of_apps()
+ gen_html_top_domains(sub_arr, mylib.path_add(idx_dir, 'index.html'),
+ total, 'Top 20 Requested Domains')
+
+ # Lookup
+ json = index_reverse_domains.raw()
+ gen_html_lookup(mylib.path_out('domain'), json, 'pardom',
+ title='Domain Lookup')
+ gen_html_lookup(mylib.path_out('subdomain'), json, 'subdom',
+ title='Subdomain Lookup')
+ print('')
+
+if __name__ == '__main__':
+ process()
diff --git a/src/index_bundle_names.py b/src/index_bundle_names.py
index d2aba8f..2781647 100755
--- a/src/index_bundle_names.py
+++ b/src/index_bundle_names.py
@@ -24,14 +24,14 @@ def write_json_to_disk():
mylib.json_write(index_fname(), _bundle_name_dict, pretty=True)
-def get_name(bundle_id, langs=['us', 'de']):
+def get_name(bundle_id, langs=['us', 'de'], fallback='< App-Name >'):  # Returns the first name found for bundle_id among langs (tried in order); `fallback` is returned when none is found.
load_json_if_not_already()
for lang in langs:
try:
return _bundle_name_dict[bundle_id][lang]
except KeyError:
continue
- return '< App-Name >' # None
+ return fallback # None
def process(bundle_ids):
diff --git a/src/index_reverse_domains.py b/src/index_reverse_domains.py
index 640d813..5c80281 100755
--- a/src/index_reverse_domains.py
+++ b/src/index_reverse_domains.py
@@ -3,79 +3,103 @@
import sys
import common_lib as mylib
-
-def load_index_json(file_path):
- if mylib.file_exists(file_path):
- json = mylib.json_read(file_path)
- else:
- json = dict({'bundle': [], 'pardom': dict(), 'subdom': dict()})
- return json
+_reverse_domain_dict = None
-def delete_from_index(index, bundle_ids, deleteOnly=False):
+def index_fname():  # Path of the reverse-domain index file, as resolved by mylib.path_data_index().
+ return mylib.path_data_index('reverse_domains.json')
+
+
+def load_json_if_not_already():  # Populates the module-level _reverse_domain_dict once; later calls are no-ops while it stays truthy.
+ global _reverse_domain_dict
+ if not _reverse_domain_dict:
+ index_file = index_fname()
+ if mylib.file_exists(index_file):
+ _reverse_domain_dict = mylib.json_read(index_file)
+ else:
+ _reverse_domain_dict = {'bundle': [], 'pardom': {}, 'subdom': {}}  # empty index skeleton used when no index file exists yet
+
+
+def write_json_to_disk():  # Writes the in-memory _reverse_domain_dict to index_fname() with pretty=False.
+ mylib.json_write(index_fname(), _reverse_domain_dict, pretty=False)
+
+
+def delete_from_index(bundle_ids, deleteOnly=False):  # Removes the positions of bundle_ids from every 'pardom'/'subdom' list in _reverse_domain_dict; returns False when none of the ids is indexed, True otherwise.
+ global _reverse_domain_dict
ids_to_delete = set()
for bid in bundle_ids:
try:
- i = index['bundle'].index(bid)
+ i = _reverse_domain_dict['bundle'].index(bid)
except ValueError: # index not found
continue
ids_to_delete.add(i)
if deleteOnly:
- index['bundle'][i] = '_'
+ _reverse_domain_dict['bundle'][i] = '_'  # placeholder keeps the positions of the remaining bundle ids unchanged
if len(ids_to_delete) == 0:
return False
for key in ['pardom', 'subdom']:
- for domain in list(index[key].keys()):
+ for domain in list(_reverse_domain_dict[key].keys()):  # list() copy: entries are deleted from the dict inside this loop
for i in ids_to_delete:
try:
- index[key][domain].remove(i)
+ _reverse_domain_dict[key][domain].remove(i)
except ValueError: # ignore if not present
continue
- if not index[key][domain]:
- del(index[key][domain])
+ if not _reverse_domain_dict[key][domain]:
+ del(_reverse_domain_dict[key][domain])  # drop domains that no longer reference any app
return True
-def insert_in_index(index, bundle_ids):
+def insert_in_index(bundle_ids):  # Registers each bundle id in _reverse_domain_dict['bundle'] (appending unknown ones) and adds its position to the list of every domain it uses; returns has_changes.
+ global _reverse_domain_dict
has_changes = False
for bid in bundle_ids:
try:
- i = index['bundle'].index(bid)
+ i = _reverse_domain_dict['bundle'].index(bid)
except ValueError: # index not found
- i = len(index['bundle'])
- index['bundle'].append(bid)
- try:
- json, _ = mylib.json_read_evaluated(bid)
- except FileNotFoundError:
- continue
+ i = len(_reverse_domain_dict['bundle'])
+ _reverse_domain_dict['bundle'].append(bid)
+ json, _ = mylib.json_read_evaluated(bid)  # NOTE(review): the FileNotFoundError guard that used to wrap this call is removed by this change, so such an error now propagates - confirm intended
for key in ['pardom', 'subdom']: # assuming keys are identical
for domain, _, _ in json[key]:
try:
- index[key][domain].append(i)
+ _reverse_domain_dict[key][domain].append(i)
except KeyError:
- index[key][domain] = [i]
+ _reverse_domain_dict[key][domain] = [i]  # first app seen for this domain
has_changes = True
return has_changes
+def raw():  # Returns the module-level index dict itself (not a copy), loading it first if needed.
+ load_json_if_not_already()
+ return _reverse_domain_dict
+
+
+def number_of_apps():  # Counts entries of the 'bundle' list, skipping the '_' placeholders that delete_from_index() writes when deleteOnly is set.
+ load_json_if_not_already()
+ return sum(1 for x in _reverse_domain_dict['bundle'] if x != '_')
+
+
+def enumerate(key):  # Generator over the mapping stored under `key`; NOTE: this name shadows the builtin enumerate() inside this module.
+ load_json_if_not_already()
+ for dom, bundles in _reverse_domain_dict[key].items():
+ yield [dom, [_reverse_domain_dict['bundle'][i] for i in bundles]]  # stored positions are resolved to bundle ids
+
+
def process(bundle_ids, deleteOnly=False):
print('writing index: reverse domains ...')
- index_file = mylib.path_data_index('reverse_domains.json')
if bundle_ids == ['*']:
bundle_ids = list(mylib.enum_data_appids())
print(' full reset')
- mylib.rm_file(index_file) # rebuild from ground up
- # load previous index
- json = load_index_json(index_file)
- # delete previous index entries
- did_change = delete_from_index(json, bundle_ids, deleteOnly=deleteOnly)
- # write new index to disk
+ mylib.rm_file(index_fname()) # rebuild from ground up
+
+ load_json_if_not_already()  # NOTE(review): if the dict was already loaded in this process, deleting the file above does not reset the in-memory copy - confirm this is acceptable
+ did_change = delete_from_index(bundle_ids, deleteOnly=deleteOnly)  # drop previous index entries for these ids
if not deleteOnly:
- did_change |= insert_in_index(json, bundle_ids)
+ did_change |= insert_in_index(bundle_ids)
if did_change:
- mylib.json_write(index_file, json, pretty=False)
+ write_json_to_disk()  # persist only when something changed
else:
print(' no change')
print('')
diff --git a/templates/base.html b/templates/base.html
index 7a0dbb4..d912a4f 100644
--- a/templates/base.html
+++ b/templates/base.html
@@ -22,6 +22,7 @@