From fc73635cb685035a73807a5544844b118130add0 Mon Sep 17 00:00:00 2001
From: relikd
Date: Mon, 28 Sep 2020 16:22:25 +0200
Subject: [PATCH] Parent domain incl subdomains

---
Review notes (text in this section is ignored by `git am`):

* lookup-domain.js: `count` is now computed only when the looked-up entry
  exists. Before, `elem.length` was read ahead of the `!elem` guard, so a
  domain missing from the JSON raised a TypeError and the
  "0 applications" / "– None –" branch could never run.
* lookup-domain.js: the domain name comes from `window.location.hash` and
  is now written with `textContent` instead of `innerHTML`, so it is never
  parsed as markup.
* Both changes are line-count neutral; hunk headers and the diffstat are
  unchanged. The blob ids on the `index` line no longer match the
  post-image of lookup-domain.js.
* In this copy the HTML markup inside string literals (the page template in
  html_index_domains.py and the link built in `lnk`) has been stripped. That
  text is kept exactly as found and must be restored from the original
  commit before the patch is applied.

 out/static/lookup-domain.js | 43 +++++++++++++++++++++++++++++--------
 src/html_index_domains.py   | 38 ++++++++++++++++++--------------
 src/index_domains.py        | 19 ++++++++++++++++
 3 files changed, 75 insertions(+), 25 deletions(-)

diff --git a/out/static/lookup-domain.js b/out/static/lookup-domain.js
index c0a4d6c..1a1bbe6 100644
--- a/out/static/lookup-domain.js
+++ b/out/static/lookup-domain.js
@@ -1,22 +1,30 @@
-function lookup_domain_js(fname_a, fname_b, id1, id2, id3) {
-  let dom = window.location.hash.substr(1);
-  document.getElementById(id1).innerHTML = dom; // domain name
-  let dom_app_list = document.getElementById(id3); // apps list
+function lookup_domain_js(fname_doms, fname_apps, fname_subs) {
+  let dom = window.location.hash.substr(1); // domain name
+  document.getElementById('name').textContent = dom; // hash is user-controlled: never parse it as HTML
+  let dom_num_apps = document.getElementById('num-apps');
+  let dom_app_list = document.getElementById('app-toc');
+  let dom_sub_doms = document.getElementById('subdoms');
+  let dom_known_trkr = document.getElementById('known');
   let template = dom_app_list.firstElementChild;
   dom_app_list.innerHTML = 'loading…';
 
   // load reverse domains json
-  loadJSON(fname_a, function(response) {
+  loadJSON(fname_doms, function(response) {
     let elem = JSON.parse(response)[dom];
-    if (!elem || elem.length == 0) {
-      document.getElementById(id2).innerHTML = '0 applications';
+    let count = elem ? elem.length - 1 : 0; // elem[0] is the tracker flag; unknown domain -> 0
+    if (!elem || count < 1) {
+      dom_num_apps.innerHTML = '0 applications';
       dom_app_list.innerHTML = '– None –';
       return;
+    } else if (count == 1) {
+      dom_num_apps.innerHTML = '1 application';
+    } else {
+      dom_num_apps.innerHTML = count + ' applications';
     }
-    document.getElementById(id2).innerHTML = elem.length + ' applications';
+    dom_known_trkr.innerHTML = elem[0] ? 'Yes' : 'No';
 
     // load app name json
-    loadJSON(fname_b, function(response) {
+    loadJSON(fname_apps, function(response) {
       let name_list = JSON.parse(response);
       var apps = [];
       for (var i = elem.length - 1; i >= 0; i--) {
@@ -39,6 +47,23 @@ function lookup_domain_js(fname_a, fname_b, id1, id2, id3) {
         dom_app_list.appendChild(item);
       }
       const observer = lozad(); observer.observe();
+
+      if (!dom_sub_doms) { return }
+      loadJSON(fname_subs, function(response) {
+        let subdomains_list = JSON.parse(response)[dom];
+        if (subdomains_list) {
+          var src = '';
+          for (var i = 0; i < subdomains_list.length; i++) {
+            let sub = subdomains_list[i];
+            let full = sub ? sub + '.' + dom : dom;
+            let lnk = '' + sub + '. ';
+            src += lnk;
+          }
+          dom_sub_doms.innerHTML = src;
+        } else {
+          dom_sub_doms.innerHTML = '– None –';
+        }
+      });
     });
   });
 }
\ No newline at end of file
diff --git a/src/html_index_domains.py b/src/html_index_domains.py
index d947035..9625951 100755
--- a/src/html_index_domains.py
+++ b/src/html_index_domains.py
@@ -89,24 +89,23 @@ def gen_html_trinity(idx_dir, app_count, json, title, symlink):
     mylib.symlink(symlink, mylib.path_out(idx_dir, 'data.json'))
 
 
-def gen_lookup(html_dir, doms_dict, names_dict, title):
-    header = HTML.a_path([('All Domains', '/index/domains/all/')],
-                         '')
-    HTML.write(html_dir, '''
-

{}

+def gen_lookup(html_dir, doms_dict, flag, title):
+    HTML.write(html_dir, f'''
+

{ HTML.a_path([('All Domains', '/index/domains/all/')],
+                 '') }

+

Known Tracker: ?

Present in: … applications

+{ '

Subdomains:

' if flag else '' }

Apps containing this domain:

-  {}
+  { HTML.app_tile_template() }
-
+
-'''.format(header, HTML.app_tile_template()), title=title)
-    # after html write which will create the dir
-    mylib.json_write(mylib.path_add(html_dir, 'apps.json'), names_dict)
+''', title=title)
     mylib.json_write(mylib.path_add(html_dir, 'doms.json'), doms_dict)
 
 
@@ -117,14 +116,21 @@ def process():
     app_count = index_domains.number_of_apps(json)
     dom_count = len(json['subdom'])
 
-    print(' Lookup')
+    # Prepare for lookup
     names = [[x, index_app_names.get_name(x)] for x in json['bundle']]
-    gen_lookup(mylib.path_out('domain'), json['pardom'], names,
-               title='Domain Lookup')
-    gen_lookup(mylib.path_out('subdomain'), json['subdom'], names,
-               title='Subdomain Lookup')
+    dest_dir = mylib.path_out('results')
+    mylib.mkdir(dest_dir)
+    mylib.json_write(mylib.path_add(dest_dir, 'lookup-apps.json'), names)
+    mylib.symlink(index_domains.fname_dom_subdoms(),
+                  mylib.path_add(dest_dir, 'subdoms.json'))
     names = None
 
+    print(' Lookup')
+    gen_lookup(mylib.path_out('domain'), json['pardom'], True,
+               title='Domain Lookup')
+    gen_lookup(mylib.path_out('subdomain'), json['subdom'], False,
+               title='Subdomain Lookup')
+
     print(' All Domains')
     gen_html_trinity(mylib.path_out('index', 'domains', 'all'), app_count,
                      json=json, title='Requested Domains',
diff --git a/src/index_domains.py b/src/index_domains.py
index eab8141..819470d 100755
--- a/src/index_domains.py
+++ b/src/index_domains.py
@@ -18,6 +18,10 @@ def fname_no_tracker():
     return mylib.path_data_index('domains_no_tracker.json')
 
 
+def fname_dom_subdoms():
+    return mylib.path_data_index('domains_subdomains.json')
+
+
 def load_json_from_disk(index_file):
     return mylib.json_safe_read(
         index_file, fallback={'bundle': [], 'pardom': {}, 'subdom': {}})
@@ -110,6 +114,18 @@ def filter_list_at_least(index, min_count):
     index['pardom'] = par
 
 
+def dict_dom_subdomains(index):
+    ret = {}
+    for subdomain in index['subdom'].keys():
+        pardom = mylib.parent_domain(subdomain)
+        host = subdomain[:-len(pardom) - 1]  # - '.'
+        try:
+            ret[pardom].append(host)
+        except KeyError:
+            ret[pardom] = [host]
+    return ret
+
+
 def number_of_apps(index):
     return sum(1 for x in index['bundle'] if x != '_')
 
@@ -149,6 +165,9 @@ def process(bundle_ids, deleteOnly=False):
         mylib.json_write(fname_tracker(), dict_trkr, pretty=False)
         filter_list_at_least(dict_no_trkr, 5)  # or 0.1 * len(ids)
         mylib.json_write(fname_no_tracker(), dict_no_trkr, pretty=False)
+        mylib.json_write(fname_dom_subdoms(), dict_dom_subdomains(index),
+                         pretty=False)
+
     else:
         print(' no change')
     print('')