Domain index

This commit is contained in:
relikd
2020-09-18 23:56:15 +02:00
parent 0148106a56
commit ba8091268d
10 changed files with 351 additions and 59 deletions

View File

@@ -0,0 +1,39 @@
/**
 * Fill the lookup page for the domain named in the URL fragment.
 *
 * The fragment (text after '#') is looked up in the reverse-domain JSON
 * (`fname_a`), which maps a domain to a list of indices. Each index selects
 * an entry `[bundle id, app name]` from the app JSON (`fname_b`). The
 * matching apps are rendered as cards, sorted case-insensitively by name.
 *
 * @param {string} fname_a - URL of the JSON object mapping domain -> index list.
 * @param {string} fname_b - URL of the JSON list of `[bundle id, app name]` pairs.
 * @param {string} id1 - Element id that receives the domain name.
 * @param {string} id2 - Element id that receives the "N applications" text.
 * @param {string} id3 - Element id that receives the generated app list.
 */
function lookup_domain_fragment(fname_a, fname_b, id1, id2, id3) {
  // The fragment can be set by anyone who crafts a link to this page, so it
  // is written as plain text and never parsed as HTML.
  const dom = window.location.hash.slice(1);
  document.getElementById(id1).textContent = dom;
  // load reverse domains json
  loadJSON(fname_a, function(response) {
    const elem = JSON.parse(response)[dom];
    // Array.isArray also rejects inherited members (e.g. "#constructor"),
    // which would otherwise pass the length check.
    if (!Array.isArray(elem) || elem.length === 0) {
      document.getElementById(id2).innerHTML = '0 applications';
      document.getElementById(id3).innerHTML = ' None ';
      return;
    }
    document.getElementById(id2).innerHTML = elem.length + ' applications';
    // load app name json
    loadJSON(fname_b, function(response) {
      const name_list = JSON.parse(response);
      const apps = [];
      // Walk backwards so that apps with equal names keep the same relative
      // order as before once the (stable) sort has run.
      for (let i = elem.length - 1; i >= 0; i--) {
        const bndl = name_list[elem[i]];
        if (!bndl) { continue; }  // index without a name entry
        apps.push([bndl[0], bndl[1], bndl[1].toLowerCase()]);
      }
      apps.sort(function(a, b) { return a[2] < b[2] ? -1 : a[2] > b[2] ? 1 : 0; });
      // NOTE(review): bundle ids and names are inserted as markup; this
      // assumes the generated JSON is trusted and already HTML-safe - confirm.
      let content = '';
      for (const app of apps) {
        content += [
          '',
          `<a href="/app/${app[0]}/">`,
          '<div>',
          `<img src="/app/${app[0]}/icon.png" width="100" height="100">`,
          `<span class="name">${app[1]}</span><br />`,
          `<span class="detail">${app[0]}</span>`,
          '</div>',
          '</a>',
        ].join('\n');
      }
      document.getElementById(id3).innerHTML = '<div id="app-toc" class="no_ul_all">' + content + '</div>';
    });
  });
}

View File

@@ -12,3 +12,14 @@ function updateViewport() {// show at least 2 columns on mobile devices
document.head.appendChild(x); document.head.appendChild(x);
} }
} }
/**
 * Request `url` via GET and pass the raw response text to `callback`.
 *
 * The callback is invoked only when the request has finished with HTTP
 * status 200; for any other outcome it is not called at all. The text is
 * handed over unparsed - callers run JSON.parse themselves.
 *
 * @param {string} url - Resource to fetch.
 * @param {function(string): void} callback - Receives `responseText` on success.
 * @param {boolean} [async=true] - Forwarded to `XMLHttpRequest.open`.
 */
function loadJSON(url, callback, async=true) {
  const xobj = new XMLHttpRequest();
  xobj.overrideMimeType('application/json');
  xobj.open('GET', url, async);
  xobj.onreadystatechange = function () {
    // readyState 4 means the request is done; `status` is a number, so it is
    // compared strictly against a number instead of the string "200".
    if (xobj.readyState === 4 && xobj.status === 200) {
      callback(xobj.responseText);
    }
  };
  xobj.send(null);
}

View File

@@ -7,9 +7,9 @@ body {
min-width: 436px; min-width: 436px;
} }
a { text-decoration: none; color: unset; } a { text-decoration: none; color: unset; }
p a, td a { border-bottom: 1pt dotted; } main a { border-bottom: 1pt dotted; }
a:hover { border-bottom: 1pt solid; } a:hover { border-bottom: 1pt solid; }
#app-toc a:hover, a.no-ul:hover { a.no-ul, a.no-ul:hover, .no_ul_all a, .no_ul_all a:hover {
border-bottom: unset; border-bottom: unset;
} }
main, footer { padding: 0 1em; } main, footer { padding: 0 1em; }
@@ -48,18 +48,43 @@ footer .links {
font-size: 0.9em; font-size: 0.9em;
padding: 1em; padding: 1em;
} }
footer .links a { color: #ddd; }
#main-nav { float: right; } #main-nav { float: right; }
#main-nav li { display: inline-block; margin-right: 1em; } #main-nav li { display: inline-block; margin-right: 1em; }
#main-nav img { height: 1.2em; margin: 0 -0.4em; } #main-nav img { height: 1.2em; margin: 0 -0.4em; }
#main-nav img:hover { transform: scale(1.2); } #main-nav img:hover { transform: scale(1.2); }
/* web root */
#get-appcheck:hover { color: #586472; } #get-appcheck:hover { color: #586472; }
#get-appcheck img { width: 3em; height: 3em; margin: 0.3em; } #get-appcheck img { width: 3em; height: 3em; margin: 0.3em; }
#get-appcheck * { display: inline-block; vertical-align: middle; } #get-appcheck * { display: inline-block; vertical-align: middle; }
#app-toc { text-align: center; } /* dropdown */
.dropdown button {
padding: 0.5em 1em;
font-size: 16px;
cursor: pointer;
}
.dropdown { display: inline-block; position: relative; }
.dropdown div {
display: none;
position: absolute;
width: max-content;
box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
z-index: 1;
}
.dropdown:hover div { display: block; }
.dropdown a { display: block; padding: 0.5em 1em; }
.dropdown a:hover { background-color: #eee; }
#app-toc div, .bg1 { background: #eee; }
#app-toc div, .border { border: 1pt solid #ccc; }
#app-toc div:hover, .dropdown:hover button, .dropdown a:hover {
background: #BBC6CA;
}
/* app index */
#app-toc a { text-align: center; }
#app-toc div { #app-toc div {
display: inline-block; display: inline-block;
width: 140px; width: 140px;
@@ -68,12 +93,9 @@ footer .links a { color: #ddd; }
margin: 5px; margin: 5px;
padding: 16px; padding: 16px;
vertical-align: top; vertical-align: top;
background: #eee;
word-wrap: break-word; word-wrap: break-word;
border: 1pt solid #ccc;
border-radius: 7px; border-radius: 7px;
} }
#app-toc div:hover { background: #BBC6CA; }
#app-toc img { #app-toc img {
margin: 0.5em auto 1em; margin: 0.5em auto 1em;
display: block; display: block;
@@ -89,30 +111,56 @@ footer .links a { color: #ddd; }
#pagination a { margin: 0.5em; padding: 0.2em } #pagination a { margin: 0.5em; padding: 0.2em }
#pagination a.active { border: 1pt solid black; border-radius: 0.2em; } #pagination a.active { border: 1pt solid black; border-radius: 0.2em; }
/* domain index */
#dom-toc h3 {
position: sticky;
top: 0;
background: #fff;
padding-bottom: 4px;
}
#dom-toc a, #dom-top10 a { word-wrap: break-word; }
#dom-toc span { display: table; }
.found-in span, .snd { color: #586472; font-size: 0.85em; }
.loadbar {
display: block;
background: #DDD;
width: 200px;
margin: 2px;
border-radius: 4px;
text-align: left;
}
.loadbar span {
display: inline-block;
border-radius: 4px 0 0 4px;
background: #AC2B4A;
font-size: 0.8em;
padding: 2px 0 2px 0;
text-align: center;
color: #FFF;
}
/* app bundle */ /* app bundle */
.squeeze { max-width: 700px; } .squeeze { max-width: 700px; }
h2.title { margin-bottom: 0; } h2.title { margin-bottom: 0; }
p.subtitle { margin-top: 0.2em; } p.subtitle { margin-top: 0.2em; }
.mg_lr { margin: 0 0.4em; } .mg_lr { margin: 0 0.4em; }
.snd { color: #586472; font-size: 0.85em; } .mg_top { margin-top: 2em; }
.right { text-align: right; }
.center { text-align: center; }
td { padding: 0.2em 1em 0.2em 0.1em; } td { padding: 0.2em 1em 0.2em 0.1em; }
#meta td:nth-child(2) { font-weight: bold } #meta td:nth-child(2) { font-weight: bold }
.help-links td { padding: 0.5em; }
.help-links tr:nth-child(even) { background: #DDD; }
.help-links tr:nth-child(odd) { background: #F9F9F9; }
.help-links .notyet { color: #D11; }
.help-links .done { color: #52C840; }
.right { text-align: right; }
/* domain tags */ /* app bundle: domain tags */
.tags { margin: 2em 0; } .tags { margin: 2em 0; }
.tags i { .tags i {
font-size: 0.9em; font-size: 0.9em;
font-style: normal; font-style: normal;
font-weight: normal; font-weight: normal;
background: #eee; background: #EEE;
padding: 2pt 4pt; padding: 2pt 4pt;
border: 1pt solid #aaa; border: 1pt solid #AAA;
border-radius: 0.2em; border-radius: 0.2em;
display: inline-block; display: inline-block;
margin: 0.12em; margin: 0.12em;
@@ -120,7 +168,7 @@ td { padding: 0.2em 1em 0.2em 0.1em; }
.tags i.trckr, .tags.trckr i { background: #F9A7A7;; border-color: #B06363; } .tags i.trckr, .tags.trckr i { background: #F9A7A7;; border-color: #B06363; }
p.trckr { font-size: 0.9em; margin-left: 0.5em; } p.trckr { font-size: 0.9em; margin-left: 0.5em; }
/* graphs */ /* app bundle: graphs */
.dot-graph { .dot-graph {
touch-action: manipulation; touch-action: manipulation;
user-select: none; user-select: none;
@@ -153,6 +201,13 @@ p.trckr { font-size: 0.9em; margin-left: 0.5em; }
.cs0{stroke:#6AC45C} .cs0{stroke:#6AC45C}
.cs1{stroke:#CA0D3A} .cs1{stroke:#CA0D3A}
/* Help needed */
.help-links td { padding: 0.5em; }
.help-links tr:nth-child(even) { background: #DDD; }
.help-links tr:nth-child(odd) { background: #F9F9F9; }
.help-links .notyet { color: #D11; }
.help-links .done { color: #52C840; }
/* responsive */ /* responsive */
@media(max-width: 647px) { @media(max-width: 647px) {
header h1 span { display: none; } /* header subtitle */ header h1 span { display: none; } /* header subtitle */
@@ -163,4 +218,23 @@ p.trckr { font-size: 0.9em; margin-left: 0.5em; }
@media(min-width: 648px) { @media(min-width: 648px) {
#meta .icons { float: right; } /* icons below each other */ #meta .icons { float: right; } /* icons below each other */
.pie-chart { margin-top: 1em; } .pie-chart { margin-top: 1em; }
#dom-toc h3 a { display: none; }
#dom-toc div:nth-child(1) {
display: inline-block;
vertical-align: top;
width: 59%;
}
#dom-toc div:nth-child(2) {
display: inline-block;
vertical-align: top;
width: 40%;
margin-left: 1%;
}
#dom-top10 {
margin: 0 auto;
width: max-content;
max-width: 100%;
text-align: right;
}
.loadbar { display: inline-block; }
} }

View File

@@ -166,6 +166,11 @@ def file_exists(path):
return os.path.isfile(path) and os.path.getsize(path) > 0 return os.path.isfile(path) and os.path.getsize(path) > 0
def symlink(source, target):
    """Create a symbolic link at `target` pointing to `source`, if needed.

    Safe to call repeatedly:
    - nothing at `target`: the link is created;
    - `target` is a dangling symlink: it is replaced by a fresh link;
    - anything else already at `target`: it is left untouched.

    The existence test deliberately does not follow links or look at file
    size. A check that does so reports "missing" for a dangling link or an
    empty file, and the subsequent os.symlink then fails with FileExistsError.
    """
    if os.path.islink(target) and not os.path.exists(target):
        os.remove(target)  # stale link whose destination is gone
    if not os.path.lexists(target):
        os.symlink(source, target)
def meta_json_exists(bundle_id, lang): def meta_json_exists(bundle_id, lang):
return file_exists(path_data_app(bundle_id, 'info_{}.json'.format(lang))) return file_exists(path_data_app(bundle_id, 'info_{}.json'.format(lang)))

View File

@@ -1,6 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import os
import sys import sys
import time import time
import math import math
@@ -138,9 +137,7 @@ def process(bundle_ids):
mylib.mkdir_out_app(bid) mylib.mkdir_out_app(bid)
with open(mylib.path_out_app(bid, 'index.html'), 'w') as fp: with open(mylib.path_out_app(bid, 'index.html'), 'w') as fp:
fp.write(gen_html(bid, json)) fp.write(gen_html(bid, json))
download_link = mylib.path_out_app(bid, 'data.json') mylib.symlink(json_data_path, mylib.path_out_app(bid, 'data.json'))
if not mylib.file_exists(download_link):
os.symlink(json_data_path, download_link)
print('') print('')

View File

@@ -41,7 +41,7 @@ def gen_pager(current, total):
links += mklink(i, i, active=i == current) links += mklink(i, i, active=i == current)
# if current < total: # if current < total:
# links += mklink(current + 1, 'Next') # links += mklink(current + 1, 'Next')
return '<div id="pagination">{}</div>'.format(links) return '<div id="pagination" class="no_ul_all">{}</div>'.format(links)
def gen_page(arr, base, page_id=1, total=1): def gen_page(arr, base, page_id=1, total=1):
@@ -52,7 +52,7 @@ def gen_page(arr, base, page_id=1, total=1):
pagination = gen_pager(page_id, total) # if total > 1 else '' pagination = gen_pager(page_id, total) # if total > 1 else ''
fp.write(mylib.template_with_base(''' fp.write(mylib.template_with_base('''
<h2>List of app recordings (AZ)</h2> <h2>List of app recordings (AZ)</h2>
<div id="app-toc"> <div id="app-toc" class="center no_ul_all">
{} {}
</div> </div>
{}'''.format(content, pagination), title="Index")) {}'''.format(content, pagination), title="Index"))

141
src/html_reverse_domains.py Executable file
View File

@@ -0,0 +1,141 @@
#!/usr/bin/env python3
import common_lib as mylib
import index_bundle_names
import index_reverse_domains
def a_app(bundle_id):
    """Return an HTML anchor to the app page of `bundle_id`.

    The link text is whatever index_bundle_names.get_name returns for the id.
    """
    label = index_bundle_names.get_name(bundle_id)
    return f'<a href="/app/{bundle_id}/">{label}</a>'
def a_dom(domain, key):
    """Return an HTML anchor to the lookup page `/<key>/` for `domain`.

    The domain is passed as URL fragment and is also used as the link text.
    """
    return f'<a href="/{key}/#{domain}">{domain}</a>'
def div_dom(domain, count, key):
    """Return the domain link followed by a "found in N app(s)" span.

    The plural form "apps" is used only when `count` is greater than one.
    """
    link = a_dom(domain, key)
    noun = 'apps' if count > 1 else 'app'
    return f'{link} <span>found in {count} {noun}</span>'
def dropdown_choose(button):
    """Return the HTML of the "Choose list" dropdown.

    `button` is the caption shown on the dropdown button; the three menu
    entries link to index.html, by_name.html and by_count.html.
    """
    rows = [
        '',  # the markup starts with a newline
        '<label for="dropdown">Choose list:</label>',
        '<div class="dropdown" name="dropdown">',
        '<button class="bg1 border">{}</button>'.format(button),
        '<div class="bg1 no_ul_all">',
        '<a href="index.html">Most frequent</a>',
        '<a href="by_name.html">Full list (AZ)</a>',
        '<a href="by_count.html">Full list (by count)</a>',
        '</div>',
        '</div>',
    ]
    return '\n'.join(rows)
def duo_list(list1, list2):
    """Return the two-column HTML listing of subdomains and domains.

    Both arguments are sequences of (domain, ids) pairs. `list1` fills the
    "Subdomains" column and `list2` the "Domains" column. Each heading shows
    the number of entries and a link that jumps to the other column.
    """
    def column(entries, key):
        # one line per domain, separated by <br>
        return '<br>\n'.join(div_dom(dom, len(ids), key) for dom, ids in entries)

    sub_html = column(list1, 'subdomain')
    par_html = column(list2, 'domain')
    return (
        '\n<div id="dom-toc" class="found-in">'
        '\n<div id="subdomains">'
        f'\n<h3>Subdomains ({len(list1)}) <a class="snd mg_lr" href="#domains">go to Domains</a></h3>'
        f'\n{sub_html}'
        '\n</div><div id="domains">'
        f'\n<h3>Domains ({len(list2)}) <a class="snd mg_lr" href="#subdomains">go to Subdomains</a></h3>'
        f'\n{par_html}'
        '\n</div>'
        '\n</div>'
    )
def gen_html_index(l1, l2, fname, title, button):
    """Write a full-list page (heading, dropdown, two-column list) to `fname`.

    `l1` and `l2` are passed to duo_list, `button` to dropdown_choose.
    `title` is used both for the <h2> heading and as the page title.
    """
    with open(fname, 'w') as fp:
        heading = '<h2>{}</h2>'.format(title)
        body = heading + dropdown_choose(button) + duo_list(l1, l2)
        fp.write(mylib.template_with_base(body, title=title))
def gen_html_top_domains(subset, fname, total, title):
    """Write the "most frequent domains" page to `fname`.

    `subset` is a sequence of (domain, ids) pairs. Each one becomes a
    paragraph with the domain link and a bar showing len(ids) as a rounded
    percentage of `total`. Links to the full lists and the JSON download
    are appended after the list.
    """
    def div_loadbar(percent):
        # the same number is used as bar width and as bar label
        return f'<span class="loadbar"><span style="width: {percent}%">{percent}%</span></span>'

    with open(fname, 'w') as fp:
        chunks = [f'\n<div id="dom-top10" class="found-in">\n<h2>{title}</h2>']
        for dom, ids in subset:
            found_in = div_dom(dom, len(ids), 'subdomain')
            share = round(len(ids) / total * 100)
            chunks.append(f'\n<p>{found_in} {div_loadbar(share)}</p>')
        chunks.append(
            '\n<p class="mg_top">Get full list'
            '\nsorted by <a class="snd" href="by_count.html">Occurrence frequency</a>'
            '\nor in <a class="snd" href="by_name.html">Alphabetical order</a>.</p>'
            '\n</div>'
            '\n<p class="right snd">Download: <a href="data.json" download="appcheck_domains_full.json">json</a></p>'
            '\n')
        fp.write(mylib.template_with_base(''.join(chunks), title=title))
def gen_html_lookup(html_dir, json, key, title):
    """Generate a client-side lookup page plus its two JSON data files.

    Written into `html_dir`:
    - apps.json: list of [bundle id, app name], one per entry of json['bundle']
    - doms.json: json[key] as is
    - index.html: skeleton page whose script calls lookup_domain_fragment
      with the two JSON file names and the ids of the placeholder elements
    """
    mylib.mkdir(html_dir)
    id_name_pairs = []
    for bundle_id in json['bundle']:
        id_name_pairs.append([bundle_id, index_bundle_names.get_name(bundle_id)])
    mylib.json_write(mylib.path_add(html_dir, 'apps.json'), id_name_pairs)
    mylib.json_write(mylib.path_add(html_dir, 'doms.json'), json[key])

    page_body = '\n'.join((
        '',  # leading newline
        '<h2 id="name"></h2>',
        '<p>Present in: <b id="num_apps">… applications</b></p>',
        '<h3>Apps containing this domain:</h3>',
        '<div id="app_list">loading…</div>',
        '<script type="text/javascript" src="/static/lookup-domain.js?1"></script>',
        '<script type="text/javascript">',
        "lookup_domain_fragment('doms.json', 'apps.json', 'name', 'num_apps', 'app_list');",
        '</script>',
        '',  # trailing newline
    ))
    with open(mylib.path_add(html_dir, 'index.html'), 'w') as fp:
        fp.write(mylib.template_with_base(page_body, title=title))
def process():
    """Generate all static pages of the reverse-domain index.

    Output below the 'index/domains' directory:
    - data.json: symlink to the reverse-domain index data file
    - by_name.html: all subdomains / domains, sorted alphabetically
    - by_count.html: the same lists, sorted by number of apps (descending)
    - index.html: the 20 subdomains found in the most apps
    Additionally the 'domain' and 'subdomain' lookup pages are generated.
    """
    # bundle_combine assures domain name is [a-zA-Z0-9.-]
    print('generating reverse-domain-index ...')
    idx_dir = mylib.path_out('index', 'domains')
    mylib.mkdir(idx_dir)

    # Data export. index.html links to a relative "data.json", so the link
    # has to sit in idx_dir; build its path like the other files in here.
    mylib.symlink(mylib.path_data_index('reverse_domains.json'),
                  mylib.path_add(idx_dir, 'data.json'))

    par_arr = list(index_reverse_domains.enumerate('pardom'))
    sub_arr = list(index_reverse_domains.enumerate('subdom'))

    # Full list (AZ)
    sub_arr.sort(key=lambda x: x[0])
    par_arr.sort(key=lambda x: x[0])
    gen_html_index(sub_arr, par_arr, mylib.path_add(idx_dir, 'by_name.html'),
                   title='Requested Domains (AZ)',
                   button='Full list (AZ)')

    # Full list (by count)
    sub_arr.sort(key=lambda x: -len(x[1]))
    par_arr.sort(key=lambda x: -len(x[1]))
    gen_html_index(sub_arr, par_arr, mylib.path_add(idx_dir, 'by_count.html'),
                   title='Requested Domains (most apps)',
                   button='Full list (by count)')

    # Top 20: sub_arr is still sorted by count, keep only its head
    del sub_arr[20:]
    del par_arr  # not needed anymore
    total = index_reverse_domains.number_of_apps()
    gen_html_top_domains(sub_arr, mylib.path_add(idx_dir, 'index.html'),
                         total, 'Top 20 Requested Domains')

    # Lookup
    reverse_index = index_reverse_domains.raw()
    gen_html_lookup(mylib.path_out('domain'), reverse_index, 'pardom',
                    title='Domain Lookup')
    gen_html_lookup(mylib.path_out('subdomain'), reverse_index, 'subdom',
                    title='Subdomain Lookup')
    print('')
# Regenerate the reverse-domain pages when this file is run as a script.
if __name__ == '__main__':
    process()

View File

@@ -24,14 +24,14 @@ def write_json_to_disk():
mylib.json_write(index_fname(), _bundle_name_dict, pretty=True) mylib.json_write(index_fname(), _bundle_name_dict, pretty=True)
def get_name(bundle_id, langs=['us', 'de']): def get_name(bundle_id, langs=['us', 'de'], fallback='&lt; App-Name &gt;'):
load_json_if_not_already() load_json_if_not_already()
for lang in langs: for lang in langs:
try: try:
return _bundle_name_dict[bundle_id][lang] return _bundle_name_dict[bundle_id][lang]
except KeyError: except KeyError:
continue continue
return '&lt; App-Name &gt;' # None return fallback # None
def process(bundle_ids): def process(bundle_ids):

View File

@@ -3,79 +3,103 @@
import sys import sys
import common_lib as mylib import common_lib as mylib
_reverse_domain_dict = None
def load_index_json(file_path):
if mylib.file_exists(file_path): def index_fname():
json = mylib.json_read(file_path) return mylib.path_data_index('reverse_domains.json')
def load_json_if_not_already():
global _reverse_domain_dict
if not _reverse_domain_dict:
index_file = index_fname()
if mylib.file_exists(index_file):
_reverse_domain_dict = mylib.json_read(index_file)
else: else:
json = dict({'bundle': [], 'pardom': dict(), 'subdom': dict()}) _reverse_domain_dict = {'bundle': [], 'pardom': {}, 'subdom': {}}
return json
def delete_from_index(index, bundle_ids, deleteOnly=False): def write_json_to_disk():
mylib.json_write(index_fname(), _reverse_domain_dict, pretty=False)
def delete_from_index(bundle_ids, deleteOnly=False):
global _reverse_domain_dict
ids_to_delete = set() ids_to_delete = set()
for bid in bundle_ids: for bid in bundle_ids:
try: try:
i = index['bundle'].index(bid) i = _reverse_domain_dict['bundle'].index(bid)
except ValueError: # index not found except ValueError: # index not found
continue continue
ids_to_delete.add(i) ids_to_delete.add(i)
if deleteOnly: if deleteOnly:
index['bundle'][i] = '_' _reverse_domain_dict['bundle'][i] = '_'
if len(ids_to_delete) == 0: if len(ids_to_delete) == 0:
return False return False
for key in ['pardom', 'subdom']: for key in ['pardom', 'subdom']:
for domain in list(index[key].keys()): for domain in list(_reverse_domain_dict[key].keys()):
for i in ids_to_delete: for i in ids_to_delete:
try: try:
index[key][domain].remove(i) _reverse_domain_dict[key][domain].remove(i)
except ValueError: # ignore if not present except ValueError: # ignore if not present
continue continue
if not index[key][domain]: if not _reverse_domain_dict[key][domain]:
del(index[key][domain]) del(_reverse_domain_dict[key][domain])
return True return True
def insert_in_index(index, bundle_ids): def insert_in_index(bundle_ids):
global _reverse_domain_dict
has_changes = False has_changes = False
for bid in bundle_ids: for bid in bundle_ids:
try: try:
i = index['bundle'].index(bid) i = _reverse_domain_dict['bundle'].index(bid)
except ValueError: # index not found except ValueError: # index not found
i = len(index['bundle']) i = len(_reverse_domain_dict['bundle'])
index['bundle'].append(bid) _reverse_domain_dict['bundle'].append(bid)
try:
json, _ = mylib.json_read_evaluated(bid) json, _ = mylib.json_read_evaluated(bid)
except FileNotFoundError:
continue
for key in ['pardom', 'subdom']: # assuming keys are identical for key in ['pardom', 'subdom']: # assuming keys are identical
for domain, _, _ in json[key]: for domain, _, _ in json[key]:
try: try:
index[key][domain].append(i) _reverse_domain_dict[key][domain].append(i)
except KeyError: except KeyError:
index[key][domain] = [i] _reverse_domain_dict[key][domain] = [i]
has_changes = True has_changes = True
return has_changes return has_changes
def raw():
    """Return the complete reverse-domain index, loading it first if needed.

    The returned object is the module-level dict itself, not a copy.
    """
    load_json_if_not_already()
    index = _reverse_domain_dict
    return index
def number_of_apps():
    """Return how many entries of the 'bundle' list are not the '_' placeholder."""
    load_json_if_not_already()
    counted = [bid for bid in _reverse_domain_dict['bundle'] if bid != '_']
    return len(counted)
def enumerate(key):
    """Yield [domain, bundle ids] for every domain stored under `key`.

    The index stores positions into the 'bundle' list per domain; they are
    resolved to the bundle ids here. This is a generator, so the index is
    only loaded once iteration starts.
    Note: inside this module the name shadows the builtin `enumerate`.
    """
    load_json_if_not_already()
    for domain, positions in _reverse_domain_dict[key].items():
        bundle_ids = []
        for pos in positions:
            bundle_ids.append(_reverse_domain_dict['bundle'][pos])
        yield [domain, bundle_ids]
def process(bundle_ids, deleteOnly=False): def process(bundle_ids, deleteOnly=False):
print('writing index: reverse domains ...') print('writing index: reverse domains ...')
index_file = mylib.path_data_index('reverse_domains.json')
if bundle_ids == ['*']: if bundle_ids == ['*']:
bundle_ids = list(mylib.enum_data_appids()) bundle_ids = list(mylib.enum_data_appids())
print(' full reset') print(' full reset')
mylib.rm_file(index_file) # rebuild from ground up mylib.rm_file(index_fname()) # rebuild from ground up
# load previous index
json = load_index_json(index_file) load_json_if_not_already()
# delete previous index entries did_change = delete_from_index(bundle_ids, deleteOnly=deleteOnly)
did_change = delete_from_index(json, bundle_ids, deleteOnly=deleteOnly)
# write new index to disk
if not deleteOnly: if not deleteOnly:
did_change |= insert_in_index(json, bundle_ids) did_change |= insert_in_index(bundle_ids)
if did_change: if did_change:
mylib.json_write(index_file, json, pretty=False) write_json_to_disk()
else: else:
print(' no change') print(' no change')
print('') print('')

View File

@@ -22,6 +22,7 @@
<nav> <nav>
<ul id="main-nav"> <ul id="main-nav">
<li><a href="/index/page/1/">All Apps</a></li> <li><a href="/index/page/1/">All Apps</a></li>
<li><a href="/index/domains/">Domains</a></li>
<li><a href="/help/">Help needed</a></li> <li><a href="/help/">Help needed</a></li>
<li><a class="no-ul" href="https://github.com/relikd/appcheck" target="_blank"><img src="/static/github.svg" alt="GitHub"></a></li> <li><a class="no-ul" href="https://github.com/relikd/appcheck" target="_blank"><img src="/static/github.svg" alt="GitHub"></a></li>
</ul> </ul>