Percentile graphs

This commit is contained in:
relikd
2020-09-21 11:55:39 +02:00
parent 562048455b
commit a1b3198900
5 changed files with 263 additions and 116 deletions

View File

@@ -100,7 +100,7 @@ footer .links {
margin: .5em auto 1em;
display: block;
}
#app-toc img, #get-appcheck img, #meta img {
#app-toc img, img.app-icon {
border-radius: 21.5%;
border: .7px solid #ccc;
}
@@ -151,11 +151,42 @@ p.subtitle { margin-top: .2em; }
.center { text-align: center; }
.bg1 { background: #eee; }
.border { border: 1pt solid #ccc; }
td { padding: .2em 1em .2em .1em; }
/*#meta { margin-bottom: 2em; }*/
#meta .icons { margin-bottom: 2em; }
#meta .icons > *:first-child { margin-right: 1em; }
#meta td { padding: .2em 1em .2em .1em; }
#meta td:nth-child(2) { font-weight: bold }
/* percentile */
/* Grid holding the statistic cells: three columns, each sized to its content. */
#stats {
display: grid;
grid-template-columns: repeat(3, max-content);
grid-gap: 0.7em 2em;
margin: 2em 0;
}
/* A cell with class col1 is pinned to the first grid column. */
#stats .col1 { grid-column-start: 1; }
#stats>div>h4 { margin: 0 0 .7em; }
#stats>div>p { margin-top: .5em; }
/* Percentile bar: fixed-size track; the inner div is the position marker. */
.percentile {
display: inline-block;
background: #EEE;
border: 1px solid #000;
width: 150px;
height: 1.2em;
/* same width as the marker below; presumably keeps a marker at
   left: 100% inside the border (TODO confirm box-sizing) */
padding-right: 3px;
vertical-align: top;
}
/* Marker: 3px wide, full height; relatively positioned so that a `left`
   offset moves it along the track. */
.percentile div {
position: relative;
background: #000;
width: 3px;
height: 100%;
}
/* Marker colour variants: .b is red, .g is green. */
.percentile.b div { background: #CA0D3A; }
.percentile.g div { background: #6AC45C; }
/* app bundle: domain tags */
.tags { margin: 2em 0; }
.tags a {
font-size: .9em;
font-style: normal;
@@ -172,6 +203,7 @@ p.trckr { font-size: .9em; margin-left: .5em; }
/* app bundle: graphs */
.dot-graph {
margin-bottom: 1.5em;
touch-action: manipulation;
user-select: none;
-webkit-user-select: none;
@@ -212,14 +244,15 @@ p.trckr { font-size: .9em; margin-left: .5em; }
#help-links .done { color: #52C840; }
/* responsive */
@media(max-width: 647px) {
@media(max-width: 900px) {
#stats { grid-template-columns: repeat(2, max-content); }
}
@media(max-width: 650px) {
header img { padding-right: 0; }
header h1 { font-size: 1em; }
header h1 span { display: none; } /* header subtitle */
main { padding-left: .5em; padding-right: .5em; }
main { padding-left: 1em; padding-right: 1em; }
footer .col3 div { width: 100%; padding: 0; } /* 3 columns */
#meta .icons { margin-bottom: 1em; } /* icons beside each other */
.pie-chart { float: right; }
#app-toc a { text-align: left; }
#app-toc div {
display: inline-block;
@@ -233,10 +266,10 @@ p.trckr { font-size: .9em; margin-left: .5em; }
#app-toc img {
float: left; width: 44px; height: 44px; margin: 0 .5em;
}
#stats { grid-template-columns: max-content; }
}
@media(min-width: 648px) {
#meta .icons { float: right; } /* icons below each other */
.pie-chart { margin-top: 1em; }
@media(min-width: 651px) {
#meta .icons { float: right; }
#dom-toc h3 a { display: none; }
#dom-toc div:nth-child(1) {
display: inline-block;

View File

@@ -3,21 +3,58 @@
import sys
import common_lib as mylib
AVAILABLE_LANGS = ['us', 'de'] # order matters
def fname_for(bundle_id, lang):
    ''' Return the path of `info_<lang>.json` for `bundle_id`,
    as resolved by `mylib.path_data_app`. '''
    filename = 'info_{}.json'.format(lang)
    return mylib.path_data_app(bundle_id, filename)
def read_from_disk(bundle_id, lang):
    ''' Read and return the parsed `info_<lang>.json` of `bundle_id`. '''
    path = fname_for(bundle_id, lang)
    return mylib.json_read(path)
def read_first_from_disk(bundle_id, langs=AVAILABLE_LANGS):
    ''' Return the info json of the first language in `langs` whose file
    exists on disk, or `None` if no such file exists. '''
    for lang in langs:
        if not mylib.file_exists(fname_for(bundle_id, lang)):
            continue  # nothing stored for this language, try the next one
        return read_from_disk(bundle_id, lang)
    return None
def app_names(bundle_id):
    ''' Return a dict `lang -> trackCensoredName` for `bundle_id`.

    Languages whose info json cannot be read, lacks the key, or holds an
    empty name are left out of the result. '''
    names = {}
    for lang in AVAILABLE_LANGS:
        try:
            title = read_from_disk(bundle_id, lang)['trackCensoredName']
        except Exception:
            title = None  # best effort: treat any failure as "no name"
        if title:
            names[lang] = title
    return names
def get_genres(bundle_id, langs=AVAILABLE_LANGS):
    ''' Return a list of `(genre_id, genre_name)` pairs taken from the first
    available info json, or `[]` if none is available. '''
    info = read_first_from_disk(bundle_id, langs=langs)
    if not info:
        return []
    return list(zip(info['genreIds'], info['genres']))
def download_info(bundle_id, lang, force=False):
if force or not mylib.meta_json_exists(bundle_id, lang):
fname = fname_for(bundle_id, lang)
if force or not mylib.file_exists(fname):
url = 'https://itunes.apple.com/lookup?bundleId={}&country={}'.format(
bundle_id, lang.upper())
json = mylib.download(url, isJSON=True)
json = json['results'][0]
# delete unused keys to save on storage
for key in ['supportedDevices', 'releaseNotes', 'description',
'screenshotUrls']:
'screenshotUrls', 'ipadScreenshotUrls']:
try:
del(json[key])
except KeyError:
continue
mylib.json_write_meta(bundle_id, json, lang)
mylib.json_write(fname, json, pretty=True)
def needs_icon_path(bundle_id):
@@ -25,14 +62,14 @@ def needs_icon_path(bundle_id):
return (mylib.file_exists(icon_file), icon_file)
def download_icon(bundle_id, force=False, langs=['us', 'de']):
def download_icon(bundle_id, force=False, langs=AVAILABLE_LANGS):
exists, icon_file = needs_icon_path(bundle_id)
if force or not exists:
json = None
for lang in langs:
if not json:
try:
json = mylib.json_read_meta(bundle_id, lang)
json = read_from_disk(bundle_id, lang)
except Exception:
continue
image_url = json['artworkUrl100'] # fail early on KeyError
@@ -42,7 +79,7 @@ def download_icon(bundle_id, force=False, langs=['us', 'de']):
return False
def download_missing_icons(force=False, langs=['us', 'de']):
def download_missing_icons(force=False, langs=AVAILABLE_LANGS):
didAny = False
for bid in mylib.enum_appids():
exists, _ = needs_icon_path(bid)
@@ -67,7 +104,7 @@ def download(bundle_id, force=False):
return False
mylib.printf(' {} => '.format(bundle_id))
for lang in ['us', 'de']:
for lang in AVAILABLE_LANGS:
try:
mylib.printf(lang)
download_info(bundle_id, lang, force=force)

View File

@@ -82,20 +82,6 @@ def valid_bundle_id(bundle_id):
return regex_bundle_id.match(bundle_id)
def app_names(bundle_id):
    ''' Return a dict `lang -> trackCensoredName` for `bundle_id`.

    Languages whose meta json cannot be read, lacks the key, or holds an
    empty name are left out of the result. '''
    names = {}
    for lang in ['us', 'de']:
        try:
            title = json_read_meta(bundle_id, lang)['trackCensoredName']
        except Exception:
            title = None  # best effort: treat any failure as "no name"
        if title:
            names[lang] = title
    return names
def err(scope, msg, logOnly=False):
logger.error('[{}] {}'.format(scope, msg))
if not logOnly:
@@ -168,13 +154,10 @@ def file_exists(path):
def symlink(source, target):
    ''' Create a symlink at `target` pointing to `source`, unless
    `file_exists(target)` already holds. '''
    if file_exists(target):
        return
    # file_exists is also false if a symlink can't be followed,
    # so remove whatever is left at target before linking
    rm_file(target)
    os.symlink(source, target)
def meta_json_exists(bundle_id, lang):
    ''' Return whether `info_<lang>.json` exists for `bundle_id`. '''
    path = path_data_app(bundle_id, 'info_{}.json'.format(lang))
    return file_exists(path)
def mkdir_out_app(bundle_id):
out_dir = path_out_app(bundle_id)
if not dir_exists(out_dir):
@@ -278,10 +261,6 @@ def json_read_evaluated(bundle_id):
return json_read(pth), pth
def json_read_meta(bundle_id, lang):
    ''' Read and return the parsed `info_<lang>.json` of `bundle_id`. '''
    path = path_data_app(bundle_id, 'info_{}.json'.format(lang))
    return json_read(path)
# JSON write
def json_write(path, obj, pretty=False):
@@ -297,8 +276,3 @@ def json_write_combined(bundle_id, obj):
def json_write_evaluated(bundle_id, obj):
    ''' Write `obj` to the app's `evaluated.json` (not pretty-printed). '''
    json_write(path_data_app(bundle_id, 'evaluated.json'), obj, pretty=False)
def json_write_meta(bundle_id, obj, lang):
    ''' Write `obj` to the app's `info_<lang>.json` (pretty-printed). '''
    filename = 'info_{}.json'.format(lang)
    json_write(path_data_app(bundle_id, filename), obj, pretty=True)

View File

@@ -4,18 +4,9 @@ import sys
import time
import math
import common_lib as mylib
import bundle_download
import index_app_names
def seconds_to_time(seconds):
    ''' Format a duration in seconds as `HH:MM:SS`.

    Fractions are truncated via `int()`; hours are not capped at 24. '''
    total = int(seconds)
    hours, remainder = divmod(total, 3600)
    minutes, secs = divmod(remainder, 60)
    return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, secs)
def round_num(num):
    ''' Return `num` as a string with exactly one decimal place. '''
    return '{:.1f}'.format(num)
import index_meta
def gen_dotgraph(sorted_arr):
@@ -59,8 +50,7 @@ def gen_pie_chart(parts, classes, stroke=0.6):
def gen_radial_graph(percent):
return '<div class="pie-chart">{}</div>'.format(
gen_pie_chart([1 - percent, percent], ['cs0', 'cs1']))
return gen_pie_chart([1 - percent, percent], ['cs0', 'cs1'])
def gen_dom_tags(sorted_arr, isSub, onlyTrackers=False):
@@ -81,45 +71,82 @@ def gen_dom_tags(sorted_arr, isSub, onlyTrackers=False):
def gen_html(bundle_id, obj):
def round_num(num):
return format(num, '.1f') # .rstrip('0').rstrip('.')
def as_pm(value):
return round_num(value) + '/min'
def as_percent(value):
return round_num(value * 100) + '%'
def as_date(value):
return '<time datetime="{}">{} UTC</time>'.format(
time.strftime('%Y-%m-%d %H:%M', time.gmtime(value)),
time.strftime('%Y-%m-%d, %H:%M', time.gmtime(value))
)
def seconds_to_time(seconds):
seconds = int(seconds)
minutes, seconds = divmod(seconds, 60)
hours, minutes = divmod(minutes, 60)
return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds)
def stat(col, title, rank, value, optional=None, fmt=str, fmt2=None):
# percent = int(rank[0] / max_rank * 100)
r = rank[0] / max_rank
detail = fmt2(value) if fmt2 else fmt(value)
if optional:
x = fmt(optional) if fmt2 else optional
detail += '<i class="snd mg_lr">({})</i>'.format(x)
return f'''
<div class="col{col}">
<h4>{title}</h4>
<div class="percentile {'g' if r < 0.5 else 'b'}"><div style="left: {as_percent(r)}"></div></div>
<b class="mg_lr">{detail}</b>
<p class="snd">
Rank:&nbsp;<b>{rank[0]}</b>,
best:&nbsp;<i>{fmt(rank[1])}</i>,
worst:&nbsp;<i>{fmt(rank[2])}</i></p>
</div>'''
name = index_app_names.get_name(bundle_id)
gernes = bundle_download.get_genres(bundle_id)
rank, max_rank = index_meta.get_rank(bundle_id)
obj['tracker'] = list(filter(lambda x: x[2], obj['subdom']))
return mylib.template_with_base(f'''
<h2 class="title">{name}</h2>
<p class="subtitle snd"><i class="mg_lr">Bundle-id:</i>{ bundle_id }</p>
<div id="meta">
<div class="icons">
<img src="icon.png" width="100" height="100">
{ gen_radial_graph(obj['tracker_percent']) }
<img class="app-icon" src="icon.png" alt="app-icon" width="100" height="100">
</div>
<table>
<tr><td>Last update:</td><td><time datetime="{
time.strftime('%Y-%m-%d %H:%M', time.gmtime(obj['last_date']))
}">{
time.strftime('%Y-%m-%d, %H:%M', time.gmtime(obj['last_date']))
}</time></td></tr>
<tr><td>Number of recordings:</td><td>{ obj['sum_rec'] }</td></tr>
<tr><td>Total number of requests:</td><td>{
obj['sum_logs'] }<i class="snd mg_lr">({
round_num(obj['sum_logs_pm'])} / min)</i></td></tr>
<tr><td>Average number of requests:</td><td>{
round_num(obj['avg_logs'])}<i class="snd mg_lr">({
round_num(obj['avg_logs_pm'])} / min)</i></td></tr>
<tr><td>Average recording time:</td><td>{
seconds_to_time(obj['avg_time']) }</td></tr>
<tr><td>Cumulative recording time:</td><td>{
seconds_to_time(obj['sum_time']) }</td></tr>
<tr><td>App Categories:</td><td>{
', '.join([name for i, name in gernes])
}</td></tr>
<tr><td>Last Update:</td><td>{as_date(obj['last_date'])}</td></tr>
</table>
</div>
<div id="stats">
{ stat(1, 'Number of recordings:', rank['sum_rec'], obj['sum_rec']) }
{ stat(1, 'Average recording time:', rank['avg_time'], obj['avg_time'], fmt=seconds_to_time) }
{ stat(2, 'Cumulative recording time:', rank['sum_time'], obj['sum_time'], fmt=seconds_to_time) }
{ stat(1, 'Average number of requests:', rank['avg_logs_pm'], obj['avg_logs'], obj['avg_logs_pm'], fmt=as_pm, fmt2=round_num) }
{ stat(2, 'Total number of requests:', rank['sum_logs_pm'], obj['sum_logs'], obj['sum_logs_pm'], fmt=as_pm, fmt2=str) }
{ stat(1, 'Number of domains:', rank['pardom'], len(obj['pardom'])) }
{ stat(2, 'Number of subdomains:', rank['subdom'], len(obj['subdom'])) }
{ stat(3, 'Tracker percentage:', rank['tracker_percent'], obj['tracker_percent'], fmt=as_percent) }
</div>
<h3>Connections</h3>
<div>
<h4>Potential Trackers ({ len(obj['tracker']) }):</h4>
{ gen_dom_tags(obj['tracker'], isSub=True, onlyTrackers=True) }
<p></p>
<h4>Domains ({ len(obj['pardom']) }):</h4>
{ gen_dotgraph(obj['pardom']) }
{ gen_dom_tags(obj['pardom'], isSub=False) }
<h4>Subdomains ({ len(obj['subdom']) }):</h4>
{ gen_dotgraph(obj['subdom']) }
{ gen_dom_tags(obj['subdom'], isSub=True) }

View File

@@ -3,61 +3,137 @@
import sys
import common_lib as mylib
_rank_dict = None
def index_file():
    ''' Return the path of the `meta.json` index file. '''
    path = mylib.path_data_index('meta.json')
    return path
def fname_app_summary():
    ''' Return the path of the `app_summary.json` index file. '''
    path = mylib.path_data_index('app_summary.json')
    return path
def fname_app_rank():
    ''' Return the path of the `app_rank.json` index file. '''
    path = mylib.path_data_index('app_rank.json')
    return path
def load_json_from_disk(fname):
    ''' Return the parsed json stored at `fname`, or `{}` if the file
    does not exist. '''
    if not mylib.file_exists(fname):
        return {}
    return mylib.json_read(fname)
def load():
    ''' Return the content of the index file, `{}` if it does not exist. '''
    fname = index_file()
    return load_json_from_disk(fname)
def get_total_counts():
def try_del(index, keys):
for x in keys:
try:
return load_json_from_disk(index_file())['_']
except KeyError:
return [0, 0]
def process(bundle_ids, deleteOnly=False):
print('writing index: meta ...')
fname = index_file()
if bundle_ids == ['*']:
bundle_ids = list(mylib.enum_data_appids())
print(' full reset')
mylib.rm_file(fname) # rebuild from ground up
# json format: `bundle-id : [#recordings, #logs, #domains, #subdomains]`
index = load_json_from_disk(fname)
for bid in bundle_ids:
# delete old value
try:
del(index[bid])
del(index[x])
except KeyError:
pass
def json_to_list(json):
    ''' Flatten an evaluated-app dict into the fixed 11 entry list layout.

    The first eight entries are copied as is, `pardom` and `subdom` are
    reduced to their lengths and `tracker_percent` comes last.
    Inverse key mapping: `list_to_json`. '''
    copied_keys = ('sum_rec', 'sum_logs', 'sum_logs_pm', 'sum_time',
                   'avg_logs', 'avg_logs_pm', 'avg_time', 'last_date')
    row = [json[key] for key in copied_keys]
    row.append(len(json['pardom']))
    row.append(len(json['subdom']))
    row.append(json['tracker_percent'])
    return row
def list_to_json(list):
    ''' Turn an 11 entry list (layout of `json_to_list`) back into a dict.

    Entries beyond index 10 are ignored; a shorter list raises IndexError. '''
    keys = ('sum_rec', 'sum_logs', 'sum_logs_pm', 'sum_time', 'avg_logs',
            'avg_logs_pm', 'avg_time', 'last_date', 'pardom', 'subdom',
            'tracker_percent')
    # index explicitly (instead of zip) so that a short list still fails
    return {key: list[pos] for pos, key in enumerate(keys)}
def write_summary_index(index, bundle_ids, deleteOnly=False):
for bid in bundle_ids:
# delete old value
try_del(index, [bid])
if deleteOnly:
continue
# set new value
json, _ = mylib.json_read_evaluated(bid)
index[bid] = [json['sum_rec'], json['sum_logs'],
len(json['pardom']), len(json['subdom'])]
evaluated_json, _ = mylib.json_read_evaluated(bid)
index[bid] = json_to_list(evaluated_json)
# sum of counts
try:
del(index['_'])
except KeyError:
pass
try_del(index, ['_sum'])
total = [0, 0]
for val in index.values():
total[0] += val[0]
total[1] += val[1]
index['_'] = total
index['_sum'] = total
mylib.json_write(fname_app_summary(), index, pretty=False)
# write json
mylib.json_write(fname, index, pretty=False)
def write_rank_index(index):
    ''' Convert the summary index into a rank index and write it to disk.

    `index` maps bundle-id -> list of 11 values (layout of `json_to_list`)
    and is modified in place: each value is replaced by the rank of the app
    within that column. Apps with equal values share a rank and the ranks
    after them are skipped accordingly (1, 2, 2, 4, ...). Columns 0, 3, 6
    and 7 are ranked descending (largest value is rank 1), all other
    columns ascending.

    Added bookkeeping keys:
      `_min` / `_max`: per column, the value ranked first / last
                       (empty lists if there are no apps)
      `_ranks`: number of ranked apps
    '''
    try_del(index, ['_sum', '_ranks', '_min', '_max'])
    # count apps now, before the '_min' / '_max' keys are inserted,
    # otherwise the bookkeeping keys would be counted as apps
    app_count = len(index)
    mins = []
    maxs = []
    if app_count:
        for i in range(11):  # equal to number of array entries
            # temporary reverse index: value -> bundle-ids with that value
            tmp = {}
            for bid, val in index.items():
                tmp.setdefault(val[i], []).append(bid)
            ordered = sorted(tmp.items(), reverse=i in [0, 3, 6, 7])
            # replace the values by their rank, ties share the same rank
            r = 1
            for _, ids in ordered:
                for bid in ids:
                    index[bid][i] = r
                r += len(ids)
            mins.append(ordered[0][0])
            maxs.append(ordered[-1][0])
    index['_min'] = mins
    index['_max'] = maxs
    index['_ranks'] = app_count
    mylib.json_write(fname_app_rank(), index, pretty=False)
def get_total_counts():
    ''' Return the `_sum` entry of the app summary index,
    or `[0, 0]` if the index has no such entry. '''
    try:
        summary = load_json_from_disk(fname_app_summary())
        totals = summary['_sum']
    except KeyError:
        totals = [0, 0]
    return totals
def get_rank(bundle_id):
    ''' Return `(ranks, max_rank)` for `bundle_id`.

    `ranks` is a dict with the keys of `list_to_json` whose values are
    `(rank, min_value, max_value)` tuples; `max_rank` is the `_ranks`
    entry of the rank index. The index is kept in the module global
    `_rank_dict` and only read from disk while that global is empty.
    Raises KeyError if `bundle_id` is not part of the rank index. '''
    global _rank_dict
    if not _rank_dict:
        _rank_dict = load_json_from_disk(fname_app_rank())
    ranks = _rank_dict
    triples = list(zip(ranks[bundle_id], ranks['_min'], ranks['_max']))
    return list_to_json(triples), ranks['_ranks']
def process(bundle_ids, deleteOnly=False):
    ''' Update the summary index for `bundle_ids`, then rewrite the rank index.

    `['*']` selects all ids of `mylib.enum_data_appids()` and removes the
    summary file first, so the index is rebuilt from the ground up.
    `deleteOnly` is passed on to `write_summary_index`. '''
    print('writing index: meta ...')
    rebuild_all = bundle_ids == ['*']
    if rebuild_all:
        bundle_ids = list(mylib.enum_data_appids())
        print(' full reset')
        mylib.rm_file(fname_app_summary())  # rebuild from ground up
    summary = load_json_from_disk(fname_app_summary())
    write_summary_index(summary, bundle_ids, deleteOnly=deleteOnly)
    # ranks are derived from the summary values that were just updated
    write_rank_index(summary)
    print('')