From a1b319890039df45a95427324bb57dac5a38c7f4 Mon Sep 17 00:00:00 2001
From: relikd <info@relikd.de>
Date: Mon, 21 Sep 2020 11:55:39 +0200
Subject: [PATCH] Percentile graphs

---
 out/static/style.css   |  59 ++++++++++++----
 src/bundle_download.py |  51 ++++++++++++--
 src/common_lib.py      |  28 +-------
 src/html_bundle.py     |  93 +++++++++++++++++---------
 src/index_meta.py      | 148 +++++++++++++++++++++++++++++++----------
 5 files changed, 263 insertions(+), 116 deletions(-)

diff --git a/out/static/style.css b/out/static/style.css
index 5fe176f..add9759 100644
--- a/out/static/style.css
+++ b/out/static/style.css
@@ -61,9 +61,9 @@ footer .links {
 
 /* dropdown */
 .dropdown button {
-  padding: .5em 1em;
-  font-size: 16px;
-  cursor: pointer;
+	padding: .5em 1em;
+	font-size: 16px;
+	cursor: pointer;
 }
 .dropdown { display: inline-block; position: relative; }
 .dropdown nav {
@@ -100,7 +100,7 @@ footer .links {
 	margin: .5em auto 1em;
 	display: block;
 }
-#app-toc img, #get-appcheck img, #meta img {
+#app-toc img, img.app-icon {
 	border-radius: 21.5%;
 	border: .7px solid #ccc;
 }
@@ -151,11 +151,42 @@ p.subtitle { margin-top: .2em; }
 .center { text-align: center; }
 .bg1 { background: #eee; }
 .border { border: 1pt solid #ccc; }
-td { padding: .2em 1em .2em .1em; }
+
+/*#meta { margin-bottom: 2em; }*/
+#meta .icons { margin-bottom: 2em; }
+#meta .icons > *:first-child { margin-right: 1em; }
+#meta td { padding: .2em 1em .2em .1em; }
 #meta td:nth-child(2) { font-weight: bold }
 
+/* percentile */
+#stats {
+	display: grid;
+	grid-template-columns: repeat(3, max-content);
+	grid-gap: 0.7em 2em;
+	margin: 2em 0;
+}
+#stats .col1 { grid-column-start: 1; }
+#stats>div>h4 { margin: 0 0 .7em; }
+#stats>div>p { margin-top: .5em; }
+.percentile {
+	display: inline-block;
+	background: #EEE;
+	border: 1px solid #000;
+	width: 150px;
+	height: 1.2em;
+	padding-right: 3px;
+	vertical-align: top;
+}
+.percentile div {
+	position: relative;
+	background: #000;
+	width: 3px;
+	height: 100%;
+}
+.percentile.b div { background: #CA0D3A; }
+.percentile.g div { background: #6AC45C; }
+
 /* app bundle: domain tags */
-.tags { margin: 2em 0; }
 .tags a {
 	font-size: .9em;
 	font-style: normal;
@@ -172,6 +203,7 @@ p.trckr { font-size: .9em; margin-left: .5em; }
 
 /* app bundle: graphs */
 .dot-graph {
+	margin-bottom: 1.5em;
 	touch-action: manipulation;
 	user-select: none;
 	-webkit-user-select: none;
@@ -212,14 +244,15 @@ p.trckr { font-size: .9em; margin-left: .5em; }
 #help-links .done { color: #52C840; }
 
 /* responsive */
-@media(max-width: 647px) {
+@media(max-width: 900px) {
+	#stats { grid-template-columns: repeat(2, max-content); }
+}
+@media(max-width: 650px) {
 	header img { padding-right: 0; }
 	header h1 { font-size: 1em; }
 	header h1 span { display: none; } /* header subtitle */
-	main { padding-left: .5em; padding-right: .5em; }
+	main { padding-left: 1em; padding-right: 1em; }
 	footer .col3 div { width: 100%; padding: 0; } /* 3 columns */
-	#meta .icons { margin-bottom: 1em; } /* icons beside each other */
-	.pie-chart { float: right; }
 	#app-toc a { text-align: left; }
 	#app-toc div {
 		display: inline-block;
@@ -233,10 +266,10 @@ p.trckr { font-size: .9em; margin-left: .5em; }
 	#app-toc img {
 		float: left; width: 44px; height: 44px; margin: 0 .5em;
 	}
+	#stats { grid-template-columns: max-content; }
 }
-@media(min-width: 648px) {
-	#meta .icons { float: right; } /* icons below each other */
-	.pie-chart { margin-top: 1em; }
+@media(min-width: 651px) {
+	#meta .icons { float: right; }
 	#dom-toc h3 a { display: none; }
 	#dom-toc div:nth-child(1) {
 		display: inline-block;
diff --git a/src/bundle_download.py b/src/bundle_download.py
index 3f66ab7..fa37097 100755
--- a/src/bundle_download.py
+++ b/src/bundle_download.py
@@ -3,21 +3,58 @@
 import sys
 import common_lib as mylib
 
+AVAILABLE_LANGS = ['us', 'de']  # order matters: first lang found on disk wins
+
+
+def fname_for(bundle_id, lang):
+    return mylib.path_data_app(bundle_id, 'info_{}.json'.format(lang))
+
+
+def read_from_disk(bundle_id, lang):
+    return mylib.json_read(fname_for(bundle_id, lang))
+
+
+def read_first_from_disk(bundle_id, langs=AVAILABLE_LANGS):
+    for lang in langs:
+        if mylib.file_exists(fname_for(bundle_id, lang)):
+            return read_from_disk(bundle_id, lang)
+    return None
+
+
+def app_names(bundle_id):
+    def name_for(lang):
+        try:
+            return read_from_disk(bundle_id, lang)['trackCensoredName']
+        except Exception:
+            return None
+    ret = {}
+    for lang in AVAILABLE_LANGS:
+        name = name_for(lang)
+        if name:
+            ret[lang] = name
+    return ret
+
+
+def get_genres(bundle_id, langs=AVAILABLE_LANGS):
+    json = read_first_from_disk(bundle_id, langs=langs)
+    return list(zip(json['genreIds'], json['genres'])) if json else []
+
 
 def download_info(bundle_id, lang, force=False):
-    if force or not mylib.meta_json_exists(bundle_id, lang):
+    fname = fname_for(bundle_id, lang)
+    if force or not mylib.file_exists(fname):
         url = 'https://itunes.apple.com/lookup?bundleId={}&country={}'.format(
             bundle_id, lang.upper())
         json = mylib.download(url, isJSON=True)
         json = json['results'][0]
         # delete unused keys to save on storage
         for key in ['supportedDevices', 'releaseNotes', 'description',
-                    'screenshotUrls']:
+                    'screenshotUrls', 'ipadScreenshotUrls']:
             try:
                 del(json[key])
             except KeyError:
                 continue
-        mylib.json_write_meta(bundle_id, json, lang)
+        mylib.json_write(fname, json, pretty=True)
 
 
 def needs_icon_path(bundle_id):
@@ -25,14 +62,14 @@ def needs_icon_path(bundle_id):
     return (mylib.file_exists(icon_file), icon_file)
 
 
-def download_icon(bundle_id, force=False, langs=['us', 'de']):
+def download_icon(bundle_id, force=False, langs=AVAILABLE_LANGS):
     exists, icon_file = needs_icon_path(bundle_id)
     if force or not exists:
         json = None
         for lang in langs:
             if not json:
                 try:
-                    json = mylib.json_read_meta(bundle_id, lang)
+                    json = read_from_disk(bundle_id, lang)
                 except Exception:
                     continue
         image_url = json['artworkUrl100']  # fail early on KeyError
@@ -42,7 +79,7 @@ def download_icon(bundle_id, force=False, langs=['us', 'de']):
     return False
 
 
-def download_missing_icons(force=False, langs=['us', 'de']):
+def download_missing_icons(force=False, langs=AVAILABLE_LANGS):
     didAny = False
     for bid in mylib.enum_appids():
         exists, _ = needs_icon_path(bid)
@@ -67,7 +104,7 @@ def download(bundle_id, force=False):
         return False
 
     mylib.printf('  {} => '.format(bundle_id))
-    for lang in ['us', 'de']:
+    for lang in AVAILABLE_LANGS:
         try:
             mylib.printf(lang)
             download_info(bundle_id, lang, force=force)
diff --git a/src/common_lib.py b/src/common_lib.py
index a3465ca..47f2479 100755
--- a/src/common_lib.py
+++ b/src/common_lib.py
@@ -82,20 +82,6 @@ def valid_bundle_id(bundle_id):
     return regex_bundle_id.match(bundle_id)
 
 
-def app_names(bundle_id):
-    def name_for(lang):
-        try:
-            return json_read_meta(bundle_id, lang)['trackCensoredName']
-        except Exception:
-            return None
-    ret = {}
-    for lang in ['us', 'de']:
-        name = name_for(lang)
-        if name:
-            ret[lang] = name
-    return ret
-
-
 def err(scope, msg, logOnly=False):
     logger.error('[{}] {}'.format(scope, msg))
     if not logOnly:
@@ -168,13 +154,10 @@ def file_exists(path):
 
 def symlink(source, target):
     if not file_exists(target):
+        rm_file(target)  # file_exists is false if symlink can't be followed
         os.symlink(source, target)
 
 
-def meta_json_exists(bundle_id, lang):
-    return file_exists(path_data_app(bundle_id, 'info_{}.json'.format(lang)))
-
-
 def mkdir_out_app(bundle_id):
     out_dir = path_out_app(bundle_id)
     if not dir_exists(out_dir):
@@ -278,10 +261,6 @@ def json_read_evaluated(bundle_id):
     return json_read(pth), pth
 
 
-def json_read_meta(bundle_id, lang):
-    return json_read(path_data_app(bundle_id, 'info_{}.json'.format(lang)))
-
-
 # JSON write
 
 def json_write(path, obj, pretty=False):
@@ -297,8 +276,3 @@ def json_write_combined(bundle_id, obj):
 def json_write_evaluated(bundle_id, obj):
     fname = path_data_app(bundle_id, 'evaluated.json')
     json_write(fname, obj, pretty=False)
-
-
-def json_write_meta(bundle_id, obj, lang):
-    fname = path_data_app(bundle_id, 'info_{}.json'.format(lang))
-    json_write(fname, obj, pretty=True)
diff --git a/src/html_bundle.py b/src/html_bundle.py
index d7e8696..e28c500 100755
--- a/src/html_bundle.py
+++ b/src/html_bundle.py
@@ -4,18 +4,9 @@ import sys
 import time
 import math
 import common_lib as mylib
+import bundle_download
 import index_app_names
-
-
-def seconds_to_time(seconds):
-    seconds = int(seconds)
-    minutes, seconds = divmod(seconds, 60)
-    hours, minutes = divmod(minutes, 60)
-    return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds)
-
-
-def round_num(num):
-    return format(num, '.1f')  # .rstrip('0').rstrip('.')
+import index_meta
 
 
 def gen_dotgraph(sorted_arr):
@@ -59,8 +50,7 @@ def gen_pie_chart(parts, classes, stroke=0.6):
 
 
 def gen_radial_graph(percent):
-    return '<div class="pie-chart">{}</div>'.format(
-        gen_pie_chart([1 - percent, percent], ['cs0', 'cs1']))
+    return gen_pie_chart([1 - percent, percent], ['cs0', 'cs1'])
 
 
 def gen_dom_tags(sorted_arr, isSub, onlyTrackers=False):
@@ -81,45 +71,82 @@ def gen_dom_tags(sorted_arr, isSub, onlyTrackers=False):
 
 
 def gen_html(bundle_id, obj):
+
+    def round_num(num):
+        return format(num, '.1f')  # .rstrip('0').rstrip('.')
+
+    def as_pm(value):
+        return round_num(value) + '/min'
+
+    def as_percent(value):
+        return round_num(value * 100) + '%'
+
+    def as_date(value):
+        return '<time datetime="{}">{} UTC</time>'.format(
+            time.strftime('%Y-%m-%d %H:%M', time.gmtime(value)),
+            time.strftime('%Y-%m-%d, %H:%M', time.gmtime(value))
+        )
+
+    def seconds_to_time(seconds):
+        seconds = int(seconds)
+        minutes, seconds = divmod(seconds, 60)
+        hours, minutes = divmod(minutes, 60)
+        return '{:02d}:{:02d}:{:02d}'.format(hours, minutes, seconds)
+
+    def stat(col, title, rank, value, optional=None, fmt=str, fmt2=None):
+        # percent = int(rank[0] / max_rank * 100)
+        r = rank[0] / max_rank
+        detail = fmt2(value) if fmt2 else fmt(value)
+        if optional:
+            x = fmt(optional) if fmt2 else optional
+            detail += '<i class="snd mg_lr">({})</i>'.format(x)
+        return f'''
+<div class="col{col}">
+  <h4>{title}</h4>
+  <div class="percentile {'g' if r < 0.5 else 'b'}"><div style="left: {as_percent(r)}"></div></div>
+  <b class="mg_lr">{detail}</b>
+  <p class="snd">
+    Rank:&nbsp;<b>{rank[0]}</b>,
+    best:&nbsp;<i>{fmt(rank[1])}</i>,
+    worst:&nbsp;<i>{fmt(rank[2])}</i></p>
+</div>'''
+
     name = index_app_names.get_name(bundle_id)
+    gernes = bundle_download.get_genres(bundle_id)
+    rank, max_rank = index_meta.get_rank(bundle_id)
     obj['tracker'] = list(filter(lambda x: x[2], obj['subdom']))
     return mylib.template_with_base(f'''
 <h2 class="title">{name}</h2>
 <p class="subtitle snd"><i class="mg_lr">Bundle-id:</i>{ bundle_id }</p>
 <div id="meta">
   <div class="icons">
-    <img src="icon.png" width="100" height="100">
     { gen_radial_graph(obj['tracker_percent']) }
+    <img class="app-icon" src="icon.png" alt="app-icon" width="100" height="100">
   </div>
   <table>
-    <tr><td>Last update:</td><td><time datetime="{
-        time.strftime('%Y-%m-%d %H:%M', time.gmtime(obj['last_date']))
-    }">{
-        time.strftime('%Y-%m-%d, %H:%M', time.gmtime(obj['last_date']))
-    }</time></td></tr>
-    <tr><td>Number of recordings:</td><td>{ obj['sum_rec'] }</td></tr>
-    <tr><td>Total number of requests:</td><td>{
-        obj['sum_logs'] }<i class="snd mg_lr">({
-            round_num(obj['sum_logs_pm'])} / min)</i></td></tr>
-    <tr><td>Average number of requests:</td><td>{
-        round_num(obj['avg_logs'])}<i class="snd mg_lr">({
-            round_num(obj['avg_logs_pm'])} / min)</i></td></tr>
-    <tr><td>Average recording time:</td><td>{
-        seconds_to_time(obj['avg_time']) }</td></tr>
-    <tr><td>Cumulative recording time:</td><td>{
-        seconds_to_time(obj['sum_time']) }</td></tr>
+    <tr><td>App Categories:</td><td>{
+      ', '.join([name for i, name in gernes])
+    }</td></tr>
+    <tr><td>Last Update:</td><td>{as_date(obj['last_date'])}</td></tr>
   </table>
 </div>
+<div id="stats">
+  { stat(1, 'Number of recordings:', rank['sum_rec'], obj['sum_rec']) }
+  { stat(1, 'Average recording time:', rank['avg_time'], obj['avg_time'], fmt=seconds_to_time) }
+  { stat(2, 'Cumulative recording time:', rank['sum_time'], obj['sum_time'], fmt=seconds_to_time) }
+  { stat(1, 'Average number of requests:', rank['avg_logs_pm'], obj['avg_logs'], obj['avg_logs_pm'], fmt=as_pm, fmt2=round_num) }
+  { stat(2, 'Total number of requests:', rank['sum_logs_pm'], obj['sum_logs'], obj['sum_logs_pm'], fmt=as_pm, fmt2=str) }
+  { stat(1, 'Number of domains:', rank['pardom'], len(obj['pardom'])) }
+  { stat(2, 'Number of subdomains:', rank['subdom'], len(obj['subdom'])) }
+  { stat(3, 'Tracker percentage:', rank['tracker_percent'], obj['tracker_percent'], fmt=as_percent) }
+</div>
 <h3>Connections</h3>
 <div>
   <h4>Potential Trackers ({ len(obj['tracker']) }):</h4>
   { gen_dom_tags(obj['tracker'], isSub=True, onlyTrackers=True) }
-  <p></p>
-
   <h4>Domains ({ len(obj['pardom']) }):</h4>
   { gen_dotgraph(obj['pardom']) }
   { gen_dom_tags(obj['pardom'], isSub=False) }
-
   <h4>Subdomains ({ len(obj['subdom']) }):</h4>
   { gen_dotgraph(obj['subdom']) }
   { gen_dom_tags(obj['subdom'], isSub=True) }
diff --git a/src/index_meta.py b/src/index_meta.py
index 088916a..9175873 100755
--- a/src/index_meta.py
+++ b/src/index_meta.py
@@ -3,61 +3,137 @@
 import sys
 import common_lib as mylib
 
+_rank_dict = None
 
-def index_file():
-    return mylib.path_data_index('meta.json')
+
+def fname_app_summary():
+    return mylib.path_data_index('app_summary.json')
+
+
+def fname_app_rank():
+    return mylib.path_data_index('app_rank.json')
 
 
 def load_json_from_disk(fname):
     return mylib.json_read(fname) if mylib.file_exists(fname) else {}
 
 
-def load():
-    return load_json_from_disk(index_file())
-
-
-def get_total_counts():
-    try:
-        return load_json_from_disk(index_file())['_']
-    except KeyError:
-        return [0, 0]
-
-
-def process(bundle_ids, deleteOnly=False):
-    print('writing index: meta ...')
-    fname = index_file()
-    if bundle_ids == ['*']:
-        bundle_ids = list(mylib.enum_data_appids())
-        print('  full reset')
-        mylib.rm_file(fname)  # rebuild from ground up
-
-    # json format: `bundle-id : [#recordings, #logs, #domains, #subdomains]`
-    index = load_json_from_disk(fname)
-    for bid in bundle_ids:
-        # delete old value
+def try_del(index, keys):
+    for x in keys:
         try:
-            del(index[bid])
+            del(index[x])
         except KeyError:
             pass
+
+
+def json_to_list(json):
+    return [
+        json['sum_rec'],
+        json['sum_logs'],
+        json['sum_logs_pm'],
+        json['sum_time'],
+        json['avg_logs'],
+        json['avg_logs_pm'],
+        json['avg_time'],
+        json['last_date'],
+        len(json['pardom']),
+        len(json['subdom']),
+        json['tracker_percent']
+    ]
+
+
+def list_to_json(list):
+    return {
+        'sum_rec': list[0],
+        'sum_logs': list[1],
+        'sum_logs_pm': list[2],
+        'sum_time': list[3],
+        'avg_logs': list[4],
+        'avg_logs_pm': list[5],
+        'avg_time': list[6],
+        'last_date': list[7],
+        'pardom': list[8],
+        'subdom': list[9],
+        'tracker_percent': list[10]
+    }
+
+
+def write_summary_index(index, bundle_ids, deleteOnly=False):
+    for bid in bundle_ids:
+        # delete old value
+        try_del(index, [bid])
         if deleteOnly:
             continue
         # set new value
-        json, _ = mylib.json_read_evaluated(bid)
-        index[bid] = [json['sum_rec'], json['sum_logs'],
-                      len(json['pardom']), len(json['subdom'])]
+        evaluated_json, _ = mylib.json_read_evaluated(bid)
+        index[bid] = json_to_list(evaluated_json)
+
     # sum of counts
-    try:
-        del(index['_'])
-    except KeyError:
-        pass
+    try_del(index, ['_sum'])
     total = [0, 0]
     for val in index.values():
         total[0] += val[0]
         total[1] += val[1]
-    index['_'] = total
+    index['_sum'] = total
+    mylib.json_write(fname_app_summary(), index, pretty=False)
 
-    # write json
-    mylib.json_write(fname, index, pretty=False)
+
+def write_rank_index(index):
+    try_del(index, ['_sum', '_ranks', '_min', '_max'])
+    mins = []
+    maxs = []
+    for i in range(11):  # equal to number of array entries
+        tmp = {}
+        # make temporary reverse index
+        for bid, val in index.items():
+            try:
+                tmp[val[i]].append(bid)
+            except KeyError:
+                tmp[val[i]] = [bid]
+        # read index position from temp reverse index
+        r = 1
+        ordered = sorted(tmp.items(), reverse=i in [0, 3, 6, 7])
+        for idx, (_, ids) in enumerate(ordered):
+            for bid in ids:
+                index[bid][i] = r
+            r += len(ids)
+        mins.append(ordered[0][0])
+        maxs.append(ordered[-1][0])
+    index['_min'] = mins
+    index['_max'] = maxs
+    index['_ranks'] = len(index)
+    mylib.json_write(fname_app_rank(), index, pretty=False)
+
+
+def get_total_counts():
+    try:
+        return load_json_from_disk(fname_app_summary())['_sum']
+    except KeyError:
+        return [0, 0]
+
+
+def get_rank(bundle_id):
+    ''' Return ({stat: (rank, min_value, max_value)}, max_rank) '''
+    global _rank_dict
+    if not _rank_dict:
+        _rank_dict = load_json_from_disk(fname_app_rank())
+    return list_to_json(list(zip(
+        _rank_dict[bundle_id],
+        _rank_dict['_min'],
+        _rank_dict['_max'],
+    ))), _rank_dict['_ranks']
+
+
+def process(bundle_ids, deleteOnly=False):
+    print('writing index: meta ...')
+    if bundle_ids == ['*']:
+        bundle_ids = list(mylib.enum_data_appids())
+        print('  full reset')
+        mylib.rm_file(fname_app_summary())  # rebuild from ground up
+
+    index = load_json_from_disk(fname_app_summary())
+    write_summary_index(index, bundle_ids, deleteOnly=deleteOnly)
+    write_rank_index(index)
     print('')