From b41a8073132713b10d1faf183779160ba5f703ce Mon Sep 17 00:00:00 2001
From: relikd <info@relikd.de>
Date: Tue, 15 Sep 2020 17:03:33 +0200
Subject: [PATCH] Json download

---
 out/static/style.css  |  3 +-
 src/bundle_combine.py | 41 +++++++++++++++++++++++++
 src/common_lib.py     | 10 +++++++
 src/html_bundle.py    | 54 +++++++--------------------------
 src/html_root.py      | 70 +++++++++++++++++++++++--------------------
 templates/base.html   |  4 +--
 6 files changed, 102 insertions(+), 80 deletions(-)

diff --git a/out/static/style.css b/out/static/style.css
index cc0dbe8..ee83893 100644
--- a/out/static/style.css
+++ b/out/static/style.css
@@ -90,7 +90,7 @@ footer .links a { color: #ddd; }
 #pagination a.active { border: 1pt solid black; border-radius: 0.2em; }
 
 /* app bundle */
-p { max-width: 700px; }
+.squeeze { max-width: 700px; }
 h2.title { margin-bottom: 0; }
 p.subtitle { margin-top: 0.2em; }
 .mg_lr { margin: 0 0.4em; }
@@ -102,6 +102,7 @@ td { padding: 0.2em 1em 0.2em 0.1em; }
 .help-links tr:nth-child(odd) { background: #F9F9F9; }
 .help-links .notyet { color: #D11; }
 .help-links .done { color: #52C840; }
+.right { text-align: right; }
 
 /* domain tags */
 .tags { margin: 2em 0; }
diff --git a/src/bundle_combine.py b/src/bundle_combine.py
index 71ed236..ce10cb2 100755
--- a/src/bundle_combine.py
+++ b/src/bundle_combine.py
@@ -6,6 +6,9 @@ import common_lib as mylib
 import tracker_download as tracker
 
 
+THRESHOLD_PERCENT_OF_LOGS = 0.33  # domain appears in % recordings
+THRESHOLD_MIN_AVG_LOGS = 0.4  # min. avg. logs per recording (after %-thresh)
+
 level3_doms = None
 
 
@@ -63,6 +66,42 @@ def json_combine(bundle_id):
     return res
 
 
+def json_evaluate_inplace(obj):
+    """Condense combined recording data in `obj` to summary stats, in place.
+
+    Drops 'rec_len', adds sum_*/avg_*/tracker_percent keys and rewrites
+    'pardom'/'subdom' as filtered, sorted [name, avg, is_tracker] lists.
+    """
+    obj['name'] = obj['name'] or '&lt; App-Name &gt;'
+    rec_len = obj.pop('rec_len')
+    rec_count, time_total = len(rec_len), sum(rec_len)
+    rec_div = rec_count or 1  # no recordings -> avoid ZeroDivisionError
+    obj['sum_rec'] = rec_count
+    obj['sum_logs'] = sum(sum(x[1]) for x in obj['pardom'].values())
+    obj['sum_logs_pm'] = obj['sum_logs'] / (time_total or 1) * 60
+    obj['sum_time'] = time_total
+    obj['avg_time'] = time_total / rec_div
+
+    def transform(ddic):
+        res, c_sum, c_trkr = [], 0, 0
+        for name, (is_tracker, counts) in ddic.items():
+            avg = sum(counts) / rec_div  # averaged over all recordings
+            if (len(counts) / rec_div < THRESHOLD_PERCENT_OF_LOGS
+                    or avg < THRESHOLD_MIN_AVG_LOGS):
+                continue  # below one of the two thresholds -> not listed
+            res.append([name, round(avg + 0.001), is_tracker])
+            c_sum += avg
+            c_trkr += avg if is_tracker else 0
+        res.sort(key=lambda x: (-x[1], x[0]))  # sort by count desc, then name
+        return res, c_trkr, c_sum
+
+    obj['pardom'], _, _ = transform(obj['pardom'])
+    obj['subdom'], s_t, s_c = transform(obj['subdom'])
+    obj['tracker_percent'] = s_t / (s_c or 1)
+    obj['avg_logs'] = s_c
+    obj['avg_logs_pm'] = s_c / (obj['avg_time'] or 1) * 60
+
+
 def process(bundle_ids, where=None):
     print('writing combined json ...')
     if bundle_ids == ['*']:
@@ -83,6 +122,8 @@ def process(bundle_ids, where=None):
         if should_update:
             print('  ' + bid)
             mylib.json_write_combined(bid, obj)
+            json_evaluate_inplace(obj)
+            mylib.json_write_evaluated(bid, obj)
             affected_ids.append(bid)
     print('')
     return affected_ids
diff --git a/src/common_lib.py b/src/common_lib.py
index 5eba2c3..4804c48 100755
--- a/src/common_lib.py
+++ b/src/common_lib.py
@@ -254,6 +254,11 @@ def json_read_combined(bundle_id):
     return json_read(path_data_app(bundle_id, 'combined.json'))
 
 
+def json_read_evaluated(bundle_id):
+    fname = path_data_app(bundle_id, 'evaluated.json')
+    return json_read(fname), fname  # (parsed json, path it was read from)
+
+
 def json_read_meta(bundle_id, lang):
     return json_read(path_data_app(bundle_id, 'info_{}.json'.format(lang)))
 
@@ -270,6 +275,11 @@ def json_write_combined(bundle_id, obj):
     json_write(fname, obj, pretty=False)
 
 
+def json_write_evaluated(bundle_id, obj):
+    # Write obj as compact (non-pretty) JSON to the bundle's evaluated.json
+    json_write(path_data_app(bundle_id, 'evaluated.json'), obj, pretty=False)
+
+
 def json_write_meta(bundle_id, obj, lang):
     fname = path_data_app(bundle_id, 'info_{}.json'.format(lang))
     json_write(fname, obj, pretty=True)
diff --git a/src/html_bundle.py b/src/html_bundle.py
index c78ef87..dd7de72 100755
--- a/src/html_bundle.py
+++ b/src/html_bundle.py
@@ -1,13 +1,11 @@
 #!/usr/bin/env python3
 
+import os
 import sys
 import time
 import math
 import common_lib as mylib
 
-THRESHOLD_PERCENT_OF_LOGS = 0.33  # domain appears in % recordings
-THRESHOLD_MIN_AVG_LOGS = 0.4  # at least x times in total (after %-thresh)
-
 
 def seconds_to_time(seconds):
     seconds = int(seconds)
@@ -77,44 +75,8 @@ def gen_dom_tags(sorted_arr, onlyTrackers=False):
         return '<i>– None –</i>'
 
 
-def prepare_json(obj):
-    if not obj['name']:
-        obj['name'] = '&lt; App-Name &gt;'
-    rec_count = len(obj['rec_len'])
-    time_total = sum(obj['rec_len'])
-    obj['sum_rec'] = rec_count
-    obj['sum_logs'] = sum([sum(x[1]) for x in obj['pardom'].values()])
-    obj['sum_logs_pm'] = obj['sum_logs'] / (time_total or 1) * 60
-    obj['sum_time'] = time_total
-    obj['avg_time'] = time_total / rec_count
-
-    def transform(ddic):
-        res = list()
-        c_sum = 0
-        c_trkr = 0
-        for name, (is_tracker, counts) in ddic.items():
-            rec_percent = len(counts) / rec_count
-            if rec_percent < THRESHOLD_PERCENT_OF_LOGS:
-                continue
-            avg = sum(counts) / rec_count  # len(counts)
-            if avg < THRESHOLD_MIN_AVG_LOGS:
-                continue
-            res.append([name, round(avg + 0.001), is_tracker])
-            c_sum += avg
-            c_trkr += avg if is_tracker else 0
-        res.sort(key=lambda x: (-x[1], x[0]))  # sort by count desc, then name
-        return res, c_trkr, c_sum
-
-    obj['pardom'], p_t, p_c = transform(obj['pardom'])
-    obj['subdom'], s_t, s_c = transform(obj['subdom'])
-    obj['tracker_percent'] = s_t / (s_c or 1)
-    obj['tracker'] = list(filter(lambda x: x[2], obj['subdom']))
-    obj['avg_logs'] = s_c
-    obj['avg_logs_pm'] = s_c / (obj['avg_time'] or 1) * 60
-
-
 def gen_html(bundle_id, obj):
-    prepare_json(obj)
+    obj['tracker'] = list(filter(lambda x: x[2], obj['subdom']))
     return mylib.template_with_base(f'''
 <h2 class="title">{obj['name']}</h2>
 <p class="subtitle snd"><i class="mg_lr">Bundle-id:</i>{ bundle_id }</p>
@@ -148,14 +110,15 @@ def gen_html(bundle_id, obj):
   { gen_dom_tags(obj['tracker'], onlyTrackers=True) }
   <p></p>
 
-  <h4>Domains ({ len(obj['pardom']) }):</h4>
+  <h4>Overlapping Domains ({ len(obj['pardom']) }):</h4>
   { gen_dotgraph(obj['pardom']) }
   { gen_dom_tags(obj['pardom']) }
 
-  <h4>Subdomains ({ len(obj['subdom']) }):</h4>
+  <h4>Overlapping Subdomains ({ len(obj['subdom']) }):</h4>
   { gen_dotgraph(obj['subdom']) }
   { gen_dom_tags(obj['subdom']) }
-</div>''', title=obj['name'])
+</div>
+<p class="right snd">Download: <a href="data.json" download="{bundle_id}.json">json</a></p>''', title=obj['name'])
 
 
 def process(bundle_ids):
@@ -165,10 +128,13 @@ def process(bundle_ids):
 
     for bid in bundle_ids:
         print('  ' + bid)
-        json = mylib.json_read_combined(bid)
+        json, json_data_path = mylib.json_read_evaluated(bid)
         mylib.mkdir_out_app(bid)
         with open(mylib.path_out_app(bid, 'index.html'), 'w') as fp:
             fp.write(gen_html(bid, json))
+        download_link = mylib.path_out_app(bid, 'data.json')
+        if not mylib.file_exists(download_link):
+            os.symlink(json_data_path, download_link)
     print('')
 
 
diff --git a/src/html_root.py b/src/html_root.py
index 7797fae..cd30635 100755
--- a/src/html_root.py
+++ b/src/html_root.py
@@ -7,47 +7,51 @@ def gen_root():
     with open(mylib.path_out('index.html'), 'w') as fp:
         fp.write(mylib.template_with_base('''
 <h2>About</h2>
-<p>
-  Information about the research project will be added soon. Stay tuned.
-</p>
-<a id="get-appcheck" class="no-ul" href="https://testflight.apple.com/join/9jjaFeHO" target="_blank">
-  <img src="/static/appcheck.svg" alt="app-icon" width="30" height="30">
+<div class="squeeze">
   <p>
-    Get the iOS App and contribute.<br />
-    Join the TestFlight Beta.
+    Information about the research project will be added soon. Stay tuned.
   </p>
-</a>
-<p>
-  The source code of the app is available <a href="https://github.com/relikd/appcheck/" target="_blank">on GitHub</a>.
-</p>
-<h2>Results</h2>
-<p>
-  If you're just interested in the results, go ahead to <a href="/index/page/1/">all apps</a>.
-</p>
-<h2>Current research</h2>
-<p>
-  We have an ongoing research project open. Your help is highly appreciated. <br>
-  For mor infos follow <a href="/help/">this link</a>.
-</p>
+  <a id="get-appcheck" class="no-ul" href="https://testflight.apple.com/join/9jjaFeHO" target="_blank">
+    <img src="/static/appcheck.svg" alt="app-icon" width="30" height="30">
+    <p>
+      Get the iOS App and contribute.<br />
+      Join the TestFlight Beta.
+    </p>
+  </a>
+  <p>
+    The source code of the app is available <a href="https://github.com/relikd/appcheck/" target="_blank">on GitHub</a>.
+  </p>
+  <h2>Results</h2>
+  <p>
+    If you're just interested in the results, go ahead to <a href="/index/page/1/">all apps</a>.
+  </p>
+  <h2>Current research</h2>
+  <p>
+    We have an ongoing research project open. Your help is highly appreciated. <br>
+    For more info, follow <a href="/help/">this link</a>.
+  </p>
+</div>
 '''))
 
 
 def gen_help():
     many = 7
     txt = '''<h2>Help needed!</h2>
-<p>
-    This study contains two stages. This is the first one.
-    We have selected a random sample of applications for evaluation.
-    We want to track the app behviour over a longer period of time.
-</p><p>
-    You can help us by providing app recordings of the following application.
-    The more you record the better. 
-    Ideally you could do recordings for all the apps below.
-    But really, even if you only find time for a single recording, anything helps!
-</p><p>
-    We need at least {} recordings per app. Stage 2 will follow in a few weeks.
-    Get the <a href="https://testflight.apple.com/join/9jjaFeHO" target="_blank">Testflight beta</a>.
-</p>
+<div class="squeeze">
+  <p>
+      This study contains two stages. This is the first one.
+      We have selected a random sample of applications for evaluation.
+      We want to track the app behaviour over a longer period of time.
+  </p><p>
+      You can help us by providing app recordings of the following application.
+      The more you record the better. 
+      Ideally you could do recordings for all the apps below.
+      But really, even if you only find time for a single recording, anything helps!
+  </p><p>
+      We need at least {} recordings per app. Stage 2 will follow in a few weeks.
+      Get the <a href="https://testflight.apple.com/join/9jjaFeHO" target="_blank">Testflight beta</a>.
+  </p>
+</div>
 <div class="help-links">'''.format(many)
     obj = mylib.json_read(mylib.path_root('src', 'help.json'))
     for land in sorted(obj.keys()):
diff --git a/templates/base.html b/templates/base.html
index 18804c8..7a0dbb4 100644
--- a/templates/base.html
+++ b/templates/base.html
@@ -3,9 +3,9 @@
 <head>
   <meta charset="utf-8"/>
   <meta name="viewport" content="width=device-width, initial-scale=0.75" />
-  <script type="text/javascript" src="/static/script.js?5"></script>
+  <script type="text/javascript" src="/static/script.js?6"></script>
   <title>#_TITLE_#AppCheck: Privacy Monitor</title>
-  <link rel="stylesheet" type="text/css" href="/static/style.css?5">
+  <link rel="stylesheet" type="text/css" href="/static/style.css?6">
   <link rel="stylesheet" type="text/css" href="/static/fonts/font.css">
   <link rel="apple-touch-icon" sizes="180x180" href="/static/favicon/apple-touch-icon.png">
   <link rel="icon" type="image/png" sizes="32x32" href="/static/favicon/favicon-32x32.png">