From b41a8073132713b10d1faf183779160ba5f703ce Mon Sep 17 00:00:00 2001 From: relikd Date: Tue, 15 Sep 2020 17:03:33 +0200 Subject: [PATCH] Json download --- out/static/style.css | 3 +- src/bundle_combine.py | 41 +++++++++++++++++++++++++ src/common_lib.py | 10 +++++++ src/html_bundle.py | 54 +++++++-------------------------- src/html_root.py | 70 +++++++++++++++++++++++-------------------- templates/base.html | 4 +-- 6 files changed, 102 insertions(+), 80 deletions(-) diff --git a/out/static/style.css b/out/static/style.css index cc0dbe8..ee83893 100644 --- a/out/static/style.css +++ b/out/static/style.css @@ -90,7 +90,7 @@ footer .links a { color: #ddd; } #pagination a.active { border: 1pt solid black; border-radius: 0.2em; } /* app bundle */ -p { max-width: 700px; } +.squeeze { max-width: 700px; } h2.title { margin-bottom: 0; } p.subtitle { margin-top: 0.2em; } .mg_lr { margin: 0 0.4em; } @@ -102,6 +102,7 @@ td { padding: 0.2em 1em 0.2em 0.1em; } .help-links tr:nth-child(odd) { background: #F9F9F9; } .help-links .notyet { color: #D11; } .help-links .done { color: #52C840; } +.right { text-align: right; } /* domain tags */ .tags { margin: 2em 0; } diff --git a/src/bundle_combine.py b/src/bundle_combine.py index 71ed236..ce10cb2 100755 --- a/src/bundle_combine.py +++ b/src/bundle_combine.py @@ -6,6 +6,9 @@ import common_lib as mylib import tracker_download as tracker +THRESHOLD_PERCENT_OF_LOGS = 0.33 # domain appears in % recordings +THRESHOLD_MIN_AVG_LOGS = 0.4 # at least x times in total (after %-thresh) + level3_doms = None @@ -63,6 +66,42 @@ def json_combine(bundle_id): return res +def json_evaluate_inplace(obj): + if not obj['name']: + obj['name'] = '< App-Name >' + rec_count = len(obj['rec_len']) + time_total = sum(obj['rec_len']) + del(obj['rec_len']) + obj['sum_rec'] = rec_count + obj['sum_logs'] = sum([sum(x[1]) for x in obj['pardom'].values()]) + obj['sum_logs_pm'] = obj['sum_logs'] / (time_total or 1) * 60 + obj['sum_time'] = time_total + obj['avg_time'] 
= time_total / rec_count + + def transform(ddic): + res = list() + c_sum = 0 + c_trkr = 0 + for name, (is_tracker, counts) in ddic.items(): + rec_percent = len(counts) / rec_count + if rec_percent < THRESHOLD_PERCENT_OF_LOGS: + continue + avg = sum(counts) / rec_count # len(counts) + if avg < THRESHOLD_MIN_AVG_LOGS: + continue + res.append([name, round(avg + 0.001), is_tracker]) + c_sum += avg + c_trkr += avg if is_tracker else 0 + res.sort(key=lambda x: (-x[1], x[0])) # sort by count desc, then name + return res, c_trkr, c_sum + + obj['pardom'], p_t, p_c = transform(obj['pardom']) + obj['subdom'], s_t, s_c = transform(obj['subdom']) + obj['tracker_percent'] = s_t / (s_c or 1) + obj['avg_logs'] = s_c + obj['avg_logs_pm'] = s_c / (obj['avg_time'] or 1) * 60 + + def process(bundle_ids, where=None): print('writing combined json ...') if bundle_ids == ['*']: @@ -83,6 +122,8 @@ def process(bundle_ids, where=None): if should_update: print(' ' + bid) mylib.json_write_combined(bid, obj) + json_evaluate_inplace(obj) + mylib.json_write_evaluated(bid, obj) affected_ids.append(bid) print('') return affected_ids diff --git a/src/common_lib.py b/src/common_lib.py index 5eba2c3..4804c48 100755 --- a/src/common_lib.py +++ b/src/common_lib.py @@ -254,6 +254,11 @@ def json_read_combined(bundle_id): return json_read(path_data_app(bundle_id, 'combined.json')) +def json_read_evaluated(bundle_id): + pth = path_data_app(bundle_id, 'evaluated.json') + return json_read(pth), pth + + def json_read_meta(bundle_id, lang): return json_read(path_data_app(bundle_id, 'info_{}.json'.format(lang))) @@ -270,6 +275,11 @@ def json_write_combined(bundle_id, obj): json_write(fname, obj, pretty=False) +def json_write_evaluated(bundle_id, obj): + fname = path_data_app(bundle_id, 'evaluated.json') + json_write(fname, obj, pretty=False) + + def json_write_meta(bundle_id, obj, lang): fname = path_data_app(bundle_id, 'info_{}.json'.format(lang)) json_write(fname, obj, pretty=True) diff --git 
a/src/html_bundle.py b/src/html_bundle.py index c78ef87..dd7de72 100755 --- a/src/html_bundle.py +++ b/src/html_bundle.py @@ -1,13 +1,11 @@ #!/usr/bin/env python3 +import os import sys import time import math import common_lib as mylib -THRESHOLD_PERCENT_OF_LOGS = 0.33 # domain appears in % recordings -THRESHOLD_MIN_AVG_LOGS = 0.4 # at least x times in total (after %-thresh) - def seconds_to_time(seconds): seconds = int(seconds) @@ -77,44 +75,8 @@ def gen_dom_tags(sorted_arr, onlyTrackers=False): return '– None –' -def prepare_json(obj): - if not obj['name']: - obj['name'] = '< App-Name >' - rec_count = len(obj['rec_len']) - time_total = sum(obj['rec_len']) - obj['sum_rec'] = rec_count - obj['sum_logs'] = sum([sum(x[1]) for x in obj['pardom'].values()]) - obj['sum_logs_pm'] = obj['sum_logs'] / (time_total or 1) * 60 - obj['sum_time'] = time_total - obj['avg_time'] = time_total / rec_count - - def transform(ddic): - res = list() - c_sum = 0 - c_trkr = 0 - for name, (is_tracker, counts) in ddic.items(): - rec_percent = len(counts) / rec_count - if rec_percent < THRESHOLD_PERCENT_OF_LOGS: - continue - avg = sum(counts) / rec_count # len(counts) - if avg < THRESHOLD_MIN_AVG_LOGS: - continue - res.append([name, round(avg + 0.001), is_tracker]) - c_sum += avg - c_trkr += avg if is_tracker else 0 - res.sort(key=lambda x: (-x[1], x[0])) # sort by count desc, then name - return res, c_trkr, c_sum - - obj['pardom'], p_t, p_c = transform(obj['pardom']) - obj['subdom'], s_t, s_c = transform(obj['subdom']) - obj['tracker_percent'] = s_t / (s_c or 1) - obj['tracker'] = list(filter(lambda x: x[2], obj['subdom'])) - obj['avg_logs'] = s_c - obj['avg_logs_pm'] = s_c / (obj['avg_time'] or 1) * 60 - - def gen_html(bundle_id, obj): - prepare_json(obj) + obj['tracker'] = list(filter(lambda x: x[2], obj['subdom'])) return mylib.template_with_base(f'''

{obj['name']}

Bundle-id:{ bundle_id }

@@ -148,14 +110,15 @@ def gen_html(bundle_id, obj): { gen_dom_tags(obj['tracker'], onlyTrackers=True) }

-

Domains ({ len(obj['pardom']) }):

+

Overlapping Domains ({ len(obj['pardom']) }):

{ gen_dotgraph(obj['pardom']) } { gen_dom_tags(obj['pardom']) } -

Subdomains ({ len(obj['subdom']) }):

+

Overlapping Subdomains ({ len(obj['subdom']) }):

{ gen_dotgraph(obj['subdom']) } { gen_dom_tags(obj['subdom']) } -''', title=obj['name']) + +

Download: json

''', title=obj['name']) def process(bundle_ids): @@ -165,10 +128,13 @@ def process(bundle_ids): for bid in bundle_ids: print(' ' + bid) - json = mylib.json_read_combined(bid) + json, json_data_path = mylib.json_read_evaluated(bid) mylib.mkdir_out_app(bid) with open(mylib.path_out_app(bid, 'index.html'), 'w') as fp: fp.write(gen_html(bid, json)) + download_link = mylib.path_out_app(bid, 'data.json') + if not mylib.file_exists(download_link): + os.symlink(json_data_path, download_link) print('') diff --git a/src/html_root.py b/src/html_root.py index 7797fae..cd30635 100755 --- a/src/html_root.py +++ b/src/html_root.py @@ -7,47 +7,51 @@ def gen_root(): with open(mylib.path_out('index.html'), 'w') as fp: fp.write(mylib.template_with_base('''

About

-

- Information about the research project will be added soon. Stay tuned. -

- - app-icon +

- Get the iOS App and contribute.
- Join the TestFlight Beta. + Information about the research project will be added soon. Stay tuned.

-
-

- The source code of the app is available on GitHub. -

-

Results

-

- If you're just interested in the results, go ahead to all apps. -

-

Current research

-

- We have an ongoing research project open. Your help is highly appreciated.
- For mor infos follow this link. -

+ + app-icon +

+ Get the iOS App and contribute.
+ Join the TestFlight Beta. +

+
+

+ The source code of the app is available on GitHub. +

+

Results

+

+ If you're just interested in the results, go ahead to all apps. +

+

Current research

+

+ We have an ongoing research project open. Your help is highly appreciated.
+ For more information, follow this link. +

+
''')) def gen_help(): many = 7 txt = '''

Help needed!

-

- This study contains two stages. This is the first one. - We have selected a random sample of applications for evaluation. - We want to track the app behviour over a longer period of time. -

- You can help us by providing app recordings of the following application. - The more you record the better. - Ideally you could do recordings for all the apps below. - But really, even if you only find time for a single recording, anything helps! -

- We need at least {} recordings per app. Stage 2 will follow in a few weeks. - Get the Testflight beta. -

+
+

+ This study contains two stages. This is the first one. + We have selected a random sample of applications for evaluation. + We want to track the app behaviour over a longer period of time. +

+ You can help us by providing app recordings of the following applications. + The more you record the better. + Ideally you could do recordings for all the apps below. + But really, even if you only find time for a single recording, anything helps! +

+ We need at least {} recordings per app. Stage 2 will follow in a few weeks. + Get the TestFlight Beta. +

+