Json download

This commit is contained in:
relikd
2020-09-15 17:03:33 +02:00
parent de3acfa0e1
commit b41a807313
6 changed files with 102 additions and 80 deletions

View File

@@ -90,7 +90,7 @@ footer .links a { color: #ddd; }
#pagination a.active { border: 1pt solid black; border-radius: 0.2em; }
/* app bundle */
p { max-width: 700px; }
.squeeze { max-width: 700px; }
h2.title { margin-bottom: 0; }
p.subtitle { margin-top: 0.2em; }
.mg_lr { margin: 0 0.4em; }
@@ -102,6 +102,7 @@ td { padding: 0.2em 1em 0.2em 0.1em; }
.help-links tr:nth-child(odd) { background: #F9F9F9; }
.help-links .notyet { color: #D11; }
.help-links .done { color: #52C840; }
.right { text-align: right; }
/* domain tags */
.tags { margin: 2em 0; }

View File

@@ -6,6 +6,9 @@ import common_lib as mylib
import tracker_download as tracker
THRESHOLD_PERCENT_OF_LOGS = 0.33 # domain appears in % recordings
THRESHOLD_MIN_AVG_LOGS = 0.4 # at least x times in total (after %-thresh)
level3_doms = None
@@ -63,6 +66,42 @@ def json_combine(bundle_id):
return res
def json_evaluate_inplace(obj):
    """Replace the raw per-recording data in ``obj`` with summary statistics.

    The dict is modified in place and nothing is returned.

    Keys read: ``name``, ``rec_len``, ``pardom``, ``subdom``.
    Keys written: ``name`` (placeholder if empty), ``sum_rec``, ``sum_logs``,
    ``sum_logs_pm``, ``sum_time``, ``avg_time``, ``tracker_percent``,
    ``avg_logs``, ``avg_logs_pm``. ``pardom`` and ``subdom`` are replaced by
    sorted lists of ``[name, rounded_avg, is_tracker]``. ``rec_len`` is
    removed.

    ``pardom`` / ``subdom`` must map a domain name to an
    ``(is_tracker, counts)`` pair, where ``counts`` is a sequence of numbers.
    NOTE(review): ``rec_len`` presumably holds recording durations in seconds
    (the ``* 60`` below would then give per-minute rates) -- confirm.

    An empty ``rec_len`` no longer raises ``ZeroDivisionError``; the
    recording count is guarded the same way the other divisors are.
    """
    if not obj['name']:
        obj['name'] = '< App-Name >'

    rec_count = len(obj['rec_len'])
    time_total = sum(obj['rec_len'])
    del obj['rec_len']
    # Divisor that stays usable when there are no recordings at all.
    rec_divisor = rec_count or 1

    obj['sum_rec'] = rec_count
    obj['sum_logs'] = sum(sum(x[1]) for x in obj['pardom'].values())
    obj['sum_logs_pm'] = obj['sum_logs'] / (time_total or 1) * 60
    obj['sum_time'] = time_total
    obj['avg_time'] = time_total / rec_divisor

    def transform(ddic):
        """Filter and average one domain dict.

        Returns ``(rows, tracker_avg_sum, avg_sum)`` where ``rows`` is sorted
        by count descending, then by name.
        """
        res = []
        c_sum = 0
        c_trkr = 0
        for name, (is_tracker, counts) in ddic.items():
            # Skip domains that show up in too few recordings.
            rec_percent = len(counts) / rec_divisor
            if rec_percent < THRESHOLD_PERCENT_OF_LOGS:
                continue
            # Average over all recordings, not only those with the domain.
            avg = sum(counts) / rec_divisor
            if avg < THRESHOLD_MIN_AVG_LOGS:
                continue
            # round() rounds exact halves to even; the small offset makes
            # x.5 averages round up instead.
            res.append([name, round(avg + 0.001), is_tracker])
            c_sum += avg
            c_trkr += avg if is_tracker else 0
        res.sort(key=lambda x: (-x[1], x[0]))  # sort by count desc, then name
        return res, c_trkr, c_sum

    # Only the row list is needed for parent domains.
    obj['pardom'] = transform(obj['pardom'])[0]
    obj['subdom'], s_t, s_c = transform(obj['subdom'])
    obj['tracker_percent'] = s_t / (s_c or 1)
    obj['avg_logs'] = s_c
    obj['avg_logs_pm'] = s_c / (obj['avg_time'] or 1) * 60
def process(bundle_ids, where=None):
print('writing combined json ...')
if bundle_ids == ['*']:
@@ -83,6 +122,8 @@ def process(bundle_ids, where=None):
if should_update:
print(' ' + bid)
mylib.json_write_combined(bid, obj)
json_evaluate_inplace(obj)
mylib.json_write_evaluated(bid, obj)
affected_ids.append(bid)
print('')
return affected_ids

View File

@@ -254,6 +254,11 @@ def json_read_combined(bundle_id):
return json_read(path_data_app(bundle_id, 'combined.json'))
def json_read_evaluated(bundle_id):
    """Load an app's ``evaluated.json`` and also report where it was read.

    Returns a ``(data, path)`` tuple: the result of ``json_read`` and the
    path obtained from ``path_data_app(bundle_id, 'evaluated.json')``.
    """
    json_path = path_data_app(bundle_id, 'evaluated.json')
    data = json_read(json_path)
    return data, json_path
def json_read_meta(bundle_id, lang):
    """Load an app's ``info_<lang>.json`` and return what ``json_read`` gives."""
    meta_name = 'info_{}.json'.format(lang)
    meta_path = path_data_app(bundle_id, meta_name)
    return json_read(meta_path)
@@ -270,6 +275,11 @@ def json_write_combined(bundle_id, obj):
json_write(fname, obj, pretty=False)
def json_write_evaluated(bundle_id, obj):
    """Write ``obj`` to the app's ``evaluated.json`` with ``pretty=False``."""
    json_write(path_data_app(bundle_id, 'evaluated.json'), obj, pretty=False)
def json_write_meta(bundle_id, obj, lang):
    """Write ``obj`` to the app's ``info_<lang>.json`` with ``pretty=True``."""
    meta_name = 'info_{}.json'.format(lang)
    json_write(path_data_app(bundle_id, meta_name), obj, pretty=True)

View File

@@ -1,13 +1,11 @@
#!/usr/bin/env python3
import os
import sys
import time
import math
import common_lib as mylib
THRESHOLD_PERCENT_OF_LOGS = 0.33 # domain appears in % recordings
THRESHOLD_MIN_AVG_LOGS = 0.4 # at least x times in total (after %-thresh)
def seconds_to_time(seconds):
seconds = int(seconds)
@@ -77,44 +75,8 @@ def gen_dom_tags(sorted_arr, onlyTrackers=False):
return '<i> None </i>'
def prepare_json(obj):
    """Add summary statistics to ``obj`` in place for HTML rendering.

    Nothing is returned.

    Keys read: ``name``, ``rec_len``, ``pardom``, ``subdom``.
    Keys written: ``name`` (HTML-escaped placeholder if empty), ``sum_rec``,
    ``sum_logs``, ``sum_logs_pm``, ``sum_time``, ``avg_time``,
    ``tracker_percent``, ``tracker``, ``avg_logs``, ``avg_logs_pm``.
    ``pardom`` and ``subdom`` are replaced by sorted lists of
    ``[name, rounded_avg, is_tracker]``. ``rec_len`` is left in place.

    ``pardom`` / ``subdom`` must map a domain name to an
    ``(is_tracker, counts)`` pair, where ``counts`` is a sequence of numbers.
    NOTE(review): ``rec_len`` presumably holds recording durations in seconds
    (the ``* 60`` below would then give per-minute rates) -- confirm.

    An empty ``rec_len`` no longer raises ``ZeroDivisionError``; the
    recording count is guarded the same way the other divisors are.
    """
    if not obj['name']:
        obj['name'] = '&lt; App-Name &gt;'

    rec_count = len(obj['rec_len'])
    time_total = sum(obj['rec_len'])
    # Divisor that stays usable when there are no recordings at all.
    rec_divisor = rec_count or 1

    obj['sum_rec'] = rec_count
    obj['sum_logs'] = sum(sum(x[1]) for x in obj['pardom'].values())
    obj['sum_logs_pm'] = obj['sum_logs'] / (time_total or 1) * 60
    obj['sum_time'] = time_total
    obj['avg_time'] = time_total / rec_divisor

    def transform(ddic):
        """Filter and average one domain dict.

        Returns ``(rows, tracker_avg_sum, avg_sum)`` where ``rows`` is sorted
        by count descending, then by name.
        """
        res = []
        c_sum = 0
        c_trkr = 0
        for name, (is_tracker, counts) in ddic.items():
            # Skip domains that show up in too few recordings.
            rec_percent = len(counts) / rec_divisor
            if rec_percent < THRESHOLD_PERCENT_OF_LOGS:
                continue
            # Average over all recordings, not only those with the domain.
            avg = sum(counts) / rec_divisor
            if avg < THRESHOLD_MIN_AVG_LOGS:
                continue
            # round() rounds exact halves to even; the small offset makes
            # x.5 averages round up instead.
            res.append([name, round(avg + 0.001), is_tracker])
            c_sum += avg
            c_trkr += avg if is_tracker else 0
        res.sort(key=lambda x: (-x[1], x[0]))  # sort by count desc, then name
        return res, c_trkr, c_sum

    # Only the row list is needed for parent domains.
    obj['pardom'] = transform(obj['pardom'])[0]
    obj['subdom'], s_t, s_c = transform(obj['subdom'])
    obj['tracker_percent'] = s_t / (s_c or 1)
    # Subdomain rows whose tracker flag is set.
    obj['tracker'] = list(filter(lambda x: x[2], obj['subdom']))
    obj['avg_logs'] = s_c
    obj['avg_logs_pm'] = s_c / (obj['avg_time'] or 1) * 60
def gen_html(bundle_id, obj):
prepare_json(obj)
obj['tracker'] = list(filter(lambda x: x[2], obj['subdom']))
return mylib.template_with_base(f'''
<h2 class="title">{obj['name']}</h2>
<p class="subtitle snd"><i class="mg_lr">Bundle-id:</i>{ bundle_id }</p>
@@ -148,14 +110,15 @@ def gen_html(bundle_id, obj):
{ gen_dom_tags(obj['tracker'], onlyTrackers=True) }
<p></p>
<h4>Domains ({ len(obj['pardom']) }):</h4>
<h4>Overlapping Domains ({ len(obj['pardom']) }):</h4>
{ gen_dotgraph(obj['pardom']) }
{ gen_dom_tags(obj['pardom']) }
<h4>Subdomains ({ len(obj['subdom']) }):</h4>
<h4>Overlapping Subdomains ({ len(obj['subdom']) }):</h4>
{ gen_dotgraph(obj['subdom']) }
{ gen_dom_tags(obj['subdom']) }
</div>''', title=obj['name'])
</div>
<p class="right snd">Download: <a href="data.json" download="{bundle_id}.json">json</a></p>''', title=obj['name'])
def process(bundle_ids):
@@ -165,10 +128,13 @@ def process(bundle_ids):
for bid in bundle_ids:
print(' ' + bid)
json = mylib.json_read_combined(bid)
json, json_data_path = mylib.json_read_evaluated(bid)
mylib.mkdir_out_app(bid)
with open(mylib.path_out_app(bid, 'index.html'), 'w') as fp:
fp.write(gen_html(bid, json))
download_link = mylib.path_out_app(bid, 'data.json')
if not mylib.file_exists(download_link):
os.symlink(json_data_path, download_link)
print('')

View File

@@ -7,47 +7,51 @@ def gen_root():
with open(mylib.path_out('index.html'), 'w') as fp:
fp.write(mylib.template_with_base('''
<h2>About</h2>
<p>
<div class="squeeze">
<p>
Information about the research project will be added soon. Stay tuned.
</p>
<a id="get-appcheck" class="no-ul" href="https://testflight.apple.com/join/9jjaFeHO" target="_blank">
</p>
<a id="get-appcheck" class="no-ul" href="https://testflight.apple.com/join/9jjaFeHO" target="_blank">
<img src="/static/appcheck.svg" alt="app-icon" width="30" height="30">
<p>
Get the iOS App and contribute.<br />
Join the TestFlight Beta.
</p>
</a>
<p>
</a>
<p>
The source code of the app is available <a href="https://github.com/relikd/appcheck/" target="_blank">on GitHub</a>.
</p>
<h2>Results</h2>
<p>
</p>
<h2>Results</h2>
<p>
If you're just interested in the results, go ahead to <a href="/index/page/1/">all apps</a>.
</p>
<h2>Current research</h2>
<p>
</p>
<h2>Current research</h2>
<p>
We have an ongoing research project open. Your help is highly appreciated. <br>
For more information, follow <a href="/help/">this link</a>.
</p>
</p>
</div>
'''))
def gen_help():
many = 7
txt = '''<h2>Help needed!</h2>
<p>
<div class="squeeze">
<p>
This study contains two stages. This is the first one.
We have selected a random sample of applications for evaluation.
We want to track the app behaviour over a longer period of time.
</p><p>
</p><p>
You can help us by providing app recordings of the following application.
The more you record the better.
Ideally you could do recordings for all the apps below.
But really, even if you only find time for a single recording, anything helps!
</p><p>
</p><p>
We need at least {} recordings per app. Stage 2 will follow in a few weeks.
Get the <a href="https://testflight.apple.com/join/9jjaFeHO" target="_blank">Testflight beta</a>.
</p>
</p>
</div>
<div class="help-links">'''.format(many)
obj = mylib.json_read(mylib.path_root('src', 'help.json'))
for land in sorted(obj.keys()):

View File

@@ -3,9 +3,9 @@
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=0.75" />
<script type="text/javascript" src="/static/script.js?5"></script>
<script type="text/javascript" src="/static/script.js?6"></script>
<title>#_TITLE_#AppCheck: Privacy Monitor</title>
<link rel="stylesheet" type="text/css" href="/static/style.css?5">
<link rel="stylesheet" type="text/css" href="/static/style.css?6">
<link rel="stylesheet" type="text/css" href="/static/fonts/font.css">
<link rel="apple-touch-icon" sizes="180x180" href="/static/favicon/apple-touch-icon.png">
<link rel="icon" type="image/png" sizes="32x32" href="/static/favicon/favicon-32x32.png">