Json download

This commit is contained in:
relikd
2020-09-15 17:03:33 +02:00
parent de3acfa0e1
commit b41a807313
6 changed files with 102 additions and 80 deletions

View File

@@ -6,6 +6,9 @@ import common_lib as mylib
import tracker_download as tracker
THRESHOLD_PERCENT_OF_LOGS = 0.33 # domain appears in % recordings
THRESHOLD_MIN_AVG_LOGS = 0.4 # at least x times in total (after %-thresh)
level3_doms = None
@@ -63,6 +66,42 @@ def json_combine(bundle_id):
return res
def json_evaluate_inplace(obj):
if not obj['name']:
obj['name'] = '< App-Name >'
rec_count = len(obj['rec_len'])
time_total = sum(obj['rec_len'])
del(obj['rec_len'])
obj['sum_rec'] = rec_count
obj['sum_logs'] = sum([sum(x[1]) for x in obj['pardom'].values()])
obj['sum_logs_pm'] = obj['sum_logs'] / (time_total or 1) * 60
obj['sum_time'] = time_total
obj['avg_time'] = time_total / rec_count
def transform(ddic):
res = list()
c_sum = 0
c_trkr = 0
for name, (is_tracker, counts) in ddic.items():
rec_percent = len(counts) / rec_count
if rec_percent < THRESHOLD_PERCENT_OF_LOGS:
continue
avg = sum(counts) / rec_count # len(counts)
if avg < THRESHOLD_MIN_AVG_LOGS:
continue
res.append([name, round(avg + 0.001), is_tracker])
c_sum += avg
c_trkr += avg if is_tracker else 0
res.sort(key=lambda x: (-x[1], x[0])) # sort by count desc, then name
return res, c_trkr, c_sum
obj['pardom'], p_t, p_c = transform(obj['pardom'])
obj['subdom'], s_t, s_c = transform(obj['subdom'])
obj['tracker_percent'] = s_t / (s_c or 1)
obj['avg_logs'] = s_c
obj['avg_logs_pm'] = s_c / (obj['avg_time'] or 1) * 60
def process(bundle_ids, where=None):
print('writing combined json ...')
if bundle_ids == ['*']:
@@ -83,6 +122,8 @@ def process(bundle_ids, where=None):
if should_update:
print(' ' + bid)
mylib.json_write_combined(bid, obj)
json_evaluate_inplace(obj)
mylib.json_write_evaluated(bid, obj)
affected_ids.append(bid)
print('')
return affected_ids