Bundle name index
This commit is contained in:
@@ -38,18 +38,17 @@ def json_combine(bundle_id):
|
|||||||
except KeyError:
|
except KeyError:
|
||||||
ddic[key] = (tracker.is_tracker(key), [num])
|
ddic[key] = (tracker.is_tracker(key), [num])
|
||||||
|
|
||||||
res = dict({'rec_len': [], 'name': mylib.app_name(bundle_id)})
|
res = {'rec_len': []}
|
||||||
pardom = dict()
|
pardom = {}
|
||||||
subdom = dict()
|
subdom = {}
|
||||||
latest = 0
|
latest = 0
|
||||||
for fname, jdata in mylib.enum_jsons(bundle_id):
|
for fname, jdata in mylib.enum_jsons(bundle_id):
|
||||||
|
# TODO: load combined and append newest only, then update evaluated
|
||||||
latest = max(latest, os.path.getmtime(fname)) # or getctime
|
latest = max(latest, os.path.getmtime(fname)) # or getctime
|
||||||
# if not res['name']:
|
|
||||||
# res['name'] = jdata['app-name']
|
|
||||||
res['rec_len'].append(jdata['duration'])
|
res['rec_len'].append(jdata['duration'])
|
||||||
try:
|
try:
|
||||||
logs = jdata['logs']
|
logs = jdata['logs']
|
||||||
uniq_par = dict()
|
uniq_par = {}
|
||||||
for subdomain in logs:
|
for subdomain in logs:
|
||||||
occurs = len(logs[subdomain])
|
occurs = len(logs[subdomain])
|
||||||
inc_dic(subdom, subdomain, occurs)
|
inc_dic(subdom, subdomain, occurs)
|
||||||
@@ -69,8 +68,6 @@ def json_combine(bundle_id):
|
|||||||
|
|
||||||
|
|
||||||
def json_evaluate_inplace(obj):
|
def json_evaluate_inplace(obj):
|
||||||
if not obj['name']:
|
|
||||||
obj['name'] = '< App-Name >'
|
|
||||||
rec_count = len(obj['rec_len'])
|
rec_count = len(obj['rec_len'])
|
||||||
time_total = sum(obj['rec_len'])
|
time_total = sum(obj['rec_len'])
|
||||||
del(obj['rec_len'])
|
del(obj['rec_len'])
|
||||||
@@ -81,7 +78,7 @@ def json_evaluate_inplace(obj):
|
|||||||
obj['avg_time'] = time_total / rec_count
|
obj['avg_time'] = time_total / rec_count
|
||||||
|
|
||||||
def transform(ddic):
|
def transform(ddic):
|
||||||
res = list()
|
res = []
|
||||||
c_sum = 0
|
c_sum = 0
|
||||||
c_trkr = 0
|
c_trkr = 0
|
||||||
for name, (is_tracker, counts) in ddic.items():
|
for name, (is_tracker, counts) in ddic.items():
|
||||||
|
|||||||
@@ -32,6 +32,12 @@ def path_data_app(bundle_id, filename=None):
|
|||||||
return path_add(pth, filename) if filename else pth
|
return path_add(pth, filename) if filename else pth
|
||||||
|
|
||||||
|
|
||||||
|
def path_data_index(filename):
    """Return the path for index file `filename` below ('data', '_eval').

    The directory is obtained from path_root('data', '_eval') and handed to
    mkdir() before the file name is appended with path_add().
    NOTE(review): presumably mkdir() creates the directory if it is missing --
    confirm against its definition.
    """
    index_dir = path_root('data', '_eval')
    mkdir(index_dir)
    index_path = path_add(index_dir, filename)
    return index_path
|
||||||
|
|
||||||
|
|
||||||
def path_out(*path_components):
|
def path_out(*path_components):
|
||||||
return path_root('out', *path_components)
|
return path_root('out', *path_components)
|
||||||
|
|
||||||
@@ -76,17 +82,18 @@ def valid_bundle_id(bundle_id):
|
|||||||
return regex_bundle_id.match(bundle_id)
|
return regex_bundle_id.match(bundle_id)
|
||||||
|
|
||||||
|
|
||||||
def app_name(bundle_id, fallback=None):
|
def app_names(bundle_id):
|
||||||
def name_for(lang):
|
def name_for(lang):
|
||||||
try:
|
try:
|
||||||
return json_read_meta(bundle_id, lang)['trackCensoredName']
|
return json_read_meta(bundle_id, lang)['trackCensoredName']
|
||||||
except Exception:
|
except Exception:
|
||||||
return None
|
return None
|
||||||
|
ret = {}
|
||||||
for lang in ['us', 'de']:
|
for lang in ['us', 'de']:
|
||||||
name = name_for(lang)
|
name = name_for(lang)
|
||||||
if name:
|
if name:
|
||||||
return name
|
ret[lang] = name
|
||||||
return fallback
|
return ret
|
||||||
|
|
||||||
|
|
||||||
def err(scope, msg, logOnly=False):
|
def err(scope, msg, logOnly=False):
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import sys
|
|||||||
import time
|
import time
|
||||||
import math
|
import math
|
||||||
import common_lib as mylib
|
import common_lib as mylib
|
||||||
|
import index_bundle_names
|
||||||
|
|
||||||
|
|
||||||
def seconds_to_time(seconds):
|
def seconds_to_time(seconds):
|
||||||
@@ -80,9 +81,10 @@ def gen_dom_tags(sorted_arr, onlyTrackers=False):
|
|||||||
|
|
||||||
|
|
||||||
def gen_html(bundle_id, obj):
|
def gen_html(bundle_id, obj):
|
||||||
|
name = index_bundle_names.get_name(bundle_id)
|
||||||
obj['tracker'] = list(filter(lambda x: x[2], obj['subdom']))
|
obj['tracker'] = list(filter(lambda x: x[2], obj['subdom']))
|
||||||
return mylib.template_with_base(f'''
|
return mylib.template_with_base(f'''
|
||||||
<h2 class="title">{obj['name']}</h2>
|
<h2 class="title">{name}</h2>
|
||||||
<p class="subtitle snd"><i class="mg_lr">Bundle-id:</i>{ bundle_id }</p>
|
<p class="subtitle snd"><i class="mg_lr">Bundle-id:</i>{ bundle_id }</p>
|
||||||
<div id="meta">
|
<div id="meta">
|
||||||
<div class="icons">
|
<div class="icons">
|
||||||
@@ -122,7 +124,7 @@ def gen_html(bundle_id, obj):
|
|||||||
{ gen_dotgraph(obj['subdom']) }
|
{ gen_dotgraph(obj['subdom']) }
|
||||||
{ gen_dom_tags(obj['subdom']) }
|
{ gen_dom_tags(obj['subdom']) }
|
||||||
</div>
|
</div>
|
||||||
<p class="right snd">Download: <a href="data.json" download="{bundle_id}.json">json</a></p>''', title=obj['name'])
|
<p class="right snd">Download: <a href="data.json" download="{bundle_id}.json">json</a></p>''', title=name)
|
||||||
|
|
||||||
|
|
||||||
def process(bundle_ids):
|
def process(bundle_ids):
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import common_lib as mylib
|
import common_lib as mylib
|
||||||
|
import index_bundle_names
|
||||||
|
|
||||||
|
|
||||||
def gen_obj(bundle_id):
|
def gen_obj(bundle_id):
|
||||||
@@ -10,7 +11,7 @@ def gen_obj(bundle_id):
|
|||||||
icon = '/static/app-template.svg'
|
icon = '/static/app-template.svg'
|
||||||
return {
|
return {
|
||||||
'id': bundle_id,
|
'id': bundle_id,
|
||||||
'name': mylib.app_name(bundle_id, '< App-Name >'),
|
'name': index_bundle_names.get_name(bundle_id),
|
||||||
'img': icon
|
'img': icon
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
66
src/index_bundle_names.py
Executable file
66
src/index_bundle_names.py
Executable file
@@ -0,0 +1,66 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import common_lib as mylib
|
||||||
|
|
||||||
|
_bundle_name_dict = None
|
||||||
|
|
||||||
|
|
||||||
|
def index_fname():
    """Return the path of the bundle-name index file ('bundle_names.json').

    The location is resolved by mylib.path_data_index().
    """
    fname = 'bundle_names.json'
    return mylib.path_data_index(fname)
|
||||||
|
|
||||||
|
|
||||||
|
def load_json_if_not_already():
    """Populate the module-level `_bundle_name_dict` cache on first use.

    If the cache has not been initialised yet (it is still None), the index
    file is read when it exists; otherwise the cache becomes an empty dict.
    Subsequent calls are no-ops.
    """
    global _bundle_name_dict
    # Compare against None rather than truthiness: an index that was loaded
    # but is (still) empty must not trigger another disk access on every call.
    if _bundle_name_dict is not None:
        return
    index_file = index_fname()
    if mylib.file_exists(index_file):
        _bundle_name_dict = mylib.json_read(index_file)
    else:
        _bundle_name_dict = {}
|
||||||
|
|
||||||
|
|
||||||
|
def write_json_to_disk():
    """Write the in-memory `_bundle_name_dict` to the index file.

    Uses mylib.json_write() with pretty=True; the target path comes from
    index_fname().
    """
    target = index_fname()
    mylib.json_write(target, _bundle_name_dict, pretty=True)
|
||||||
|
|
||||||
|
|
||||||
|
def get_name(bundle_id, langs=('us', 'de')):
    """Return the app name stored in the bundle-name index.

    bundle_id -- key to look up in the index.
    langs     -- language codes to try, in order of preference; the first one
                 that has an entry for `bundle_id` wins. The default is an
                 immutable tuple so it cannot be altered between calls.

    Returns the placeholder '< App-Name >' when the bundle id is not in the
    index or has no entry for any of the requested languages.
    """
    load_json_if_not_already()
    # Look the bundle up once; an unknown id behaves like "no names at all".
    names = _bundle_name_dict.get(bundle_id, {})
    for lang in langs:
        if lang in names:
            return names[lang]
    return '< App-Name >'
|
||||||
|
|
||||||
|
|
||||||
|
def process(bundle_ids):
    """Update the bundle-name index for `bundle_ids` and persist it.

    bundle_ids -- list of bundle ids. The special value ['*'] removes the
                  index file and rebuilds the index for every id yielded by
                  mylib.enum_data_appids().

    Ids for which mylib.app_names() returns nothing are reported through
    mylib.err() and skipped. The index file is written only when at least one
    entry was added or actually changed.
    """
    global _bundle_name_dict
    print('writing index: bundle name ...')
    if bundle_ids == ['*']:
        bundle_ids = list(mylib.enum_data_appids())
        print(' full reset')
        mylib.rm_file(index_fname())  # rebuild from ground up
        # Drop the in-memory copy as well; otherwise entries cached by an
        # earlier get_name()/process() call would survive the reset and be
        # written straight back to disk.
        _bundle_name_dict = None

    load_json_if_not_already()
    did_change = False
    for bid in bundle_ids:
        names = mylib.app_names(bid)
        if not names:
            mylib.err('index-bundle-names', 'could not load: {}'.format(bid))
            continue
        # Only flag a change when the stored names really differ, so an
        # unchanged index is not rewritten.
        if _bundle_name_dict.get(bid) != names:
            _bundle_name_dict[bid] = names
            did_change = True
    if did_change:
        write_json_to_disk()
    else:
        print(' no change')
    print('')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: every command-line argument is passed on as a
    # bundle id; without arguments only the usage text is shown.
    cli_bundle_ids = sys.argv[1:]
    if not cli_bundle_ids:
        # process(['*'])
        mylib.usage(__file__, '[bundle_id] [...]')
    else:
        process(cli_bundle_ids)
|
||||||
@@ -4,12 +4,6 @@ import sys
|
|||||||
import common_lib as mylib
|
import common_lib as mylib
|
||||||
|
|
||||||
|
|
||||||
def get_index_path():
|
|
||||||
pth = mylib.path_root('data', '_eval')
|
|
||||||
mylib.mkdir(pth)
|
|
||||||
return mylib.path_add(pth, 'reverse_index.json')
|
|
||||||
|
|
||||||
|
|
||||||
def load_index_json(file_path):
|
def load_index_json(file_path):
|
||||||
if mylib.file_exists(file_path):
|
if mylib.file_exists(file_path):
|
||||||
json = mylib.json_read(file_path)
|
json = mylib.json_read(file_path)
|
||||||
@@ -52,7 +46,10 @@ def insert_in_index(index, bundle_ids):
|
|||||||
except ValueError: # index not found
|
except ValueError: # index not found
|
||||||
i = len(index['bundle'])
|
i = len(index['bundle'])
|
||||||
index['bundle'].append(bid)
|
index['bundle'].append(bid)
|
||||||
|
try:
|
||||||
json, _ = mylib.json_read_evaluated(bid)
|
json, _ = mylib.json_read_evaluated(bid)
|
||||||
|
except FileNotFoundError:
|
||||||
|
continue
|
||||||
for key in ['pardom', 'subdom']: # assuming keys are identical
|
for key in ['pardom', 'subdom']: # assuming keys are identical
|
||||||
for domain, _, _ in json[key]:
|
for domain, _, _ in json[key]:
|
||||||
try:
|
try:
|
||||||
@@ -64,10 +61,11 @@ def insert_in_index(index, bundle_ids):
|
|||||||
|
|
||||||
|
|
||||||
def process(bundle_ids, deleteOnly=False):
|
def process(bundle_ids, deleteOnly=False):
|
||||||
print('writing reverse index ...')
|
print('writing index: reverse domains ...')
|
||||||
index_file = get_index_path()
|
index_file = mylib.path_data_index('reverse_domains.json')
|
||||||
if bundle_ids == ['*']:
|
if bundle_ids == ['*']:
|
||||||
bundle_ids = list(mylib.enum_data_appids())
|
bundle_ids = list(mylib.enum_data_appids())
|
||||||
|
print(' full reset')
|
||||||
mylib.rm_file(index_file) # rebuild from ground up
|
mylib.rm_file(index_file) # rebuild from ground up
|
||||||
# load previous index
|
# load previous index
|
||||||
json = load_index_json(index_file)
|
json = load_index_json(index_file)
|
||||||
17
src/main.py
17
src/main.py
@@ -8,7 +8,8 @@ import bundle_download
|
|||||||
import html_root
|
import html_root
|
||||||
import html_index
|
import html_index
|
||||||
import html_bundle
|
import html_bundle
|
||||||
import json_reverse_index
|
import index_bundle_names
|
||||||
|
import index_reverse_domains
|
||||||
import tracker_download
|
import tracker_download
|
||||||
|
|
||||||
|
|
||||||
@@ -44,23 +45,31 @@ def del_id(bundle_ids):
|
|||||||
mylib.rm_dir(dest)
|
mylib.rm_dir(dest)
|
||||||
update_index = True
|
update_index = True
|
||||||
print('')
|
print('')
|
||||||
json_reverse_index.process(bundle_ids, deleteOnly=True)
|
index_reverse_domains.process(bundle_ids, deleteOnly=True)
|
||||||
if update_index:
|
if update_index:
|
||||||
rebuild_index()
|
rebuild_index()
|
||||||
|
|
||||||
|
|
||||||
def combine_and_update(bundle_ids, where=None):
|
def combine_and_update(bundle_ids, where=None):
|
||||||
|
# 1. download meta data from iTunes store, incl. app icons
|
||||||
new_ids = bundle_download.process(bundle_ids)
|
new_ids = bundle_download.process(bundle_ids)
|
||||||
|
# 2. if new apps, update bundle name index
|
||||||
|
if len(new_ids) > 0:
|
||||||
|
index_bundle_names.process(new_ids)
|
||||||
|
# 3. re-calculate combined.json and evaluated.json files
|
||||||
affected = bundle_combine.process(bundle_ids, where=where)
|
affected = bundle_combine.process(bundle_ids, where=where)
|
||||||
|
# special case needed for reverse index. '*' will force rebuilt index
|
||||||
if not where and bundle_ids == ['*']:
|
if not where and bundle_ids == ['*']:
|
||||||
affected = ['*']
|
affected = ['*']
|
||||||
|
# 4. was any json updated? if so, make html and update reverse index
|
||||||
if len(affected) > 0:
|
if len(affected) > 0:
|
||||||
json_reverse_index.process(affected)
|
index_reverse_domains.process(affected)
|
||||||
html_bundle.process(affected)
|
html_bundle.process(affected)
|
||||||
else:
|
else:
|
||||||
print('no bundle affected by tracker, not generating bundle html')
|
print('no bundle affected by tracker, not generating bundle html')
|
||||||
|
# 5. make all apps index
|
||||||
if len(new_ids) > 0:
|
if len(new_ids) > 0:
|
||||||
rebuild_index()
|
rebuild_index() # must be called after bundle_combine
|
||||||
else:
|
else:
|
||||||
print('no new bundle, not rebuilding index')
|
print('no new bundle, not rebuilding index')
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,13 @@ import common_lib as mylib
|
|||||||
known_trackers = None
|
known_trackers = None
|
||||||
|
|
||||||
|
|
||||||
|
def is_tracker(domain):
    """Look `domain` up in the known-tracker list.

    The list is read from 'tracker_all.txt' via mylib.read_list() whenever the
    module-level `known_trackers` cache is empty or unset. The lookup is done
    with the character-reversed domain through mylib.bintree_lookup(), whose
    result is returned unchanged.
    NOTE(review): presumably the list stores reversed, sorted domains --
    confirm against the code that writes tracker_all.txt.
    """
    global known_trackers
    if not known_trackers:
        known_trackers = mylib.read_list('tracker_all.txt')
    reversed_domain = domain[::-1]
    return mylib.bintree_lookup(known_trackers, reversed_domain)
|
||||||
|
|
||||||
|
|
||||||
# def md5(fname):
|
# def md5(fname):
|
||||||
# hash_md5 = hashlib.md5()
|
# hash_md5 = hashlib.md5()
|
||||||
# with open(fname, 'rb') as f:
|
# with open(fname, 'rb') as f:
|
||||||
@@ -110,13 +117,6 @@ def exodus(fname):
|
|||||||
return save_list(res, fname, binary=False)
|
return save_list(res, fname, binary=False)
|
||||||
|
|
||||||
|
|
||||||
def is_tracker(domain):
|
|
||||||
global known_trackers
|
|
||||||
if not known_trackers:
|
|
||||||
known_trackers = mylib.read_list('tracker_all.txt')
|
|
||||||
return mylib.bintree_lookup(known_trackers, domain[::-1])
|
|
||||||
|
|
||||||
|
|
||||||
def combine_all(changes):
|
def combine_all(changes):
|
||||||
final = mylib.path_root('src', 'lists', 'tracker_all.txt')
|
final = mylib.path_root('src', 'lists', 'tracker_all.txt')
|
||||||
if changes or not mylib.file_exists(final):
|
if changes or not mylib.file_exists(final):
|
||||||
|
|||||||
Reference in New Issue
Block a user