Bundle name index
This commit is contained in:
@@ -38,18 +38,17 @@ def json_combine(bundle_id):
|
||||
except KeyError:
|
||||
ddic[key] = (tracker.is_tracker(key), [num])
|
||||
|
||||
res = dict({'rec_len': [], 'name': mylib.app_name(bundle_id)})
|
||||
pardom = dict()
|
||||
subdom = dict()
|
||||
res = {'rec_len': []}
|
||||
pardom = {}
|
||||
subdom = {}
|
||||
latest = 0
|
||||
for fname, jdata in mylib.enum_jsons(bundle_id):
|
||||
# TODO: load combined and append newest only, then update evaluated
|
||||
latest = max(latest, os.path.getmtime(fname)) # or getctime
|
||||
# if not res['name']:
|
||||
# res['name'] = jdata['app-name']
|
||||
res['rec_len'].append(jdata['duration'])
|
||||
try:
|
||||
logs = jdata['logs']
|
||||
uniq_par = dict()
|
||||
uniq_par = {}
|
||||
for subdomain in logs:
|
||||
occurs = len(logs[subdomain])
|
||||
inc_dic(subdom, subdomain, occurs)
|
||||
@@ -69,8 +68,6 @@ def json_combine(bundle_id):
|
||||
|
||||
|
||||
def json_evaluate_inplace(obj):
|
||||
if not obj['name']:
|
||||
obj['name'] = '< App-Name >'
|
||||
rec_count = len(obj['rec_len'])
|
||||
time_total = sum(obj['rec_len'])
|
||||
del(obj['rec_len'])
|
||||
@@ -81,7 +78,7 @@ def json_evaluate_inplace(obj):
|
||||
obj['avg_time'] = time_total / rec_count
|
||||
|
||||
def transform(ddic):
|
||||
res = list()
|
||||
res = []
|
||||
c_sum = 0
|
||||
c_trkr = 0
|
||||
for name, (is_tracker, counts) in ddic.items():
|
||||
|
||||
@@ -32,6 +32,12 @@ def path_data_app(bundle_id, filename=None):
|
||||
return path_add(pth, filename) if filename else pth
|
||||
|
||||
|
||||
def path_data_index(filename):
    """Return the path of `filename` inside the 'data/_eval' index folder.

    The folder path is built with path_root('data', '_eval') and passed to
    mkdir() before the file name is appended with path_add().
    """
    index_dir = path_root('data', '_eval')
    mkdir(index_dir)
    index_path = path_add(index_dir, filename)
    return index_path
|
||||
|
||||
|
||||
def path_out(*path_components):
    """Return path_root() applied to 'out' followed by `path_components`."""
    parts = ('out',) + path_components
    return path_root(*parts)
|
||||
|
||||
@@ -76,17 +82,18 @@ def valid_bundle_id(bundle_id):
|
||||
return regex_bundle_id.match(bundle_id)
|
||||
|
||||
|
||||
def app_name(bundle_id, fallback=None):
|
||||
def app_names(bundle_id):
|
||||
def name_for(lang):
|
||||
try:
|
||||
return json_read_meta(bundle_id, lang)['trackCensoredName']
|
||||
except Exception:
|
||||
return None
|
||||
ret = {}
|
||||
for lang in ['us', 'de']:
|
||||
name = name_for(lang)
|
||||
if name:
|
||||
return name
|
||||
return fallback
|
||||
ret[lang] = name
|
||||
return ret
|
||||
|
||||
|
||||
def err(scope, msg, logOnly=False):
|
||||
|
||||
@@ -5,6 +5,7 @@ import sys
|
||||
import time
|
||||
import math
|
||||
import common_lib as mylib
|
||||
import index_bundle_names
|
||||
|
||||
|
||||
def seconds_to_time(seconds):
|
||||
@@ -80,9 +81,10 @@ def gen_dom_tags(sorted_arr, onlyTrackers=False):
|
||||
|
||||
|
||||
def gen_html(bundle_id, obj):
|
||||
name = index_bundle_names.get_name(bundle_id)
|
||||
obj['tracker'] = list(filter(lambda x: x[2], obj['subdom']))
|
||||
return mylib.template_with_base(f'''
|
||||
<h2 class="title">{obj['name']}</h2>
|
||||
<h2 class="title">{name}</h2>
|
||||
<p class="subtitle snd"><i class="mg_lr">Bundle-id:</i>{ bundle_id }</p>
|
||||
<div id="meta">
|
||||
<div class="icons">
|
||||
@@ -122,7 +124,7 @@ def gen_html(bundle_id, obj):
|
||||
{ gen_dotgraph(obj['subdom']) }
|
||||
{ gen_dom_tags(obj['subdom']) }
|
||||
</div>
|
||||
<p class="right snd">Download: <a href="data.json" download="{bundle_id}.json">json</a></p>''', title=obj['name'])
|
||||
<p class="right snd">Download: <a href="data.json" download="{bundle_id}.json">json</a></p>''', title=name)
|
||||
|
||||
|
||||
def process(bundle_ids):
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import common_lib as mylib
|
||||
import index_bundle_names
|
||||
|
||||
|
||||
def gen_obj(bundle_id):
|
||||
@@ -10,7 +11,7 @@ def gen_obj(bundle_id):
|
||||
icon = '/static/app-template.svg'
|
||||
return {
|
||||
'id': bundle_id,
|
||||
'name': mylib.app_name(bundle_id, '< App-Name >'),
|
||||
'name': index_bundle_names.get_name(bundle_id),
|
||||
'img': icon
|
||||
}
|
||||
|
||||
|
||||
66
src/index_bundle_names.py
Executable file
66
src/index_bundle_names.py
Executable file
@@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import sys
|
||||
import common_lib as mylib
|
||||
|
||||
_bundle_name_dict = None
|
||||
|
||||
|
||||
def index_fname():
    """Return the path of the bundle-name index file 'bundle_names.json'."""
    fname = 'bundle_names.json'
    return mylib.path_data_index(fname)
|
||||
|
||||
|
||||
def load_json_if_not_already():
    """Populate the module-level bundle-name cache on first use.

    Reads the index file if it exists, otherwise starts with an empty dict.
    The check is `is None` rather than truthiness so that an index which was
    already loaded but is (still) empty is not re-read from disk on every
    call.
    """
    global _bundle_name_dict
    if _bundle_name_dict is not None:
        return  # already loaded (possibly empty)
    index_file = index_fname()
    if mylib.file_exists(index_file):
        _bundle_name_dict = mylib.json_read(index_file)
    else:
        _bundle_name_dict = {}
|
||||
|
||||
|
||||
def write_json_to_disk():
    """Write the in-memory bundle-name dict to the index file (pretty=True)."""
    target = index_fname()
    mylib.json_write(target, _bundle_name_dict, pretty=True)
|
||||
|
||||
|
||||
def get_name(bundle_id, langs=('us', 'de')):
    """Return the indexed app name for `bundle_id`.

    `langs` is an iterable of language keys tried in order; the first key
    present in the bundle's index entry wins. The default is a tuple rather
    than a list so the default value cannot be mutated between calls.

    Returns the placeholder '< App-Name >' when the bundle is not indexed or
    none of the requested languages is available.
    """
    load_json_if_not_already()
    try:
        # Look the bundle up once instead of once per language.
        names = _bundle_name_dict[bundle_id]
    except KeyError:
        return '< App-Name >'  # None
    for lang in langs:
        try:
            return names[lang]
        except KeyError:
            continue
    return '< App-Name >'  # None
|
||||
|
||||
|
||||
def process(bundle_ids):
    """Update the bundle-name index for the given bundle ids.

    `bundle_ids` is a list of bundle ids. The special value ['*'] removes the
    index file and rebuilds it for every id yielded by
    mylib.enum_data_appids().

    For each id the names returned by mylib.app_names() are stored in the
    index; ids without any name are reported through mylib.err() and
    skipped. The index file is written only if at least one entry was stored.
    """
    global _bundle_name_dict
    print('writing index: bundle name ...')
    if bundle_ids == ['*']:
        bundle_ids = list(mylib.enum_data_appids())
        print(' full reset')
        mylib.rm_file(index_fname())  # rebuild from ground up
        # Also drop an already loaded in-memory index; otherwise stale
        # entries would survive the reset and be written back to disk.
        _bundle_name_dict = None

    load_json_if_not_already()
    did_change = False
    for bid in bundle_ids:
        names = mylib.app_names(bid)
        if not names:
            mylib.err('index-bundle-names', 'could not load: {}'.format(bid))
            continue
        _bundle_name_dict[bid] = names
        did_change = True
    if did_change:
        write_json_to_disk()
    else:
        print(' no change')
    print('')
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Command line entry: every argument is treated as a bundle id.
    args = sys.argv[1:]
    if not args:
        # process(['*'])
        mylib.usage(__file__, '[bundle_id] [...]')
    else:
        process(args)
|
||||
@@ -4,12 +4,6 @@ import sys
|
||||
import common_lib as mylib
|
||||
|
||||
|
||||
def get_index_path():
    """Return the path of 'reverse_index.json' inside the 'data/_eval' folder.

    The folder path is passed to mylib.mkdir() before the file name is
    appended.
    """
    eval_dir = mylib.path_root('data', '_eval')
    mylib.mkdir(eval_dir)
    return mylib.path_add(eval_dir, 'reverse_index.json')
|
||||
|
||||
|
||||
def load_index_json(file_path):
|
||||
if mylib.file_exists(file_path):
|
||||
json = mylib.json_read(file_path)
|
||||
@@ -52,7 +46,10 @@ def insert_in_index(index, bundle_ids):
|
||||
except ValueError: # index not found
|
||||
i = len(index['bundle'])
|
||||
index['bundle'].append(bid)
|
||||
json, _ = mylib.json_read_evaluated(bid)
|
||||
try:
|
||||
json, _ = mylib.json_read_evaluated(bid)
|
||||
except FileNotFoundError:
|
||||
continue
|
||||
for key in ['pardom', 'subdom']: # assuming keys are identical
|
||||
for domain, _, _ in json[key]:
|
||||
try:
|
||||
@@ -64,10 +61,11 @@ def insert_in_index(index, bundle_ids):
|
||||
|
||||
|
||||
def process(bundle_ids, deleteOnly=False):
|
||||
print('writing reverse index ...')
|
||||
index_file = get_index_path()
|
||||
print('writing index: reverse domains ...')
|
||||
index_file = mylib.path_data_index('reverse_domains.json')
|
||||
if bundle_ids == ['*']:
|
||||
bundle_ids = list(mylib.enum_data_appids())
|
||||
print(' full reset')
|
||||
mylib.rm_file(index_file) # rebuild from ground up
|
||||
# load previous index
|
||||
json = load_index_json(index_file)
|
||||
17
src/main.py
17
src/main.py
@@ -8,7 +8,8 @@ import bundle_download
|
||||
import html_root
|
||||
import html_index
|
||||
import html_bundle
|
||||
import json_reverse_index
|
||||
import index_bundle_names
|
||||
import index_reverse_domains
|
||||
import tracker_download
|
||||
|
||||
|
||||
@@ -44,23 +45,31 @@ def del_id(bundle_ids):
|
||||
mylib.rm_dir(dest)
|
||||
update_index = True
|
||||
print('')
|
||||
json_reverse_index.process(bundle_ids, deleteOnly=True)
|
||||
index_reverse_domains.process(bundle_ids, deleteOnly=True)
|
||||
if update_index:
|
||||
rebuild_index()
|
||||
|
||||
|
||||
def combine_and_update(bundle_ids, where=None):
|
||||
# 1. download meta data from iTunes store, incl. app icons
|
||||
new_ids = bundle_download.process(bundle_ids)
|
||||
# 2. if new apps, update bundle name index
|
||||
if len(new_ids) > 0:
|
||||
index_bundle_names.process(new_ids)
|
||||
# 3. re-calculate combined.json and evaluated.json files
|
||||
affected = bundle_combine.process(bundle_ids, where=where)
|
||||
# special case needed for reverse index. '*' will force rebuilt index
|
||||
if not where and bundle_ids == ['*']:
|
||||
affected = ['*']
|
||||
# 4. was any json updated? if so, make html and update reverse index
|
||||
if len(affected) > 0:
|
||||
json_reverse_index.process(affected)
|
||||
index_reverse_domains.process(affected)
|
||||
html_bundle.process(affected)
|
||||
else:
|
||||
print('no bundle affected by tracker, not generating bundle html')
|
||||
# 5. make all apps index
|
||||
if len(new_ids) > 0:
|
||||
rebuild_index()
|
||||
rebuild_index() # must be called after bundle_combine
|
||||
else:
|
||||
print('no new bundle, not rebuilding index')
|
||||
|
||||
|
||||
@@ -6,6 +6,13 @@ import common_lib as mylib
|
||||
known_trackers = None
|
||||
|
||||
|
||||
def is_tracker(domain):
    """Look up `domain` in the known-tracker list.

    The list is read from 'tracker_all.txt' on first use and kept in the
    module-level `known_trackers`. The lookup is done on the reversed domain
    string via mylib.bintree_lookup(), whose result is returned unchanged.
    """
    global known_trackers
    if not known_trackers:
        known_trackers = mylib.read_list('tracker_all.txt')
    reversed_domain = domain[::-1]
    return mylib.bintree_lookup(known_trackers, reversed_domain)
|
||||
|
||||
|
||||
# def md5(fname):
|
||||
# hash_md5 = hashlib.md5()
|
||||
# with open(fname, 'rb') as f:
|
||||
@@ -110,13 +117,6 @@ def exodus(fname):
|
||||
return save_list(res, fname, binary=False)
|
||||
|
||||
|
||||
def is_tracker(domain):
    """Look up `domain` in the known-tracker list.

    The list is read from 'tracker_all.txt' on first use and kept in the
    module-level `known_trackers`. The lookup is done on the reversed domain
    string via mylib.bintree_lookup(), whose result is returned unchanged.
    """
    global known_trackers
    if not known_trackers:
        known_trackers = mylib.read_list('tracker_all.txt')
    reversed_domain = domain[::-1]
    return mylib.bintree_lookup(known_trackers, reversed_domain)
|
||||
|
||||
|
||||
def combine_all(changes):
|
||||
final = mylib.path_root('src', 'lists', 'tracker_all.txt')
|
||||
if changes or not mylib.file_exists(final):
|
||||
|
||||
Reference in New Issue
Block a user