diff --git a/src/download_itunes.py b/src/download_itunes.py index 2e1a4ec..2de98ea 100755 --- a/src/download_itunes.py +++ b/src/download_itunes.py @@ -33,6 +33,16 @@ def enum_genres(bundle_id): yield lang, gid, name +def choose_lang(obj): + ''' expects dict with {'us': ..., 'de': ...} ''' + for lang in AVAILABLE_LANGS: + try: + return obj[lang] + except KeyError: + pass + return None + + def download_info(bundle_id, lang, force=False): fname = fname_for(bundle_id, lang) if force or not mylib.file_exists(fname): diff --git a/src/html_categories.py b/src/html_categories.py index 28c58d0..c189a2a 100755 --- a/src/html_categories.py +++ b/src/html_categories.py @@ -21,7 +21,7 @@ def process(affected=None, per_page=60): print(' {} ({})'.format(cat, a)) print(' .. {} categories'.format(len(arr))) - src = ''.join([HTML.a(n, '{}/'.format(cid)) for cid, n in arr]) + src = ''.join([HTML.a_category(cid, n) for cid, n in arr]) HTML.write(base, '''

{}

diff --git a/src/index_app_names.py b/src/index_app_names.py index d9089ff..c2df442 100755 --- a/src/index_app_names.py +++ b/src/index_app_names.py @@ -4,67 +4,58 @@ import sys import lib_common as mylib import download_itunes # app_names -_bundle_name_dict = None +_app_names_dict = None -def index_fname(): - return mylib.path_data_index('app_names.json') +def fname_apps_all(): + return mylib.path_data_index('app_names_all.json') -def missing(): - return not mylib.file_exists(index_fname()) +def fname_apps_compact(): + return mylib.path_data_index('app_names_compact.json') -def load_json_if_not_already(): - global _bundle_name_dict - if not _bundle_name_dict: - index_file = index_fname() - if mylib.file_exists(index_file): - _bundle_name_dict = mylib.json_read(index_file) - else: - _bundle_name_dict = {} - - -def write_json_to_disk(): - mylib.json_write(index_fname(), _bundle_name_dict, pretty=False) - - -def get_name(bundle_id, langs=['us', 'de'], fallback='< App-Name >'): - load_json_if_not_already() - for lang in langs: - try: - return _bundle_name_dict[bundle_id][lang] - except KeyError: - continue - return fallback # None +def get_name(bundle_id, fallback='< App-Name >'): + global _app_names_dict + if not _app_names_dict: + _app_names_dict = mylib.json_safe_read(fname_apps_compact(), {}) + try: + return _app_names_dict[bundle_id] + except KeyError: + return fallback def process(bundle_ids, deleteOnly=False): + global _app_names_dict print('writing index: app names ...') if bundle_ids == ['*']: print(' full reset') - mylib.rm_file(index_fname()) # rebuild from ground up + mylib.rm_file(fname_apps_all()) # rebuild from ground up + mylib.rm_file(fname_apps_compact()) - load_json_if_not_already() + index = mylib.json_safe_read(fname_apps_all(), {}) did_change = False for bid in mylib.appids_in_data(bundle_ids): if deleteOnly: - did_change |= mylib.try_del(_bundle_name_dict, [bid]) + did_change |= mylib.try_del(index, [bid]) continue names = download_itunes.get_app_names(bid) if not names: mylib.err('index-app-names', 'could not load: {}'.format(bid)) continue try: - if _bundle_name_dict[bid] == names: + if index[bid] == names: continue except KeyError: pass - _bundle_name_dict[bid] = names + index[bid] = names did_change = True if did_change: print(' writing') - write_json_to_disk() + mylib.json_write(fname_apps_all(), index, pretty=False) + _app_names_dict = {bid: download_itunes.choose_lang(names) + for bid, names in index.items()} + mylib.json_write(fname_apps_compact(), _app_names_dict, pretty=False) else: print(' no change') print('') diff --git a/src/index_categories.py b/src/index_categories.py index 694522f..22545c3 100755 --- a/src/index_categories.py +++ b/src/index_categories.py @@ -12,50 +12,53 @@ def fname_app_categories(): return mylib.path_data_index('app_categories.json') -def fname_category_names(): - return mylib.path_data_index('category_names.json') +def fname_cat_name_all(): + return mylib.path_data_index('category_names_all.json') -def load_json_if_not_already(): - def load_json_from_disk(fname): - return mylib.json_read(fname) if mylib.file_exists(fname) else {} +def fname_cat_name_compact(): + return mylib.path_data_index('category_names_compact.json') + +def load_json_if_not_already(noNames=False): global _dict_apps, _dict_names if not _dict_apps: - _dict_apps = load_json_from_disk(fname_app_categories()) - if not _dict_names: - _dict_names = load_json_from_disk(fname_category_names()) + _dict_apps = mylib.json_safe_read(fname_app_categories(), {}) + if not _dict_names and not noNames: + _dict_names = mylib.json_safe_read(fname_cat_name_compact(), {}) -def try_update_app(bid, genre_ids): +def try_update_app(index, bid, genre_ids): try: - if _dict_apps[bid] == genre_ids: + if index[bid] == genre_ids: return False except KeyError: pass - _dict_apps[bid] = genre_ids + index[bid] = genre_ids return True -def try_update_name(gid, lang, name): +def try_update_name_all(index, cid, lang, name): try: - _dict_names[gid] + index[cid] except KeyError: - _dict_names[gid] = {} + index[cid] = {} try: - if _dict_names[gid][lang]: + if index[cid][lang]: return False # key already exists except KeyError: pass - _dict_names[gid][lang] = name + index[cid][lang] = name return True # updated, need to persist changes def reset_index(): - global _dict_apps - print(' full reset') + global _dict_apps, _dict_names mylib.rm_file(fname_app_categories()) # rebuild from ground up + mylib.rm_file(fname_cat_name_all()) + mylib.rm_file(fname_cat_name_compact()) _dict_apps = None + _dict_names = None def get_categories(bundle_id): @@ -66,13 +69,7 @@ def get_categories(bundle_id): return [] res = [] for gid in genres: - for lang in ['us', 'de']: - try: - name = _dict_names[gid][lang] - except KeyError: - continue - res.append((gid, name)) - break + res.append((gid, _dict_names[gid])) return res @@ -85,40 +82,40 @@ def enum_all_categories(): reverse_index[gid].append(bid) except KeyError: reverse_index[gid] = [bid] - for gid, lang_dict in _dict_names.items(): - for lang in ['us', 'de']: - try: - name = lang_dict[lang] - except KeyError: - continue - yield gid, name, reverse_index[gid] - break + for gid, name in _dict_names.items(): + yield gid, name, reverse_index[gid] def process(bundle_ids, force=False): + global _dict_apps, _dict_names print('writing index: categories ...') if force and bundle_ids == ['*']: + print(' full reset') reset_index() - load_json_if_not_already() + load_json_if_not_already(noNames=False) + name_index = mylib.json_safe_read(fname_cat_name_all(), {}) write_name_index = False write_app_index = False for bid in mylib.appids_in_data(bundle_ids): - genre_ids = [] - for lang, gid, gname in download_itunes.enum_genres(bid): - if gid not in genre_ids: - genre_ids.append(gid) - if try_update_name(gid, lang, gname): + cateogory_ids = [] + for lang, cid, gname in download_itunes.enum_genres(bid): + if cid not in cateogory_ids: + cateogory_ids.append(cid) + if try_update_name_all(name_index, cid, lang, gname): write_name_index = True - if try_update_app(bid, genre_ids): + if try_update_app(_dict_apps, bid, cateogory_ids): write_app_index = True - if write_name_index: - print(' write name-index') - mylib.json_write(fname_category_names(), _dict_names, pretty=False) if write_app_index: print(' write app-index') mylib.json_write(fname_app_categories(), _dict_apps, pretty=False) + if write_name_index: + print(' write name-index') + mylib.json_write(fname_cat_name_all(), name_index, pretty=False) + _dict_names = {cid: download_itunes.choose_lang(names) + for cid, names in name_index.items()} + mylib.json_write(fname_cat_name_compact(), _dict_names, pretty=False) print('') diff --git a/src/index_domains.py b/src/index_domains.py index 6101280..4030d67 100755 --- a/src/index_domains.py +++ b/src/index_domains.py @@ -15,10 +15,8 @@ def fname_tracker(): def load_json_from_disk(index_file): - if mylib.file_exists(index_file): - return mylib.json_read(index_file) - else: - return {'bundle': [], 'pardom': {}, 'subdom': {}} + return mylib.json_safe_read( + index_file, fallback={'bundle': [], 'pardom': {}, 'subdom': {}}) def delete_from_index(index, bundle_ids, deleteOnly=False): diff --git a/src/index_meta.py b/src/index_meta.py index 62849c5..4da92eb 100755 --- a/src/index_meta.py +++ b/src/index_meta.py @@ -13,10 +13,6 @@ def fname_app_rank(): return mylib.path_data_index('app_rank.json') -def load_json_from_disk(fname): - return mylib.json_read(fname) if mylib.file_exists(fname) else {} - - def json_to_list(json): return [ json['sum_rec'], @@ -100,7 +96,7 @@ def write_rank_index(index): def get_total_counts(): try: - return load_json_from_disk(fname_app_summary())['_sum'] + return mylib.json_safe_read(fname_app_summary(), {})['_sum'] except KeyError: return [0, 0] @@ -112,7 +108,7 @@ def process(bundle_ids, deleteOnly=False): print(' full reset') mylib.rm_file(fname) # rebuild from ground up - index = load_json_from_disk(fname) + index = mylib.json_safe_read(fname, {}) ids = mylib.appids_in_data(bundle_ids) write_summary_index(index, ids, deleteOnly=deleteOnly) write_rank_index(index) diff --git a/src/lib_common.py b/src/lib_common.py index 7464086..cd4f377 100755 --- a/src/lib_common.py +++ b/src/lib_common.py @@ -249,14 +249,14 @@ def enum_jsons(bundle_id): yield fname, json.load(fp) -def appids_in_out(selection=['*']): - if selection != ['*']: +def appids_in_out(selection=None): + if selection and selection != ['*']: return selection return [os.path.basename(x) for x in glob.glob(path_out_app('*'))] -def appids_in_data(selection=['*']): - if selection != ['*']: +def appids_in_data(selection=None): + if selection and selection != ['*']: return selection global _all_data_bundle_ids if not _all_data_bundle_ids: @@ -287,6 +287,10 @@ def json_read(path): return json.load(fp) +def json_safe_read(path, fallback=None): + return json_read(path) if file_exists(path) else fallback + + def json_write(path, obj, pretty=False): with open(path, 'w') as fp: json.dump(obj, fp, indent=2 if pretty else None, sort_keys=pretty) diff --git a/src/lib_graphs.py b/src/lib_graphs.py index 87be7c9..1a4fe11 100755 --- a/src/lib_graphs.py +++ b/src/lib_graphs.py @@ -13,11 +13,11 @@ def percent_bar(percent): return '
'.format(round(percent * 100)) -def rank_tile(title, value, additional=None, attr={}, +def rank_tile(title, value, additional=None, attr=None, percent=0.5, rank='?', best='?', worst='?'): if additional: value += '({})'.format(additional) - attr = HTML.attr_and(attr, {'class': 'rank'}) + attr = HTML.attr_and(attr or {}, {'class': 'rank'}) return HTML.div('''

{}

{} {} diff --git a/src/lib_html.py b/src/lib_html.py index 2882ec7..57b3541 100755 --- a/src/lib_html.py +++ b/src/lib_html.py @@ -46,29 +46,27 @@ def attr_and(a, b): # Basic building blocks -def xml(tag, inner, attr={}): +def xml(tag, inner, attr=None): src = '' - for key, val in attr.items(): - if val: - src += ' {}="{}"'.format(key, val) + if attr: + for key, val in attr.items(): + if val: + src += ' {}="{}"'.format(key, val) return '<{0}{1}>{2}'.format(tag, src, inner) -def div(inner, attr={}): +def div(inner, attr=None): return xml('div', inner, attr) -def h2(inner, attr={}): +def h2(inner, attr=None): return xml('h2', inner, attr) -def a(inner, href, attr={}): - return xml('a', inner, attr_and(attr, {'href': href})) - - def a_path(parts, suffix): ''' expects (name, url) tuples ''' - return ' / '.join([a(*x) for x in parts] + [suffix]) + return ' / '.join(['{}'.format(url, title) + for title, url in parts] + [suffix]) # Simple constructs @@ -137,8 +135,8 @@ def app_tile_template():
''' -def app_tiles_all(bundle_ids, per_page=60, attr={}): - attr = attr_and(attr, {'id': 'app-toc', 'class': 'no-ul-all'}) +def app_tiles_all(bundle_ids, per_page=60): + attr = {'id': 'app-toc', 'class': 'no-ul-all'} c_apps = len(bundle_ids) c_pages = int(math.ceil(c_apps / per_page)) for i, apps in apps_sorted_batch(bundle_ids, batch_size=per_page): @@ -170,11 +168,11 @@ def write(path, content, title=None, fname='index.html'): fp.write(base_template(content, title=title)) -def write_app_pages(base, bundle_ids, title, per_page=60, attr={}, pre=''): +def write_app_pages(base, bundle_ids, title, per_page=60, pre=''): pages = 0 entries = 0 mylib.rm_dir(base) - for i, count, src in app_tiles_all(bundle_ids, per_page, attr): + for i, count, src in app_tiles_all(bundle_ids, per_page): pages += 1 entries += count pth = base if i == 1 else mylib.path_add(base, str(i))