From f7d0b9353dd57359580233c7f01b8d82a5016810 Mon Sep 17 00:00:00 2001 From: relikd Date: Fri, 25 Sep 2020 16:39:27 +0200 Subject: [PATCH] Create individual category json --- src/README.md | 5 +- src/html_categories.py | 18 ++-- src/html_index_apps.py | 5 +- src/index_app_names.py | 11 ++- src/index_categories.py | 58 +++++++----- src/index_rank.py | 2 +- src/lib_common.py | 14 ++- src/lib_html.py | 24 ++--- src/z_dependency.svg | 195 ++++++++++++++++++++-------------------- 9 files changed, 179 insertions(+), 153 deletions(-) diff --git a/src/README.md b/src/README.md index 4656bec..657d07d 100644 --- a/src/README.md +++ b/src/README.md @@ -44,13 +44,12 @@ digraph Dependency { "." -> download_itunes "." -> bundle_combine download_itunes -> index_app_names - download_itunes -> index_categories bundle_combine -> index_rank bundle_combine -> index_domains - index_categories -> html_categories index_app_names -> html_index_apps - index_app_names -> html_categories index_app_names -> index_rank + index_app_names -> index_categories + index_categories -> html_categories index_rank -> html_bundle index_rank -> html_rank index_rank -> html_index_domains diff --git a/src/html_categories.py b/src/html_categories.py index c189a2a..899cca9 100755 --- a/src/html_categories.py +++ b/src/html_categories.py @@ -2,7 +2,6 @@ import lib_common as mylib import lib_html as HTML -import index_categories # enum_all_categories def process(affected=None, per_page=60): @@ -10,18 +9,19 @@ def process(affected=None, per_page=60): base = mylib.path_out('category') parent = 'All Categories' arr = [] - for cid, cat, apps in sorted(index_categories.enum_all_categories(), - key=lambda x: x[1].lower()): - arr.append((cid, cat)) + for json in mylib.enum_categories(): + cid, cname = json['cat'] + arr.append((cid, cname)) if affected and cid not in affected: continue - pre = HTML.h2(HTML.a_path([(parent, '../')], cat)) - _, a = HTML.write_app_pages(mylib.path_add(base, cid), apps, cat, - per_page, pre=pre) - print(' {} ({})'.format(cat, a)) + pre = HTML.h2(HTML.a_path([(parent, '../')], cname)) + _, a = HTML.write_app_pages(mylib.path_add(base, cid), json['apps'], + cname, per_page, pre=pre) + print(' {} ({})'.format(cname, a)) print(' .. {} categories'.format(len(arr))) - src = ''.join([HTML.a_category(cid, n) for cid, n in arr]) + mylib.sort_by_name(arr, 1) + src = ''.join([HTML.a_category(*x) for x in arr]) HTML.write(base, '''

{}

diff --git a/src/html_index_apps.py b/src/html_index_apps.py index 5e708fe..9193d59 100755 --- a/src/html_index_apps.py +++ b/src/html_index_apps.py @@ -2,6 +2,7 @@ import lib_common as mylib import lib_html as HTML +import index_app_names # get_sorted_app_names def process(per_page=60): @@ -9,8 +10,8 @@ def process(per_page=60): title = 'Apps (A–Z)' header = HTML.h2(HTML.a_path([('Results', '/results/')], title)) p, a = HTML.write_app_pages(mylib.path_out('index', 'apps'), - mylib.appids_in_out(), title, - per_page=per_page, pre=header) + index_app_names.get_sorted_app_names(), + title, per_page=per_page, pre=header) print(' {} apps'.format(a)) print(' {} pages'.format(p)) print('') diff --git a/src/index_app_names.py b/src/index_app_names.py index c2df442..95a962f 100755 --- a/src/index_app_names.py +++ b/src/index_app_names.py @@ -15,16 +15,25 @@ def fname_apps_compact(): return mylib.path_data_index('app_names_compact.json') -def get_name(bundle_id, fallback='< App-Name >'): +def load_json_if_not_already(): global _app_names_dict if not _app_names_dict: _app_names_dict = mylib.json_safe_read(fname_apps_compact(), {}) + + +def get_name(bundle_id, fallback='< App-Name >'): + load_json_if_not_already() try: return _app_names_dict[bundle_id] except KeyError: return fallback +def get_sorted_app_names(): + load_json_if_not_already() + return sorted(_app_names_dict.items(), key=lambda x: x[1].lower()) + + def process(bundle_ids, deleteOnly=False): global _app_names_dict print('writing index: app names ...') diff --git a/src/index_categories.py b/src/index_categories.py index 22545c3..f27c875 100755 --- a/src/index_categories.py +++ b/src/index_categories.py @@ -3,6 +3,7 @@ import sys import lib_common as mylib import download_itunes # get_genres +import index_app_names # get_name _dict_apps = None _dict_names = None @@ -61,6 +62,37 @@ def reset_index(): _dict_names = None +def persist_name_index(index): + global _dict_names + mylib.json_write(fname_cat_name_all(), index, pretty=False) + _dict_names = {cid: download_itunes.choose_lang(names) + for cid, names in index.items()} + mylib.json_write(fname_cat_name_compact(), _dict_names, pretty=False) + + +def persist_individual_files(): + def sorted_reverse_index(): + ret = {} + for bid, category_ids in _dict_apps.items(): + itm = [bid, index_app_names.get_name(bid)] + for cid in category_ids: + try: + ret[cid].append(itm) + except KeyError: + ret[cid] = [itm] + for cid in ret.keys(): + mylib.sort_by_name(ret[cid], 1) + return ret + + index = sorted_reverse_index() + pth = mylib.path_data_index('category') + mylib.rm_dir(pth) + mylib.mkdir(pth) + for cid, cname in _dict_names.items(): + mylib.json_write(mylib.path_add(pth, 'id_{}.json'.format(cid)), + {'cat': [cid, cname], 'apps': index[cid]}) + + def get_categories(bundle_id): load_json_if_not_already() try: @@ -73,21 +105,8 @@ def get_categories(bundle_id): return res -def enum_all_categories(): - load_json_if_not_already() - reverse_index = {} - for bid, genre_ids in _dict_apps.items(): - for gid in genre_ids: - try: - reverse_index[gid].append(bid) - except KeyError: - reverse_index[gid] = [bid] - for gid, name in _dict_names.items(): - yield gid, name, reverse_index[gid] - - def process(bundle_ids, force=False): - global _dict_apps, _dict_names + global _dict_apps print('writing index: categories ...') if force and bundle_ids == ['*']: print(' full reset') @@ -107,15 +126,14 @@ def process(bundle_ids, force=False): if try_update_app(_dict_apps, bid, cateogory_ids): write_app_index = True + if write_name_index: + print(' write name-index') + persist_name_index(name_index) # names first, they are used below if write_app_index: print(' write app-index') mylib.json_write(fname_app_categories(), _dict_apps, pretty=False) - if write_name_index: - print(' write name-index') - mylib.json_write(fname_cat_name_all(), name_index, pretty=False) - _dict_names = {cid: download_itunes.choose_lang(names) - for cid, names in name_index.items()} - mylib.json_write(fname_cat_name_compact(), _dict_names, pretty=False) + if write_name_index or write_app_index: + persist_individual_files() print('') diff --git a/src/index_rank.py b/src/index_rank.py index 1c076a0..ba5e8d2 100755 --- a/src/index_rank.py +++ b/src/index_rank.py @@ -64,7 +64,7 @@ def write_ranking_list(index): # TODO: doesnt scale well, 100'000 apps ~> 12mb if len(ret) > 500: # limit to most recent X entries ret = ret[:500] - # ret.sort(key=lambda x: x[1].lower()) # sort by name + # mylib.sort_by_name(ret, 1) mylib.json_write(fname_ranking_list(), ret, pretty=False) diff --git a/src/lib_common.py b/src/lib_common.py index cd4f377..1a881a9 100755 --- a/src/lib_common.py +++ b/src/lib_common.py @@ -32,10 +32,10 @@ def path_data_app(bundle_id, filename=None): return path_add(pth, filename) if filename else pth -def path_data_index(filename): +def path_data_index(*filename): pth = path_root('data', '_eval') mkdir(pth) - return path_add(pth, filename) + return path_add(pth, *filename) def path_out(*path_components): @@ -92,6 +92,10 @@ def printf(msg): print(msg, end='', flush=True) +def sort_by_name(obj, col): + obj.sort(key=lambda x: x[col].lower()) + + # Binary Tree Search _list_TLD = None @@ -249,6 +253,12 @@ def enum_jsons(bundle_id): yield fname, json.load(fp) +def enum_categories(): + for fname in glob.glob(path_data_index('category', 'id_*.json')): + with open(fname, 'r') as fp: + yield json.load(fp) + + def appids_in_out(selection=None): if selection and selection != ['*']: return selection diff --git a/src/lib_html.py b/src/lib_html.py index 4105611..a9d8e1e 100755 --- a/src/lib_html.py +++ b/src/lib_html.py @@ -3,7 +3,6 @@ import math # ceil import time # strftime, gmtime import lib_common as mylib -import index_app_names # get_name # REFS @@ -31,12 +30,6 @@ def p_download_json(href, download_name): # Data object preparation -def apps_sorted_batch(bundle_ids, batch_size=60): - apps = [(x, index_app_names.get_name(x)) for x in bundle_ids] - apps.sort(key=lambda x: (x[1].lower(), x[0])) - for i in range(0, len(apps), batch_size): - yield int(i / batch_size), apps[i:i + batch_size] - def attr_and(a, b): res = {} @@ -140,16 +133,17 @@ def app_tile_template():
''' -def app_tiles_all(bundle_ids, per_page=60): +def app_tiles_all(apps, per_page=60): attr = {'id': 'app-toc', 'class': 'no-ul-all'} - c_apps = len(bundle_ids) + c_apps = len(apps) c_pages = int(math.ceil(c_apps / per_page)) - for i, apps in apps_sorted_batch(bundle_ids, batch_size=per_page): - i += 1 + for offset in range(0, len(apps), per_page): + idx = int(offset / per_page) + 1 + batch = apps[offset:offset + per_page] src = '' - for x in apps: + for x in batch: src += app_tile(x[0], x[1]) - yield i, len(apps), div(src, attr) + pagination(i, c_pages) + yield idx, len(batch), div(src, attr) + pagination(idx, c_pages) # Write html to disk @@ -173,11 +167,11 @@ def write(path, content, title=None, fname='index.html'): fp.write(base_template(content, title=title)) -def write_app_pages(base, bundle_ids, title, per_page=60, pre='', post=''): +def write_app_pages(base, apps, title, per_page=60, pre='', post=''): pages = 0 entries = 0 mylib.rm_dir(base) - for i, count, src in app_tiles_all(bundle_ids, per_page): + for i, count, src in app_tiles_all(apps, per_page): pages += 1 entries += count pth = base if i == 1 else mylib.path_add(base, str(i)) diff --git a/src/z_dependency.svg b/src/z_dependency.svg index 4694e56..c99a3df 100644 --- a/src/z_dependency.svg +++ b/src/z_dependency.svg @@ -1,166 +1,161 @@ - + Dependency - + . - -. + +. download_tracker - -download_tracker + +download_tracker .->download_tracker - - + + download_itunes - -download_itunes + +download_itunes .->download_itunes - - + + bundle_combine - -bundle_combine + +bundle_combine .->bundle_combine - - + + -html_ranking - -html_ranking +html_ranking + +html_ranking -.->html_ranking - - +.->html_ranking + + -html_root - -html_root +html_root + +html_root -.->html_root - - +.->html_root + + index_app_names - -index_app_names + +index_app_names download_itunes->index_app_names - - - - -index_categories - -index_categories - - -download_itunes->index_categories - - + + -index_rank - -index_rank +index_rank + +index_rank -bundle_combine->index_rank - - +bundle_combine->index_rank + + -index_domains - -index_domains +index_domains + +index_domains -bundle_combine->index_domains - - +bundle_combine->index_domains + + -index_app_names->index_rank - - - - -html_categories - -html_categories - - -index_app_names->html_categories - - +index_app_names->index_rank + + -html_index_apps - -html_index_apps +html_index_apps + +html_index_apps -index_app_names->html_index_apps - - +index_app_names->html_index_apps + + - -index_categories->html_categories - - + +index_categories + +index_categories - -html_index_domains - -html_index_domains - - -index_rank->html_index_domains - - + +index_app_names->index_categories + + -html_bundle - -html_bundle +html_bundle + +html_bundle -index_rank->html_bundle - - +index_rank->html_bundle + + -html_rank - -html_rank +html_rank + +html_rank -index_rank->html_rank - - +index_rank->html_rank + + + + +html_index_domains + +html_index_domains + + +index_rank->html_index_domains + + index_domains->html_index_domains - - + + + + +html_categories + +html_categories + + +index_categories->html_categories + + \ No newline at end of file