diff --git a/src/README.md b/src/README.md
index 4656bec..657d07d 100644
--- a/src/README.md
+++ b/src/README.md
@@ -44,13 +44,12 @@ digraph Dependency {
"." -> download_itunes
"." -> bundle_combine
download_itunes -> index_app_names
- download_itunes -> index_categories
bundle_combine -> index_rank
bundle_combine -> index_domains
- index_categories -> html_categories
index_app_names -> html_index_apps
- index_app_names -> html_categories
index_app_names -> index_rank
+ index_app_names -> index_categories
+ index_categories -> html_categories
index_rank -> html_bundle
index_rank -> html_rank
index_rank -> html_index_domains
diff --git a/src/html_categories.py b/src/html_categories.py
index c189a2a..899cca9 100755
--- a/src/html_categories.py
+++ b/src/html_categories.py
@@ -2,7 +2,6 @@
import lib_common as mylib
import lib_html as HTML
-import index_categories # enum_all_categories
def process(affected=None, per_page=60):
@@ -10,18 +9,19 @@ def process(affected=None, per_page=60):
base = mylib.path_out('category')
parent = 'All Categories'
arr = []
- for cid, cat, apps in sorted(index_categories.enum_all_categories(),
- key=lambda x: x[1].lower()):
- arr.append((cid, cat))
+ for json in mylib.enum_categories():
+ cid, cname = json['cat']
+ arr.append((cid, cname))
if affected and cid not in affected:
continue
- pre = HTML.h2(HTML.a_path([(parent, '../')], cat))
- _, a = HTML.write_app_pages(mylib.path_add(base, cid), apps, cat,
- per_page, pre=pre)
- print(' {} ({})'.format(cat, a))
+ pre = HTML.h2(HTML.a_path([(parent, '../')], cname))
+ _, a = HTML.write_app_pages(mylib.path_add(base, cid), json['apps'],
+ cname, per_page, pre=pre)
+ print(' {} ({})'.format(cname, a))
print(' .. {} categories'.format(len(arr)))
- src = ''.join([HTML.a_category(cid, n) for cid, n in arr])
+ mylib.sort_by_name(arr, 1)
+ src = ''.join([HTML.a_category(*x) for x in arr])
HTML.write(base, '''
{}
diff --git a/src/html_index_apps.py b/src/html_index_apps.py
index 5e708fe..9193d59 100755
--- a/src/html_index_apps.py
+++ b/src/html_index_apps.py
@@ -2,6 +2,7 @@
import lib_common as mylib
import lib_html as HTML
+import index_app_names # get_sorted_app_names
def process(per_page=60):
@@ -9,8 +10,8 @@ def process(per_page=60):
title = 'Apps (A–Z)'
header = HTML.h2(HTML.a_path([('Results', '/results/')], title))
p, a = HTML.write_app_pages(mylib.path_out('index', 'apps'),
- mylib.appids_in_out(), title,
- per_page=per_page, pre=header)
+ index_app_names.get_sorted_app_names(),
+ title, per_page=per_page, pre=header)
print(' {} apps'.format(a))
print(' {} pages'.format(p))
print('')
diff --git a/src/index_app_names.py b/src/index_app_names.py
index c2df442..95a962f 100755
--- a/src/index_app_names.py
+++ b/src/index_app_names.py
@@ -15,16 +15,25 @@ def fname_apps_compact():
return mylib.path_data_index('app_names_compact.json')
-def get_name(bundle_id, fallback='< App-Name >'):
+def load_json_if_not_already():
global _app_names_dict
if not _app_names_dict:
_app_names_dict = mylib.json_safe_read(fname_apps_compact(), {})
+
+
+def get_name(bundle_id, fallback='< App-Name >'):
+ load_json_if_not_already()
try:
return _app_names_dict[bundle_id]
except KeyError:
return fallback
+def get_sorted_app_names():
+ load_json_if_not_already()
+ return sorted(_app_names_dict.items(), key=lambda x: x[1].lower())
+
+
def process(bundle_ids, deleteOnly=False):
global _app_names_dict
print('writing index: app names ...')
diff --git a/src/index_categories.py b/src/index_categories.py
index 22545c3..f27c875 100755
--- a/src/index_categories.py
+++ b/src/index_categories.py
@@ -3,6 +3,7 @@
import sys
import lib_common as mylib
import download_itunes # get_genres
+import index_app_names # get_name
_dict_apps = None
_dict_names = None
@@ -61,6 +62,37 @@ def reset_index():
_dict_names = None
+def persist_name_index(index):
+ global _dict_names
+ mylib.json_write(fname_cat_name_all(), index, pretty=False)
+ _dict_names = {cid: download_itunes.choose_lang(names)
+ for cid, names in index.items()}
+ mylib.json_write(fname_cat_name_compact(), _dict_names, pretty=False)
+
+
+def persist_individual_files():
+ def sorted_reverse_index():
+ ret = {}
+ for bid, category_ids in _dict_apps.items():
+ itm = [bid, index_app_names.get_name(bid)]
+ for cid in category_ids:
+ try:
+ ret[cid].append(itm)
+ except KeyError:
+ ret[cid] = [itm]
+ for cid in ret.keys():
+ mylib.sort_by_name(ret[cid], 1)
+ return ret
+
+ index = sorted_reverse_index()
+ pth = mylib.path_data_index('category')
+ mylib.rm_dir(pth)
+ mylib.mkdir(pth)
+ for cid, cname in _dict_names.items():
+ mylib.json_write(mylib.path_add(pth, 'id_{}.json'.format(cid)),
+ {'cat': [cid, cname], 'apps': index[cid]})
+
+
def get_categories(bundle_id):
load_json_if_not_already()
try:
@@ -73,21 +105,8 @@ def get_categories(bundle_id):
return res
-def enum_all_categories():
- load_json_if_not_already()
- reverse_index = {}
- for bid, genre_ids in _dict_apps.items():
- for gid in genre_ids:
- try:
- reverse_index[gid].append(bid)
- except KeyError:
- reverse_index[gid] = [bid]
- for gid, name in _dict_names.items():
- yield gid, name, reverse_index[gid]
-
-
def process(bundle_ids, force=False):
- global _dict_apps, _dict_names
+ global _dict_apps
print('writing index: categories ...')
if force and bundle_ids == ['*']:
print(' full reset')
@@ -107,15 +126,14 @@ def process(bundle_ids, force=False):
if try_update_app(_dict_apps, bid, cateogory_ids):
write_app_index = True
+ if write_name_index:
+ print(' write name-index')
+ persist_name_index(name_index) # names first, they are used below
if write_app_index:
print(' write app-index')
mylib.json_write(fname_app_categories(), _dict_apps, pretty=False)
- if write_name_index:
- print(' write name-index')
- mylib.json_write(fname_cat_name_all(), name_index, pretty=False)
- _dict_names = {cid: download_itunes.choose_lang(names)
- for cid, names in name_index.items()}
- mylib.json_write(fname_cat_name_compact(), _dict_names, pretty=False)
+ if write_name_index or write_app_index:
+ persist_individual_files()
print('')
diff --git a/src/index_rank.py b/src/index_rank.py
index 1c076a0..ba5e8d2 100755
--- a/src/index_rank.py
+++ b/src/index_rank.py
@@ -64,7 +64,7 @@ def write_ranking_list(index):
# TODO: doesnt scale well, 100'000 apps ~> 12mb
if len(ret) > 500: # limit to most recent X entries
ret = ret[:500]
- # ret.sort(key=lambda x: x[1].lower()) # sort by name
+ # mylib.sort_by_name(ret, 1)
mylib.json_write(fname_ranking_list(), ret, pretty=False)
diff --git a/src/lib_common.py b/src/lib_common.py
index cd4f377..1a881a9 100755
--- a/src/lib_common.py
+++ b/src/lib_common.py
@@ -32,10 +32,10 @@ def path_data_app(bundle_id, filename=None):
return path_add(pth, filename) if filename else pth
-def path_data_index(filename):
+def path_data_index(*filename):
pth = path_root('data', '_eval')
mkdir(pth)
- return path_add(pth, filename)
+ return path_add(pth, *filename)
def path_out(*path_components):
@@ -92,6 +92,10 @@ def printf(msg):
print(msg, end='', flush=True)
+def sort_by_name(obj, col):
+ obj.sort(key=lambda x: x[col].lower())
+
+
# Binary Tree Search
_list_TLD = None
@@ -249,6 +253,12 @@ def enum_jsons(bundle_id):
yield fname, json.load(fp)
+def enum_categories():
+ for fname in glob.glob(path_data_index('category', 'id_*.json')):
+ with open(fname, 'r') as fp:
+ yield json.load(fp)
+
+
def appids_in_out(selection=None):
if selection and selection != ['*']:
return selection
diff --git a/src/lib_html.py b/src/lib_html.py
index 4105611..a9d8e1e 100755
--- a/src/lib_html.py
+++ b/src/lib_html.py
@@ -3,7 +3,6 @@
import math # ceil
import time # strftime, gmtime
import lib_common as mylib
-import index_app_names # get_name
# REFS
@@ -31,12 +30,6 @@ def p_download_json(href, download_name):
# Data object preparation
-def apps_sorted_batch(bundle_ids, batch_size=60):
- apps = [(x, index_app_names.get_name(x)) for x in bundle_ids]
- apps.sort(key=lambda x: (x[1].lower(), x[0]))
- for i in range(0, len(apps), batch_size):
- yield int(i / batch_size), apps[i:i + batch_size]
-
def attr_and(a, b):
res = {}
@@ -140,16 +133,17 @@ def app_tile_template():
'''
-def app_tiles_all(bundle_ids, per_page=60):
+def app_tiles_all(apps, per_page=60):
attr = {'id': 'app-toc', 'class': 'no-ul-all'}
- c_apps = len(bundle_ids)
+ c_apps = len(apps)
c_pages = int(math.ceil(c_apps / per_page))
- for i, apps in apps_sorted_batch(bundle_ids, batch_size=per_page):
- i += 1
+ for offset in range(0, len(apps), per_page):
+ idx = int(offset / per_page) + 1
+ batch = apps[offset:offset + per_page]
src = ''
- for x in apps:
+ for x in batch:
src += app_tile(x[0], x[1])
- yield i, len(apps), div(src, attr) + pagination(i, c_pages)
+ yield idx, len(batch), div(src, attr) + pagination(idx, c_pages)
# Write html to disk
@@ -173,11 +167,11 @@ def write(path, content, title=None, fname='index.html'):
fp.write(base_template(content, title=title))
-def write_app_pages(base, bundle_ids, title, per_page=60, pre='', post=''):
+def write_app_pages(base, apps, title, per_page=60, pre='', post=''):
pages = 0
entries = 0
mylib.rm_dir(base)
- for i, count, src in app_tiles_all(bundle_ids, per_page):
+ for i, count, src in app_tiles_all(apps, per_page):
pages += 1
entries += count
pth = base if i == 1 else mylib.path_add(base, str(i))
diff --git a/src/z_dependency.svg b/src/z_dependency.svg
index 4694e56..c99a3df 100644
--- a/src/z_dependency.svg
+++ b/src/z_dependency.svg
@@ -1,166 +1,161 @@
-