diff --git a/src/README.md b/src/README.md index 1eddae9..43f1aab 100644 --- a/src/README.md +++ b/src/README.md @@ -1,31 +1,55 @@ -- `main.py` -Simply call this script in a cron job or something and it will take care of everything else. -It will run the scripts below in the proper order. +![dependency](z_dependency.png) -- `3rd-domains.txt` -Contains a list of common 3rd level domains, such as `co.uk`. +## Structure -- `common_lib.py` -Library with useful functions used across multiple python scripts. +In general all `html_` scripts generate the html output and all other scripts generate intermediate or commonly used `json` files. -- `bundle_import.py` -Will copy all `*.json` files from `data/_in` to their bundle id dest folder e.g. -`mv data/_in/test.json data/com/apple/notes/id_42.json`. +### Adding new recordings +`api/v1/contribute/index.php` handles incoming recording contributions. They are automatically inserted in their appropriate folder e.g. `data/com/apple/notes`. Additionally the PHP script places a marker (bundle-id) in `data/_in`. -- `bundle_combine.py` -Merges all `id_*.json` files from a bundle id into a single `combined.json`. -(run this script with one or multiple bundle ids as parameter.) +### Updating data +A cron job runs every minute and checks `data/_in` for new markers. If there is a new one, it rebuilds the app html page and all json files that are affected. +There are two special cases `_longterm` and `_manually`. In the latter case the user did not provide an appropriate app prior to upload. These recordings must be evaluated manually and are completely excluded from automatic processing. +The former is self-explanatory: recordings with over an hour of recording time. -- `bundle_download.py` -Download and cache app metadata from apple servers in de and en given a bundle id. Will also download the app icon and store it in the bundle id out folder. -(run this script with one or multiple bundle ids as parameter.) 
+**Run:** `main.py import` which does everything and avoids unnecessary rebuilding. -- `html_bundle.py` -Takes the `combined.json` file and generates the graphs and html file. -(run this script with one or multiple bundle ids as parameter.) +### Updating tracker db +Ad and tracking domains are not updated automatically; in fact, they are not updated at all unless you run the command below. You could create a cron job for that too, e.g. once a week. -- `html_index.py` -Create all pages for the app index and link to bundle id subpages. +**Run:** `main.py tracker` which will update the db and all app pages that are affected. -- `html_root.py` -Create main `index.html`. +If you want to add custom domains, edit `api/v1/trackers/list.txt` and run the same command. + +### Delete a single app +The delete command does not delete the app result (json), only the html files. So if you rebuild the website it will reappear. This function is here for the cases where you have already deleted the json files, but the html output is still online. + +**Run:** `main.py del com.apple.notes com.apple.siri` + +### Development & Complete rebuild +During development or when pushing new changes to the server, you'll need to rebuild all existing html files. You can do that by rebuilding all individual apps and the app + domain indices. + +**Run:** `main.py run '*' && main.py index` + +If you are missing some icons run `main.py icons`. This should also download any missing iTunes information, e.g. app metadata like name and categories. + + +## Dependency graph + +Given A → B, B depends on A + +``` +digraph G { + "." -> html_root + "." -> bundle_download + bundle_download -> index_bundle_names + index_bundle_names -> html_bundle + index_bundle_names -> html_index + "." -> bundle_combine + bundle_combine -> index_reverse_domains + index_reverse_domains -> html_reverse_domains + bundle_combine -> html_bundle + "." 
-> tracker_download +} +``` +[graphviz](http://www.webgraphviz.com/) \ No newline at end of file diff --git a/src/html_root.py b/src/html_root.py index cd30635..a4e20ca 100755 --- a/src/html_root.py +++ b/src/html_root.py @@ -98,6 +98,7 @@ def process(): gen_root() # root index.thml gen_search() # root redirect.html?id=my.bundle.id gen_help() + print('') if __name__ == '__main__': diff --git a/src/main.py b/src/main.py index 872886f..30f0a5e 100755 --- a/src/main.py +++ b/src/main.py @@ -8,6 +8,7 @@ import bundle_download import html_root import html_index import html_bundle +import html_reverse_domains import index_bundle_names import index_reverse_domains import tracker_download @@ -26,28 +27,33 @@ def print_usage_and_exit(): exit(0) -def rebuild_index(inclRoot=False): +def rebuild_app_index(inclRoot=False): html_index.process() if inclRoot: # TODO: remove check if root contains dynamic content html_root.process() +def rebuild_domain_index(bundle_ids, deleteOnly=False): + index_reverse_domains.process(bundle_ids, deleteOnly=deleteOnly) + html_reverse_domains.process() + + def del_id(bundle_ids): print('removing apps from website:') if bundle_ids == ['*']: bundle_ids = list(mylib.enum_appids()) - update_index = False + update_app_index = False for bid in bundle_ids: dest = mylib.path_out_app(bid) if mylib.dir_exists(dest): print(' ' + bid) mylib.rm_dir(dest) - update_index = True + update_app_index = True print('') - index_reverse_domains.process(bundle_ids, deleteOnly=True) - if update_index: - rebuild_index() + rebuild_domain_index(bundle_ids, deleteOnly=True) + if update_app_index: + rebuild_app_index(inclRoot=True) def combine_and_update(bundle_ids, where=None): @@ -55,7 +61,7 @@ def combine_and_update(bundle_ids, where=None): new_ids = bundle_download.process(bundle_ids) # 2. if new apps, update bundle name index if len(new_ids) > 0: - index_bundle_names.process(new_ids) + index_bundle_names.process(new_ids) # after bundle_download # 3. 
re-calculate combined.json and evaluated.json files affected = bundle_combine.process(bundle_ids, where=where) # special case needed for reverse index. '*' will force rebuilt index @@ -63,13 +69,13 @@ def combine_and_update(bundle_ids, where=None): affected = ['*'] # 4. was any json updated? if so, make html and update reverse index if len(affected) > 0: - index_reverse_domains.process(affected) - html_bundle.process(affected) + rebuild_domain_index(affected) # after bundle_combine + html_bundle.process(affected) # after index_bundle_names else: print('no bundle affected by tracker, not generating bundle html') # 5. make all apps index if len(new_ids) > 0: - rebuild_index() # must be called after bundle_combine + rebuild_app_index() # must be called after bundle_combine else: print('no new bundle, not rebuilding index') @@ -116,12 +122,13 @@ try: import_update() elif cmd == 'tracker': tracker_update() - # tracker_download.combine_all('x') + # tracker_download.combine_all() elif cmd == 'icons': if bundle_download.download_missing_icons(force=False): - rebuild_index() + rebuild_app_index() elif cmd == 'index': - rebuild_index(inclRoot=True) + rebuild_domain_index(['*']) + rebuild_app_index(inclRoot=True) elif cmd == 'run': if len(params) == 0: print_usage_and_exit() diff --git a/src/tracker_download.py b/src/tracker_download.py index 725de0d..8f5a54f 100755 --- a/src/tracker_download.py +++ b/src/tracker_download.py @@ -117,7 +117,7 @@ def exodus(fname): return save_list(res, fname, binary=False) -def combine_all(changes): +def combine_all(changes=['_']): final = mylib.path_root('src', 'lists', 'tracker_all.txt') if changes or not mylib.file_exists(final): print(' updating: tracker_all.txt') diff --git a/src/z_dependency.png b/src/z_dependency.png new file mode 100644 index 0000000..3d60482 Binary files /dev/null and b/src/z_dependency.png differ