Update docu and domain index html generation

This commit is contained in:
relikd
2020-09-19 01:44:07 +02:00
parent 38a3115bdd
commit 25673e5f5a
5 changed files with 69 additions and 37 deletions

View File

@@ -1,31 +1,55 @@
- `main.py` ![dependency](z_dependency.png)
Simply call this script in a cron job or something and it will take care of everything else.
It will run the scripts below in the proper order.
- `3rd-domains.txt` ## Structure
Contains a list of common 3rd level domains, such as `co.uk`.
- `common_lib.py` In general all `html_` scripts generate the html output and all other scripts generate intermediate or commonly used `json` files.
Library with useful functions used across multiple python scripts.
- `bundle_import.py` ### Adding new recordings
Will copy all `*.json` files from `data/_in` to their bundle id dest folder e.g. `api/v1/contribute/index.php` handles incoming recording contributions. They are automatically inserted into their appropriate folder, e.g. `data/com/apple/notes`. Additionally, the php script places a marker (bundle-id) in `data/_in`.
`mv data/_in/test.json data/com/apple/notes/id_42.json`.
- `bundle_combine.py` ### Updating data
Merges all `id_*.json` files from a bundle id into a single `combined.json`. A cron job runs every minute and checks `data/_in` for new markers. If there is a new one, rebuild the app html page and all json files that are affected.
(run this script with one or multiple bundle ids as parameter.) There are two special cases `_longterm` and `_manually`. In the latter case the user did not provide an appropriate app prior to upload. These recordings must be evaluated manually and are completely excluded from automatic processing.
The former is self-explanatory: recordings with over an hour of recording time.
- `bundle_download.py` **Run:** `main.py import` which does everything and avoids unnecessary rebuilding.
Download and cache app metadata from apple servers in de and en given a bundle id. Will also download the app icon and store it in the bundle id out folder.
(run this script with one or multiple bundle ids as parameter.)
- `html_bundle.py` ### Updating tracker db
Takes the `combined.json` file and generates the graphs and html file. Ad- and tracking domains are not automatically updated. In fact not at all. You could create a cron job for that too. Like once a week or so.
(run this script with one or multiple bundle ids as parameter.)
- `html_index.py` **Run:** `main.py tracker` which will update the db and all app pages that are affected.
Create all pages for the app index and link to bundle id subpages.
- `html_root.py` If you want to add custom domains, edit `api/v1/trackers/list.txt` and run the same command.
Create main `index.html`.
### Delete a single app
The delete command does not delete the app result (json), only the html files. So if you rebuild the website it will reappear. This function is here for the cases where you already deleted the json files, but the html output is still online.
**Run:** `main.py del com.apple.notes com.apple.siri`
### Development & Complete rebuild
During development or if pushing new changes to the server, you'll need to rebuild all existing html files. You can do that by rebuilding all individual apps and the app + domain indices.
**Run:** `main.py run '*' && main.py index`
If you are missing some icons run `main.py icons`. This should also download any missing iTunes information. E.g. App meta data like name and categories.
## Dependency graph
Given A → B, B depends on A
```
digraph G {
"." -> html_root
"." -> bundle_download
bundle_download -> index_bundle_names
index_bundle_names -> html_bundle
index_bundle_names -> html_index
"." -> bundle_combine
bundle_combine -> index_reverse_domains
index_reverse_domains -> html_reverse_domains
bundle_combine -> html_bundle
"." -> tracker_download
}
```
[graphviz](http://www.webgraphviz.com/)

View File

@@ -98,6 +98,7 @@ def process():
gen_root() # root index.thml gen_root() # root index.thml
gen_search() # root redirect.html?id=my.bundle.id gen_search() # root redirect.html?id=my.bundle.id
gen_help() gen_help()
print('')
if __name__ == '__main__': if __name__ == '__main__':

View File

@@ -8,6 +8,7 @@ import bundle_download
import html_root import html_root
import html_index import html_index
import html_bundle import html_bundle
import html_reverse_domains
import index_bundle_names import index_bundle_names
import index_reverse_domains import index_reverse_domains
import tracker_download import tracker_download
@@ -26,28 +27,33 @@ def print_usage_and_exit():
exit(0) exit(0)
def rebuild_index(inclRoot=False): def rebuild_app_index(inclRoot=False):
html_index.process() html_index.process()
if inclRoot: # TODO: remove check if root contains dynamic content if inclRoot: # TODO: remove check if root contains dynamic content
html_root.process() html_root.process()
def rebuild_domain_index(bundle_ids, deleteOnly=False):
index_reverse_domains.process(bundle_ids, deleteOnly=deleteOnly)
html_reverse_domains.process()
def del_id(bundle_ids): def del_id(bundle_ids):
print('removing apps from website:') print('removing apps from website:')
if bundle_ids == ['*']: if bundle_ids == ['*']:
bundle_ids = list(mylib.enum_appids()) bundle_ids = list(mylib.enum_appids())
update_index = False update_app_index = False
for bid in bundle_ids: for bid in bundle_ids:
dest = mylib.path_out_app(bid) dest = mylib.path_out_app(bid)
if mylib.dir_exists(dest): if mylib.dir_exists(dest):
print(' ' + bid) print(' ' + bid)
mylib.rm_dir(dest) mylib.rm_dir(dest)
update_index = True update_app_index = True
print('') print('')
index_reverse_domains.process(bundle_ids, deleteOnly=True) rebuild_domain_index(bundle_ids, deleteOnly=True)
if update_index: if update_app_index:
rebuild_index() rebuild_app_index(inclRoot=True)
def combine_and_update(bundle_ids, where=None): def combine_and_update(bundle_ids, where=None):
@@ -55,7 +61,7 @@ def combine_and_update(bundle_ids, where=None):
new_ids = bundle_download.process(bundle_ids) new_ids = bundle_download.process(bundle_ids)
# 2. if new apps, update bundle name index # 2. if new apps, update bundle name index
if len(new_ids) > 0: if len(new_ids) > 0:
index_bundle_names.process(new_ids) index_bundle_names.process(new_ids) # after bundle_download
# 3. re-calculate combined.json and evaluated.json files # 3. re-calculate combined.json and evaluated.json files
affected = bundle_combine.process(bundle_ids, where=where) affected = bundle_combine.process(bundle_ids, where=where)
# special case needed for reverse index. '*' will force rebuilt index # special case needed for reverse index. '*' will force rebuilt index
@@ -63,13 +69,13 @@ def combine_and_update(bundle_ids, where=None):
affected = ['*'] affected = ['*']
# 4. was any json updated? if so, make html and update reverse index # 4. was any json updated? if so, make html and update reverse index
if len(affected) > 0: if len(affected) > 0:
index_reverse_domains.process(affected) rebuild_domain_index(affected) # after bundle_combine
html_bundle.process(affected) html_bundle.process(affected) # after index_bundle_names
else: else:
print('no bundle affected by tracker, not generating bundle html') print('no bundle affected by tracker, not generating bundle html')
# 5. make all apps index # 5. make all apps index
if len(new_ids) > 0: if len(new_ids) > 0:
rebuild_index() # must be called after bundle_combine rebuild_app_index() # must be called after bundle_combine
else: else:
print('no new bundle, not rebuilding index') print('no new bundle, not rebuilding index')
@@ -116,12 +122,13 @@ try:
import_update() import_update()
elif cmd == 'tracker': elif cmd == 'tracker':
tracker_update() tracker_update()
# tracker_download.combine_all('x') # tracker_download.combine_all()
elif cmd == 'icons': elif cmd == 'icons':
if bundle_download.download_missing_icons(force=False): if bundle_download.download_missing_icons(force=False):
rebuild_index() rebuild_app_index()
elif cmd == 'index': elif cmd == 'index':
rebuild_index(inclRoot=True) rebuild_domain_index(['*'])
rebuild_app_index(inclRoot=True)
elif cmd == 'run': elif cmd == 'run':
if len(params) == 0: if len(params) == 0:
print_usage_and_exit() print_usage_and_exit()

View File

@@ -117,7 +117,7 @@ def exodus(fname):
return save_list(res, fname, binary=False) return save_list(res, fname, binary=False)
def combine_all(changes): def combine_all(changes=['_']):
final = mylib.path_root('src', 'lists', 'tracker_all.txt') final = mylib.path_root('src', 'lists', 'tracker_all.txt')
if changes or not mylib.file_exists(final): if changes or not mylib.file_exists(final):
print(' updating: tracker_all.txt') print(' updating: tracker_all.txt')

BIN
src/z_dependency.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB