diff --git a/src/common_lib.py b/src/common_lib.py
index 4804c48..8a6fdd9 100755
--- a/src/common_lib.py
+++ b/src/common_lib.py
@@ -137,7 +137,14 @@ def mv(path, to, printOmitPrefix=None):
     Path(path).rename(to)
 
 
-def rm(path):
+def rm_file(file_path):
+    try:
+        os.remove(file_path)
+    except FileNotFoundError:
+        pass
+
+
+def rm_dir(path):
     try:
         shutil.rmtree(path)
     except Exception:
diff --git a/src/html_index.py b/src/html_index.py
index 641248c..0e4b2ea 100755
--- a/src/html_index.py
+++ b/src/html_index.py
@@ -60,7 +60,7 @@ def gen_page(arr, base, page_id=1, total=1):
 def process(per_page=60):
     print('generating app-index ...')
     index_dir = mylib.path_out('index', 'page')
-    mylib.rm(index_dir)
+    mylib.rm_dir(index_dir)
     mylib.mkdir(index_dir)
 
     apps = [gen_obj(x) for x in mylib.enum_appids()]
diff --git a/src/json_reverse_index.py b/src/json_reverse_index.py
new file mode 100755
index 0000000..2a899c7
--- /dev/null
+++ b/src/json_reverse_index.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+
+import sys
+import common_lib as mylib
+
+
+def get_index_path():
+    """Return the path of the reverse-index JSON file.
+
+    The containing `data/_eval` directory is passed to `mylib.mkdir` first.
+    """
+    pth = mylib.path_root('data', '_eval')
+    mylib.mkdir(pth)
+    return mylib.path_add(pth, 'reverse_index.json')
+
+
+def load_index_json(file_path):
+    """Load the index stored at `file_path`, or return a fresh empty index."""
+    if mylib.file_exists(file_path):
+        return mylib.json_read(file_path)
+    return {'pardom': {}, 'subdom': {}}
+
+
+def delete_from_index(index, bundle_ids):
+    """Remove all `bundle_ids` from every domain list of `index` (in place).
+
+    Domains whose list ends up empty are dropped from the index.
+    """
+    obsolete = set(bundle_ids)
+    for key in ['pardom', 'subdom']:
+        for domain in list(index[key].keys()):
+            # filter (instead of list.remove) so duplicate entries go as well
+            remaining = [x for x in index[key][domain] if x not in obsolete]
+            if remaining:
+                index[key][domain] = remaining
+            else:
+                del index[key][domain]
+
+
+def insert_in_index(index, bundle_ids):
+    """Register each bundle id under every domain of its evaluated json.
+
+    Mutates `index` in place; a bundle id is never listed twice per domain.
+    """
+    for bid in bundle_ids:
+        data, _ = mylib.json_read_evaluated(bid)
+        for key in ['pardom', 'subdom']:  # assuming keys are identical
+            for domain, _, _ in data[key]:
+                ids = index[key].setdefault(domain, [])
+                if bid not in ids:
+                    ids.append(bid)
+
+
+def process(bundle_ids, deleteOnly=False):
+    """Update the on-disk reverse index for `bundle_ids`.
+
+    `['*']` rebuilds the index from scratch for all ids returned by
+    `mylib.enum_data_appids()`. With `deleteOnly` the given ids are only
+    removed from the index and not re-inserted.
+    """
+    print('writing reverse index ...')
+    index_file = get_index_path()
+    if bundle_ids == ['*']:
+        bundle_ids = list(mylib.enum_data_appids())
+        mylib.rm_file(index_file)  # rebuild from ground up
+    # load previous index
+    index = load_index_json(index_file)
+    # delete previous index entries
+    delete_from_index(index, bundle_ids)
+    # write new index to disk
+    if not deleteOnly:
+        insert_in_index(index, bundle_ids)
+    mylib.json_write(index_file, index, pretty=False)
+    print('')
+
+
+if __name__ == '__main__':
+    args = sys.argv[1:]
+    if args:
+        process(args)
+    else:
+        # process(['*'])
+        mylib.usage(__file__, '[bundle_id] [...]')
diff --git a/src/main.py b/src/main.py
index cc70943..e931f69 100755
--- a/src/main.py
+++ b/src/main.py
@@ -1,6 +1,5 @@
 #!/usr/bin/env python3
 
-import os
 import sys
 import traceback
 import common_lib as mylib
@@ -9,6 +8,7 @@ import bundle_download
 import html_root
 import html_index
 import html_bundle
+import json_reverse_index
 import tracker_download
 
 
@@ -41,9 +41,10 @@ def del_id(bundle_ids):
         dest = mylib.path_out_app(bid)
         if mylib.dir_exists(dest):
             print(' ' + bid)
-            mylib.rm(dest)
+            mylib.rm_dir(dest)
             update_index = True
     print('')
+    json_reverse_index.process(bundle_ids, deleteOnly=True)
     if update_index:
         rebuild_index()
 
@@ -51,7 +52,10 @@ def del_id(bundle_ids):
 def combine_and_update(bundle_ids, where=None):
     new_ids = bundle_download.process(bundle_ids)
     affected = bundle_combine.process(bundle_ids, where=where)
+    if not where and bundle_ids == ['*']:
+        affected = ['*']
     if len(affected) > 0:
+        json_reverse_index.process(affected)
         html_bundle.process(affected)
     else:
         print('no bundle affected by tracker, not generating bundle html')
@@ -82,10 +86,7 @@ def import_update():
     if len(then_delete) > 0:
         print('cleanup _in folder ...')
         for x in then_delete:
-            try:
-                os.remove(fname)
-            except FileNotFoundError:
-                pass
+            mylib.rm_file(fname)
     print('')
 
 