Reverse index json file

This commit is contained in:
relikd
2020-09-15 20:39:39 +02:00
parent c139886c61
commit 71bd8e2a43
4 changed files with 83 additions and 8 deletions

View File

@@ -137,7 +137,14 @@ def mv(path, to, printOmitPrefix=None):
Path(path).rename(to) Path(path).rename(to)
def rm(path): def rm_file(file_path):
try:
os.remove(file_path)
except FileNotFoundError:
pass
def rm_dir(path):
try: try:
shutil.rmtree(path) shutil.rmtree(path)
except Exception: except Exception:

View File

@@ -60,7 +60,7 @@ def gen_page(arr, base, page_id=1, total=1):
def process(per_page=60): def process(per_page=60):
print('generating app-index ...') print('generating app-index ...')
index_dir = mylib.path_out('index', 'page') index_dir = mylib.path_out('index', 'page')
mylib.rm(index_dir) mylib.rm_dir(index_dir)
mylib.mkdir(index_dir) mylib.mkdir(index_dir)
apps = [gen_obj(x) for x in mylib.enum_appids()] apps = [gen_obj(x) for x in mylib.enum_appids()]

67
src/json_reverse_index.py Executable file
View File

@@ -0,0 +1,67 @@
#!/usr/bin/env python3
import sys
import common_lib as mylib
def get_index_path():
    """Return the location of the reverse index json file.

    The path is `reverse_index.json` joined onto the `data/_eval` location
    obtained from `mylib.path_root`. `mylib.mkdir` is invoked on that
    location first (presumably creating it when missing -- see common_lib).
    """
    eval_dir = mylib.path_root('data', '_eval')
    mylib.mkdir(eval_dir)
    return mylib.path_add(eval_dir, 'reverse_index.json')
def load_index_json(file_path):
    """Load the reverse index stored at `file_path`.

    Returns whatever `mylib.json_read` yields when the file exists;
    otherwise a fresh skeleton with empty 'pardom' and 'subdom' mappings.
    """
    if not mylib.file_exists(file_path):
        # Nothing on disk yet: start from an empty index.
        return {'pardom': {}, 'subdom': {}}
    return mylib.json_read(file_path)
def delete_from_index(index, bundle_ids):
    """Remove `bundle_ids` from every domain entry of `index`, in place.

    `index` must provide the keys 'pardom' and 'subdom', each mapping a
    domain to a list of bundle ids. For every id in `bundle_ids` one
    occurrence is removed from each domain list (ids that are absent are
    skipped). Domains whose list ends up empty are dropped from the index.
    Returns None.
    """
    for section in ('pardom', 'subdom'):
        domains = index[section]
        # Walk a snapshot of the keys, entries get deleted along the way.
        for name in tuple(domains):
            members = domains[name]
            for bundle_id in bundle_ids:
                if bundle_id in members:
                    # Drops the first occurrence only.
                    members.remove(bundle_id)
            if not members:
                del domains[name]
def insert_in_index(index, bundle_ids):
    """Register each bundle id under all domains of its evaluated json.

    For every id, the first element of the pair returned by
    `mylib.json_read_evaluated` is read; its 'pardom' and 'subdom' entries
    are iterated as 3-tuples whose first item is the domain. The bundle id
    is appended to `index[section][domain]`, creating the list on first use.
    Mutates `index` in place and returns None.
    """
    for bundle_id in bundle_ids:
        evaluated, _ = mylib.json_read_evaluated(bundle_id)
        # Same section names are expected in the evaluated json and the index.
        for section in ('pardom', 'subdom'):
            for domain, _, _ in evaluated[section]:
                index[section].setdefault(domain, []).append(bundle_id)
def process(bundle_ids, deleteOnly=False):
    """Update the reverse index file for the given bundle ids.

    bundle_ids -- list of bundle ids; the exact value ['*'] selects every
                  id from `mylib.enum_data_appids()` and removes the
                  existing index file first, so it is rebuilt from scratch.
    deleteOnly -- when true, the ids are only removed from the index and
                  not inserted again.

    The resulting index is written back with `mylib.json_write`.
    """
    print('writing reverse index ...')
    target = get_index_path()
    rebuild_all = bundle_ids == ['*']
    if rebuild_all:
        bundle_ids = list(mylib.enum_data_appids())
        # Start over: discard whatever index is currently on disk.
        mylib.rm_file(target)
    # Previous state (or an empty skeleton if the file is gone).
    index = load_index_json(target)
    # Stale entries go first, so re-inserting cannot duplicate them.
    delete_from_index(index, bundle_ids)
    if not deleteOnly:
        insert_in_index(index, bundle_ids)
    # Persist the updated index.
    mylib.json_write(target, index, pretty=False)
    print('')
if __name__ == '__main__':
    # Bundle ids are taken from the command line; without any, show usage.
    cli_ids = sys.argv[1:]
    if not cli_ids:
        # process(['*'])
        mylib.usage(__file__, '[bundle_id] [...]')
    else:
        process(cli_ids)

View File

@@ -1,6 +1,5 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import os
import sys import sys
import traceback import traceback
import common_lib as mylib import common_lib as mylib
@@ -9,6 +8,7 @@ import bundle_download
import html_root import html_root
import html_index import html_index
import html_bundle import html_bundle
import json_reverse_index
import tracker_download import tracker_download
@@ -41,9 +41,10 @@ def del_id(bundle_ids):
dest = mylib.path_out_app(bid) dest = mylib.path_out_app(bid)
if mylib.dir_exists(dest): if mylib.dir_exists(dest):
print(' ' + bid) print(' ' + bid)
mylib.rm(dest) mylib.rm_dir(dest)
update_index = True update_index = True
print('') print('')
json_reverse_index.process(bundle_ids, deleteOnly=True)
if update_index: if update_index:
rebuild_index() rebuild_index()
@@ -51,7 +52,10 @@ def del_id(bundle_ids):
def combine_and_update(bundle_ids, where=None): def combine_and_update(bundle_ids, where=None):
new_ids = bundle_download.process(bundle_ids) new_ids = bundle_download.process(bundle_ids)
affected = bundle_combine.process(bundle_ids, where=where) affected = bundle_combine.process(bundle_ids, where=where)
if not where and bundle_ids == ['*']:
affected = ['*']
if len(affected) > 0: if len(affected) > 0:
json_reverse_index.process(affected)
html_bundle.process(affected) html_bundle.process(affected)
else: else:
print('no bundle affected by tracker, not generating bundle html') print('no bundle affected by tracker, not generating bundle html')
@@ -82,10 +86,7 @@ def import_update():
if len(then_delete) > 0: if len(then_delete) > 0:
print('cleanup _in folder ...') print('cleanup _in folder ...')
for x in then_delete: for x in then_delete:
try: mylib.rm_file(fname)
os.remove(fname)
except FileNotFoundError:
pass
print('') print('')