Reverse index json file
This commit is contained in:
@@ -137,7 +137,14 @@ def mv(path, to, printOmitPrefix=None):
|
|||||||
Path(path).rename(to)
|
Path(path).rename(to)
|
||||||
|
|
||||||
|
|
||||||
def rm(path):
|
def rm_file(file_path):
    """Delete the file at ``file_path``; a missing file is silently ignored.

    Only ``FileNotFoundError`` is suppressed -- any other ``OSError``
    raised by the removal propagates to the caller.
    """
    try:
        os.unlink(file_path)
    except FileNotFoundError:
        # Nothing to delete: the desired end state already holds.
        return
|
||||||
|
|
||||||
|
|
||||||
|
def rm_dir(path):
|
||||||
try:
|
try:
|
||||||
shutil.rmtree(path)
|
shutil.rmtree(path)
|
||||||
except Exception:
|
except Exception:
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ def gen_page(arr, base, page_id=1, total=1):
|
|||||||
def process(per_page=60):
|
def process(per_page=60):
|
||||||
print('generating app-index ...')
|
print('generating app-index ...')
|
||||||
index_dir = mylib.path_out('index', 'page')
|
index_dir = mylib.path_out('index', 'page')
|
||||||
mylib.rm(index_dir)
|
mylib.rm_dir(index_dir)
|
||||||
mylib.mkdir(index_dir)
|
mylib.mkdir(index_dir)
|
||||||
|
|
||||||
apps = [gen_obj(x) for x in mylib.enum_appids()]
|
apps = [gen_obj(x) for x in mylib.enum_appids()]
|
||||||
|
|||||||
67
src/json_reverse_index.py
Executable file
67
src/json_reverse_index.py
Executable file
@@ -0,0 +1,67 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import common_lib as mylib
|
||||||
|
|
||||||
|
|
||||||
|
def get_index_path():
    """Return the path of the reverse-index JSON file.

    The file is called ``reverse_index.json`` and sits inside the directory
    that ``mylib.path_root('data', '_eval')`` resolves to. ``mylib.mkdir``
    is invoked on that directory before the path is returned.
    """
    eval_dir = mylib.path_root('data', '_eval')
    mylib.mkdir(eval_dir)
    index_path = mylib.path_add(eval_dir, 'reverse_index.json')
    return index_path
|
||||||
|
|
||||||
|
|
||||||
|
def load_index_json(file_path):
    """Load the reverse index stored at ``file_path`` or start an empty one.

    Args:
        file_path: location of the reverse-index JSON file.

    Returns:
        A dict that always contains the sections ``'pardom'`` and
        ``'subdom'``. Each section maps a domain to the list of bundle ids
        recorded for it (both are empty dicts for a fresh index).
    """
    if mylib.file_exists(file_path):
        index = mylib.json_read(file_path)
    else:
        index = {}
    # The rest of this module indexes both sections unconditionally, so
    # make sure a partially written index file cannot cause a KeyError.
    for key in ('pardom', 'subdom'):
        index.setdefault(key, {})
    return index
|
||||||
|
|
||||||
|
|
||||||
|
def delete_from_index(index, bundle_ids):
    """Remove the given bundle ids from every domain of the reverse index.

    The index is modified in place. Every occurrence of a bundle id is
    removed (not only the first one), and a domain whose list ends up empty
    is dropped from its section.

    Args:
        index: dict with the sections ``'pardom'`` and ``'subdom'``, each
            mapping a domain to a list of bundle ids.
        bundle_ids: iterable of (hashable) bundle ids to remove; ids that
            are not present are ignored.
    """
    # A set gives O(1) membership tests, so each list is scanned only once.
    obsolete = set(bundle_ids)
    for key in ('pardom', 'subdom'):
        section = index[key]
        # Iterate over a snapshot of the keys because domains get deleted.
        for domain in list(section):
            entries = section[domain]
            # Slice assignment keeps the same list object, as callers that
            # hold a reference to it would expect from an in-place update.
            entries[:] = [bid for bid in entries if bid not in obsolete]
            if not entries:
                del section[domain]
|
||||||
|
|
||||||
|
|
||||||
|
def insert_in_index(index, bundle_ids):
    """Add the given bundle ids to the reverse index, in place.

    For each bundle id the evaluated JSON is read via
    ``mylib.json_read_evaluated`` and the bundle id is appended to the list
    of every domain found in its ``'pardom'`` and ``'subdom'`` sections.
    A bundle id is recorded at most once per domain per call, even if the
    id is passed twice or a domain is listed more than once.

    Args:
        index: dict with the sections ``'pardom'`` and ``'subdom'``, each
            mapping a domain to a list of bundle ids.
        bundle_ids: iterable of bundle ids to insert.
    """
    # dict.fromkeys() removes repeats while keeping first-seen order.
    for bid in dict.fromkeys(bundle_ids):
        evaluated, _ = mylib.json_read_evaluated(bid)
        for key in ('pardom', 'subdom'):  # both sections share one layout
            section = index[key]
            # The first element of each entry is the domain name.
            domains = dict.fromkeys(entry[0] for entry in evaluated[key])
            for domain in domains:
                section.setdefault(domain, []).append(bid)
|
||||||
|
|
||||||
|
|
||||||
|
def process(bundle_ids, deleteOnly=False):
    """Update the reverse index file for the given bundle ids.

    Args:
        bundle_ids: list of bundle ids to refresh. The special value
            ``['*']`` discards the existing index file and processes every
            id returned by ``mylib.enum_data_appids()``.
        deleteOnly: when true, the ids are only removed from the index and
            not inserted again.
    """
    print('writing reverse index ...')
    index_file = get_index_path()
    rebuild_everything = bundle_ids == ['*']
    if rebuild_everything:
        bundle_ids = list(mylib.enum_data_appids())
        # start from scratch instead of patching the previous index
        mylib.rm_file(index_file)

    # previous state (empty if the file is missing or was just removed)
    index = load_index_json(index_file)

    # drop stale entries first, then re-add the current ones if requested
    delete_from_index(index, bundle_ids)
    if not deleteOnly:
        insert_in_index(index, bundle_ids)

    # persist the result
    mylib.json_write(index_file, index, pretty=False)
    print('')
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Bundle ids are taken verbatim from the command line.
    cli_bundle_ids = sys.argv[1:]
    if cli_bundle_ids:
        process(cli_bundle_ids)
    else:
        # Without arguments only the usage text is shown; a full rebuild
        # (bundle ids ['*']) is not triggered implicitly.
        mylib.usage(__file__, '[bundle_id] [...]')
|
||||||
13
src/main.py
13
src/main.py
@@ -1,6 +1,5 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import os
|
|
||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
import common_lib as mylib
|
import common_lib as mylib
|
||||||
@@ -9,6 +8,7 @@ import bundle_download
|
|||||||
import html_root
|
import html_root
|
||||||
import html_index
|
import html_index
|
||||||
import html_bundle
|
import html_bundle
|
||||||
|
import json_reverse_index
|
||||||
import tracker_download
|
import tracker_download
|
||||||
|
|
||||||
|
|
||||||
@@ -41,9 +41,10 @@ def del_id(bundle_ids):
|
|||||||
dest = mylib.path_out_app(bid)
|
dest = mylib.path_out_app(bid)
|
||||||
if mylib.dir_exists(dest):
|
if mylib.dir_exists(dest):
|
||||||
print(' ' + bid)
|
print(' ' + bid)
|
||||||
mylib.rm(dest)
|
mylib.rm_dir(dest)
|
||||||
update_index = True
|
update_index = True
|
||||||
print('')
|
print('')
|
||||||
|
json_reverse_index.process(bundle_ids, deleteOnly=True)
|
||||||
if update_index:
|
if update_index:
|
||||||
rebuild_index()
|
rebuild_index()
|
||||||
|
|
||||||
@@ -51,7 +52,10 @@ def del_id(bundle_ids):
|
|||||||
def combine_and_update(bundle_ids, where=None):
|
def combine_and_update(bundle_ids, where=None):
|
||||||
new_ids = bundle_download.process(bundle_ids)
|
new_ids = bundle_download.process(bundle_ids)
|
||||||
affected = bundle_combine.process(bundle_ids, where=where)
|
affected = bundle_combine.process(bundle_ids, where=where)
|
||||||
|
if not where and bundle_ids == ['*']:
|
||||||
|
affected = ['*']
|
||||||
if len(affected) > 0:
|
if len(affected) > 0:
|
||||||
|
json_reverse_index.process(affected)
|
||||||
html_bundle.process(affected)
|
html_bundle.process(affected)
|
||||||
else:
|
else:
|
||||||
print('no bundle affected by tracker, not generating bundle html')
|
print('no bundle affected by tracker, not generating bundle html')
|
||||||
@@ -82,10 +86,7 @@ def import_update():
|
|||||||
if len(then_delete) > 0:
|
if len(then_delete) > 0:
|
||||||
print('cleanup _in folder ...')
|
print('cleanup _in folder ...')
|
||||||
for x in then_delete:
|
for x in then_delete:
|
||||||
try:
|
mylib.rm_file(fname)
|
||||||
os.remove(fname)
|
|
||||||
except FileNotFoundError:
|
|
||||||
pass
|
|
||||||
print('')
|
print('')
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user