refactoring II
This commit is contained in:
@@ -1,8 +1,9 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: UTF-8 -*-
|
||||
import os
|
||||
from HeuristicSearch import SearchInterrupt
|
||||
from HeuristicLib import Probability
|
||||
from RuneText import RUNES, load_indices
|
||||
from InterruptSearch import InterruptSearch
|
||||
from Probability import Probability
|
||||
from IOReader import load_indices
|
||||
from LPath import FILES_ALL, FILES_UNSOLVED, LPath
|
||||
|
||||
|
||||
@@ -13,20 +14,14 @@ from LPath import FILES_ALL, FILES_UNSOLVED, LPath
|
||||
class InterruptDB(object):
|
||||
def __init__(self, data, interrupt):
    """Set up an interrupt search over ``data`` for one interrupt value.

    Args:
        data: the input handed unchanged to ``InterruptSearch``.
        interrupt: the interrupt value; stored as ``self.irp`` and passed
            to ``InterruptSearch`` as its ``irp`` keyword.
    """
    self.irp = interrupt
    # A single search object is enough; an earlier assignment to the same
    # attribute was overwritten immediately and has been dropped.
    self.iguess = InterruptSearch(data, irp=interrupt)
    # Number of stops reported by the search object.
    self.irp_count = len(self.iguess.stops)
|
||||
|
||||
def make(self, dbname, name, keylen):
|
||||
def fn(x):
|
||||
return Probability.target_diff(x, keylen) # used in db_norm
|
||||
# return Probability.IC_w_keylen(x, keylen) # used in db_high
|
||||
|
||||
if keylen == 0:
|
||||
keylen = 1
|
||||
score, skips = fn(self.iguess.join()), [[]] # without interrupts
|
||||
def make(self, dbname, name, keylen, fn_score):
|
||||
if keylen == 0: # without interrupts
|
||||
score, skips = fn_score(self.iguess.join(), 1), [[]]
|
||||
else:
|
||||
score, skips = self.iguess.sequential(fn, startAt=0, maxdepth=99)
|
||||
# score, skips = self.iguess.genetic(fn, topDown=False, maxdepth=4)
|
||||
score, skips = self.iguess.all(keylen, fn_score)
|
||||
for i, interrupts in enumerate(skips):
|
||||
skips[i] = self.iguess.to_occurrence_index(interrupts)
|
||||
|
||||
@@ -35,18 +30,17 @@ class InterruptDB(object):
|
||||
name, score, self.irp, self.irp_count, keylen, nums, dbname)
|
||||
return score, skips
|
||||
|
||||
def make_secondary(self, dbname, name, keylen, threshold):
|
||||
def make_secondary(self, dbname, name, keylen, fn_score, threshold):
|
||||
scores = []
|
||||
|
||||
def fn(x):
|
||||
score = Probability.target_diff(x, keylen) # used in db_norm
|
||||
# score = Probability.IC_w_keylen(x, keylen) # used in db_high
|
||||
def fn(x, kl):
|
||||
score = fn_score(x, kl)
|
||||
if score >= threshold:
|
||||
scores.append(score)
|
||||
return 1
|
||||
return -1
|
||||
|
||||
_, skips = self.iguess.sequential(fn, startAt=0, maxdepth=99)
|
||||
_, skips = self.iguess.all(keylen, fn)
|
||||
for i, interrupts in enumerate(skips):
|
||||
skips[i] = self.iguess.to_occurrence_index(interrupts)
|
||||
ret = list(zip(scores, skips))
|
||||
@@ -58,25 +52,6 @@ class InterruptDB(object):
|
||||
name, score, self.irp, self.irp_count, keylen, nums, dbname)
|
||||
return len(filtered)
|
||||
|
||||
@staticmethod
|
||||
def longest_no_interrupt(data, interrupt, irpmax=0):
|
||||
def add(i):
|
||||
nonlocal ret, prev
|
||||
idx = prev.pop(0)
|
||||
if idx == 0:
|
||||
ret = []
|
||||
ret.append((i - idx, idx))
|
||||
|
||||
prev = [0] * (irpmax + 1)
|
||||
ret = []
|
||||
for i, x in enumerate(data):
|
||||
if x == interrupt:
|
||||
prev.append(i + 1)
|
||||
add(i)
|
||||
add(i + 1)
|
||||
length, pos = max(ret)
|
||||
return pos, length
|
||||
|
||||
@staticmethod
|
||||
def load(dbname):
|
||||
if not os.path.isfile(LPath.InterruptDB(dbname)):
|
||||
@@ -103,171 +78,12 @@ class InterruptDB(object):
|
||||
f.write(f'{name}|{irpmax}|{score:.5f}|{irp}|{keylen}|{nums}\n')
|
||||
|
||||
|
||||
#########################################
|
||||
# InterruptIndices : Read chapters and extract indices (cluster by runes)
|
||||
#########################################
|
||||
|
||||
class InterruptIndices(object):
    """Lookup table of rune positions per file, backed by an index DB file.

    ``self.pos`` maps a file name to ``{'total': int, 'pos': list}`` where
    ``pos[irp]`` is the list of indices stored for interrupt ``irp``
    (29 slots per file).
    """

    def __init__(self):
        """Load the default index database into ``self.pos``."""
        self.pos = InterruptIndices.read()

    def consider(self, name, irp, limit):
        """Return how many runes precede occurrence number ``limit`` of ``irp``.

        If the file has ``limit`` or fewer occurrences of ``irp``, the
        file's total rune count is returned instead.
        """
        nums = self.pos[name]['pos'][irp]
        if len(nums) <= limit:
            return self.pos[name]['total']
        return nums[limit]  # number of runes, which is not last index

    def total(self, name):
        """Return the total rune count stored for ``name``."""
        return self.pos[name]['total']

    def longest_no_interrupt(self, name, irp, irpmax=0):
        """List the spans between stored positions of ``irp``, longest first.

        Each entry is ``(length, start)`` where ``start`` is a stored
        position and ``length`` is the distance to the position
        ``irpmax + 1`` entries later. The file total is appended as a
        sentinel so the trailing spans are included.
        """
        irpmax += 1
        entry = self.pos[name]
        nums = entry['pos'][irp] + [entry['total']] * irpmax
        ret = [(y - x, x) for x, y in zip(nums, nums[irpmax:])]
        return sorted(ret, reverse=True)

    @staticmethod
    def write(dbname='db_indices'):
        """Rebuild the index database for every file in ``FILES_ALL``.

        One line is written per (file, interrupt) pair in the format
        ``name|total|interrupt|comma-separated indices``.
        """
        with open(LPath.InterruptDB(dbname), 'w') as f:
            f.write('# file | total runes in file | interrupt | indices\n')
            for name in FILES_ALL:
                fname = f'pages/{name}.txt'
                data = load_indices(fname, 0)
                total = len(data)
                # presumably every value in data is a rune index in 0..28;
                # verify against load_indices
                nums = [[] for _ in range(29)]
                for idx, rune in enumerate(data):
                    nums[rune].append(idx)
                for irp, pos in enumerate(nums):
                    f.write('{}|{}|{}|{}\n'.format(
                        name, total, irp, ','.join(map(str, pos))))

    @staticmethod
    def read(dbname='db_indices'):
        """Parse the index database into a ``{name: {'total', 'pos'}}`` dict.

        Comment lines (starting with ``#``) and blank lines are ignored,
        so a trailing empty line no longer aborts the load.
        """
        ret = {}
        with open(LPath.InterruptDB(dbname), 'r') as f:
            for line in f:  # stream instead of materialising all lines
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                name, total, irp, nums = line.split('|')
                if name not in ret:
                    ret[name] = {'total': int(total),
                                 'pos': [[] for _ in range(29)]}
                pos = ret[name]['pos']
                pos[int(irp)] = list(map(int, nums.split(','))) if nums else []
        return ret
|
||||
|
||||
|
||||
#########################################
|
||||
# InterruptToWeb : Read interrupt DB and create html graphic / matrix
|
||||
#########################################
|
||||
|
||||
class InterruptToWeb(object):
    """Render an interrupt database as HTML tables inside a template.

    ``self.scores[name][irp][keylen]`` holds a ``(score, irpc)`` tuple,
    the best entry found in the database for that combination.
    """

    def __init__(self, dbname, template='template.html'):
        """Read the HTML template and collect the best score per cell.

        For each (file, interrupt, key length) the entry with the highest
        interrupt count wins; ties are broken by the higher score.
        """
        with open(LPath.results(template), 'r') as f:
            self.template = f.read()
        self.indices = InterruptIndices()
        self.scores = {}
        db = InterruptDB.load(dbname)
        for k, v in db.items():
            for irpc, score, irp, kl, _ in v:
                if k not in self.scores:
                    self.scores[k] = [[] for _ in range(29)]
                part = self.scores[k][irp]
                # Pad with placeholders so part[kl] exists.
                while kl >= len(part):
                    part.append((0, 0))  # (score, irpc)
                oldc = part[kl][1]
                if irpc > oldc or (irpc == oldc and score > part[kl][0]):
                    part[kl] = (score, irpc)

    def cls(self, x, low=0, high=1):
        """Return the ``class`` attribute for a cell, bucketed ``m0``..``m15``.

        Values at or below ``low`` map to ``m0``; everything else is
        clamped to ``high`` and scaled linearly onto ``m1``..``m15``.
        """
        if x <= low:
            return ' class="m0"'
        return f' class="m{int((min(high, x) - low) / (high - low) * 14) + 1}"'

    def table_reliable(self):
        """Build the table of rune counts covered per file and interrupt.

        Rows whose every cell is empty are dropped from the output.
        """
        trh = '<tr class="rotate"><th></th>'
        trtotal = '<tr class="small"><th>Total</th>'
        trd = [f'<tr><th>{x}</th>' for x in RUNES]
        del_row = [True] * 29
        for name in FILES_ALL:
            if name not in self.scores:
                continue
            total = self.indices.total(name)
            trh += f'<th><div>{name}</div></th>'
            trtotal += f'<td>{total}</td>'
            for i in range(29):
                scrs = self.scores[name][i][1:]  # key length 0 is ignored
                if not scrs:
                    trd[i] += '<td>–</td>'
                    continue
                del_row[i] = False
                worst_irpc = min([x[1] for x in scrs])
                if worst_irpc == 0:
                    # Mixed zero / non-zero interrupt counts are flagged
                    # as unknown. NOTE(review): nesting of this branch was
                    # reconstructed from unindented text; confirm.
                    if max([x[1] for x in scrs]) != 0:
                        trd[i] += '<td>?</td>'
                        continue
                num = self.indices.consider(name, i, worst_irpc)
                trd[i] += f'<td{self.cls(num, 384, 812)}>{num}</td>'

        trh += '</tr>\n'
        trtotal += '</tr>\n'
        for i in range(29):
            trd[i] += '</tr>\n'
            if del_row[i]:
                trd[i] = ''
        return f'<table>{trh}{"".join(trd)}{trtotal}</table>'

    def table_interrupt(self, irp, pmin=1.25, pmax=1.65):
        """Build the score table (key length x file) for one interrupt.

        Args:
            irp: interrupt index whose scores are rendered.
            pmin, pmax: score range mapped onto the colour classes.

        Returns:
            HTML ``<table>`` markup. The row for key length 0 is omitted
            and a final row lists the best-scoring key length per file.
        """
        maxkl = max((len(x[irp]) for x in self.scores.values()), default=0)
        trh = '<tr class="rotate"><th></th>'
        trbest = '<tr class="small"><th>best</th>'
        trd = [f'<tr><th>{x}</th>' for x in range(maxkl)]
        for name in FILES_ALL:
            maxscore = 0
            bestkl = -1
            try:
                klarr = self.scores[name][irp]
            except KeyError:
                continue
            trh += f'<th><div>{name}</div></th>'
            for kl, (score, _) in enumerate(klarr):
                if score < 0:
                    trd[kl] += f'<td{self.cls(0)}>–</td>'
                else:
                    trd[kl] += f'<td{self.cls(score, pmin, pmax)}>{score:.2f}</td>'
                    if score > maxscore:
                        maxscore = score
                        bestkl = kl
            trbest += f'<td>{bestkl}</td>'
        trh += '</tr>\n'
        trbest += '</tr>\n'
        # Close exactly the rows that exist; the row count follows the
        # longest key-length list, not the number of runes.
        for i in range(len(trd)):
            trd[i] += '</tr>\n'
        return f'<table>{trh}{"".join(trd[1:])}{trbest}</table>'

    def make(self, outfile, pmin=1.25, pmax=1.65):
        """Fill the template placeholders and write the page to ``outfile``.

        Interrupts without any score entries get neither a navigation
        link nor a table.
        """
        nav = ''
        txt = ''
        for i in range(29):
            has_entries = any(x[i] for x in self.scores.values())
            if not has_entries:
                continue
            nav += f'<a href="#tb-i{i}">{RUNES[i]}</a>\n'
            txt += f'<h3 id="tb-i{i}">Interrupt {i}: <b>{RUNES[i]}</b></h3>'
            txt += self.table_interrupt(i, pmin, pmax)
        html = self.template.replace('__NAVIGATION__', nav)
        html = html.replace('__TAB_RELIABLE__', self.table_reliable())
        html = html.replace('__INTERRUPT_TABLES__', txt)
        with open(LPath.results(outfile), 'w') as f:
            f.write(html)
|
||||
|
||||
|
||||
#########################################
|
||||
# helper functions
|
||||
#########################################
|
||||
|
||||
def create_initial_db(dbname, minkl=1, maxkl=32, max_irp=20, irpset=range(29)):
|
||||
def create_initial_db(dbname, fn_score, klset=range(1, 33),
|
||||
max_irp=20, irpset=range(29)):
|
||||
oldDB = InterruptDB.load(dbname)
|
||||
oldValues = {k: set((a, b, c) for a, _, b, c, _ in v)
|
||||
for k, v in oldDB.items()}
|
||||
@@ -276,15 +92,16 @@ def create_initial_db(dbname, minkl=1, maxkl=32, max_irp=20, irpset=range(29)):
|
||||
data = load_indices(LPath.page(name), irp, maxinterrupt=max_irp)
|
||||
db = InterruptDB(data, irp)
|
||||
print('load:', name, 'interrupt:', irp, 'count:', db.irp_count)
|
||||
for keylen in range(minkl, maxkl + 1): # key length
|
||||
for keylen in klset: # key length
|
||||
if (db.irp_count, irp, keylen) in oldValues.get(name, []):
|
||||
print(f'{keylen}: skipped.')
|
||||
continue
|
||||
score, interrupts = db.make(dbname, name, keylen)
|
||||
score, interrupts = db.make(dbname, name, keylen, fn_score)
|
||||
print(f'{keylen}: {score:.4f}, solutions: {len(interrupts)}')
|
||||
|
||||
|
||||
def find_secondary_solutions(db_in, db_out, threshold=0.75, max_irp=20):
|
||||
def find_secondary_solutions(db_in, db_out, fn_score,
|
||||
threshold=0.75, max_irp=20):
|
||||
oldDB = InterruptDB.load(db_in)
|
||||
search_set = set()
|
||||
for name, arr in oldDB.items():
|
||||
@@ -299,13 +116,14 @@ def find_secondary_solutions(db_in, db_out, threshold=0.75, max_irp=20):
|
||||
print('load:', name, 'interrupt:', irp, 'keylen:', kl)
|
||||
data = load_indices(LPath.page(name), irp, maxinterrupt=max_irp)
|
||||
db = InterruptDB(data, irp)
|
||||
c = db.make_secondary(db_out, name, kl, threshold)
|
||||
c = db.make_secondary(db_out, name, kl, fn_score, threshold)
|
||||
print('found', c, 'additional solutions')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# find_secondary_solutions('db_high', 'db_high_secondary', threshold=1.4)
|
||||
# find_secondary_solutions('db_norm', 'db_norm_secondary', threshold=0.55)
|
||||
# create_initial_db('db_norm', minkl=1, maxkl=32, max_irp=20)
|
||||
# InterruptToWeb('db_high').make('index_high.html')
|
||||
InterruptToWeb('db_norm').make('index_norm.html', pmin=0.40, pmax=0.98)
|
||||
create_initial_db('db_high', Probability.IC_w_keylen, max_irp=20)
|
||||
create_initial_db('db_norm', Probability.target_diff, max_irp=20)
|
||||
# find_secondary_solutions('db_high', 'db_high_secondary',
|
||||
# Probability.IC_w_keylen, threshold=1.4)
|
||||
# find_secondary_solutions('db_norm', 'db_norm_secondary',
|
||||
# Probability.target_diff, threshold=0.55)
|
||||
|
||||
Reference in New Issue
Block a user