module refactoring + allow word mistakes for OEIS search
This commit is contained in:
233
LP/HeuristicSearch.py
Executable file
233
LP/HeuristicSearch.py
Executable file
@@ -0,0 +1,233 @@
|
||||
#!/usr/bin/env python3
|
||||
import itertools # product, compress, combinations
|
||||
import bisect # bisect_left, insort
|
||||
from lib import affine_decrypt
|
||||
|
||||
|
||||
#########################################
|
||||
# GuessVigenere : Shift values around with a given keylength
|
||||
#########################################
|
||||
|
||||
class GuessVigenere(object):
|
||||
def __init__(self, nums):
|
||||
self.nums = nums
|
||||
|
||||
def guess(self, keylength, score_fn): # minimize score_fn
|
||||
found = []
|
||||
avg_score = 0
|
||||
for offset in range(keylength):
|
||||
bi = -1
|
||||
bs = 9999999
|
||||
for i in range(29):
|
||||
shifted = [(x - i) % 29 for x in self.nums[offset::keylength]]
|
||||
score = score_fn(shifted)
|
||||
if score < bs:
|
||||
bs = score
|
||||
bi = i
|
||||
avg_score += bs
|
||||
found.append(bi)
|
||||
return avg_score / keylength, found
|
||||
|
||||
|
||||
#########################################
|
||||
# GuessAffine : Find greatest common affine key
|
||||
#########################################
|
||||
|
||||
class GuessAffine(object):
|
||||
def __init__(self, nums):
|
||||
self.nums = nums
|
||||
|
||||
def guess(self, keylength, score_fn): # minimize score_fn
|
||||
found = []
|
||||
avg_score = 0
|
||||
for offset in range(keylength):
|
||||
candidate = (None, None)
|
||||
best = 9999999
|
||||
for s in range(29):
|
||||
for t in range(29):
|
||||
shifted = [affine_decrypt(x, (s, t))
|
||||
for x in self.nums[offset::keylength]]
|
||||
score = score_fn(shifted)
|
||||
if score < best:
|
||||
best = score
|
||||
candidate = (s, t)
|
||||
avg_score += best
|
||||
found.append(candidate)
|
||||
return avg_score / keylength, found
|
||||
|
||||
|
||||
#########################################
|
||||
# GuessPattern : Find a key that is rotated ABC BCA CAB, or ABC CAB BCA
|
||||
#########################################
|
||||
|
||||
class GuessPattern(object):
|
||||
def __init__(self, nums):
|
||||
self.nums = nums
|
||||
|
||||
@staticmethod
|
||||
def pattern(keylen, fn_pattern):
|
||||
mask = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'[:keylen]
|
||||
return fn_pattern(mask, keylen)
|
||||
|
||||
def split(self, keylen, mask, offset=0):
|
||||
ret = {}
|
||||
for _ in range(offset):
|
||||
next(mask)
|
||||
ret = {k: [] for k in '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'[:keylen]}
|
||||
for n, k in zip(self.nums, mask):
|
||||
ret[k].append(n)
|
||||
return ret.values()
|
||||
|
||||
def zip(self, key_mask, offset=0):
|
||||
for _ in range(offset):
|
||||
next(key_mask)
|
||||
return [(n - k) % 29 for n, k in zip(self.nums, key_mask)]
|
||||
|
||||
@staticmethod
|
||||
def guess(parts, score_fn): # minimize score_fn
|
||||
found = []
|
||||
avg_score = 0
|
||||
for nums in parts:
|
||||
best = 9999999
|
||||
candidate = 0
|
||||
for i in range(29):
|
||||
score = score_fn([(x - i) % 29 for x in nums])
|
||||
if score < best:
|
||||
best = score
|
||||
candidate = i
|
||||
avg_score += best
|
||||
found.append(candidate)
|
||||
return avg_score / len(parts), found
|
||||
|
||||
|
||||
#########################################
|
||||
# SearchInterrupt : Hill climbing algorithm for interrupt detection
|
||||
#########################################
|
||||
|
||||
class SearchInterrupt(object):
|
||||
def __init__(self, arr, interrupt_chr): # remove all whitespace in arr
|
||||
self.single_result = False # if False, return list of equal likelihood
|
||||
self.full = arr
|
||||
self.stops = [i for i, n in enumerate(arr) if n == interrupt_chr]
|
||||
|
||||
def to_occurrence_index(self, interrupts):
|
||||
return [self.stops.index(x) + 1 for x in interrupts]
|
||||
|
||||
def from_occurrence_index(self, interrupts):
|
||||
return [self.stops[x - 1] for x in interrupts]
|
||||
|
||||
def join(self, interrupts=[]): # rune positions, not occurrence index
|
||||
ret = []
|
||||
i = -1
|
||||
for x in interrupts:
|
||||
ret += self.full[i + 1:x]
|
||||
i = x
|
||||
return ret + self.full[i + 1:]
|
||||
|
||||
# Go over the full string but only look at the first {maxdepth} interrupts.
|
||||
# Enumerate all possibilities and choose the one with the highest score.
|
||||
# If first interrupt is set, add it to the resulting set. If not, ignore it
|
||||
# Every iteration will add a single interrupt only, not the full set.
|
||||
def sequential(self, score_fn, startAt=0, maxdepth=9):
|
||||
found = [[]]
|
||||
|
||||
def best_in_one(i, depth, prefix=[]):
|
||||
best_s = -8
|
||||
best_p = [] # [match, match, ...]
|
||||
irp = self.stops[i:i + depth]
|
||||
for x in itertools.product([False, True], repeat=depth):
|
||||
part = list(itertools.compress(irp, x))
|
||||
score = score_fn(self.join(prefix + part))
|
||||
if score >= best_s:
|
||||
if score > best_s or self.single_result:
|
||||
best_s = score
|
||||
best_p = [part]
|
||||
else:
|
||||
best_p.append(part)
|
||||
return best_p, best_s
|
||||
|
||||
def best_in_all(i, depth):
|
||||
best_s = -8
|
||||
best_p = [] # [(prefix, [match, match, ...]), ...]
|
||||
for pre in found:
|
||||
parts, score = best_in_one(i, depth, prefix=pre)
|
||||
if score >= best_s:
|
||||
if score > best_s or self.single_result:
|
||||
best_s = score
|
||||
best_p = [(pre, parts)]
|
||||
else:
|
||||
best_p.append((pre, parts))
|
||||
return best_p, best_s
|
||||
|
||||
# first step: move maxdepth-sized window over data
|
||||
i = startAt - 1 # in case loop isnt called
|
||||
for i in range(startAt, len(self.stops) - maxdepth):
|
||||
# print('.', end='')
|
||||
parts, _ = best_in_all(i, maxdepth)
|
||||
found = []
|
||||
search = self.stops[i]
|
||||
for prfx, candidates in parts:
|
||||
bitSet = False
|
||||
bitNotSet = False
|
||||
for x in candidates:
|
||||
if len(x) > 0 and x[0] == search:
|
||||
bitSet = True
|
||||
else:
|
||||
bitNotSet = True
|
||||
if bitSet and bitNotSet:
|
||||
break
|
||||
if bitSet:
|
||||
found.append(prfx + [search])
|
||||
if bitNotSet:
|
||||
found.append(prfx)
|
||||
# print('.')
|
||||
# last step: all permutations for the remaining (< maxdepth) bits
|
||||
i += 1
|
||||
remaining, score = best_in_all(i, min(maxdepth, len(self.stops) - i))
|
||||
found = [x + z for x, y in remaining for z in y]
|
||||
return score, found
|
||||
|
||||
# Flip upto {maxdepth} bits anywhere in the full string.
|
||||
# Choose the bitset with the highest score and repeat.
|
||||
# If no better score found, increment number of testing bits and repeat.
|
||||
# Either start with all interrupts set (topDown) or none set.
|
||||
def genetic(self, score_fn, topDown=False, maxdepth=3):
|
||||
current = self.stops if topDown else []
|
||||
|
||||
def evolve(lvl):
|
||||
for x in itertools.combinations(self.stops, lvl + 1):
|
||||
tmp = current[:]
|
||||
for y in x:
|
||||
if y in current:
|
||||
tmp.pop(bisect.bisect_left(tmp, y))
|
||||
else:
|
||||
bisect.insort(tmp, y)
|
||||
yield tmp, score_fn(self.join(tmp))
|
||||
|
||||
best = score_fn(self.join())
|
||||
level = 0 # or start directly with maxdepth - 1
|
||||
while level < maxdepth:
|
||||
print('.', end='')
|
||||
update = None
|
||||
for interrupts, score in evolve(level):
|
||||
if score > best:
|
||||
best = score
|
||||
update = interrupts
|
||||
if update:
|
||||
level = 0 # restart with 1-bit again
|
||||
current = update
|
||||
continue # did optimize, so retry with same level
|
||||
level += 1
|
||||
print('.')
|
||||
# find equally likely candidates
|
||||
if self.single_result:
|
||||
return best, [current]
|
||||
all_of_them = [x for x, score in evolve(2) if score == best]
|
||||
all_of_them.append(current)
|
||||
return best, all_of_them
|
||||
|
||||
|
||||
# a = GuessInterrupt([2, 0, 1, 0, 14, 15, 0, 13, 24, 25, 25, 25], 0)
|
||||
# print(a.sequential(lambda x: (1.2 if len(x) == 11 else 0.1)))
|
||||
# print(a.sequential(lambda x: (1.1 if len(x) == 10 else 0.1)))
|
||||
# print(a.sequential(lambda x: (1.3 if len(x) == 9 else 0.1)))
|
||||
Reference in New Issue
Block a user