149 lines
4.5 KiB
Python
Executable File
149 lines
4.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
import re
|
|
from RuneSolver import VigenereSolver
|
|
from RuneText import RuneText
|
|
from NGrams import NGrams
|
|
from HeuristicSearch import GuessVigenere, SearchInterrupt
|
|
# from FailedAttempts import NGramShifter
|
|
|
|
# Rune alphabet; a rune's position in this string is its numeric index.
RUNES = 'ᚠᚢᚦᚩᚱᚳᚷᚹᚻᚾᛁᛄᛇᛈᛉᛋᛏᛒᛖᛗᛚᛝᛟᛞᚪᚫᚣᛡᛠ'
RCOUNT = len(RUNES)

# Rune treated as the interrupt character, and whether indices are mirrored.
ORG_INTERRUPT = 'ᚠ'
INVERT = False

# Numeric index of the interrupt rune, mirrored (28 - index) when INVERT is set.
INV_INTERRUPT = RUNES.index(ORG_INTERRUPT)
if INVERT:
    INV_INTERRUPT = RCOUNT - 1 - INV_INTERRUPT

# Matches every character that is not one of the runes above.
re_norune = re.compile(f'[^{RUNES}]')
|
|
|
|
|
|
def load_data(fname):
    """Load a page file and return its runes as a stream of rune indices.

    The file ``pages/<fname>.txt`` is read, every character that is not a
    rune (see ``re_norune``) is removed, and the remaining text is handed to
    ``RuneText``; the value stored under its ``'index'`` key is returned.
    When ``INVERT`` is set, each index ``x`` is mirrored to ``RCOUNT - 1 - x``.

    Args:
        fname: Page name without directory and without the ``.txt`` suffix.

    Returns:
        The index stream produced by ``RuneText`` (a new list of mirrored
        indices when ``INVERT`` is set).

    Raises:
        OSError: If the page file cannot be opened.
    """
    fname = 'pages/{}.txt'.format(fname)
    print()
    print('loading file:', fname)
    # The pages contain runes (non-ASCII), so decode explicitly as UTF-8
    # instead of relying on the platform's locale-dependent default.
    with open(fname, 'r', encoding='utf-8') as f:
        data = RuneText(re_norune.sub('', f.read()))['index']
    if INVERT:
        data = [RCOUNT - 1 - x for x in data]
    return data
|
|
|
|
|
|
#########################################
|
|
# Probability : Count runes and simple frequency analysis
|
|
#########################################
|
|
|
|
class Probability(object):
    """Rune histogram of an index stream plus simple frequency statistics.

    Attributes are set in ``__init__``:
      prob -- list with one occurrence counter per rune index (RCOUNT bins)
      N    -- number of symbols in the stream
    """

    def __init__(self, numstream):
        """Count how often each rune index occurs in ``numstream``.

        Args:
            numstream: Sized sequence of integer rune indices; each value is
                used directly as an index into the RCOUNT-sized histogram.
        """
        self.prob = [0] * RCOUNT
        for r in numstream:
            self.prob[r] += 1
        self.N = len(numstream)

    def IC(self):
        """Return the index of coincidence, scaled by the alphabet size.

        The value is ``sum(n_i * (n_i - 1)) / (N * (N - 1) / c)`` where ``c``
        is the number of histogram bins.

        A stream with fewer than two symbols contains no symbol pairs at
        all; 0.0 is returned for it instead of dividing by zero.
        """
        if self.N < 2:
            return 0.0
        X = sum(x * (x - 1) for x in self.prob)
        return X / ((self.N * (self.N - 1)) / len(self.prob))

    def friedman(self):
        """Return the Friedman estimate ``(K_p - K_r) / (K_o - K_r)``.

        ``K_p`` and ``K_r`` are the module-level, unscaled coincidence rates,
        so the observed rate ``K_o`` must be unscaled as well: ``IC()`` is
        multiplied by the alphabet size and is divided by it again here.

        Raises:
            ZeroDivisionError: If the observed rate equals ``K_r``.
        """
        observed = self.IC() / len(self.prob)
        return (K_p - K_r) / (observed - K_r)

    def similarity(self):
        """Return the squared distance between this stream and ``PROB_NORM``.

        The histogram is normalised to relative frequencies first; smaller
        values mean the distribution is closer to the reference.

        Raises:
            ZeroDivisionError: If the histogram is empty (see ``normalized``).
        """
        probs = Probability.normalized(self.prob)
        return sum((x - y) ** 2 for x, y in zip(PROB_NORM, probs))

    @staticmethod
    def normalized(int_prob):
        """Scale ``int_prob`` so that its entries sum to 1.

        Raises:
            ZeroDivisionError: If the entries sum to zero.
        """
        total = sum(int_prob)
        # Alternative noted by the author: math.log(x / total, 10)
        return [x / total for x in int_prob]

    @staticmethod
    def IC_w_keylen(nums, keylen):
        """Return the mean ``IC()`` over the ``keylen`` interleaved columns.

        Column ``x`` consists of ``nums[x::keylen]``, i.e. every symbol that
        would share the same key position for a key of length ``keylen``.
        """
        val = sum(Probability(nums[x::keylen]).IC() for x in range(keylen))
        return val / keylen
|
|
|
|
|
|
#########################################
|
|
# Perform heuristic search on the keylength, interrupts, and key
|
|
#########################################
|
|
|
|
def enum_keylengths(nums, fn_interrupt, fn_keyguess, kmin=1, kmax=32):
    """Yield key candidates for every key length from ``kmin`` to ``kmax``.

    For each key length, ``fn_interrupt(keylen, searcher)`` supplies a score
    and a list of interrupt-skip candidates. For every candidate,
    ``fn_keyguess(keylen, stream)`` is called with the stream returned by
    ``searcher.join(skips)``. Progress and the best-scoring key length are
    printed along the way.

    Args:
        nums: Index stream handed to ``SearchInterrupt``.
        fn_interrupt: Callable ``(keylen, searcher) -> (score, candidates)``.
        fn_keyguess: Callable ``(keylen, stream) -> key``.
        kmin: Smallest key length to try (inclusive).
        kmax: Largest key length to try (inclusive).

    Yields:
        Tuples ``(keylen, score, candidate_index, skips, key)``.
    """
    top_score = 0
    top_len = 0
    searcher = SearchInterrupt(nums, INV_INTERRUPT)
    print('interrupt:', ORG_INTERRUPT, 'count:', len(searcher.stops))
    for keylen in range(kmin, kmax + 1):
        score, candidates = fn_interrupt(keylen, searcher)
        print(f'{keylen} {score:.4f}')
        keys = []
        for idx, skips in enumerate(candidates):
            key = fn_keyguess(keylen, searcher.join(skips))
            yield keylen, score, idx, skips, key
            keys.append(key)
            # Replace the candidate in place by its occurrence-index form,
            # which is what gets printed below.
            candidates[idx] = searcher.to_occurrence_index(skips)
        print(' skip:', candidates)
        print(' key:', keys)
        if score > top_score:
            top_score, top_len = score, keylen
    print(f'best estimate: keylength: {top_len}, score: {top_score:.4f}')
|
|
|
|
|
|
def fn_break_vigenere(fname, data):
    """Search key length, interrupts and key for a page and write results.

    Every candidate produced by ``enum_keylengths`` gets its own file
    ``out/<fname>.<score>.<keylen>.<index>.txt``: a header line with the
    parameters is written first, then a ``VigenereSolver`` configured with
    the candidate's key and interrupt positions is run with that same file
    set as its ``output.file_output``.

    Args:
        fname: Page name without directory and ``.txt`` suffix.
        data: Index stream of the page (as returned by ``load_data``).
    """
    def score_key(stream):
        # Distance of the stream's rune distribution to the reference one.
        return Probability(stream).similarity()

    def find_interrupts(keylen, iguess):
        # Alternatives the author left for reference:
        #   iguess.genetic(<IoC fn>, topDown=False, maxdepth=4)
        #   <IoC fn>(iguess.join()), [[]]   # without interrupts
        return iguess.sequential(
            lambda stream: Probability.IC_w_keylen(stream, keylen),
            startAt=0, maxdepth=9)

    def guess_key(keylen, stream):
        return GuessVigenere(stream).guess(keylen, score_key)

    solver = VigenereSolver()
    solver.input.load(file=f'pages/{fname}.txt')
    solver.output.QUIET = True
    solver.output.COLORS = False
    solver.INTERRUPT = ORG_INTERRUPT
    solver.KEY_INVERT = INVERT

    candidates = enum_keylengths(data, find_interrupts, guess_key,
                                 kmin=1, kmax=32)
    for keylen, score, idx, skips, key in candidates:
        outfile = f'out/{fname}.{score:.3f}.{keylen}.{idx}.txt'
        # Write the header and close the file before the solver uses it.
        with open(outfile, 'w') as fh:
            fh.write(f'{keylen}, {score:.4f}, {key}, {skips}\n')
        solver.output.file_output = outfile
        solver.INTERRUPT_POS = skips
        solver.KEY_DATA = key
        solver.run()
|
|
|
|
|
|
#########################################
|
|
# main
|
|
#########################################
|
|
|
|
# Reference rune frequencies, indexed by rune position in RUNES.
# NOTE(review): NGrams.load() presumably maps single runes to occurrence
# counts -- confirm against NGrams.
PROB_INT = [0] * RCOUNT
for k, v in NGrams.load().items():
    PROB_INT[RUNES.index(k)] = v
PROB_NORM = Probability.normalized(PROB_INT)

# Coincidence rate of uniformly random text over the rune alphabet.
K_r = 1 / RCOUNT  # 0.034482758620689655
# Coincidence rate of the reference language: the sum of squared *relative*
# frequencies, so it has to be computed from PROB_NORM rather than from the
# raw values in PROB_INT.
K_p = sum(x ** 2 for x in PROB_NORM)  # 0.06116195419412538

# Pages to analyse; uncomment an entry to include it in the run.
for fname in [
    # '0_welcome',  # V8
    # 'jpg107-167',  # V13
    # '0_warning',  # invert
    # '0_wisdom',  # plain
    # 'p0-2',  # ???
    # 'p3-7',  # ???
    # 'p8-14',  # ??? -> kl 11? or 12?
    # 'p15-22',  # ???
    # 'p23-26',  # ???
    # 'p27-32',  # ???
    # 'p33-39',  # ???
    # 'p40-53',  # ???
    'p54-55',  # ???
]:
    data = load_data(fname)
    # NGramShifter().guess(data, RUNES[INV_INTERRUPT])
    fn_break_vigenere(fname, data)
|