Files
LiberPrayground/probability.py
2021-01-20 00:26:04 +01:00

149 lines
4.5 KiB
Python
Executable File

#!/usr/bin/env python3
import re
from RuneSolver import VigenereSolver
from RuneText import RuneText
from NGrams import NGrams
from HeuristicSearch import GuessVigenere, SearchInterrupt
# from FailedAttempts import NGramShifter
# The rune alphabet; a rune's position in this string is its numeric index.
RUNES = 'ᚠᚢᚦᚩᚱᚳᚷᚹᚻᚾᛁᛄᛇᛈᛉᛋᛏᛒᛖᛗᛚᛝᛟᛞᚪᚫᚣᛡᛠ'
RCOUNT = len(RUNES)
# NOTE(review): this literal is empty, so RUNES.index() below yields 0 (the
# position of the first rune). Presumably a rune character was intended here
# -- confirm against the upstream file.
ORG_INTERRUPT = ''
INV_INTERRUPT = RUNES.index(ORG_INTERRUPT)
# When True, rune indices are mirrored within the alphabet
# (index -> RCOUNT - 1 - index).
INVERT = False
if INVERT:
    # Derived from the alphabet size instead of a hard-coded 28.
    INV_INTERRUPT = (RCOUNT - 1) - INV_INTERRUPT
# Matches every character that is not one of the runes in RUNES.
re_norune = re.compile('[^' + RUNES + ']')
def load_data(fname):
    """Load ``pages/<fname>.txt`` and return its rune data.

    Every character that is not in ``RUNES`` is removed from the file
    content; the remainder is wrapped in ``RuneText`` and its ``'index'``
    item is returned (presumably a list of rune indices -- the invert
    branch below treats it as an iterable of numbers).

    When the module-level ``INVERT`` flag is set, each value ``x`` is
    replaced by ``RCOUNT - 1 - x``.
    """
    fname = 'pages/{}.txt'.format(fname)
    print()
    print('loading file:', fname)
    # Runes are non-ASCII: read as UTF-8 instead of the locale default so the
    # result does not depend on the platform.
    with open(fname, 'r', encoding='utf-8') as f:
        data = RuneText(re_norune.sub('', f.read()))['index']
    if INVERT:
        data = [(RCOUNT - 1) - x for x in data]
    return data
#########################################
# Probability : Count runes and simple frequency analysis
#########################################
class Probability:
    """Occurrence counts of a stream of rune indices plus simple statistics.

    Attributes (set in ``__init__``):
        prob: list of ``RCOUNT`` counters; ``prob[i]`` is how often the
            index ``i`` occurred in the stream.
        N: number of items in the stream.
    """

    def __init__(self, numstream):
        """Count every index in ``numstream`` (a sized iterable of ints)."""
        self.prob = [0] * RCOUNT
        for r in numstream:
            self.prob[r] += 1
        self.N = len(numstream)

    def IC(self):
        """Return the index of coincidence, scaled by the alphabet size.

        Computed as ``sum(c * (c - 1)) / (N * (N - 1))`` multiplied by the
        number of counters, so a uniformly random stream scores about 1.0.

        Returns 0.0 when the stream has fewer than two items: no pair of
        items exists, and the formula would otherwise divide by zero.
        """
        if self.N < 2:
            return 0.0
        X = sum(x * (x - 1) for x in self.prob)
        # len(self.prob) is the alphabet size (RCOUNT when built by __init__).
        return X / ((self.N * (self.N - 1)) / len(self.prob))

    def friedman(self):
        """Return the Friedman key-length estimate for this stream.

        Uses the module-level ``K_p`` and ``K_r``.  ``IC()`` is scaled by the
        alphabet size while ``K_r`` is ``1 / alphabet size``, so the IC is
        divided back down before the two are compared.

        Raises ``ZeroDivisionError`` when the unscaled IC equals ``K_r``.
        """
        kappa_o = self.IC() / len(self.prob)
        return (K_p - K_r) / (kappa_o - K_r)

    def similarity(self):
        """Return the sum of squared differences to ``PROB_NORM``.

        The counts are normalized first; smaller values mean the stream's
        distribution is closer to the reference distribution.
        """
        probs = Probability.normalized(self.prob)
        return sum((x - y) ** 2 for x, y in zip(PROB_NORM, probs))

    @staticmethod
    def normalized(int_prob):
        """Return ``int_prob`` with every entry divided by the list's sum.

        Raises ``ZeroDivisionError`` when the entries sum to zero.
        """
        total = sum(int_prob)
        return [x / total for x in int_prob]  # math.log(x / total, 10)

    @staticmethod
    def IC_w_keylen(nums, keylen):
        """Return the mean ``IC()`` over the ``keylen`` columns of ``nums``.

        Column ``x`` consists of the items ``nums[x::keylen]``.
        """
        val = sum(Probability(nums[x::keylen]).IC() for x in range(keylen))
        return val / keylen
#########################################
# Perform heuristic search on the keylength, interrupts, and key
#########################################
def enum_keylengths(nums, fn_interrupt, fn_keyguess, kmin=1, kmax=32):
    """Generate key guesses for every key length in ``kmin..kmax``.

    For each key length, ``fn_interrupt(keylen, searcher)`` must return a
    ``(score, list_of_skips)`` pair, and ``fn_keyguess(keylen, joined)`` is
    called once per entry of that list with ``searcher.join(skips)``.

    Yields ``(keylen, score, position, skips, key)`` tuples.  Progress, the
    per-length summary and the best-scoring key length are printed.
    """
    searcher = SearchInterrupt(nums, INV_INTERRUPT)
    print('interrupt:', ORG_INTERRUPT, 'count:', len(searcher.stops))
    top_score, top_keylen = 0, 0
    for keylen in range(kmin, kmax + 1):
        score, candidates = fn_interrupt(keylen, searcher)
        print(f'{keylen} {score:.4f}')
        guessed_keys = []
        for pos, skips in enumerate(candidates):
            key = fn_keyguess(keylen, searcher.join(skips))
            yield keylen, score, pos, skips, key
            guessed_keys.append(key)
            # The list entry is overwritten in place after the yield; the
            # converted values are what the summary line below prints.
            candidates[pos] = searcher.to_occurrence_index(skips)
        print(' skip:', candidates)
        print(' key:', guessed_keys)
        if score > top_score:
            top_score, top_keylen = score, keylen
    print(f'best estimate: keylength: {top_keylen}, score: {top_score:.4f}')
def fn_break_vigenere(fname, data, kmin=1, kmax=32):
    """Try to break ``data`` as a Vigenere cipher and write every attempt.

    Args:
        fname: page name; the solver input is ``pages/<fname>.txt`` and the
            results go to ``out/<fname>.<score>.<keylen>.<i>.txt``.
        data: the rune data passed on to ``enum_keylengths``.
        kmin, kmax: inclusive range of key lengths to try (default 1..32).

    For each candidate yielded by ``enum_keylengths`` a header line
    ``keylen, score, key, skips`` is written to the output file, then a
    ``VigenereSolver`` is configured with that file, key and interrupt
    positions and run.
    """
    import os  # local import so this function brings its own dependency

    def fn_similarity(x):
        # Score used by the key guesser.
        return Probability(x).similarity()

    def fn_irp(kl, iguess):
        # Interrupt search scored by the mean IC for key length `kl`.
        def fn_IoC(x):
            return Probability.IC_w_keylen(x, kl)
        # Alternatives kept from the original for reference:
        # return iguess.genetic(fn_IoC, topDown=False, maxdepth=4)
        # return fn_IoC(iguess.join()), [[]]  # without interrupts
        return iguess.sequential(fn_IoC, startAt=0, maxdepth=9)

    def fn_key(kl, stream):
        return GuessVigenere(stream).guess(kl, fn_similarity)

    slvr = VigenereSolver()
    slvr.input.load(file=f'pages/{fname}.txt')
    slvr.output.QUIET = True
    slvr.output.COLORS = False
    slvr.INTERRUPT = ORG_INTERRUPT
    slvr.KEY_INVERT = INVERT
    # open(..., 'w') below fails if the output directory does not exist yet.
    os.makedirs('out', exist_ok=True)
    for kl, score, i, skips, key in enum_keylengths(data, fn_irp, fn_key,
                                                    kmin=kmin, kmax=kmax):
        outfile = f'out/{fname}.{score:.3f}.{kl}.{i}.txt'
        with open(outfile, 'w') as f:
            f.write(f'{kl}, {score:.4f}, {key}, {skips}\n')
        slvr.output.file_output = outfile
        slvr.INTERRUPT_POS = skips
        slvr.KEY_DATA = key
        slvr.run()
#########################################
# main
#########################################
# Reference values from NGrams.load(), stored at the index of their rune
# (presumably occurrence counts -- they are normalized right below).
PROB_INT = [0] * RCOUNT
for k, v in NGrams.load().items():
    PROB_INT[RUNES.index(k)] = v
PROB_NORM = Probability.normalized(PROB_INT)
K_r = 1 / RCOUNT  # 0.034482758620689655
# Summed over the normalized distribution: the expected value noted on the
# right is a probability and cannot result from un-normalized entries.
K_p = sum(x ** 2 for x in PROB_NORM)  # 0.06116195419412538
for fname in [
    # '0_welcome',  # V8
    # 'jpg107-167',  # V13
    # '0_warning',  # invert
    # '0_wisdom',  # plain
    # 'p0-2',  # ???
    # 'p3-7',  # ???
    # 'p8-14',  # ??? -> kl 11? or 12?
    # 'p15-22',  # ???
    # 'p23-26',  # ???
    # 'p27-32',  # ???
    # 'p33-39',  # ???
    # 'p40-53',  # ???
    'p54-55',  # ???
]:
    data = load_data(fname)
    # NGramShifter().guess(data, RUNES[INV_INTERRUPT])
    fn_break_vigenere(fname, data)