#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

# Punctuation / whitespace marks as they appear in rune text, mapped to the
# plain-text character they stand for.
white_rune = {
    '•': ' ',
    '⁘': '.',
    '⁚': ',',
    '⁖': ';',
    '⁜': '#',
}
# Reverse direction: plain-text character -> mark used in rune text.
white_text = dict(zip(white_rune.values(), white_rune.keys()))

# One entry per rune: (prime value, rune, transliterations).
# The LAST transliteration of an entry is the one used for display.
# Custom added: V
alphabet = [
    (2, 'ᚠ', ['F']),
    (3, 'ᚢ', ['V', 'U']),
    (5, 'ᚦ', ['TH']),
    (7, 'ᚩ', ['O']),
    (11, 'ᚱ', ['R']),
    (13, 'ᚳ', ['K', 'C']),
    (17, 'ᚷ', ['G']),
    (19, 'ᚹ', ['W']),
    (23, 'ᚻ', ['H']),
    (29, 'ᚾ', ['N']),
    (31, 'ᛁ', ['I']),
    (37, 'ᛄ', ['J']),
    (41, 'ᛇ', ['EO']),
    (43, 'ᛈ', ['P']),
    (47, 'ᛉ', ['X']),
    (53, 'ᛋ', ['Z', 'S']),
    (59, 'ᛏ', ['T']),
    (61, 'ᛒ', ['B']),
    (67, 'ᛖ', ['E']),
    (71, 'ᛗ', ['M']),
    (73, 'ᛚ', ['L']),
    (79, 'ᛝ', ['ING', 'NG']),
    (83, 'ᛟ', ['OE']),
    (89, 'ᛞ', ['D']),
    (97, 'ᚪ', ['A']),
    (101, 'ᚫ', ['AE']),
    (103, 'ᚣ', ['Y']),
    (107, 'ᛡ', ['IO', 'IA']),
    (109, 'ᛠ', ['EA']),
]

# Every accepted transliteration -> rune.
text_map = dict(
    (name, rune) for _, rune, names in alphabet for name in names)
# Rune -> display transliteration (the last one listed for that rune).
rune_map = {rune: names[-1] for _, rune, names in alphabet}
# Rune -> prime value.
primes_map = dict((rune, prime) for prime, rune, _ in alphabet)
# All 29 runes; `alphabet` is already in ascending order, so no sort needed.
RUNES = [entry[1] for entry in alphabet]
# NOTE: `alphabet` is deliberately not deleted here; the playground uses it
# for GP display.
#########################################
#  load page and convert to indices for faster access
#########################################

def load_indices(fname, interrupt, maxinterrupt=None, minlen=None, limit=None):
    """Read a rune page and return it as a list of rune indices.

    Everything that is not a rune is stripped before conversion; the
    resulting index list is cut to at most `limit` entries.

    fname        -- path of the page to read
    interrupt    -- index value that counts as an interrupt
    maxinterrupt -- if not None, return everything up to but NOT including
                    the interrupt that follows `maxinterrupt` earlier ones
                    (0 returns the text before the first interrupt)
    minlen       -- if set, a cut that would leave fewer than `minlen`
                    entries is skipped and the next interrupt is tried
    limit        -- maximum number of indices to consider (None = all)
    """
    # Pages contain runes: read them as UTF-8 instead of relying on the
    # platform default encoding.
    with open(fname, 'r', encoding='utf-8') as f:
        # presumably `index_no_white` is the index list without whitespace
        # entries -- defined in RuneText, not visible here.
        data = RuneText(re_norune.sub('', f.read())).index_no_white[:limit]
    if maxinterrupt is None:
        return data

    remaining = maxinterrupt  # interrupts still allowed before cutting
    for i, x in enumerate(data):
        if x != interrupt:
            continue
        if remaining == 0:
            if minlen and i < minlen:
                continue  # too short, try again at the next interrupt
            return data[:i]
        remaining -= 1
    return data


#########################################
#  find the longest chunk in a list of indices, which does not include an irp
#########################################

def longest_no_interrupt(data, interrupt, irpmax=0):
    """Return (pos, length) of the longest chunk with at most `irpmax` irps.

    A chunk starts at index 0 or directly after an interrupt and ends right
    before an interrupt or at the end of `data`. With `irpmax` > 0 a chunk
    may span up to `irpmax` interrupts. If several chunks share the maximal
    length, the one with the highest start position is returned.
    Empty `data` yields (0, 0).
    """
    # Start positions of the chunks that are still open, oldest first.
    starts = [0] * (irpmax + 1)
    chunks = []  # (length, start)

    def close_chunk(end):
        start = starts.pop(0)
        if start == 0:
            # Chunks anchored at 0 only grow; keep just the longest one.
            chunks.clear()
        chunks.append((end - start, start))

    for i, x in enumerate(data):
        if x == interrupt:
            starts.append(i + 1)
            close_chunk(i)
    # Use len(data) rather than the loop variable so empty input works too.
    close_chunk(len(data))
    length, pos = max(chunks)
    return pos, length
#########################################
#  IOWriter : handle std output with highlight etc.
#########################################

class IOWriter(object):
    """Pretty-print rune data: runes, text, primes and per-word prime sums.

    Output is buffered per line / sentence in `out_r` (runes), `out_t`
    (text), `out_p` (primes) and `out_ps` (prime sums per word) and flushed
    by `write()`, either to stdout or appended to `file_output`.
    """

    def __init__(self):
        self.BREAK_MODE = None  # item kind that forces a flush; None = auto
        self.VERBOSE = '-v' in sys.argv
        self.QUIET = '-q' in sys.argv
        self.COLORS = True  # sys.stdout.isatty() doesn't matter if no highlight
        self.cur_color = None
        self.file_output = None  # path to append to instead of printing
        # Create the buffers right away so mark() / write() are safe to call
        # before the first run().
        self.clear()

    def clear(self):
        """Reset all output buffers and the running line sum."""
        self.linesum = 0
        self.out_r = ''
        self.out_t = ''
        self.out_p = ''
        self.out_ps = ''

    def mark(self, color=None):
        """Switch highlighting to `color` (e.g. '1;31m') or reset if None."""
        if self.COLORS:
            m = f'\x1b[{color}' if color else '\x1b[0m'
            self.cur_color = color
            self.out_r += m
            self.out_t += m
            self.out_p += m
            # out_ps is left alone on purpose: a word may be split-up

    def run(self, data, highlight=None):
        """Print `data`, optionally highlighting index ranges.

        data      -- sequence of items with .kind, .rune, .text and .prime
        highlight -- sorted, non-overlapping (start, end) or
                     (start, end, color) tuples; `end` is exclusive and the
                     color defaults to bold red. The list is not modified.
        """
        break_on = self.BREAK_MODE  # set by user
        if break_on is None:  # check None specifically, to allow '' as value
            break_on = 'l' if self.VERBOSE else 's'  # dynamically adapt mode

        wsum = 0
        self.clear()
        count = len(data)
        # Work on a copy so the caller's list survives; the sentinel span is
        # never reached and guarantees pending[0] always exists.
        pending = list(highlight) if highlight else []
        pending.append((count, count))

        for i in range(count):
            # Handle color highlight
            span = pending[0]
            if i != span[0] and i >= span[1]:
                self.mark()
                pending.pop(0)
                span = pending[0]
            # Checked after the pop as well, so a span that starts exactly
            # where the previous one ended is still switched on.
            if i == span[0]:
                # third element e.g. 1;30m for bold black, else bold red
                self.mark(span[2] if len(span) > 2 else '1;31m')

            cur = data[i]
            eow = i + 1 == count or data[i + 1].kind not in 'rl'

            # Output current rune
            if cur.kind == 'l':
                if cur.kind == break_on:
                    self.write()
                continue  # ignore all \n,\r if not forced explicitly
            self.out_r += cur.rune
            self.out_t += cur.text
            if cur.kind != 'r':
                if self.VERBOSE:
                    self.out_p += ' '
                if cur.kind == break_on:
                    self.write()
                continue

            # Special case when printing numbers.
            # Keep both lines (text + numbers) in sync.
            if self.VERBOSE:
                b = f'{cur.prime}'  # TODO: option for indices instead
                fillup = len(b) - len(cur.text)
                self.out_t += ' ' * fillup
                self.out_p += b
                if not eow:
                    if fillup >= 0:
                        self.out_t += ' '
                    self.out_p += '+'

            # Mark prime words
            wsum += cur.prime
            if eow and wsum > 0:
                self.linesum += wsum
                if self.VERBOSE:
                    if self.out_ps:
                        self.out_ps += ' + '
                    self.out_ps += str(wsum)
                if utils.is_prime(wsum):
                    if self.VERBOSE:
                        self.out_ps += '*'
                    elif not self.QUIET:  # and wsum > 109
                        self.out_t += '__'
                wsum = 0
        self.write()

    def write(self):
        """Flush the buffered line (if any) and reset the buffers."""
        def print_f(x=''):
            if self.file_output:
                # Runes are written, so do not depend on the locale encoding.
                with open(self.file_output, 'a', encoding='utf-8') as f:
                    f.write(x + '\n')
            else:
                print(x)

        if not self.out_t:
            return

        # Close an open highlight for this line and re-open it afterwards.
        prev_color = self.cur_color
        if prev_color:
            self.mark()

        sffx = ' = {}'.format(self.linesum)
        if utils.is_prime(self.linesum):
            sffx += '*'
            # NOTE(review): nesting reconstructed from a whitespace-collapsed
            # patch; confirm the emirp check belongs inside the prime check.
            if utils.is_emirp(self.linesum):
                sffx += '√'

        if not self.QUIET or self.VERBOSE:
            print_f()
        if not self.QUIET:
            print_f(self.out_r)
        if not (self.QUIET or self.VERBOSE):
            self.out_t += sffx
        print_f(self.out_t)
        if self.VERBOSE:
            self.out_ps += sffx
            print_f(self.out_p)
            print_f(self.out_ps)
        self.clear()
        if prev_color:
            self.mark(prev_color)
And welc\nome, to my world; "manatee"') + io = IOWriter() + io.BREAK_MODE = 's' # 'l' + # io.VERBOSE = True + # io.QUIET = True + io.run(txt, [(4, 12), (13, 27)]) diff --git a/LP/InterruptDB.py b/LP/InterruptDB.py index 49fafce..24bcf5e 100755 --- a/LP/InterruptDB.py +++ b/LP/InterruptDB.py @@ -1,8 +1,9 @@ #!/usr/bin/env python3 +# -*- coding: UTF-8 -*- import os -from HeuristicSearch import SearchInterrupt -from HeuristicLib import Probability -from RuneText import RUNES, load_indices +from InterruptSearch import InterruptSearch +from Probability import Probability +from IOReader import load_indices from LPath import FILES_ALL, FILES_UNSOLVED, LPath @@ -13,20 +14,14 @@ from LPath import FILES_ALL, FILES_UNSOLVED, LPath class InterruptDB(object): def __init__(self, data, interrupt): self.irp = interrupt - self.iguess = SearchInterrupt(data, interrupt) + self.iguess = InterruptSearch(data, irp=interrupt) self.irp_count = len(self.iguess.stops) - def make(self, dbname, name, keylen): - def fn(x): - return Probability.target_diff(x, keylen) # used in db_norm - # return Probability.IC_w_keylen(x, keylen) # used in db_high - - if keylen == 0: - keylen = 1 - score, skips = fn(self.iguess.join()), [[]] # without interrupts + def make(self, dbname, name, keylen, fn_score): + if keylen == 0: # without interrupts + score, skips = fn_score(self.iguess.join(), 1), [[]] else: - score, skips = self.iguess.sequential(fn, startAt=0, maxdepth=99) - # score, skips = self.iguess.genetic(fn, topDown=False, maxdepth=4) + score, skips = self.iguess.all(keylen, fn_score) for i, interrupts in enumerate(skips): skips[i] = self.iguess.to_occurrence_index(interrupts) @@ -35,18 +30,17 @@ class InterruptDB(object): name, score, self.irp, self.irp_count, keylen, nums, dbname) return score, skips - def make_secondary(self, dbname, name, keylen, threshold): + def make_secondary(self, dbname, name, keylen, fn_score, threshold): scores = [] - def fn(x): - score = Probability.target_diff(x, keylen) 
# used in db_norm - # score = Probability.IC_w_keylen(x, keylen) # used in db_high + def fn(x, kl): + score = fn_score(x, kl) if score >= threshold: scores.append(score) return 1 return -1 - _, skips = self.iguess.sequential(fn, startAt=0, maxdepth=99) + _, skips = self.iguess.all(keylen, fn) for i, interrupts in enumerate(skips): skips[i] = self.iguess.to_occurrence_index(interrupts) ret = list(zip(scores, skips)) @@ -58,25 +52,6 @@ class InterruptDB(object): name, score, self.irp, self.irp_count, keylen, nums, dbname) return len(filtered) - @staticmethod - def longest_no_interrupt(data, interrupt, irpmax=0): - def add(i): - nonlocal ret, prev - idx = prev.pop(0) - if idx == 0: - ret = [] - ret.append((i - idx, idx)) - - prev = [0] * (irpmax + 1) - ret = [] - for i, x in enumerate(data): - if x == interrupt: - prev.append(i + 1) - add(i) - add(i + 1) - length, pos = max(ret) - return pos, length - @staticmethod def load(dbname): if not os.path.isfile(LPath.InterruptDB(dbname)): @@ -103,171 +78,12 @@ class InterruptDB(object): f.write(f'{name}|{irpmax}|{score:.5f}|{irp}|{keylen}|{nums}\n') -######################################### -# InterruptIndices : Read chapters and extract indices (cluster by runes) -######################################### - -class InterruptIndices(object): - def __init__(self): - self.pos = InterruptIndices.read() - - def consider(self, name, irp, limit): - nums = self.pos[name]['pos'][irp] - if len(nums) <= limit: - return self.pos[name]['total'] - return nums[limit] # number of runes, which is not last index - - def total(self, name): - return self.pos[name]['total'] - - def longest_no_interrupt(self, name, irp, irpmax=0): - irpmax += 1 - nums = self.pos[name]['pos'][irp] + [self.pos[name]['total']] * irpmax - ret = [(y - x, x) for x, y in zip(nums, nums[irpmax:])] - return sorted(ret, reverse=True) - - @staticmethod - def write(dbname='db_indices'): - with open(LPath.InterruptDB(dbname), 'w') as f: - f.write('# file | total runes in 
file | interrupt | indices\n') - for name in FILES_ALL: - fname = f'pages/{name}.txt' - data = load_indices(fname, 0) - total = len(data) - nums = [[] for x in range(29)] - for idx, rune in enumerate(data): - nums[rune].append(idx) - for irp, pos in enumerate(nums): - f.write('{}|{}|{}|{}\n'.format( - name, total, irp, ','.join(map(str, pos)))) - - @staticmethod - def read(dbname='db_indices'): - with open(LPath.InterruptDB(dbname), 'r') as f: - ret = {} - for line in f.readlines(): - if line.startswith('#'): - continue - line = line.strip() - name, total, irp, nums = line.split('|') - if name not in ret: - ret[name] = {'total': int(total), - 'pos': [[] for _ in range(29)]} - pos = ret[name]['pos'] - pos[int(irp)] = list(map(int, nums.split(','))) if nums else [] - return ret - - -######################################### -# InterruptToWeb : Read interrupt DB and create html graphic / matrix -######################################### - -class InterruptToWeb(object): - def __init__(self, dbname, template='template.html'): - with open(LPath.results(template), 'r') as f: - self.template = f.read() - self.indices = InterruptIndices() - self.scores = {} - db = InterruptDB.load(dbname) - for k, v in db.items(): - for irpc, score, irp, kl, nums in v: - if k not in self.scores: - self.scores[k] = [[] for _ in range(29)] - part = self.scores[k][irp] - while kl >= len(part): - part.append((0, 0)) # (score, irpc) - oldc = part[kl][1] - if irpc > oldc or (irpc == oldc and score > part[kl][0]): - part[kl] = (score, irpc) - - def cls(self, x, low=0, high=1): - if x <= low: - return ' class="m0"' - return f' class="m{int((min(high, x) - low) / (high - low) * 14) + 1}"' - - def table_reliable(self): - trh = '' - trtotal = 'Total' - trd = [f'{x}' for x in RUNES] - del_row = [True] * 29 - for name in FILES_ALL: - if name not in self.scores: - continue - total = self.indices.total(name) - trh += f'
{name}
' - trtotal += f'{total}' - for i in range(29): - scrs = self.scores[name][i][1:] - if not scrs: - trd[i] += '–' - continue - del_row[i] = False - worst_irpc = min([x[1] for x in scrs]) - if worst_irpc == 0: - if max([x[1] for x in scrs]) != 0: - trd[i] += '?' - continue - num = self.indices.consider(name, i, worst_irpc) - trd[i] += f'{num}' - - trh += '\n' - trtotal += '\n' - for i in range(29): - trd[i] += '\n' - if del_row[i]: - trd[i] = '' - return f'{trh}{"".join(trd)}{trtotal}
' - - def table_interrupt(self, irp, pmin=1.25, pmax=1.65): - maxkl = max(len(x[irp]) for x in self.scores.values()) - trh = '' - trbest = 'best' - trd = [f'{x}' for x in range(maxkl)] - for name in FILES_ALL: - maxscore = 0 - bestkl = -1 - try: - klarr = self.scores[name][irp] - except KeyError: - continue - trh += f'
{name}
' - for kl, (score, _) in enumerate(klarr): - if score < 0: - trd[kl] += f'–' - else: - trd[kl] += f'{score:.2f}' - if score > maxscore: - maxscore = score - bestkl = kl - trbest += f'{bestkl}' - trh += '\n' - trbest += '\n' - for i in range(29): - trd[i] += '\n' - return f'{trh}{"".join(trd[1:])}{trbest}
' - - def make(self, outfile, pmin=1.25, pmax=1.65): - nav = '' - txt = '' - for i in range(29): - has_entries = any(True for x in self.scores.values() if x[i]) - if not has_entries: - continue - nav += f'{RUNES[i]}\n' - txt += f'

Interrupt {i}: {RUNES[i]}

' - txt += self.table_interrupt(i, pmin, pmax) - html = self.template.replace('__NAVIGATION__', nav) - html = html.replace('__TAB_RELIABLE__', self.table_reliable()) - html = html.replace('__INTERRUPT_TABLES__', txt) - with open(LPath.results(outfile), 'w') as f: - f.write(html) - - ######################################### # helper functions ######################################### -def create_initial_db(dbname, minkl=1, maxkl=32, max_irp=20, irpset=range(29)): +def create_initial_db(dbname, fn_score, klset=range(1, 33), + max_irp=20, irpset=range(29)): oldDB = InterruptDB.load(dbname) oldValues = {k: set((a, b, c) for a, _, b, c, _ in v) for k, v in oldDB.items()} @@ -276,15 +92,16 @@ def create_initial_db(dbname, minkl=1, maxkl=32, max_irp=20, irpset=range(29)): data = load_indices(LPath.page(name), irp, maxinterrupt=max_irp) db = InterruptDB(data, irp) print('load:', name, 'interrupt:', irp, 'count:', db.irp_count) - for keylen in range(minkl, maxkl + 1): # key length + for keylen in klset: # key length if (db.irp_count, irp, keylen) in oldValues.get(name, []): print(f'{keylen}: skipped.') continue - score, interrupts = db.make(dbname, name, keylen) + score, interrupts = db.make(dbname, name, keylen, fn_score) print(f'{keylen}: {score:.4f}, solutions: {len(interrupts)}') -def find_secondary_solutions(db_in, db_out, threshold=0.75, max_irp=20): +def find_secondary_solutions(db_in, db_out, fn_score, + threshold=0.75, max_irp=20): oldDB = InterruptDB.load(db_in) search_set = set() for name, arr in oldDB.items(): @@ -299,13 +116,14 @@ def find_secondary_solutions(db_in, db_out, threshold=0.75, max_irp=20): print('load:', name, 'interrupt:', irp, 'keylen:', kl) data = load_indices(LPath.page(name), irp, maxinterrupt=max_irp) db = InterruptDB(data, irp) - c = db.make_secondary(db_out, name, kl, threshold) + c = db.make_secondary(db_out, name, kl, fn_score, threshold) print('found', c, 'additional solutions') if __name__ == '__main__': - # 
#########################################
#  InterruptIndices : Read chapters and extract indices (cluster by runes)
#########################################

class InterruptIndices(object):
    """Per-page lookup of the positions at which each rune index occurs.

    `self.pos` maps a page name to {'total': number of runes on the page,
    'pos': list of 29 position lists, one per rune index}.
    """

    def __init__(self):
        self.pos = InterruptIndices.load()

    def consider(self, name, irp, limit):
        """Return (first `limit` positions of `irp`, number of runes covered).

        The second value is the position of the interrupt following the
        first `limit` ones, or the page total if there is no such interrupt.
        """
        nums = self.pos[name]['pos'][irp]
        total = self.pos[name]['total'] if len(nums) <= limit else nums[limit]
        return nums[:limit], total

    def total(self, name):
        """Return the total number of runes on page `name`."""
        return self.pos[name]['total']

    def longest_no_interrupt(self, name, irp, irpmax=0):
        """Return (length, start) pairs between interrupts, longest first.

        Each pair spans from one occurrence of `irp` to the occurrence
        `irpmax + 1` later; the page total stands in for occurrences past
        the last one.
        """
        irpmax += 1
        nums = self.pos[name]['pos'][irp] + [self.pos[name]['total']] * irpmax
        ret = [(y - x, x) for x, y in zip(nums, nums[irpmax:])]
        return sorted(ret, reverse=True)

    @staticmethod
    def write(dbname='db_indices'):
        """Scan all pages in FILES_ALL and (re)create the index database."""
        with open(LPath.InterruptDB(dbname), 'w', encoding='utf-8') as f:
            f.write('# file | total runes in file | interrupt | indices\n')
            for name in FILES_ALL:
                data = load_indices(LPath.page(name), 0)
                total = len(data)
                nums = [[] for _ in range(29)]
                for idx, rune in enumerate(data):
                    nums[rune].append(idx)
                for irp, pos in enumerate(nums):
                    f.write('{}|{}|{}|{}\n'.format(
                        name, total, irp, ','.join(map(str, pos))))

    @staticmethod
    def load(dbname='db_indices'):
        """Parse the index database written by `write()`.

        Comment lines (starting with '#') and blank lines are ignored.
        """
        ret = {}
        with open(LPath.InterruptDB(dbname), 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if not line or line.startswith('#'):
                    continue
                name, total, irp, nums = line.split('|')
                if name not in ret:
                    ret[name] = {'total': int(total),
                                 'pos': [[] for _ in range(29)]}
                pos = ret[name]['pos']
                pos[int(irp)] = list(map(int, nums.split(','))) if nums else []
        return ret


if __name__ == '__main__':
    # InterruptIndices.write()
    for name, val in InterruptIndices.load().items():
        print(name, 'total:', val['total'])
        print(' ', [len(x) for x in val['pos']])
-######################################### - -class GuessAffine(object): - def __init__(self, nums): - self.nums = nums - - def guess(self, keylength, score_fn): # minimize score_fn - found = [] - avg_score = 0 - for offset in range(keylength): - candidate = (None, None) - best = 9999999 - for s in range(29): - for t in range(29): - shifted = [affine_decrypt(x, (s, t)) - for x in self.nums[offset::keylength]] - score = score_fn(shifted) - if score < best: - best = score - candidate = (s, t) - avg_score += best - found.append(candidate) - return avg_score / keylength, found - - -######################################### -# GuessPattern : Find a key that is rotated ABC BCA CAB, or ABC CAB BCA -######################################### - -class GuessPattern(object): - def __init__(self, nums): - self.nums = nums - - @staticmethod - def pattern(keylen, fn_pattern): - mask = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'[:keylen] - return fn_pattern(mask, keylen) - - def split(self, keylen, mask, offset=0): - ret = {} - for _ in range(offset): - next(mask) - ret = {k: [] for k in '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'[:keylen]} - for n, k in zip(self.nums, mask): - ret[k].append(n) - return ret.values() - - def zip(self, key_mask, offset=0): - for _ in range(offset): - next(key_mask) - return [(n - k) % 29 for n, k in zip(self.nums, key_mask)] - - @staticmethod - def guess(parts, score_fn): # minimize score_fn - found = [] - avg_score = 0 - for nums in parts: - best = 9999999 - candidate = 0 - for i in range(29): - score = score_fn([(x - i) % 29 for x in nums]) - if score < best: - best = score - candidate = i - avg_score += best - found.append(candidate) - return avg_score / len(parts), found - - -######################################### -# SearchInterrupt : Hill climbing algorithm for interrupt detection -######################################### - -class SearchInterrupt(object): - def __init__(self, arr, interrupt_chr): # remove all whitespace in arr +class 
InterruptSearch(object): + def __init__(self, arr, irp): # remove all whitespace in arr self.single_result = False # if False, return list of equal likelihood self.full = arr - self.stops = [i for i, n in enumerate(arr) if n == interrupt_chr] + self.stops = [i for i, n in enumerate(arr) if n == irp] def to_occurrence_index(self, interrupts): return [self.stops.index(x) + 1 for x in interrupts] @@ -124,11 +28,27 @@ class SearchInterrupt(object): i = x return ret + self.full[i + 1:] + # Just enumerate all possibilities. + # If you need to limit the options, trim the data before computation + def all(self, keylen, score_fn): + best_s = -8 + found = [] # [match, match, ...] + for x in itertools.product([False, True], repeat=len(self.stops)): + part = list(itertools.compress(self.stops, x)) + score = score_fn(self.join(part), keylen) + if score >= best_s: + if score > best_s or self.single_result: + best_s = score + found = [part] + else: + found.append(part) + return best_s, found + # Go over the full string but only look at the first {maxdepth} interrupts. # Enumerate all possibilities and choose the one with the highest score. # If first interrupt is set, add it to the resulting set. If not, ignore it # Every iteration will add a single interrupt only, not the full set. 
- def sequential(self, score_fn, startAt=0, maxdepth=9): + def sequential(self, keylen, score_fn, startAt=0, maxdepth=9): found = [[]] def best_in_one(i, depth, prefix=[]): @@ -137,7 +57,7 @@ class SearchInterrupt(object): irp = self.stops[i:i + depth] for x in itertools.product([False, True], repeat=depth): part = list(itertools.compress(irp, x)) - score = score_fn(self.join(prefix + part)) + score = score_fn(self.join(prefix + part), keylen) if score >= best_s: if score > best_s or self.single_result: best_s = score @@ -162,7 +82,7 @@ class SearchInterrupt(object): # first step: move maxdepth-sized window over data i = startAt - 1 # in case loop isnt called for i in range(startAt, len(self.stops) - maxdepth): - # print('.', end='') + print('.', end='') parts, _ = best_in_all(i, maxdepth) found = [] search = self.stops[i] @@ -180,7 +100,7 @@ class SearchInterrupt(object): found.append(prfx + [search]) if bitNotSet: found.append(prfx) - # print('.') + print('.') # last step: all permutations for the remaining (< maxdepth) bits i += 1 remaining, score = best_in_all(i, min(maxdepth, len(self.stops) - i)) @@ -191,10 +111,12 @@ class SearchInterrupt(object): # Choose the bitset with the highest score and repeat. # If no better score found, increment number of testing bits and repeat. # Either start with all interrupts set (topDown) or none set. 
- def genetic(self, score_fn, topDown=False, maxdepth=3): + def genetic(self, keylen, score_fn, topDown=False, maxdepth=3): current = self.stops if topDown else [] def evolve(lvl): + if lvl > 0: + yield from evolve(lvl - 1) for x in itertools.combinations(self.stops, lvl + 1): tmp = current[:] for y in x: @@ -202,9 +124,9 @@ class SearchInterrupt(object): tmp.pop(bisect.bisect_left(tmp, y)) else: bisect.insort(tmp, y) - yield tmp, score_fn(self.join(tmp)) + yield tmp, score_fn(self.join(tmp), keylen) - best = score_fn(self.join()) + best = score_fn(self.join(), keylen) level = 0 # or start directly with maxdepth - 1 while level < maxdepth: print('.', end='') @@ -227,7 +149,10 @@ class SearchInterrupt(object): return best, all_of_them -# a = GuessInterrupt([2, 0, 1, 0, 14, 15, 0, 13, 24, 25, 25, 25], 0) -# print(a.sequential(lambda x: (1.2 if len(x) == 11 else 0.1))) -# print(a.sequential(lambda x: (1.1 if len(x) == 10 else 0.1))) -# print(a.sequential(lambda x: (1.3 if len(x) == 9 else 0.1))) +if __name__ == '__main__': + a = InterruptSearch([2, 0, 1, 0, 14, 15, 0, 13, 24, 25, 25, 25], irp=0) + print(a.sequential(1, lambda x, k: (1.2 if len(x) == 11 else 0.1))) + print(a.sequential(1, lambda x, k: (1.1 if len(x) == 10 else 0.1))) + print(a.sequential(1, lambda x, k: (1.3 if len(x) == 9 else 0.1))) + print(a.genetic(1, lambda x, k: (1.5 if len(x) == 10 else 0.1))) + print(a.all(1, lambda x, k: (1.4 if len(x) == 11 else 0.1))) diff --git a/LP/InterruptToWeb.py b/LP/InterruptToWeb.py new file mode 100755 index 0000000..a6e6426 --- /dev/null +++ b/LP/InterruptToWeb.py @@ -0,0 +1,116 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +from Alphabet import RUNES +from LPath import FILES_ALL, LPath +from InterruptDB import InterruptDB +from InterruptIndices import InterruptIndices + + +######################################### +# InterruptToWeb : Read interrupt DB and create html graphic / matrix +######################################### + +class InterruptToWeb(object): 
+ def __init__(self, dbname, template='template.html'): + with open(LPath.results(template), 'r') as f: + self.template = f.read() + self.indices = InterruptIndices() + self.scores = {} + db = InterruptDB.load(dbname) + for k, v in db.items(): + for irpc, score, irp, kl, nums in v: + if k not in self.scores: + self.scores[k] = [[] for _ in range(29)] + part = self.scores[k][irp] + while kl >= len(part): + part.append((0, 0)) # (score, irpc) + oldc = part[kl][1] + if irpc > oldc or (irpc == oldc and score > part[kl][0]): + part[kl] = (score, irpc) + + def cls(self, x, low=0, high=1): + if x <= low: + return ' class="m0"' + return f' class="m{int((min(high, x) - low) / (high - low) * 14) + 1}"' + + def table_reliable(self): + trh = '' + trtotal = 'Total' + trd = [f'{x}' for x in RUNES] + del_row = [True] * 29 + for name in FILES_ALL: + if name not in self.scores: + continue + total = self.indices.total(name) + trh += f'
{name}
' + trtotal += f'{total}' + for i in range(29): + scrs = self.scores[name][i][1:] + if not scrs: + trd[i] += '–' + continue + del_row[i] = False + worst_irpc = min([x[1] for x in scrs]) + if worst_irpc == 0: + if max([x[1] for x in scrs]) != 0: + trd[i] += '?' + continue + _, num = self.indices.consider(name, i, worst_irpc) + trd[i] += f'{num}' + + trh += '\n' + trtotal += '\n' + for i in range(29): + trd[i] += '\n' + if del_row[i]: + trd[i] = '' + return f'{trh}{"".join(trd)}{trtotal}
' + + def table_interrupt(self, irp, pmin=1.25, pmax=1.65): + maxkl = max(len(x[irp]) for x in self.scores.values()) + trh = '' + trbest = 'best' + trd = [f'{x}' for x in range(maxkl)] + for name in FILES_ALL: + maxscore = 0 + bestkl = -1 + try: + klarr = self.scores[name][irp] + except KeyError: + continue + trh += f'
{name}
' + for kl, (score, _) in enumerate(klarr): + if score < 0: + trd[kl] += f'–' + else: + trd[kl] += f'{score:.2f}' + if score > maxscore: + maxscore = score + bestkl = kl + trbest += f'{bestkl}' + trh += '\n' + trbest += '\n' + for i in range(29): + trd[i] += '\n' + return f'{trh}{"".join(trd[1:])}{trbest}
' + + def make(self, outfile, pmin=1.25, pmax=1.65): + nav = '' + txt = '' + for i in range(29): + has_entries = any(True for x in self.scores.values() if x[i]) + if not has_entries: + continue + nav += f'{RUNES[i]}\n' + txt += f'

Interrupt {i}: {RUNES[i]}

def _best_shift(nums, score_fn, fallback):
    """Try all 29 shifts of `nums`; return (lowest score, its shift).

    The first shift reaching the lowest score wins. `fallback` is returned
    as shift if no score is below the initial bound of 9999999.
    """
    best = 9999999
    candidate = fallback
    for i in range(29):
        score = score_fn([(x - i) % 29 for x in nums])
        if score < best:
            best = score
            candidate = i
    return best, candidate


#########################################
#  GuessVigenere : Shift values around with a given keylength
#########################################

class GuessVigenere(object):
    """Brute-force a Vigenere key, one key position at a time."""

    def __init__(self, nums):
        self.nums = nums

    def guess(self, keylength, score_fn):  # minimize score_fn
        """Return (average best score, list of best shift per key position).

        `score_fn` receives the shifted values of one key position and
        returns a number; lower is better.
        """
        found = []
        avg_score = 0
        for offset in range(keylength):
            best, shift = _best_shift(
                self.nums[offset::keylength], score_fn, -1)
            avg_score += best
            found.append(shift)
        return avg_score / keylength, found


#########################################
#  GuessAffine : Find greatest common affine key
#########################################

class GuessAffine(object):
    """Brute-force an affine key (s, t) for every key position."""

    def __init__(self, nums):
        self.nums = nums

    def guess(self, keylength, score_fn):  # minimize score_fn
        """Return (average best score, list of best (s, t) per position).

        A position keeps (None, None) if no score is below 9999999.
        """
        found = []
        avg_score = 0
        for offset in range(keylength):
            column = self.nums[offset::keylength]  # same for every (s, t)
            candidate = (None, None)
            best = 9999999
            for s in range(29):
                for t in range(29):
                    shifted = [affine_decrypt(x, (s, t)) for x in column]
                    score = score_fn(shifted)
                    if score < best:
                        best = score
                        candidate = (s, t)
            avg_score += best
            found.append(candidate)
        return avg_score / keylength, found


#########################################
#  GuessPattern : Find a key that is rotated ABC BCA CAB, or ABC CAB BCA
#########################################

class GuessPattern(object):
    """Brute-force a key whose positions follow a caller-supplied pattern."""

    # One symbol per key position; limits the key length to 36.
    _SYMBOLS = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'

    def __init__(self, nums):
        self.nums = nums

    @staticmethod
    def pattern(keylen, fn_pattern):
        """Return fn_pattern(mask, keylen) for the first `keylen` symbols."""
        mask = GuessPattern._SYMBOLS[:keylen]
        return fn_pattern(mask, keylen)

    def split(self, keylen, mask, offset=0):
        """Group `self.nums` by the symbol `mask` yields for each position.

        `mask` must be an iterator; its first `offset` symbols are skipped.
        Returns the groups in symbol order (a dict values view).
        """
        for _ in range(offset):
            next(mask)
        ret = {k: [] for k in GuessPattern._SYMBOLS[:keylen]}
        for n, k in zip(self.nums, mask):
            ret[k].append(n)
        return ret.values()

    def zip(self, key_mask, offset=0):
        """Subtract the key values of iterator `key_mask` from `self.nums`.

        The first `offset` key values are skipped; results are mod 29.
        """
        for _ in range(offset):
            next(key_mask)
        return [(n - k) % 29 for n, k in zip(self.nums, key_mask)]

    @staticmethod
    def guess(parts, score_fn):  # minimize score_fn
        """Return (average best score, list of best shift per part)."""
        found = []
        avg_score = 0
        for nums in parts:
            best, shift = _best_shift(nums, score_fn, 0)
            avg_score += best
            found.append(shift)
        return avg_score / len(parts), found
bounds + NGrams.translate(LPath.data('baseline-text'), + LPath.data('baseline-rune'), stream) + def make_ngrams(max_ngram=1): + for i in range(1, max_ngram + 1): + print(f'generate {i}-gram file') + NGrams.make(i, infile=LPath.data('baseline-rune-words'), + outfile=LPath.data(f'p-{i}gram')) + NGrams.make(i, infile=LPath.root('_solved.txt'), + outfile=LPath.data(f'p-solved-{i}gram')) + NGrams.make(i, infile=LPath.data('baseline-rune-no-e'), + outfile=LPath.data(f'p-no-e-{i}gram')) -def make_ngrams(max_ngram=1): - for i in range(1, max_ngram + 1): - print(f'generate {i}-gram file') - NGrams.make(i, infile=LPath.data('baseline-rune-words'), - outfile=LPath.data(f'p-{i}gram')) - NGrams.make(i, infile=LPath.root('_solved.txt'), - outfile=LPath.data(f'p-solved-{i}gram')) - NGrams.make(i, infile=LPath.data('baseline-rune-no-e'), - outfile=LPath.data(f'p-no-e-{i}gram')) - - -# make_translation(stream=False) -# make_ngrams(5) + # make_translation(stream=False) + # make_ngrams(5) diff --git a/LP/HeuristicLib.py b/LP/Probability.py similarity index 96% rename from LP/HeuristicLib.py rename to LP/Probability.py index 2b37440..4d70c98 100755 --- a/LP/HeuristicLib.py +++ b/LP/Probability.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 +# -*- coding: UTF-8 -*- from NGrams import NGrams -from RuneText import RUNES +from Alphabet import RUNES def normalized_probability(int_prob): diff --git a/LP/Rune.py b/LP/Rune.py new file mode 100755 index 0000000..d849b38 --- /dev/null +++ b/LP/Rune.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 +# -*- coding: UTF-8 -*- +from Alphabet import RUNES, white_rune, rune_map, primes_map + + +######################################### +# Rune : Stores a single rune. Incl. 
# text, prime, index, and kind
#########################################

class Rune(object):
    """One symbol of a rune text with lazily derived representations.

    A rune can be created from its rune character (``r``) or its index
    (``i``); ``t`` and ``p`` optionally preset the text and prime value.
    Missing representations are computed on first access and cached.
    Index 29 is used for every symbol that is not one of the 29 runes.
    """

    def __init__(self, r=None, t=None, i=None, p=None):
        self._rune = r
        self._text = t
        self._index = i
        self._prime = p
        self._kind = None  # one of: r n s l w

    def __repr__(self):
        return f'<{self._rune}, {self._text}, {self._index}, {self._prime}>'

    @property
    def rune(self):
        """Rune character; derived from the index, '•' for index >= 29."""
        if self._rune is not None:
            return self._rune
        if self._index < 29:
            self._rune = RUNES[self._index]
        else:
            self._rune = '•'
        return self._rune

    @property
    def text(self):
        """Latin transcription, or the plain form of a punctuation rune."""
        if self._text is not None:
            return self._text
        char = self.rune
        if char in rune_map:
            self._text = rune_map[char]
        else:
            # Not a rune: translate known punctuation, keep anything else.
            self._text = white_rune.get(char, char)
        return self._text

    @property
    def index(self):
        """Position of the rune in RUNES, or 29 if it is not a rune."""
        if self._index is not None:
            return self._index
        try:
            self._index = RUNES.index(self._rune)
        except ValueError:
            self._index = 29
        return self._index

    @property
    def prime(self):
        """Prime value of the rune, 0 for anything that is not a rune."""
        if self._prime is None:
            self._prime = primes_map.get(self.rune, 0)
        return self._prime

    @property
    def kind(self):
        """Classification of the symbol, one of 'r', 'n', 's', 'l', 'w'."""
        if self._kind is not None:
            return self._kind
        char = self.rune
        if char in rune_map:
            found = 'r'  # rune
        elif char in ('⁜', '⁘'):
            found = 's'  # paragraph (treated as sentence) or sentence
        elif char in ('\n', '\r'):
            found = 'l'  # line end
        elif char in '1234567890':
            found = 'n'  # number
        else:
            found = 'w'  # whitespace (explicitly not n or s)
        self._kind = found
        return found

    def _moved(self, o, sign):
        # Shared body of + and -: non-runes (index 29) on either side leave
        # this rune untouched, otherwise the index wraps modulo 29.
        amount = o.index if isinstance(o, Rune) else o
        if self.index == 29 or amount == 29:
            return self
        return Rune(i=(self.index + sign * amount) % 29)

    def __add__(self, o):
        return self._moved(o, 1)

    def __sub__(self, o):
        return self._moved(o, -1)

    def __invert__(self):
        if self.index == 29:
            return self
        return Rune(i=28 - self.index)
RuneText -import lib as utils - - -######################################### -# RuneWriter : handle std output with highlight etc. -######################################### - -class RuneWriter(object): - MARKS = {'_': '\x1b[0m', 'r': '\x1b[31;01m', 'b': '\x1b[30;01m'} - - def __init__(self): - self.COLORS = True - self.VERBOSE = '-v' in sys.argv - self.QUIET = '-q' in sys.argv - self.BREAK_MODE = None - self.file_output = None - self.clear() - - def clear(self): - self.mark = False - self.alternate = False - self._marked = ['_'] * 4 - self.txt = [''] * 4 - - def is_empty(self): - return not self.txt[0] - - def line_break_mode(self): - if self.BREAK_MODE is not None: # if set by user - return self.BREAK_MODE - return 'l' if self.VERBOSE else 's' # dynamically adapt to mode - - def write(self, r=None, t=None, n1=None, n2=None): - m = ('b' if self.alternate else 'r') if self.mark else '_' - for i, v in enumerate([r, t, n1, n2]): - if v is None: - continue - if self.COLORS and self._marked[i] != m and i != 3: - self._marked[i] = m - prfx = self.MARKS[m] - else: - prfx = '' - self.txt[i] += prfx + v - - # def rm(self, r=0, t=0, n1=0, n2=0): - # for i, v in enumerate([r, t, n1, n2]): - # if v > 0: - # self.txt[i] = self.txt[i][:-v] - - def stdout(self): - def print_f(x=''): - if self.file_output: - with open(self.file_output, 'a') as f: - f.write(x + '\n') - else: - print(x) - - if self.is_empty(): - return - m = self.mark - self.mark = False # flush closing color - self.write(r='', t='', n1='', n2='') - self.mark = m - - if not self.QUIET or self.VERBOSE: - print_f() - if not self.QUIET: - print_f(self.txt[0]) - print_f(self.txt[1]) - if self.VERBOSE: - print_f(self.txt[2]) - print_f(self.txt[3]) - self.clear() - - -######################################### -# RuneReader : handles parsing of the file and line breaks etc. 
-######################################### - -class RuneReader(object): - def __init__(self): - self.data = None - self.loaded_file = None - self.words = {x: [] for x in range(20)} # increase for longer words - - def load(self, data=None, file=None, limit=None): - self.loaded_file = None - if not data: - with open(file, 'r') as f: - data = f.read()[:limit] - self.loaded_file = file - self.data = data if isinstance(data, RuneText) else RuneText(data) - self.generate_word_list() - - def has_data(self): - if len(self.data) > 0: - return True - return False - - def runes_no_whitespace(self): - return [x for x in self.data if x.kind == 'r'] - - def generate_word_list(self): - for x in self.words.values(): - x.clear() - res = [] - ai = 0 - ari = 0 - zri = 0 - for zi, x in enumerate(self.data): - if x.kind == 'l': - continue - elif x.kind == 'r': - res.append(x) - zri += 1 - else: - if len(res) > 0: - xt = RuneText(res) - self.words[len(xt)].append((ai, zi, ari, zri, xt)) - res = [] - ai = zi - ari = zri - - # count_callback('c|w|l', count, is-first-flag) - def parse(self, rune_fn, count_fn, whitespace_fn, break_line_on='l'): - word_sum = 0 - line_sum = 0 - for i, x in enumerate(self.data): - if x.kind == 'r': - r = rune_fn(self.data, i, word_sum == 0) - count_fn('c', r.prime, word_sum == 0) - word_sum += r.prime - elif x.kind == 'l' and x.kind != break_line_on: - continue # ignore all \n,\r if not forced explicitly - else: - if word_sum > 0: - count_fn('w', word_sum, line_sum == 0) - line_sum += word_sum - word_sum = 0 - if x.kind != 'l': # still ignore \n,\r - whitespace_fn(x) - if x.kind == break_line_on: - count_fn('l', line_sum, line_sum == 0) - line_sum = 0 - if word_sum > 0: - count_fn('w', word_sum, line_sum == 0) - line_sum += word_sum - if line_sum > 0: - count_fn('l', line_sum, True) - - -######################################### -# RuneRunner : Merge RuneWriter and RuneReader and stay in sync -######################################### - -class 
RuneRunner(object): - def __init__(self): - self.input = RuneReader() - self.output = RuneWriter() - self.marked_chars = [] - self.mark_alternate = [] - self.next_mark = False - self.fn_cipher = None - - def highlight_words_with_len(self, search_length): - found = [x for x in self.input.words[search_length]] - self.marked_chars = set(x for fp in found for x in range(fp[0], fp[1])) - return found - - def highlight_rune(self, rune, mark_occurrences=[]): - ip = 0 - tp = 0 - ret = [] - for i, x in enumerate(self.input.data): - if x.kind == 'r': - if x.rune == rune: - ip += 1 - ret.append((ip, tp, i, ip in mark_occurrences)) - tp += 1 - self.marked_chars = set(i for _, _, i, _ in ret) - self.mark_alternate = set(i for _, _, i, f in ret if not f) - return ret - - def reset_highlight(self): - self.marked_chars = [] - self.mark_alternate = [] - - def start(self, fn_cipher): - self.fn_cipher = fn_cipher - self.next_mark = False - self.input.parse( - self.rune_callback, self.count_callback, self.whitespace_callback, - self.output.line_break_mode()) - - def rune_callback(self, encrypted_data, index, is_first): - if self.output.VERBOSE: - fillup = len(self.output.txt[2]) - len(self.output.txt[1]) - if not is_first: - fillup += 1 # +1 cause n1 will add a '+' - if fillup > 0: - self.output.write(t=' ' * fillup) - if self.marked_chars: - x = encrypted_data[index] # always search on original data - mt = index in self.marked_chars - mn = index + 1 in self.marked_chars - self.output.alternate = index in self.mark_alternate - else: - x, mt, mn = self.fn_cipher(encrypted_data, index) - self.output.mark = mt - self.output.write(r=x.rune, t=x.text) - self.next_mark = mn - return x - - def count_callback(self, typ, num, is_first): - if typ == 'c': # char - if self.output.VERBOSE: - self.output.write(n1=('' if is_first else '+') + str(num)) - return - prm = utils.is_prime(num) - if typ == 'w': # word - tt = ('' if is_first else ' + ') + str(num) + ('*' if prm else '') - 
self.output.write(n2=tt) - if prm and num > 109 and not (self.output.VERBOSE or self.output.QUIET): - self.output.write(t='__') - elif typ == 'l': # line end (ignoring \n if mode is set to 's') - self.output.mark = False - # if not is_first: - sffx = ' = {}'.format(num) + ('*' if prm else '') - if utils.is_emirp(num): - sffx += '√' - if self.output.VERBOSE: - self.output.write(n2=sffx) - elif not self.output.QUIET: - self.output.write(t=sffx) - self.output.stdout() - - def whitespace_callback(self, rune): - if not self.next_mark: # dont mark whitespace after selection - self.output.mark = False - self.output.write(r=rune.rune, t=rune.text) - if self.output.VERBOSE: - self.output.write(n1=' ') diff --git a/LP/RuneSolver.py b/LP/RuneSolver.py index e0602d4..9492499 100755 --- a/LP/RuneSolver.py +++ b/LP/RuneSolver.py @@ -1,16 +1,15 @@ #!/usr/bin/env python3 -from RuneRunner import RuneRunner +# -*- coding: UTF-8 -*- from RuneText import Rune, RuneText -from lib import affine_decrypt +from utils import affine_decrypt ######################################### # RuneSolver : Generic parent class handles interrupts and text highlight ######################################### -class RuneSolver(RuneRunner): +class RuneSolver(object): def __init__(self): - super().__init__() self.reset() def reset(self): @@ -20,43 +19,31 @@ class RuneSolver(RuneRunner): def highlight_interrupt(self): return self.highlight_rune(self.INTERRUPT, self.INTERRUPT_POS) - def substitute_get(self, pos, keylen, search_term, found_term): - return found_term.zip_sub(search_term).description(count=True) - def substitute_supports_keylen(self): return False - def run(self, data=None): - if data: - self.input.load(data=data) - self.interrupt_counter = 0 - self.start(self.cipher_callback) + def substitute_get(self, pos, keylen, search_term, found_term, all_data): + return found_term.zip_sub(search_term).description(count=True) - def cipher_callback(self, encrypted_data, index): - obj = encrypted_data[index] 
- is_interrupt = obj.rune == self.INTERRUPT - if is_interrupt: - self.interrupt_counter += 1 - skip = is_interrupt and self.interrupt_counter in self.INTERRUPT_POS - mark_this = self.mark_char_at(index) - if not skip: - obj = self.cipher(obj, (index, encrypted_data)) - mark_next = self.mark_char_at(index) - return obj, mark_this, mark_next + def enum_data(self, data): + irp_i = 0 + r_pos = -1 + for i, obj in enumerate(data): + skip = obj.index == 29 + if not skip: + r_pos += 1 + is_interrupt = obj.rune == self.INTERRUPT + if is_interrupt: + irp_i += 1 + skip = is_interrupt and irp_i in self.INTERRUPT_POS + yield obj, i, r_pos, skip - def cipher(self, rune, context): - raise NotImplementedError # must subclass - - def mark_char_at(self, position): - return False + def run(self, data): + raise NotImplementedError('must subclass') + # return RuneText(), [(start-highlight, end-highlight), ...] def __str__(self): - txt = f'DATA: {len(self.input.data) if self.input.data else 0} bytes' - if self.input.loaded_file: - txt += f' (file: {self.input.loaded_file})' - else: - txt += f' (manual input)' - return txt + f'\ninterrupt jumps: {self.INTERRUPT_POS}' + return f'interrupt: {self.INTERRUPT}, jumps: {self.INTERRUPT_POS}' ######################################### @@ -66,22 +53,22 @@ class RuneSolver(RuneRunner): class SequenceSolver(RuneSolver): def __init__(self): super().__init__() - self.seq_index = 0 self.reset() def reset(self): super().reset() self.FN = None - def run(self, data=None): - self.seq_index = 0 + def run(self, data): assert(self.FN) - super().run(data=data) - - def cipher(self, rune, context): - x = self.FN(self.seq_index, rune) - self.seq_index += 1 - return x + seq_i = 0 + ret = [] + for rune, i, ri, skip in self.enum_data(data): + if not skip: + rune = self.FN(seq_i, rune) + seq_i += 1 + ret.append(rune) + return RuneText(ret), [] def __str__(self): return super().__str__() + f'\nf(x): {self.FN}' @@ -99,59 +86,52 @@ class RunningKeySolver(RuneSolver): 
def reset(self): super().reset() self.KEY_DATA = [] # the key material - self.KEY_INVERT = False # ABCD -> ZYXW self.KEY_SHIFT = 0 # ABCD -> DABC self.KEY_ROTATE = 0 # ABCD -> ZABC self.KEY_OFFSET = 0 # ABCD -> __ABCD self.KEY_POST_PAD = 0 # ABCD -> ABCD__ - def run(self, data=None): - self.k_current_pos = 0 - self.k_len = len(self.KEY_DATA) - self.k_full_len = self.KEY_OFFSET + self.k_len + self.KEY_POST_PAD - super().run(data=data) + def run(self, data): + k_len = len(self.KEY_DATA) + if k_len <= 0: + return data, [] + k_full_len = self.KEY_OFFSET + k_len + self.KEY_POST_PAD + k_current_pos = 0 + ret = [] + highlight = [[0, 0]] + for rune, i, ri, skip in self.enum_data(data): + if not skip: + u = k_current_pos - self.KEY_OFFSET + if u < 0 or u >= k_len or self.KEY_DATA[u] == 29: + self.unmodified_callback(rune) + else: + key_i = (u + self.KEY_SHIFT) % k_len + decrypted = self.decrypt(rune.index, key_i) + rune = Rune(i=(decrypted - self.KEY_ROTATE) % 29) + if i == highlight[-1][1]: + highlight[-1][1] = i + 1 + else: + highlight.append([i, i + 1]) + # rotate_key + if k_full_len > 0: # e.g., for key invert without a key + k_current_pos = (k_current_pos + 1) % k_full_len + ret.append(rune) + if highlight[0][1] == 0: + highlight = highlight[1:] + return RuneText(ret), highlight - def mark_char_at(self, position): - return self.active_key_pos() != -1 + def decrypt(self, rune_index, key_index): + raise NotImplementedError('must subclass') - def active_key_pos(self): - i = self.k_current_pos - self.KEY_OFFSET - if i >= 0 and i < self.k_len: - if self.KEY_DATA[i] != 29: # placeholder for unknown - return i - return -1 + def unmodified_callback(self, rune_index): + pass # subclass if needed - def cipher(self, rune, context): - r_idx = rune.index - if self.KEY_INVERT: - r_idx = 28 - r_idx - pos = self.active_key_pos() - if pos == -1: - self.copy_unmodified(r_idx) - else: - i = (pos + self.KEY_SHIFT) % self.k_len - r_idx = (self.decrypt(r_idx, i) - self.KEY_ROTATE) % 29 - # 
rotate_key - if self.k_full_len > 0: # e.g., for key invert without a key - self.k_current_pos = (self.k_current_pos + 1) % self.k_full_len - return Rune(i=r_idx) - - def decrypt(self, rune_index, key_index): # must subclass - raise NotImplementedError - - def copy_unmodified(self, rune_index): # subclass if needed - pass - - def key__str__(self): - return self.KEY_DATA # you should override this - - def key__str__basic_runes(self): + def key__str__(self): # you should override this return RuneText(self.KEY_DATA).description(indexWhitespace=True) def __str__(self): txt = super().__str__() txt += f'\nkey: {self.key__str__()}' - txt += f'\nkey invert: {self.KEY_INVERT}' txt += f'\nkey offset: {self.KEY_OFFSET} runes' txt += f'\nkey post pad: {self.KEY_POST_PAD} runes' txt += f'\nkey shift: {self.KEY_SHIFT} indices' @@ -170,15 +150,12 @@ class VigenereSolver(RunningKeySolver): def substitute_supports_keylen(self): return True - def substitute_get(self, pos, keylen, search_term, found_term): + def substitute_get(self, pos, keylen, search_term, found_term, all_data): ret = [Rune(r='⁚')] * keylen for i, r in enumerate(found_term.zip_sub(search_term)): ret[(pos + i) % keylen] = r return RuneText(ret).description(count=True, index=False) - def key__str__(self): - return self.key__str__basic_runes() - ######################################### # AffineSolver : Decrypt runes with an array of (s, t) affine keys @@ -188,42 +165,55 @@ class AffineSolver(RunningKeySolver): def decrypt(self, rune_index, key_index): return affine_decrypt(rune_index, self.KEY_DATA[key_index]) + def key__str__(self): + return self.KEY_DATA + ######################################### # AutokeySolver : Decrypts runes by using previously decrypted ones as input ######################################### class AutokeySolver(RunningKeySolver): - def run(self, data=None): + def run(self, data): key = self.KEY_DATA[self.KEY_SHIFT:] + self.KEY_DATA[:self.KEY_SHIFT] key = [29] * self.KEY_OFFSET + key + [29] * 
self.KEY_POST_PAD self.running_key = key - super().run(data=data) + return super().run(data) - def decrypt(self, rune_index, _): + def decrypt(self, rune_index, key_index): rune_index = (rune_index - self.running_key.pop(0)) % 29 self.running_key.append(rune_index) return rune_index - def copy_unmodified(self, rune_index): - if self.k_len > 0: - self.running_key.pop(0) - self.running_key.append(rune_index) + def unmodified_callback(self, rune_index): + self.running_key.pop(0) + self.running_key.append(rune_index) def substitute_supports_keylen(self): return True - def substitute_get(self, pos, keylen, search_term, found_term): - data = self.input.runes_no_whitespace() + def substitute_get(self, pos, keylen, search_term, found_term, all_data): + data = all_data.index_no_white ret = [Rune(r='⁚')] * keylen for o in range(len(search_term)): - plain = search_term[o] + plain = search_term[o].index i = pos + o while i >= 0: - plain = data[i] - plain + plain = (data[i] - plain) % 29 i -= keylen - ret[i + keylen] = plain + ret[i + keylen] = Rune(i=plain) return RuneText(ret).description(count=True, index=False) - def key__str__(self): - return self.key__str__basic_runes() + +if __name__ == '__main__': + slvr = VigenereSolver() + slvr.KEY_DATA = [1] + print(slvr) + txt = RuneText('hi there') + sol = slvr.run(txt) + print(sol[0].text) + sol, mark = slvr.run(txt) + print(sol.text) + slvr.KEY_DATA = [-1] + print(slvr.run(sol)[0].text) + print(mark) diff --git a/LP/RuneText.py b/LP/RuneText.py index 98b8fbf..7a52e21 100755 --- a/LP/RuneText.py +++ b/LP/RuneText.py @@ -1,112 +1,7 @@ #!/usr/bin/env python3 # -*- coding: UTF-8 -*- -import re # load_indices - -white_rune = {'•': ' ', '⁘': '.', '⁚': ',', '⁖': ';', '⁜': '#'} -white_text = {v: k for k, v in white_rune.items()} -alphabet = [ # Using last value for display. 
Custom added: V - (2, 'ᚠ', ['F']), (3, 'ᚢ', ['V', 'U']), (5, 'ᚦ', ['TH']), (7, 'ᚩ', ['O']), - (11, 'ᚱ', ['R']), (13, 'ᚳ', ['K', 'C']), (17, 'ᚷ', ['G']), - (19, 'ᚹ', ['W']), (23, 'ᚻ', ['H']), (29, 'ᚾ', ['N']), (31, 'ᛁ', ['I']), - (37, 'ᛄ', ['J']), (41, 'ᛇ', ['EO']), (43, 'ᛈ', ['P']), (47, 'ᛉ', ['X']), - (53, 'ᛋ', ['Z', 'S']), (59, 'ᛏ', ['T']), (61, 'ᛒ', ['B']), - (67, 'ᛖ', ['E']), (71, 'ᛗ', ['M']), (73, 'ᛚ', ['L']), - (79, 'ᛝ', ['ING', 'NG']), (83, 'ᛟ', ['OE']), (89, 'ᛞ', ['D']), - (97, 'ᚪ', ['A']), (101, 'ᚫ', ['AE']), (103, 'ᚣ', ['Y']), - (107, 'ᛡ', ['IO', 'IA']), (109, 'ᛠ', ['EA']) -] -text_map = {t: r for _, r, ta in alphabet for t in ta} -rune_map = {r: t for _, r, ta in alphabet for t in ta} -primes_map = {r: p for p, r, _ in alphabet} -RUNES = [r for _, r, _ in alphabet] # array already sorted -re_norune = re.compile('[^' + ''.join(RUNES) + ']') -# del alphabet # used in playground for GP display - - -######################################### -# Rune : Stores a single rune. Incl. text, prime, index, and kind -######################################### - -class Rune(object): - def __init__(self, r=None, t=None, i=None, p=None): - self._rune = r - self._text = t - self._index = i - self._prime = p - self._kind = None # one of: r n s l w - - def __repr__(self): - return f'<{self._rune}, {self._text}, {self._index}, {self._prime}>' - - @property - def rune(self): - if self._rune is None: - self._rune = RUNES[self._index] if self._index < 29 else '•' - return self._rune - - @property - def text(self, sameWhitespace=False): - if self._text is None: - if sameWhitespace: - self._text = rune_map.get(self.rune, ' ') - else: - r = self.rune - self._text = rune_map.get(r, white_rune.get(r, r)) - return self._text - - @property - def index(self): - if self._index is None: - r = self._rune - self._index = RUNES.index(r) if r in RUNES else 29 - return self._index - - @property - def prime(self): - if self._prime is None: - self._prime = primes_map.get(self.rune, 0) - return 
self._prime - - @property - def kind(self): - if self._kind is None: - x = self.rune - if x in rune_map: - self._kind = 'r' # rune - elif x == '⁜': - self._kind = 's' # paragraph, but treat as sentence - elif x == '⁘': - self._kind = 's' # sentence - elif x == '\n' or x == '\r': - self._kind = 'l' # line end - elif x in '1234567890': - self._kind = 'n' # number - else: - self._kind = 'w' # whitespace (explicitly not n or s) - return self._kind - - def __add__(self, o): - if isinstance(o, Rune): - o = o.index - if self.index == 29 or o == 29: - return self - return Rune(i=(self.index + o) % 29) - - def __sub__(self, o): - if isinstance(o, Rune): - o = o.index - if self.index == 29 or o == 29: - return self - return Rune(i=(self.index - o) % 29) - - def __radd__(self, o): - return self if self.index == 29 else Rune(i=(o + self.index) % 29) - - def __rsub__(self, o): - return self if self.index == 29 else Rune(i=(o - self.index) % 29) - - def __invert__(self): - return self if self.index == 29 else Rune(i=28 - self.index) +from Alphabet import white_rune, white_text, rune_map, text_map +from Rune import Rune ######################################### @@ -139,16 +34,6 @@ class RuneText(object): self._data_len = len(self._data) - def __len__(self): - return self._data_len - - def trim(self, maxlen): - if self._data_len > maxlen: - if self._rune_sum and self._rune_sum > 0: - self._rune_sum -= sum(x.prime for x in self._data[maxlen:]) - self._data = self._data[:maxlen] - self._data_len = maxlen - @classmethod def from_text(self, text): res = [] @@ -186,17 +71,29 @@ class RuneText(object): res.append(Rune(r=rune, t=char)) return res - def description(self, count=False, index=True, indexWhitespace=False): - return None if len(self) == 0 else \ - self.rune + (f' ({len(self)})' if count else '') + ' - ' + \ - self.text + (f' ({len(self.text)})' if count else '') + \ - (f' - {self.index if indexWhitespace else self.index_rune_only}' - if index else '') + def __len__(self): + 
return self._data_len - def zip_sub(self, other): - if len(self) != len(other): - raise IndexError('RuneText length mismatch') - return RuneText([x - y for x, y in zip(self._data, other._data)]) + def __getitem__(self, key): + if isinstance(key, str): + return [getattr(x, key) for x in self._data] + else: + return self._data[key] + + # def __setitem__(self, key, value): + # self._data[key] = value + + def __add__(self, other): + return RuneText([x + other for x in self._data]) + + def __sub__(self, other): + return RuneText([x - other for x in self._data]) + + def __invert__(self): + return RuneText([~x for x in self._data]) + + def __str__(self): + return f'RuneText<{len(self)}>' @property def text(self): @@ -207,11 +104,11 @@ class RuneText(object): return ''.join(x.rune for x in self._data) @property - def index(self): + def index_no_newline(self): return [x.index for x in self._data if x.kind != 'l'] @property - def index_rune_only(self): + def index_no_white(self): return [x.index for x in self._data if x.index != 29] @property @@ -224,50 +121,82 @@ class RuneText(object): self._rune_sum = sum(self.prime) return self._rune_sum - def __getitem__(self, key): - if isinstance(key, str): - return [getattr(x, key) for x in self._data] - else: - return self._data[key] + @property + def data_clean(self): + return [x if x.kind == 'r' else Rune(i=29) + for x in self._data if x.kind != 'l'] - def __setitem__(self, key, value): - self._data[key] = value + def description(self, count=False, index=True, indexWhitespace=False): + return None if len(self) == 0 else \ + self.rune + (f' ({len(self)})' if count else '') + ' - ' + \ + self.text + (f' ({len(self.text)})' if count else '') + \ + (' - {}'.format(self.index_no_newline if indexWhitespace else + self.index_no_white) + if index else '') - def __add__(self, other): - return RuneText([x + other for x in self._data]) + def trim(self, maxlen): + if self._data_len > maxlen: + if self._rune_sum and self._rune_sum > 0: + 
self._rune_sum -= sum(x.prime for x in self._data[maxlen:]) + self._data = self._data[:maxlen] + self._data_len = maxlen - def __sub__(self, other): - return RuneText([x - other for x in self._data]) + def zip_sub(self, other): + if len(self) != len(other): + raise IndexError('RuneText length mismatch') + return RuneText([x - y for x, y in zip(self._data, other._data)]) - def __radd__(self, other): - return RuneText([other + x for x in self._data]) + # def equal(self, other): + # if len(self) != len(other): + # return False + # return all(x.index == y.index for x, y in zip(self, other)) - def __rsub__(self, other): - return RuneText([other - x for x in self._data]) - - def __invert__(self): - return RuneText([~x for x in self._data]) - - def __repr__(self): - return f'RuneText<{len(self._data)}>' - - -######################################### -# load page and convert to indices for faster access -######################################### - -def load_indices(fname, interrupt, maxinterrupt=None, minlen=None, limit=None): - with open(fname, 'r') as f: - data = RuneText(re_norune.sub('', f.read())).index_rune_only[:limit] - if maxinterrupt is not None: - # incl. everything up to but not including next interrupt - # e.g., maxinterrupt = 0 will return text until first interrupt - for i, x in enumerate(data): - if x != interrupt: + def enum_words(self): # [(start, end, len), ...] 
may include \n \r + start = 0 + r_pos = 0 + word = [] + for i, x in enumerate(self._data): + if x.kind == 'r': + r_pos += 1 + word.append(x) + elif x.kind == 'l': continue - if maxinterrupt == 0: - if minlen and i < minlen: - continue - return data[:i] - maxinterrupt -= 1 - return data + else: + if len(word) > 0: + yield start, i, r_pos - len(word), RuneText(word) + word = [] + start = i + 1 + + +class RuneTextFile(RuneText): + def __init__(self, file, limit=None): + with open(file, 'r') as f: + super().__init__(f.read()[:limit]) + self.inverted = False + self.loaded_file = file + + def reopen(self, limit=None): + ret = RuneTextFile(self.loaded_file, limit) + if self.inverted: + ret.invert() + return ret + + def invert(self): + self.inverted = not self.inverted + self._rune_sum = None + self._data = [~x for x in self._data] + + def __str__(self): + return '@file: {} ({} bytes), inverted: {}'.format( + self.loaded_file, len(self._data), self.inverted) + + +if __name__ == '__main__': + x = RuneText('Hi there. 
And welc\nome, to my "world";') + for a, z, r_pos, word in x.enum_words(): + print((a, z), r_pos, word.text) + + y = RuneTextFile(file='../_input.txt') + print(y.loaded_file) + print(y.prime_sum) + print(y) diff --git a/LP/__init__.py b/LP/__init__.py index e1c85bc..60cef66 100644 --- a/LP/__init__.py +++ b/LP/__init__.py @@ -1,17 +1,24 @@ import sys -if True: - sys.path.append(__path__[0]) +if __name__ != '__main__': + sys.path.insert(0, __path__[0]) -import lib as utils +import utils from LPath import FILES_ALL, FILES_UNSOLVED, FILES_SOLVED from LPath import LPath as path -from RuneSolver import VigenereSolver, AffineSolver, AutokeySolver, SequenceSolver -from RuneText import Rune, RuneText -from RuneText import RUNES, alphabet, load_indices -from HeuristicSearch import GuessVigenere, GuessAffine, GuessPattern -from HeuristicSearch import SearchInterrupt -from HeuristicLib import Probability +from Alphabet import RUNES, alphabet +from Rune import Rune +from RuneText import RuneText, RuneTextFile + +from IOReader import load_indices, longest_no_interrupt +from IOWriter import IOWriter + +from RuneSolver import SequenceSolver, VigenereSolver, AffineSolver, AutokeySolver +from KeySearch import GuessVigenere, GuessAffine, GuessPattern +from Probability import Probability + from InterruptDB import InterruptDB +from InterruptIndices import InterruptIndices +from InterruptSearch import InterruptSearch from FailedAttempts import NGramShifter diff --git a/LP/lib.py b/LP/utils.py similarity index 90% rename from LP/lib.py rename to LP/utils.py index 3522074..82bb932 100755 --- a/LP/lib.py +++ b/LP/utils.py @@ -1,4 +1,5 @@ #!/usr/bin/env python3 +# -*- coding: UTF-8 -*- import math @@ -113,7 +114,9 @@ def autokey_reverse(data, keylen, pos, search_term): ret[i + keylen] = plain return ret -# alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' -# cipher = 'YDIDWYASDDJVAPJMMBIASDTJVAMD' -# indices = [affine_decrypt(alphabet.index(x), (5, 9), 26) for x in cipher] -# 
print(''.join(alphabet[x] for x in indices)) + +if __name__ == '__main__': + alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + cipher = 'YDIDWYASDDJVAPJMMBIASDTJVAMD' + indices = [affine_decrypt(alphabet.index(x), (5, 9), 26) for x in cipher] + print(''.join(alphabet[x] for x in indices)) diff --git a/README.md b/README.md index 200fc09..ac30a70 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ ### Main components: -- `playground.py` this is where you want to start. Simply run it and it will great you with all the posibilities. Use this if you want to experiment, translate runes, check for primes, etc. See [Playground](#playground) for more info. +- `playground.py` this is where you want to start. Simply run it and it will greet you with all the posibilities. Use this if you want to experiment, translate runes, check for primes, etc. See [Playground](#playground) for more info. - `solver.py` you can run `solver.py -s` to output all already solved pages. Other than that, this is the playground to test new ideas against the unsolved pages. Here you can automate stuff and test it on all the remaining pages; e.g., there is a section to try out totient functions. See [Solving](#solving) for more info. @@ -22,18 +22,20 @@ The `pages` folder contains all LP pages in text and graphic. Note, I have doubl Rune values are taken from Gematria, with these unicode characters representing: space (`•`), period (`⁘`), comma (`⁚`), semicolon (`⁖`), and chapter mark (`⁜`). -### The library +### The LP library These files you probably wont need to touch unless you want to modify some output behavior or rune handling. E.g. if you want to add a rune multiply method. These are the building blocks for the main components. -- `lib.py`, a small collection of reusable functions like `is_prime` and `rev` for emirp (reverse prime) checking. +- `utils.py`, a small collection of reusable functions like `rev`, `is_prime`, and `is_emirp` (reverse prime) checking. 
- `RuneText.py` is the representation layer. The class `RuneText` holds an array of `Rune` objects, which represent the individual runes. Each `Rune` has the attributes `rune`, `text`, `prime`, `index`, and `kind` (see [Solving](#solving)). -- `RuneRunner.py` is a collection of classes that handles data input as well as ouput to stdout. It does all the word sum calculations, prime word detection, line sums, and output formatting (including colors). Everything you don't want to worry about when processing the actual runes. - - `RuneSolver.py` contains a specific implementation for each cipher type. Two implementations in particular, `VigenereSolver` which has methods for setting and modifying key material as well as automatic key rotation and interrupt skipping. `SequenceSolver` interprets the cipher on a continuous or discrete function (i.e., Euler's totient). +- `IOWriter.py` handles data output to stdout. It does all the word sum calculations, prime word detection, line sums, and output formatting (including colors). Everything you don't want to worry about when processing the actual runes. + +- and many more … + Refer to `solver.py` or section [Solving](#solving) for examples on usage. @@ -51,13 +53,13 @@ Available commands are: d : Get decryption key (substitution) for a single phrase f : Find words with a given length (f 4, or f word) g : Print Gematria Primus (gp) or reversed Gematria (gpr) - h : Highlight occurrences of interrupt jumps (hj) or reset (h) + h : Highlight occurrences of interrupt jumps (hj or hj 28) k : Re/set decryption key (k), invert key (ki), ': change key shift (ks), rotation (kr), offset (ko), or after padding (kp) ': set key jumps (kj) e.g., [1,2] (first appearence of ᚠ is index 1) l : Toggle log level: normal (ln), quiet (lq) verbose (lv) p : Prime number and emirp check - t : Translate between runes, text, and indices (0-28) + t : Translate between runes, text, indices (0-28), and primes x : Execute decryption. 
Also: load data into memory ': set manually (x DATA) or load from file (xf p0-2) (default: _input.txt) ': limit/trim loaded data up to nth character (xl 300) @@ -148,7 +150,8 @@ Gematria Primus (reversed) ### h) Hightlight occurrences -Highlighting is currently very limited. The only supported option is `hj` which will hightlight all interrupts. That is, it will hightlight all occurrences of `ᚠ` in the text and mark those that are actively skipped or jumped over. +Highlighting is currently very limited. The only supported option is `hj` which will hightlight all interrupts. That is, it will hightlight all occurrences of `ᚠ` in the text and mark those that are actively skipped or jumped over. +Or use `hj x` to highlight a different rune (where x is either a number or text). ![highlight interrupts](img/hj.png) @@ -249,56 +252,87 @@ If the output is too long, you can limit (the already loaded data) with `xl 180` `Rune.kind` can be one of `r n s l w` – meaning (r)une, (n)umber, (s)entence, (l)ine, or (w)hitespace. A line is what you see in the source file (which is equivalent to a line in the original jpg page). A sentence is one that ends with a period (`⁘`). -`Rune` as well as `RuneText` both support simple arithmetic operations: `Rune(i=2) - 2` will yield a `ᚠ` rune. For example, you can invert a text with `28 - RuneText('test me')` or simply `~RuneText('inverted')`. +`Rune` as well as `RuneText` both support simple arithmetic operations: `Rune(i=2) - 2` will yield a `ᚠ` rune. You can invert text with `~RuneText('inverted')`. __Note:__ Always initialize a rune with its rune character or its index, never ASCII or its prime value. -### RuneRunner and I/O +### RuneTextFile, IOWriter, and I/O -`RuneRunner` has two noteworthy attributes `input` and `output`; `RuneReader` and `RuneWriter` respectively. 
Use the former to load data into memory: +Here is a fully working example to load rune data from file, solve it with a vigenere solver, and output it with color highlighting. -``` -solver.load(file='p33.txt') -solver.load(data='will be parsed') -solver.load(RuneText('will be copied')) +```python +solver = LP.VigenereSolver() +solver.KEY_DATA = LP.RuneText('divinity').index_no_white +solver.INTERRUPT_POS = [4, 5, 6, 7, 10, 11, 14, 18, 20, 21, 25] +d_in = LP.RuneTextFile(LP.path.page('0_welcome')) +d_out, _ = solver.run(d_in) +LP.IOWriter().run(d_out, [(0, 8), (510, 517), (630, 667)]) ``` The output writer has the options `COLORS`, `VERBOSE`, `QUIET`, and `BREAK_MODE` to control the appearance. `BREAK_MODE` can be one of the `Rune.kind` values. -### RuneSolver, VigenereSolver, SequenceSolver, AffineSolver +### RuneSolver, VigenereSolver, SequenceSolver, AffineSolver, AutokeySolver -All `RuneSolver` subclasses inherit the attributes of `RuneRunner` and will include additional data fields that can be set. In its most basic form it has the two fields `INTERRUPT` (must be rune) and `INTERRUPT_POS` (list of indices). +All solver subclasses inherit the attributes of `RuneSolver` and will include additional data fields that can be set. The most basic form has the two fields `INTERRUPT` (must be rune) and `INTERRUPT_POS` (list of indices). -In the case of `VigenereSolver` the additional fields are `KEY_DATA` (list of indices), `KEY_INVERT` (bool), `KEY_SHIFT` (int), `KEY_ROTATE` (int), `KEY_OFFSET` (int), and `KEY_POST_PAD` (int). +In the case of `VigenereSolver` the additional fields are `KEY_DATA` (list of indices), `KEY_SHIFT` (int), `KEY_ROTATE` (int), `KEY_OFFSET` (int), and `KEY_POST_PAD` (int). The class `SequenceSolver` has only one additional parameter which is `FN` (function pointer or lambda expression). -`AffineSolver` is very similar to `VigenereSolver` but does not support key manipulation (yet). `KEY_DATA` and `KEY_INVERT` are the only two attributes.
+`AffineSolver` is very similar to `VigenereSolver` but uses `(a, b)` tuples as key data. + +`AutokeySolver` is also based on `VigenereSolver` but reuses key data that was previously decrypted. + + +### OEIS checker + +`solver.py` also has a fully automated OEIS sequence checker. The script tests all 294k sequences that contain at least 14 numbers but limits each sequence to the first 40 numbers. For each sequence the script will try to shift the runes and see if a useful word is generated. Useful in this case means it appears in a [dictionary of 350k words](https://github.com/dwyl/english-words/). + +If all, or all but one, words appear in said dictionary, the sequence is printed out. Additionally, the script will also try to shift the generated sequence by all rune indices mod 29. Further, each sequence is tested not only starting at position 0, but also with an offset of -1 to +3 (e.g., 00123 to 345...). Each input is tested with all interrupt combinations (assuming ᚠ is interrupt). + +Assumptions: + +- the sequence starts at the beginning +- the beginning is at the top-left and text goes left-to-right +- whitespace is actually correct +- ᚠ is interrupt (or none at all) + ## Heuristics -This is where the magic happens. `HeuristicLib.py` contains the basic frequency analysis metrics like Index of Coincidence (IoC) and similarity matching. The latter is used to automatically detect key shifts – like in Vigenere or Affine. These metrics are based on english sample texts, in this case “Peace and War” or “Gadsby” (text without the letter ‘e’ [well almost, because there are still 6 e's in there ... liar!]). +This is where the magic happens. `Probability.py` contains the basic frequency analysis metrics like Index of Coincidence (IoC) and similarity matching. The latter is used to automatically detect key shifts – like in Vigenere or Affine.
These metrics are based on english sample texts, in this case “Peace and War” or “Gadsby” (text without the letter ‘e’ [well almost, because there are still 6 e's in there ... liar!]). `NGrams.py` is respobsible for taking english text (or any other language) and translating it to runes. Also, counts runes in a text and creates the frequency distribution. The translation is the slowest part, but still very efficient. Creating all 1-gram to 5-grams of a 7 Mb text file takes approx. 20 sec. `FailedAttempts.py` is a collection of what the title is saying – failed attempts. Currently only holds a n-gram shifter. Which will shift every n runes in contrast to the normal decrypting of a single rune at a time. -#### GuessVigenere, GuessAffine +#### GuessVigenere, GuessAffine, GuessPattern -Two classes that enumerate all possible shifts for a key. For Vigenere that is key length * 29, for Affine key length * 29^2. To determine whether one shift is more likely than another, a similarity metric is used. In this case, the least square distance to a normal english distribution. The value will be lowest if it closely matches the frequencies of each rune. +These classes enumerate all possible shifts for a key. For Vigenere that is key length * 29, for Affine key length * 29^2. To determine whether one shift is more likely than another, a similarity metric is used. In this case, the least square distance to a normal english distribution. The value will be lowest if it closely matches the frequencies of each rune. + +`GuessPattern` uses an input template, e.g., `1234` and outputs different key permutations such as: + +`1234234134124123`, +`1234412334122341`, +`1234341212343412`, +`12344321`, and `123432`. -### HeuristicSearch.py +### InterruptSearch.py This is the heart of the interrupt detector. Searching the full set of possible constellations is not feasable (2 ^ {number of possible interrupts}). Thus, the class has two methods to avoid the full search. 
Both come with a maximum look ahead parameter that can be tweaked. Lets look at an example with 66 interrupts (p8–14). Testing all would require 2^66 or __7.4*10^19__ calculations. +#### SearchInterrupt.all + +Just tries all combinations without leaving anything out. + #### SearchInterrupt.sequential This will go through the text sequentially. Looking at the first N interrupts and try all combinations in this subset. The best combination will determine whether the current interrupt (1. interrupt index) should be added to the final result. If the current index was used to generate the best value then it is included otherwise not. __Note:__ it will only add the first interrupt, not all of them. The next iteration will look at the interrupts at index 1 to N+1. Adding the next index if it was in the set, and repeating with the remaining text. @@ -318,7 +352,7 @@ The complexity is not linear and depends on whether “there was just another be Calculating the best interrupt position takes quite long, so we can optimize our program by pre-calculating the IoC's. That is what `InterruptDB.py` is for. The class will search for the best interrupts and store the IoC score as well as the set of interrupts in a file. Later queries just need to process this file instead. -The current configuration will look at the first 20 interrupts, for all runes, on all pages, and up to a key length of 32 – thats 1.36*10^10 operations! The full execution time is somewhere around 38 hours. Luckily, it is a one-time job. The resulting database is used directly as is, plus a html file is generated by `InterruptToWeb` for a graphical representation. Meanwhile, `InterruptIndices` keeps count how reliable the results are, e.g., how many runes were considered when looking for the first 20 interrupts, and adds that information to the html. Here is the [html overview](./InterruptDB/). 
+The current configuration will look at the first 20 interrupts, for all runes, on all pages, and up to a key length of 32 – thats 1.36*10^10 operations! The full execution time is somewhere around 38 hours. Luckily, it is a one-time job. The resulting database is used directly as is, plus a html file is generated by `InterruptToWeb.py` for a graphical representation. Meanwhile, `InterruptIndices.py` keeps count how reliable the results are, e.g., how many runes were considered when looking for the first 20 interrupts, and adds that information to the html. Here is the [html overview](./InterruptDB/). ### probability.py diff --git a/img/4gq25.jpg b/img/4gq25.jpg new file mode 100644 index 0000000..2202729 Binary files /dev/null and b/img/4gq25.jpg differ diff --git a/img/gematria-primus.jpg b/img/gematria-primus.jpg new file mode 100755 index 0000000..412ecb2 Binary files /dev/null and b/img/gematria-primus.jpg differ diff --git a/img/zN4h51m.jpg b/img/zN4h51m.jpg new file mode 100644 index 0000000..47a4d27 Binary files /dev/null and b/img/zN4h51m.jpg differ diff --git a/playground.py b/playground.py index de27176..9800d50 100755 --- a/playground.py +++ b/playground.py @@ -1,8 +1,14 @@ #!/usr/bin/env python3 import LP +INPUT = LP.RuneTextFile(LP.path.root('_input.txt')) +OUTPUT = LP.IOWriter() SOLVER = LP.VigenereSolver() # VigenereSolver, AffineSolver, AutokeySolver -SOLVER.input.load(file=LP.path.root('_input.txt')) + + +def solve(): + derypted, highlight = SOLVER.run(INPUT) + OUTPUT.run(derypted, highlight) def main(): @@ -14,13 +20,13 @@ Available commands are: d : Get decryption key (substitution) for a single phrase f : Find words with a given length (f 4, or f word) g : Print Gematria Primus (gp) or reversed Gematria (gpr) - h : Highlight occurrences of interrupt jumps (hj) or reset (h) + h : Highlight occurrences of interrupt jumps (hj or hj 28) k : Re/set decryption key (k), invert key (ki), ': change key shift (ks), rotation (kr), offset (ko), or after 
padding (kp) ': set key jumps (kj) e.g., [1,2] (first appearence of ᚠ is index 1) l : Toggle log level: normal (ln), quiet (lq) verbose (lv) p : Prime number and emirp check - t : Translate between runes, text, and indices (0-28) + t : Translate between runes, text, indices (0-28), and primes x : Execute decryption. Also: load data into memory ': set manually (x DATA) or load from file (xf p0-2) (default: _input.txt) ': limit/trim loaded data up to nth character (xl 300) @@ -38,14 +44,12 @@ Available commands are: cmd = cmd_p[0].strip().lower() args = cmd_p[1].strip() if len(cmd_p) > 1 else '' - if cmd[0] != 'l': # only log mode allowed after find operation - SOLVER.reset_highlight() - if cmd == 'help': print(help_str) elif cmd == 'q' or cmd == 'exit' or cmd == 'quit': exit() elif cmd == '?': + print('DATA:', INPUT) print(SOLVER) else: cmdX = {'a': command_a, 'd': command_d, 'f': command_f, @@ -83,7 +87,7 @@ def command_a(cmd, args): # [a]ll variations if 'i' in cmd: root = ~root for i in range(29): - print('{:02d}: {}'.format(i, (root + i).description(index=inclIndex))) + print('{:02d}: {}'.format(i, (root - i).description(index=inclIndex))) ######################################### @@ -125,12 +129,16 @@ def command_f(cmd, args): # (f)ind word search_term = LP.RuneText(args) s_len = len(search_term) - cur_words = SOLVER.highlight_words_with_len(s_len) - SOLVER.run() + cur_words = [x for x in INPUT.enum_words() if len(x[-1]) == s_len] + if len(cur_words) == 0: + print('No matching word found.') + return + + OUTPUT.run(INPUT, [(a, b) for a, b, _, _ in cur_words]) print() print('Found:') - for _, _, pos, _, w in cur_words: - print(f'{pos:04}: {w.description(count=True)}') + for _, _, pos, word in cur_words: + print(f'{pos:04}: {word.description(count=True)}') if search_term: print() keylen = [len(search_term)] @@ -146,9 +154,9 @@ def command_f(cmd, args): # (f)ind word raise ValueError('not a number.') print() print('Available substition:') - for _, _, pos, _, w in 
cur_words: + for _, _, pos, word in cur_words: for kl in keylen: - res = SOLVER.substitute_get(pos, kl, search_term, w) + res = SOLVER.substitute_get(pos, kl, search_term, word, INPUT) print(f'{pos:04}: {res}') @@ -179,19 +187,31 @@ def command_g(cmd, args): # (g)ematria primus ######################################### def command_h(cmd, args): # (h)ighlight - if cmd == 'h': - SOLVER.reset_highlight() - SOLVER.run() - elif cmd in 'hj hi': - res = SOLVER.highlight_interrupt() - SOLVER.run() + if len(cmd) > 1 and cmd[1] in 'ji': + try: + irp = get_cmd_int(cmd, args) + except ValueError: + irp = LP.RuneText(args)[0].index + res = [] + r_pos = -1 + for i, x in enumerate(INPUT): + if x.index != 29: + r_pos += 1 + if x.index == irp: + res.append(([i, i + 1], r_pos)) + + irp_set = [x for x in SOLVER.INTERRUPT_POS if x <= len(res)] + for i in irp_set: + res[i - 1][0].append('1;37m\x1b[45m') + # run without decryption + OUTPUT.run(INPUT, [x for x, _ in res]) txt = '' bits = '' - # first appearance of ᚠ is l_pos == 1; r_pos is the index on runes only - for l_pos, r_pos, _, is_set in res: - txt += '{}.{}.{} '.format(l_pos, 'T' if is_set else 'F', r_pos) - bits += '1' if is_set else '0' - print(f'\nInterrupts: {bits}\n{txt}') + for i, (_, r_pos) in enumerate(res): + i += 1 # first occurrence of interrupt is index 1 + txt += f"{i}.{'T' if i in irp_set else 'F'}.{r_pos} " + bits += '1' if i in irp_set else '0' + print(f'\nInterrupt({LP.RUNES[irp]}): {bits}\n{txt}') else: return False @@ -202,7 +222,7 @@ def command_h(cmd, args): # (h)ighlight def command_k(cmd, args): # (k)ey manipulation if cmd == 'k' or cmd == 'key': - SOLVER.KEY_DATA = LP.RuneText(args).index + SOLVER.KEY_DATA = LP.RuneText(args).index_no_newline print(f'set key: {SOLVER.KEY_DATA}') elif cmd[1] == 's': SOLVER.KEY_SHIFT = get_cmd_int(cmd, args, 'shift') @@ -213,8 +233,12 @@ def command_k(cmd, args): # (k)ey manipulation elif cmd[1] == 'p': SOLVER.KEY_POST_PAD = get_cmd_int(cmd, args, 'post padding') elif 
cmd[1] == 'i': - SOLVER.KEY_INVERT = not SOLVER.KEY_INVERT - print(f'set key invert: {SOLVER.KEY_INVERT}') + global INPUT + if isinstance(INPUT, LP.RuneTextFile): + INPUT.invert() + print(f'set key invert: {INPUT.inverted}') + else: + INPUT = ~INPUT elif cmd == 'kj': args = args.strip('[]') pos = [int(x) for x in args.split(',')] if args else [] @@ -222,7 +246,7 @@ def command_k(cmd, args): # (k)ey manipulation print(f'set interrupt jumps: {SOLVER.INTERRUPT_POS}') else: return False # command not found - SOLVER.run() + solve() ######################################### @@ -231,15 +255,15 @@ def command_k(cmd, args): # (k)ey manipulation def command_l(cmd, args): # (l)og level if cmd == 'lv' or args == 'v' or args == 'verbose': - SOLVER.output.VERBOSE = not SOLVER.output.VERBOSE + OUTPUT.VERBOSE = not OUTPUT.VERBOSE elif cmd == 'lq' or args == 'q' or args == 'quiet': - SOLVER.output.QUIET = not SOLVER.output.QUIET + OUTPUT.QUIET = not OUTPUT.QUIET elif cmd == 'ln' or args == 'n' or args == 'normal': - SOLVER.output.VERBOSE = False - SOLVER.output.QUIET = False + OUTPUT.VERBOSE = False + OUTPUT.QUIET = False else: return False - SOLVER.run() + solve() ######################################### @@ -267,7 +291,7 @@ def command_t(cmd, args): # (t)ranslate print('runes({}): {}'.format(len(word), word.rune)) print('plain({}): {}'.format(len(word.text), word.text)) print('reversed: {}'.format((~word).rune)) - print('indices: {}'.format(word.index)) + print('indices: {}'.format(word.index_no_newline)) print('prime({}{}): {}'.format(word.prime_sum, sffx, word.prime)) @@ -276,24 +300,23 @@ def command_t(cmd, args): # (t)ranslate ######################################### def command_x(cmd, args): # e(x)ecute decryption + global INPUT if cmd == 'x': - pass # just run the solver + if args.strip(): + INPUT = LP.RuneText(args) elif cmd == 'xf': # reload from file file = LP.path.page(args) if args else LP.path.root('_input.txt') print('loading file:', file) - 
SOLVER.input.load(file=file) - args = None # so run() won't override data + INPUT = LP.RuneTextFile(file) elif len(cmd) > 0 and cmd[1] == 'l': # limit content limit = get_cmd_int(cmd, args, 'read limit') - last_file = SOLVER.input.loaded_file - if last_file: - SOLVER.input.load(file=last_file) + if isinstance(INPUT, LP.RuneTextFile): + INPUT = INPUT.reopen() if limit > 0: - SOLVER.input.data.trim(limit) - args = None + INPUT.trim(limit) else: return False - SOLVER.run(args if args else None) + solve() if __name__ == '__main__': diff --git a/probability.py b/probability.py index b765d79..1085b8e 100755 --- a/probability.py +++ b/probability.py @@ -5,44 +5,51 @@ import LP INVERT = False KEY_MAX_SCORE = 0.05 AFF_MAX_SCORE = 0.04 -IRP_F_ONLY = True session_files = [] +db_i = LP.InterruptIndices() +if True: + db = LP.InterruptDB.load('db_norm') + IOC_MIN_SCORE = 0.55 +else: + db = LP.InterruptDB.load('db_high') + IOC_MIN_SCORE = 1.35 + ######################################### # Perform heuristic search on the keylength, interrupts, and key ######################################### def break_cipher(fname, candidates, solver, key_fn): + slvr = solver() + io = LP.IOWriter() + io.QUIET = True + inpt = LP.RuneTextFile(LP.path.page(fname)) + if INVERT: + inpt.invert() + data = inpt.index_no_white + + if key_fn.__name__ == 'GuessAffine': + key_max_score = AFF_MAX_SCORE + else: + key_max_score = KEY_MAX_SCORE + def fn_similarity(x): return LP.Probability(x).similarity() - filename = LP.path.page(fname) - slvr = solver() - slvr.input.load(file=filename) - slvr.output.QUIET = True - slvr.output.COLORS = False - slvr.KEY_INVERT = INVERT - key_max_score = KEY_MAX_SCORE - if key_fn.__name__ == 'GuessAffine': - key_max_score = AFF_MAX_SCORE + outfmt = 'IoC: {}, interrupt: {}, count: {}, solver: {}' for irp_count, score, irp, kl, skips in candidates: - if IRP_F_ONLY and irp != 0: - continue - data = LP.load_indices(filename, irp, maxinterrupt=irp_count) - if INVERT: - data = [28 - x 
for x in data] - iguess = LP.SearchInterrupt(data, (28 - irp) if INVERT else irp) - print('IoC: {}, interrupt: {}, count: {}, solver: {}'.format( - score, LP.RUNES[irp], len(iguess.stops), key_fn.__name__)) - testcase = iguess.join(iguess.from_occurrence_index(skips)) - + stops, upto = db_i.consider(fname, irp, irp_count) + print(outfmt.format(score, LP.RUNES[irp], len(stops), key_fn.__name__)) + testcase = data[:upto] + for x in reversed(skips): + testcase.pop(stops[x - 1]) key_score, key = key_fn(testcase).guess(kl, fn_similarity) if key_score > key_max_score: continue prio = (1 - key_score) * max(0, score) print(f' key_score: {prio:.4f}, {key}') - print(' skip:', skips) + print(f' skip: {skips}') txtname = f'{fname}_{prio:.4f}.{key_fn.__name__}.{irp}_{kl}' if INVERT: txtname += '.inv' @@ -53,26 +60,25 @@ def break_cipher(fname, candidates, solver, key_fn): with open(outfile, 'w') as f: f.write( f'{irp}, {kl}, {score:.4f}, {key_score:.4f}, {key}, {skips}\n') - slvr.output.file_output = outfile + io.file_output = outfile slvr.INTERRUPT = LP.RUNES[irp] slvr.INTERRUPT_POS = skips slvr.KEY_DATA = key - slvr.run() + io.run(slvr.run(inpt)[0]) def pattern_solver(fname, irp=0): - with open(LP.path.page(fname), 'r') as f: - orig = LP.RuneText(f.read()) + orig = LP.RuneTextFile(LP.path.page(fname)) # orig = LP.RuneText('ᛄᚹᚻᛗᛋᚪ-ᛋᛁᚫᛇ-ᛋᛠᚾᛞ-ᛇᛞ-ᛞᚾᚣᚹᛗ.ᛞᛈ-ᛝᛚᚳᚾᛗᚾᚣ-ᛖᛝᛖᚦᚣᚢ-ᚱᚻᛁᚠ-ᛟᛝ-ᛚᛖᚫᛋᛚᚳᛋᛇ.ᚣᚾᚻᛄᚾᚳᛡ-ᚷᚳᛝ-ᛈᛝ-ᛡᚷᚦᚷᛖ.ᚻᛠᚣ-ᛄᛞᚹᛒ-ᛇᛄᛝᚩᛟ.ᛗᛠᚣᛋᛖᛚᚠ-ᚾᚫᛁ-ᛄᚹᚻᚻᛚᛈᚹᚠ-ᚫᚩᛡᚫᛟ-ᚷᛠ-ᚪᛡᚠᛄᚱᛏᚢᛈ.ᛏᛈ-ᛇᛞ-ᛟᛗᛇᛒᛄᚳᛈ.ᛉᛟ-ᛒᚻᚱᛄᚣ-ᚾᚱ-ᚾᛡᛈᛈ-ᛚᛉᛗᛞ-ᛟᛝ-ᚷᛁᚱᚩᚹᛗ-ᚠᛇᚣ-ᚣᛝᛒ-ᛁ-ᚠᚾᚹᚢ-ᛠᚾᛈᚠᚻ.ᚫᛋᛄᚪᚻ-ᛒᛖᛋᚻᛠ-ᛄᛗ-ᛟᛡᚹᚪᛡ-ᛄᛋᛖᚢᛗ-ᛏᛖᛉᚪ-ᛞᛟᛉᚾᚠ-ᚱᛡᛒᛚᚩᛈᛝ-ᛋᛄᛚᛗ-ᛞᚱᛗᛗ-ᛒᛈ-ᛁᛉᚱᛄᛝ.ᛋᛇᚪ-ᛗᚠᚻᚣᚹᛉᛞ-ᛡᛁᚷᚪ-ᚩᚱ-ᚪᚾᚹᛇᛋᛞᛄᚷ-ᛡ-ᛖᚫᛄ-ᛞᛟᛁᚻᚹᛝ-ᛠᛈᛏ-ᚪᛗᛗᛚᛚᚪᛞ.ᛁᛠᛈᚷᛞ-ᛗᚣᛄᚳᚹᛚ-ᚻᛋᛟᛗ-ᚣᚫᛝᛚ-ᛠᛁᛝᛝᚪ-ᚳᛗ-ᚢᚫᛋ-ᛉᛠᚱ-ᛇᛡᛄᚻᛗᚾ-ᚻᛗᛝᛚ-ᛇᛞ-ᛟᚢᚣᚪᚷᚱ-ᛡᚷ-ᚷᛠ-ᛚᚻᛒ.ᛡᛒ-ᚩᛁᛄ-ᛗᛟᛉᚩᚣ-ᛞᚩ-ᚳᛗ-ᚾᛗᚩ-ᚷᛠ-ᛚᚱᚠᚷ-ᛁᚫᛗᛉ-ᛁᛠᚹᛚ-ᛖᛝᚾᛟᛗᚾ-ᛄᚾ-ᚾᚳᛚᛝ-ᛡ-ᚷᛞᛗᚱᚻᚩ-ᛗᛞᛠᚫᛞ-ᛞᚱᛗᛗ-ᚣᚪ-ᛗᛉᚢᛞᛇᚹ-ᛟᚱᛏᚱᛟᚢᛉᛗᛚᛈᛉᛝ.ᛏᛖ-ᛗᛋᚣ-ᚹᛁ-ᚹᛝ-ᛋᛇᛄᚳᛁᛋᛝ.ᛄᛚᚹ-ᚷᚠᛝ-ᚫᚷᛚᛡᛁᛡ.ᛖᚠᚣ-ᛉᛝᚻᛄᚾᛈᚠ-ᛉᚣ-ᛚᛄᛞᛝᛞᚪ-ᚩᛈ-ᚻᛟ-ᛖᚻᚱᚹ-ᛚᚷᚳ-ᛒᛈᛏᚻ-ᚠᛋᛠᚣᛋᚠ-ᛏᚷᛈᚪᛒ.') # orig = 
LP.RuneText('ᛇᚦ-ᛒᛏᚣᚳ-ᛇᛚᛉ-ᛄᛚᚦᚪ-ᛋᚱᛉᚦ-ᚦᛄᚻ-ᛉᛗ-ᛏᛞᛋ-ᚣᚾ-ᚣᛟᛇᛈᛟᛚ-ᛈᚹᛚᚪᛗ-ᚪᛉᛁᛇᛝᚢᚱᛉ.ᛞᛄ-ᚻᛠᚪᛚ.ᚠᛚ-ᚩᛋᚾ-ᚫᛞᛋᛁᛞᚹᚾᚪᚪ-ᚱᛟᚻ-ᛚᚠᛚᚳᛟᚱ-ᚣᛏ-ᚹᛏᛝᚣ-ᚳᚩ-ᛄᚷᛟ-ᛖ-ᚫᚻᚦᛠ-ᛒᛠᛁ-ᛁᚩᛡ-ᛗᛉᚠᚷᛁ-ᚣᚣᛋᛇᛗᛠᚹ.ᛇᚪ-ᛇᛉᛡ-ᛄᚾᛇᛁᛇᚫ-ᛋᚱᚹ-ᛝᚣᚦ-ᛠᛁᛄᛚᚢᛄ-ᚻᛇᛚᛟ-ᛒᛠᛒᛚ-ᚩᛈᛈ-ᚢᚻᛚ-ᛡᚾᛚ-ᛒᚦᚱᚠᚦᚫ-ᛞᚳ-ᛄᚳᚷ-ᚹᚫ-ᚱᛉᚣᛖᚱ.ᛒᛝᚹ-ᛟᚳᚫᚹᛈᚢ-ᚱᛋᛒ-ᚷᚦᚳᛏᛏᛠᚹ-ᚱᚣᛞ-ᚣᛠᛄ-ᛋ-qᚪᛚᚾᛖᛄᚪ-ᛇᚻᛖ-ᛏᛠᛈ-ᛝᛉᚾᚳ-ᛋᚾᚹᚦᚾ-ᚣᛞᛝᚣ-ᛠᛠᛡ-ᛉᛁᛚᚢᚩ.ᛗᛉᚦ-ᛒᛝᛇᛠᛟ-ᛁᛟᛏ-ᛠᛏᛄ-ᚫᚳᛉᛝᛖᚠ-ᛇᚠ.ᛄᛄᛝᛟᛡᛟ-ᛠᛖᚫ-ᚦᛏᛠᛗ-ᛁᛏᚩᛒᛡ-ᛝᛟ-ᛉᚠᛇᚷᛗᛠ-ᚠᛖ-ᚳᛖᛖᚾᛠᛁᚪᛟ-ᛉᚣ-ᚢᛁ.ᛒᛏ.ᛒᛠ-ᛠᛁᚢᛗ-ᛞᛟᛋᛠᚷᚠᛇᚫ-ᛏᚪ-ᛇᚦ-ᛒᚪᛟᚩᛗ.ᛟᚳᛇ-ᛞᛞ-ᛋᚱᛁᛋᚦ-ᛇᛒ-ᚳᛒᛟ-ᚳᛟᚳᚷᛇ.ᛗᛉᚦ-ᛞᚦᛉᛈᛚᛈᛚᛁᚢ-ᚳᛞᛡᛝᚻᚷ-ᛞᚪ-ᚳᛟᚳᛁᛟᛞ-') - data = orig.index + data = orig.index_no_newline if False: # longest uninterrupted text - pos, lg = LP.InterruptDB.longest_no_interrupt(data, interrupt=0, irpmax=0) + pos, lg = LP.longest_no_interrupt(data, interrupt=0, irpmax=0) data = data[pos:pos + lg] else: # from the beginning - data = data[:170] + data = data[:970] - data_i = [i for i, x in enumerate(data) if x == 29] + whitespace_i = [i for i, x in enumerate(data) if x == 29] data = [x for x in data if x != 29] def fn_similarity(x): @@ -84,22 +90,20 @@ def pattern_solver(fname, irp=0): # yield from x[::-1] yield from x[::-1][1:-1] + prnt_fmt = 'kl: {}, pattern-n: {}, IoC: {:.3f}, dist: {:.4f}, offset: {}, key: {}' print(fname) gr = LP.GuessPattern(data) for kl in range(3, 19): - # for pattern_shift in range(1): - # fn_pattern = fn_pattern_mirror for pattern_shift in range(1, kl): def fn_pattern_shift(x, kl): # shift by (more than) one, 012201120 for i in range(10000): yield from x[(i * pattern_shift) % kl:] yield from x[:(i * pattern_shift) % kl] - fn_pattern = fn_pattern_shift # Find proper pattern res = [] for offset in range(kl): # up to keylen offset - mask = LP.GuessPattern.pattern(kl, fn_pattern) + mask = LP.GuessPattern.pattern(kl, fn_pattern_shift) parts = gr.split(kl, mask, offset) score = sum(LP.Probability(x).IC() for x in parts) / kl if score > 1.6 and score < 2.1: @@ -107,13 +111,12 @@ def pattern_solver(fname, irp=0): # Find best matching key for pattern for score, parts, off in res: - sc, solution = LP.GuessPattern.guess(parts, 
fn_similarity) + sc, key = LP.GuessPattern.guess(parts, fn_similarity) if sc < 0.1: - fmt = 'kl: {}, pattern-n: {}, IoC: {:.3f}, dist: {:.4f}, offset: {}, key: {}' - print(fmt.format(kl, pattern_shift, score, sc, off, - LP.RuneText(solution).text)) - solved = gr.zip(fn_pattern(solution, kl), off) - for i in data_i: + print(prnt_fmt.format(kl, pattern_shift, score, sc, off, + LP.RuneText(key).text)) + solved = gr.zip(fn_pattern_shift(key, kl), off) + for i in whitespace_i: solved.insert(i, 29) print(' ', LP.RuneText(solved).text) @@ -121,10 +124,6 @@ def pattern_solver(fname, irp=0): ######################################### # main ######################################### -db = LP.InterruptDB.load('db_norm') -# IOC_MIN_SCORE = 1.4 # for db_high -IOC_MIN_SCORE = 0.55 # for db_norm - for fname in [ 'p0-2', # ??? 'p3-7', # ??? @@ -153,8 +152,8 @@ for fname in [ print(fname, 'not in db.') continue print() - print(f'loading file: pages/{fname}.txt') - candidates = [x for x in db[fname] if x[1] >= IOC_MIN_SCORE] + print(f'loading: {fname}') + candidates = [x for x in db[fname] if x[1] >= IOC_MIN_SCORE and x[2] == 0] if not candidates: maxscore = max(x[1] for x in db[fname]) print('No candidates. 
Highest score is only', maxscore) diff --git a/solver.py b/solver.py index d5a3953..dfc1039 100755 --- a/solver.py +++ b/solver.py @@ -18,41 +18,40 @@ MOEBIUS = load_sequence_file('seq_moebius') def print_all_solved(): - def plain(slvr): - slvr.KEY_DATA = [] + def plain(slvr, inpt): + pass - def invert(slvr): - slvr.KEY_DATA = [] - slvr.KEY_INVERT = True + def invert(slvr, inpt): + inpt.invert() - def solution_welcome(slvr): + def solution_welcome(slvr, inpt): slvr.KEY_DATA = [23, 10, 1, 10, 9, 10, 16, 26] # DIVINITY slvr.INTERRUPT = 'ᚠ' slvr.INTERRUPT_POS = [4, 5, 6, 7, 10, 11, 14, 18, 20, 21, 25] - def solution_koan_1(slvr): + def solution_koan_1(slvr, inpt): slvr.KEY_DATA = [26] # Y - slvr.KEY_INVERT = True + inpt.invert() - def solution_jpg107_167(slvr): # FIRFUMFERENFE + def solution_jpg107_167(slvr, inpt): # FIRFUMFERENFE slvr.KEY_DATA = [0, 10, 4, 0, 1, 19, 0, 18, 4, 18, 9, 0, 18] slvr.INTERRUPT = 'ᚠ' slvr.INTERRUPT_POS = [2, 3] - def solution_p56_end(slvr): + def solution_p56_end(slvr, inpt): slvr.FN = lambda i, r: r - (PRIMES[i] - 1) slvr.INTERRUPT = 'ᚠ' slvr.INTERRUPT_POS = [4] def solve(fname, fn_solution, solver=LP.VigenereSolver): slvr = solver() - slvr.output.COLORS = False - slvr.output.QUIET = True # or use -v/-q while calling - slvr.input.load(file=LP.path.page(fname)) - fn_solution(slvr) + inpt = LP.RuneTextFile(LP.path.page(fname)) + fn_solution(slvr, inpt) print(f'pages/{fname}.txt') print() - slvr.run() + io = LP.IOWriter() + # io.QUIET = True # or use -v/-q while calling + io.run(slvr.run(inpt)[0]) print() solve('0_warning', invert) @@ -67,36 +66,29 @@ def print_all_solved(): def play_around(): - slvr = LP.VigenereSolver() - slvr.output.COLORS = False - slvr.output.QUIET = True - slvr.KEY_DATA = [] - vowels = 'ᚢᚩᛁᛇᛖᛟᚪᚫᛡᛠ' + vowels = [LP.RUNES.index(x) for x in 'ᚢᚩᛁᛇᛖᛟᚪᚫᛡᛠ'] for uuu in LP.FILES_UNSOLVED: - slvr.input.load(file=LP.path.page(uuu)) + inpt = LP.RuneTextFile(LP.path.page(uuu)) print(uuu) - print('word count:', sum(len(x) for x in 
slvr.input.words.values())) - a = [1 if x.rune in vowels else 0 for x in slvr.input.runes_no_whitespace()] + print('word count:', sum(1 for _ in inpt.enum_words())) + a = [1 if x in vowels else 0 for x in inpt.index_no_white] b = [a[i:i + 5] for i in range(0, len(a), 5)] c = [int(''.join(str(y) for y in x), 2) for x in b] # print('-'.join(str(x) for x in c)) # print(LP.RuneText(c).text) # print(''.join('ABCDEFGHIJKLMNOPQRSTUVWXYZ___...'[x] for x in c)) - # slvr.run() def try_totient_on_unsolved(): slvr = LP.SequenceSolver() - slvr.output.QUIET = True - slvr.output.BREAK_MODE = '' # disable line breaks # slvr.INTERRUPT = 'ᛝ' # slvr.INTERRUPT_POS = [1] # for uuu in ['15-22']: for uuu in LP.FILES_UNSOLVED: print() print(uuu) - slvr.input.load(file=LP.path.page(uuu), limit=25) - # alldata = slvr.input.runes_no_whitespace() + [LP.Rune(i=29)] + inpt = LP.RuneTextFile(LP.path.page(uuu), limit=25).data_clean + # alldata = [x for x in inpt if x.index != 29] + [LP.Rune(i=29)] * 1 def ec(r, i): p1, p2 = LP.utils.elliptic_curve(i, 149, 263, 3299) @@ -110,7 +102,7 @@ def try_totient_on_unsolved(): # slvr.FN = lambda i, r: LP.Rune(i=(r.prime - PRIMES[FIBONACCI[i]] + z) % 29) # slvr.FN = lambda i, r: LP.Rune(i=(r.prime ** i + z) % 29) slvr.FN = lambda i, r: LP.Rune(i=(ec(r, i) + z) % 29) - slvr.run() + print(slvr.run(inpt)[0].text) def find_oeis(irp=0, invert=False, offset=0, allow_fails=1, min_match=2): @@ -158,12 +150,11 @@ def find_oeis(irp=0, invert=False, offset=0, allow_fails=1, min_match=2): splits = splits[1:] print() print(uuu) - with open(LP.path.page(uuu), 'r') as f: - data = LP.RuneText(f.read()[:120]).index_rune_only - irps = [i for i, x in enumerate(data[:splits[-1][1]]) if x == irp] - irps.reverse() # insert -1 starting with the last - if invert: - data = [28 - x for x in data] + data = LP.RuneTextFile(LP.path.page(uuu), limit=120).index_no_white + if invert: + data = [28 - x for x in data] + irps = [i for i, x in enumerate(data[:splits[-1][1]]) if x == irp] + 
irps.reverse() # insert -1 starting with the last min_len = sum(wlen[:2]) # must match at least n words data_len = len(data)