diff --git a/LP/IOReader.py b/LP/IOReader.py index 9ed68d3..900337e 100755 --- a/LP/IOReader.py +++ b/LP/IOReader.py @@ -1,32 +1,5 @@ #!/usr/bin/env python3 # -*- coding: UTF-8 -*- -import re # load_indices -from Alphabet import RUNES -from RuneText import RuneText - -re_norune = re.compile('[^' + ''.join(RUNES) + ']') - - -######################################### -# load page and convert to indices for faster access -######################################### - -def load_indices(fname, interrupt, maxinterrupt=None, minlen=None, limit=None): - with open(fname, 'r') as f: - data = RuneText(re_norune.sub('', f.read())).index_no_white[:limit] - if maxinterrupt is not None: - # incl. everything up to but not including next interrupt - # e.g., maxinterrupt = 0 will return text until first interrupt - for i, x in enumerate(data): - if x != interrupt: - continue - if maxinterrupt == 0: - if minlen and i < minlen: - continue - return data[:i] - maxinterrupt -= 1 - return data - ######################################### # find the longest chunk in a list of indices, which does not include an irp diff --git a/LP/InterruptDB.py b/LP/InterruptDB.py index 87fc282..9a9767b 100755 --- a/LP/InterruptDB.py +++ b/LP/InterruptDB.py @@ -2,8 +2,9 @@ # -*- coding: UTF-8 -*- import os from InterruptSearch import InterruptSearch +from InterruptIndices import InterruptIndices from Probability import Probability -from IOReader import load_indices +from RuneText import RuneTextFile from LPath import FILES_ALL, FILES_UNSOLVED, LPath @@ -82,15 +83,23 @@ class InterruptDB(object): # helper functions ######################################### -def create_initial_db(dbname, fn_score, klset=range(1, 33), - max_irp=20, irpset=range(29)): +def get_db(fname, irp, max_irp): + T = False # inverse + _, Z = InterruptIndices().consider(fname, 28 - irp if T else irp, max_irp) + data = RuneTextFile(LPath.page(fname)).index_no_white[:Z] + if T: + data = [28 - x for x in data] + return InterruptDB(data, irp) + + +def create_primary(dbname, fn_score, klset=range(1, 33), + max_irp=20, irpset=range(29)): oldDB = InterruptDB.load(dbname) oldValues = {k: set((a, b, c) for a, _, b, c, _ in v) for k, v in oldDB.items()} for irp in irpset: # interrupt rune index for name in FILES_ALL: - data = load_indices(LPath.page(name), irp, maxinterrupt=max_irp) - db = InterruptDB(data, irp) + db = get_db(name, irp, max_irp) print('load:', name, 'interrupt:', irp, 'count:', db.irp_count) for keylen in klset: # key length if (db.irp_count, irp, keylen) in oldValues.get(name, []): @@ -100,8 +109,7 @@ def create_initial_db(dbname, fn_score, klset=range(1, 33), print(f'{keylen}: {score:.4f}, solutions: {len(interrupts)}') -def find_secondary_solutions(db_in, db_out, fn_score, - threshold=0.75, max_irp=20): +def create_secondary(db_in, db_out, fn_score, threshold=0.75, max_irp=20): oldDB = InterruptDB.load(db_in) search_set = set() for name, arr in oldDB.items(): @@ -114,16 +122,15 @@ def find_secondary_solutions(db_in, db_out, fn_score, print('searching through', len(search_set), 'files.') for name, irp, kl in search_set: print('load:', name, 'interrupt:', irp, 'keylen:', kl) - data = load_indices(LPath.page(name), irp, maxinterrupt=max_irp) - db = InterruptDB(data, irp) + db = get_db(name, irp, max_irp) c = db.make_secondary(db_out, name, kl, fn_score, threshold) print('found', c, 'additional solutions') if __name__ == '__main__': - create_initial_db('db_high', Probability.IC_w_keylen, max_irp=20) - create_initial_db('db_norm', Probability.target_diff, max_irp=20) - # find_secondary_solutions('db_high', 'db_high_secondary', - # Probability.IC_w_keylen, threshold=1.4) - # find_secondary_solutions('db_norm', 'db_norm_secondary', - # Probability.target_diff, threshold=0.55) + create_primary('db_high', Probability.IC_w_keylen, max_irp=20) + create_primary('db_norm', Probability.target_diff, max_irp=20) + # create_secondary('db_high', 'db_high_secondary', + # Probability.IC_w_keylen, threshold=1.4) + # create_secondary('db_norm', 'db_norm_secondary', + # Probability.target_diff, threshold=0.55) diff --git a/LP/InterruptIndices.py b/LP/InterruptIndices.py index 3615c87..80dbe57 100755 --- a/LP/InterruptIndices.py +++ b/LP/InterruptIndices.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: UTF-8 -*- -from IOReader import load_indices from LPath import FILES_ALL, LPath +from RuneText import RuneTextFile ######################################### @@ -20,18 +20,18 @@ class InterruptIndices(object): def total(self, name): return self.pos[name]['total'] - def longest_no_interrupt(self, name, irp, irpmax=0): - irpmax += 1 - nums = self.pos[name]['pos'][irp] + [self.pos[name]['total']] * irpmax - ret = [(y - x, x) for x, y in zip(nums, nums[irpmax:])] - return sorted(ret, reverse=True) + # def longest_no_interrupt(self, name, irp, irpmax=0): + # irpmax += 1 + # nums = self.pos[name]['pos'][irp] + [self.pos[name]['total']] * irpmax + # ret = [(y - x, x) for x, y in zip(nums, nums[irpmax:])] + # return sorted(ret, reverse=True) @staticmethod def write(dbname='db_indices'): with open(LPath.db(dbname), 'w') as f: f.write('# file | total runes in file | interrupt | indices\n') for name in FILES_ALL: - data = load_indices(LPath.page(name), 0) + data = RuneTextFile(LPath.page(name)).index_no_white total = len(data) nums = [[] for x in range(29)] for idx, rune in enumerate(data): diff --git a/LP/NGrams.py b/LP/NGrams.py index 5303ecf..d04d505 100755 --- a/LP/NGrams.py +++ b/LP/NGrams.py @@ -2,7 +2,6 @@ # -*- coding: UTF-8 -*- import re from Alphabet import RUNES -from IOReader import re_norune from RuneText import RuneText from LPath import LPath @@ -33,7 +32,7 @@ class NGrams(object): @staticmethod def make(gramsize, infile, outfile): with open(infile, 'r') as f: - data = re_norune.sub('', f.read()) + data = re.sub('[^' + ''.join(RUNES) + ']', '', f.read()) res = {x: 0 for x in RUNES} if gramsize == 1 else {} for i in range(len(data) - gramsize + 1): @@ -74,3 +73,4 @@ if __name__ == '__main__': # make_translation(stream=False) # make_ngrams(5) + print(NGrams.load(2)) diff --git a/LP/__init__.py b/LP/__init__.py index 60cef66..8412f4a 100644 --- a/LP/__init__.py +++ b/LP/__init__.py @@ -10,7 +10,7 @@ from Alphabet import RUNES, alphabet from Rune import Rune from RuneText import RuneText, RuneTextFile -from IOReader import load_indices, longest_no_interrupt +from IOReader import longest_no_interrupt from IOWriter import IOWriter from RuneSolver import SequenceSolver, VigenereSolver, AffineSolver, AutokeySolver