remove load_indices()

This commit is contained in:
relikd
2021-02-13 00:41:19 +01:00
parent 686afb6f26
commit e4b4ed4498
5 changed files with 32 additions and 52 deletions

View File

@@ -1,32 +1,5 @@
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
import re # load_indices
from Alphabet import RUNES
from RuneText import RuneText
# Matches any single character that is not listed in RUNES; used to strip
# everything but runes from the raw text of a page.
re_norune = re.compile('[^' + ''.join(RUNES) + ']')
#########################################
# load page and convert to indices for faster access
#########################################
def load_indices(fname, interrupt, maxinterrupt=None, minlen=None, limit=None):
    """Load a page file and return its runes converted to indices.

    All characters that are not runes are stripped before conversion, and
    the result is truncated to at most ``limit`` entries.

    fname        -- path of the page file to read
    interrupt    -- index value that is treated as the interrupt
    maxinterrupt -- number of interrupts to keep; the data is cut right
                    before the next one (0 returns the text up to, but not
                    including, the first interrupt). None disables cutting.
    minlen       -- if set, a cut position smaller than this is skipped and
                    the following interrupt is tried instead
    limit        -- upper bound on the number of indices taken from the file

    Returns the index sequence, shortened as described above. If the
    requested interrupt is never reached, the whole sequence is returned.
    """
    # Page files contain runes (non-ASCII), so decode them explicitly as
    # UTF-8 instead of relying on the platform's locale default.
    with open(fname, 'r', encoding='utf-8') as f:
        data = RuneText(re_norune.sub('', f.read())).index_no_white[:limit]
    if maxinterrupt is None:
        return data
    # incl. everything up to but not including next interrupt
    # e.g., maxinterrupt = 0 will return text until first interrupt
    remaining = maxinterrupt
    for i, x in enumerate(data):
        if x != interrupt:
            continue
        if remaining != 0:
            remaining -= 1
            continue
        if minlen and i < minlen:
            # cut would be too short, wait for the next interrupt
            continue
        return data[:i]
    return data
#########################################
# find the longest chunk in a list of indices that does not contain an interrupt

View File

@@ -2,8 +2,9 @@
# -*- coding: UTF-8 -*-
import os
from InterruptSearch import InterruptSearch
from InterruptIndices import InterruptIndices
from Probability import Probability
from IOReader import load_indices
from RuneText import RuneTextFile
from LPath import FILES_ALL, FILES_UNSOLVED, LPath
@@ -82,15 +83,23 @@ class InterruptDB(object):
# helper functions
#########################################
def create_initial_db(dbname, fn_score, klset=range(1, 33),
max_irp=20, irpset=range(29)):
def get_db(fname, irp, max_irp):
    """Build an InterruptDB for page ``fname`` and interrupt index ``irp``.

    The page indices are cut at the position reported by
    InterruptIndices.consider() for ``max_irp`` (presumably the place where
    the allowed number of interrupts is exhausted -- confirm against
    InterruptIndices). A local switch allows running on the mirrored
    alphabet (28 - index); it is turned off.
    """
    inverse = False  # set to True to work on mirrored indices
    if inverse:
        lookup_irp = 28 - irp
    else:
        lookup_irp = irp
    _, cutoff = InterruptIndices().consider(fname, lookup_irp, max_irp)
    page_indices = RuneTextFile(LPath.page(fname)).index_no_white
    data = page_indices[:cutoff]
    if inverse:
        data = [28 - idx for idx in data]
    return InterruptDB(data, irp)
def create_primary(dbname, fn_score, klset=range(1, 33),
max_irp=20, irpset=range(29)):
oldDB = InterruptDB.load(dbname)
oldValues = {k: set((a, b, c) for a, _, b, c, _ in v)
for k, v in oldDB.items()}
for irp in irpset: # interrupt rune index
for name in FILES_ALL:
data = load_indices(LPath.page(name), irp, maxinterrupt=max_irp)
db = InterruptDB(data, irp)
db = get_db(name, irp, max_irp)
print('load:', name, 'interrupt:', irp, 'count:', db.irp_count)
for keylen in klset: # key length
if (db.irp_count, irp, keylen) in oldValues.get(name, []):
@@ -100,8 +109,7 @@ def create_initial_db(dbname, fn_score, klset=range(1, 33),
print(f'{keylen}: {score:.4f}, solutions: {len(interrupts)}')
def find_secondary_solutions(db_in, db_out, fn_score,
threshold=0.75, max_irp=20):
def create_secondary(db_in, db_out, fn_score, threshold=0.75, max_irp=20):
oldDB = InterruptDB.load(db_in)
search_set = set()
for name, arr in oldDB.items():
@@ -114,16 +122,15 @@ def find_secondary_solutions(db_in, db_out, fn_score,
print('searching through', len(search_set), 'files.')
for name, irp, kl in search_set:
print('load:', name, 'interrupt:', irp, 'keylen:', kl)
data = load_indices(LPath.page(name), irp, maxinterrupt=max_irp)
db = InterruptDB(data, irp)
db = get_db(name, irp, max_irp)
c = db.make_secondary(db_out, name, kl, fn_score, threshold)
print('found', c, 'additional solutions')
if __name__ == '__main__':
create_initial_db('db_high', Probability.IC_w_keylen, max_irp=20)
create_initial_db('db_norm', Probability.target_diff, max_irp=20)
# find_secondary_solutions('db_high', 'db_high_secondary',
# Probability.IC_w_keylen, threshold=1.4)
# find_secondary_solutions('db_norm', 'db_norm_secondary',
# Probability.target_diff, threshold=0.55)
create_primary('db_high', Probability.IC_w_keylen, max_irp=20)
create_primary('db_norm', Probability.target_diff, max_irp=20)
# create_secondary('db_high', 'db_high_secondary',
# Probability.IC_w_keylen, threshold=1.4)
# create_secondary('db_norm', 'db_norm_secondary',
# Probability.target_diff, threshold=0.55)

View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
from IOReader import load_indices
from LPath import FILES_ALL, LPath
from RuneText import RuneTextFile
#########################################
@@ -20,18 +20,18 @@ class InterruptIndices(object):
def total(self, name):
    """Return the 'total' value stored for the file ``name``."""
    entry = self.pos[name]
    return entry['total']
def longest_no_interrupt(self, name, irp, irpmax=0):
    """List the stretches between stored positions of ``irp``, longest first.

    Each stored position of ``irp`` in file ``name`` is paired with the
    position ``irpmax + 1`` entries later; the file's 'total' value is used
    as padding at the end so the last positions get a partner as well.

    Returns a list of (length, start) tuples sorted in descending order.
    """
    span = irpmax + 1
    entry = self.pos[name]
    marks = entry['pos'][irp] + span * [entry['total']]
    chunks = []
    for start, end in zip(marks, marks[span:]):
        chunks.append((end - start, start))
    chunks.sort(reverse=True)
    return chunks
# def longest_no_interrupt(self, name, irp, irpmax=0):
# irpmax += 1
# nums = self.pos[name]['pos'][irp] + [self.pos[name]['total']] * irpmax
# ret = [(y - x, x) for x, y in zip(nums, nums[irpmax:])]
# return sorted(ret, reverse=True)
@staticmethod
def write(dbname='db_indices'):
with open(LPath.db(dbname), 'w') as f:
f.write('# file | total runes in file | interrupt | indices\n')
for name in FILES_ALL:
data = load_indices(LPath.page(name), 0)
data = RuneTextFile(LPath.page(name)).index_no_white
total = len(data)
nums = [[] for x in range(29)]
for idx, rune in enumerate(data):

View File

@@ -2,7 +2,6 @@
# -*- coding: UTF-8 -*-
import re
from Alphabet import RUNES
from IOReader import re_norune
from RuneText import RuneText
from LPath import LPath
@@ -33,7 +32,7 @@ class NGrams(object):
@staticmethod
def make(gramsize, infile, outfile):
with open(infile, 'r') as f:
data = re_norune.sub('', f.read())
data = re.sub('[^' + ''.join(RUNES) + ']', '', f.read())
res = {x: 0 for x in RUNES} if gramsize == 1 else {}
for i in range(len(data) - gramsize + 1):
@@ -74,3 +73,4 @@ if __name__ == '__main__':
# make_translation(stream=False)
# make_ngrams(5)
print(NGrams.load(2))

View File

@@ -10,7 +10,7 @@ from Alphabet import RUNES, alphabet
from Rune import Rune
from RuneText import RuneText, RuneTextFile
from IOReader import load_indices, longest_no_interrupt
from IOReader import longest_no_interrupt
from IOWriter import IOWriter
from RuneSolver import SequenceSolver, VigenereSolver, AffineSolver, AutokeySolver