remove load_indices()
This commit is contained in:
@@ -1,32 +1,5 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: UTF-8 -*-
|
||||
import re # load_indices
|
||||
from Alphabet import RUNES
|
||||
from RuneText import RuneText
|
||||
|
||||
re_norune = re.compile('[^' + ''.join(RUNES) + ']')
|
||||
|
||||
|
||||
#########################################
|
||||
# load page and convert to indices for faster access
|
||||
#########################################
|
||||
|
||||
def load_indices(fname, interrupt, maxinterrupt=None, minlen=None, limit=None):
    """Read a rune page and return its rune indices, optionally cut short.

    The file content is stripped of every character matched by
    ``re_norune`` (i.e. everything that is not one of ``RUNES``), wrapped in
    ``RuneText`` and its ``index_no_white`` sequence is sliced to ``limit``.

    Args:
        fname: Path of the page file to read.
        interrupt: Index value that counts as an interrupt.
        maxinterrupt: Number of interrupts to keep in the result. ``None``
            disables cutting; ``0`` returns the text up to, but not
            including, the first interrupt.
        minlen: If truthy, a cut position smaller than ``minlen`` is
            rejected and the search continues with the next interrupt.
        limit: Upper slice bound applied before any interrupt handling.

    Returns:
        The (possibly shortened) index sequence. If fewer interrupts than
        requested are found, the whole sequence is returned.
    """
    # Runes are non-ASCII; do not depend on the platform default encoding.
    # NOTE(review): assumes the page files are stored as UTF-8 - confirm.
    with open(fname, 'r', encoding='utf-8') as f:
        data = RuneText(re_norune.sub('', f.read())).index_no_white[:limit]
    if maxinterrupt is None:
        return data

    # Count down on a local so the caller-visible argument stays untouched.
    remaining = maxinterrupt
    for i, x in enumerate(data):
        if x != interrupt:
            continue
        if remaining == 0:
            # Cut would be too short: wait for a later interrupt instead.
            if minlen and i < minlen:
                continue
            # incl. everything up to but not including this interrupt
            return data[:i]
        remaining -= 1
    return data
|
||||
|
||||
|
||||
#########################################
|
||||
# find the longest chunk in a list of indices, which does not include an irp
|
||||
|
||||
@@ -2,8 +2,9 @@
|
||||
# -*- coding: UTF-8 -*-
|
||||
import os
|
||||
from InterruptSearch import InterruptSearch
|
||||
from InterruptIndices import InterruptIndices
|
||||
from Probability import Probability
|
||||
from IOReader import load_indices
|
||||
from RuneText import RuneTextFile
|
||||
from LPath import FILES_ALL, FILES_UNSOLVED, LPath
|
||||
|
||||
|
||||
@@ -82,15 +83,23 @@ class InterruptDB(object):
|
||||
# helper functions
|
||||
#########################################
|
||||
|
||||
def create_initial_db(dbname, fn_score, klset=range(1, 33),
|
||||
max_irp=20, irpset=range(29)):
|
||||
def get_db(fname, irp, max_irp):
    """Create an ``InterruptDB`` for one page and one interrupt index.

    The second element of the pair returned by
    ``InterruptIndices().consider(...)`` is used as the slice end for the
    page's ``index_no_white`` sequence; the sliced data and ``irp`` are then
    handed to ``InterruptDB``.

    Args:
        fname: Page name, resolved to a path through ``LPath.page``.
        irp: Interrupt index to build the database for.
        max_irp: Passed through unchanged to ``InterruptIndices.consider``.
    """
    # Hard-wired switch: when True, the lookup uses 28 - irp and every
    # loaded index is mirrored to 28 - x.
    inverse = False
    lookup_irp = 28 - irp if inverse else irp
    _unused, cutoff = InterruptIndices().consider(fname, lookup_irp, max_irp)
    indices = RuneTextFile(LPath.page(fname)).index_no_white[:cutoff]
    if inverse:
        indices = [28 - value for value in indices]
    return InterruptDB(indices, irp)
|
||||
|
||||
|
||||
def create_primary(dbname, fn_score, klset=range(1, 33),
|
||||
max_irp=20, irpset=range(29)):
|
||||
oldDB = InterruptDB.load(dbname)
|
||||
oldValues = {k: set((a, b, c) for a, _, b, c, _ in v)
|
||||
for k, v in oldDB.items()}
|
||||
for irp in irpset: # interrupt rune index
|
||||
for name in FILES_ALL:
|
||||
data = load_indices(LPath.page(name), irp, maxinterrupt=max_irp)
|
||||
db = InterruptDB(data, irp)
|
||||
db = get_db(name, irp, max_irp)
|
||||
print('load:', name, 'interrupt:', irp, 'count:', db.irp_count)
|
||||
for keylen in klset: # key length
|
||||
if (db.irp_count, irp, keylen) in oldValues.get(name, []):
|
||||
@@ -100,8 +109,7 @@ def create_initial_db(dbname, fn_score, klset=range(1, 33),
|
||||
print(f'{keylen}: {score:.4f}, solutions: {len(interrupts)}')
|
||||
|
||||
|
||||
def find_secondary_solutions(db_in, db_out, fn_score,
|
||||
threshold=0.75, max_irp=20):
|
||||
def create_secondary(db_in, db_out, fn_score, threshold=0.75, max_irp=20):
|
||||
oldDB = InterruptDB.load(db_in)
|
||||
search_set = set()
|
||||
for name, arr in oldDB.items():
|
||||
@@ -114,16 +122,15 @@ def find_secondary_solutions(db_in, db_out, fn_score,
|
||||
print('searching through', len(search_set), 'files.')
|
||||
for name, irp, kl in search_set:
|
||||
print('load:', name, 'interrupt:', irp, 'keylen:', kl)
|
||||
data = load_indices(LPath.page(name), irp, maxinterrupt=max_irp)
|
||||
db = InterruptDB(data, irp)
|
||||
db = get_db(name, irp, max_irp)
|
||||
c = db.make_secondary(db_out, name, kl, fn_score, threshold)
|
||||
print('found', c, 'additional solutions')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
create_initial_db('db_high', Probability.IC_w_keylen, max_irp=20)
|
||||
create_initial_db('db_norm', Probability.target_diff, max_irp=20)
|
||||
# find_secondary_solutions('db_high', 'db_high_secondary',
|
||||
# Probability.IC_w_keylen, threshold=1.4)
|
||||
# find_secondary_solutions('db_norm', 'db_norm_secondary',
|
||||
# Probability.target_diff, threshold=0.55)
|
||||
create_primary('db_high', Probability.IC_w_keylen, max_irp=20)
|
||||
create_primary('db_norm', Probability.target_diff, max_irp=20)
|
||||
# create_secondary('db_high', 'db_high_secondary',
|
||||
# Probability.IC_w_keylen, threshold=1.4)
|
||||
# create_secondary('db_norm', 'db_norm_secondary',
|
||||
# Probability.target_diff, threshold=0.55)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: UTF-8 -*-
|
||||
from IOReader import load_indices
|
||||
from LPath import FILES_ALL, LPath
|
||||
from RuneText import RuneTextFile
|
||||
|
||||
|
||||
#########################################
|
||||
@@ -20,18 +20,18 @@ class InterruptIndices(object):
|
||||
def total(self, name):
|
||||
return self.pos[name]['total']
|
||||
|
||||
def longest_no_interrupt(self, name, irp, irpmax=0):
|
||||
irpmax += 1
|
||||
nums = self.pos[name]['pos'][irp] + [self.pos[name]['total']] * irpmax
|
||||
ret = [(y - x, x) for x, y in zip(nums, nums[irpmax:])]
|
||||
return sorted(ret, reverse=True)
|
||||
# def longest_no_interrupt(self, name, irp, irpmax=0):
|
||||
# irpmax += 1
|
||||
# nums = self.pos[name]['pos'][irp] + [self.pos[name]['total']] * irpmax
|
||||
# ret = [(y - x, x) for x, y in zip(nums, nums[irpmax:])]
|
||||
# return sorted(ret, reverse=True)
|
||||
|
||||
@staticmethod
|
||||
def write(dbname='db_indices'):
|
||||
with open(LPath.db(dbname), 'w') as f:
|
||||
f.write('# file | total runes in file | interrupt | indices\n')
|
||||
for name in FILES_ALL:
|
||||
data = load_indices(LPath.page(name), 0)
|
||||
data = RuneTextFile(LPath.page(name)).index_no_white
|
||||
total = len(data)
|
||||
nums = [[] for x in range(29)]
|
||||
for idx, rune in enumerate(data):
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
# -*- coding: UTF-8 -*-
|
||||
import re
|
||||
from Alphabet import RUNES
|
||||
from IOReader import re_norune
|
||||
from RuneText import RuneText
|
||||
from LPath import LPath
|
||||
|
||||
@@ -33,7 +32,7 @@ class NGrams(object):
|
||||
@staticmethod
|
||||
def make(gramsize, infile, outfile):
|
||||
with open(infile, 'r') as f:
|
||||
data = re_norune.sub('', f.read())
|
||||
data = re.sub('[^' + ''.join(RUNES) + ']', '', f.read())
|
||||
|
||||
res = {x: 0 for x in RUNES} if gramsize == 1 else {}
|
||||
for i in range(len(data) - gramsize + 1):
|
||||
@@ -74,3 +73,4 @@ if __name__ == '__main__':
|
||||
|
||||
# make_translation(stream=False)
|
||||
# make_ngrams(5)
|
||||
print(NGrams.load(2))
|
||||
|
||||
@@ -10,7 +10,7 @@ from Alphabet import RUNES, alphabet
|
||||
from Rune import Rune
|
||||
from RuneText import RuneText, RuneTextFile
|
||||
|
||||
from IOReader import load_indices, longest_no_interrupt
|
||||
from IOReader import longest_no_interrupt
|
||||
from IOWriter import IOWriter
|
||||
|
||||
from RuneSolver import SequenceSolver, VigenereSolver, AffineSolver, AutokeySolver
|
||||
|
||||
Reference in New Issue
Block a user