remove load_indices()
This commit is contained in:
@@ -1,32 +1,5 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: UTF-8 -*-
|
# -*- coding: UTF-8 -*-
|
||||||
import re # load_indices
|
|
||||||
from Alphabet import RUNES
|
|
||||||
from RuneText import RuneText
|
|
||||||
|
|
||||||
re_norune = re.compile('[^' + ''.join(RUNES) + ']')
|
|
||||||
|
|
||||||
|
|
||||||
#########################################
|
|
||||||
# load page and convert to indices for faster access
|
|
||||||
#########################################
|
|
||||||
|
|
||||||
def load_indices(fname, interrupt, maxinterrupt=None, minlen=None, limit=None):
|
|
||||||
with open(fname, 'r') as f:
|
|
||||||
data = RuneText(re_norune.sub('', f.read())).index_no_white[:limit]
|
|
||||||
if maxinterrupt is not None:
|
|
||||||
# incl. everything up to but not including next interrupt
|
|
||||||
# e.g., maxinterrupt = 0 will return text until first interrupt
|
|
||||||
for i, x in enumerate(data):
|
|
||||||
if x != interrupt:
|
|
||||||
continue
|
|
||||||
if maxinterrupt == 0:
|
|
||||||
if minlen and i < minlen:
|
|
||||||
continue
|
|
||||||
return data[:i]
|
|
||||||
maxinterrupt -= 1
|
|
||||||
return data
|
|
||||||
|
|
||||||
|
|
||||||
#########################################
|
#########################################
|
||||||
# find the longest chunk in a list of indices, which does not include an irp
|
# find the longest chunk in a list of indices, which does not include an irp
|
||||||
|
|||||||
@@ -2,8 +2,9 @@
|
|||||||
# -*- coding: UTF-8 -*-
|
# -*- coding: UTF-8 -*-
|
||||||
import os
|
import os
|
||||||
from InterruptSearch import InterruptSearch
|
from InterruptSearch import InterruptSearch
|
||||||
|
from InterruptIndices import InterruptIndices
|
||||||
from Probability import Probability
|
from Probability import Probability
|
||||||
from IOReader import load_indices
|
from RuneText import RuneTextFile
|
||||||
from LPath import FILES_ALL, FILES_UNSOLVED, LPath
|
from LPath import FILES_ALL, FILES_UNSOLVED, LPath
|
||||||
|
|
||||||
|
|
||||||
@@ -82,15 +83,23 @@ class InterruptDB(object):
|
|||||||
# helper functions
|
# helper functions
|
||||||
#########################################
|
#########################################
|
||||||
|
|
||||||
def create_initial_db(dbname, fn_score, klset=range(1, 33),
|
def get_db(fname, irp, max_irp):
|
||||||
max_irp=20, irpset=range(29)):
|
T = False # inverse
|
||||||
|
_, Z = InterruptIndices().consider(fname, 28 - irp if T else irp, max_irp)
|
||||||
|
data = RuneTextFile(LPath.page(fname)).index_no_white[:Z]
|
||||||
|
if T:
|
||||||
|
data = [28 - x for x in data]
|
||||||
|
return InterruptDB(data, irp)
|
||||||
|
|
||||||
|
|
||||||
|
def create_primary(dbname, fn_score, klset=range(1, 33),
|
||||||
|
max_irp=20, irpset=range(29)):
|
||||||
oldDB = InterruptDB.load(dbname)
|
oldDB = InterruptDB.load(dbname)
|
||||||
oldValues = {k: set((a, b, c) for a, _, b, c, _ in v)
|
oldValues = {k: set((a, b, c) for a, _, b, c, _ in v)
|
||||||
for k, v in oldDB.items()}
|
for k, v in oldDB.items()}
|
||||||
for irp in irpset: # interrupt rune index
|
for irp in irpset: # interrupt rune index
|
||||||
for name in FILES_ALL:
|
for name in FILES_ALL:
|
||||||
data = load_indices(LPath.page(name), irp, maxinterrupt=max_irp)
|
db = get_db(name, irp, max_irp)
|
||||||
db = InterruptDB(data, irp)
|
|
||||||
print('load:', name, 'interrupt:', irp, 'count:', db.irp_count)
|
print('load:', name, 'interrupt:', irp, 'count:', db.irp_count)
|
||||||
for keylen in klset: # key length
|
for keylen in klset: # key length
|
||||||
if (db.irp_count, irp, keylen) in oldValues.get(name, []):
|
if (db.irp_count, irp, keylen) in oldValues.get(name, []):
|
||||||
@@ -100,8 +109,7 @@ def create_initial_db(dbname, fn_score, klset=range(1, 33),
|
|||||||
print(f'{keylen}: {score:.4f}, solutions: {len(interrupts)}')
|
print(f'{keylen}: {score:.4f}, solutions: {len(interrupts)}')
|
||||||
|
|
||||||
|
|
||||||
def find_secondary_solutions(db_in, db_out, fn_score,
|
def create_secondary(db_in, db_out, fn_score, threshold=0.75, max_irp=20):
|
||||||
threshold=0.75, max_irp=20):
|
|
||||||
oldDB = InterruptDB.load(db_in)
|
oldDB = InterruptDB.load(db_in)
|
||||||
search_set = set()
|
search_set = set()
|
||||||
for name, arr in oldDB.items():
|
for name, arr in oldDB.items():
|
||||||
@@ -114,16 +122,15 @@ def find_secondary_solutions(db_in, db_out, fn_score,
|
|||||||
print('searching through', len(search_set), 'files.')
|
print('searching through', len(search_set), 'files.')
|
||||||
for name, irp, kl in search_set:
|
for name, irp, kl in search_set:
|
||||||
print('load:', name, 'interrupt:', irp, 'keylen:', kl)
|
print('load:', name, 'interrupt:', irp, 'keylen:', kl)
|
||||||
data = load_indices(LPath.page(name), irp, maxinterrupt=max_irp)
|
db = get_db(name, irp, max_irp)
|
||||||
db = InterruptDB(data, irp)
|
|
||||||
c = db.make_secondary(db_out, name, kl, fn_score, threshold)
|
c = db.make_secondary(db_out, name, kl, fn_score, threshold)
|
||||||
print('found', c, 'additional solutions')
|
print('found', c, 'additional solutions')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
create_initial_db('db_high', Probability.IC_w_keylen, max_irp=20)
|
create_primary('db_high', Probability.IC_w_keylen, max_irp=20)
|
||||||
create_initial_db('db_norm', Probability.target_diff, max_irp=20)
|
create_primary('db_norm', Probability.target_diff, max_irp=20)
|
||||||
# find_secondary_solutions('db_high', 'db_high_secondary',
|
# create_secondary('db_high', 'db_high_secondary',
|
||||||
# Probability.IC_w_keylen, threshold=1.4)
|
# Probability.IC_w_keylen, threshold=1.4)
|
||||||
# find_secondary_solutions('db_norm', 'db_norm_secondary',
|
# create_secondary('db_norm', 'db_norm_secondary',
|
||||||
# Probability.target_diff, threshold=0.55)
|
# Probability.target_diff, threshold=0.55)
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# -*- coding: UTF-8 -*-
|
# -*- coding: UTF-8 -*-
|
||||||
from IOReader import load_indices
|
|
||||||
from LPath import FILES_ALL, LPath
|
from LPath import FILES_ALL, LPath
|
||||||
|
from RuneText import RuneTextFile
|
||||||
|
|
||||||
|
|
||||||
#########################################
|
#########################################
|
||||||
@@ -20,18 +20,18 @@ class InterruptIndices(object):
|
|||||||
def total(self, name):
|
def total(self, name):
|
||||||
return self.pos[name]['total']
|
return self.pos[name]['total']
|
||||||
|
|
||||||
def longest_no_interrupt(self, name, irp, irpmax=0):
|
# def longest_no_interrupt(self, name, irp, irpmax=0):
|
||||||
irpmax += 1
|
# irpmax += 1
|
||||||
nums = self.pos[name]['pos'][irp] + [self.pos[name]['total']] * irpmax
|
# nums = self.pos[name]['pos'][irp] + [self.pos[name]['total']] * irpmax
|
||||||
ret = [(y - x, x) for x, y in zip(nums, nums[irpmax:])]
|
# ret = [(y - x, x) for x, y in zip(nums, nums[irpmax:])]
|
||||||
return sorted(ret, reverse=True)
|
# return sorted(ret, reverse=True)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def write(dbname='db_indices'):
|
def write(dbname='db_indices'):
|
||||||
with open(LPath.db(dbname), 'w') as f:
|
with open(LPath.db(dbname), 'w') as f:
|
||||||
f.write('# file | total runes in file | interrupt | indices\n')
|
f.write('# file | total runes in file | interrupt | indices\n')
|
||||||
for name in FILES_ALL:
|
for name in FILES_ALL:
|
||||||
data = load_indices(LPath.page(name), 0)
|
data = RuneTextFile(LPath.page(name)).index_no_white
|
||||||
total = len(data)
|
total = len(data)
|
||||||
nums = [[] for x in range(29)]
|
nums = [[] for x in range(29)]
|
||||||
for idx, rune in enumerate(data):
|
for idx, rune in enumerate(data):
|
||||||
|
|||||||
@@ -2,7 +2,6 @@
|
|||||||
# -*- coding: UTF-8 -*-
|
# -*- coding: UTF-8 -*-
|
||||||
import re
|
import re
|
||||||
from Alphabet import RUNES
|
from Alphabet import RUNES
|
||||||
from IOReader import re_norune
|
|
||||||
from RuneText import RuneText
|
from RuneText import RuneText
|
||||||
from LPath import LPath
|
from LPath import LPath
|
||||||
|
|
||||||
@@ -33,7 +32,7 @@ class NGrams(object):
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def make(gramsize, infile, outfile):
|
def make(gramsize, infile, outfile):
|
||||||
with open(infile, 'r') as f:
|
with open(infile, 'r') as f:
|
||||||
data = re_norune.sub('', f.read())
|
data = re.sub('[^' + ''.join(RUNES) + ']', '', f.read())
|
||||||
|
|
||||||
res = {x: 0 for x in RUNES} if gramsize == 1 else {}
|
res = {x: 0 for x in RUNES} if gramsize == 1 else {}
|
||||||
for i in range(len(data) - gramsize + 1):
|
for i in range(len(data) - gramsize + 1):
|
||||||
@@ -74,3 +73,4 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
# make_translation(stream=False)
|
# make_translation(stream=False)
|
||||||
# make_ngrams(5)
|
# make_ngrams(5)
|
||||||
|
print(NGrams.load(2))
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from Alphabet import RUNES, alphabet
|
|||||||
from Rune import Rune
|
from Rune import Rune
|
||||||
from RuneText import RuneText, RuneTextFile
|
from RuneText import RuneText, RuneTextFile
|
||||||
|
|
||||||
from IOReader import load_indices, longest_no_interrupt
|
from IOReader import longest_no_interrupt
|
||||||
from IOWriter import IOWriter
|
from IOWriter import IOWriter
|
||||||
|
|
||||||
from RuneSolver import SequenceSolver, VigenereSolver, AffineSolver, AutokeySolver
|
from RuneSolver import SequenceSolver, VigenereSolver, AffineSolver, AutokeySolver
|
||||||
|
|||||||
Reference in New Issue
Block a user