move OEIS solver to separate file + add reverse chapter and reverse word search
This commit is contained in:
@@ -151,18 +151,18 @@ class RuneText(object):
|
|||||||
# return False
|
# return False
|
||||||
# return all(x.index == y.index for x, y in zip(self, other))
|
# return all(x.index == y.index for x, y in zip(self, other))
|
||||||
|
|
||||||
def enum_words(self): # [(start, end, len), ...] may include \n \r
|
def enum_words(self, reverse=False): # [(start, end, len), ...]
|
||||||
start = 0
|
start = 0
|
||||||
r_pos = 0
|
r_pos = 0
|
||||||
word = []
|
word = []
|
||||||
for i, x in enumerate(self._data):
|
for i, x in enumerate(reversed(self._data) if reverse else self._data):
|
||||||
if x.kind == 'r':
|
if x.kind == 'r':
|
||||||
r_pos += 1
|
r_pos += 1
|
||||||
word.append(x)
|
word.append(x)
|
||||||
elif x.kind == 'l':
|
elif x.kind == 'l':
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
if len(word) > 0:
|
if len(word) > 0: # RuneText may include \n and \r
|
||||||
yield start, i, r_pos - len(word), RuneText(word)
|
yield start, i, r_pos - len(word), RuneText(word)
|
||||||
word = []
|
word = []
|
||||||
start = i + 1
|
start = i + 1
|
||||||
|
|||||||
127
oeis.py
Executable file
127
oeis.py
Executable file
@@ -0,0 +1,127 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
import LP
|
||||||
|
import itertools
|
||||||
|
|
||||||
|
# A candidate sequence must be long enough to cover this many leading words.
WORDS_MIN_MATCH = 2
# Default number of terms kept per sequence, and the rune budget per page.
TRIM_AFTER = 40
# (name, values) tuples; filled by load_db().
SEQS = []
# WORDS[n] holds the known words of length n (index 0 stays empty).
# Build 13 distinct sets: ``[set()] * 13`` would repeat one shared set object,
# so an in-place update of any slot would silently show up in all of them.
WORDS = [set() for _ in range(13)]
|
||||||
|
|
||||||
|
|
||||||
|
def convert_orig_oeis(minlen=15, trim=TRIM_AFTER):
    """Convert the original OEIS dump into the compact db used by load_db().

    Reads ``LP.path.db('oeis_orig')`` (download and unzip
    https://oeis.org/stripped.gz) and writes ``LP.path.db('oeis')`` with one
    ``name,v1,v2,...`` line per sequence, every value reduced modulo 29.

    minlen: sequences with fewer values than this (after trimming) are dropped
    trim: maximum number of values kept per sequence
    """
    with open(LP.path.db('oeis_orig'), 'r') as f_in, \
            open(LP.path.db('oeis'), 'w') as f_out:
        # Stream line by line instead of loading the whole dump at once.
        for line in f_in:
            if line.startswith('#'):
                continue  # comment / header line
            name, *vals = line.split(',')
            # Blank fields (e.g. the one after a trailing comma) are skipped.
            vals = [str(int(x) % 29) for x in vals if x.strip()][:trim]
            if len(vals) < minlen:
                continue
            # Strip whitespace around the id so consumers get a clean name.
            f_out.write(name.strip() + ',' + ','.join(vals) + '\n')
|
||||||
|
|
||||||
|
|
||||||
|
def load_db():  # takes 3 seconds
    """Fill the module-level SEQS and WORDS tables from disk.

    SEQS receives one ``(name, [int, ...])`` tuple per non-empty line of
    ``LP.path.db('oeis')``. WORDS[1] becomes the set of all runes and
    WORDS[2..12] are read from the ``dictionary_<n>`` data files.
    """
    print('load OEIS db ...')
    SEQS.clear()  # calling load_db() twice must not duplicate every sequence
    with open(LP.path.db('oeis'), 'r') as f:
        for line in f:
            name, *vals = line.strip().split(',')
            name = name.strip()
            if not name:
                continue  # blank line
            SEQS.append((name, [int(x) for x in vals if x.strip()]))

    print('load dictionary ...')
    WORDS[1] = set(LP.RUNES)
    for i in range(2, 13):  # since 12 is the longest word
        with open(LP.path.data(f'dictionary_{i}'), 'r') as f:
            WORDS[i] = {x.strip() for x in f}
|
||||||
|
|
||||||
|
|
||||||
|
def enum_irp_combinations(irps):
    """Yield every subset of ``irps`` as a tuple, smallest subsets first.

    A non-empty subset is dropped when its first element minus the subset
    size is at or beyond TRIM_AFTER. The empty tuple is always yielded.
    """
    for size in range(len(irps) + 1):
        for subset in itertools.combinations(irps, size):  # 2^3
            if not subset or subset[0] - len(subset) < TRIM_AFTER:
                yield subset
|
||||||
|
|
||||||
|
|
||||||
|
def get_word_splits(data, irp, reverse=False, reverse_word=False):
    """Collect rune indices, interrupt positions and word bounds for a page.

    Walks ``data.enum_words(reverse=reverse)`` and takes whole words until
    the budget of TRIM_AFTER runes (not counting runes equal to ``irp``)
    would be exceeded.

    Returns a tuple ``(indices, interrupts, bounds)``:
    indices: rune indices of the accepted words (index 29 is left out)
    interrupts: positions of all runes whose index equals ``irp``
    bounds: one ``(start, end)`` pair per accepted word
    """
    indices = []
    interrupts = []
    bounds = []
    budget = TRIM_AFTER  # same as trim above
    for _, _, pos, word in data.enum_words(reverse=reverse):
        hits = [pos + k for k, rune in enumerate(word) if rune.index == irp]
        needed = len(word) - len(hits)
        if needed > budget:  # include only full words
            break
        budget -= needed
        interrupts.extend(hits)
        bounds.append((pos, pos + len(word)))
        ordered = reversed(word) if reverse_word else word
        indices.extend(rune.index for rune in ordered if rune.index != 29)
    # NOTE(review): with reverse=True the indices are flipped once more here,
    # while interrupts and bounds keep the positions reported by enum_words.
    # Confirm that this is the intended alignment.
    if reverse:
        indices.reverse()
    return indices, interrupts, bounds
|
||||||
|
|
||||||
|
|
||||||
|
# invert: 28 - rune.index
# reverse: start chapter from the end
# reverse_word: start each word from the end, but keep sentence direction
# allow_fails: number of words that can be wrong
# fail_threshold: at least one word w/ len x+1 must match, else all must match
def find_oeis(irp=0, offset=0, invert=False, reverse=False, reverse_word=False,
              allow_fails=1, fail_threshold=4):
    """Try every loaded OEIS sequence as a key on each unsolved page.

    For each page in ``LP.FILES_UNSOLVED``, each interrupt combination, each
    sequence in SEQS (with the first ``offset`` values dropped) and each
    shift 0..28, the leading words are decoded as ``(rune - value - shift)
    % 29`` and looked up in WORDS. Combinations that pass the fail limits
    are printed. Nothing is returned.
    """
    print()
    print('irp:', irp, ' offset:', offset, ' invert:', invert,
          ' reverse:', reverse, ' reverse_word:', reverse_word,
          ' allow_fails:', allow_fails, ' fail_threshold:', fail_threshold)
    # for fname in ['p56_an_end']:
    for fname in LP.FILES_UNSOLVED:
        data = LP.RuneTextFile(LP.path.page(fname))
        if invert:
            data.invert()
        data, irps, splits = get_word_splits(data, irp, reverse, reverse_word)
        if len(splits) < max(WORDS_MIN_MATCH, 1):
            # Too few words to index splits below; skip instead of crashing.
            print()
            print(fname, 'skipped: too few words')
            continue
        irps.reverse()  # reverse to start inserting at the end
        min_len = splits[WORDS_MIN_MATCH - 1][1]
        max_len = splits[-1][1]
        data = data[:max_len]

        print()
        print(fname, 'words:', [y - x for x, y in splits])
        for comb in enum_irp_combinations(irps):
            for oeis, vals in SEQS:  # 390k
                vals = vals[offset:]  # slicing copies, SEQS stays untouched
                if len(vals) < min_len:
                    continue
                for z in comb:
                    vals.insert(z, -1)  # insert interrupts
                shortest = min(max_len, len(vals))
                for s in range(29):
                    failed = 0
                    onematch = False
                    full = []
                    for a, b in splits:
                        if b > shortest:
                            break
                        # -1 marks an interrupt: the rune is kept unchanged
                        nums = [x if y == -1 else (x - y - s) % 29
                                for x, y in zip(data[a:b], vals[a:b])]
                        word = ''.join(LP.RUNES[x] for x in nums)
                        # Words longer than the dictionary table cannot match.
                        known = (len(nums) < len(WORDS)
                                 and word in WORDS[len(nums)])
                        if known:
                            if len(nums) > fail_threshold:
                                onematch = True
                        else:
                            failed += 1
                            if failed > allow_fails:
                                break
                        full.append(nums)

                    if failed > allow_fails or (failed > 0 and not onematch):
                        continue  # too many failed
                    print(oeis.strip(), 'shift:', s, 'irps:', comb)
                    print(' ', ' '.join(LP.RuneText(x).text for x in full))
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # convert_orig_oeis()  # create db if not present already
    load_db()
    # Run the search with the first 0, 1 and 2 sequence values skipped.
    for skip in range(3):
        find_oeis(irp=0, offset=skip, invert=False, reverse=False,
                  reverse_word=False, allow_fails=1, fail_threshold=4)
|
||||||
93
solver.py
93
solver.py
@@ -1,7 +1,6 @@
|
|||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
import LP
|
import LP
|
||||||
import sys
|
import sys
|
||||||
import itertools
|
|
||||||
|
|
||||||
|
|
||||||
def load_sequence_file(fname):
|
def load_sequence_file(fname):
|
||||||
@@ -105,100 +104,8 @@ def try_totient_on_unsolved():
|
|||||||
print(slvr.run(inpt)[0].text)
|
print(slvr.run(inpt)[0].text)
|
||||||
|
|
||||||
|
|
||||||
def find_oeis(irp=0, invert=False, offset=0, allow_fails=1, min_match=2):
|
|
||||||
def trim_orig_oeis(minlen=15, trim=40):
|
|
||||||
# download and unzip: https://oeis.org/stripped.gz
|
|
||||||
with open(LP.path.db('oeis_orig'), 'r') as f_in:
|
|
||||||
with open(LP.path.db('oeis'), 'w') as f_out:
|
|
||||||
for line in f_in.readlines():
|
|
||||||
if line[0] == '#':
|
|
||||||
continue
|
|
||||||
name, *vals = line.split(',')
|
|
||||||
vals = [str(int(x) % 29) for x in vals if x.strip()][:trim]
|
|
||||||
if len(vals) < minlen:
|
|
||||||
continue
|
|
||||||
f_out.write(name + ',' + ','.join(vals) + '\n')
|
|
||||||
|
|
||||||
# trim_orig_oeis() # create db if not present already
|
|
||||||
with open(LP.path.db('oeis'), 'r') as f:
|
|
||||||
seqs = []
|
|
||||||
for line in f.readlines():
|
|
||||||
vals = line.split(',')
|
|
||||||
seqs.append((vals[0], list(map(int, vals[1:]))))
|
|
||||||
|
|
||||||
words = [set()] * 13
|
|
||||||
words[1] = set(x for x in LP.RUNES)
|
|
||||||
for i in range(2, 13): # since 12 is the longest word
|
|
||||||
with open(LP.path.data(f'dictionary_{i}'), 'r') as f:
|
|
||||||
words[i] = set(x.strip() for x in f.readlines())
|
|
||||||
|
|
||||||
for uuu, wlen in {
|
|
||||||
'p0-2': [8, 5, 4, 3, 3, 11, 5, 4, 3, 3],
|
|
||||||
'p3-7': [2, 11, 3, 4, 7, 7, 7, 4, 6],
|
|
||||||
'p8-14': [4, 8, 3, 2, 3, 9, 4, 3, 4, 2, 2],
|
|
||||||
'p15-22': [4, 5, 4, 2, 5, 4, 5, 6, 5, 6, 3, 3],
|
|
||||||
'p23-26': [2, 6, 3, 4, 8, 3, 3, 7, 5, 5],
|
|
||||||
'p27-32': [3, 12, 4, 7, 2, 3, 3, 2, 1, 3, 4],
|
|
||||||
'p33-39': [2, 8, 2, 9, 6, 3, 3, 5, 3, 2],
|
|
||||||
'p40-53': [3, 5, 5, 4, 3, 5, 4, 2, 12, 3, 3, 2],
|
|
||||||
'p54-55': [1, 8, 8, 3, 6, 2, 5, 3, 2, 3, 5, 7],
|
|
||||||
# 'p56_an_end': [2, 3, 5, 2, 4, 3, 4, 6, 1, 4, 3, 6, 2],
|
|
||||||
}.items():
|
|
||||||
splits = [(0, 0, 0)]
|
|
||||||
for x in wlen:
|
|
||||||
splits.append((splits[-1][1], splits[-1][1] + x))
|
|
||||||
splits = splits[1:]
|
|
||||||
print()
|
|
||||||
print(uuu)
|
|
||||||
data = LP.RuneTextFile(LP.path.page(uuu), limit=120).index_no_white
|
|
||||||
if invert:
|
|
||||||
data = [28 - x for x in data]
|
|
||||||
irps = [i for i, x in enumerate(data[:splits[-1][1]]) if x == irp]
|
|
||||||
irps.reverse() # insert -1 starting with the last
|
|
||||||
|
|
||||||
min_len = sum(wlen[:2]) # must match at least n words
|
|
||||||
data_len = len(data)
|
|
||||||
for oeis, vals in seqs: # 390k
|
|
||||||
vals = vals[offset:]
|
|
||||||
if len(vals) < min_len:
|
|
||||||
continue
|
|
||||||
cases = [x for x in irps if x < len(vals)]
|
|
||||||
for i in range(len(cases) + 1):
|
|
||||||
for comb in itertools.combinations(cases, i): # 2^3
|
|
||||||
res = vals[:]
|
|
||||||
for z in comb:
|
|
||||||
res.insert(z, -1) # insert interrupts
|
|
||||||
shortest = min(data_len, len(res))
|
|
||||||
|
|
||||||
for s in range(29):
|
|
||||||
failed = 0
|
|
||||||
full = []
|
|
||||||
clen = 0
|
|
||||||
for a, b in splits:
|
|
||||||
if b > shortest:
|
|
||||||
break
|
|
||||||
nums = [x if y == -1 else (x - y - s) % 29
|
|
||||||
for x, y in zip(data[a:b], res[a:b])]
|
|
||||||
word = ''.join(LP.RUNES[x] for x in nums)
|
|
||||||
if word in words[len(nums)]:
|
|
||||||
clen += len(nums)
|
|
||||||
else:
|
|
||||||
failed += 1
|
|
||||||
if failed > allow_fails:
|
|
||||||
break
|
|
||||||
full.append(LP.RuneText(nums).text)
|
|
||||||
|
|
||||||
if failed > allow_fails or clen < min_match:
|
|
||||||
continue # too many failed
|
|
||||||
print(oeis.split()[0], 'shift:', s, 'irps:', comb)
|
|
||||||
print(' ', ' '.join(full))
|
|
||||||
|
|
||||||
|
|
||||||
if '-s' in sys.argv: # print [s]olved
|
if '-s' in sys.argv: # print [s]olved
|
||||||
print_all_solved()
|
print_all_solved()
|
||||||
else:
|
else:
|
||||||
play_around()
|
play_around()
|
||||||
# try_totient_on_unsolved()
|
# try_totient_on_unsolved()
|
||||||
# for i in range(0, 4):
|
|
||||||
# print('offset:', i)
|
|
||||||
# find_oeis(irp=0, invert=False, offset=i, allow_fails=1, min_match=10)
|
|
||||||
|
|||||||
Reference in New Issue
Block a user