#!/usr/bin/env python3
"""Brute-force OEIS integer sequences as running keys over LP rune pages."""
import LP
import itertools

WORDS_MIN_MATCH = 2   # a sequence must cover at least this many words
TRIM_AFTER = 40       # max runes per page / values per sequence considered
SEQS = []             # [(oeis_name, [value % 29, ...]), ...] — filled by load_db
# WORDS[i] is the set of known i-rune words; 12 is the longest word.
# Independent sets: the original `[set()] * 13` aliased one set 13 times.
WORDS = [set() for _ in range(13)]


def convert_orig_oeis(minlen=15, trim=TRIM_AFTER):
    """Create the trimmed mod-29 OEIS db from the raw dump.

    Download and unzip https://oeis.org/stripped.gz first.  Sequences
    shorter than `minlen` are dropped; the rest are cut to `trim`
    values, each reduced mod 29 (the size of the rune alphabet).
    """
    with open(LP.path.db('oeis_orig'), 'r') as f_in, \
            open(LP.path.db('oeis'), 'w') as f_out:
        for line in f_in:  # stream line by line: the raw dump is large
            if line[0] == '#':
                continue
            name, *vals = line.split(',')
            vals = [str(int(x) % 29) for x in vals if x.strip()][:trim]
            if len(vals) < minlen:
                continue
            f_out.write(name + ',' + ','.join(vals) + '\n')


def load_db():  # takes 3 seconds
    """Fill the module-level SEQS and WORDS tables from disk."""
    print('load OEIS db ...')
    with open(LP.path.db('oeis'), 'r') as f:
        for line in f:  # stream instead of readlines(): ~390k sequences
            vals = line.split(',')
            SEQS.append((vals[0], list(map(int, vals[1:]))))

    print('load dictionary ...')
    WORDS[1] = set(x for x in LP.RUNES)
    for i in range(2, 13):  # since 12 is the longest word
        with open(LP.path.data(f'dictionary_{i}'), 'r') as f:
            WORDS[i] = set(x.strip() for x in f.readlines())


def enum_irp_combinations(irps):
    """Yield every subset of interrupt positions worth trying.

    `irps` is expected in descending order so that inserting at these
    positions (largest first) never shifts a later insertion point.
    """
    for n in range(len(irps) + 1):
        for combo in itertools.combinations(irps, n):  # 2^len(irps) total
            # NOTE(review): combo[0] is the largest position (descending
            # input); presumably this skips subsets whose first interrupt
            # would land beyond the TRIM_AFTER window — confirm.
            if combo and combo[0] - len(combo) >= TRIM_AFTER:
                continue
            yield combo
def get_word_splits(data, irp, reverse=False, reverse_word=False):
    """Flatten up to TRIM_AFTER runes of `data` into plain rune indices.

    Returns a 3-tuple:
      - the rune-index stream (always returned in forward order — when
        `reverse` is set, the reversed walk is undone at the end),
      - the positions whose rune index equals `irp` (interrupt candidates),
      - the (start, end) rune span of every complete word included.
    Only whole words are taken; the walk stops before a word that would
    push the non-interrupt rune count past TRIM_AFTER.
    """
    flat = []
    irp_positions = []
    spans = []
    budget = TRIM_AFTER  # same trim as the OEIS db above
    for _, _, pos, word in data.enum_words(reverse=reverse):
        hits = [pos + k for k, rune in enumerate(word) if rune.index == irp]
        if len(word) - len(hits) > budget:  # include only full words
            break
        budget -= len(word) - len(hits)
        irp_positions.extend(hits)
        spans.append((pos, pos + len(word)))
        for rune in (reversed(word) if reverse_word else word):
            if rune.index != 29:  # presumably a non-rune sentinel — confirm
                flat.append(rune.index)
    if reverse:
        flat.reverse()
    return flat, irp_positions, spans


# invert: 28 - rune.index
# reverse: start chapter from the end
# reverse_word: start each word from the end, but keep sentence direction
# allow_fails: number of words that can be wrong
# fail_threshold: at least one word w/ len x+1 must match, else all must match
def find_oeis(irp=0, offset=0, invert=False, reverse=False, reverse_word=False,
              allow_fails=1, fail_threshold=4):
    """Try every OEIS sequence as a running key against the unsolved pages."""
    print()
    print('irp:', irp, ' offset:', offset, ' invert:', invert,
          ' reverse:', reverse, ' reverse_word:', reverse_word,
          ' allow_fails:', allow_fails, ' fail_threshold:', fail_threshold)
    # for fname in ['p56_an_end']:
    for fname in LP.FILES_UNSOLVED:
        page = LP.RuneTextFile(LP.path.page(fname))
        if invert:
            page.invert()
        stream, irps, spans = get_word_splits(page, irp, reverse, reverse_word)
        irps.reverse()  # reverse to start inserting at the end
        min_len = spans[WORDS_MIN_MATCH - 1][1]  # key must reach this far
        max_len = spans[-1][1]
        stream = stream[:max_len]

        print()
        print(fname, 'words:', [y - x for x, y in spans])
        for comb in enum_irp_combinations(irps):
            for oeis, seq in SEQS:  # 390k
                key = seq[offset:]  # slice copies, so SEQS stays untouched
                if len(key) < min_len:
                    continue
                for z in comb:
                    key.insert(z, -1)  # insert interrupts
                shortest = min(max_len, len(key))
                for shift in range(29):
                    failed = 0
                    onematch = False
                    full = []
                    for lo, hi in spans:
                        if hi > shortest:
                            break
                        nums = [x if y == -1 else (x - y - shift) % 29
                                for x, y in zip(stream[lo:hi], key[lo:hi])]
                        word = ''.join(LP.RUNES[x] for x in nums)
                        if word in WORDS[len(nums)]:
                            if len(nums) > fail_threshold:
                                onematch = True
                        else:
                            failed += 1
                            if failed > allow_fails:
                                break
                        full.append(nums)

                    if failed > allow_fails or failed > 0 and not onematch:
                        continue  # too many failed
                    print(oeis, 'shift:', shift, 'irps:', comb)
                    print(' ', ' '.join(LP.RuneText(x).text for x in full))


if __name__ == '__main__':
    # convert_orig_oeis()  # create db if not present already
    load_db()
    for i in range(0, 3):
        find_oeis(irp=0, offset=i, invert=False, reverse=False,
                  reverse_word=False, allow_fails=1, fail_threshold=4)
- for uuu, wlen in { - 'p0-2': [8, 5, 4, 3, 3, 11, 5, 4, 3, 3], - 'p3-7': [2, 11, 3, 4, 7, 7, 7, 4, 6], - 'p8-14': [4, 8, 3, 2, 3, 9, 4, 3, 4, 2, 2], - 'p15-22': [4, 5, 4, 2, 5, 4, 5, 6, 5, 6, 3, 3], - 'p23-26': [2, 6, 3, 4, 8, 3, 3, 7, 5, 5], - 'p27-32': [3, 12, 4, 7, 2, 3, 3, 2, 1, 3, 4], - 'p33-39': [2, 8, 2, 9, 6, 3, 3, 5, 3, 2], - 'p40-53': [3, 5, 5, 4, 3, 5, 4, 2, 12, 3, 3, 2], - 'p54-55': [1, 8, 8, 3, 6, 2, 5, 3, 2, 3, 5, 7], - # 'p56_an_end': [2, 3, 5, 2, 4, 3, 4, 6, 1, 4, 3, 6, 2], - }.items(): - splits = [(0, 0, 0)] - for x in wlen: - splits.append((splits[-1][1], splits[-1][1] + x)) - splits = splits[1:] - print() - print(uuu) - data = LP.RuneTextFile(LP.path.page(uuu), limit=120).index_no_white - if invert: - data = [28 - x for x in data] - irps = [i for i, x in enumerate(data[:splits[-1][1]]) if x == irp] - irps.reverse() # insert -1 starting with the last - - min_len = sum(wlen[:2]) # must match at least n words - data_len = len(data) - for oeis, vals in seqs: # 390k - vals = vals[offset:] - if len(vals) < min_len: - continue - cases = [x for x in irps if x < len(vals)] - for i in range(len(cases) + 1): - for comb in itertools.combinations(cases, i): # 2^3 - res = vals[:] - for z in comb: - res.insert(z, -1) # insert interrupts - shortest = min(data_len, len(res)) - - for s in range(29): - failed = 0 - full = [] - clen = 0 - for a, b in splits: - if b > shortest: - break - nums = [x if y == -1 else (x - y - s) % 29 - for x, y in zip(data[a:b], res[a:b])] - word = ''.join(LP.RUNES[x] for x in nums) - if word in words[len(nums)]: - clen += len(nums) - else: - failed += 1 - if failed > allow_fails: - break - full.append(LP.RuneText(nums).text) - - if failed > allow_fails or clen < min_match: - continue # too many failed - print(oeis.split()[0], 'shift:', s, 'irps:', comb) - print(' ', ' '.join(full)) - - if '-s' in sys.argv: # print [s]olved print_all_solved() else: play_around() # try_totient_on_unsolved() - # for i in range(0, 4): - # print('offset:', 
i) - # find_oeis(irp=0, invert=False, offset=i, allow_fails=1, min_match=10)