#!/usr/bin/env python3
"""Search the unsolved pages for a running key taken from an OEIS sequence.

Every sequence of the (mod 29) OEIS database is subtracted from the rune
indices of a page, for every shift 0..28 and every combination of
interrupt positions.  A candidate is printed when the resulting words are
found in the dictionary.
"""
import itertools

import LP

WORDS_MIN_MATCH = 2  # a sequence must cover at least this many words
TRIM_AFTER = 40  # number of sequence values / runes that are compared
MAX_WORD_LEN = 12  # longest word length a dictionary file exists for
SEQS = []  # filled by load_db(): (sequence name, [values mod 29])
# WORDS[n] is the set of known words with n runes.  Each slot gets its own
# set object; ``[set()] * n`` would alias one single set in every slot.
WORDS = [set() for _ in range(MAX_WORD_LEN + 1)]


def convert_orig_oeis(minlen=15, trim=TRIM_AFTER):
    """Convert the raw OEIS dump into the reduced database read by load_db.

    Reads ``LP.path.db('oeis_orig')`` and writes ``LP.path.db('oeis')``.
    Lines starting with ``#`` are skipped, every value is reduced modulo 29
    and only the first ``trim`` values of a sequence are kept.  Sequences
    with fewer than ``minlen`` values are dropped.

    The input file must be fetched manually:
    download and unzip https://oeis.org/stripped.gz
    """
    with open(LP.path.db('oeis_orig'), 'r') as f_in, \
            open(LP.path.db('oeis'), 'w') as f_out:
        for line in f_in:  # iterate lazily, the dump is large
            if line.startswith('#'):
                continue
            name, *vals = line.split(',')
            vals = [str(int(x) % 29) for x in vals if x.strip()][:trim]
            if len(vals) < minlen:
                continue
            f_out.write(name + ',' + ','.join(vals) + '\n')


def load_db():  # takes 3 seconds
    """Fill the module-level SEQS and WORDS tables from disk.

    SEQS receives one ``(name, values)`` tuple per line of
    ``LP.path.db('oeis')``.  WORDS[1] is built from ``LP.RUNES`` and
    WORDS[2..MAX_WORD_LEN] from the ``dictionary_<n>`` data files.
    """
    print('load OEIS db ...')
    with open(LP.path.db('oeis'), 'r') as f:
        for line in f:
            name, *vals = line.split(',')
            SEQS.append((name, [int(x) for x in vals]))
    print('load dictionary ...')
    WORDS[1] = set(LP.RUNES)
    for i in range(2, MAX_WORD_LEN + 1):  # since 12 is the longest word
        with open(LP.path.data(f'dictionary_{i}'), 'r') as f:
            WORDS[i] = {x.strip() for x in f}


def enum_irp_combinations(irps):
    """Yield every subset of ``irps`` as a tuple, smallest subsets first.

    The empty tuple is always yielded.  A combination is skipped when
    ``x[0] - len(x) >= TRIM_AFTER``.
    NOTE(review): the caller passes the positions in reversed order, so
    ``x[0]`` is presumably the largest position and the test discards
    combinations that lie entirely behind the compared range - confirm.
    """
    for size in range(len(irps) + 1):
        for comb in itertools.combinations(irps, size):  # 2^3
            if comb and comb[0] - len(comb) >= TRIM_AFTER:
                continue
            yield comb


def get_word_splits(data, irp, reverse=False, reverse_word=False):
    """Collect the leading words of ``data`` until TRIM_AFTER is used up.

    ``data.enum_words(reverse=...)`` must yield 4-tuples whose third item
    is the start index of a word and whose fourth item is the word itself
    (a sequence of runes with an ``index`` attribute).

    Runes whose index equals ``irp`` are potential interrupts and do not
    count against the TRIM_AFTER budget.  Only complete words are taken.

    Returns a tuple ``(new_data, irps, splits)``:
      new_data: rune indices of the collected words (runes with index 29
                are left out), reversed as a whole if ``reverse`` is set
      irps:     positions of all potential interrupts
      splits:   ``(start, end)`` index pair of every collected word
    """
    new_data = []
    irps = []
    splits = []
    max_len = TRIM_AFTER  # same as trim above
    for _, _, i, w in data.enum_words(reverse=reverse):
        irp_is = [i + ii for ii, r in enumerate(w) if r.index == irp]
        if (len(w) - len(irp_is)) > max_len:  # include only full words
            break
        max_len = max_len + len(irp_is) - len(w)
        irps += irp_is
        splits.append((i, i + len(w)))
        for r in (reversed(w) if reverse_word else w):
            if r.index != 29:
                new_data.append(r.index)
    return new_data[::-1 if reverse else 1], irps, splits


def _match_words(chunks, vals, limit, shift, allow_fails, fail_threshold):
    """Decrypt the words in ``chunks`` with ``vals`` and check the dictionary.

    ``chunks`` holds ``(start, end, cipher)`` triples, ``cipher`` being the
    rune indices of one word.  A key value of -1 marks an interrupt: that
    rune is kept as is.  Words ending behind ``limit`` are not evaluated,
    and evaluation stops once more than ``allow_fails`` words are unknown.

    Returns ``(failed, onematch, full)``: the number of unknown words,
    whether a known word longer than ``fail_threshold`` was found, and the
    decrypted index lists of all accepted words.
    """
    failed = 0
    onematch = False
    full = []
    for a, b, cipher in chunks:
        if b > limit:
            break
        nums = [x if y == -1 else (x - y - shift) % 29
                for x, y in zip(cipher, vals[a:b])]
        word = ''.join(LP.RUNES[x] for x in nums)
        # A word longer than the largest dictionary cannot be looked up;
        # count it as unknown instead of failing with an IndexError.
        if len(nums) < len(WORDS) and word in WORDS[len(nums)]:
            if len(nums) > fail_threshold:
                onematch = True
        else:
            failed += 1
            if failed > allow_fails:
                break
        full.append(nums)
    return failed, onematch, full


def find_oeis(irp=0, offset=0, invert=False, reverse=False,
              reverse_word=False, allow_fails=1, fail_threshold=4):
    """Try every loaded OEIS sequence as key for every unsolved page.

    Results are printed, nothing is returned.  load_db() must have been
    called before.

    irp:            rune index that is treated as a potential interrupt
    offset:         number of leading sequence values to skip
    invert:         28 - rune.index
    reverse:        start chapter from the end
    reverse_word:   read each word from its end, but keep sentence direction
    allow_fails:    number of words that can be wrong
    fail_threshold: at least one word w/ len x+1 must match, else all must
                    match
    """
    print()
    print('irp:', irp, ' offset:', offset, ' invert:', invert,
          ' reverse:', reverse, ' reverse_word:', reverse_word,
          ' allow_fails:', allow_fails, ' fail_threshold:', fail_threshold)
    # debugging hint: restrict the loop to one page, e.g. ['p56_an_end']
    for fname in LP.FILES_UNSOLVED:
        data = LP.RuneTextFile(LP.path.page(fname))
        if invert:
            data.invert()
        data, irps, splits = get_word_splits(data, irp, reverse, reverse_word)
        if not splits or len(splits) < WORDS_MIN_MATCH:
            # too few words to evaluate; indexing splits below would fail
            print()
            print(fname, 'skipped: fewer than', WORDS_MIN_MATCH, 'words')
            continue
        irps.reverse()  # reverse to start inserting at the end
        min_len = splits[WORDS_MIN_MATCH - 1][1]
        max_len = splits[-1][1]
        data = data[:max_len]
        # the cipher slice of a word is the same for all sequences and shifts
        chunks = [(a, b, data[a:b]) for a, b in splits]
        print()
        print(fname, 'words:', [y - x for x, y in splits])
        for comb in enum_irp_combinations(irps):
            for oeis, vals in SEQS:  # 390k
                vals = vals[offset:]  # slicing copies, SEQS stays untouched
                if len(vals) < min_len:
                    continue
                for z in comb:
                    vals.insert(z, -1)  # insert interrupts
                shortest = min(max_len, len(vals))
                for s in range(29):
                    failed, onematch, full = _match_words(
                        chunks, vals, shortest, s, allow_fails,
                        fail_threshold)
                    if failed > allow_fails or (failed > 0 and not onematch):
                        continue  # too many failed
                    print(oeis, 'shift:', s, 'irps:', comb)
                    print(' ', ' '.join(LP.RuneText(x).text for x in full))


if __name__ == '__main__':
    # convert_orig_oeis()  # create db if not present already
    load_db()
    for i in range(3):
        find_oeis(irp=0, offset=i, invert=False, reverse=False,
                  reverse_word=False, allow_fails=1, fail_threshold=4)