move OEIS solver to separate file + add reverse chapter and reverse word search

2021-02-16 20:20:14 +01:00
parent d2e5ad9b86
commit 1ddc30cd27
3 changed files with 130 additions and 96 deletions
--- a/LP/RuneText.py
+++ b/LP/RuneText.py
@@ -151,18 +151,18 @@ class RuneText(object):
    #         return False
    #     return all(x.index == y.index for x, y in zip(self, other))

-    def enum_words(self):  # [(start, end, len), ...] may include \n \r
+    def enum_words(self, reverse=False):  # [(start, end, len), ...]
        start = 0
        r_pos = 0
        word = []
-        for i, x in enumerate(self._data):
+        for i, x in enumerate(reversed(self._data) if reverse else self._data):
            if x.kind == 'r':
                r_pos += 1
                word.append(x)
            elif x.kind == 'l':
                continue
            else:
-                if len(word) > 0:
+                if len(word) > 0:  # RuneText may include \n and \r
                    yield start, i, r_pos - len(word), RuneText(word)
                    word = []
                start = i + 1
--- a/oeis.py
+++ b/oeis.py
@@ -0,0 +1,127 @@
+#!/usr/bin/env python3
+import LP
+import itertools
+
+# Sequences too short to cover this many leading words are skipped.
+WORDS_MIN_MATCH = 2
+# Default number of terms kept per OEIS sequence and rune budget per page.
+TRIM_AFTER = 40
+# Filled by load_db(): [(sequence_name, [term, ...]), ...]
+SEQS = []
+# Dictionary lookup by word length (index == length), filled by load_db().
+# Built with a comprehension so every slot is its own set; `[set()] * 13`
+# would store the very same set object 13 times.
+WORDS = [set() for _ in range(13)]
+
+
+def convert_orig_oeis(minlen=15, trim=TRIM_AFTER):
+    """Write the compact 'oeis' db from the raw 'oeis_orig' dump.
+
+    The raw dump is the unzipped https://oeis.org/stripped.gz file.
+    Lines starting with '#' are ignored. Every term is reduced modulo 29,
+    at most `trim` terms are kept per sequence, and sequences left with
+    fewer than `minlen` terms are not written.
+    """
+    with open(LP.path.db('oeis_orig'), 'r') as f_in, \
+            open(LP.path.db('oeis'), 'w') as f_out:
+        for line in f_in:
+            if line.startswith('#'):
+                continue
+            name, *terms = line.split(',')
+            # blank fields (e.g. the trailing newline after the last comma)
+            # are dropped before conversion
+            reduced = [str(int(t) % 29) for t in terms if t.strip()]
+            reduced = reduced[:trim]
+            if len(reduced) >= minlen:
+                f_out.write(name + ',' + ','.join(reduced) + '\n')
+
+
+def load_db():  # takes 3 seconds
+    """Fill the module-level SEQS and WORDS tables from the db/data files.
+
+    SEQS receives one (name, [int, ...]) tuple per line of the 'oeis' db.
+    WORDS[1] becomes the set of single runes and WORDS[2..12] the contents
+    of the matching 'dictionary_<len>' file, one entry per line.
+    """
+    print('load OEIS db ...')
+    with open(LP.path.db('oeis'), 'r') as f:
+        for line in f:
+            name, *terms = line.split(',')
+            SEQS.append((name, [int(t) for t in terms]))
+
+    print('load dictionary ...')
+    WORDS[1] = set(LP.RUNES)
+    for length in range(2, 13):  # since 12 is the longest word
+        with open(LP.path.data(f'dictionary_{length}'), 'r') as f:
+            WORDS[length] = {entry.strip() for entry in f}
+
+
+def enum_irp_combinations(irps):
+    """Yield every subset of `irps` (as tuples), smallest subsets first.
+
+    A non-empty subset is dropped when its first element minus the subset
+    size is at or beyond TRIM_AFTER. find_oeis() passes the positions in
+    descending order, so the first element is the largest position.
+    """
+    for size in range(len(irps) + 1):
+        for combo in itertools.combinations(irps, size):  # 2^3
+            if not combo or combo[0] - len(combo) < TRIM_AFTER:
+                yield combo
+
+
+def get_word_splits(data, irp, reverse=False, reverse_word=False):
+    """Collect the leading whole words of `data` within the TRIM_AFTER budget.
+
+    Walks data.enum_words(reverse=reverse) and stops at the first word that
+    no longer fits. Runes whose index equals `irp` do not count against the
+    budget. Returns a tuple (indices, interrupts, bounds):
+      indices    - rune indices of the accepted words (index 29 is skipped);
+                   each word is reversed if `reverse_word`, and the whole
+                   list is flipped if `reverse`
+      interrupts - positions of all runes whose index equals `irp`
+      bounds     - one (start, end) position pair per accepted word
+    """
+    indices = []
+    interrupts = []
+    bounds = []
+    budget = TRIM_AFTER  # same as trim above
+    for _, _, pos, word in data.enum_words(reverse=reverse):
+        hits = [pos + k for k, rune in enumerate(word) if rune.index == irp]
+        cost = len(word) - len(hits)
+        if cost > budget:  # include only full words
+            break
+        budget -= cost
+        interrupts.extend(hits)
+        bounds.append((pos, pos + len(word)))
+        runes = reversed(word) if reverse_word else word
+        indices.extend(r.index for r in runes if r.index != 29)
+    if reverse:
+        indices.reverse()
+    return indices, interrupts, bounds
+
+
+# invert:         28 - rune.index
+# reverse:        start chapter from the end
+# reverse_word:   start each word from the end, but keep sentence direction
+# allow_fails:    number of words that can be wrong
+# fail_threshold: at least one word w/ len x+1 must match, else all must match
+def find_oeis(irp=0, offset=0, invert=False, reverse=False, reverse_word=False,
+              allow_fails=1, fail_threshold=4):
+    """Search all unsolved pages for OEIS sequences that decode to words.
+
+    For every file in LP.FILES_UNSOLVED, every combination of interrupt
+    positions, every sequence in SEQS (starting at `offset`) and every
+    shift 0..28, each word is decoded as (rune - term - shift) % 29, with
+    interrupt positions left untouched. A candidate is printed when no more
+    than `allow_fails` words are missing from the dictionary and, if any
+    word failed, at least one matching word is longer than `fail_threshold`.
+
+    Requires load_db() to have been called. Results are printed only.
+    """
+    print()
+    print('irp:', irp, ' offset:', offset, ' invert:', invert,
+          ' reverse:', reverse, ' reverse_word:', reverse_word,
+          ' allow_fails:', allow_fails, ' fail_threshold:', fail_threshold)
+    longest_known = len(WORDS) - 1  # longest word length with a dictionary
+    # for fname in ['p56_an_end']:
+    for fname in LP.FILES_UNSOLVED:
+        data = LP.RuneTextFile(LP.path.page(fname))
+        if invert:
+            data.invert()
+        data, irps, splits = get_word_splits(data, irp, reverse, reverse_word)
+
+        print()
+        print(fname, 'words:', [y - x for x, y in splits])
+        if len(splits) < max(WORDS_MIN_MATCH, 1):
+            continue  # not enough words; indexing splits below would fail
+
+        irps.reverse()  # reverse to start inserting at the end
+        min_len = splits[WORDS_MIN_MATCH - 1][1]
+        max_len = splits[-1][1]
+        data = data[:max_len]
+
+        for comb in enum_irp_combinations(irps):
+            for oeis, vals in SEQS:  # 390k
+                vals = vals[offset:]  # slice copy, safe to mutate below
+                if len(vals) < min_len:
+                    continue
+                for z in comb:
+                    vals.insert(z, -1)  # insert interrupts
+                shortest = min(max_len, len(vals))
+                for s in range(29):
+                    failed = 0
+                    onematch = False
+                    full = []
+                    for a, b in splits:
+                        if b > shortest:
+                            break
+                        nums = [x if y == -1 else (x - y - s) % 29
+                                for x, y in zip(data[a:b], vals[a:b])]
+                        word = ''.join(LP.RUNES[x] for x in nums)
+                        # words longer than any dictionary count as a miss
+                        # instead of raising IndexError on WORDS
+                        if (len(nums) <= longest_known
+                                and word in WORDS[len(nums)]):
+                            if len(nums) > fail_threshold:
+                                onematch = True
+                        else:
+                            failed += 1
+                            if failed > allow_fails:
+                                break
+                        full.append(nums)
+
+                    if failed > allow_fails or failed > 0 and not onematch:
+                        continue  # too many failed
+                    print(oeis, 'shift:', s, 'irps:', comb)
+                    print(' ', ' '.join(LP.RuneText(x).text for x in full))
+
+
+if __name__ == '__main__':
+    # convert_orig_oeis()  # create db if not present already
+    load_db()
+    # try the first three starting offsets into every sequence
+    for start in range(3):
+        find_oeis(irp=0, offset=start, invert=False, reverse=False,
+                  reverse_word=False, allow_fails=1, fail_threshold=4)
--- a/solver.py
+++ b/solver.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 import LP
 import sys
-import itertools


 def load_sequence_file(fname):
@@ -105,100 +104,8 @@ def try_totient_on_unsolved():
            print(slvr.run(inpt)[0].text)


-def find_oeis(irp=0, invert=False, offset=0, allow_fails=1, min_match=2):
-    def trim_orig_oeis(minlen=15, trim=40):
-        # download and unzip: https://oeis.org/stripped.gz
-        with open(LP.path.db('oeis_orig'), 'r') as f_in:
-            with open(LP.path.db('oeis'), 'w') as f_out:
-                for line in f_in.readlines():
-                    if line[0] == '#':
-                        continue
-                    name, *vals = line.split(',')
-                    vals = [str(int(x) % 29) for x in vals if x.strip()][:trim]
-                    if len(vals) < minlen:
-                        continue
-                    f_out.write(name + ',' + ','.join(vals) + '\n')
-
-    # trim_orig_oeis()  # create db if not present already
-    with open(LP.path.db('oeis'), 'r') as f:
-        seqs = []
-        for line in f.readlines():
-            vals = line.split(',')
-            seqs.append((vals[0], list(map(int, vals[1:]))))
-
-    words = [set()] * 13
-    words[1] = set(x for x in LP.RUNES)
-    for i in range(2, 13):  # since 12 is the longest word
-        with open(LP.path.data(f'dictionary_{i}'), 'r') as f:
-            words[i] = set(x.strip() for x in f.readlines())
-
-    for uuu, wlen in {
-        'p0-2': [8, 5, 4, 3, 3, 11, 5, 4, 3, 3],
-        'p3-7': [2, 11, 3, 4, 7, 7, 7, 4, 6],
-        'p8-14': [4, 8, 3, 2, 3, 9, 4, 3, 4, 2, 2],
-        'p15-22': [4, 5, 4, 2, 5, 4, 5, 6, 5, 6, 3, 3],
-        'p23-26': [2, 6, 3, 4, 8, 3, 3, 7, 5, 5],
-        'p27-32': [3, 12, 4, 7, 2, 3, 3, 2, 1, 3, 4],
-        'p33-39': [2, 8, 2, 9, 6, 3, 3, 5, 3, 2],
-        'p40-53': [3, 5, 5, 4, 3, 5, 4, 2, 12, 3, 3, 2],
-        'p54-55': [1, 8, 8, 3, 6, 2, 5, 3, 2, 3, 5, 7],
-        # 'p56_an_end': [2, 3, 5, 2, 4, 3, 4, 6, 1, 4, 3, 6, 2],
-    }.items():
-        splits = [(0, 0, 0)]
-        for x in wlen:
-            splits.append((splits[-1][1], splits[-1][1] + x))
-        splits = splits[1:]
-        print()
-        print(uuu)
-        data = LP.RuneTextFile(LP.path.page(uuu), limit=120).index_no_white
-        if invert:
-            data = [28 - x for x in data]
-        irps = [i for i, x in enumerate(data[:splits[-1][1]]) if x == irp]
-        irps.reverse()  # insert -1 starting with the last
-
-        min_len = sum(wlen[:2])  # must match at least n words
-        data_len = len(data)
-        for oeis, vals in seqs:  # 390k
-            vals = vals[offset:]
-            if len(vals) < min_len:
-                continue
-            cases = [x for x in irps if x < len(vals)]
-            for i in range(len(cases) + 1):
-                for comb in itertools.combinations(cases, i):  # 2^3
-                    res = vals[:]
-                    for z in comb:
-                        res.insert(z, -1)  # insert interrupts
-                    shortest = min(data_len, len(res))
-
-                    for s in range(29):
-                        failed = 0
-                        full = []
-                        clen = 0
-                        for a, b in splits:
-                            if b > shortest:
-                                break
-                            nums = [x if y == -1 else (x - y - s) % 29
-                                    for x, y in zip(data[a:b], res[a:b])]
-                            word = ''.join(LP.RUNES[x] for x in nums)
-                            if word in words[len(nums)]:
-                                clen += len(nums)
-                            else:
-                                failed += 1
-                                if failed > allow_fails:
-                                    break
-                            full.append(LP.RuneText(nums).text)
-
-                        if failed > allow_fails or clen < min_match:
-                            continue  # too many failed
-                        print(oeis.split()[0], 'shift:', s, 'irps:', comb)
-                        print(' ', ' '.join(full))
-
-
 if '-s' in sys.argv:  # print [s]olved
    print_all_solved()
 else:
    play_around()
    # try_totient_on_unsolved()
-    # for i in range(0, 4):
-    #     print('offset:', i)
-    #     find_oeis(irp=0, invert=False, offset=i, allow_fails=1, min_match=10)