114 lines
4.0 KiB
Python
Executable File
114 lines
4.0 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
'''
|
|
Usage: Load existing `OnceDB()` and `put(cohort, uid, obj)` new entries.
|
|
The db ensures that (cohort, uid) pairs are unique. You can add as
|
|
many times as you like. Use an (reversed) iterator to enumerate
|
|
outstanding entries `for rowid, cohort, uid, obj in reversed(db)`.
|
|
Call `mark_done(rowid)` to not process an item again.
|
|
|
|
Once in a while call `cleanup()` to remove old entries.
|
|
'''
|
|
import sqlite3
|
|
from typing import Tuple, Any, Callable, Iterator
|
|
|
|
DBEntry = Tuple[int, str, str, Any]
|
|
|
|
|
|
class OnceDB:
|
|
def __init__(self, db_path: str) -> None:
|
|
self._db = sqlite3.connect(db_path)
|
|
self._db.execute('''
|
|
CREATE TABLE IF NOT EXISTS queue(
|
|
ts DATE DEFAULT (strftime('%s', 'now')),
|
|
cohort TEXT NOT NULL,
|
|
uid TEXT NOT NULL,
|
|
obj BLOB, -- NULL signals a done mark. OR: introduce new var
|
|
PRIMARY KEY (cohort, uid) -- SQLite will auto-create index
|
|
);
|
|
''')
|
|
|
|
def __del__(self) -> None:
|
|
self._db.close()
|
|
|
|
def cleanup(self, limit: int = 200) -> None:
|
|
''' Delete oldest (cohort) entries if more than limit exist. '''
|
|
self._db.execute('''
|
|
WITH _tmp AS (
|
|
SELECT ROWID, row_number() OVER (
|
|
PARTITION BY cohort ORDER by ROWID DESC) AS c
|
|
FROM queue
|
|
WHERE obj IS NULL
|
|
)
|
|
DELETE FROM queue
|
|
WHERE ROWID in (SELECT ROWID from _tmp WHERE c > ?);
|
|
''', (limit,))
|
|
self._db.commit()
|
|
|
|
def put(self, cohort: str, uid: str, obj: str) -> bool:
|
|
''' Silently ignore if a duplicate (cohort, uid) is added. '''
|
|
try:
|
|
self._db.execute('''
|
|
INSERT INTO queue (cohort, uid, obj) VALUES (?, ?, ?);
|
|
''', (cohort, uid, obj))
|
|
self._db.commit()
|
|
return True
|
|
except sqlite3.IntegrityError:
|
|
# entry (cohort, uid) already exists
|
|
return False
|
|
|
|
def contains(self, cohort: str, uid: str) -> bool:
|
|
''' Test if cohort + uid pair exists in database. '''
|
|
cur = self._db.cursor()
|
|
cur.execute('''
|
|
SELECT 1 FROM queue WHERE cohort IS ? AND uid is ? LIMIT 1;
|
|
''', (cohort, uid))
|
|
flag = cur.fetchone() is not None
|
|
cur.close()
|
|
return flag
|
|
|
|
def mark_done(self, rowid: int) -> None:
|
|
''' Mark (ROWID) as done. Entry remains in cache until cleanup(). '''
|
|
if not isinstance(rowid, int):
|
|
raise AttributeError('Not of type ROWID: {}'.format(rowid))
|
|
self._db.execute('UPDATE queue SET obj = NULL WHERE ROWID = ?;',
|
|
(rowid, ))
|
|
self._db.commit()
|
|
|
|
def mark_all_done(self) -> None:
|
|
''' Mark all entries done. Entry remains in cache until cleanup(). '''
|
|
self._db.execute('UPDATE queue SET obj = NULL;')
|
|
self._db.commit()
|
|
|
|
def foreach(
|
|
self,
|
|
callback: Callable[[str, str, Any], bool],
|
|
*, reverse: bool = False
|
|
) -> bool:
|
|
'''
|
|
Exec for all until callback evaluates to false (or end of list).
|
|
Automatically marks entries as done (only on success).
|
|
'''
|
|
for rowid, *elem in reversed(self) if reverse else self:
|
|
if callback(*elem):
|
|
self.mark_done(rowid)
|
|
else:
|
|
return False
|
|
return True
|
|
|
|
def __iter__(self) -> Iterator[DBEntry]:
|
|
return self.iter()
|
|
|
|
def __reversed__(self) -> Iterator[DBEntry]:
|
|
return self.iter(desc=True)
|
|
|
|
def iter(self, *, desc: bool = False) -> Iterator[DBEntry]:
|
|
''' Perform query on all un-marked / not-done entries. '''
|
|
cur = self._db.cursor()
|
|
cur.execute('''
|
|
SELECT ROWID, cohort, uid, obj FROM queue
|
|
WHERE obj IS NOT NULL
|
|
ORDER BY ROWID {};
|
|
'''.format('DESC' if desc else 'ASC'))
|
|
yield from cur.fetchall()
|
|
cur.close()
|