typing + smaller bugfixes

2022-04-09 03:45:48 +02:00
parent a25b62d934
commit d0c5072d27
15 changed files with 415 additions and 199 deletions
--- a/examples/podcast-download/generic_rss.py
+++ b/examples/podcast-download/generic_rss.py
@@ -1,6 +1,8 @@
 #!/usr/bin/env python3
 import os
 from sys import stderr
+from typing import Dict, Any, Optional, TextIO
+from datetime import datetime  # typing

 from botlib.cli import Cli
 from botlib.curl import Curl
@@ -8,7 +10,8 @@ from botlib.feed2list import Feed2List
 from botlib.helper import StrFormat, FileWrite


-def main():
+def main() -> None:
+    ''' CLI entry. '''
    cli = Cli()
    cli.arg_dir('dest_dir', help='Download all entries here')
    cli.arg('source', help='RSS file or web-url')
@@ -25,10 +28,16 @@ def main():
        print('ERROR: ' + str(e), file=stderr)


-def process(source, dest_dir, *, by_year=False, dry_run=False):
+def process(
+    source: str,  # local file path or remote url
+    dest_dir: str,
+    *, by_year: bool = False,
+    dry_run: bool = False
+) -> bool:
+    ''' Parse a full podcast file / source. '''
    # open source
    if os.path.isfile(source):
-        fp = open(source)  # closed in Feed2List
+        fp = open(source)  # type: Optional[TextIO] # closed in Feed2List
    elif Curl.valid_url(source):
        fp = Curl.get(source)  # closed in Feed2List
    else:
@@ -41,7 +50,7 @@ def process(source, dest_dir, *, by_year=False, dry_run=False):
        'pubDate', 'media:content',  # image
        # 'itunes:image', 'itunes:duration', 'itunes:summary'
    ])):
-        date = entry.get('pubDate')  # try RSS only
+        date = entry['pubDate']  # type: datetime  # try RSS only
        if by_year:
            dest = os.path.join(dest_dir, str(date.year))
            if not dry_run and not os.path.exists(dest):
@@ -50,7 +59,13 @@ def process(source, dest_dir, *, by_year=False, dry_run=False):
    return True


-def process_entry(entry, date, dest_dir, *, dry_run=False):
+def process_entry(
+    entry: Dict[str, Any],
+    date: datetime,
+    dest_dir: str,
+    *, dry_run: bool = False
+) -> None:
+    ''' Parse a single podcast media entry. '''
    title = entry['title']
    # <enclosure url="*.mp3" length="47216000" type="audio/mpeg"/>
    audio_url = entry.get('enclosure', {}).get('url')
@@ -78,10 +93,11 @@ def process_entry(entry, date, dest_dir, *, dry_run=False):

    @FileWrite.once(dest_dir, fname + '.txt', date, override=False,
                    dry_run=dry_run, verbose=True, intro=flag or intro)
-    def _description():
-        desc = title + '\n' + '=' * len(title)
-        desc += '\n\n' + StrFormat.strip_html(entry.get('description', ''))
-        return desc + '\n\n\n' + entry.get('link', '') + '\n'
+    def _description() -> str:
+        return '{}\n{}\n\n{}\n\n\n{}\n'.format(
+            title, '=' * len(title),
+            StrFormat.strip_html(entry.get('description', '')),
+            entry.get('link', ''))


 if __name__ == '__main__':
--- a/examples/podcast-download/radiolab.py
+++ b/examples/podcast-download/radiolab.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import os
 from sys import stderr
+from typing import Dict, Tuple, Optional, Any

 from botlib.cli import Cli
 from botlib.curl import Curl, URLError
@@ -15,7 +16,8 @@ db_slugs = OnceDB('radiolab_slugs.sqlite')
 os.environ['TZ'] = 'America/New_York'


-def main():
+def main() -> None:
+    ''' CLI entry. '''
    cli = Cli()
    cli.arg_dir('dest_dir', help='Download all episodes to dest_dir/year/')
    cli.arg_bool('--dry-run', help='Do not download, just parse')
@@ -36,9 +38,17 @@ def main():
    print('\nDone.\n\nNow check MP3 tags (consistency).')


-def processEpisodeList(basedir, title, query, index=1, *, dry_run=False):
+def processEpisodeList(
+    basedir: str,
+    title: str,
+    query: str,
+    index: int = 1,
+    *, dry_run: bool = False
+) -> None:
+    ''' Parse full podcast category. '''
    print('\nProcessing: {}'.format(title), end='')
-    dat = Curl.json('{}/channel/shows/{}/{}?limit=9'.format(API, query, index))
+    url = '{}/channel/shows/{}/{}?limit=9'.format(API, query, index)
+    dat = Curl.json(url)  # type: Dict[str, Any]
    total = dat['data']['attributes']['total-pages']
    print(' ({}/{})'.format(index, total))
    anything_new = False
@@ -49,7 +59,12 @@ def processEpisodeList(basedir, title, query, index=1, *, dry_run=False):
        processEpisodeList(basedir, title, query, index + 1, dry_run=dry_run)


-def processEpisode(obj, basedir, *, dry_run=False):
+def processEpisode(
+    obj: Dict[str, Any],
+    basedir: str,
+    *, dry_run: bool = False
+) -> bool:
+    ''' Parse a single podcast episode. '''
    uid = obj['cms-pk']
    if db_ids.contains(COHORT, uid):
        return False  # Already exists
@@ -86,18 +101,18 @@ def processEpisode(obj, basedir, *, dry_run=False):

    @FileWrite.once(dest_dir, fname + '.txt', date, override=False,
                    dry_run=dry_run, verbose=True, intro=flag or intro)
-    def write_description():
+    def write_description() -> str:
        nonlocal flag
        flag = True
-        desc = title + '\n' + '=' * len(title)
-        desc += '\n\n' + StrFormat.strip_html(obj['body'])
+        desc = '{}\n{}\n\n{}'.format(
+            title, '=' * len(title), StrFormat.strip_html(obj['body']))
        if img_desc:
            desc += '\n\n' + img_desc
-        return desc + '\n\n\n' + obj['url'].strip() + '\n'  # link to article
+        return '{}\n\n\n{}\n'.format(desc, obj['url'].strip())  # article link

    @FileWrite.once(dest_dir, fname + '.transcript.txt', date, override=False,
                    dry_run=dry_run, verbose=True, intro=flag or intro)
-    def write_transcript():
+    def write_transcript() -> Optional[str]:
        nonlocal flag
        flag = True
        data = StrFormat.strip_html(obj['transcript'])
@@ -111,7 +126,8 @@ def processEpisode(obj, basedir, *, dry_run=False):
    return flag  # potentially need to query the next page too


-def get_img_desc(obj):
+def get_img_desc(obj: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
+    ''' Extract image description. '''
    if not obj:
        return (None, None)
    url = (obj['url'] or '').strip()
@@ -135,7 +151,8 @@ def get_img_desc(obj):
 # -> inurl:radiolab/episodes site:wnycstudios.org
 # Then regex:  /episodes/([^;]*?)" onmousedown

-def processSingle(slug, basedir):
+def processSingle(slug: str, basedir: str) -> None:
+    ''' [internal] process single episode if only the slug is known. '''
    # cms-pk = 91947 , slug = '91947-do-i-know-you'
    all_slugs = [slug for _, _, _, slug in db_slugs]
    if slug not in all_slugs:
--- a/examples/telegram-send/main-interactive.py
+++ b/examples/telegram-send/main-interactive.py
@@ -1,20 +1,20 @@
 #!/usr/bin/env python3
-from botlib.tgclient import TGClient
+from botlib.tgclient import TGClient, Message

 bot = TGClient(__API_KEY__, polling=True, allowedUsers=['my-username'])


@bot.message_handler(commands=['hi'])
-def bot_reply(message):
+def bot_reply(message: Message) -> None:
    if bot.allowed(message):  # only reply to a single user (my-username)
        bot.reply_to(message, 'Good evening my dear.')


@bot.message_handler(commands=['set'])
-def update_config(message):
+def update_config(message: Message) -> None:
    if bot.allowed(message):
        try:
-            config = data_store.get(message.chat.id)
+            config = DATA_STORE.get(message.chat.id)
        except KeyError:
            bot.reply_to(message, 'Not found.')
            return
@@ -28,32 +28,32 @@ def update_config(message):


@bot.message_handler(commands=['start'])
-def new_chat_info(message):
+def new_chat_info(message: Message) -> None:
    bot.log_chat_info(message.chat)
    if bot.allowed(message):
-        if data_store.get(message.chat.id):
+        if DATA_STORE.get(message.chat.id):
            bot.reply_to(message, 'Already exists')
        else:
            CreateNew(message)


 class CreateNew:
-    def __init__(self, message):
+    def __init__(self, message: Message) -> None:
        self.ask_name(message)

-    def ask_name(self, message):
+    def ask_name(self, message: Message) -> None:
        msg = bot.send_force_reply(message.chat.id, 'Enter Name:')
        bot.register_next_step_handler(msg, self.ask_interval)

-    def ask_interval(self, message):
+    def ask_interval(self, message: Message) -> None:
        self.name = message.text
        msg = bot.send_buttons(message.chat.id, 'Update interval (minutes):',
                               options=[3, 5, 10, 15, 30, 60])
        bot.register_next_step_handler(msg, self.finish)

-    def finish(self, message):
+    def finish(self, message: Message) -> None:
        try:
-            interval = int(message.text)
+            interval = int(message.text or 'error')
        except ValueError:
            bot.send_abort_keyboard(message, 'Not a number. Aborting.')
            return
--- a/examples/telegram-send/main-recurring.py
+++ b/examples/telegram-send/main-recurring.py
@@ -17,15 +17,15 @@ bot.set_on_kill(cron.stop)


 def main():
-    def clean_db(_):
+    def clean_db(_) -> None:
        Log.info('[clean up]')
        OnceDB('cache.sqlite').cleanup(limit=150)

-    def notify_jobA(_):
+    def notify_jobA(_) -> None:
        jobA.download(topic='development', cohort='dev:py')
        send2telegram(__A_CHAT_ID__)

-    def notify_jobB(_):
+    def notify_jobB(_) -> None:
        jobB.download()
        send2telegram(__ANOTHER_CHAT_ID__)

@@ -37,14 +37,15 @@ def main():
    # cron.fire()


-def send2telegram(chat_id):
+def send2telegram(chat_id: int) -> None:
    db = OnceDB('cache.sqlite')
    # db.mark_all_done()

-    def _send(cohort, uid, obj):
+    def _send(cohort: str, uid: str, obj: str) -> bool:
        Log.info('[push] {} {}'.format(cohort, uid))
-        return bot.send(chat_id, obj, parse_mode='HTML',
-                        disable_web_page_preview=True)
+        msg = bot.send(chat_id, obj, parse_mode='HTML',
+                       disable_web_page_preview=True)
+        return msg is not None

    if not db.foreach(_send):
        # send() sleeps 45 sec (on error), safe to call immediately
--- a/examples/web-scraper/news_vice.py
+++ b/examples/web-scraper/news_vice.py
@@ -4,7 +4,7 @@ from botlib.html2list import HTML2List, MatchGroup
 from botlib.oncedb import OnceDB


-def download(*, topic='motherboard', cohort='vice:motherboard'):
+def download(*, topic: str = 'motherboard', cohort: str = 'vice:mb') -> None:
    db = OnceDB('cache.sqlite')
    url = 'https://www.vice.com/en/topic/{}'.format(topic)

--- a/examples/web-scraper/shopping_craigslist.py
+++ b/examples/web-scraper/shopping_craigslist.py
@@ -2,19 +2,26 @@
 from botlib.curl import Curl
 from botlib.html2list import HTML2List, MatchGroup
 from botlib.oncedb import OnceDB
+from typing import Optional, Callable, TextIO

 CRAIGSLIST = 'https://newyork.craigslist.org/search/boo'


-def load(url):
+def load(url: str) -> Optional[TextIO]:
    # return open('test.html')
    return Curl.get(url)


-def download():
+def download() -> None:
    db = OnceDB('cache.sqlite')

-    def proc(cohort, source, select, regex={}, fn=str):
+    def proc(
+        cohort: str,
+        source: Optional[TextIO],
+        select: str,
+        regex: dict = {},
+        fn: Callable[[MatchGroup], str] = str
+    ) -> None:
        match = MatchGroup(regex)
        for elem in reversed(HTML2List(select).parse(source)):
            match.set_html(elem)