typing + smaller bugfixes
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
from sys import stderr
|
||||
from typing import Dict, Any, Optional, TextIO
|
||||
from datetime import datetime # typing
|
||||
|
||||
from botlib.cli import Cli
|
||||
from botlib.curl import Curl
|
||||
@@ -8,7 +10,8 @@ from botlib.feed2list import Feed2List
|
||||
from botlib.helper import StrFormat, FileWrite
|
||||
|
||||
|
||||
def main():
|
||||
def main() -> None:
|
||||
''' CLI entry. '''
|
||||
cli = Cli()
|
||||
cli.arg_dir('dest_dir', help='Download all entries here')
|
||||
cli.arg('source', help='RSS file or web-url')
|
||||
@@ -25,10 +28,16 @@ def main():
|
||||
print('ERROR: ' + str(e), file=stderr)
|
||||
|
||||
|
||||
def process(source, dest_dir, *, by_year=False, dry_run=False):
|
||||
def process(
|
||||
source: str, # local file path or remote url
|
||||
dest_dir: str,
|
||||
*, by_year: bool = False,
|
||||
dry_run: bool = False
|
||||
) -> bool:
|
||||
''' Parse a full podcast file / source. '''
|
||||
# open source
|
||||
if os.path.isfile(source):
|
||||
fp = open(source) # closed in Feed2List
|
||||
fp = open(source) # type: Optional[TextIO] # closed in Feed2List
|
||||
elif Curl.valid_url(source):
|
||||
fp = Curl.get(source) # closed in Feed2List
|
||||
else:
|
||||
@@ -41,7 +50,7 @@ def process(source, dest_dir, *, by_year=False, dry_run=False):
|
||||
'pubDate', 'media:content', # image
|
||||
# 'itunes:image', 'itunes:duration', 'itunes:summary'
|
||||
])):
|
||||
date = entry.get('pubDate') # try RSS only
|
||||
date = entry['pubDate'] # try RSS only # type: datetime
|
||||
if by_year:
|
||||
dest = os.path.join(dest_dir, str(date.year))
|
||||
if not dry_run and not os.path.exists(dest):
|
||||
@@ -50,7 +59,13 @@ def process(source, dest_dir, *, by_year=False, dry_run=False):
|
||||
return True
|
||||
|
||||
|
||||
def process_entry(entry, date, dest_dir, *, dry_run=False):
|
||||
def process_entry(
|
||||
entry: Dict[str, Any],
|
||||
date: datetime,
|
||||
dest_dir: str,
|
||||
*, dry_run: bool = False
|
||||
) -> None:
|
||||
''' Parse a single podcast media entry. '''
|
||||
title = entry['title']
|
||||
# <enclosure url="*.mp3" length="47216000" type="audio/mpeg"/>
|
||||
audio_url = entry.get('enclosure', {}).get('url')
|
||||
@@ -78,10 +93,11 @@ def process_entry(entry, date, dest_dir, *, dry_run=False):
|
||||
|
||||
@FileWrite.once(dest_dir, fname + '.txt', date, override=False,
|
||||
dry_run=dry_run, verbose=True, intro=flag or intro)
|
||||
def _description():
|
||||
desc = title + '\n' + '=' * len(title)
|
||||
desc += '\n\n' + StrFormat.strip_html(entry.get('description', ''))
|
||||
return desc + '\n\n\n' + entry.get('link', '') + '\n'
|
||||
def _description() -> str:
|
||||
return '{}\n{}\n\n{}\n\n\n{}\n'.format(
|
||||
title, '=' * len(title),
|
||||
StrFormat.strip_html(entry.get('description', '')),
|
||||
entry.get('link', ''))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
from sys import stderr
|
||||
from typing import Dict, Tuple, Optional, Any
|
||||
|
||||
from botlib.cli import Cli
|
||||
from botlib.curl import Curl, URLError
|
||||
@@ -15,7 +16,8 @@ db_slugs = OnceDB('radiolab_slugs.sqlite')
|
||||
os.environ['TZ'] = 'America/New_York'
|
||||
|
||||
|
||||
def main():
|
||||
def main() -> None:
|
||||
''' CLI entry. '''
|
||||
cli = Cli()
|
||||
cli.arg_dir('dest_dir', help='Download all episodes to dest_dir/year/')
|
||||
cli.arg_bool('--dry-run', help='Do not download, just parse')
|
||||
@@ -36,9 +38,17 @@ def main():
|
||||
print('\nDone.\n\nNow check MP3 tags (consistency).')
|
||||
|
||||
|
||||
def processEpisodeList(basedir, title, query, index=1, *, dry_run=False):
|
||||
def processEpisodeList(
|
||||
basedir: str,
|
||||
title: str,
|
||||
query: str,
|
||||
index: int = 1,
|
||||
*, dry_run: bool = False
|
||||
) -> None:
|
||||
''' Parse full podcast category. '''
|
||||
print('\nProcessing: {}'.format(title), end='')
|
||||
dat = Curl.json('{}/channel/shows/{}/{}?limit=9'.format(API, query, index))
|
||||
url = '{}/channel/shows/{}/{}?limit=9'.format(API, query, index)
|
||||
dat = Curl.json(url) # type: Dict[str, Any]
|
||||
total = dat['data']['attributes']['total-pages']
|
||||
print(' ({}/{})'.format(index, total))
|
||||
anything_new = False
|
||||
@@ -49,7 +59,12 @@ def processEpisodeList(basedir, title, query, index=1, *, dry_run=False):
|
||||
processEpisodeList(basedir, title, query, index + 1, dry_run=dry_run)
|
||||
|
||||
|
||||
def processEpisode(obj, basedir, *, dry_run=False):
|
||||
def processEpisode(
|
||||
obj: Dict[str, Any],
|
||||
basedir: str,
|
||||
*, dry_run: bool = False
|
||||
) -> bool:
|
||||
''' Parse a single podcast episode. '''
|
||||
uid = obj['cms-pk']
|
||||
if db_ids.contains(COHORT, uid):
|
||||
return False # Already exists
|
||||
@@ -86,18 +101,18 @@ def processEpisode(obj, basedir, *, dry_run=False):
|
||||
|
||||
@FileWrite.once(dest_dir, fname + '.txt', date, override=False,
|
||||
dry_run=dry_run, verbose=True, intro=flag or intro)
|
||||
def write_description():
|
||||
def write_description() -> str:
|
||||
nonlocal flag
|
||||
flag = True
|
||||
desc = title + '\n' + '=' * len(title)
|
||||
desc += '\n\n' + StrFormat.strip_html(obj['body'])
|
||||
desc = '{}\n{}\n\n{}'.format(
|
||||
title, '=' * len(title), StrFormat.strip_html(obj['body']))
|
||||
if img_desc:
|
||||
desc += '\n\n' + img_desc
|
||||
return desc + '\n\n\n' + obj['url'].strip() + '\n' # link to article
|
||||
return '{}\n\n\n{}\n'.format(desc, obj['url'].strip()) # article link
|
||||
|
||||
@FileWrite.once(dest_dir, fname + '.transcript.txt', date, override=False,
|
||||
dry_run=dry_run, verbose=True, intro=flag or intro)
|
||||
def write_transcript():
|
||||
def write_transcript() -> Optional[str]:
|
||||
nonlocal flag
|
||||
flag = True
|
||||
data = StrFormat.strip_html(obj['transcript'])
|
||||
@@ -111,7 +126,8 @@ def processEpisode(obj, basedir, *, dry_run=False):
|
||||
return flag # potentially need to query the next page too
|
||||
|
||||
|
||||
def get_img_desc(obj):
|
||||
def get_img_desc(obj: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
|
||||
''' Extract image description. '''
|
||||
if not obj:
|
||||
return (None, None)
|
||||
url = (obj['url'] or '').strip()
|
||||
@@ -135,7 +151,8 @@ def get_img_desc(obj):
|
||||
# -> inurl:radiolab/episodes site:wnycstudios.org
|
||||
# Then regex: /episodes/([^;]*?)" onmousedown
|
||||
|
||||
def processSingle(slug, basedir):
|
||||
def processSingle(slug: str, basedir: str) -> None:
|
||||
''' [internal] process single episode if only the slug is known. '''
|
||||
# cms-pk = 91947 , slug = '91947-do-i-know-you'
|
||||
all_slugs = [slug for _, _, _, slug in db_slugs]
|
||||
if slug not in all_slugs:
|
||||
|
||||
@@ -1,20 +1,20 @@
|
||||
#!/usr/bin/env python3
|
||||
from botlib.tgclient import TGClient
|
||||
from botlib.tgclient import TGClient, Message
|
||||
|
||||
bot = TGClient(__API_KEY__, polling=True, allowedUsers=['my-username'])
|
||||
|
||||
|
||||
@bot.message_handler(commands=['hi'])
|
||||
def bot_reply(message):
|
||||
def bot_reply(message: Message) -> None:
|
||||
if bot.allowed(message): # only reply to a single user (my-username)
|
||||
bot.reply_to(message, 'Good evening my dear.')
|
||||
|
||||
|
||||
@bot.message_handler(commands=['set'])
|
||||
def update_config(message):
|
||||
def update_config(message: Message) -> None:
|
||||
if bot.allowed(message):
|
||||
try:
|
||||
config = data_store.get(message.chat.id)
|
||||
config = DATA_STORE.get(message.chat.id)
|
||||
except KeyError:
|
||||
bot.reply_to(message, 'Not found.')
|
||||
return
|
||||
@@ -28,32 +28,32 @@ def update_config(message):
|
||||
|
||||
|
||||
@bot.message_handler(commands=['start'])
|
||||
def new_chat_info(message):
|
||||
def new_chat_info(message: Message) -> None:
|
||||
bot.log_chat_info(message.chat)
|
||||
if bot.allowed(message):
|
||||
if data_store.get(message.chat.id):
|
||||
if DATA_STORE.get(message.chat.id):
|
||||
bot.reply_to(message, 'Already exists')
|
||||
else:
|
||||
CreateNew(message)
|
||||
|
||||
|
||||
class CreateNew:
|
||||
def __init__(self, message):
|
||||
def __init__(self, message: Message) -> None:
|
||||
self.ask_name(message)
|
||||
|
||||
def ask_name(self, message):
|
||||
def ask_name(self, message: Message) -> None:
|
||||
msg = bot.send_force_reply(message.chat.id, 'Enter Name:')
|
||||
bot.register_next_step_handler(msg, self.ask_interval)
|
||||
|
||||
def ask_interval(self, message):
|
||||
def ask_interval(self, message: Message) -> None:
|
||||
self.name = message.text
|
||||
msg = bot.send_buttons(message.chat.id, 'Update interval (minutes):',
|
||||
options=[3, 5, 10, 15, 30, 60])
|
||||
bot.register_next_step_handler(msg, self.finish)
|
||||
|
||||
def finish(self, message):
|
||||
def finish(self, message: Message) -> None:
|
||||
try:
|
||||
interval = int(message.text)
|
||||
interval = int(message.text or 'error')
|
||||
except ValueError:
|
||||
bot.send_abort_keyboard(message, 'Not a number. Aborting.')
|
||||
return
|
||||
|
||||
@@ -17,15 +17,15 @@ bot.set_on_kill(cron.stop)
|
||||
|
||||
|
||||
def main():
|
||||
def clean_db(_):
|
||||
def clean_db(_) -> None:
|
||||
Log.info('[clean up]')
|
||||
OnceDB('cache.sqlite').cleanup(limit=150)
|
||||
|
||||
def notify_jobA(_):
|
||||
def notify_jobA(_) -> None:
|
||||
jobA.download(topic='development', cohort='dev:py')
|
||||
send2telegram(__A_CHAT_ID__)
|
||||
|
||||
def notify_jobB(_):
|
||||
def notify_jobB(_) -> None:
|
||||
jobB.download()
|
||||
send2telegram(__ANOTHER_CHAT_ID__)
|
||||
|
||||
@@ -37,14 +37,15 @@ def main():
|
||||
# cron.fire()
|
||||
|
||||
|
||||
def send2telegram(chat_id):
|
||||
def send2telegram(chat_id: int) -> None:
|
||||
db = OnceDB('cache.sqlite')
|
||||
# db.mark_all_done()
|
||||
|
||||
def _send(cohort, uid, obj):
|
||||
def _send(cohort: str, uid: str, obj: str) -> bool:
|
||||
Log.info('[push] {} {}'.format(cohort, uid))
|
||||
return bot.send(chat_id, obj, parse_mode='HTML',
|
||||
disable_web_page_preview=True)
|
||||
msg = bot.send(chat_id, obj, parse_mode='HTML',
|
||||
disable_web_page_preview=True)
|
||||
return msg is not None
|
||||
|
||||
if not db.foreach(_send):
|
||||
# send() sleeps 45 sec (on error), safe to call immediately
|
||||
|
||||
@@ -4,7 +4,7 @@ from botlib.html2list import HTML2List, MatchGroup
|
||||
from botlib.oncedb import OnceDB
|
||||
|
||||
|
||||
def download(*, topic='motherboard', cohort='vice:motherboard'):
|
||||
def download(*, topic: str = 'motherboard', cohort: str = 'vice:mb') -> None:
|
||||
db = OnceDB('cache.sqlite')
|
||||
url = 'https://www.vice.com/en/topic/{}'.format(topic)
|
||||
|
||||
|
||||
@@ -2,19 +2,26 @@
|
||||
from botlib.curl import Curl
|
||||
from botlib.html2list import HTML2List, MatchGroup
|
||||
from botlib.oncedb import OnceDB
|
||||
from typing import Optional, Callable, TextIO
|
||||
|
||||
CRAIGSLIST = 'https://newyork.craigslist.org/search/boo'
|
||||
|
||||
|
||||
def load(url):
|
||||
def load(url: str) -> Optional[TextIO]:
|
||||
# return open('test.html')
|
||||
return Curl.get(url)
|
||||
|
||||
|
||||
def download():
|
||||
def download() -> None:
|
||||
db = OnceDB('cache.sqlite')
|
||||
|
||||
def proc(cohort, source, select, regex={}, fn=str):
|
||||
def proc(
|
||||
cohort: str,
|
||||
source: Optional[TextIO],
|
||||
select: str,
|
||||
regex: dict = {},
|
||||
fn: Callable[[MatchGroup], str] = str
|
||||
) -> None:
|
||||
match = MatchGroup(regex)
|
||||
for elem in reversed(HTML2List(select).parse(source)):
|
||||
match.set_html(elem)
|
||||
|
||||
Reference in New Issue
Block a user