diff --git a/personal_data/__main__.py b/personal_data/__main__.py
index 437f049..87c3dc2 100644
--- a/personal_data/__main__.py
+++ b/personal_data/__main__.py
@@ -5,8 +5,10 @@ import personal_data.main
 
 
 def parse_arguments():
+    available_scraper_names = personal_data.main.available_scraper_names()
     parser = argparse.ArgumentParser()
-    parser.add_argument('fetchers', metavar='FETCHER', type=str, nargs='+')
+    parser.add_argument('fetchers', metavar='FETCHER', type=str, nargs='+',
+                        choices=available_scraper_names)
     parser.add_argument('--cookiejar', action='store_true')
     return parser.parse_args()
 
diff --git a/personal_data/data.py b/personal_data/data.py
index 510ec0c..27a7d6a 100644
--- a/personal_data/data.py
+++ b/personal_data/data.py
@@ -1,5 +1,6 @@
 import abc
 import dataclasses
+from collections.abc import Iterator
 from enum import Enum
 
 import requests
@@ -33,5 +34,5 @@ class Scraper(abc.ABC):
         return False
 
     @abc.abstractmethod
-    def scrape(self):
+    def scrape(self) -> Iterator[object]:
         pass
diff --git a/personal_data/main.py b/personal_data/main.py
index 5f5ae89..4dabcf4 100644
--- a/personal_data/main.py
+++ b/personal_data/main.py
@@ -1,4 +1,5 @@
 import csv
+from collections.abc import Iterator
 import datetime
 import io
 import logging
@@ -176,8 +177,14 @@ def send_notification(
         body.append(f'{k}: {v}\n')
     mailgun.send_email(session, f'Updated {scraper_name}', ''.join(body))
 
+def available_scrapers() -> list[type[personal_data.data.Scraper]]:
+    return personal_data.data.Scraper.__subclasses__()
 
-def main(scraper_filter: frozenset[str], use_cookiejar: bool):
+def available_scraper_names() -> list[str]:
+    return [scraper_cls.__name__ for scraper_cls in available_scrapers()]
+
+
+def main(scraper_filter: frozenset[str], use_cookiejar: bool) -> None:
     if use_cookiejar:
         cookiejar = browsercookie.firefox()
         logger.info('Got cookiejar from firefox: %s cookies', len(cookiejar))
@@ -185,7 +192,7 @@ def main(scraper_filter: frozenset[str], use_cookiejar: bool):
         cookiejar = []
         logger.warning('No cookiejar is used')
 
-    for scraper_cls in personal_data.data.Scraper.__subclasses__():
+    for scraper_cls in available_scrapers():
         session = get_session(cookiejar, with_cfscrape=scraper_cls.requires_cfscrape())
         scraper = scraper_cls(session)
         if scraper_cls.__name__ not in scraper_filter:
@@ -195,7 +202,7 @@ def main(scraper_filter: frozenset[str], use_cookiejar: bool):
             scraper_cls.__name__,
             scraper.dataset_name,
         )
-        result_rows = list()
+        result_rows = []
         try:
             for result in scraper.scrape():
                 result_rows.append(result)