1
0

Improved help

This commit is contained in:
Jon Michael Aanes 2024-04-28 23:45:47 +02:00
parent 033f0dcf5b
commit f32ff7f412
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
3 changed files with 15 additions and 5 deletions

View File

@@ -5,8 +5,10 @@ import personal_data.main
def parse_arguments():
available_scraper_names = personal_data.main.available_scraper_names()
parser = argparse.ArgumentParser()
parser.add_argument('fetchers', metavar='FETCHER', type=str, nargs='+')
parser.add_argument('fetchers', metavar='FETCHER', type=str, nargs='+',
choices=available_scraper_names)
parser.add_argument('--cookiejar', action='store_true')
return parser.parse_args()

View File

@@ -1,5 +1,6 @@
import abc
import dataclasses
from collections.abc import Iterator
from enum import Enum
import requests
@@ -33,5 +34,5 @@ class Scraper(abc.ABC):
return False
@abc.abstractmethod
def scrape(self):
def scrape(self) -> Iterator[object]:
pass

View File

@@ -1,4 +1,5 @@
import csv
from collections.abc import Iterator
import datetime
import io
import logging
@@ -176,8 +177,14 @@ def send_notification(
body.append(f'{k}: {v}\n')
mailgun.send_email(session, f'Updated {scraper_name}', ''.join(body))
def available_scrapers() -> list[type[personal_data.data.Scraper]]:
return personal_data.data.Scraper.__subclasses__()
def main(scraper_filter: frozenset[str], use_cookiejar: bool):
def available_scraper_names() -> list[str]:
return [scraper_cls.__name__ for scraper_cls in available_scrapers()]
def main(scraper_filter: frozenset[str], use_cookiejar: bool) -> None:
if use_cookiejar:
cookiejar = browsercookie.firefox()
logger.info('Got cookiejar from firefox: %s cookies', len(cookiejar))
@@ -185,7 +192,7 @@ def main(scraper_filter: frozenset[str], use_cookiejar: bool):
cookiejar = []
logger.warning('No cookiejar is used')
for scraper_cls in personal_data.data.Scraper.__subclasses__():
for scraper_cls in available_scrapers():
session = get_session(cookiejar, with_cfscrape=scraper_cls.requires_cfscrape())
scraper = scraper_cls(session)
if scraper_cls.__name__ not in scraper_filter:
@@ -195,7 +202,7 @@ def main(scraper_filter: frozenset[str], use_cookiejar: bool):
scraper_cls.__name__,
scraper.dataset_name,
)
result_rows = list()
result_rows = []
try:
for result in scraper.scrape():
result_rows.append(result)