Improved help
This commit is contained in:
parent
033f0dcf5b
commit
f32ff7f412
|
@ -5,8 +5,10 @@ import personal_data.main
|
|||
|
||||
|
||||
def parse_arguments() -> argparse.Namespace:
    """Parse the command-line arguments of the scraper runner.

    The positional ``FETCHER`` arguments (one or more) are restricted to the
    names reported by ``personal_data.main.available_scraper_names()``, so
    argparse both rejects unknown names and lists the valid ones in the
    usage/help output.

    Returns:
        The parsed namespace with ``fetchers`` (list of str) and
        ``cookiejar`` (bool, ``True`` when ``--cookiejar`` is given).
    """
    available_scraper_names = personal_data.main.available_scraper_names()
    parser = argparse.ArgumentParser()
    # Register 'fetchers' exactly once: the choices-restricted form replaces
    # the earlier unrestricted registration.
    parser.add_argument(
        'fetchers',
        metavar='FETCHER',
        type=str,
        nargs='+',
        choices=available_scraper_names,
    )
    parser.add_argument('--cookiejar', action='store_true')
    return parser.parse_args()
|
||||
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
import abc
|
||||
import dataclasses
|
||||
from collections.abc import Iterator
|
||||
from enum import Enum
|
||||
|
||||
import requests
|
||||
|
@ -33,5 +34,5 @@ class Scraper(abc.ABC):
|
|||
return False
|
||||
|
||||
@abc.abstractmethod
def scrape(self) -> Iterator[object]:
    """Produce the scraped results, one object per iteration.

    Abstract: concrete scrapers must override this. The base
    implementation does nothing and returns ``None``.
    """
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
import csv
|
||||
from collections.abc import Iterator
|
||||
import datetime
|
||||
import io
|
||||
import logging
|
||||
|
@ -176,8 +177,14 @@ def send_notification(
|
|||
body.append(f'{k}: {v}\n')
|
||||
mailgun.send_email(session, f'Updated {scraper_name}', ''.join(body))
|
||||
|
||||
def available_scrapers() -> list[type[personal_data.data.Scraper]]:
    """Return the scraper classes currently known to the interpreter.

    Relies on ``Scraper.__subclasses__()``, which lists only *direct*
    subclasses whose defining modules have already been imported.
    """
    return personal_data.data.Scraper.__subclasses__()
|
||||
|
||||
def main(scraper_filter: frozenset[str], use_cookiejar: bool):
|
||||
def available_scraper_names() -> list[str]:
    """Return the class name of every scraper from ``available_scrapers()``.

    The names are given in the same order as the classes are reported.
    """
    return [scraper_cls.__name__ for scraper_cls in available_scrapers()]
|
||||
|
||||
|
||||
def main(scraper_filter: frozenset[str], use_cookiejar: bool) -> None:
|
||||
if use_cookiejar:
|
||||
cookiejar = browsercookie.firefox()
|
||||
logger.info('Got cookiejar from firefox: %s cookies', len(cookiejar))
|
||||
|
@ -185,7 +192,7 @@ def main(scraper_filter: frozenset[str], use_cookiejar: bool):
|
|||
cookiejar = []
|
||||
logger.warning('No cookiejar is used')
|
||||
|
||||
for scraper_cls in personal_data.data.Scraper.__subclasses__():
|
||||
for scraper_cls in available_scrapers():
|
||||
session = get_session(cookiejar, with_cfscrape=scraper_cls.requires_cfscrape())
|
||||
scraper = scraper_cls(session)
|
||||
if scraper_cls.__name__ not in scraper_filter:
|
||||
|
@ -195,7 +202,7 @@ def main(scraper_filter: frozenset[str], use_cookiejar: bool):
|
|||
scraper_cls.__name__,
|
||||
scraper.dataset_name,
|
||||
)
|
||||
result_rows = list()
|
||||
result_rows = []
|
||||
try:
|
||||
for result in scraper.scrape():
|
||||
result_rows.append(result)
|
||||
|
|
Loading…
Reference in New Issue
Block a user