Improved help

commit f32ff7f412 (parent 033f0dcf5b)
@@ -5,8 +5,10 @@ import personal_data.main
 
 
 def parse_arguments():
+    available_scraper_names = personal_data.main.available_scraper_names()
     parser = argparse.ArgumentParser()
-    parser.add_argument('fetchers', metavar='FETCHER', type=str, nargs='+')
+    parser.add_argument('fetchers', metavar='FETCHER', type=str, nargs='+',
+                        choices=available_scraper_names)
     parser.add_argument('--cookiejar', action='store_true')
     return parser.parse_args()
 
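With choices= wired to the scraper registry, argparse now rejects unknown fetcher names up front and lists the valid ones both in the usage error and in the generated --help text, which is presumably the improved help the commit message refers to. A minimal sketch of the behaviour, using made-up scraper names in place of personal_data.main.available_scraper_names():

    import argparse

    # Stand-ins for personal_data.main.available_scraper_names(); the names are hypothetical.
    available_scraper_names = ['ExampleScraperA', 'ExampleScraperB']

    parser = argparse.ArgumentParser()
    parser.add_argument('fetchers', metavar='FETCHER', type=str, nargs='+',
                        choices=available_scraper_names)

    print(parser.parse_args(['ExampleScraperA']))
    # Namespace(fetchers=['ExampleScraperA'])

    parser.parse_args(['NoSuchScraper'])
    # prints "error: argument FETCHER: invalid choice: 'NoSuchScraper' (choose from ...)"
    # and exits with SystemExit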
@@ -1,5 +1,6 @@
 import abc
 import dataclasses
+from collections.abc import Iterator
 from enum import Enum
 
 import requests
@@ -33,5 +34,5 @@ class Scraper(abc.ABC):
         return False
 
     @abc.abstractmethod
-    def scrape(self):
+    def scrape(self) -> Iterator[object]:
         pass
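Annotating the abstract scrape() as Iterator[object] documents that implementations are expected to yield result rows one at a time rather than return a finished collection. A minimal sketch of a conforming subclass, assuming only what this diff shows (scrapers are constructed with a session and expose a dataset_name read by main()); the class name and yielded rows are illustrative:

    from collections.abc import Iterator

    import personal_data.data


    class ExampleScraper(personal_data.data.Scraper):  # hypothetical subclass
        dataset_name = 'example_dataset'  # assumed attribute; main() reads scraper.dataset_name

        def scrape(self) -> Iterator[object]:
            # A generator function satisfies Iterator[object]; main() drains it into result_rows.
            yield {'key': 'value 1'}
            yield {'key': 'value 2'}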
@@ -1,4 +1,5 @@
 import csv
+from collections.abc import Iterator
 import datetime
 import io
 import logging
@@ -176,8 +177,14 @@ def send_notification(
         body.append(f'{k}: {v}\n')
     mailgun.send_email(session, f'Updated {scraper_name}', ''.join(body))
 
+def available_scrapers() -> list[type[personal_data.data.Scraper]]:
+    return personal_data.data.Scraper.__subclasses__()
+
+def available_scraper_names() -> list[str]:
+    return [scraper_cls.__name__ for scraper_cls in available_scrapers()]
+
 
-def main(scraper_filter: frozenset[str], use_cookiejar: bool):
+def main(scraper_filter: frozenset[str], use_cookiejar: bool) -> None:
     if use_cookiejar:
         cookiejar = browsercookie.firefox()
         logger.info('Got cookiejar from firefox: %s cookies', len(cookiejar))
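available_scrapers() is a thin wrapper around Scraper.__subclasses__(), so it returns exactly the scraper classes whose defining modules have already been imported when it is called, and available_scraper_names() maps those classes to the strings offered as CLI choices. A self-contained illustration of the underlying mechanism (Base, A and B are placeholder names, not classes from this repository):

    import abc


    class Base(abc.ABC):
        pass


    class A(Base):
        pass


    class B(Base):
        pass


    def available() -> list[type[Base]]:
        # __subclasses__() lists the direct subclasses that exist in memory right now,
        # which is why the scraper modules must be imported before discovery runs.
        return Base.__subclasses__()


    print([cls.__name__ for cls in available()])  # prints ['A', 'B']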
@@ -185,7 +192,7 @@ def main(scraper_filter: frozenset[str], use_cookiejar: bool):
         cookiejar = []
         logger.warning('No cookiejar is used')
 
-    for scraper_cls in personal_data.data.Scraper.__subclasses__():
+    for scraper_cls in available_scrapers():
         session = get_session(cookiejar, with_cfscrape=scraper_cls.requires_cfscrape())
         scraper = scraper_cls(session)
         if scraper_cls.__name__ not in scraper_filter:
@@ -195,7 +202,7 @@ def main(scraper_filter: frozenset[str], use_cookiejar: bool):
             scraper_cls.__name__,
             scraper.dataset_name,
         )
-        result_rows = list()
+        result_rows = []
         try:
             for result in scraper.scrape():
                 result_rows.append(result)
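Taken together, the CLI and the dispatch loop now share one source of truth: the names argparse accepts as FETCHER arguments are exactly the __name__ values that main() compares against scraper_filter. A hedged sketch of how an entry point could wire the two halves together; the actual call site is not part of this diff, so the module layout and the run block below are assumptions:

    import argparse

    import personal_data.main


    def parse_arguments() -> argparse.Namespace:
        # Mirrors the parse_arguments() changed in the first hunk above.
        available_scraper_names = personal_data.main.available_scraper_names()
        parser = argparse.ArgumentParser()
        parser.add_argument('fetchers', metavar='FETCHER', type=str, nargs='+',
                            choices=available_scraper_names)
        parser.add_argument('--cookiejar', action='store_true')
        return parser.parse_args()


    if __name__ == '__main__':
        args = parse_arguments()
        # Every entry in args.fetchers has already been validated against the scraper names.
        personal_data.main.main(frozenset(args.fetchers), use_cookiejar=args.cookiejar)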