import abc
import dataclasses
from collections.abc import Iterator
from enum import Enum

import requests


class DeduplicateMode(Enum):
    """Closed set of deduplication modes a scraper can declare.

    This file only defines the members; how each mode is applied is decided
    by code outside this file. The member names suggest the intent
    (NOTE(review): confirm against the consumer of this enum).
    """

    NONE = 0
    BY_FIRST_COLUMN = 1
    BY_ALL_COLUMNS = 2
    ONLY_LATEST = 3


@dataclasses.dataclass(frozen=True)
class Scraper(abc.ABC):
    """Abstract, immutable base class for scrapers.

    Instances are frozen dataclasses holding a single ``session`` field.
    Concrete subclasses must implement :meth:`scrape`; the static methods
    are configuration hooks that subclasses may override.
    """

    # HTTP session object; presumably used by ``scrape`` implementations to
    # perform requests -- nothing in this base class reads it.
    session: requests.Session

    @staticmethod
    def dataset_name() -> str:
        """Return the dataset name.

        The base implementation has an empty body and therefore returns
        ``None``; subclasses are presumably expected to override it with a
        real string (it is not enforced as abstract).
        """

    @staticmethod
    def deduplicate_mode() -> DeduplicateMode:
        """Return the deduplication mode for this scraper's output.

        The base implementation has an empty body and therefore returns
        ``None``; subclasses are presumably expected to override it (it is
        not enforced as abstract).
        """

    @staticmethod
    def deduplicate_ignore_columns() -> list[str]:
        """Return column names to ignore when deduplicating.

        Defaults to a fresh empty list on every call.
        """
        return list()

    @staticmethod
    def dataset_format() -> str:
        """Return the dataset format identifier (default ``'list-of-dicts'``)."""
        return 'list-of-dicts'

    @staticmethod
    def requires_cfscrape() -> bool:
        """Return whether this scraper requires cfscrape (default ``False``)."""
        return False

    @abc.abstractmethod
    def scrape(self) -> Iterator[object]:
        """Yield the scraped objects; must be implemented by subclasses."""