import abc
import dataclasses
from collections.abc import Iterator, Mapping
from enum import Enum

import requests


class DeduplicateMode(Enum):
    """Selects how scraped rows should be deduplicated.

    The value is reported by ``Scraper.deduplicate_mode()``. The code that
    actually performs the deduplication is not part of this module, so the
    exact semantics of each member are defined by that consumer.
    """

    NONE = 0
    BY_FIRST_COLUMN = 1
    BY_ALL_COLUMNS = 2
    ONLY_LATEST = 3


@dataclasses.dataclass(frozen=True)
class Scraper(abc.ABC):
    """Base scraper class.

    Concrete scrapers must implement ``scrape()`` and must override
    ``dataset_name()`` and ``deduplicate_mode()``. The remaining static
    methods provide defaults that may be overridden when needed.

    Instances are frozen dataclasses: ``session`` cannot be reassigned
    after construction.
    """

    # Session handed to the scraper at construction time; presumably used by
    # scrape() implementations to perform their HTTP requests.
    session: requests.Session

    @staticmethod
    def dataset_name() -> str:
        """Indicates the filename of the produced dataset.

        Must be overridden by the implementation.

        Raises:
            NotImplementedError: Always, when not overridden. This replaces
                a silent ``None`` return that contradicted the ``str``
                return annotation.
        """
        msg = 'Scraper.dataset_name() must be overridden by the implementation'
        raise NotImplementedError(msg)

    @staticmethod
    def deduplicate_mode() -> DeduplicateMode:
        """Indicates how the rows should be deduplicated.

        Must be overridden by the implementation.

        Raises:
            NotImplementedError: Always, when not overridden. This replaces
                a silent ``None`` return that contradicted the
                ``DeduplicateMode`` return annotation.
        """
        msg = 'Scraper.deduplicate_mode() must be overridden by the implementation'
        raise NotImplementedError(msg)

    @staticmethod
    def deduplicate_ignore_columns() -> list[str]:
        """Indicates columns which are not included in the deduplication check.

        SQL comparison: Columns not in this list are part of the primary key.

        Returns:
            A new empty list by default, i.e. no columns are ignored.
        """
        return []

    @staticmethod
    def dataset_format() -> str:
        """Indicates the format identifier of the produced dataset.

        Returns:
            ``'list-of-dicts'`` by default.
        """
        return 'list-of-dicts'

    @staticmethod
    def requires_cfscrape() -> bool:
        """Whether the scraper requires advanced CloudFlare circumvention.

        Returns:
            ``False`` by default.
        """
        return False

    @abc.abstractmethod
    def scrape(self) -> Iterator[Mapping[str, object]]:
        """Implementation of the scraper.

        Returns:
            An iterator of rows, each a mapping from string keys to values.
        """