2023-12-10 23:27:56 +00:00
|
|
|
import dataclasses
|
2024-02-25 19:20:37 +00:00
|
|
|
from enum import Enum
|
|
|
|
|
|
|
|
class DeduplicateMode(Enum):
    """Closed set of deduplication strategies selectable per scraper.

    The member names suggest which columns are compared when looking for
    duplicate rows; the code that interprets these values is not part of
    this block, so verify the exact semantics against the consumer.
    """

    # No deduplication is requested.
    NONE = 0

    # Presumably: rows count as duplicates when their first column matches.
    BY_FIRST_COLUMN = 1

    # Presumably: rows count as duplicates only when every column matches.
    BY_ALL_COLUMNS = 2
|
2023-12-10 23:27:56 +00:00
|
|
|
|
|
|
|
@dataclasses.dataclass
class Scraper:
    """Record bundling a scraper object with the settings for its dataset.

    Instances are plain dataclass records: the generated ``__init__``
    takes the fields in declaration order, and only ``dataset_format``
    has a default value.
    """

    # The scraping entry point. Typed loosely for now; the TODO indicates
    # it is intended to be a callable (signature not visible here).
    scraper: object  # TODO: Callable

    # Name identifying the dataset associated with this scraper.
    dataset_name: str

    # Deduplication strategy to use for this scraper's dataset.
    deduplicate_mode: DeduplicateMode

    # Format label for the dataset; defaults to 'list-of-dicts'.
    dataset_format: str = 'list-of-dicts'
|
|
|
|
|