1
0
personal-data/personal_data/data.py

36 lines
639 B
Python
Raw Normal View History

2023-12-10 23:27:56 +00:00
import dataclasses
2024-03-03 15:59:03 +00:00
import requests
2024-02-25 19:20:37 +00:00
from enum import Enum
2024-03-03 15:59:03 +00:00
import abc
2024-02-25 19:20:37 +00:00
class DeduplicateMode(Enum):
    """Closed set of deduplication strategies a scraper can declare.

    The member names indicate what rows are compared on; the code that
    actually applies the strategy is not part of this module chunk.
    """

    # No deduplication.
    NONE = 0
    # NOTE(review): presumably rows are considered duplicates when their
    # first column matches -- confirm against the consumer of this enum.
    BY_FIRST_COLUMN = 1
    # NOTE(review): presumably rows are considered duplicates only when
    # every column matches -- confirm against the consumer of this enum.
    BY_ALL_COLUMNS = 2
2023-12-10 23:27:56 +00:00
2024-03-03 15:59:03 +00:00
@dataclasses.dataclass(frozen=True)
class Scraper(abc.ABC):
    """Abstract base class describing a dataset scraper.

    Concrete subclasses must supply ``dataset_name``, ``deduplicate_mode``
    and ``scrape``; ``dataset_format`` comes with a default.  Instances are
    frozen dataclasses whose only field is ``session``.

    The three descriptive attributes are ordinary properties.  They must not
    be wrapped in ``@staticmethod``: a staticmethod returns the wrapped
    object untouched on attribute access, so ``scraper.dataset_format`` would
    yield a ``property`` object instead of the string.  Subclasses may
    override any of them with a plain class attribute
    (``dataset_name = 'foo'``) or with their own property.
    """

    # Session supplied at construction time.
    # NOTE(review): presumably used by ``scrape`` implementations to issue
    # HTTP requests -- no use of it is visible in this class.
    session: requests.Session

    @property
    @abc.abstractmethod
    def dataset_name(self) -> str:
        """Return the name of the dataset produced by this scraper."""

    @property
    @abc.abstractmethod
    def deduplicate_mode(self) -> DeduplicateMode:
        """Return the ``DeduplicateMode`` declared by this scraper."""

    @property
    def dataset_format(self) -> str:
        """Return the dataset format tag; defaults to ``'list-of-dicts'``."""
        return 'list-of-dicts'

    @abc.abstractmethod
    def scrape(self):
        """Run the scraper.

        The return contract is not defined here.
        NOTE(review): presumably the result matches ``dataset_format``
        (a list of dicts by default) -- confirm against implementations.
        """
2023-12-10 23:27:56 +00:00