Compare commits

...

2 Commits

Author SHA1 Message Date
088cac75fc Ruff 2025-06-25 00:12:37 +02:00
    Some checks failed:
    Run Python tests (through Pytest) / Test (push): failing after 29s
    Verify Python project can be installed, loaded and have version checked / Test (push): failing after 26s
8feb3e2cde Document scraper 2025-06-25 00:07:09 +02:00
5 changed files with 18 additions and 4 deletions

View File

@@ -8,6 +8,7 @@ from personal_data.csv_import import determine_possible_keys, load_csv_file, sta
logger = getLogger(__name__)


def iterate_samples_from_dicts(rows: list[dict[str, Any]]) -> Iterator[ActivitySample]:
    assert len(rows) > 0
    max_title_parts = 2

View File

@@ -106,6 +106,7 @@ def import_workout_csv(vault: ObsidianVault, rows: Rows) -> int:
MINIMUM_BELIEVABLE_STEP_COUNT = 300


def import_step_counts_csv(vault: ObsidianVault, rows: Rows) -> int:
    num_updated = 0

View File

@@ -13,7 +13,9 @@ import frontmatter
import marko
import marko.md_renderer
assert hasattr(frontmatter, 'loads'), 'Incorrect frontmatter package installed. Use: pip install python-frontmatter'
assert hasattr(frontmatter, 'loads'), (
    'Incorrect frontmatter package installed. Use: pip install python-frontmatter'
)
logger = getLogger(__name__)
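The assertion reformatted in this hunk guards against the wrong dependency: the module name frontmatter can be provided by more than one PyPI distribution, and only python-frontmatter exposes loads. A minimal sketch of the intended usage, with an invented example document that is not taken from this repository:

import frontmatter  # must be python-frontmatter, not the unrelated 'frontmatter' package

# Fail fast at import time if the wrong package is installed.
assert hasattr(frontmatter, 'loads'), (
    'Incorrect frontmatter package installed. Use: pip install python-frontmatter'
)

document = '''---
title: Example note
---
Body text of the note.
'''

post = frontmatter.loads(document)  # parses the YAML front matter and the body
print(post.metadata['title'])       # 'Example note'
print(post.content)                 # the body text without the front matter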

View File

@@ -15,18 +15,26 @@ class DeduplicateMode(Enum):
@dataclasses.dataclass(frozen=True)
class Scraper(abc.ABC):
    """Base scraper class."""

    session: requests.Session

    @staticmethod
    def dataset_name() -> str:
        pass
        """Indicates the filename of the produced dataset. Must be overwritten
        by the implementation."""

    @staticmethod
    def deduplicate_mode() -> DeduplicateMode:
        pass
        """Indicates how the rows should be deduplicated. Must be overwritten
        by the implementation."""

    @staticmethod
    def deduplicate_ignore_columns() -> list[str]:
        """Indicates columns which are not included in the deduplication check.
        SQL comparison: Columns not in this set is part of the primary key.
        """
        return []

    @staticmethod
@@ -35,8 +43,9 @@ class Scraper(abc.ABC):
    @staticmethod
    def requires_cfscrape() -> bool:
        """Whether the scraper requires advanced CloudFlare circumvention."""
        return False

    @abc.abstractmethod
    def scrape(self) -> Iterator[Mapping[str, object]]:
        pass
        """Implementation of the scraper."""

View File

@@ -78,6 +78,7 @@ def find_python_packages() -> list[str]:
    return sorted(packages)


with open(PACKAGE_NAME + '/_version.py') as f:
    version = parse_version_file(f.read())
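The packaging script reads _version.py as text and extracts the version with parse_version_file instead of importing the package. That helper's implementation is not part of this compare; a common way to write it, shown here only as an assumed sketch, is a regular-expression search for the __version__ assignment:

import re


def parse_version_file(text: str) -> str:
    """Extract the version string from a line such as: __version__ = '0.1.2'."""
    match = re.search(r"^__version__\s*=\s*['\"]([^'\"]+)['\"]", text, re.MULTILINE)
    if match is None:
        raise ValueError('could not find a __version__ assignment in _version.py')
    return match.group(1)

# Usage matching the hunk above:
# with open(PACKAGE_NAME + '/_version.py') as f:
#     version = parse_version_file(f.read())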