# Source path: personal_data/fetchers/wanikani_summary.py
"""Fetcher that records how many WaniKani lessons and reviews are available."""

from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Iterator

import requests

from personal_data.data import DeduplicateMode
from personal_data.fetchers import Scraper
from personal_data.secrets import wanikani_api_key

_SUMMARY_URL = "https://api.wanikani.com/v2/summary"
_API_REVISION = "20170710"
# Without an explicit timeout, requests waits on an unresponsive server
# forever, which would stall the whole scrape run.
_REQUEST_TIMEOUT_SECONDS = 30


@dataclass(frozen=True)
class WaniKaniSummaryFetcher(Scraper):
    """Scrape the WaniKani summary endpoint into a single row of counts."""

    dataset_name: str = "wanikani_summary"

    @staticmethod
    def deduplicate_mode() -> DeduplicateMode:
        """Return the deduplication mode used for this dataset."""
        return DeduplicateMode.BY_ALL_COLUMNS

    def scrape(self) -> Iterator[dict]:
        """Fetch the summary and yield one row of lesson and review counts.

        Yields:
            A dict with two integer entries: ``lessons_available`` (the number
            of subject ids across all lesson entries) and ``reviews_available``
            (the number of subject ids in review entries whose
            ``available_at`` is not in the future).

        Raises:
            requests.HTTPError: If the API answers with an error status.
            requests.RequestException: On connection failure or timeout.
        """
        headers = {
            "Authorization": f"Bearer {wanikani_api_key()}",
            "Wanikani-Revision": _API_REVISION,
        }
        response = requests.get(
            _SUMMARY_URL,
            headers=headers,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()
        summary = response.json().get("data", {})

        lessons_available = sum(
            len(entry.get("subject_ids", []))
            for entry in summary.get("lessons", [])
        )

        now = datetime.now(timezone.utc)
        reviews_available = 0
        for entry in summary.get("reviews", []):
            available_at_raw = entry.get("available_at")
            if not available_at_raw:
                # Entries without a timestamp cannot be classified as due.
                continue
            # datetime.fromisoformat() only accepts a trailing "Z" from
            # Python 3.11 on, so spell the UTC offset out explicitly.
            available_at = datetime.fromisoformat(
                available_at_raw.replace("Z", "+00:00")
            )
            if available_at <= now:
                reviews_available += len(entry.get("subject_ids", []))

        yield {
            "lessons_available": lessons_available,
            "reviews_available": reviews_available,
        }