from __future__ import annotations

from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Iterator

import requests

from personal_data.data import DeduplicateMode
from personal_data.fetchers import Scraper
from personal_data.secrets import wanikani_api_key

# Endpoint queried by WaniKaniSummaryFetcher.scrape().
_SUMMARY_URL = "https://api.wanikani.com/v2/summary"

# Value sent in the "Wanikani-Revision" request header.
_API_REVISION = "20170710"

# Upper bound, in seconds, on how long the HTTP request may block. Without a
# timeout, requests waits indefinitely on a stalled connection.
_REQUEST_TIMEOUT_SECONDS = 30


def _count_subject_ids(entries: list[dict]) -> int:
    """Return the total number of ids in the "subject_ids" lists of *entries*.

    Entries whose "subject_ids" key is missing or null contribute zero.
    """
    return sum(len(entry.get("subject_ids") or []) for entry in entries)


def _is_due(review: dict, now: datetime) -> bool:
    """Return True when *review* has an "available_at" timestamp that is <= *now*.

    Entries without an "available_at" value are treated as not due.
    """
    available_at_str = review.get("available_at")
    if not available_at_str:
        return False
    # datetime.fromisoformat() only accepts a trailing "Z" on Python 3.11+,
    # so rewrite it as an explicit UTC offset first.
    available_at = datetime.fromisoformat(available_at_str.replace("Z", "+00:00"))
    return available_at <= now


@dataclass(frozen=True)
class WaniKaniSummaryFetcher(Scraper):
    """Scraper that yields the current WaniKani lesson and review counts."""

    dataset_name: str = "wanikani_summary"

    @staticmethod
    def deduplicate_mode() -> DeduplicateMode:
        """Return the deduplication mode for this dataset (all columns)."""
        return DeduplicateMode.BY_ALL_COLUMNS

    def scrape(self) -> Iterator[dict]:
        """Fetch the WaniKani summary and yield a single row of counts.

        Yields:
            One dict with two keys:
            "lessons_available" -- total subject ids across all entries of
            the response's "lessons" list;
            "reviews_available" -- total subject ids across the entries of
            the "reviews" list whose "available_at" is not in the future.

        Raises:
            requests.exceptions.HTTPError: the API answered with an error
                status.
            requests.exceptions.Timeout: the API did not answer within
                ``_REQUEST_TIMEOUT_SECONDS``.
        """
        headers = {
            "Authorization": f"Bearer {wanikani_api_key()}",
            "Wanikani-Revision": _API_REVISION,
        }
        response = requests.get(
            _SUMMARY_URL,
            headers=headers,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()

        # `or {}` / `or []` also cover keys that are present but null, which
        # a plain .get(key, default) would pass through as None.
        summary = response.json().get("data") or {}
        lessons = summary.get("lessons") or []
        reviews = summary.get("reviews") or []

        now = datetime.now(timezone.utc)
        due_reviews = [review for review in reviews if _is_due(review, now)]

        yield {
            "lessons_available": _count_subject_ids(lessons),
            "reviews_available": _count_subject_ids(due_reviews),
        }