from __future__ import annotations

from collections.abc import Iterator
from dataclasses import dataclass
from datetime import datetime, timezone

import requests

from personal_data.data import DeduplicateMode
from personal_data.fetchers import Scraper
from personal_data.secrets import wanikani_api_key

# Upper bound (seconds) passed to ``requests.get``. Without a timeout,
# ``requests`` waits indefinitely on a stalled connection.
_REQUEST_TIMEOUT_SECONDS = 30


@dataclass(frozen=True)
class WaniKaniSummaryFetcher(Scraper):
    """Scraper that yields one row of counts from the WaniKani summary endpoint."""

    dataset_name: str = 'wanikani_summary'

    @staticmethod
    def deduplicate_mode() -> DeduplicateMode:
        """Return the deduplication mode for this dataset (``BY_ALL_COLUMNS``)."""
        return DeduplicateMode.BY_ALL_COLUMNS

    def scrape(self) -> Iterator[dict]:
        """Fetch the WaniKani summary and yield a single row of totals.

        Yields:
            A dict with two keys:

            * ``lessons_available`` - number of ``subject_ids`` summed over
              every entry in the response's ``lessons`` list.
            * ``reviews_available`` - number of ``subject_ids`` summed over
              the ``reviews`` entries whose ``available_at`` timestamp is
              not later than the current UTC time. Entries without an
              ``available_at`` value are skipped.

        Raises:
            requests.HTTPError: If the API answers with an error status.
            requests.Timeout: If the request exceeds
                ``_REQUEST_TIMEOUT_SECONDS``.
        """
        headers = {
            'Authorization': f'Bearer {wanikani_api_key()}',
            'Wanikani-Revision': '20170710',
        }
        response = requests.get(
            'https://api.wanikani.com/v2/summary',
            headers=headers,
            timeout=_REQUEST_TIMEOUT_SECONDS,
        )
        response.raise_for_status()

        # Both lists live under the top-level ``data`` key; read it once.
        summary = response.json().get('data', {})

        total_lessons = sum(
            len(lesson.get('subject_ids', []))
            for lesson in summary.get('lessons', [])
        )

        now = datetime.now(timezone.utc)
        total_reviews = 0
        for review in summary.get('reviews', []):
            available_at_str = review.get('available_at')
            if not available_at_str:
                continue
            # Rewrite a trailing ``Z`` as an explicit offset so that
            # ``fromisoformat`` returns an aware datetime comparable to ``now``
            # (older Python versions reject the ``Z`` suffix).
            available_at = datetime.fromisoformat(
                available_at_str.replace('Z', '+00:00'),
            )
            if available_at <= now:
                total_reviews += len(review.get('subject_ids', []))

        yield {
            'lessons_available': total_lessons,
            'reviews_available': total_reviews,
        }