diff --git a/personal_data/fetchers/wanikani_lessons.py b/personal_data/fetchers/wanikani_lessons.py
index 9b811d3..198adfa 100644
--- a/personal_data/fetchers/wanikani_lessons.py
+++ b/personal_data/fetchers/wanikani_lessons.py
@@ -13,33 +13,36 @@ logger = logging.getLogger(__name__)
 
 URL_API_ROOT = 'https://api.wanikani.com/v2'
 URL_ASSIGNMENTS = URL_API_ROOT + '/assignments'
+URL_SUMMARY = URL_API_ROOT + '/summary'
 URL_SUBJECTS = URL_API_ROOT + '/subjects/{subject_id}'
 
 
+def _setup_cache(session):
+    """Configure rate limiting and response caching for WaniKani API calls."""
+    requests_util.setup_limiter(
+        session,
+        URL_API_ROOT,
+        expire_after=datetime.timedelta(days=90),
+        per_minute=30,
+    )
+    requests_util.setup_limiter(
+        session,
+        URL_ASSIGNMENTS,
+        expire_after=datetime.timedelta(days=3),
+        per_minute=30,
+    )
+
+
 @dataclasses.dataclass(frozen=True)
 class WaniKaniLessonsFetcher(Scraper):
     dataset_name = 'wanikani_lessons'
     deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
 
-    def _setup_cache(self):
-        requests_util.setup_limiter(
-            self.session,
-            URL_API_ROOT,
-            expire_after=datetime.timedelta(days=90),
-            per_minute=30,
-        )
-        requests_util.setup_limiter(
-            self.session,
-            URL_ASSIGNMENTS,
-            expire_after=datetime.timedelta(days=3),
-            per_minute=30,
-        )
-
     def scrape(self) -> Iterator[Mapping[str, object]]:
         """Fetch assignments from the WaniKani API and yield a dict for each
         assignment with a non-null unlocked_at timestamp."""
-        self._setup_cache()
+        _setup_cache(self.session)
         headers = {
             'Authorization': f'Bearer {secrets.wanikani_api_key()}',
             'Wanikani-Revision': '20170710',
         }
@@ -61,3 +64,46 @@ class WaniKaniLessonsFetcher(Scraper):
             data_item['subject_characters'] = subject_characters
             yield data_item
         url = json_resp.get('pages', {}).get('next_url')
+
+
+@dataclasses.dataclass(frozen=True)
+class WaniKaniSummaryFetcher(Scraper):
+    """Fetch the WaniKani /summary endpoint: counts of lessons and reviews
+    currently available to the user."""
+
+    dataset_name: str = 'wanikani_summary'
+    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
+
+    def scrape(self) -> Iterator[dict]:
+        """Yield a single row with the current number of available lessons
+        and reviews."""
+        _setup_cache(self.session)
+        headers = {
+            'Authorization': f'Bearer {secrets.wanikani_api_key()}',
+            'Wanikani-Revision': '20170710',
+        }
+        response = self.session.get(URL_SUMMARY, headers=headers)
+        response.raise_for_status()
+        data = response.json()
+
+        lessons = data.get('data', {}).get('lessons', [])
+        total_lessons = sum(len(lesson.get('subject_ids', [])) for lesson in lessons)
+
+        reviews = data.get('data', {}).get('reviews', [])
+        now = datetime.datetime.now(datetime.timezone.utc)
+        total_reviews = 0
+        for review in reviews:
+            available_at_str = review.get('available_at')
+            if available_at_str:
+                available_at = datetime.datetime.fromisoformat(
+                    available_at_str.replace('Z', '+00:00'),
+                )
+                # Count only reviews whose availability time has already passed.
+                if available_at <= now:
+                    total_reviews += len(review.get('subject_ids', []))
+
+        yield {
+            'time': now,
+            'lessons_available': total_lessons,
+            'reviews_available': total_reviews,
+        }
diff --git a/personal_data/fetchers/wanikani_summary.py b/personal_data/fetchers/wanikani_summary.py
deleted file mode 100644
index 4a39cae..0000000
--- a/personal_data/fetchers/wanikani_summary.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from __future__ import annotations
-
-from collections.abc import Iterator
-from dataclasses import dataclass
-from datetime import datetime, timezone
-
-import requests
-
-from personal_data.data import DeduplicateMode
-from personal_data.fetchers import Scraper
-from personal_data.secrets import wanikani_api_key
-
-
-@dataclass(frozen=True)
-class WaniKaniSummaryFetcher(Scraper):
-    dataset_name: str = 'wanikani_summary'
-
-    @staticmethod
-    def deduplicate_mode() -> DeduplicateMode:
-        return DeduplicateMode.BY_ALL_COLUMNS
-
-    def scrape(self) -> Iterator[dict]:
-        token = wanikani_api_key()
-        headers = {
-            'Authorization': f'Bearer {token}',
-            'Wanikani-Revision': '20170710',
-        }
-        url = 'https://api.wanikani.com/v2/summary'
-        response = requests.get(url, headers=headers)
-        response.raise_for_status()
-        data = response.json()
-
-        lessons = data.get('data', {}).get('lessons', [])
-        total_lessons = sum(len(lesson.get('subject_ids', [])) for lesson in lessons)
-
-        reviews = data.get('data', {}).get('reviews', [])
-        now = datetime.now(timezone.utc)
-        total_reviews = 0
-        for review in reviews:
-            available_at_str = review.get('available_at')
-            if available_at_str:
-                available_at = datetime.fromisoformat(
-                    available_at_str.replace('Z', '+00:00'),
-                )
-                if available_at <= now:
-                    total_reviews += len(review.get('subject_ids', []))
-
-        yield {
-            'lessons_available': total_lessons,
-            'reviews_available': total_reviews,
-        }