From 6ef5cfff2d735662b3669c3d264947fd2c97782b Mon Sep 17 00:00:00 2001
From: Jon Michael Aanes
Date: Sun, 16 Mar 2025 15:24:59 +0100
Subject: [PATCH] Retention

---
# Review notes (free-form text in this section is not part of the applied change):
# - Replaces the inline WaniKani URL literals in wanikani_lessons.py with the
#   module-level constants URL_API_ROOT, URL_ASSIGNMENTS and URL_SUBJECTS.
# - Adds WaniKaniLessonsFetcher._setup_cache(), which calls
#   requests_util.setup_limiter() on self.session twice: once for URL_API_ROOT
#   (expire_after=90 days, per_minute=30) and once for URL_ASSIGNMENTS
#   (expire_after=3 days, per_minute=30). scrape() now calls it before fetching.
# - setup_limiter() is not defined in this patch; presumably expire_after is a
#   cache lifetime and per_minute a request rate cap -- TODO confirm.
# - URL_ASSIGNMENTS starts with URL_API_ROOT, so both registrations cover
#   assignment URLs; which expiry applies depends on how setup_limiter()
#   matches URLs -- verify the 3-day setting actually takes effect.
# - Each request URL is now logged with logger.warning(); NOTE(review): that
#   level looks high for routine progress output -- confirm it is intended.
# - NOTE(review): leading whitespace inside the hunks was reconstructed from
#   the Python structure and the hunk line counts; run `git apply --check`
#   before applying.
 personal_data/fetchers/wanikani_lessons.py | 28 ++++++++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/personal_data/fetchers/wanikani_lessons.py b/personal_data/fetchers/wanikani_lessons.py
index 49927d7..9b811d3 100644
--- a/personal_data/fetchers/wanikani_lessons.py
+++ b/personal_data/fetchers/wanikani_lessons.py
@@ -1,27 +1,50 @@
 import dataclasses
+import datetime
 import logging
 from collections.abc import Iterator, Mapping
 
+import requests_util
+
 from personal_data.data import DeduplicateMode, Scraper
 
 from .. import secrets
 
 logger = logging.getLogger(__name__)
 
+URL_API_ROOT = 'https://api.wanikani.com/v2'
+URL_ASSIGNMENTS = URL_API_ROOT + '/assignments'
+URL_SUBJECTS = URL_API_ROOT + '/subjects/{subject_id}'
+
 
 @dataclasses.dataclass(frozen=True)
 class WaniKaniLessonsFetcher(Scraper):
     dataset_name = 'wanikani_lessons'
     deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
 
+    def _setup_cache(self):
+        requests_util.setup_limiter(
+            self.session,
+            URL_API_ROOT,
+            expire_after=datetime.timedelta(days=90),
+            per_minute=30,
+        )
+        requests_util.setup_limiter(
+            self.session,
+            URL_ASSIGNMENTS,
+            expire_after=datetime.timedelta(days=3),
+            per_minute=30,
+        )
+
     def scrape(self) -> Iterator[Mapping[str, object]]:
         """Fetch assignments from the WaniKani API and yield a dict for each assignment with a non-null unlocked_at timestamp."""
-        url = 'https://api.wanikani.com/v2/assignments'
+        self._setup_cache()
         headers = {
             'Authorization': f'Bearer {secrets.wanikani_api_key()}',
             'Wanikani-Revision': '20170710',
         }
+        url = URL_ASSIGNMENTS
         while url:
+            logger.warning('Getting: %s', url)
             response = self.session.get(url, headers=headers)
             response.raise_for_status()
             json_resp = response.json()
@@ -29,7 +52,8 @@ class WaniKaniLessonsFetcher(Scraper):
                 data_item = assignment['data']
                 subject_id = data_item.get('subject_id')
                 if subject_id:
-                    subj_url = f'https://api.wanikani.com/v2/subjects/{subject_id}'
+                    subj_url = URL_SUBJECTS.format(subject_id=subject_id)
+                    logger.warning('Getting: %s', subj_url)
                     subj_response = self.session.get(subj_url, headers=headers)
                     subj_response.raise_for_status()
                     subj_json = subj_response.json()