"""Scrapers for the WaniKani v2 API: per-assignment data and a summary row."""

import dataclasses
import datetime
import logging
from collections.abc import Iterator, Mapping

import requests_util

from personal_data.data import DeduplicateMode, Scraper

from .. import secrets

logger = logging.getLogger(__name__)

URL_API_ROOT = 'https://api.wanikani.com/v2'
URL_ASSIGNMENTS = URL_API_ROOT + '/assignments'
URL_SUMMARY = URL_API_ROOT + '/summary'
URL_SUBJECTS = URL_API_ROOT + '/subjects/{subject_id}'


def _setup_cache(session):
    """Register the limiter/cache settings used for WaniKani requests.

    Calls ``requests_util.setup_limiter`` twice on ``session``: once for the
    API root with a 90-day ``expire_after`` and once for the assignments
    endpoint with a 3-day ``expire_after``; both use ``per_minute=30``.
    """
    # NOTE(review): presumably the more specific assignments prefix takes
    # precedence over the API-root rule -- confirm against requests_util.
    requests_util.setup_limiter(
        session,
        URL_API_ROOT,
        expire_after=datetime.timedelta(days=90),
        per_minute=30,
    )
    requests_util.setup_limiter(
        session,
        URL_ASSIGNMENTS,
        expire_after=datetime.timedelta(days=3),
        per_minute=30,
    )


def _request_headers() -> dict[str, str]:
    """Return the HTTP headers sent with every WaniKani API request."""
    return {
        'Authorization': f'Bearer {secrets.wanikani_api_key()}',
        'Wanikani-Revision': '20170710',
    }


def _is_available(available_at_str, now: datetime.datetime) -> bool:
    """Return True when ``available_at_str`` is set and not later than ``now``.

    ``available_at_str`` is an ISO-8601 timestamp string (or a falsy value,
    which counts as "not available"). A trailing ``Z`` is rewritten to
    ``+00:00`` so that ``datetime.fromisoformat`` accepts it on Python
    versions that do not parse the ``Z`` suffix.
    """
    if not available_at_str:
        return False
    available_at = datetime.datetime.fromisoformat(
        available_at_str.replace('Z', '+00:00'),
    )
    return available_at <= now


@dataclasses.dataclass(frozen=True)
class WaniKaniLessonsFetcher(Scraper):
    """Scraper yielding one row per WaniKani assignment."""

    dataset_name = 'wanikani_lessons'
    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS

    def scrape(self) -> Iterator[Mapping[str, object]]:
        """Yield the ``data`` dict of every assignment, page by page.

        Starts at ``URL_ASSIGNMENTS`` and follows ``pages.next_url`` until it
        is missing or empty. For each assignment with a truthy ``subject_id``
        the subject endpoint is fetched as well, and its ``characters`` value
        is stored under the ``subject_characters`` key of the yielded dict.
        Assignments without a ``subject_id`` are yielded unchanged.

        Any exception from ``raise_for_status()`` on a failed request
        propagates to the caller.
        """
        _setup_cache(self.session)
        headers = _request_headers()

        url = URL_ASSIGNMENTS
        while url:
            logger.warning('Getting: %s', url)
            response = self.session.get(url, headers=headers)
            response.raise_for_status()
            json_resp = response.json()

            for assignment in json_resp.get('data', []):
                data_item = assignment['data']
                subject_id = data_item.get('subject_id')
                if subject_id:
                    # The parsed assignment dict is extended in place.
                    data_item['subject_characters'] = (
                        self._fetch_subject_characters(subject_id, headers)
                    )
                yield data_item

            url = json_resp.get('pages', {}).get('next_url')

    def _fetch_subject_characters(self, subject_id, headers):
        """Fetch one subject and return its ``characters`` value, if any."""
        subj_url = URL_SUBJECTS.format(subject_id=subject_id)
        logger.warning('Getting: %s', subj_url)
        subj_response = self.session.get(subj_url, headers=headers)
        subj_response.raise_for_status()
        return subj_response.json().get('data', {}).get('characters')


@dataclasses.dataclass(frozen=True)
class WaniKaniSummaryFetcher(Scraper):
    """Scraper yielding a single row with current lesson/review counts."""

    dataset_name: str = 'wanikani_summary'

    # NOTE(review): a plain ``deduplicate_mode = ...`` class attribute used to
    # precede this method but was immediately shadowed by it, so it was dead
    # code and has been dropped. WaniKaniLessonsFetcher exposes the same
    # setting as a plain attribute instead -- confirm which form Scraper
    # expects and align the two classes.
    @staticmethod
    def deduplicate_mode() -> DeduplicateMode:
        """Return the deduplication mode for this dataset."""
        return DeduplicateMode.BY_ALL_COLUMNS

    def scrape(self) -> Iterator[Mapping[str, object]]:
        """Yield one dict with the lesson and review counts available now.

        The yielded dict has the keys ``time`` (timezone-aware UTC timestamp
        taken after the response was received), ``lessons_available`` (total
        number of subject ids across all lesson entries) and
        ``reviews_available`` (total number of subject ids across review
        entries whose ``available_at`` is not later than ``time``).

        Any exception from ``raise_for_status()`` on a failed request
        propagates to the caller.
        """
        _setup_cache(self.session)

        response = self.session.get(URL_SUMMARY, headers=_request_headers())
        response.raise_for_status()
        summary = response.json().get('data', {})

        total_lessons = sum(
            len(lesson.get('subject_ids', []))
            for lesson in summary.get('lessons', [])
        )

        now = datetime.datetime.now(datetime.timezone.utc)
        # No ``del review`` afterwards: deleting the loop variable raised
        # UnboundLocalError whenever the reviews list was empty.
        total_reviews = sum(
            len(review.get('subject_ids', []))
            for review in summary.get('reviews', [])
            if _is_available(review.get('available_at'), now)
        )

        yield {
            'time': now,
            'lessons_available': total_lessons,
            'reviews_available': total_reviews,
        }