1
0

Retention
All checks were successful
Run Python tests (through Pytest) / Test (push) Successful in 35s
Verify Python project can be installed, loaded and have version checked / Test (push) Successful in 31s

This commit is contained in:
Jon Michael Aanes 2025-03-16 15:24:59 +01:00
parent b3af7e36fd
commit 6ef5cfff2d

View File

@ -1,27 +1,50 @@
import dataclasses import dataclasses
import datetime
import logging import logging
from collections.abc import Iterator, Mapping from collections.abc import Iterator, Mapping
import requests_util
from personal_data.data import DeduplicateMode, Scraper from personal_data.data import DeduplicateMode, Scraper
from .. import secrets from .. import secrets
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Base address of the WaniKani v2 API; the endpoint constants below extend it.
URL_API_ROOT = 'https://api.wanikani.com/v2'
# Assignments endpoint (first URL requested by the scraper).
URL_ASSIGNMENTS = f'{URL_API_ROOT}/assignments'
# Template for a single subject; fill in with ``.format(subject_id=...)``.
# The doubled braces keep ``{subject_id}`` as a literal placeholder.
URL_SUBJECTS = f'{URL_API_ROOT}/subjects/{{subject_id}}'
@dataclasses.dataclass(frozen=True) @dataclasses.dataclass(frozen=True)
class WaniKaniLessonsFetcher(Scraper): class WaniKaniLessonsFetcher(Scraper):
dataset_name = 'wanikani_lessons' dataset_name = 'wanikani_lessons'
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
def _setup_cache(self):
    """Configure ``self.session`` through ``requests_util.setup_limiter``.

    Two registrations are made, in this order, both with ``per_minute=30``:

    * ``URL_API_ROOT`` with ``expire_after`` of 90 days.
    * ``URL_ASSIGNMENTS`` with ``expire_after`` of 3 days.

    NOTE(review): presumably the more specific assignments entry takes
    precedence over the API-root entry -- confirm against ``requests_util``.
    """
    # (URL prefix, expiry in days); order matches the registration order.
    expiry_days_by_prefix = (
        (URL_API_ROOT, 90),
        (URL_ASSIGNMENTS, 3),
    )
    for url_prefix, expiry_days in expiry_days_by_prefix:
        requests_util.setup_limiter(
            self.session,
            url_prefix,
            expire_after=datetime.timedelta(days=expiry_days),
            per_minute=30,
        )
def scrape(self) -> Iterator[Mapping[str, object]]: def scrape(self) -> Iterator[Mapping[str, object]]:
"""Fetch assignments from the WaniKani API and yield a dict for each assignment with a non-null unlocked_at timestamp.""" """Fetch assignments from the WaniKani API and yield a dict for each assignment with a non-null unlocked_at timestamp."""
url = 'https://api.wanikani.com/v2/assignments' self._setup_cache()
headers = { headers = {
'Authorization': f'Bearer {secrets.wanikani_api_key()}', 'Authorization': f'Bearer {secrets.wanikani_api_key()}',
'Wanikani-Revision': '20170710', 'Wanikani-Revision': '20170710',
} }
url = URL_ASSIGNMENTS
while url: while url:
logger.warning('Getting: %s', url)
response = self.session.get(url, headers=headers) response = self.session.get(url, headers=headers)
response.raise_for_status() response.raise_for_status()
json_resp = response.json() json_resp = response.json()
@ -29,7 +52,8 @@ class WaniKaniLessonsFetcher(Scraper):
data_item = assignment['data'] data_item = assignment['data']
subject_id = data_item.get('subject_id') subject_id = data_item.get('subject_id')
if subject_id: if subject_id:
subj_url = f'https://api.wanikani.com/v2/subjects/{subject_id}' subj_url = URL_SUBJECTS.format(subject_id=subject_id)
logger.warning('Getting: %s', subj_url)
subj_response = self.session.get(subj_url, headers=headers) subj_response = self.session.get(subj_url, headers=headers)
subj_response.raise_for_status() subj_response.raise_for_status()
subj_json = subj_response.json() subj_json = subj_response.json()