40 lines
1.6 KiB
Python
40 lines
1.6 KiB
Python
import dataclasses
|
|
import logging
|
|
from collections.abc import Iterator, Mapping
|
|
|
|
from personal_data.data import DeduplicateMode, Scraper
|
|
|
|
from .. import secrets
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
class WaniKaniLessonsFetcher(Scraper):
    """Scraper that downloads assignments from the WaniKani v2 API.

    Every yielded row is the ``data`` object of one assignment. When the
    assignment carries a ``subject_id``, the row is extended with a
    ``subject_characters`` entry fetched from the matching subject resource.
    """

    dataset_name = 'wanikani_lessons'
    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS

    def scrape(self) -> Iterator[Mapping[str, object]]:
        """Yield one mapping per assignment returned by the WaniKani API.

        Walks the paginated ``/v2/assignments`` collection by following
        ``pages.next_url`` until it is absent or empty. No filtering is applied:
        every assignment on every page is yielded.

        Yields:
            The assignment's ``data`` dict (mutated in place), with
            ``subject_characters`` added when ``subject_id`` is truthy.

        Raises:
            Whatever ``raise_for_status()`` raises for a failed HTTP response,
            either on an assignments page or on a subject lookup.
        """
        # NOTE(review): an earlier docstring said only assignments with a
        # non-null ``unlocked_at`` are yielded, but no such filter exists in
        # the code. Confirm which behaviour is intended.
        url = 'https://api.wanikani.com/v2/assignments'
        headers = {
            'Authorization': f'Bearer {secrets.wanikani_api_key()}',
            'Wanikani-Revision': '20170710',
        }
        # Per-run memo so a subject referenced by several assignments is only
        # requested once.
        characters_by_subject: dict[object, object] = {}

        while url:
            response = self.session.get(url, headers=headers)
            response.raise_for_status()
            page = response.json()

            for assignment in page.get('data', []):
                row = assignment['data']
                subject_id = row.get("subject_id")
                if subject_id:
                    if subject_id not in characters_by_subject:
                        characters_by_subject[subject_id] = (
                            self._fetch_subject_characters(subject_id, headers)
                        )
                    row["subject_characters"] = characters_by_subject[subject_id]
                yield row

            # A missing or empty ``next_url`` ends the pagination loop.
            url = page.get('pages', {}).get('next_url')

    def _fetch_subject_characters(
        self,
        subject_id: object,
        headers: Mapping[str, str],
    ) -> object:
        """Return the ``characters`` field of one subject, or None if absent."""
        subj_url = f'https://api.wanikani.com/v2/subjects/{subject_id}'
        subj_response = self.session.get(subj_url, headers=headers)
        subj_response.raise_for_status()
        return subj_response.json().get("data", {}).get("characters")
|