Compare commits
No commits in common. "6ef5cfff2d735662b3669c3d264947fd2c97782b" and "1590586c320990ac7b73b39aaddb056ab8c7d752" have entirely different histories.
6ef5cfff2d
...
1590586c32
|
@ -1,63 +1,31 @@
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import datetime
|
|
||||||
import logging
|
import logging
|
||||||
from collections.abc import Iterator, Mapping
|
from collections.abc import Iterator, Mapping
|
||||||
|
|
||||||
import requests_util
|
|
||||||
|
|
||||||
from personal_data.data import DeduplicateMode, Scraper
|
from personal_data.data import DeduplicateMode, Scraper
|
||||||
|
|
||||||
from .. import secrets
|
from .. import secrets
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
URL_API_ROOT = 'https://api.wanikani.com/v2'
|
|
||||||
URL_ASSIGNMENTS = URL_API_ROOT + '/assignments'
|
|
||||||
URL_SUBJECTS = URL_API_ROOT + '/subjects/{subject_id}'
|
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass(frozen=True)
|
@dataclasses.dataclass(frozen=True)
|
||||||
class WaniKaniLessonsFetcher(Scraper):
|
class WaniKaniLessonsFetcher(Scraper):
|
||||||
dataset_name = 'wanikani_lessons'
|
dataset_name = 'wanikani_lessons'
|
||||||
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
|
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
|
||||||
|
|
||||||
def _setup_cache(self):
|
|
||||||
requests_util.setup_limiter(
|
|
||||||
self.session,
|
|
||||||
URL_API_ROOT,
|
|
||||||
expire_after=datetime.timedelta(days=90),
|
|
||||||
per_minute=30,
|
|
||||||
)
|
|
||||||
requests_util.setup_limiter(
|
|
||||||
self.session,
|
|
||||||
URL_ASSIGNMENTS,
|
|
||||||
expire_after=datetime.timedelta(days=3),
|
|
||||||
per_minute=30,
|
|
||||||
)
|
|
||||||
|
|
||||||
def scrape(self) -> Iterator[Mapping[str, object]]:
|
def scrape(self) -> Iterator[Mapping[str, object]]:
|
||||||
"""Fetch assignments from the WaniKani API and yield a dict for each assignment with a non-null unlocked_at timestamp."""
|
"""Fetch assignments from the WaniKani API and yield a dict for each assignment with a non-null unlocked_at timestamp."""
|
||||||
self._setup_cache()
|
url = 'https://api.wanikani.com/v2/assignments'
|
||||||
headers = {
|
headers = {
|
||||||
'Authorization': f'Bearer {secrets.wanikani_api_key()}',
|
'Authorization': f'Bearer {secrets.wanikani_api_key()}',
|
||||||
'Wanikani-Revision': '20170710',
|
'Wanikani-Revision': '20170710',
|
||||||
}
|
}
|
||||||
url = URL_ASSIGNMENTS
|
|
||||||
while url:
|
while url:
|
||||||
logger.warning('Getting: %s', url)
|
|
||||||
response = self.session.get(url, headers=headers)
|
response = self.session.get(url, headers=headers)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
json_resp = response.json()
|
json_resp = response.json()
|
||||||
for assignment in json_resp.get('data', []):
|
for assignment in json_resp.get('data', []):
|
||||||
data_item = assignment['data']
|
data_item = assignment['data']
|
||||||
subject_id = data_item.get('subject_id')
|
|
||||||
if subject_id:
|
|
||||||
subj_url = URL_SUBJECTS.format(subject_id=subject_id)
|
|
||||||
logger.warning('Getting: %s', subj_url)
|
|
||||||
subj_response = self.session.get(subj_url, headers=headers)
|
|
||||||
subj_response.raise_for_status()
|
|
||||||
subj_json = subj_response.json()
|
|
||||||
subject_characters = subj_json.get('data', {}).get('characters')
|
|
||||||
data_item['subject_characters'] = subject_characters
|
|
||||||
yield data_item
|
yield data_item
|
||||||
url = json_resp.get('pages', {}).get('next_url')
|
url = json_resp.get('pages', {}).get('next_url')
|
||||||
|
|
Loading…
Reference in New Issue
Block a user