Compare commits
7 Commits
4068834f19
...
0cc55e44ff
Author | SHA1 | Date | |
---|---|---|---|
0cc55e44ff | |||
0d787532d4 | |||
fcb8df6619 | |||
cd62c9dc3f | |||
ff12a6450b | |||
bfba29dfc1 | |||
256b6f2cda |
|
@ -1,7 +1,9 @@
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import datetime
|
import datetime
|
||||||
|
import requests
|
||||||
import logging
|
import logging
|
||||||
from collections.abc import Iterator, Mapping
|
from collections.abc import Iterator, Mapping
|
||||||
|
from email.utils import parsedate_to_datetime
|
||||||
|
|
||||||
import requests_util
|
import requests_util
|
||||||
|
|
||||||
|
@ -13,31 +15,33 @@ logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
URL_API_ROOT = 'https://api.wanikani.com/v2'
|
URL_API_ROOT = 'https://api.wanikani.com/v2'
|
||||||
URL_ASSIGNMENTS = URL_API_ROOT + '/assignments'
|
URL_ASSIGNMENTS = URL_API_ROOT + '/assignments'
|
||||||
|
URL_SUMMARY = URL_API_ROOT + '/summary'
|
||||||
URL_SUBJECTS = URL_API_ROOT + '/subjects/{subject_id}'
|
URL_SUBJECTS = URL_API_ROOT + '/subjects/{subject_id}'
|
||||||
|
|
||||||
|
|
||||||
|
def _setup_cache(session):
    """Attach rate limiting and response caching for the WaniKani API to *session*.

    Generic API traffic is cached for 90 days; the assignments endpoint,
    whose data changes frequently, only for 3 days.  Both are throttled to
    30 requests per minute.
    """
    endpoint_lifetimes = (
        (URL_API_ROOT, datetime.timedelta(days=90)),
        (URL_ASSIGNMENTS, datetime.timedelta(days=3)),
    )
    for endpoint, lifetime in endpoint_lifetimes:
        requests_util.setup_limiter(
            session,
            endpoint,
            expire_after=lifetime,
            per_minute=30,
        )
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass(frozen=True)
|
@dataclasses.dataclass(frozen=True)
|
||||||
class WaniKaniLessonsFetcher(Scraper):
|
class WaniKaniLessonsFetcher(Scraper):
|
||||||
dataset_name = 'wanikani_lessons'
|
dataset_name = 'wanikani_lessons'
|
||||||
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
|
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
|
||||||
|
|
||||||
def _setup_cache(self):
|
|
||||||
requests_util.setup_limiter(
|
|
||||||
self.session,
|
|
||||||
URL_API_ROOT,
|
|
||||||
expire_after=datetime.timedelta(days=90),
|
|
||||||
per_minute=30,
|
|
||||||
)
|
|
||||||
requests_util.setup_limiter(
|
|
||||||
self.session,
|
|
||||||
URL_ASSIGNMENTS,
|
|
||||||
expire_after=datetime.timedelta(days=3),
|
|
||||||
per_minute=30,
|
|
||||||
)
|
|
||||||
|
|
||||||
def scrape(self) -> Iterator[Mapping[str, object]]:
|
def scrape(self) -> Iterator[Mapping[str, object]]:
|
||||||
"""Fetch assignments from the WaniKani API and yield a dict for each assignment with a non-null unlocked_at timestamp."""
|
"""Fetch assignments from the WaniKani API and yield a dict for each assignment with a non-null unlocked_at timestamp."""
|
||||||
self._setup_cache()
|
_setup_cache(self.session)
|
||||||
headers = {
|
headers = {
|
||||||
'Authorization': f'Bearer {secrets.wanikani_api_key()}',
|
'Authorization': f'Bearer {secrets.wanikani_api_key()}',
|
||||||
'Wanikani-Revision': '20170710',
|
'Wanikani-Revision': '20170710',
|
||||||
|
@ -61,3 +65,46 @@ class WaniKaniLessonsFetcher(Scraper):
|
||||||
data_item['subject_characters'] = subject_characters
|
data_item['subject_characters'] = subject_characters
|
||||||
yield data_item
|
yield data_item
|
||||||
url = json_resp.get('pages', {}).get('next_url')
|
url = json_resp.get('pages', {}).get('next_url')
|
||||||
|
|
||||||
|
def date_from_response(response) -> datetime.datetime:
|
||||||
|
if date_header := response.headers.get('Date'):
|
||||||
|
return parsedate_to_datetime(date_header)
|
||||||
|
return datetime.datetime.now(datetime.timezone.utc)
|
||||||
|
|
||||||
|
@dataclasses.dataclass(frozen=True)
class WaniKaniSummaryFetcher(Scraper):
    """Scrape the WaniKani summary endpoint for currently available work.

    Yields a single row per run with the number of lessons and reviews
    available at the time of the request.
    """

    dataset_name: str = 'wanikani_summary'
    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
    # time.current changes every run; ignore it so otherwise-identical rows dedupe.
    deduplicate_ignore_columns = ['time.current']

    def scrape(self) -> Iterator[dict]:
        """Yield one dict with current lesson/review availability counts.

        Raises ``requests.HTTPError`` on a non-2xx API response.
        """
        _setup_cache(self.session)
        headers = {
            'Authorization': f'Bearer {secrets.wanikani_api_key()}',
            'Wanikani-Revision': '20170710',
        }
        # Use the rate-limited/cached session configured above; a bare
        # requests.get would bypass the limiter installed by _setup_cache.
        response = self.session.get(URL_SUMMARY, headers=headers)
        response.raise_for_status()
        data = response.json()

        lessons = data.get('data', {}).get('lessons', [])
        total_lessons = sum(len(lesson.get('subject_ids', [])) for lesson in lessons)

        # Compare review availability against the server's own clock so a
        # skewed local clock doesn't over/under-count.
        now = date_from_response(response)
        total_reviews = 0
        for review in data.get('data', {}).get('reviews', []):
            available_at_str = review.get('available_at')
            if not available_at_str:
                continue
            # WaniKani timestamps end in 'Z'; rewrite to an ISO-8601 offset
            # that datetime.fromisoformat accepts.
            available_at = datetime.datetime.fromisoformat(
                available_at_str.replace('Z', '+00:00'),
            )
            if available_at <= now:
                total_reviews += len(review.get('subject_ids', []))

        yield {
            'time.current': now,
            'lessons.available': total_lessons,
            'reviews.available': total_reviews,
        }
|
||||||
|
|
Loading…
Reference in New Issue
Block a user