1
0

Compare commits

...

7 Commits

Author SHA1 Message Date
0cc55e44ff Further cleanup
All checks were successful
Run Python tests (through Pytest) / Test (push) Successful in 36s
Verify Python project can be installed, loaded and have version checked / Test (push) Successful in 31s
2025-03-24 20:33:10 +01:00
0d787532d4 style: Run linter to format code in wanikani_lessons.py 2025-03-24 20:26:58 +01:00
fcb8df6619 feat: Use HTTP response timestamp for WaniKani summary instead of local time 2025-03-24 20:26:57 +01:00
cd62c9dc3f Ruff 2025-03-24 20:25:30 +01:00
ff12a6450b Adjusted wanikani 2025-03-24 20:25:13 +01:00
bfba29dfc1 style: Run linter and fix code style issues in wanikani_summary.py 2025-03-24 20:20:19 +01:00
256b6f2cda feat: Add WaniKani summary fetcher for available lessons and reviews 2025-03-24 20:20:15 +01:00

View File

@@ -1,7 +1,9 @@
import dataclasses
import datetime
import requests
import logging
from collections.abc import Iterator, Mapping
from email.utils import parsedate_to_datetime
import requests_util
@@ -13,31 +15,33 @@ logger = logging.getLogger(__name__)
# Base URL of the WaniKani REST API (version 2).
URL_API_ROOT = 'https://api.wanikani.com/v2'
# Endpoint listing the user's assignments (per-subject lesson/review state).
URL_ASSIGNMENTS = URL_API_ROOT + '/assignments'
# Endpoint summarizing the lessons and reviews currently available.
URL_SUMMARY = URL_API_ROOT + '/summary'
# Endpoint for a single subject; format with the numeric subject id.
URL_SUBJECTS = URL_API_ROOT + '/subjects/{subject_id}'
def _setup_cache(session):
    """Install rate limiting and response caching on *session* for WaniKani.

    The generic API root gets a long-lived cache, while the assignments
    endpoint — whose data changes frequently — expires after a few days.
    Both share the same request-rate budget.
    """
    # (url, cache lifetime) pairs; the more volatile endpoint expires sooner.
    limits = (
        (URL_API_ROOT, datetime.timedelta(days=90)),
        (URL_ASSIGNMENTS, datetime.timedelta(days=3)),
    )
    for url, lifetime in limits:
        requests_util.setup_limiter(
            session,
            url,
            expire_after=lifetime,
            per_minute=30,
        )
@dataclasses.dataclass(frozen=True)
class WaniKaniLessonsFetcher(Scraper):
    """Scraper that fetches WaniKani assignments and yields one row per lesson."""

    # Name of the dataset this scraper produces.
    dataset_name = 'wanikani_lessons'
    # Rows are deduplicated by comparing every column.
    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
    def _setup_cache(self):
        """Configure rate limiting and caching for the WaniKani API endpoints.

        NOTE(review): this duplicates the module-level ``_setup_cache``
        function — in this diff view one of the two is the removed side;
        confirm which version is actually kept.
        """
        # Long-lived cache for the mostly static API root resources.
        requests_util.setup_limiter(
            self.session,
            URL_API_ROOT,
            expire_after=datetime.timedelta(days=90),
            per_minute=30,
        )
        # Assignments change often, so cache them for only a few days.
        requests_util.setup_limiter(
            self.session,
            URL_ASSIGNMENTS,
            expire_after=datetime.timedelta(days=3),
            per_minute=30,
        )
def scrape(self) -> Iterator[Mapping[str, object]]:
"""Fetch assignments from the WaniKani API and yield a dict for each assignment with a non-null unlocked_at timestamp."""
self._setup_cache()
_setup_cache(self.session)
headers = {
'Authorization': f'Bearer {secrets.wanikani_api_key()}',
'Wanikani-Revision': '20170710',
@@ -61,3 +65,46 @@ class WaniKaniLessonsFetcher(Scraper):
data_item['subject_characters'] = subject_characters
yield data_item
url = json_resp.get('pages', {}).get('next_url')
def date_from_response(response) -> datetime.datetime:
    """Return the server time of *response* as an aware UTC datetime.

    Prefers the HTTP ``Date`` header so timestamps reflect the server's
    clock rather than the local one; falls back to the current UTC time
    when the header is absent or unparseable.

    The result is always timezone-aware: ``parsedate_to_datetime`` yields a
    naive datetime for an RFC 2822 ``-0000`` zone, which would otherwise
    blow up when compared against aware API timestamps downstream.
    """
    if date_header := response.headers.get('Date'):
        try:
            parsed = parsedate_to_datetime(date_header)
        except (TypeError, ValueError):
            # Malformed header: best-effort fallback to the local clock.
            parsed = None
        if parsed is not None:
            if parsed.tzinfo is None:
                # "-0000" parses as naive; by RFC semantics it means UTC.
                parsed = parsed.replace(tzinfo=datetime.timezone.utc)
            return parsed
    return datetime.datetime.now(datetime.timezone.utc)
@dataclasses.dataclass(frozen=True)
class WaniKaniSummaryFetcher(Scraper):
    """Scraper for the WaniKani summary endpoint.

    Yields a single row with the number of lessons and reviews currently
    available to the user, timestamped with the server's clock.
    """

    dataset_name: str = 'wanikani_summary'
    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
    # The capture time changes on every run, so exclude it from dedup.
    deduplicate_ignore_columns = ['time.current']

    def scrape(self) -> Iterator[dict]:
        """Fetch the WaniKani summary and yield available lesson/review counts."""
        _setup_cache(self.session)
        headers = {
            'Authorization': f'Bearer {secrets.wanikani_api_key()}',
            'Wanikani-Revision': '20170710',
        }
        # Use the session so the rate limiter/cache configured above actually
        # applies; a bare requests.get would bypass it.
        response = self.session.get(URL_SUMMARY, headers=headers)
        response.raise_for_status()
        data = response.json()

        lessons = data.get('data', {}).get('lessons', [])
        total_lessons = sum(len(lesson.get('subject_ids', [])) for lesson in lessons)

        # Count only reviews whose availability time has already passed,
        # judged against the server's own clock (HTTP Date header).
        now = date_from_response(response)
        total_reviews = 0
        for review in data.get('data', {}).get('reviews', []):
            available_at_str = review.get('available_at')
            if not available_at_str:
                continue
            # The API returns ISO-8601 with a trailing 'Z'; normalize it so
            # datetime.fromisoformat accepts it on Python < 3.11.
            available_at = datetime.datetime.fromisoformat(
                available_at_str.replace('Z', '+00:00'),
            )
            if available_at <= now:
                total_reviews += len(review.get('subject_ids', []))

        yield {
            'time.current': now,
            'lessons.available': total_lessons,
            'reviews.available': total_reviews,
        }