1
0

Adjusted wanikani

This commit is contained in:
Jon Michael Aanes 2025-03-24 20:25:13 +01:00
parent bfba29dfc1
commit ff12a6450b
2 changed files with 57 additions and 66 deletions

View File

@ -13,31 +13,32 @@ logger = logging.getLogger(__name__)
# Base URL that every WaniKani v2 endpoint below is built from.
URL_API_ROOT = 'https://api.wanikani.com/v2'
# Assignments endpoint; also used as a URL prefix when registering the
# shorter-lived cache policy in `_setup_cache`.
URL_ASSIGNMENTS = URL_API_ROOT + '/assignments'
# Summary endpoint queried by WaniKaniSummaryFetcher.
URL_SUMMARY = URL_API_ROOT + '/summary'
# Template for a single subject; the `{subject_id}` placeholder must be
# substituted before the URL is used.
URL_SUBJECTS = URL_API_ROOT + '/subjects/{subject_id}'
def _setup_cache(session):
    """Register the WaniKani cache/limiter policies on *session*.

    ``requests_util.setup_limiter`` is called twice, in this order: once for
    the API root with a 90-day ``expire_after`` and once for the assignments
    endpoint with a 3-day ``expire_after``.  Both registrations pass
    ``per_minute=30``.
    """
    expiry_days_by_prefix = (
        (URL_API_ROOT, 90),
        (URL_ASSIGNMENTS, 3),
    )
    for url_prefix, days in expiry_days_by_prefix:
        requests_util.setup_limiter(
            session,
            url_prefix,
            expire_after=datetime.timedelta(days=days),
            per_minute=30,
        )
@dataclasses.dataclass(frozen=True)
class WaniKaniLessonsFetcher(Scraper):
dataset_name = 'wanikani_lessons'
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
# Register the cache/limiter policies on this fetcher's own session.
# Two registrations are made, in order: the API root with a 90-day expiry,
# then the assignments endpoint with a 3-day expiry; both pass per_minute=30.
# NOTE(review): these are the same registrations as the module-level
# `_setup_cache(session)` helper, which takes the session as an argument.
def _setup_cache(self):
requests_util.setup_limiter(
self.session,
URL_API_ROOT,
expire_after=datetime.timedelta(days=90),
per_minute=30,
)
requests_util.setup_limiter(
self.session,
URL_ASSIGNMENTS,
expire_after=datetime.timedelta(days=3),
per_minute=30,
)
def scrape(self) -> Iterator[Mapping[str, object]]:
"""Fetch assignments from the WaniKani API and yield a dict for each assignment with a non-null unlocked_at timestamp."""
self._setup_cache()
_setup_cache(self.session)
headers = {
'Authorization': f'Bearer {secrets.wanikani_api_key()}',
'Wanikani-Revision': '20170710',
@ -61,3 +62,44 @@ class WaniKaniLessonsFetcher(Scraper):
data_item['subject_characters'] = subject_characters
yield data_item
url = json_resp.get('pages', {}).get('next_url')
@dataclasses.dataclass(frozen=True)
class WaniKaniSummaryFetcher(Scraper):
    """Scraper that records how many lessons and reviews are available now.

    Each run performs one GET against ``URL_SUMMARY`` and yields a single row
    containing the current UTC time, the number of available lessons and the
    number of reviews whose ``available_at`` is not in the future.
    """

    dataset_name: str = 'wanikani_summary'

    # ``deduplicate_mode`` is defined exactly once.  A plain class attribute
    # of the same name placed before this method would be rebound by the
    # method definition and therefore never take effect.
    @staticmethod
    def deduplicate_mode() -> DeduplicateMode:
        """Return ``DeduplicateMode.BY_ALL_COLUMNS`` for this dataset."""
        return DeduplicateMode.BY_ALL_COLUMNS

    def scrape(self) -> Iterator[dict]:
        """Fetch the WaniKani summary and yield one row of availability counts.

        Yields:
            A dict with the keys ``time`` (timezone-aware UTC ``datetime`` of
            the evaluation), ``lessons_available`` and ``reviews_available``.

        Raises:
            Whatever ``response.raise_for_status()`` raises for a non-success
            HTTP status, and ``ValueError`` if an ``available_at`` value is
            not a valid ISO 8601 timestamp.
        """
        _setup_cache(self.session)
        headers = {
            'Authorization': f'Bearer {secrets.wanikani_api_key()}',
            'Wanikani-Revision': '20170710',
        }
        response = self.session.get(URL_SUMMARY, headers=headers)
        response.raise_for_status()

        # Read the payload's 'data' object once; a missing key behaves like
        # an empty summary and produces zero counts.
        summary = response.json().get('data', {})

        total_lessons = sum(
            len(lesson.get('subject_ids', []))
            for lesson in summary.get('lessons', [])
        )

        now = datetime.datetime.now(datetime.timezone.utc)
        total_reviews = 0
        for review in summary.get('reviews', []):
            available_at_str = review.get('available_at')
            if not available_at_str:
                continue
            # Rewrite a trailing 'Z' as an explicit offset so that
            # fromisoformat() also accepts it on Python versions before 3.11.
            available_at = datetime.datetime.fromisoformat(
                available_at_str.replace('Z', '+00:00'),
            )
            if available_at <= now:
                total_reviews += len(review.get('subject_ids', []))
        # The loop variable is intentionally not deleted afterwards: with an
        # empty review list it is never bound, and ``del`` would then raise.

        yield {
            'time': now,
            'lessons_available': total_lessons,
            'reviews_available': total_reviews,
        }

View File

@ -1,51 +0,0 @@
from __future__ import annotations
from collections.abc import Iterator
from dataclasses import dataclass
from datetime import datetime, timezone
import requests
from personal_data.data import DeduplicateMode
from personal_data.fetchers import Scraper
from personal_data.secrets import wanikani_api_key
@dataclass(frozen=True)
class WaniKaniSummaryFetcher(Scraper):
    """Scraper producing the current WaniKani lesson and review counts."""

    dataset_name: str = 'wanikani_summary'

    @staticmethod
    def deduplicate_mode() -> DeduplicateMode:
        """Return ``DeduplicateMode.BY_ALL_COLUMNS`` for this dataset."""
        return DeduplicateMode.BY_ALL_COLUMNS

    def scrape(self) -> Iterator[dict]:
        """Request the WaniKani summary and yield a single row of counts.

        The row holds ``lessons_available`` (total subject ids across all
        lesson entries) and ``reviews_available`` (total subject ids across
        review entries whose ``available_at`` is at or before the current
        UTC time).
        """
        api_token = wanikani_api_key()
        auth_headers = {
            'Authorization': f'Bearer {api_token}',
            'Wanikani-Revision': '20170710',
        }
        reply = requests.get(
            'https://api.wanikani.com/v2/summary',
            headers=auth_headers,
        )
        reply.raise_for_status()
        payload = reply.json()

        lesson_count = sum(
            len(entry.get('subject_ids', []))
            for entry in payload.get('data', {}).get('lessons', [])
        )

        review_entries = payload.get('data', {}).get('reviews', [])
        cutoff = datetime.now(timezone.utc)

        def is_due(entry) -> bool:
            # Entries without a timestamp are never counted.
            stamp = entry.get('available_at')
            if not stamp:
                return False
            return datetime.fromisoformat(stamp.replace('Z', '+00:00')) <= cutoff

        review_count = sum(
            len(entry.get('subject_ids', []))
            for entry in review_entries
            if is_due(entry)
        )

        yield {
            'lessons_available': lesson_count,
            'reviews_available': review_count,
        }