From 0cc55e44ff90eaea717c96e013fbe2417701aa75 Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Mon, 24 Mar 2025 20:33:10 +0100 Subject: [PATCH] Further cleanup --- personal_data/fetchers/wanikani_lessons.py | 28 +++++++++------------- 1 file changed, 11 insertions(+), 17 deletions(-) diff --git a/personal_data/fetchers/wanikani_lessons.py b/personal_data/fetchers/wanikani_lessons.py index 99cbd20..20ceff8 100644 --- a/personal_data/fetchers/wanikani_lessons.py +++ b/personal_data/fetchers/wanikani_lessons.py @@ -1,5 +1,6 @@ import dataclasses import datetime +import requests import logging from collections.abc import Iterator, Mapping from email.utils import parsedate_to_datetime @@ -65,15 +66,16 @@ class WaniKaniLessonsFetcher(Scraper): yield data_item url = json_resp.get('pages', {}).get('next_url') +def date_from_response(response) -> datetime.datetime: + if date_header := response.headers.get('Date'): + return parsedate_to_datetime(date_header) + return datetime.datetime.now(datetime.timezone.utc) @dataclasses.dataclass(frozen=True) class WaniKaniSummaryFetcher(Scraper): dataset_name: str = 'wanikani_summary' deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS - - @staticmethod - def deduplicate_mode() -> DeduplicateMode: - return DeduplicateMode.BY_ALL_COLUMNS + deduplicate_ignore_columns = ['time.current'] def scrape(self) -> Iterator[dict]: _setup_cache(self.session) @@ -81,7 +83,7 @@ class WaniKaniSummaryFetcher(Scraper): 'Authorization': f'Bearer {secrets.wanikani_api_key()}', 'Wanikani-Revision': '20170710', } - response = self.session.get(URL_SUMMARY, headers=headers) + response = requests.get(URL_SUMMARY, headers=headers) response.raise_for_status() data = response.json() @@ -89,16 +91,8 @@ class WaniKaniSummaryFetcher(Scraper): total_lessons = sum(len(lesson.get('subject_ids', [])) for lesson in lessons) reviews = data.get('data', {}).get('reviews', []) - date_hdr = response.headers.get('Date') - try: - now = ( - parsedate_to_datetime(date_hdr) - if date_hdr - else datetime.datetime.now(datetime.timezone.utc) - ) - except Exception: - now = datetime.datetime.now(datetime.timezone.utc) total_reviews = 0 + now = date_from_response(response) for review in reviews: available_at_str = review.get('available_at') if available_at_str: @@ -110,7 +104,7 @@ class WaniKaniSummaryFetcher(Scraper): del review yield { - 'time': now, - 'lessons_available': total_lessons, - 'reviews_available': total_reviews, + 'time.current': now, + 'lessons.available': total_lessons, + 'reviews.available': total_reviews, }