From 6749479f383043fc0fcc5a78ba0c2be77dc9b385 Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Sat, 1 Feb 2025 20:33:54 +0100 Subject: [PATCH] Support dataclass objects --- personal_data/fetchers/myanimelist.py | 24 ++++++++++++++++-------- personal_data/util.py | 8 ++++++++ 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/personal_data/fetchers/myanimelist.py b/personal_data/fetchers/myanimelist.py index 2ccf06a..eb9f333 100644 --- a/personal_data/fetchers/myanimelist.py +++ b/personal_data/fetchers/myanimelist.py @@ -12,12 +12,20 @@ from personal_data.data import DeduplicateMode, Scraper logger = logging.getLogger(__name__) +@dataclasses.dataclass(frozen=True) +class MyAnimeListAnime: + series_name_eng: str + series_name: str + series_myanimelist_url: urllib.parse.ParseResult + series_icon: urllib.parse.ParseResult + me_score: int + @dataclasses.dataclass(frozen=True) class MyAnimeList(Scraper): dataset_name = 'myanimelist_anime' deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN - def scrape(self) -> Iterator[Mapping[str, object]]: + def scrape(self) -> Iterator[MyAnimeListAnime]: username = 'WhereTheDogGoin' url = f'https://myanimelist.net/animelist/{username}' response = self.session.get(url) @@ -28,12 +36,12 @@ class MyAnimeList(Scraper): data_items = json.loads(data_items_soup.get('data-items')) for data_item in data_items: - yield { - 'series.name_eng': data_item.get('anime_title_eng') or data_item.get('anime_title'), - 'series.name': data_item.get('anime_title') or data_item.get('anime_title_eng'), - 'series.myanimelist_url': urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_url'])), - 'series.icon': urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_image_path'])), - 'me.score': data_item.get('score'), - } + yield MyAnimeListAnime( + series_name_eng=data_item.get('anime_title_eng') or data_item.get('anime_title'), + series_name=data_item.get('anime_title') or data_item.get('anime_title_eng'), + 
series_myanimelist_url=urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_url'])), + series_icon=urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_image_path'])), + me_score=data_item.get('score'), + ) del data_item diff --git a/personal_data/util.py b/personal_data/util.py index c281436..32e4135 100644 --- a/personal_data/util.py +++ b/personal_data/util.py @@ -1,5 +1,6 @@ import _csv import csv +import dataclasses import datetime import io import logging @@ -88,7 +89,14 @@ def deduplicate_dicts( return dicts, fieldnames +def dataclass_to_dict(obj) -> dict[str, Any]: + d = dataclasses.asdict(obj) + return {k.replace('_', '.', 1): v for k, v in d.items()} + + def normalize_dict(d: dict[str, Any] | frozendict[str, Any]) -> frozendict[str, Any]: + if not isinstance(d, (dict, frozendict)): + d = dataclass_to_dict(d) safe_values = [(k, csv_import.csv_str_to_value(csv_import.csv_safe_value(v))) for k, v in d.items() ] return frozendict( {k:v for k,v in safe_values if v is not None})