
Support dataclass objects
All checks were successful
Run Python tests (through Pytest) / Test (push) Successful in 34s
Verify Python project can be installed, loaded and have version checked / Test (push) Successful in 30s

Jon Michael Aanes 2025-02-01 20:33:54 +01:00
parent e35948a5d1
commit 6749479f38
2 changed files with 24 additions and 8 deletions

First changed file (the MyAnimeList scraper):

@@ -12,12 +12,20 @@ from personal_data.data import DeduplicateMode, Scraper
 logger = logging.getLogger(__name__)
 
 
+@dataclasses.dataclass(frozen=True)
+class MyAnimeListAnime:
+    series_name_eng: str
+    series_name: str
+    series_myanimelist_url: urllib.parse.ParseResult
+    series_icon: urllib.parse.ParseResult
+    me_score: int
+
+
 @dataclasses.dataclass(frozen=True)
 class MyAnimeList(Scraper):
     dataset_name = 'myanimelist_anime'
     deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN
 
-    def scrape(self) -> Iterator[Mapping[str, object]]:
+    def scrape(self) -> Iterator[MyAnimeListAnime]:
         username = 'WhereTheDogGoin'
         url = f'https://myanimelist.net/animelist/{username}'
         response = self.session.get(url)
@@ -28,12 +36,12 @@ class MyAnimeList(Scraper):
         data_items = json.loads(data_items_soup.get('data-items'))
 
         for data_item in data_items:
-            yield {
-                'series.name_eng': data_item.get('anime_title_eng') or data_item.get('anime_title'),
-                'series.name': data_item.get('anime_title') or data_item.get('anime_title_eng'),
-                'series.myanimelist_url': urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_url'])),
-                'series.icon': urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_image_path'])),
-                'me.score': data_item.get('score'),
-            }
+            yield MyAnimeListAnime(
+                series_name_eng=data_item.get('anime_title_eng') or data_item.get('anime_title'),
+                series_name=data_item.get('anime_title') or data_item.get('anime_title_eng'),
+                series_myanimelist_url=urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_url'])),
+                series_icon=urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_image_path'])),
+                me_score=data_item.get('score'),
+            )
         del data_item
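
The URL fields are produced by resolving each item's site-relative path against the list page and then parsing the result, which is why the dataclass types them as urllib.parse.ParseResult. A minimal standalone sketch of that pattern (the anime path here is a made-up example value, not data from the scrape):

    import urllib.parse

    base = 'https://myanimelist.net/animelist/WhereTheDogGoin'
    # urljoin resolves the site-relative path against the list page URL;
    # urlparse then splits the absolute URL into a structured ParseResult.
    joined = urllib.parse.urljoin(base, '/anime/164/Mononoke_Hime')
    parsed = urllib.parse.urlparse(joined)
    print(parsed.netloc, parsed.path)
    # myanimelist.net /anime/164/Mononoke_Hime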

Second changed file (the CSV/dict utilities):

@@ -1,5 +1,6 @@
 import _csv
 import csv
+import dataclasses
 import datetime
 import io
 import logging
@@ -88,7 +89,14 @@ def deduplicate_dicts(
     return dicts, fieldnames
 
 
+def dataclass_to_dict(obj) -> dict[str, Any]:
+    d = dataclasses.asdict(obj)
+    return {k.replace('_', '.', 1): v for k, v in d.items()}
+
+
 def normalize_dict(d: dict[str, Any] | frozendict[str, Any]) -> frozendict[str, Any]:
+    if not isinstance(d, dict) and not isinstance(d, frozendict):
+        d = dataclass_to_dict(d)
     safe_values = [(k, csv_import.csv_str_to_value(csv_import.csv_safe_value(v))) for k, v in d.items()]
     return frozendict({k: v for k, v in safe_values if v is not None})
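
The new dataclass_to_dict bridges the two naming schemes: dataclasses.asdict() yields underscore field names, and replace('_', '.', 1) swaps only the first underscore for a dot, so series_name_eng maps back to the old column key series.name_eng rather than series.name.eng. A self-contained sketch of that mapping, using a stand-in dataclass rather than the scraper's real type:

    import dataclasses
    from typing import Any


    @dataclasses.dataclass(frozen=True)
    class Example:
        series_name_eng: str
        me_score: int


    def dataclass_to_dict(obj) -> dict[str, Any]:
        # asdict() gives underscore keys; only the first '_' becomes '.'.
        d = dataclasses.asdict(obj)
        return {k.replace('_', '.', 1): v for k, v in d.items()}


    print(dataclass_to_dict(Example('Princess Mononoke', 9)))
    # {'series.name_eng': 'Princess Mononoke', 'me.score': 9}

With that round-trip in place, normalize_dict accepts either plain dicts or dataclass instances and emits the same frozendict rows as before.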