Support dataclass objects
This commit is contained in:
parent
e35948a5d1
commit
6749479f38
|
@ -12,12 +12,20 @@ from personal_data.data import DeduplicateMode, Scraper
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@dataclasses.dataclass(frozen=True)
|
||||||
|
class MyAnimeListAnime:
|
||||||
|
series_name_eng: str
|
||||||
|
series_name: str
|
||||||
|
series_myanimelist_url: urllib.parse.ParseResult
|
||||||
|
series_icon: urllib.parse.ParseResult
|
||||||
|
me_score: int
|
||||||
|
|
||||||
@dataclasses.dataclass(frozen=True)
|
@dataclasses.dataclass(frozen=True)
|
||||||
class MyAnimeList(Scraper):
|
class MyAnimeList(Scraper):
|
||||||
dataset_name = 'myanimelist_anime'
|
dataset_name = 'myanimelist_anime'
|
||||||
deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN
|
deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN
|
||||||
|
|
||||||
def scrape(self) -> Iterator[Mapping[str, object]]:
|
def scrape(self) -> Iterator[MyAnimeListAnime]:
|
||||||
username = 'WhereTheDogGoin'
|
username = 'WhereTheDogGoin'
|
||||||
url = f'https://myanimelist.net/animelist/{username}'
|
url = f'https://myanimelist.net/animelist/{username}'
|
||||||
response = self.session.get(url)
|
response = self.session.get(url)
|
||||||
|
@ -28,12 +36,12 @@ class MyAnimeList(Scraper):
|
||||||
data_items = json.loads(data_items_soup.get('data-items'))
|
data_items = json.loads(data_items_soup.get('data-items'))
|
||||||
|
|
||||||
for data_item in data_items:
|
for data_item in data_items:
|
||||||
yield {
|
yield MyAnimeListAnime(
|
||||||
'series.name_eng': data_item.get('anime_title_eng') or data_item.get('anime_title'),
|
series_name_eng= data_item.get('anime_title_eng') or data_item.get('anime_title'),
|
||||||
'series.name': data_item.get('anime_title') or data_item.get('anime_title_eng'),
|
series_name= data_item.get('anime_title') or data_item.get('anime_title_eng'),
|
||||||
'series.myanimelist_url': urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_url'])),
|
series_myanimelist_url= urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_url'])),
|
||||||
'series.icon': urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_image_path'])),
|
series_icon= urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_image_path'])),
|
||||||
'me.score': data_item.get('score'),
|
me_score= data_item.get('score'),
|
||||||
}
|
)
|
||||||
|
|
||||||
del data_item
|
del data_item
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import _csv
|
import _csv
|
||||||
import csv
|
import csv
|
||||||
|
import dataclasses
|
||||||
import datetime
|
import datetime
|
||||||
import io
|
import io
|
||||||
import logging
|
import logging
|
||||||
|
@ -88,7 +89,14 @@ def deduplicate_dicts(
|
||||||
return dicts, fieldnames
|
return dicts, fieldnames
|
||||||
|
|
||||||
|
|
||||||
|
def dataclass_to_dict(obj) -> dict[str, Any]:
    """Convert a dataclass instance into a dict with dotted key names.

    The instance is expanded with ``dataclasses.asdict``; in every field
    name only the first underscore is replaced by a dot (for example
    ``series_name_eng`` becomes ``series.name_eng``). Names without an
    underscore are kept unchanged.
    """
    renamed: dict[str, Any] = {}
    for field_name, value in dataclasses.asdict(obj).items():
        head, underscore, tail = field_name.partition('_')
        # partition() returns an empty separator when no underscore exists.
        key = f'{head}.{tail}' if underscore else field_name
        renamed[key] = value
    return renamed
|
||||||
|
|
||||||
|
|
||||||
def normalize_dict(d: dict[str, Any] | frozendict[str, Any]) -> frozendict[str, Any]:
    """Return an immutable mapping of *d* with CSV-normalised values.

    An argument that is neither a ``dict`` nor a ``frozendict`` is first
    converted with ``dataclass_to_dict``. Every value is then passed through
    ``csv_import.csv_safe_value`` followed by ``csv_import.csv_str_to_value``;
    entries whose resulting value is ``None`` are left out.
    """
    if not isinstance(d, (dict, frozendict)):
        d = dataclass_to_dict(d)

    normalized = {}
    for key, raw_value in d.items():
        value = csv_import.csv_str_to_value(csv_import.csv_safe_value(raw_value))
        if value is not None:
            normalized[key] = value
    return frozendict(normalized)
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user