diff --git a/personal_data/csv_import.py b/personal_data/csv_import.py index 2fed13c..5416275 100644 --- a/personal_data/csv_import.py +++ b/personal_data/csv_import.py @@ -17,6 +17,18 @@ csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True) T = typing.TypeVar('T') +def csv_safe_value(v: Any) -> str: + if isinstance(v, urllib.parse.ParseResult): + return v.geturl() + if isinstance(v, datetime.datetime): + if v.tzinfo is None or v.tzinfo != datetime.UTC: + msg = f'Timezone must be UTC: {v}' + raise ValueError(msg) + if isinstance(v, urllib.parse.ParseResult): + return v.geturl() + return str(v) + + def try_value(fn: Callable[[str], T], s: str) -> T | None: try: return fn(s) diff --git a/personal_data/fetchers/myanimelist.py b/personal_data/fetchers/myanimelist.py index e2c7124..82ea3b1 100644 --- a/personal_data/fetchers/myanimelist.py +++ b/personal_data/fetchers/myanimelist.py @@ -32,9 +32,10 @@ class MyAnimeList(Scraper): for data_item in data_items: print(data_item) yield { - 'series.name': data_item.get('anime_title_eng') or data_item.get('anime_title'), - 'series.myanimelist_url': urllib.parse.urljoin(url, data_item['anime_url']), - 'series.icon': urllib.parse.urljoin(url, data_item['anime_image_path']), + 'series.name_eng': data_item.get('anime_title_eng') or data_item.get('anime_title'), + 'series.name': data_item.get('anime_title') or data_item.get('anime_title_eng'), + 'series.myanimelist_url': urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_url'])), + 'series.icon': urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_image_path'])), 'me.score': data_item.get('score'), } diff --git a/personal_data/util.py b/personal_data/util.py index 26631d5..c281436 100644 --- a/personal_data/util.py +++ b/personal_data/util.py @@ -15,16 +15,6 @@ from . 
def normalize_dict(d: dict[str, Any] | frozendict[str, Any]) -> frozendict[str, Any]:
    """Return a frozen copy of ``d`` with every value re-parsed from its CSV form.

    Each value is first serialised with ``csv_import.csv_safe_value`` and then
    parsed back with ``csv_import.csv_str_to_value``. Entries whose parsed
    value is ``None`` are left out of the result.
    """
    normalized: dict[str, Any] = {}
    for key, raw in d.items():
        as_text = csv_import.csv_safe_value(raw)
        parsed = csv_import.csv_str_to_value(as_text)
        if parsed is None:
            # Values that parse to None are dropped, not stored.
            continue
        normalized[key] = parsed
    return frozendict(normalized)