Improve normalization so values survive the CSV round-trip without being incorrectly transformed
This commit is contained in:
parent
9d528d4cfd
commit
d2916cbc28
|
@ -17,6 +17,18 @@ csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True)
|
|||
T = typing.TypeVar('T')
|
||||
|
||||
|
||||
def csv_safe_value(v: Any) -> str:
|
||||
if isinstance(v, urllib.parse.ParseResult):
|
||||
return v.geturl()
|
||||
if isinstance(v, datetime.datetime):
|
||||
if v.tzinfo is None or v.tzinfo != datetime.UTC:
|
||||
msg = f'Timezone must be UTC: {v}'
|
||||
raise ValueError(msg)
|
||||
if isinstance(v, urllib.parse.ParseResult):
|
||||
return v.geturl()
|
||||
return str(v)
|
||||
|
||||
|
||||
def try_value(fn: Callable[[str], T], s: str) -> T | None:
|
||||
try:
|
||||
return fn(s)
|
||||
|
|
|
@ -32,9 +32,10 @@ class MyAnimeList(Scraper):
|
|||
for data_item in data_items:
|
||||
print(data_item)
|
||||
yield {
|
||||
'series.name': data_item.get('anime_title_eng') or data_item.get('anime_title'),
|
||||
'series.myanimelist_url': urllib.parse.urljoin(url, data_item['anime_url']),
|
||||
'series.icon': urllib.parse.urljoin(url, data_item['anime_image_path']),
|
||||
'series.name_eng': data_item.get('anime_title_eng') or data_item.get('anime_title'),
|
||||
'series.name': data_item.get('anime_title') or data_item.get('anime_title_eng'),
|
||||
'series.myanimelist_url': urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_url'])),
|
||||
'series.icon': urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_image_path'])),
|
||||
'me.score': data_item.get('score'),
|
||||
}
|
||||
|
||||
|
|
|
@ -15,16 +15,6 @@ from . import csv_import, data
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def csv_safe_value(v: Any) -> str:
|
||||
if isinstance(v, urllib.parse.ParseResult):
|
||||
return v.geturl()
|
||||
if isinstance(v, datetime.datetime):
|
||||
if v.tzinfo is None or v.tzinfo != datetime.UTC:
|
||||
msg = f'Timezone must be UTC: {v}'
|
||||
raise ValueError(msg)
|
||||
return str(v)
|
||||
|
||||
|
||||
def equals_without_fields(
|
||||
a: Mapping[str, Any],
|
||||
b: Mapping[str, Any],
|
||||
|
@ -99,13 +89,8 @@ def deduplicate_dicts(
|
|||
|
||||
|
||||
def normalize_dict(d: dict[str, Any] | frozendict[str, Any]) -> frozendict[str, Any]:
    """Normalize a mapping by round-tripping each value through CSV coding.

    Each value is first serialized with ``csv_import.csv_safe_value`` (so
    e.g. ParseResult and UTC datetimes get their canonical string form, and
    non-UTC datetimes raise) and then parsed back with
    ``csv_import.csv_str_to_value``.  Entries whose round-tripped value is
    ``None`` are dropped.

    Returns:
        An immutable ``frozendict`` of the surviving normalized entries.
    """
    # Evaluate the round-trip exactly once per value, then filter — the
    # previous implementation called csv_str_to_value twice per entry.
    safe_values = [
        (k, csv_import.csv_str_to_value(csv_import.csv_safe_value(v)))
        for k, v in d.items()
    ]
    return frozendict({k: v for k, v in safe_values if v is not None})
|
||||
|
||||
|
||||
def extend_csv_file(
|
||||
|
@ -145,7 +130,7 @@ def extend_csv_file(
|
|||
)
|
||||
writer.writeheader()
|
||||
for d in dicts:
|
||||
writable_d = {k: csv_safe_value(v) for k, v in d.items()}
|
||||
writable_d = {k: csv_import.csv_safe_value(v) for k, v in d.items()}
|
||||
writer.writerow(writable_d)
|
||||
del d, writable_d
|
||||
output_csv = csvfile_in_memory.getvalue()
|
||||
|
|
Loading…
Reference in New Issue
Block a user