Improved normalization to prevent weird transformations
This commit is contained in:
parent
9d528d4cfd
commit
d2916cbc28
|
@ -17,6 +17,18 @@ csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True)
|
||||||
T = typing.TypeVar('T')
|
T = typing.TypeVar('T')
|
||||||
|
|
||||||
|
|
||||||
|
def csv_safe_value(v: Any) -> str:
    """Convert a value to the string form written to CSV.

    Args:
        v: The value to serialise.

    Returns:
        ``v.geturl()`` for ``urllib.parse.ParseResult`` values, otherwise
        ``str(v)``.

    Raises:
        ValueError: If ``v`` is a ``datetime.datetime`` that is naive or
            whose ``tzinfo`` is not ``datetime.UTC``.
    """
    # Parsed URLs are written back in their textual URL form instead of the
    # namedtuple repr that str() would give.
    if isinstance(v, urllib.parse.ParseResult):
        return v.geturl()

    # Only UTC timestamps are accepted; anything else is rejected up front
    # rather than being written out.
    if isinstance(v, datetime.datetime):
        if v.tzinfo is None or v.tzinfo != datetime.UTC:
            msg = f'Timezone must be UTC: {v}'
            raise ValueError(msg)

    return str(v)
|
||||||
|
|
||||||
|
|
||||||
def try_value(fn: Callable[[str], T], s: str) -> T | None:
|
def try_value(fn: Callable[[str], T], s: str) -> T | None:
|
||||||
try:
|
try:
|
||||||
return fn(s)
|
return fn(s)
|
||||||
|
|
|
@ -32,9 +32,10 @@ class MyAnimeList(Scraper):
|
||||||
for data_item in data_items:
|
for data_item in data_items:
|
||||||
print(data_item)
|
print(data_item)
|
||||||
yield {
|
yield {
|
||||||
'series.name': data_item.get('anime_title_eng') or data_item.get('anime_title'),
|
'series.name_eng': data_item.get('anime_title_eng') or data_item.get('anime_title'),
|
||||||
'series.myanimelist_url': urllib.parse.urljoin(url, data_item['anime_url']),
|
'series.name': data_item.get('anime_title') or data_item.get('anime_title_eng'),
|
||||||
'series.icon': urllib.parse.urljoin(url, data_item['anime_image_path']),
|
'series.myanimelist_url': urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_url'])),
|
||||||
|
'series.icon': urllib.parse.urlparse(urllib.parse.urljoin(url, data_item['anime_image_path'])),
|
||||||
'me.score': data_item.get('score'),
|
'me.score': data_item.get('score'),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -15,16 +15,6 @@ from . import csv_import, data
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def csv_safe_value(v: Any) -> str:
|
|
||||||
if isinstance(v, urllib.parse.ParseResult):
|
|
||||||
return v.geturl()
|
|
||||||
if isinstance(v, datetime.datetime):
|
|
||||||
if v.tzinfo is None or v.tzinfo != datetime.UTC:
|
|
||||||
msg = f'Timezone must be UTC: {v}'
|
|
||||||
raise ValueError(msg)
|
|
||||||
return str(v)
|
|
||||||
|
|
||||||
|
|
||||||
def equals_without_fields(
|
def equals_without_fields(
|
||||||
a: Mapping[str, Any],
|
a: Mapping[str, Any],
|
||||||
b: Mapping[str, Any],
|
b: Mapping[str, Any],
|
||||||
|
@ -99,13 +89,8 @@ def deduplicate_dicts(
|
||||||
|
|
||||||
|
|
||||||
def normalize_dict(d: dict[str, Any] | frozendict[str, Any]) -> frozendict[str, Any]:
|
def normalize_dict(d: dict[str, Any] | frozendict[str, Any]) -> frozendict[str, Any]:
|
||||||
return frozendict(
|
safe_values = [(k, csv_import.csv_str_to_value(csv_import.csv_safe_value(v))) for k, v in d.items() ]
|
||||||
{
|
return frozendict( {k:v for k,v in safe_values if v is not None})
|
||||||
k: csv_import.csv_str_to_value(str(v))
|
|
||||||
for k, v in d.items()
|
|
||||||
if csv_import.csv_str_to_value(str(v)) is not None
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def extend_csv_file(
|
def extend_csv_file(
|
||||||
|
@ -145,7 +130,7 @@ def extend_csv_file(
|
||||||
)
|
)
|
||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
for d in dicts:
|
for d in dicts:
|
||||||
writable_d = {k: csv_safe_value(v) for k, v in d.items()}
|
writable_d = {k: csv_import.csv_safe_value(v) for k, v in d.items()}
|
||||||
writer.writerow(writable_d)
|
writer.writerow(writable_d)
|
||||||
del d, writable_d
|
del d, writable_d
|
||||||
output_csv = csvfile_in_memory.getvalue()
|
output_csv = csvfile_in_memory.getvalue()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user