From 167b2c8f276f7c812c1ced07ff48ddee79f06770 Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Sun, 25 Aug 2024 21:07:52 +0200 Subject: [PATCH] csv_safe_value --- personal_data/main.py | 6 +++--- personal_data/util.py | 15 +++++++++++---- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/personal_data/main.py b/personal_data/main.py index f239ffe..28365ac 100644 --- a/personal_data/main.py +++ b/personal_data/main.py @@ -53,18 +53,18 @@ def get_session( assert isinstance(with_cfscrape, bool) session_class = requests_cache.CachedSession if ignore_cache: - logger.warn('HTTP cache disabled') + logger.warning('HTTP cache disabled') return requests.Session() if cfscrape: session_class = CachedCfScrape - session = session_class(OUTPUT_PATH / 'web_cache', cookies=cookiejar) + session = session_class(OUTPUT_PATH / 'web_cache', cookies=cookiejar, expire_after=datetime.timedelta(days=1)) for cookie in cookiejar: session.cookies.set_cookie(cookie) return session def available_scrapers() -> list[type[data.Scraper]]: - from . import fetchers + from . import fetchers # noqa subclasses = [] class_queue = [data.Scraper] while class_queue: diff --git a/personal_data/util.py b/personal_data/util.py index b11025a..2185520 100644 --- a/personal_data/util.py +++ b/personal_data/util.py @@ -28,7 +28,7 @@ def try_value(fn: Callable[[str], T], s: str) -> T | None: return None -def to_value( +def csv_str_to_value( s: str, ) -> ( str @@ -58,6 +58,11 @@ def to_value( return None return s +def csv_safe_value(v: object) -> str: + if isinstance(v, urllib.parse.ParseResult): + return v.geturl() + return str(v) + def equals_without_fields( a: Mapping[str, object], @@ -130,7 +135,7 @@ def deduplicate_dicts( def normalize_dict(d: dict) -> frozendict: return frozendict( - {k: to_value(str(v)) for k, v in d.items() if to_value(str(v)) is not None}, + {k: csv_str_to_value(str(v)) for k, v in d.items() if csv_str_to_value(str(v)) is not None}, ) @@ -141,7 +146,7 @@ def load_csv_file(csv_file: Path) -> list[frozendict]: for row in reader: for k in list(row.keys()): orig = row[k] - row[k] = to_value(orig) + row[k] = csv_str_to_value(orig) if row[k] is None: del row[k] del k, orig @@ -183,7 +188,9 @@ def extend_csv_file( ) writer.writeheader() for d in dicts: - writer.writerow(d) + writable_d = {k:csv_safe_value(v) for k,v in d.items()} + writer.writerow(writable_d) + del d, writable_d output_csv = csvfile_in_memory.getvalue() del writer, csvfile_in_memory