From 1a9df24278cda7d193fed067d7f76fdeccbe90ea Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Sat, 27 Jul 2024 02:14:01 +0200 Subject: [PATCH] Use paths --- personal_data/fetchers/tavex.py | 4 ++-- personal_data/main.py | 17 ++++++++++------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/personal_data/fetchers/tavex.py b/personal_data/fetchers/tavex.py index b3990d6..2713169 100644 --- a/personal_data/fetchers/tavex.py +++ b/personal_data/fetchers/tavex.py @@ -72,7 +72,7 @@ class TavexScraperBase(Scraper): @dataclasses.dataclass(frozen=True) class TavexScraperGold(TavexScraperBase): - dataset_name = 'prices_tavex_gold' + dataset_name = 'prices_tavex/guld-1oz-canadisk-maple-leaf-guldmont' deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS @staticmethod @@ -82,7 +82,7 @@ class TavexScraperGold(TavexScraperBase): @dataclasses.dataclass(frozen=True) class TavexScraperSilver(TavexScraperBase): - dataset_name = 'prices_tavex_silver' + dataset_name = 'prices_tavex/solv-1-oz-american-eagle-solvmont-tidligere-argange' deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS @staticmethod diff --git a/personal_data/main.py b/personal_data/main.py index 10a016d..2777aa8 100644 --- a/personal_data/main.py +++ b/personal_data/main.py @@ -3,6 +3,7 @@ import datetime import decimal import inspect import io +from pathlib import Path import logging from collections.abc import Iterable, Mapping, Sequence from decimal import Decimal @@ -33,6 +34,7 @@ from . import notification CSV_DIALECT = 'one_true_dialect' csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True) +OUTPUT_PATH = Path('./output') logging.basicConfig() logger.setLevel('INFO') @@ -140,7 +142,7 @@ def normalize_dict(d: dict) -> frozendict: def extend_csv_file( - filename: str, + csv_file: Path, new_dicts: list[dict], deduplicate_mode: personal_data.data.DeduplicateMode, deduplicate_ignore_columns: list[str], @@ -149,7 +151,7 @@ def extend_csv_file( dicts = [] try: - with open(filename) as csvfile: + with open(csv_file) as csvfile: reader = csv.DictReader(csvfile, dialect=CSV_DIALECT) for row in reader: for k in list(row.keys()): @@ -162,7 +164,7 @@ def extend_csv_file( del row del csvfile except FileNotFoundError as e: - logger.info('Creating file: %s', filename) + logger.info('Creating file: %s', csv_file) original_num_dicts = len(dicts) dicts += [normalize_dict(d) for d in new_dicts] @@ -186,12 +188,13 @@ def extend_csv_file( output_csv = csvfile_in_memory.getvalue() del writer, csvfile_in_memory - with open(filename, 'w') as csvfile: + csv_file.parent.mkdir(parents=True,exist_ok=True) + with open(csv_file, 'w') as csvfile: csvfile.write(output_csv) del csvfile logger.info( 'Extended CSV "%s" from %d to %d lines', - filename, + csv_file, original_num_dicts, len(dicts), ) @@ -231,7 +234,7 @@ def get_session( return requests.Session() if cfscrape: session_class = CachedCfScrape - session = session_class('output/web_cache', cookies=cookiejar) + session = session_class(OUTPUT_PATH / 'web_cache', cookies=cookiejar) for cookie in cookiejar: session.cookies.set_cookie(cookie) return session @@ -293,7 +296,7 @@ def main( logger.exception('Failed in running %s', scraper_cls.__name__) continue status = extend_csv_file( - f'output/{scraper.dataset_name}.csv', + OUTPUT_PATH / f'{scraper.dataset_name}.csv', result_rows, deduplicate_mode=scraper.deduplicate_mode, deduplicate_ignore_columns=scraper.deduplicate_ignore_columns(),