1
0

Use paths
Some checks failed
Test Python / Test (push) Failing after 24s

This commit is contained in:
Jon Michael Aanes 2024-07-27 02:14:01 +02:00
parent 231036c14a
commit 1a9df24278
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
2 changed files with 12 additions and 9 deletions

View File

@@ -72,7 +72,7 @@ class TavexScraperBase(Scraper):
@dataclasses.dataclass(frozen=True) @dataclasses.dataclass(frozen=True)
class TavexScraperGold(TavexScraperBase): class TavexScraperGold(TavexScraperBase):
dataset_name = 'prices_tavex_gold' dataset_name = 'prices_tavex/guld-1oz-canadisk-maple-leaf-guldmont'
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
@staticmethod @staticmethod
@@ -82,7 +82,7 @@ class TavexScraperGold(TavexScraperBase):
@dataclasses.dataclass(frozen=True) @dataclasses.dataclass(frozen=True)
class TavexScraperSilver(TavexScraperBase): class TavexScraperSilver(TavexScraperBase):
dataset_name = 'prices_tavex_silver' dataset_name = 'prices_tavex/solv-1-oz-american-eagle-solvmont-tidligere-argange'
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
@staticmethod @staticmethod

View File

@@ -3,6 +3,7 @@ import datetime
import decimal import decimal
import inspect import inspect
import io import io
from pathlib import Path
import logging import logging
from collections.abc import Iterable, Mapping, Sequence from collections.abc import Iterable, Mapping, Sequence
from decimal import Decimal from decimal import Decimal
@@ -33,6 +34,7 @@ from . import notification
CSV_DIALECT = 'one_true_dialect' CSV_DIALECT = 'one_true_dialect'
csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True) csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True)
OUTPUT_PATH = Path('./output')
logging.basicConfig() logging.basicConfig()
logger.setLevel('INFO') logger.setLevel('INFO')
@@ -140,7 +142,7 @@ def normalize_dict(d: dict) -> frozendict:
def extend_csv_file( def extend_csv_file(
filename: str, csv_file: Path,
new_dicts: list[dict], new_dicts: list[dict],
deduplicate_mode: personal_data.data.DeduplicateMode, deduplicate_mode: personal_data.data.DeduplicateMode,
deduplicate_ignore_columns: list[str], deduplicate_ignore_columns: list[str],
@@ -149,7 +151,7 @@ def extend_csv_file(
dicts = [] dicts = []
try: try:
with open(filename) as csvfile: with open(csv_file) as csvfile:
reader = csv.DictReader(csvfile, dialect=CSV_DIALECT) reader = csv.DictReader(csvfile, dialect=CSV_DIALECT)
for row in reader: for row in reader:
for k in list(row.keys()): for k in list(row.keys()):
@@ -162,7 +164,7 @@ def extend_csv_file(
del row del row
del csvfile del csvfile
except FileNotFoundError as e: except FileNotFoundError as e:
logger.info('Creating file: %s', filename) logger.info('Creating file: %s', csv_file)
original_num_dicts = len(dicts) original_num_dicts = len(dicts)
dicts += [normalize_dict(d) for d in new_dicts] dicts += [normalize_dict(d) for d in new_dicts]
@@ -186,12 +188,13 @@ def extend_csv_file(
output_csv = csvfile_in_memory.getvalue() output_csv = csvfile_in_memory.getvalue()
del writer, csvfile_in_memory del writer, csvfile_in_memory
with open(filename, 'w') as csvfile: csv_file.parent.mkdir(parents=True,exist_ok=True)
with open(csv_file, 'w') as csvfile:
csvfile.write(output_csv) csvfile.write(output_csv)
del csvfile del csvfile
logger.info( logger.info(
'Extended CSV "%s" from %d to %d lines', 'Extended CSV "%s" from %d to %d lines',
filename, csv_file,
original_num_dicts, original_num_dicts,
len(dicts), len(dicts),
) )
@@ -231,7 +234,7 @@ def get_session(
return requests.Session() return requests.Session()
if cfscrape: if cfscrape:
session_class = CachedCfScrape session_class = CachedCfScrape
session = session_class('output/web_cache', cookies=cookiejar) session = session_class(OUTPUT_PATH / 'web_cache', cookies=cookiejar)
for cookie in cookiejar: for cookie in cookiejar:
session.cookies.set_cookie(cookie) session.cookies.set_cookie(cookie)
return session return session
@@ -293,7 +296,7 @@ def main(
logger.exception('Failed in running %s', scraper_cls.__name__) logger.exception('Failed in running %s', scraper_cls.__name__)
continue continue
status = extend_csv_file( status = extend_csv_file(
f'output/{scraper.dataset_name}.csv', OUTPUT_PATH / f'{scraper.dataset_name}.csv',
result_rows, result_rows,
deduplicate_mode=scraper.deduplicate_mode, deduplicate_mode=scraper.deduplicate_mode,
deduplicate_ignore_columns=scraper.deduplicate_ignore_columns(), deduplicate_ignore_columns=scraper.deduplicate_ignore_columns(),