This commit is contained in:
parent
231036c14a
commit
1a9df24278
|
@ -72,7 +72,7 @@ class TavexScraperBase(Scraper):
|
|||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class TavexScraperGold(TavexScraperBase):
|
||||
dataset_name = 'prices_tavex_gold'
|
||||
dataset_name = 'prices_tavex/guld-1oz-canadisk-maple-leaf-guldmont'
|
||||
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
|
||||
|
||||
@staticmethod
|
||||
|
@ -82,7 +82,7 @@ class TavexScraperGold(TavexScraperBase):
|
|||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class TavexScraperSilver(TavexScraperBase):
|
||||
dataset_name = 'prices_tavex_silver'
|
||||
dataset_name = 'prices_tavex/solv-1-oz-american-eagle-solvmont-tidligere-argange'
|
||||
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
|
||||
|
||||
@staticmethod
|
||||
|
|
|
@ -3,6 +3,7 @@ import datetime
|
|||
import decimal
|
||||
import inspect
|
||||
import io
|
||||
from pathlib import Path
|
||||
import logging
|
||||
from collections.abc import Iterable, Mapping, Sequence
|
||||
from decimal import Decimal
|
||||
|
@ -33,6 +34,7 @@ from . import notification
|
|||
|
||||
CSV_DIALECT = 'one_true_dialect'
|
||||
csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True)
|
||||
OUTPUT_PATH = Path('./output')
|
||||
|
||||
logging.basicConfig()
|
||||
logger.setLevel('INFO')
|
||||
|
@ -140,7 +142,7 @@ def normalize_dict(d: dict) -> frozendict:
|
|||
|
||||
|
||||
def extend_csv_file(
|
||||
filename: str,
|
||||
csv_file: Path,
|
||||
new_dicts: list[dict],
|
||||
deduplicate_mode: personal_data.data.DeduplicateMode,
|
||||
deduplicate_ignore_columns: list[str],
|
||||
|
@ -149,7 +151,7 @@ def extend_csv_file(
|
|||
|
||||
dicts = []
|
||||
try:
|
||||
with open(filename) as csvfile:
|
||||
with open(csv_file) as csvfile:
|
||||
reader = csv.DictReader(csvfile, dialect=CSV_DIALECT)
|
||||
for row in reader:
|
||||
for k in list(row.keys()):
|
||||
|
@ -162,7 +164,7 @@ def extend_csv_file(
|
|||
del row
|
||||
del csvfile
|
||||
except FileNotFoundError as e:
|
||||
logger.info('Creating file: %s', filename)
|
||||
logger.info('Creating file: %s', csv_file)
|
||||
|
||||
original_num_dicts = len(dicts)
|
||||
dicts += [normalize_dict(d) for d in new_dicts]
|
||||
|
@ -186,12 +188,13 @@ def extend_csv_file(
|
|||
output_csv = csvfile_in_memory.getvalue()
|
||||
del writer, csvfile_in_memory
|
||||
|
||||
with open(filename, 'w') as csvfile:
|
||||
csv_file.parent.mkdir(parents=True,exist_ok=True)
|
||||
with open(csv_file, 'w') as csvfile:
|
||||
csvfile.write(output_csv)
|
||||
del csvfile
|
||||
logger.info(
|
||||
'Extended CSV "%s" from %d to %d lines',
|
||||
filename,
|
||||
csv_file,
|
||||
original_num_dicts,
|
||||
len(dicts),
|
||||
)
|
||||
|
@ -231,7 +234,7 @@ def get_session(
|
|||
return requests.Session()
|
||||
if cfscrape:
|
||||
session_class = CachedCfScrape
|
||||
session = session_class('output/web_cache', cookies=cookiejar)
|
||||
session = session_class(OUTPUT_PATH / 'web_cache', cookies=cookiejar)
|
||||
for cookie in cookiejar:
|
||||
session.cookies.set_cookie(cookie)
|
||||
return session
|
||||
|
@ -293,7 +296,7 @@ def main(
|
|||
logger.exception('Failed in running %s', scraper_cls.__name__)
|
||||
continue
|
||||
status = extend_csv_file(
|
||||
f'output/{scraper.dataset_name}.csv',
|
||||
OUTPUT_PATH / f'{scraper.dataset_name}.csv',
|
||||
result_rows,
|
||||
deduplicate_mode=scraper.deduplicate_mode,
|
||||
deduplicate_ignore_columns=scraper.deduplicate_ignore_columns(),
|
||||
|
|
Loading…
Reference in New Issue
Block a user