This commit is contained in:
parent
231036c14a
commit
1a9df24278
|
@ -72,7 +72,7 @@ class TavexScraperBase(Scraper):
|
||||||
|
|
||||||
@dataclasses.dataclass(frozen=True)
|
@dataclasses.dataclass(frozen=True)
|
||||||
class TavexScraperGold(TavexScraperBase):
|
class TavexScraperGold(TavexScraperBase):
|
||||||
dataset_name = 'prices_tavex_gold'
|
dataset_name = 'prices_tavex/guld-1oz-canadisk-maple-leaf-guldmont'
|
||||||
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
|
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -82,7 +82,7 @@ class TavexScraperGold(TavexScraperBase):
|
||||||
|
|
||||||
@dataclasses.dataclass(frozen=True)
|
@dataclasses.dataclass(frozen=True)
|
||||||
class TavexScraperSilver(TavexScraperBase):
|
class TavexScraperSilver(TavexScraperBase):
|
||||||
dataset_name = 'prices_tavex_silver'
|
dataset_name = 'prices_tavex/solv-1-oz-american-eagle-solvmont-tidligere-argange'
|
||||||
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
|
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
|
|
@ -3,6 +3,7 @@ import datetime
|
||||||
import decimal
|
import decimal
|
||||||
import inspect
|
import inspect
|
||||||
import io
|
import io
|
||||||
|
from pathlib import Path
|
||||||
import logging
|
import logging
|
||||||
from collections.abc import Iterable, Mapping, Sequence
|
from collections.abc import Iterable, Mapping, Sequence
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
|
@ -33,6 +34,7 @@ from . import notification
|
||||||
|
|
||||||
CSV_DIALECT = 'one_true_dialect'
|
CSV_DIALECT = 'one_true_dialect'
|
||||||
csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True)
|
csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True)
|
||||||
|
OUTPUT_PATH = Path('./output')
|
||||||
|
|
||||||
logging.basicConfig()
|
logging.basicConfig()
|
||||||
logger.setLevel('INFO')
|
logger.setLevel('INFO')
|
||||||
|
@ -140,7 +142,7 @@ def normalize_dict(d: dict) -> frozendict:
|
||||||
|
|
||||||
|
|
||||||
def extend_csv_file(
|
def extend_csv_file(
|
||||||
filename: str,
|
csv_file: Path,
|
||||||
new_dicts: list[dict],
|
new_dicts: list[dict],
|
||||||
deduplicate_mode: personal_data.data.DeduplicateMode,
|
deduplicate_mode: personal_data.data.DeduplicateMode,
|
||||||
deduplicate_ignore_columns: list[str],
|
deduplicate_ignore_columns: list[str],
|
||||||
|
@ -149,7 +151,7 @@ def extend_csv_file(
|
||||||
|
|
||||||
dicts = []
|
dicts = []
|
||||||
try:
|
try:
|
||||||
with open(filename) as csvfile:
|
with open(csv_file) as csvfile:
|
||||||
reader = csv.DictReader(csvfile, dialect=CSV_DIALECT)
|
reader = csv.DictReader(csvfile, dialect=CSV_DIALECT)
|
||||||
for row in reader:
|
for row in reader:
|
||||||
for k in list(row.keys()):
|
for k in list(row.keys()):
|
||||||
|
@ -162,7 +164,7 @@ def extend_csv_file(
|
||||||
del row
|
del row
|
||||||
del csvfile
|
del csvfile
|
||||||
except FileNotFoundError as e:
|
except FileNotFoundError as e:
|
||||||
logger.info('Creating file: %s', filename)
|
logger.info('Creating file: %s', csv_file)
|
||||||
|
|
||||||
original_num_dicts = len(dicts)
|
original_num_dicts = len(dicts)
|
||||||
dicts += [normalize_dict(d) for d in new_dicts]
|
dicts += [normalize_dict(d) for d in new_dicts]
|
||||||
|
@ -186,12 +188,13 @@ def extend_csv_file(
|
||||||
output_csv = csvfile_in_memory.getvalue()
|
output_csv = csvfile_in_memory.getvalue()
|
||||||
del writer, csvfile_in_memory
|
del writer, csvfile_in_memory
|
||||||
|
|
||||||
with open(filename, 'w') as csvfile:
|
csv_file.parent.mkdir(parents=True,exist_ok=True)
|
||||||
|
with open(csv_file, 'w') as csvfile:
|
||||||
csvfile.write(output_csv)
|
csvfile.write(output_csv)
|
||||||
del csvfile
|
del csvfile
|
||||||
logger.info(
|
logger.info(
|
||||||
'Extended CSV "%s" from %d to %d lines',
|
'Extended CSV "%s" from %d to %d lines',
|
||||||
filename,
|
csv_file,
|
||||||
original_num_dicts,
|
original_num_dicts,
|
||||||
len(dicts),
|
len(dicts),
|
||||||
)
|
)
|
||||||
|
@ -231,7 +234,7 @@ def get_session(
|
||||||
return requests.Session()
|
return requests.Session()
|
||||||
if cfscrape:
|
if cfscrape:
|
||||||
session_class = CachedCfScrape
|
session_class = CachedCfScrape
|
||||||
session = session_class('output/web_cache', cookies=cookiejar)
|
session = session_class(OUTPUT_PATH / 'web_cache', cookies=cookiejar)
|
||||||
for cookie in cookiejar:
|
for cookie in cookiejar:
|
||||||
session.cookies.set_cookie(cookie)
|
session.cookies.set_cookie(cookie)
|
||||||
return session
|
return session
|
||||||
|
@ -293,7 +296,7 @@ def main(
|
||||||
logger.exception('Failed in running %s', scraper_cls.__name__)
|
logger.exception('Failed in running %s', scraper_cls.__name__)
|
||||||
continue
|
continue
|
||||||
status = extend_csv_file(
|
status = extend_csv_file(
|
||||||
f'output/{scraper.dataset_name}.csv',
|
OUTPUT_PATH / f'{scraper.dataset_name}.csv',
|
||||||
result_rows,
|
result_rows,
|
||||||
deduplicate_mode=scraper.deduplicate_mode,
|
deduplicate_mode=scraper.deduplicate_mode,
|
||||||
deduplicate_ignore_columns=scraper.deduplicate_ignore_columns(),
|
deduplicate_ignore_columns=scraper.deduplicate_ignore_columns(),
|
||||||
|
|
Loading…
Reference in New Issue
Block a user