This commit is contained in:
parent
36b372fb2d
commit
167b2c8f27
|
@ -53,18 +53,18 @@ def get_session(
|
||||||
assert isinstance(with_cfscrape, bool)
|
assert isinstance(with_cfscrape, bool)
|
||||||
session_class = requests_cache.CachedSession
|
session_class = requests_cache.CachedSession
|
||||||
if ignore_cache:
|
if ignore_cache:
|
||||||
logger.warn('HTTP cache disabled')
|
logger.warning('HTTP cache disabled')
|
||||||
return requests.Session()
|
return requests.Session()
|
||||||
if cfscrape:
|
if cfscrape:
|
||||||
session_class = CachedCfScrape
|
session_class = CachedCfScrape
|
||||||
session = session_class(OUTPUT_PATH / 'web_cache', cookies=cookiejar)
|
session = session_class(OUTPUT_PATH / 'web_cache', cookies=cookiejar, expire_after=datetime.timedelta(days=1))
|
||||||
for cookie in cookiejar:
|
for cookie in cookiejar:
|
||||||
session.cookies.set_cookie(cookie)
|
session.cookies.set_cookie(cookie)
|
||||||
return session
|
return session
|
||||||
|
|
||||||
|
|
||||||
def available_scrapers() -> list[type[data.Scraper]]:
|
def available_scrapers() -> list[type[data.Scraper]]:
|
||||||
from . import fetchers
|
from . import fetchers # noqa
|
||||||
subclasses = []
|
subclasses = []
|
||||||
class_queue = [data.Scraper]
|
class_queue = [data.Scraper]
|
||||||
while class_queue:
|
while class_queue:
|
||||||
|
|
|
@ -28,7 +28,7 @@ def try_value(fn: Callable[[str], T], s: str) -> T | None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def to_value(
|
def csv_str_to_value(
|
||||||
s: str,
|
s: str,
|
||||||
) -> (
|
) -> (
|
||||||
str
|
str
|
||||||
|
@ -58,6 +58,11 @@ def to_value(
|
||||||
return None
|
return None
|
||||||
return s
|
return s
|
||||||
|
|
||||||
|
def csv_safe_value(v: object) -> str:
    """Render *v* as a plain string suitable for writing to a CSV cell.

    Parsed URLs (``urllib.parse.ParseResult``) are serialized back to their
    original URL text via ``geturl()``; every other value falls back to
    ``str()``.
    """
    # ParseResult's default str() is the repr of a namedtuple, not the URL —
    # reassemble the URL text explicitly for that case.
    return v.geturl() if isinstance(v, urllib.parse.ParseResult) else str(v)
|
||||||
|
|
||||||
|
|
||||||
def equals_without_fields(
|
def equals_without_fields(
|
||||||
a: Mapping[str, object],
|
a: Mapping[str, object],
|
||||||
|
@ -130,7 +135,7 @@ def deduplicate_dicts(
|
||||||
|
|
||||||
def normalize_dict(d: dict) -> frozendict:
    """Normalize *d* into an immutable mapping of parsed CSV values.

    Every value is stringified and run through ``csv_str_to_value``; entries
    whose converted value is ``None`` (i.e. empty/unparseable) are dropped.

    Returns a ``frozendict`` so the result is hashable and safe to deduplicate.
    """
    # Convert each value exactly once, then filter — the previous version
    # called csv_str_to_value(str(v)) twice per item (value + filter clause).
    converted = ((k, csv_str_to_value(str(v))) for k, v in d.items())
    return frozendict({k: v for k, v in converted if v is not None})
|
||||||
|
|
||||||
|
|
||||||
|
@ -141,7 +146,7 @@ def load_csv_file(csv_file: Path) -> list[frozendict]:
|
||||||
for row in reader:
|
for row in reader:
|
||||||
for k in list(row.keys()):
|
for k in list(row.keys()):
|
||||||
orig = row[k]
|
orig = row[k]
|
||||||
row[k] = to_value(orig)
|
row[k] = csv_str_to_value(orig)
|
||||||
if row[k] is None:
|
if row[k] is None:
|
||||||
del row[k]
|
del row[k]
|
||||||
del k, orig
|
del k, orig
|
||||||
|
@ -183,7 +188,9 @@ def extend_csv_file(
|
||||||
)
|
)
|
||||||
writer.writeheader()
|
writer.writeheader()
|
||||||
for d in dicts:
|
for d in dicts:
|
||||||
writer.writerow(d)
|
writable_d = {k:csv_safe_value(v) for k,v in d.items()}
|
||||||
|
writer.writerow(writable_d)
|
||||||
|
del d, writable_d
|
||||||
output_csv = csvfile_in_memory.getvalue()
|
output_csv = csvfile_in_memory.getvalue()
|
||||||
del writer, csvfile_in_memory
|
del writer, csvfile_in_memory
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user