This commit is contained in:
parent
36b372fb2d
commit
167b2c8f27
|
@ -53,18 +53,18 @@ def get_session(
|
|||
assert isinstance(with_cfscrape, bool)
|
||||
session_class = requests_cache.CachedSession
|
||||
if ignore_cache:
|
||||
logger.warn('HTTP cache disabled')
|
||||
logger.warning('HTTP cache disabled')
|
||||
return requests.Session()
|
||||
if cfscrape:
|
||||
session_class = CachedCfScrape
|
||||
session = session_class(OUTPUT_PATH / 'web_cache', cookies=cookiejar)
|
||||
session = session_class(OUTPUT_PATH / 'web_cache', cookies=cookiejar, expire_after=datetime.timedelta(days=1))
|
||||
for cookie in cookiejar:
|
||||
session.cookies.set_cookie(cookie)
|
||||
return session
|
||||
|
||||
|
||||
def available_scrapers() -> list[type[data.Scraper]]:
|
||||
from . import fetchers
|
||||
from . import fetchers # noqa
|
||||
subclasses = []
|
||||
class_queue = [data.Scraper]
|
||||
while class_queue:
|
||||
|
|
|
@ -28,7 +28,7 @@ def try_value(fn: Callable[[str], T], s: str) -> T | None:
|
|||
return None
|
||||
|
||||
|
||||
def to_value(
|
||||
def csv_str_to_value(
|
||||
s: str,
|
||||
) -> (
|
||||
str
|
||||
|
@ -58,6 +58,11 @@ def to_value(
|
|||
return None
|
||||
return s
|
||||
|
||||
def csv_safe_value(v: object) -> str:
    """Return the text form of *v* to be written into a CSV cell.

    A ``urllib.parse.ParseResult`` is rendered through ``geturl()``;
    every other object is rendered with ``str()``.
    """
    is_parsed_url = isinstance(v, urllib.parse.ParseResult)
    return v.geturl() if is_parsed_url else str(v)
|
||||
|
||||
|
||||
def equals_without_fields(
|
||||
a: Mapping[str, object],
|
||||
|
@ -130,7 +135,7 @@ def deduplicate_dicts(
|
|||
|
||||
def normalize_dict(d: dict) -> frozendict:
    """Return a ``frozendict`` of *d* with every value normalised.

    Each value is stringified with ``str()`` and passed through
    ``csv_str_to_value``; entries whose converted value is ``None`` are
    dropped from the result.
    """
    # Convert each value exactly once: the walrus binds the converted value
    # for both the ``None`` filter and the stored result, instead of calling
    # ``csv_str_to_value`` twice per item.
    # NOTE(review): assumes csv_str_to_value is a pure function — confirm.
    return frozendict(
        {
            k: converted
            for k, v in d.items()
            if (converted := csv_str_to_value(str(v))) is not None
        },
    )
|
||||
|
||||
|
||||
|
@ -141,7 +146,7 @@ def load_csv_file(csv_file: Path) -> list[frozendict]:
|
|||
for row in reader:
|
||||
for k in list(row.keys()):
|
||||
orig = row[k]
|
||||
row[k] = to_value(orig)
|
||||
row[k] = csv_str_to_value(orig)
|
||||
if row[k] is None:
|
||||
del row[k]
|
||||
del k, orig
|
||||
|
@ -183,7 +188,9 @@ def extend_csv_file(
|
|||
)
|
||||
writer.writeheader()
|
||||
for d in dicts:
|
||||
writer.writerow(d)
|
||||
writable_d = {k:csv_safe_value(v) for k,v in d.items()}
|
||||
writer.writerow(writable_d)
|
||||
del d, writable_d
|
||||
output_csv = csvfile_in_memory.getvalue()
|
||||
del writer, csvfile_in_memory
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user