Use new improved cloudflare avoider

parent d945fb81fb
commit 119b380f8a
@@ -5,7 +5,7 @@ Sub-module for importing time-based data into Obsidian.
 import dataclasses
 import datetime
-from collections.abc import Iterable, Iterator
+from collections.abc import Iterator
 from logging import getLogger
 from pathlib import Path
 from typing import Any
@@ -104,9 +104,9 @@ def import_workout_csv(vault: ObsidianVault, rows: Rows) -> int:
     return num_updated
 
 
-def import_step_counts_csv(vault: ObsidianVault, rows: Rows) -> int:
-    MINIMUM_STEPS = 300
+MINIMUM_BELIEVABLE_STEP_COUNT = 300
+
+
+def import_step_counts_csv(vault: ObsidianVault, rows: Rows) -> int:
     num_updated = 0
 
     rows_per_date = {}
@@ -121,7 +121,7 @@ def import_step_counts_csv(vault: ObsidianVault, rows: Rows) -> int:
     }
 
     for date, steps in steps_per_date.items():
-        if steps < MINIMUM_STEPS:
+        if steps < MINIMUM_BELIEVABLE_STEP_COUNT:
             continue
         was_updated = vault.add_statistic(date, 'Steps', steps)
         if was_updated:
@@ -1,6 +1,9 @@
 import argparse
 import logging
 
+logging.basicConfig()
+logging.getLogger('personal_data').setLevel('INFO')
+
 import personal_data.main
 from personal_data.notification import NotificationType
@@ -37,9 +40,6 @@ def parse_arguments():
 
 
 def main():
-    logging.basicConfig()
-    logging.getLogger('personal_data').setLevel('INFO')
-
     args = parse_arguments()
     scraper_filter = frozenset(args.fetchers)
 
@@ -12,10 +12,10 @@ from . import data, fetchers, notification, util
 logger = logging.getLogger(__name__)
 
 try:
-    import cfscrape
+    import cloudscraper
 except ImportError:
-    cfscrape = None
-    logger.exception('cfscrape not installed: Certain fetchers might not work')
+    cloudscraper = None
+    logger.exception('cloudscraper not installed: Certain fetchers might not work')
 
 try:
     import browser_cookie3
@@ -26,11 +26,6 @@ except ImportError:
 
 OUTPUT_PATH = Path('./output')
 
-logging.basicConfig(
-    format='%(asctime)s %(levelname)s %(module)s:%(lineno)d - %(message)s',
-)
-logger.setLevel('INFO')
-
 
 STANDARD_HEADERS = {
     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0',
@@ -39,9 +34,9 @@ STANDARD_HEADERS = {
 }
 
 
-if cfscrape:
+if cloudscraper:
 
-    class CachedCfScrape(requests_cache.CacheMixin, cfscrape.CloudflareScraper):
+    class CachedCfScrape(requests_cache.CacheMixin, cloudscraper.CloudScraper):
         pass
 
 
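Note on the class above: requests-cache is built to compose this way. Its CacheMixin can be stacked onto any requests.Session subclass, and cloudscraper's CloudScraper is one, so the combined class both solves Cloudflare challenges and caches the responses. A minimal standalone sketch of the same pattern; the cache name, expiry, and URL below are illustrative, not part of this commit:

import cloudscraper
import requests_cache


# Same composition as CachedCfScrape above: the caching mixin comes
# first in the MRO, wrapping the Cloudflare-solving session class.
class CachedCloudflareSession(requests_cache.CacheMixin, cloudscraper.CloudScraper):
    pass


# Hypothetical cache location; expire_after is in seconds.
session = CachedCloudflareSession('output/http_cache', expire_after=3600)
response = session.get('https://example.org/')  # placeholder URL
print(response.from_cache)  # True on repeat requests within the hour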
@@ -54,13 +49,21 @@ def get_session(
     with_cfscrape: bool,
     ignore_cache: bool,
 ) -> requests.Session:
-    if with_cfscrape and cfscrape:
-        session_class = CachedCfScrape
-        if ignore_cache:
-            logger.warning('HTTP cache disabled')
-            return cfscrape.create_scraper()
+    session_class = requests_cache.CachedSession
+    if with_cfscrape:
+        if cloudscraper:
+            session_class = CachedCfScrape
+            if ignore_cache:
+                logger.warning('HTTP cache disabled')
+                return cloudscraper.create_scraper(
+                    interpreter='js2py',
+                    delay=5,
+                    debug=False,
+                )
+        else:
+            logger.error('Expected cloudscraper, but not defined!')
+    else:
+        session_class = requests_cache.CachedSession
     if ignore_cache:
         logger.warning('HTTP cache disabled')
         return requests.Session()
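For context on the uncached branch above: create_scraper() is cloudscraper's session factory. interpreter='js2py' selects a pure-Python JavaScript engine, so no Node.js installation is needed to solve the challenge, and delay=5 gives the interstitial challenge page time to pass. A rough sketch under those assumptions, with a placeholder URL:

import cloudscraper

# Uncached fallback path from get_session(), shown in isolation.
scraper = cloudscraper.create_scraper(
    interpreter='js2py',  # pure-Python JS engine; no Node.js required
    delay=5,              # seconds to wait out the challenge page
)
print(scraper.get('https://example.org/').status_code)  # placeholder URL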
@@ -100,6 +103,7 @@ def get_cookiejar(use_cookiejar: bool):
         browser_cookie3.firefox(
             '/home/jmaa/.cachy/mbui5xg7.default-release/cookies.sqlite',
         )
+        logger.warning('Cookiejar has %s cookies', len(cookiejar))
         if len(cookiejar) > 10:
             return cookiejar
     logger.warning('No cookiejar is used')
@@ -114,23 +118,23 @@ def main(
     notification_types: frozenset[notification.NotificationType],
 ) -> None:
     cookiejar = get_cookiejar(use_cookiejar)
-    logger.warning('Cookiejar has %s cookies', len(cookiejar))
 
     if len(notification_types) == 0:
         logger.info('No notifications enabled: Notifications will not be sent!')
 
     for scraper_cls in available_scrapers():
+        if scraper_cls.__name__ not in scraper_filter:
+            continue
         session = get_session(
-            cookiejar,
+            cookiejar=cookiejar,
             with_cfscrape=scraper_cls.requires_cfscrape(),
             ignore_cache=ignore_cache,
         )
         scraper = scraper_cls(session)
-        if scraper_cls.__name__ not in scraper_filter:
-            continue
         logger.info(
-            'Running %s, appending to "%s"',
+            'Running %s (%s), appending to "%s"',
             scraper_cls.__name__,
+            type(session).__name__,
             scraper.dataset_name,
         )
         result_rows = []
@@ -138,8 +142,9 @@ def main(
             for result in scraper.scrape():
                 result_rows.append(result)
                 del result
-        except requests.exceptions.HTTPError:
+        except requests.exceptions.HTTPError as e:
             logger.exception('Failed in running %s', scraper_cls.__name__)
+            logger.error('User-Agent: %s', e.request.headers['user-agent'])
             continue
         status = util.extend_csv_file(
             OUTPUT_PATH / f'{scraper.dataset_name}.csv',
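The `as e` capture above works because requests attaches the originating PreparedRequest to every RequestException, so the User-Agent that the server rejected can be recovered from the exception itself. A minimal sketch of the same pattern, with a placeholder URL:

import requests

try:
    response = requests.get('https://example.org/protected')  # placeholder URL
    response.raise_for_status()  # raises HTTPError on 4xx/5xx responses
except requests.exceptions.HTTPError as e:
    # e.request is the PreparedRequest that triggered the error; its
    # headers dict is case-insensitive, matching the lookup in the diff.
    print('User-Agent:', e.request.headers.get('user-agent'))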