
Use new improved cloudflare avoider
Some checks failed
Run Python tests (through Pytest) / Test (push) Failing after 28s
Verify Python project can be installed, loaded and have version checked / Test (push) Failing after 26s

Jon Michael Aanes 2025-06-16 23:24:09 +02:00
parent d945fb81fb
commit 119b380f8a
3 changed files with 34 additions and 29 deletions
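
The commit swaps the cfscrape-based Cloudflare bypass for the cloudscraper library. As a minimal sketch (not part of this commit; the target URL is a placeholder), the library call it adopts looks roughly like this:

import cloudscraper

scraper = cloudscraper.create_scraper(
    interpreter='js2py',  # solve the Cloudflare JavaScript challenge with js2py
    delay=5,              # wait a few seconds before submitting the challenge answer
    debug=False,
)
response = scraper.get('https://example.com/')
print(response.status_code)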

View File

@@ -5,7 +5,7 @@ Sub-module for importing time-based data into Obsidian.
 import dataclasses
 import datetime
-from collections.abc import Iterable, Iterator
+from collections.abc import Iterator
 from logging import getLogger
 from pathlib import Path
 from typing import Any
@@ -104,9 +104,9 @@ def import_workout_csv(vault: ObsidianVault, rows: Rows) -> int:
     return num_updated
-def import_step_counts_csv(vault: ObsidianVault, rows: Rows) -> int:
-    MINIMUM_STEPS = 300
+MINIMUM_BELIEVABLE_STEP_COUNT = 300
+
+def import_step_counts_csv(vault: ObsidianVault, rows: Rows) -> int:
     num_updated = 0
     rows_per_date = {}
@@ -121,7 +121,7 @@ def import_step_counts_csv(vault: ObsidianVault, rows: Rows) -> int:
     }
     for date, steps in steps_per_date.items():
-        if steps < MINIMUM_STEPS:
+        if steps < MINIMUM_BELIEVABLE_STEP_COUNT:
             continue
         was_updated = vault.add_statistic(date, 'Steps', steps)
         if was_updated:

View File

@@ -1,6 +1,9 @@
 import argparse
 import logging
 
+logging.basicConfig()
+logging.getLogger('personal_data').setLevel('INFO')
+
 import personal_data.main
 from personal_data.notification import NotificationType
@@ -37,9 +40,6 @@ def parse_arguments():
 
 def main():
-    logging.basicConfig()
-    logging.getLogger('personal_data').setLevel('INFO')
     args = parse_arguments()
     scraper_filter = frozenset(args.fetchers)

View File

@@ -12,10 +12,10 @@ from . import data, fetchers, notification, util
 logger = logging.getLogger(__name__)
 
 try:
-    import cfscrape
+    import cloudscraper
 except ImportError:
-    cfscrape = None
-    logger.exception('cfscrape not installed: Certain fetchers might not work')
+    cloudscraper = None
+    logger.exception('cloudscraper not installed: Certain fetchers might not work')
 
 try:
     import browser_cookie3
@@ -26,11 +26,6 @@ except ImportError:
 OUTPUT_PATH = Path('./output')
 
-logging.basicConfig(
-    format='%(asctime)s %(levelname)s %(module)s:%(lineno)d - %(message)s',
-)
-logger.setLevel('INFO')
-
 STANDARD_HEADERS = {
     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0',
@@ -39,9 +34,9 @@ STANDARD_HEADERS = {
 }
 
-if cfscrape:
+if cloudscraper:
 
-    class CachedCfScrape(requests_cache.CacheMixin, cfscrape.CloudflareScraper):
+    class CachedCfScrape(requests_cache.CacheMixin, cloudscraper.CloudScraper):
         pass
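
The CachedCfScrape class above follows the requests-cache mixin pattern: requests_cache.CacheMixin layered on top of a session class yields a session whose responses are cached transparently. A standalone sketch of the same idea, assuming requests-cache and cloudscraper are installed (the class name, cache name, and URL here are illustrative, not from the commit):

import requests_cache
import cloudscraper

class CachedCloudScraper(requests_cache.CacheMixin, cloudscraper.CloudScraper):
    # CloudScraper session whose responses are cached locally by requests-cache.
    pass

session = CachedCloudScraper('web_cache', expire_after=24 * 60 * 60)
response = session.get('https://example.com/')
print(response.from_cache)  # True on a repeated request within the expiry window
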
@@ -54,13 +49,21 @@ def get_session(
     with_cfscrape: bool,
     ignore_cache: bool,
 ) -> requests.Session:
-    if with_cfscrape and cfscrape:
-        session_class = requests_cache.CachedSession
+    if with_cfscrape:
+        if cloudscraper:
             session_class = CachedCfScrape
             if ignore_cache:
                 logger.warning('HTTP cache disabled')
-                return cfscrape.create_scraper()
+                return cloudscraper.create_scraper(
+                    interpreter='js2py',
+                    delay=5,
+                    debug=False,
+                )
+        else:
+            logger.error('Expected cloudscraper, but not defined!')
+    else:
+        session_class = requests_cache.CachedSession
     if ignore_cache:
         logger.warning('HTTP cache disabled')
         return requests.Session()
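
A usage sketch of the reworked get_session(): a fetcher that requires Cloudflare avoidance gets a cached CloudScraper session (or a bare cloudscraper scraper when the cache is disabled), while everything else gets a plain requests_cache.CachedSession. The module path and argument values below are assumed for illustration only:

from http.cookiejar import CookieJar

from personal_data.main import get_session  # assumed module path

session = get_session(
    cookiejar=CookieJar(),   # empty jar; main() normally passes browser cookies
    with_cfscrape=True,      # the fetcher declared requires_cfscrape()
    ignore_cache=False,
)
print(type(session).__name__)  # CachedCfScrape when cloudscraper is available
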
@@ -100,6 +103,7 @@ def get_cookiejar(use_cookiejar: bool):
         browser_cookie3.firefox(
             '/home/jmaa/.cachy/mbui5xg7.default-release/cookies.sqlite',
         )
+        logger.warning('Cookiejar has %s cookies', len(cookiejar))
         if len(cookiejar) > 10:
             return cookiejar
     logger.warning('No cookiejar is used')
@@ -114,23 +118,23 @@ def main(
     notification_types: frozenset[notification.NotificationType],
 ) -> None:
     cookiejar = get_cookiejar(use_cookiejar)
-    logger.warning('Cookiejar has %s cookies', len(cookiejar))
 
     if len(notification_types) == 0:
         logger.info('No notifications enabled: Notifications will not be sent!')
 
     for scraper_cls in available_scrapers():
+        if scraper_cls.__name__ not in scraper_filter:
+            continue
         session = get_session(
-            cookiejar,
+            cookiejar=cookiejar,
             with_cfscrape=scraper_cls.requires_cfscrape(),
             ignore_cache=ignore_cache,
         )
         scraper = scraper_cls(session)
-        if scraper_cls.__name__ not in scraper_filter:
-            continue
         logger.info(
-            'Running %s, appending to "%s"',
+            'Running %s (%s), appending to "%s"',
             scraper_cls.__name__,
+            type(session).__name__,
             scraper.dataset_name,
         )
         result_rows = []
@@ -138,8 +142,9 @@ def main(
             for result in scraper.scrape():
                 result_rows.append(result)
                 del result
-        except requests.exceptions.HTTPError:
+        except requests.exceptions.HTTPError as e:
             logger.exception('Failed in running %s', scraper_cls.__name__)
+            logger.error('User-Agent: %s', e.request.headers['user-agent'])
             continue
         status = util.extend_csv_file(
             OUTPUT_PATH / f'{scraper.dataset_name}.csv',