
Compare commits

...

4 Commits

SHA1        Message  Date

119b380f8a  Use new improved cloudflare avoider  2025-06-16 23:24:09 +02:00
            Some checks failed: "Run Python tests (through Pytest) / Test (push)" failing after 28s;
            "Verify Python project can be installed, loaded and have version checked / Test (push)" failing after 26s
d945fb81fb  Remove debug prints  2025-06-15 23:15:47 +02:00
46cadc113d  🤖 Repository layout updated to latest version  2025-06-13 23:59:24 +02:00
            This commit was automatically generated by [a script](https://gitfub.space/Jmaa/repo-manager)
2a75b000a6  Check that the correct frontmatter is installed.  2025-06-12 23:21:40 +02:00
6 changed files with 36 additions and 34 deletions

View File

@@ -6,10 +6,8 @@ from typing import Any
 from personal_data.activity import ActivitySample, Label
 from personal_data.csv_import import determine_possible_keys, load_csv_file, start_end

-print(__name__)
-
 logger = getLogger(__name__)

 def iterate_samples_from_dicts(rows: list[dict[str, Any]]) -> Iterator[ActivitySample]:
     assert len(rows) > 0
     max_title_parts = 2

View File

@@ -5,7 +5,7 @@ Sub-module for importing time-based data into Obsidian.
 import dataclasses
 import datetime
-from collections.abc import Iterable, Iterator
+from collections.abc import Iterator
 from logging import getLogger
 from pathlib import Path
 from typing import Any
@@ -104,9 +104,9 @@ def import_workout_csv(vault: ObsidianVault, rows: Rows) -> int:
     return num_updated

-def import_step_counts_csv(vault: ObsidianVault, rows: Rows) -> int:
-    MINIMUM_STEPS = 300
+MINIMUM_BELIEVABLE_STEP_COUNT = 300
+
+def import_step_counts_csv(vault: ObsidianVault, rows: Rows) -> int:
     num_updated = 0
     rows_per_date = {}
@@ -121,7 +121,7 @@ def import_step_counts_csv(vault: ObsidianVault, rows: Rows) -> int:
     }
     for date, steps in steps_per_date.items():
-        if steps < MINIMUM_STEPS:
+        if steps < MINIMUM_BELIEVABLE_STEP_COUNT:
             continue
         was_updated = vault.add_statistic(date, 'Steps', steps)
         if was_updated:
@@ -155,8 +155,6 @@ def import_stepmania_steps_csv(vault: ObsidianVault, rows: Rows) -> int:
         date: sum((row['play.duration'] for row in rows), start=datetime.timedelta())
         for date, rows in rows_per_date.items()
     }
-    print(steps_per_date)
-    print(duration_per_date)
     for date in steps_per_date:
         was_updated_1 = vault.add_statistic(
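Note: renaming MINIMUM_STEPS to MINIMUM_BELIEVABLE_STEP_COUNT and hoisting it to module level makes clear that the value is a plausibility filter rather than a hard limit. A minimal sketch of the filtering it enables (the sample data is illustrative, not from the repository):

    MINIMUM_BELIEVABLE_STEP_COUNT = 300

    # Daily totals below the threshold are presumably spurious readings and are
    # skipped, matching the `continue` in the hunk above.
    steps_per_date = {'2025-06-10': 42, '2025-06-11': 8450}
    believable = {
        date: steps
        for date, steps in steps_per_date.items()
        if steps >= MINIMUM_BELIEVABLE_STEP_COUNT
    }
    print(believable)  # {'2025-06-11': 8450}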

View File

@@ -13,6 +13,8 @@ import frontmatter
 import marko
 import marko.md_renderer

+assert hasattr(frontmatter, 'loads'), 'Incorrect frontmatter package installed. Use: pip install python-frontmatter'
+
 logger = getLogger(__name__)

 StatisticKey = str
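Note: the added assertion targets a common dependency mix-up that the commit message "Check that the correct frontmatter is installed." refers to: python-frontmatter provides frontmatter.loads(), while an unrelated PyPI package also installs under the module name frontmatter but lacks that function. A standalone sketch of the guard, assuming python-frontmatter is installed:

    import frontmatter

    # python-frontmatter exposes loads()/load(); the look-alike package does not.
    assert hasattr(frontmatter, 'loads'), (
        'Incorrect frontmatter package installed. Use: pip install python-frontmatter'
    )

    post = frontmatter.loads('---\ntitle: Example\n---\nBody text')
    print(post['title'])  # -> 'Example'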

View File

@@ -1,6 +1,9 @@
 import argparse
 import logging

+logging.basicConfig()
+logging.getLogger('personal_data').setLevel('INFO')
+
 import personal_data.main
 from personal_data.notification import NotificationType
@@ -37,9 +40,6 @@ def parse_arguments():
 def main():
-    logging.basicConfig()
-    logging.getLogger('personal_data').setLevel('INFO')
-
     args = parse_arguments()
     scraper_filter = frozenset(args.fetchers)
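Note: the logging setup moves from main() to module scope so that it runs before import personal_data.main; records logged while that module and its fetchers are imported would otherwise be emitted before any handler exists. A small sketch of the ordering (assumes the personal_data package is importable):

    import logging

    logging.basicConfig()                                # install a root handler first
    logging.getLogger('personal_data').setLevel('INFO')  # then raise the package's level

    # Imported only after logging is configured, so import-time log records are visible.
    import personal_data.main  # noqa: E402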

View File

@@ -12,10 +12,10 @@ from . import data, fetchers, notification, util
 logger = logging.getLogger(__name__)

 try:
-    import cfscrape
+    import cloudscraper
 except ImportError:
-    cfscrape = None
-    logger.exception('cfscrape not installed: Certain fetchers might not work')
+    cloudscraper = None
+    logger.exception('cloudscraper not installed: Certain fetchers might not work')

 try:
     import browser_cookie3
@@ -26,11 +26,6 @@ except ImportError:
 OUTPUT_PATH = Path('./output')

-logging.basicConfig(
-    format='%(asctime)s %(levelname)s %(module)s:%(lineno)d - %(message)s',
-)
-logger.setLevel('INFO')
-
 STANDARD_HEADERS = {
     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0',
@@ -39,9 +34,9 @@ STANDARD_HEADERS = {
 }

-if cfscrape:
-    class CachedCfScrape(requests_cache.CacheMixin, cfscrape.CloudflareScraper):
+if cloudscraper:
+    class CachedCfScrape(requests_cache.CacheMixin, cloudscraper.CloudScraper):
         pass
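Note: CachedCfScrape uses the mixin pattern that requests-cache supports for adding caching to a third-party requests.Session subclass: list requests_cache.CacheMixin before the session class. A minimal sketch, assuming both requests-cache and cloudscraper are installed (the cache name and expiry below are illustrative):

    import cloudscraper
    import requests_cache

    class CachedCfScrape(requests_cache.CacheMixin, cloudscraper.CloudScraper):
        """A cloudscraper session whose responses are cached by requests-cache."""

    session = CachedCfScrape('output/http_cache', expire_after=3600)
    response = session.get('https://example.org')
    print(response.from_cache)  # False on the first fetch, True on a repeat within the hour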
@@ -54,13 +49,21 @@ def get_session(
     with_cfscrape: bool,
     ignore_cache: bool,
 ) -> requests.Session:
-    if with_cfscrape and cfscrape:
-        session_class = CachedCfScrape
-        if ignore_cache:
-            logger.warning('HTTP cache disabled')
-            return cfscrape.create_scraper()
+    session_class = requests_cache.CachedSession
+    if with_cfscrape:
+        if cloudscraper:
+            session_class = CachedCfScrape
+            if ignore_cache:
+                logger.warning('HTTP cache disabled')
+                return cloudscraper.create_scraper(
+                    interpreter='js2py',
+                    delay=5,
+                    debug=False,
+                )
+        else:
+            logger.error('Expected cloudscraper, but not defined!')
     else:
-        session_class = requests_cache.CachedSession
         if ignore_cache:
             logger.warning('HTTP cache disabled')
             return requests.Session()
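Note: get_session() now defaults to requests_cache.CachedSession, only upgrades to CachedCfScrape when cloudscraper imported successfully, and logs an error instead of silently falling through when it did not. The keyword arguments passed to cloudscraper.create_scraper() are ordinary cloudscraper options; a sketch of that uncached fallback path in isolation (the URL is illustrative):

    import cloudscraper

    scraper = cloudscraper.create_scraper(
        interpreter='js2py',  # JavaScript engine used to work through Cloudflare's challenge
        delay=5,              # extra seconds to wait before submitting the challenge answer
        debug=False,          # set True to log the challenge request/response cycle
    )
    response = scraper.get('https://example.org')
    print(response.status_code)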
@@ -100,6 +103,7 @@ def get_cookiejar(use_cookiejar: bool):
         browser_cookie3.firefox(
             '/home/jmaa/.cachy/mbui5xg7.default-release/cookies.sqlite',
         )
+        logger.warning('Cookiejar has %s cookies', len(cookiejar))
         if len(cookiejar) > 10:
             return cookiejar
     logger.warning('No cookiejar is used')
@@ -114,23 +118,23 @@ def main(
     notification_types: frozenset[notification.NotificationType],
 ) -> None:
     cookiejar = get_cookiejar(use_cookiejar)
-    logger.warning('Cookiejar has %s cookies', len(cookiejar))

     if len(notification_types) == 0:
         logger.info('No notifications enabled: Notifications will not be sent!')

     for scraper_cls in available_scrapers():
+        if scraper_cls.__name__ not in scraper_filter:
+            continue
         session = get_session(
-            cookiejar,
+            cookiejar=cookiejar,
             with_cfscrape=scraper_cls.requires_cfscrape(),
             ignore_cache=ignore_cache,
         )
         scraper = scraper_cls(session)
-        if scraper_cls.__name__ not in scraper_filter:
-            continue
         logger.info(
-            'Running %s, appending to "%s"',
+            'Running %s (%s), appending to "%s"',
             scraper_cls.__name__,
+            type(session).__name__,
             scraper.dataset_name,
         )
         result_rows = []
@@ -138,8 +142,9 @@ def main(
             for result in scraper.scrape():
                 result_rows.append(result)
                 del result
-        except requests.exceptions.HTTPError:
+        except requests.exceptions.HTTPError as e:
             logger.exception('Failed in running %s', scraper_cls.__name__)
+            logger.error('User-Agent: %s', e.request.headers['user-agent'])
             continue
         status = util.extend_csv_file(
             OUTPUT_PATH / f'{scraper.dataset_name}.csv',
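Note: binding the exception (as e) makes the failing request inspectable: the HTTPError that requests raises from raise_for_status() carries the originating PreparedRequest (and the Response, when one exists), and its headers are case-insensitive, so e.request.headers['user-agent'] works. A short sketch of the same pattern outside the scraper loop (the URL and User-Agent value are illustrative):

    import requests

    try:
        response = requests.get(
            'https://httpbin.org/status/503',
            headers={'User-Agent': 'personal-data/0.1'},
        )
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        # The exception exposes both the request that was sent and the response received.
        print('User-Agent:', e.request.headers['user-agent'])
        print('Status:', e.response.status_code)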

View File

@@ -76,7 +76,6 @@ def find_python_packages() -> list[str]:
     for init_file in root_path.rglob('__init__.py'):
         packages.add(str(init_file.parent).replace('/', '.'))
-    print(f'Found following packages: {packages}')
     return sorted(packages)

 with open(PACKAGE_NAME + '/_version.py') as f: