
Compare commits

...

4 Commits

SHA1        Message  Date

119b380f8a  Use new improved cloudflare avoider  2025-06-16 23:24:09 +02:00
            Some checks failed: "Run Python tests (through Pytest) / Test (push)" failing after 28s;
            "Verify Python project can be installed, loaded and have version checked / Test (push)" failing after 26s
d945fb81fb  Remove debug prints  2025-06-15 23:15:47 +02:00
46cadc113d  🤖 Repository layout updated to latest version  2025-06-13 23:59:24 +02:00
            This commit was automatically generated by [a script](https://gitfub.space/Jmaa/repo-manager)
2a75b000a6  Check that the correct frontmatter is installed.  2025-06-12 23:21:40 +02:00
6 changed files with 36 additions and 34 deletions

View File

@@ -6,10 +6,8 @@ from typing import Any
 from personal_data.activity import ActivitySample, Label
 from personal_data.csv_import import determine_possible_keys, load_csv_file, start_end

-print(__name__)
-
 logger = getLogger(__name__)

 def iterate_samples_from_dicts(rows: list[dict[str, Any]]) -> Iterator[ActivitySample]:
     assert len(rows) > 0
     max_title_parts = 2

View File

@@ -5,7 +5,7 @@ Sub-module for importing time-based data into Obsidian.
 import dataclasses
 import datetime
-from collections.abc import Iterable, Iterator
+from collections.abc import Iterator
 from logging import getLogger
 from pathlib import Path
 from typing import Any
@@ -104,9 +104,9 @@ def import_workout_csv(vault: ObsidianVault, rows: Rows) -> int:
     return num_updated

-def import_step_counts_csv(vault: ObsidianVault, rows: Rows) -> int:
-    MINIMUM_STEPS = 300
+MINIMUM_BELIEVABLE_STEP_COUNT = 300
+
+def import_step_counts_csv(vault: ObsidianVault, rows: Rows) -> int:
     num_updated = 0
     rows_per_date = {}
@@ -121,7 +121,7 @@ def import_step_counts_csv(vault: ObsidianVault, rows: Rows) -> int:
     }
     for date, steps in steps_per_date.items():
-        if steps < MINIMUM_STEPS:
+        if steps < MINIMUM_BELIEVABLE_STEP_COUNT:
             continue
         was_updated = vault.add_statistic(date, 'Steps', steps)
         if was_updated:
@@ -155,8 +155,6 @@ def import_stepmania_steps_csv(vault: ObsidianVault, rows: Rows) -> int:
         date: sum((row['play.duration'] for row in rows), start=datetime.timedelta())
         for date, rows in rows_per_date.items()
     }
-    print(steps_per_date)
-    print(duration_per_date)
     for date in steps_per_date:
         was_updated_1 = vault.add_statistic(
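Note: renaming MINIMUM_STEPS to MINIMUM_BELIEVABLE_STEP_COUNT and hoisting it to module level makes clear that the value is a plausibility filter rather than a hard limit. A minimal sketch of the filtering it enables (the sample data is illustrative, not from the repository):

    MINIMUM_BELIEVABLE_STEP_COUNT = 300

    # Daily totals below the threshold are presumably spurious readings and are
    # skipped, matching the `continue` in the hunk above.
    steps_per_date = {'2025-06-10': 42, '2025-06-11': 8450}
    believable = {
        date: steps
        for date, steps in steps_per_date.items()
        if steps >= MINIMUM_BELIEVABLE_STEP_COUNT
    }
    print(believable)  # {'2025-06-11': 8450}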

View File

@@ -13,6 +13,8 @@ import frontmatter
 import marko
 import marko.md_renderer

+assert hasattr(frontmatter, 'loads'), 'Incorrect frontmatter package installed. Use: pip install python-frontmatter'
+
 logger = getLogger(__name__)

 StatisticKey = str
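Note: the added assertion targets a common dependency mix-up that the commit message "Check that the correct frontmatter is installed." refers to: python-frontmatter provides frontmatter.loads(), while an unrelated PyPI package also installs under the module name frontmatter but lacks that function. A standalone sketch of the guard, assuming python-frontmatter is installed:

    import frontmatter

    # python-frontmatter exposes loads()/load(); the look-alike package does not.
    assert hasattr(frontmatter, 'loads'), (
        'Incorrect frontmatter package installed. Use: pip install python-frontmatter'
    )

    post = frontmatter.loads('---\ntitle: Example\n---\nBody text')
    print(post['title'])  # -> 'Example'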

View File

@@ -1,6 +1,9 @@
 import argparse
 import logging

+logging.basicConfig()
+logging.getLogger('personal_data').setLevel('INFO')
+
 import personal_data.main
 from personal_data.notification import NotificationType
@@ -37,9 +40,6 @@ def parse_arguments():
 def main():
-    logging.basicConfig()
-    logging.getLogger('personal_data').setLevel('INFO')
-
     args = parse_arguments()
     scraper_filter = frozenset(args.fetchers)
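Note: the logging setup moves from main() to module scope so that it runs before import personal_data.main; records logged while that module and its fetchers are imported would otherwise be emitted before any handler exists. A small sketch of the ordering (assumes the personal_data package is importable):

    import logging

    logging.basicConfig()                                # install a root handler first
    logging.getLogger('personal_data').setLevel('INFO')  # then raise the package's level

    # Imported only after logging is configured, so import-time log records are visible.
    import personal_data.main  # noqa: E402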

View File

@@ -12,10 +12,10 @@ from . import data, fetchers, notification, util
 logger = logging.getLogger(__name__)

 try:
-    import cfscrape
+    import cloudscraper
 except ImportError:
-    cfscrape = None
-    logger.exception('cfscrape not installed: Certain fetchers might not work')
+    cloudscraper = None
+    logger.exception('cloudscraper not installed: Certain fetchers might not work')

 try:
     import browser_cookie3
@@ -26,11 +26,6 @@ except ImportError:
 OUTPUT_PATH = Path('./output')

-logging.basicConfig(
-    format='%(asctime)s %(levelname)s %(module)s:%(lineno)d - %(message)s',
-)
-logger.setLevel('INFO')
-
 STANDARD_HEADERS = {
     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0',
@@ -39,9 +34,9 @@ STANDARD_HEADERS = {
 }

-if cfscrape:
-    class CachedCfScrape(requests_cache.CacheMixin, cfscrape.CloudflareScraper):
+if cloudscraper:
+    class CachedCfScrape(requests_cache.CacheMixin, cloudscraper.CloudScraper):
         pass
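Note: CachedCfScrape uses the mixin pattern that requests-cache supports for adding caching to a third-party requests.Session subclass: list requests_cache.CacheMixin before the session class. A minimal sketch, assuming both requests-cache and cloudscraper are installed (the cache name and expiry below are illustrative):

    import cloudscraper
    import requests_cache

    class CachedCfScrape(requests_cache.CacheMixin, cloudscraper.CloudScraper):
        """A cloudscraper session whose responses are cached by requests-cache."""

    session = CachedCfScrape('output/http_cache', expire_after=3600)
    response = session.get('https://example.org')
    print(response.from_cache)  # False on the first fetch, True on a repeat within the hour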
@@ -54,13 +49,21 @@ def get_session(
     with_cfscrape: bool,
     ignore_cache: bool,
 ) -> requests.Session:
-    if with_cfscrape and cfscrape:
-        session_class = CachedCfScrape
-        if ignore_cache:
-            logger.warning('HTTP cache disabled')
-            return cfscrape.create_scraper()
+    session_class = requests_cache.CachedSession
+    if with_cfscrape:
+        if cloudscraper:
+            session_class = CachedCfScrape
+            if ignore_cache:
+                logger.warning('HTTP cache disabled')
+                return cloudscraper.create_scraper(
+                    interpreter='js2py',
+                    delay=5,
+                    debug=False,
+                )
+        else:
+            logger.error('Expected cloudscraper, but not defined!')
     else:
-        session_class = requests_cache.CachedSession
         if ignore_cache:
             logger.warning('HTTP cache disabled')
             return requests.Session()
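Note: get_session() now defaults to requests_cache.CachedSession, only upgrades to CachedCfScrape when cloudscraper imported successfully, and logs an error instead of silently falling through when it did not. The keyword arguments passed to cloudscraper.create_scraper() are ordinary cloudscraper options; a sketch of that uncached fallback path in isolation (the URL is illustrative):

    import cloudscraper

    scraper = cloudscraper.create_scraper(
        interpreter='js2py',  # JavaScript engine used to work through Cloudflare's challenge
        delay=5,              # extra seconds to wait before submitting the challenge answer
        debug=False,          # set True to log the challenge request/response cycle
    )
    response = scraper.get('https://example.org')
    print(response.status_code)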
@@ -100,6 +103,7 @@ def get_cookiejar(use_cookiejar: bool):
         browser_cookie3.firefox(
             '/home/jmaa/.cachy/mbui5xg7.default-release/cookies.sqlite',
         )
+        logger.warning('Cookiejar has %s cookies', len(cookiejar))
         if len(cookiejar) > 10:
             return cookiejar
     logger.warning('No cookiejar is used')
@@ -114,23 +118,23 @@ def main(
     notification_types: frozenset[notification.NotificationType],
 ) -> None:
     cookiejar = get_cookiejar(use_cookiejar)
-    logger.warning('Cookiejar has %s cookies', len(cookiejar))

     if len(notification_types) == 0:
         logger.info('No notifications enabled: Notifications will not be sent!')

     for scraper_cls in available_scrapers():
+        if scraper_cls.__name__ not in scraper_filter:
+            continue
         session = get_session(
-            cookiejar,
+            cookiejar=cookiejar,
             with_cfscrape=scraper_cls.requires_cfscrape(),
             ignore_cache=ignore_cache,
         )
         scraper = scraper_cls(session)
-        if scraper_cls.__name__ not in scraper_filter:
-            continue
         logger.info(
-            'Running %s, appending to "%s"',
+            'Running %s (%s), appending to "%s"',
             scraper_cls.__name__,
+            type(session).__name__,
             scraper.dataset_name,
         )
         result_rows = []
@@ -138,8 +142,9 @@ def main(
             for result in scraper.scrape():
                 result_rows.append(result)
                 del result
-        except requests.exceptions.HTTPError:
+        except requests.exceptions.HTTPError as e:
             logger.exception('Failed in running %s', scraper_cls.__name__)
+            logger.error('User-Agent: %s', e.request.headers['user-agent'])
             continue
         status = util.extend_csv_file(
             OUTPUT_PATH / f'{scraper.dataset_name}.csv',
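Note: binding the exception (as e) makes the failing request inspectable: the HTTPError that requests raises from raise_for_status() carries the originating PreparedRequest (and the Response, when one exists), and its headers are case-insensitive, so e.request.headers['user-agent'] works. A short sketch of the same pattern outside the scraper loop (the URL and User-Agent value are illustrative):

    import requests

    try:
        response = requests.get(
            'https://httpbin.org/status/503',
            headers={'User-Agent': 'personal-data/0.1'},
        )
        response.raise_for_status()
    except requests.exceptions.HTTPError as e:
        # The exception exposes both the request that was sent and the response received.
        print('User-Agent:', e.request.headers['user-agent'])
        print('Status:', e.response.status_code)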

View File

@@ -76,7 +76,6 @@ def find_python_packages() -> list[str]:
     for init_file in root_path.rglob('__init__.py'):
         packages.add(str(init_file.parent).replace('/', '.'))
-    print(f'Found following packages: {packages}')
     return sorted(packages)

 with open(PACKAGE_NAME + '/_version.py') as f: