1
0

Compare commits

...

8 Commits

Author SHA1 Message Date
acaedcbd3a Fix code style and optimization issues
Some checks failed
Run Python tests (through Pytest) / Test (push) Failing after 29s
Verify Python project can be installed, loaded and have version checked / Test (push) Failing after 26s
- Fix variable naming: TIME_COLUMN -> time_column, l -> components, COLUMNS -> columns
- Extract exception string literals to variables (EM101)
- Replace assert statements with proper error handling in obsidian_import
- Use dict.pop() instead of del for key removal (RUF051)
- Use elif instead of else-if to reduce indentation (PLR5501)
- Replace magic number 10 with MIN_COOKIES_THRESHOLD constant (PLR2004)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-25 00:27:33 +02:00
2272fc1127 Fix docstring formatting issues
- Fix docstring punctuation in html_data_format/__init__.py
- Convert multi-line docstring to single line in html_data_format/__main__.py
- Convert multi-line docstring to single line in setup.py
- Replace set([...]) with set literal {...} in setup.py

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-25 00:23:36 +02:00
36ba48f36e Improve exception handling patterns
- Replace bare except with specific Exception in git_repo.py
- Use try/except/else pattern for better flow in git_repo.py
- Replace logger.error with logger.exception in main.py for better error reporting

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-25 00:22:49 +02:00
b79ea804b2 Replace assert statements with proper error handling
- Replace assert with ValueError in git_time_tracker/source/csv_file.py
- Replace assert with ValueError in git_time_tracker/source/git_repo.py
- Replace assert with TypeError in personal_data/util.py
- Fix isinstance call to use tuple for efficiency

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-25 00:22:06 +02:00
2087460a7f Fix ruff issues: remove unused variables and empty comments
- Remove empty comment in git_time_tracker/format/cli.py:61
- Remove unused max_title_parts in git_time_tracker/format/icalendar.py:28
- Remove unused max_title_parts in git_time_tracker/source/csv_file.py:14

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-25 00:21:11 +02:00
79cf72ffc1 Fix ruff error in OpenScale scraper
Move f-string from exception to variable assignment to comply with EM102.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-25 00:19:12 +02:00
35a3c35821 Parse OpenScale datetime from milliseconds to UTC datetime
Convert timestamp from milliseconds since Unix epoch to proper datetime object with UTC timezone.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-25 00:17:36 +02:00
bc4cea1cbc Add OpenScale SQLite database scraper
Reads weight measurements from OpenScale backup SQLite database.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-06-25 00:15:13 +02:00
11 changed files with 95 additions and 45 deletions

View File

@ -57,8 +57,6 @@ def generate_report(
time_and_label = [(duration, label) for label, duration in time_per_label.items()]
time_and_label.sort(reverse=True)
#
yield '-' * LINE_LENGTH
yield '\n'
for total_time, label in time_and_label:

View File

@ -25,8 +25,6 @@ def create_title(sample: RealizedActivitySample) -> tuple[str, str]:
def generate_calendar(
samples: list[RealizedActivitySample],
) -> icalendar.Calendar:
max_title_parts = 2
cal = icalendar.Calendar()
cal.add('prodid', '-//personal_data_calendar//example.org//')
cal.add('version', '2.0')

View File

@ -10,8 +10,9 @@ logger = getLogger(__name__)
def iterate_samples_from_dicts(rows: list[dict[str, Any]]) -> Iterator[ActivitySample]:
assert len(rows) > 0
max_title_parts = 2
if len(rows) == 0:
message = 'No rows provided'
raise ValueError(message)
if True:
event_data = rows[len(rows) // 2] # Hopefully select a useful representative.
@ -19,8 +20,9 @@ def iterate_samples_from_dicts(rows: list[dict[str, Any]]) -> Iterator[ActivityS
logger.info('Found possible keys: %s', possible_keys)
del event_data
assert len(possible_keys.time_start) + len(possible_keys.time_end) >= 1
assert len(possible_keys.image) >= 0
if len(possible_keys.time_start) + len(possible_keys.time_end) < 1:
message = 'No time columns found in data'
raise ValueError(message)
for event_data in rows:
"""
@ -47,5 +49,7 @@ def iterate_samples_from_dicts(rows: list[dict[str, Any]]) -> Iterator[ActivityS
def iterate_samples_from_csv_file(file_path: Path) -> Iterator[ActivitySample]:
dicts = load_csv_file(file_path)
samples = list(iterate_samples_from_dicts(dicts))
assert len(samples) > 0, 'Did not found any samples'
if len(samples) == 0:
message = 'Did not find any samples'
raise ValueError(message)
yield from samples

View File

@ -13,9 +13,10 @@ logger = logging.getLogger(__name__)
def determine_default_branch(repo: git.Repo):
try:
repo.commit('main')
return 'main'
except:
except Exception:
return 'master'
else:
return 'main'
def determine_project_name(repo: git.Repo) -> str:
@ -27,7 +28,9 @@ def determine_project_name(repo: git.Repo) -> str:
def get_samples_from_project(repo: git.Repo) -> Iterator[ActivitySample]:
project_name = determine_project_name(repo)
assert project_name is not None
if project_name is None:
message = 'Could not determine project name'
raise ValueError(message)
# TODO: Branch on main or master or default

View File

@ -1 +1 @@
"""# HTML-Data Formatting"""
"""HTML-Data Formatting."""

View File

@ -13,11 +13,7 @@ ROOT_DIRECTORY = Path('output')
@bottle.route('/<csv_type>/newest')
def newest_entry(csv_type: str):
"""
Loads a CSV file (default: data.csv, overridable by query param 'file'),
finds the newest entry based on the 'time.current' column, and returns it as JSON.
"""
"""Loads a CSV file and finds the newest entry based on 'time.current' column, returns as JSON."""
path = ROOT_DIRECTORY / f'{csv_type}.csv'
bottle.response.content_type = 'application/json'
@ -33,10 +29,10 @@ def newest_entry(csv_type: str):
bottle.response.status = 404
return {'error': 'CSV file is empty or no data found'}
TIME_COLUMN = 'time.current'
time_column = 'time.current'
if TIME_COLUMN in data[0]:
newest = max(data, key=lambda r: r.get(TIME_COLUMN))
if time_column in data[0]:
newest = max(data, key=lambda r: r.get(time_column))
else:
newest = data[-1]

View File

@ -39,18 +39,19 @@ def to_text_duration(duration: datetime.timedelta) -> str:
duration -= minutes * MINUTE
seconds = int(duration / SECOND)
l = []
components = []
if hours > 0:
l.append(f'{hours} hours')
components.append(f'{hours} hours')
if minutes > 0:
l.append(f'{minutes} minutes')
components.append(f'{minutes} minutes')
if seconds > 0:
l.append(f'{seconds} seconds')
return ' '.join(l)
components.append(f'{seconds} seconds')
return ' '.join(components)
def iterate_samples_from_rows(rows: Rows) -> Iterator[ActivitySample]:
assert len(rows) > 0
if len(rows) == 0:
raise ValueError("No rows provided for sample iteration")
if True:
event_data = rows[len(rows) // 2] # Hopefully select a useful representative.
@ -58,8 +59,10 @@ def iterate_samples_from_rows(rows: Rows) -> Iterator[ActivitySample]:
logger.info('Found possible keys: %s', possible_keys)
del event_data
assert len(possible_keys.time_start) + len(possible_keys.time_end) >= 1
assert len(possible_keys.image) >= 0
if len(possible_keys.time_start) + len(possible_keys.time_end) < 1:
raise ValueError("No time start or end keys found in data")
if len(possible_keys.image) < 0:
raise ValueError("Invalid number of image keys found")
for event_data in rows:
(start_at, end_at) = start_end(event_data, possible_keys)
@ -142,10 +145,10 @@ def import_stepmania_steps_csv(vault: ObsidianVault, rows: Rows) -> int:
rows_per_date[date].append(row)
del date, row
COLUMNS = ['score.w1', 'score.w2', 'score.w3', 'score.w4', 'score.w5']
columns = ['score.w1', 'score.w2', 'score.w3', 'score.w4', 'score.w5']
def all_steps(row: dict[str, int]):
return sum(row[column] for column in COLUMNS)
return sum(row[column] for column in columns)
steps_per_date = {
date: sum(all_steps(row) for row in rows)

View File

@ -0,0 +1,46 @@
"""OpenScale SQLite database fetcher.
Reads weight measurements from an OpenScale backup SQLite database.
OpenScale is an open-source weight tracking app for Android.
"""
import dataclasses
import datetime
import sqlite3
from pathlib import Path
from personal_data.data import DeduplicateMode, Scraper
# Filesystem path of the OpenScale backup database opened by OpenScale.scrape.
# NOTE(review): hard-coded absolute path to one dated backup file — consider
# making this configurable.
DATABASE_PATH = '/home/jmaa/Notes/Rawbackupdata/ScaleWeight/2025-06-24_openScale.db'
@dataclasses.dataclass(frozen=True)
class OpenScale(Scraper):
    """Scraper that reads weight measurements from an OpenScale SQLite backup.

    The database location is taken from the module-level ``DATABASE_PATH``.
    """

    dataset_name = 'openscale_measurements'
    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS

    @staticmethod
    def requires_cfscrape() -> bool:
        """Return ``False``; this scraper does not require cfscrape."""
        return False

    def scrape(self):
        """Read weight measurements from OpenScale SQLite database.

        Yields:
            One dict per row of the ``scaleMeasurements`` table, ordered by
            the stored ``datetime`` column, with the keys ``'datetime'`` (a
            timezone-aware UTC ``datetime.datetime``) and ``'weight'`` (the
            value as stored in the database).

        Raises:
            FileNotFoundError: If no file exists at ``DATABASE_PATH``. As this
                is a generator, the error surfaces on the first iteration.
        """
        db_path = Path(DATABASE_PATH)
        if not db_path.exists():
            msg = f'OpenScale database not found at {DATABASE_PATH}'
            raise FileNotFoundError(msg)

        # Using the connection as a context manager only commits/rolls back
        # the transaction; it does not close the connection. Close it
        # explicitly, and do so before yielding so the handle is not held
        # open while the consumer iterates.
        conn = sqlite3.connect(db_path)
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            cursor.execute("""
                SELECT datetime, weight
                FROM scaleMeasurements
                ORDER BY datetime
            """)
            rows = cursor.fetchall()
        finally:
            conn.close()

        for row in rows:
            # The column holds milliseconds since the Unix epoch; convert to
            # seconds before building the UTC datetime.
            timestamp_ms = row['datetime']
            dt = datetime.datetime.fromtimestamp(
                timestamp_ms / 1000,
                tz=datetime.timezone.utc,
            )
            yield {'datetime': dt, 'weight': row['weight']}

View File

@ -11,6 +11,8 @@ from . import data, fetchers, notification, util
logger = logging.getLogger(__name__)
MIN_COOKIES_THRESHOLD = 10
try:
import cloudscraper
except ImportError:
@ -63,8 +65,7 @@ def get_session(
)
else:
logger.error('Expected cloudscraper, but not defined!')
else:
if ignore_cache:
elif ignore_cache:
logger.warning('HTTP cache disabled')
return requests.Session()
session = session_class(
@ -98,13 +99,13 @@ def get_cookiejar(use_cookiejar: bool):
if use_cookiejar:
logger.warning('Got cookiejar from firefox')
cookiejar = browser_cookie3.firefox()
if len(cookiejar) > 10:
if len(cookiejar) > MIN_COOKIES_THRESHOLD:
return cookiejar
browser_cookie3.firefox(
'/home/jmaa/.cachy/mbui5xg7.default-release/cookies.sqlite',
)
logger.warning('Cookiejar has %s cookies', len(cookiejar))
if len(cookiejar) > 10:
if len(cookiejar) > MIN_COOKIES_THRESHOLD:
return cookiejar
logger.warning('No cookiejar is used')
return []
@ -144,7 +145,7 @@ def main(
del result
except requests.exceptions.HTTPError as e:
logger.exception('Failed in running %s', scraper_cls.__name__)
logger.error('User-Agent: %s', e.request.headers['user-agent'])
logger.exception('User-Agent: %s', e.request.headers['user-agent'])
continue
status = util.extend_csv_file(
OUTPUT_PATH / f'{scraper.dataset_name}.csv',

View File

@ -16,8 +16,7 @@ logger = logging.getLogger(__name__)
def safe_del(d: dict, *keys: str):
for key in keys:
if key in d:
del d[key]
d.pop(key, None)
def equals_without_fields(
@ -64,7 +63,8 @@ def deduplicate_dicts(
deduplicate_ignore_columns: list[str],
) -> tuple[list[frozendict[str, Any]], list[str]]:
if not isinstance(deduplicate_ignore_columns, list):
raise TypeError(deduplicate_ignore_columns)
message = str(deduplicate_ignore_columns)
raise TypeError(message)
fieldnames = []
for d in dicts:
@ -101,7 +101,9 @@ def dataclass_to_dict(obj) -> dict[str, Any]:
def normalize_dict(d: dict[str, Any] | frozendict[str, Any]) -> frozendict[str, Any]:
if not isinstance(d, dict) and not isinstance(d, frozendict):
d = dataclass_to_dict(d)
assert isinstance(d, dict) or isinstance(d, frozendict), 'Not a dict'
if not isinstance(d, (dict, frozendict)):
message = 'Expected dict or frozendict'
raise TypeError(message)
safe_values = [
(k, csv_import.csv_str_to_value(csv_import.csv_safe_value(v)))
for k, v in d.items()
@ -118,7 +120,8 @@ def extend_csv_file(
if deduplicate_ignore_columns == data.Scraper.deduplicate_ignore_columns:
deduplicate_ignore_columns = []
if not isinstance(deduplicate_ignore_columns, list):
raise TypeError(deduplicate_ignore_columns)
message = str(deduplicate_ignore_columns)
raise TypeError(message)
try:
original_dicts = csv_import.load_csv_file(csv_file)

View File

@ -66,11 +66,9 @@ def parse_version_file(text: str) -> str:
def find_python_packages() -> list[str]:
"""
Find all python packages. (Directories containing __init__.py files.)
"""
"""Find all python packages (directories containing __init__.py files)."""
root_path = Path(PACKAGE_NAME)
packages: set[str] = set([PACKAGE_NAME])
packages: set[str] = {PACKAGE_NAME}
# Search recursively
for init_file in root_path.rglob('__init__.py'):