Compare commits
8 Commits: 088cac75fc ... acaedcbd3a

SHA1
- acaedcbd3a
- 2272fc1127
- 36ba48f36e
- b79ea804b2
- 2087460a7f
- 79cf72ffc1
- 35a3c35821
- bc4cea1cbc

@@ -57,8 +57,6 @@ def generate_report(
 
     time_and_label = [(duration, label) for label, duration in time_per_label.items()]
     time_and_label.sort(reverse=True)
-
-    #
     yield '-' * LINE_LENGTH
     yield '\n'
     for total_time, label in time_and_label:

@@ -25,8 +25,6 @@ def create_title(sample: RealizedActivitySample) -> tuple[str, str]:
 def generate_calendar(
     samples: list[RealizedActivitySample],
 ) -> icalendar.Calendar:
-    max_title_parts = 2
-
     cal = icalendar.Calendar()
     cal.add('prodid', '-//personal_data_calendar//example.org//')
     cal.add('version', '2.0')

@@ -10,8 +10,9 @@ logger = getLogger(__name__)
 
 
 def iterate_samples_from_dicts(rows: list[dict[str, Any]]) -> Iterator[ActivitySample]:
-    assert len(rows) > 0
-    max_title_parts = 2
+    if len(rows) == 0:
+        message = 'No rows provided'
+        raise ValueError(message)
 
     if True:
         event_data = rows[len(rows) // 2]  # Hopefully select a useful representative.

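The change above is the first of several in this compare that swap asserts for explicit exceptions: asserts disappear when Python runs with -O, so they should not validate runtime input. A minimal sketch of the pattern with a hypothetical helper (the message-variable form matches linters such as ruff's EM101 rule, which discourages string literals directly inside raise):

    def validate_rows(rows: list[dict]) -> None:
        # Hypothetical helper, not part of this repository.
        if len(rows) == 0:
            message = 'No rows provided'  # assign first, keep the raise line short
            raise ValueError(message)
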
@@ -19,8 +20,9 @@ def iterate_samples_from_dicts(rows: list[dict[str, Any]]) -> Iterator[ActivitySample]:
     logger.info('Found possible keys: %s', possible_keys)
     del event_data
 
-    assert len(possible_keys.time_start) + len(possible_keys.time_end) >= 1
-    assert len(possible_keys.image) >= 0
+    if len(possible_keys.time_start) + len(possible_keys.time_end) < 1:
+        message = 'No time columns found in data'
+        raise ValueError(message)
 
     for event_data in rows:
         """

@@ -47,5 +49,7 @@ def iterate_samples_from_dicts(rows: list[dict[str, Any]]) -> Iterator[ActivitySample]:
 def iterate_samples_from_csv_file(file_path: Path) -> Iterator[ActivitySample]:
     dicts = load_csv_file(file_path)
     samples = list(iterate_samples_from_dicts(dicts))
-    assert len(samples) > 0, 'Did not found any samples'
+    if len(samples) == 0:
+        message = 'Did not find any samples'
+        raise ValueError(message)
     yield from samples

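Besides the assert-to-raise change, the hunk above shows a materialize-then-yield pattern: the generator output is collected into a list so it can be checked for emptiness, then re-exposed lazily with yield from. A small standalone sketch under assumed names:

    def non_empty(items_iterable):
        # Hypothetical wrapper illustrating the validate-then-yield-from idea.
        items = list(items_iterable)   # materialize so emptiness can be checked
        if len(items) == 0:
            message = 'Did not find any samples'
            raise ValueError(message)
        yield from items               # hand the validated items back to the caller lazily
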
@@ -13,9 +13,10 @@ logger = logging.getLogger(__name__)
 def determine_default_branch(repo: git.Repo):
     try:
         repo.commit('main')
-        return 'main'
-    except:
+    except Exception:
         return 'master'
+    else:
+        return 'main'
 
 
 def determine_project_name(repo: git.Repo) -> str:

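The rewrite above narrows the bare except to except Exception and moves the success return into an else block, which runs only when the try body raised nothing. A minimal illustration of the control flow with hypothetical names:

    def pick_branch(lookup) -> str:
        # Hypothetical sketch of try/except/else ordering.
        try:
            lookup('main')    # may raise
        except Exception:     # unlike a bare except, does not swallow SystemExit or KeyboardInterrupt
            return 'master'
        else:
            return 'main'     # reached only when lookup() succeeded
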
@@ -27,7 +28,9 @@ def determine_project_name(repo: git.Repo) -> str:
 
 def get_samples_from_project(repo: git.Repo) -> Iterator[ActivitySample]:
     project_name = determine_project_name(repo)
-    assert project_name is not None
+    if project_name is None:
+        message = 'Could not determine project name'
+        raise ValueError(message)
 
     # TODO: Branch on main or master or default

@ -1 +1 @@
|
||||||
"""# HTML-Data Formatting"""
|
"""HTML-Data Formatting."""
|
||||||
|
|
|
@@ -13,11 +13,7 @@ ROOT_DIRECTORY = Path('output')
 
 @bottle.route('/<csv_type>/newest')
 def newest_entry(csv_type: str):
-    """
-    Loads a CSV file (default: data.csv, overridable by query param 'file'),
-    finds the newest entry based on the 'time.current' column, and returns it as JSON.
-    """
+    """Loads a CSV file and finds the newest entry based on 'time.current' column, returns as JSON."""
 
     path = ROOT_DIRECTORY / f'{csv_type}.csv'
 
     bottle.response.content_type = 'application/json'

@@ -33,10 +29,10 @@ def newest_entry(csv_type: str):
         bottle.response.status = 404
         return {'error': 'CSV file is empty or no data found'}
 
-    TIME_COLUMN = 'time.current'
+    time_column = 'time.current'
 
-    if TIME_COLUMN in data[0]:
-        newest = max(data, key=lambda r: r.get(TIME_COLUMN))
+    if time_column in data[0]:
+        newest = max(data, key=lambda r: r.get(time_column))
     else:
         newest = data[-1]

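For context on the lookup above: max() with a key function compares rows by the value stored under time_column, so ISO-8601 timestamp strings sort correctly even as plain strings. A small illustration with made-up rows (the extra field is not from this repository):

    data = [
        {'time.current': '2025-06-23T08:00:00', 'value': 1},
        {'time.current': '2025-06-24T08:00:00', 'value': 2},
    ]
    time_column = 'time.current'
    newest = max(data, key=lambda r: r.get(time_column))
    print(newest['value'])  # 2, the row with the latest timestamp
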
@@ -39,18 +39,19 @@ def to_text_duration(duration: datetime.timedelta) -> str:
     duration -= minutes * MINUTE
     seconds = int(duration / SECOND)
 
-    l = []
+    components = []
     if hours > 0:
-        l.append(f'{hours} hours')
+        components.append(f'{hours} hours')
     if minutes > 0:
-        l.append(f'{minutes} minutes')
+        components.append(f'{minutes} minutes')
     if seconds > 0:
-        l.append(f'{seconds} seconds')
-    return ' '.join(l)
+        components.append(f'{seconds} seconds')
+    return ' '.join(components)
 
 
 def iterate_samples_from_rows(rows: Rows) -> Iterator[ActivitySample]:
-    assert len(rows) > 0
+    if len(rows) == 0:
+        raise ValueError("No rows provided for sample iteration")
 
     if True:
         event_data = rows[len(rows) // 2]  # Hopefully select a useful representative.

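A quick usage sketch of to_text_duration as it reads after the rename, assuming HOUR/MINUTE/SECOND are the usual timedelta constants defined earlier in the file; note the function still pluralizes unconditionally (for example '1 hours'):

    import datetime

    duration = datetime.timedelta(hours=1, minutes=5, seconds=30)
    print(to_text_duration(duration))  # '1 hours 5 minutes 30 seconds'
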
@@ -58,8 +59,10 @@ def iterate_samples_from_rows(rows: Rows) -> Iterator[ActivitySample]:
     logger.info('Found possible keys: %s', possible_keys)
     del event_data
 
-    assert len(possible_keys.time_start) + len(possible_keys.time_end) >= 1
-    assert len(possible_keys.image) >= 0
+    if len(possible_keys.time_start) + len(possible_keys.time_end) < 1:
+        raise ValueError("No time start or end keys found in data")
+    if len(possible_keys.image) < 0:
+        raise ValueError("Invalid number of image keys found")
 
     for event_data in rows:
         (start_at, end_at) = start_end(event_data, possible_keys)

|
@ -142,10 +145,10 @@ def import_stepmania_steps_csv(vault: ObsidianVault, rows: Rows) -> int:
|
||||||
rows_per_date[date].append(row)
|
rows_per_date[date].append(row)
|
||||||
del date, row
|
del date, row
|
||||||
|
|
||||||
COLUMNS = ['score.w1', 'score.w2', 'score.w3', 'score.w4', 'score.w5']
|
columns = ['score.w1', 'score.w2', 'score.w3', 'score.w4', 'score.w5']
|
||||||
|
|
||||||
def all_steps(row: dict[str, int]):
|
def all_steps(row: dict[str, int]):
|
||||||
return sum(row[column] for column in COLUMNS)
|
return sum(row[column] for column in columns)
|
||||||
|
|
||||||
steps_per_date = {
|
steps_per_date = {
|
||||||
date: sum(all_steps(row) for row in rows)
|
date: sum(all_steps(row) for row in rows)
|
||||||
|
|
personal_data/fetchers/openscale.py (new file, 46 lines)
@@ -0,0 +1,46 @@
+"""OpenScale SQLite database fetcher.
+
+Reads weight measurements from an OpenScale backup SQLite database.
+OpenScale is an open-source weight tracking app for Android.
+"""
+
+import dataclasses
+import datetime
+import sqlite3
+from pathlib import Path
+
+from personal_data.data import DeduplicateMode, Scraper
+
+DATABASE_PATH = '/home/jmaa/Notes/Rawbackupdata/ScaleWeight/2025-06-24_openScale.db'
+
+
+@dataclasses.dataclass(frozen=True)
+class OpenScale(Scraper):
+    dataset_name = 'openscale_measurements'
+    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
+
+    @staticmethod
+    def requires_cfscrape() -> bool:
+        return False
+
+    def scrape(self):
+        """Read weight measurements from OpenScale SQLite database."""
+        db_path = Path(DATABASE_PATH)
+
+        if not db_path.exists():
+            msg = f'OpenScale database not found at {DATABASE_PATH}'
+            raise FileNotFoundError(msg)
+
+        with sqlite3.connect(db_path) as conn:
+            conn.row_factory = sqlite3.Row
+            cursor = conn.cursor()
+
+            cursor.execute("""
+                SELECT datetime, weight
+                FROM scaleMeasurements
+                ORDER BY datetime
+            """)
+
+            for row in cursor.fetchall():
+                timestamp_ms = row['datetime']
+                dt = datetime.datetime.fromtimestamp(timestamp_ms / 1000, tz=datetime.timezone.utc)
+                yield {'datetime': dt, 'weight': row['weight']}

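A hedged usage sketch for the new fetcher. The project's runner normally discovers and instantiates scrapers itself, so this only illustrates the generator interface; whether OpenScale() can be constructed without arguments depends on the Scraper base class and is an assumption here:

    from personal_data.fetchers.openscale import OpenScale

    scraper = OpenScale()            # assumes the base dataclass needs no constructor arguments
    for sample in scraper.scrape():  # yields dicts with 'datetime' and 'weight' keys
        print(sample['datetime'].date(), sample['weight'])
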
@@ -11,6 +11,8 @@ from . import data, fetchers, notification, util
 
 logger = logging.getLogger(__name__)
 
+MIN_COOKIES_THRESHOLD = 10
+
 try:
     import cloudscraper
 except ImportError:

@@ -63,8 +65,7 @@ def get_session(
             )
         else:
             logger.error('Expected cloudscraper, but not defined!')
-    else:
-        if ignore_cache:
+    elif ignore_cache:
         logger.warning('HTTP cache disabled')
         return requests.Session()
     session = session_class(

@@ -98,13 +99,13 @@ def get_cookiejar(use_cookiejar: bool):
     if use_cookiejar:
         logger.warning('Got cookiejar from firefox')
         cookiejar = browser_cookie3.firefox()
-        if len(cookiejar) > 10:
+        if len(cookiejar) > MIN_COOKIES_THRESHOLD:
             return cookiejar
         browser_cookie3.firefox(
             '/home/jmaa/.cachy/mbui5xg7.default-release/cookies.sqlite',
         )
         logger.warning('Cookiejar has %s cookies', len(cookiejar))
-        if len(cookiejar) > 10:
+        if len(cookiejar) > MIN_COOKIES_THRESHOLD:
             return cookiejar
     logger.warning('No cookiejar is used')
     return []

@@ -144,7 +145,7 @@ def main(
             del result
         except requests.exceptions.HTTPError as e:
             logger.exception('Failed in running %s', scraper_cls.__name__)
-            logger.error('User-Agent: %s', e.request.headers['user-agent'])
+            logger.exception('User-Agent: %s', e.request.headers['user-agent'])
             continue
         status = util.extend_csv_file(
             OUTPUT_PATH / f'{scraper.dataset_name}.csv',

@@ -16,8 +16,7 @@ logger = logging.getLogger(__name__)
 
 def safe_del(d: dict, *keys: str):
     for key in keys:
-        if key in d:
-            del d[key]
+        d.pop(key, None)
 
 
 def equals_without_fields(

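dict.pop(key, None) is a drop-in replacement for the membership check plus del above; the default argument suppresses the KeyError for absent keys. A tiny illustration:

    d = {'a': 1}
    d.pop('a', None)  # removes 'a' and returns 1
    d.pop('b', None)  # missing key: no KeyError, returns the default None
    print(d)          # {}
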
@@ -64,7 +63,8 @@ def deduplicate_dicts(
     deduplicate_ignore_columns: list[str],
 ) -> tuple[list[frozendict[str, Any]], list[str]]:
     if not isinstance(deduplicate_ignore_columns, list):
-        raise TypeError(deduplicate_ignore_columns)
+        message = str(deduplicate_ignore_columns)
+        raise TypeError(message)
 
     fieldnames = []
     for d in dicts:

@@ -101,7 +101,9 @@ def dataclass_to_dict(obj) -> dict[str, Any]:
 def normalize_dict(d: dict[str, Any] | frozendict[str, Any]) -> frozendict[str, Any]:
     if not isinstance(d, dict) and not isinstance(d, frozendict):
         d = dataclass_to_dict(d)
-    assert isinstance(d, dict) or isinstance(d, frozendict), 'Not a dict'
+    if not isinstance(d, (dict, frozendict)):
+        message = 'Expected dict or frozendict'
+        raise TypeError(message)
     safe_values = [
         (k, csv_import.csv_str_to_value(csv_import.csv_safe_value(v)))
         for k, v in d.items()

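isinstance accepts a tuple of types, so the single check above covers both dict and frozendict, matching the two or-joined calls in the old assert. For illustration (assumes the frozendict package used elsewhere in this project):

    from frozendict import frozendict

    for value in ({'k': 1}, frozendict({'k': 1}), ['not', 'a', 'dict']):
        print(isinstance(value, (dict, frozendict)))  # True, True, False
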
@@ -118,7 +120,8 @@ def extend_csv_file(
     if deduplicate_ignore_columns == data.Scraper.deduplicate_ignore_columns:
         deduplicate_ignore_columns = []
     if not isinstance(deduplicate_ignore_columns, list):
-        raise TypeError(deduplicate_ignore_columns)
+        message = str(deduplicate_ignore_columns)
+        raise TypeError(message)
 
     try:
         original_dicts = csv_import.load_csv_file(csv_file)

setup.py (6 changed lines)
@@ -66,11 +66,9 @@ def parse_version_file(text: str) -> str:
 
 
 def find_python_packages() -> list[str]:
-    """
-    Find all python packages. (Directories containing __init__.py files.)
-    """
+    """Find all python packages (directories containing __init__.py files)."""
     root_path = Path(PACKAGE_NAME)
-    packages: set[str] = set([PACKAGE_NAME])
+    packages: set[str] = {PACKAGE_NAME}
 
     # Search recursively
     for init_file in root_path.rglob('__init__.py'):

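The set literal {PACKAGE_NAME} behaves identically to set([PACKAGE_NAME]). The loop body sits outside this hunk; below is a hedged sketch of how rglob-based package discovery typically maps each __init__.py to a dotted package name (the joining logic and the PACKAGE_NAME value are assumptions, not taken from this repository):

    from pathlib import Path

    PACKAGE_NAME = 'personal_data'  # assumed value of the constant referenced above

    def find_python_packages() -> list[str]:
        root_path = Path(PACKAGE_NAME)
        packages: set[str] = {PACKAGE_NAME}
        for init_file in root_path.rglob('__init__.py'):
            # Assumed: the package name is the directory path with separators replaced by dots.
            packages.add('.'.join(init_file.parent.parts))
        return sorted(packages)
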