1
0
personal-data/obsidian_import/__init__.py

173 lines
5.5 KiB
Python
Raw Normal View History

2024-10-03 21:23:47 +00:00
"""Obsidian Import.
Sub-module for importing time-based data into Obsidian.
"""
import datetime
from logging import getLogger
2024-10-03 21:24:12 +00:00
from pathlib import Path
2024-10-08 19:22:18 +00:00
from typing import Any
from collections.abc import Iterator
2024-10-03 21:24:12 +00:00
from personal_data.csv_import import start_end, determine_possible_keys, load_csv_file
from personal_data.activity import ActivitySample, Label, RealizedActivitySample, heuristically_realize_samples
2024-10-03 21:24:12 +00:00
2024-10-10 22:54:01 +00:00
from .obsidian import Event, ObsidianVault
2024-10-03 21:24:12 +00:00
2024-10-03 21:23:47 +00:00
logger = getLogger(__name__)

# A single parsed CSV record: column name -> cell value.
Row = dict[str, Any]
# The full contents of one CSV file, in file order.
Rows = list[Row]
def iterate_samples_from_rows(rows: Rows) -> Iterator[ActivitySample]:
    """Yield an :class:`ActivitySample` for every CSV row.

    Column roles (start/end time, misc labels) are detected heuristically
    from a single representative row and then applied uniformly to all rows.
    """
    assert len(rows) > 0, 'cannot detect keys from an empty row set'

    # Use the middle row as the representative for key detection: it is more
    # likely than the first/last row to have all fields populated.
    representative = rows[len(rows) // 2]
    possible_keys = determine_possible_keys(representative)
    logger.info('Found possible keys: %s', possible_keys)
    del representative

    # At least one time column is required to anchor each sample in time.
    assert len(possible_keys.time_start) + len(possible_keys.time_end) >= 1

    for event_data in rows:
        (start_at, end_at) = start_end(event_data, possible_keys)
        labels = [Label(k, event_data[k]) for k in possible_keys.misc]
        yield ActivitySample(
            labels=tuple(labels),
            start_at=start_at,
            end_at=end_at,
        )
2024-10-10 22:54:01 +00:00
2024-10-10 21:50:48 +00:00
def import_workout_csv(vault: ObsidianVault, rows: Rows) -> int:
    """Import workout rows into the vault as per-day statistics.

    :param vault: Vault to write statistics into.
    :param rows: Parsed workout CSV; each row must carry a ``'Date'`` cell.
    :return: Number of daily notes that were actually changed.
    """
    # Maps CSV column -> (statistic name in Obsidian, unit suffix).
    # Hoisted out of the per-row loop: it is identical for every row.
    mapping = {
        'Cycling (mins)': ('Cycling (Duration)', 'minutes'),
        'Cycling (kcals)': ('Cycling (kcals)', ''),
        'Weight (Kg)': ('Weight (Kg)', ''),
    }

    num_updated = 0
    for row in rows:
        date = row['Date']
        was_updated = False
        for input_key, (output_key, unit) in mapping.items():
            value = row.get(input_key)
            if value is None:
                continue
            if unit:
                value = str(value) + ' ' + unit
            was_updated |= vault.add_statistic(date, output_key, value)
            # When the statistic was renamed, clear it under the old key.
            if input_key != output_key:
                was_updated |= vault.add_statistic(date, input_key, None)
        if was_updated:
            num_updated += 1
    return num_updated
2024-10-10 22:54:01 +00:00
2024-10-10 21:50:48 +00:00
def import_step_counts_csv(vault: ObsidianVault, rows: Rows) -> int:
    """Import step counts into the vault, aggregated per calendar day.

    Days whose total falls below a small threshold are skipped.

    :param vault: Vault to write statistics into.
    :param rows: Parsed CSV; each row carries a ``'Start'`` datetime and a
        ``'Steps'`` count.
    :return: Number of daily notes that were actually changed.
    """
    MINIMUM = 300

    # Accumulate the total step count for each calendar day.
    steps_per_date: dict = {}
    for row in rows:
        day = row['Start'].date()
        steps_per_date[day] = steps_per_date.get(day, 0) + row['Steps']

    num_updated = 0
    for day, steps in steps_per_date.items():
        # Skip days below the threshold.
        if steps < MINIMUM:
            continue
        if vault.add_statistic(day, 'Steps', steps):
            num_updated += 1
    return num_updated
2024-10-20 16:27:32 +00:00
def escape_for_obsidian_link(link: str) -> str:
    """Make *link* safe for use as an Obsidian note/link name.

    ``':'`` and ``'/'`` are not allowed in note names, so both are replaced
    with spaces; doubled spaces produced by those replacements (e.g. from
    ``': '``) are then collapsed to a single space.
    """
    link = link.replace(':', ' ').replace('/', ' ')
    # Bug fix: previously this replaced a single space with a single space
    # (a no-op); the intent was to collapse the doubled spaces created above.
    return link.replace('  ', ' ')
2024-10-10 22:54:01 +00:00
2024-10-10 21:50:48 +00:00
def import_watched_series_csv(vault: ObsidianVault, rows: Rows) -> int:
    """Import watched-episode rows as daily 'Watched' events.

    :param vault: Vault to write events into.
    :param rows: Parsed CSV of watched episodes.
    :return: Number of daily notes that were actually changed.
    """
    verb = 'Watched'
    # TODO: Determine this in a more intelligent manner
    expected_tz = datetime.timezone(datetime.timedelta(hours=2))

    def to_event(sample: RealizedActivitySample) -> Event:
        # Note title, made link-safe for Obsidian.
        noun = escape_for_obsidian_link(
            sample.single_label_with_category('series.name'),
        )
        comment = '{} Episode {}: *{}*'.format(
            sample.single_label_with_category('season.name'),
            sample.single_label_with_category('episode.index'),
            sample.single_label_with_category('episode.name'),
        )
        start_local = sample.start_at.astimezone(expected_tz)
        end_local = sample.end_at.astimezone(expected_tz)
        return Event(
            start_local.replace(second=0, microsecond=0).time(),
            end_local.replace(second=0, microsecond=0).time(),
            verb,
            noun,
            comment,
        )

    # Group the realized samples by the calendar day they start on.
    samples_per_date: dict[datetime.date, list[RealizedActivitySample]] = {}
    for sample in heuristically_realize_samples(
        list(iterate_samples_from_rows(rows)),
    ):
        samples_per_date.setdefault(sample.start_at.date(), []).append(sample)

    num_updated = 0
    for day, day_samples in samples_per_date.items():
        if vault.add_events(day, [to_event(s) for s in day_samples]):
            num_updated += 1
    return num_updated
2024-10-08 20:57:41 +00:00
2024-10-10 22:54:01 +00:00
2024-10-08 19:22:18 +00:00
def import_data(obsidian_path: Path, dry_run=True) -> None:
    """Run all configured imports against the Obsidian vault.

    Each data source below is gated by a manually edited toggle; flip the
    condition to enable or disable that import.

    :param obsidian_path: Root directory of the Obsidian vault.
    :param dry_run: When true, open the vault read-only ('silent' mode) so
        nothing is written to disk.
    """
    # Fixed: the previous `dry_run and 'silent' or None` uses the fragile
    # pre-PEP-308 and/or idiom; the conditional expression is explicit.
    vault = ObsidianVault(obsidian_path, read_only='silent' if dry_run else None)

    if False:  # Manual toggle: workout import.
        data_path = Path('/home/jmaa/Notes/workout.csv')
        rows = load_csv_file(data_path)
        logger.info('Loaded CSV with %d lines', len(rows))
        num_updated = import_workout_csv(vault, rows)
        logger.info('Updated %d files', num_updated)

    if False:  # Manual toggle: step-count import.
        data_path = Path(
            '/home/jmaa/personal-archive/misc-data/step_counts_2023-07-26_to_2024-09-21.csv',
        )
        rows = load_csv_file(data_path)
        logger.info('Loaded CSV with %d lines', len(rows))
        num_updated = import_step_counts_csv(vault, rows)
        logger.info('Updated %d files', num_updated)

    if True:  # Manual toggle: watched-series import.
        data_path = Path('output/show_episodes_watched.csv')
        rows = load_csv_file(data_path)
        logger.info('Loaded CSV with %d lines', len(rows))
        num_updated = import_watched_series_csv(vault, rows)
        logger.info('Updated %d files', num_updated)