1
0
personal-data/obsidian_import/__init__.py

252 lines
7.4 KiB
Python
Raw Normal View History

2024-10-03 21:23:47 +00:00
"""Obsidian Import.
Sub-module for importing time-based data into Obsidian.
"""
2024-10-21 21:38:07 +00:00
import dataclasses
2024-10-03 21:23:47 +00:00
import datetime
2024-10-23 19:30:23 +00:00
from collections.abc import Iterator
2024-10-03 21:23:47 +00:00
from logging import getLogger
2024-10-03 21:24:12 +00:00
from pathlib import Path
2024-10-08 19:22:18 +00:00
from typing import Any
2024-10-03 21:24:12 +00:00
2024-10-23 19:30:23 +00:00
from personal_data.activity import (
ActivitySample,
Label,
RealizedActivitySample,
heuristically_realize_samples,
merge_adjacent_samples,
2024-10-21 22:04:34 +00:00
)
2024-10-23 19:30:23 +00:00
from personal_data.csv_import import determine_possible_keys, load_csv_file, start_end
2024-10-03 21:24:12 +00:00
2024-10-10 22:54:01 +00:00
from .obsidian import Event, ObsidianVault
2024-10-03 21:24:12 +00:00
2024-10-03 21:23:47 +00:00
# Module-level logger, named after this package.
logger = getLogger(__name__)

# A single parsed CSV row, keyed by column name.
Row = dict[str, Any]
# All rows of one CSV file.
Rows = list[Row]
2024-10-23 19:30:23 +00:00
def iterate_samples_from_rows(rows: Rows) -> Iterator[ActivitySample]:
    """Yield one `ActivitySample` for every row in `rows`.

    The keys to use (start/end time, misc labels) are determined once by
    `determine_possible_keys` from the row in the middle of `rows` and are
    then applied to every row.

    Args:
        rows: Non-empty list of rows.

    Yields:
        One `ActivitySample` per row, in input order, with a `Label` for each
        misc key that is present in that row.

    Raises:
        AssertionError: If `rows` is empty, or if no start or end time key
            was found. This is a generator, so the checks run on first
            iteration rather than when the function is called.
    """
    assert len(rows) > 0, 'Cannot create samples from an empty list of rows'

    # Hopefully select a useful representative.
    possible_keys = determine_possible_keys(rows[len(rows) // 2])
    logger.info('Found possible keys: %s', possible_keys)

    assert len(possible_keys.time_start) + len(possible_keys.time_end) >= 1, (
        'No start or end time key found'
    )

    for event_data in rows:
        (start_at, end_at) = start_end(event_data, possible_keys)
        # Membership is checked first, so plain indexing is safe here.
        labels = tuple(
            Label(key, event_data[key])
            for key in possible_keys.misc
            if key in event_data
        )
        yield ActivitySample(
            labels=labels,
            start_at=start_at,
            end_at=end_at,
        )
2024-10-10 21:50:48 +00:00
def import_workout_csv(vault: ObsidianVault, rows: Rows) -> int:
    """Add workout statistics from `rows` to the vault.

    Each row must have a 'Date' entry. For every known input column with a
    non-None value, the value is passed to `vault.add_statistic` under the
    output name, with the unit appended when one is configured.

    Args:
        vault: Vault receiving the statistics.
        rows: Rows to import.

    Returns:
        The number of rows for which at least one `vault.add_statistic` call
        returned a truthy result.

    Raises:
        KeyError: If a row has no 'Date' entry.
    """
    # Input column -> (statistic name, unit suffix or '').
    # Built once: it does not depend on the row being processed.
    mapping = {
        'Cycling (mins)': ('Cycling (Duration)', 'minutes'),
        'Cycling (kcals)': ('Cycling (kcals)', ''),
        'Weight (Kg)': ('Weight (Kg)', ''),
    }

    num_updated = 0
    for row in rows:
        date = row['Date']
        was_updated = False
        for input_key, (output_key, unit) in mapping.items():
            value = row.get(input_key)
            if value is None:
                continue
            if unit:
                value = f'{value!s} {unit}'
            was_updated |= vault.add_statistic(date, output_key, value)
            if input_key != output_key:
                # The statistic is stored under a different name than the
                # input column; None is passed for the input name as well
                # (presumably this clears an old entry - confirm against
                # ObsidianVault.add_statistic).
                was_updated |= vault.add_statistic(date, input_key, None)
        if was_updated:
            num_updated += 1
    return num_updated
2024-10-10 22:54:01 +00:00
2024-10-10 21:50:48 +00:00
def import_step_counts_csv(
    vault: ObsidianVault,
    rows: Rows,
    *,
    minimum_steps: int = 300,
) -> int:
    """Add the total step count of each date to the vault.

    The 'Steps' values of all rows whose 'Start' falls on the same date are
    summed. Dates with a total below `minimum_steps` are skipped.

    Args:
        vault: Vault receiving the 'Steps' statistic.
        rows: Rows with a 'Start' value exposing `.date()` and a numeric
            'Steps' value.
        minimum_steps: Smallest daily total that is still written.

    Returns:
        The number of dates for which `vault.add_statistic` returned a
        truthy result.

    Raises:
        KeyError: If a row has no 'Start' or 'Steps' entry.
    """
    # Aggregate in a single pass. Dicts keep insertion order, so dates are
    # later processed in order of first appearance.
    steps_per_date: dict[datetime.date, int] = {}
    for row in rows:
        date = row['Start'].date()
        steps_per_date[date] = steps_per_date.get(date, 0) + row['Steps']

    num_updated = 0
    for date, steps in steps_per_date.items():
        if steps < minimum_steps:
            continue
        if vault.add_statistic(date, 'Steps', steps):
            num_updated += 1
    return num_updated
2024-10-23 19:30:23 +00:00
2024-10-20 16:27:32 +00:00
def escape_for_obsidian_link(link: str) -> str:
    """Replace ':' and '/' in `link` with spaces and collapse repeated spaces.

    Args:
        link: Text to be used as the subject of an event.

    Returns:
        `link` with every ':' and '/' replaced by a space, and every run of
        consecutive spaces reduced to a single space.
    """
    escaped = link.replace(':', ' ').replace('/', ' ')
    # A single replace pass still leaves double spaces behind for runs of
    # three or more, so repeat until none remain.
    while '  ' in escaped:
        escaped = escaped.replace('  ', ' ')
    return escaped
2024-10-21 21:38:07 +00:00
@dataclasses.dataclass(frozen=True)
class EventContent:
    """Immutable text parts of an event, as returned by a content mapper."""

    # What was done, e.g. 'Watched' or 'Played'.
    verb: str
    # What it was done to, e.g. the name of a series or a game.
    subject: str
    # Free-form extra text; may be empty.
    comment: str
2024-10-10 22:54:01 +00:00
2024-10-10 21:50:48 +00:00
2024-10-23 19:30:23 +00:00
def import_activity_sample_csv(
    vault: ObsidianVault,
    rows: Rows,
    content_mapper,
    group_category: str | None = None,
    *,
    expected_tz: datetime.tzinfo | None = None,
) -> int:
    """Add the activity samples found in `rows` to the vault as events.

    Samples are created by `iterate_samples_from_rows`, realized by
    `heuristically_realize_samples`, optionally merged, grouped by the date
    of their start, and passed to `vault.add_events` one date at a time.

    Args:
        vault: Vault receiving the events.
        rows: Rows to import.
        content_mapper: Callable taking a `RealizedActivitySample` and
            returning an object with `verb`, `subject` and `comment`
            attributes (see `EventContent`).
        group_category: When not None, samples are first passed through
            `merge_adjacent_samples` with this category.
        expected_tz: Timezone in which event start/end times are expressed.
            Defaults to a fixed UTC+02:00 offset.

    Returns:
        The number of dates for which `vault.add_events` returned a truthy
        result.
    """
    if expected_tz is None:
        # TODO: Determine this in a more intelligent manner
        expected_tz = datetime.timezone(datetime.timedelta(hours=2))

    samples = heuristically_realize_samples(list(iterate_samples_from_rows(rows)))
    if group_category is not None:
        samples = merge_adjacent_samples(list(samples), group_category)

    # NOTE(review): the grouping date is taken from start_at as-is, while the
    # event times below are converted to expected_tz; near midnight the two
    # may disagree - confirm that this is intended.
    samples_per_date: dict[datetime.date, list[RealizedActivitySample]] = {}
    for sample in samples:
        samples_per_date.setdefault(sample.start_at.date(), []).append(sample)

    def to_clock_time(moment: datetime.datetime) -> datetime.time:
        # Event times have minute resolution.
        return (
            moment.astimezone(expected_tz)
            .replace(second=0, microsecond=0)
            .time()
        )

    def map_to_event(sample: RealizedActivitySample) -> Event:
        content = content_mapper(sample)
        return Event(
            to_clock_time(sample.start_at),
            to_clock_time(sample.end_at),
            verb=content.verb,
            subject=escape_for_obsidian_link(content.subject),
            comment=content.comment,
        )

    num_updated = 0
    for date, samples_on_date in samples_per_date.items():
        events = [map_to_event(sample) for sample in samples_on_date]
        if vault.add_events(date, events):
            num_updated += 1
    return num_updated
2024-10-08 20:57:41 +00:00
2024-10-23 19:30:23 +00:00
def import_activity_sample_csv_from_file(
    vault: ObsidianVault,
    data_path: Path,
    content_mapper,
    **kwargs,
) -> int:
    """Load a CSV file and add its activity samples to the vault.

    Args:
        vault: Vault receiving the events.
        data_path: Path of the CSV file, loaded with `load_csv_file`.
        content_mapper: Forwarded to `import_activity_sample_csv`.
        **kwargs: Further keyword arguments forwarded to
            `import_activity_sample_csv` (e.g. `group_category`).

    Returns:
        The result of `import_activity_sample_csv`.
    """
    rows = load_csv_file(data_path)
    logger.info('Loaded CSV with %d lines (%s)', len(rows), data_path)
    num_updated = import_activity_sample_csv(vault, rows, content_mapper, **kwargs)
    logger.info('Updated %d files', num_updated)
    # Previously missing: without it the function returned None despite the
    # declared int return type.
    return num_updated
2024-10-23 19:30:23 +00:00
2024-10-21 21:38:07 +00:00
def map_watched_series_content(sample: RealizedActivitySample) -> EventContent:
    """Build the 'Watched' event content for a watched episode.

    The subject is the sample's 'series.name' label; the comment combines
    the 'season.name', 'episode.index' and 'episode.name' labels.
    """
    series_name = sample.single_label_with_category('series.name')
    season_name = sample.single_label_with_category('season.name')
    episode_index = sample.single_label_with_category('episode.index')
    episode_name = sample.single_label_with_category('episode.name')
    return EventContent(
        verb='Watched',
        subject=series_name,
        comment=f'{season_name} Episode {episode_index}: *{episode_name}*',
    )
2024-10-23 19:30:23 +00:00
2024-10-21 21:38:07 +00:00
def map_games_played_content(sample: RealizedActivitySample) -> EventContent:
    """Build the 'Played' event content for a played game.

    The subject is the sample's 'game.name' label; the comment is empty.
    """
    return EventContent(
        verb='Played',
        subject=sample.single_label_with_category('game.name'),
        comment='',
    )
2024-10-23 19:30:23 +00:00
2024-10-21 21:38:07 +00:00
def import_watched_series_csv_from_file(vault: ObsidianVault) -> int:
    """Import 'output/show_episodes_watched.csv' into the vault.

    Delegates to `import_activity_sample_csv_from_file` with
    `map_watched_series_content` as content mapper and returns its result.
    """
    return import_activity_sample_csv_from_file(
        vault,
        Path('output/show_episodes_watched.csv'),
        map_watched_series_content,
    )
2024-10-21 21:38:07 +00:00
def import_played_games_csv_from_file(vault: ObsidianVault) -> int:
    """Import 'output/games_played.csv' into the vault.

    Delegates to `import_activity_sample_csv_from_file` with
    `map_games_played_content` as content mapper and 'game.name' as group
    category, and returns its result.
    """
    return import_activity_sample_csv_from_file(
        vault,
        Path('output/games_played.csv'),
        map_games_played_content,
        group_category='game.name',
    )
2024-10-10 22:54:01 +00:00
2024-10-08 19:22:18 +00:00
def import_data(obsidian_path: Path, dry_run=True):
    """Run the enabled importers against the vault at `obsidian_path`.

    Currently the watched-series and played-games importers are run; the
    workout and step-count importers are disabled. Afterwards the number of
    dirty and clean files in the vault's file cache is logged.

    Args:
        obsidian_path: Path passed to `ObsidianVault`.
        dry_run: When truthy, the vault is opened with `read_only='silent'`
            and the cache is not flushed. Otherwise `read_only=None` is used
            and `vault.flush_cache()` is called at the end.
    """
    read_only_mode = 'silent' if dry_run else None
    vault = ObsidianVault(obsidian_path, read_only=read_only_mode)

    # Disabled importer: workout statistics.
    if False:
        data_path = Path('/home/jmaa/Notes/workout.csv')
        rows = load_csv_file(data_path)
        logger.info('Loaded CSV with %d lines', len(rows))
        num_updated = import_workout_csv(vault, rows)
        logger.info('Updated %d files', num_updated)

    # Disabled importer: step counts.
    if False:
        data_path = Path(
            '/home/jmaa/personal-archive/misc-data/step_counts_2023-07-26_to_2024-09-21.csv',
        )
        rows = load_csv_file(data_path)
        logger.info('Loaded CSV with %d lines', len(rows))
        num_updated = import_step_counts_csv(vault, rows)
        logger.info('Updated %d files', num_updated)

    import_watched_series_csv_from_file(vault)
    import_played_games_csv_from_file(vault)

    file_cache = vault.internal_file_text_cache
    num_dirty = sum(1 for cached in file_cache.values() if cached.is_dirty)
    logger.info('dirty files in cache: %d', num_dirty)
    logger.info('clean files in cache: %d', len(file_cache) - num_dirty)

    if dry_run:
        return
    vault.flush_cache()