1
0
personal-data/obsidian_import/obsidian.py

316 lines
10 KiB
Python
Raw Normal View History

2024-10-10 22:54:01 +00:00
import dataclasses
2024-10-03 21:23:47 +00:00
import datetime
import json
2024-10-08 20:57:41 +00:00
import re
2024-10-03 21:24:12 +00:00
from decimal import Decimal
from logging import getLogger
2024-10-03 21:23:47 +00:00
from pathlib import Path
2024-10-03 21:24:12 +00:00
from typing import Any
2024-11-24 16:08:41 +00:00
from zoneinfo import ZoneInfo
import enforce_typing
2024-10-03 21:23:47 +00:00
import frontmatter
2024-10-10 22:54:01 +00:00
import marko
import marko.md_renderer
2024-10-03 21:24:12 +00:00
2024-10-03 21:23:47 +00:00
# Module-level logger, named after this module.
logger = getLogger(__name__)
# Alias for the front-matter key under which a statistic is stored.
StatisticKey = str
2024-10-10 22:54:01 +00:00
2024-11-24 16:08:41 +00:00
@enforce_typing.enforce_types
@dataclasses.dataclass(frozen=True, order=True)
class Event:
    """A single entry of a daily note's events list.

    An entry that could not be parsed is represented with every field set to
    ``None`` except ``comment``, which then carries the raw text.

    Instances are immutable and hashable (``frozen=True``), so they can be
    collected in sets; ``order=True`` generates comparison methods that
    compare the fields in declaration order.
    """

    # Start and end of the event.
    start_time: datetime.datetime | None
    end_time: datetime.datetime | None
    # What was done (verb) and the name of the note it was done to (subject).
    verb: str | None
    subject: str | None
    # Free text following the link, or the whole line for unparsed entries.
    comment: str

    def __post_init__(self):
        """Check that a non-empty subject contains neither ':' nor '/'."""
        subject = self.subject
        if not subject:
            return
        assert ':' not in subject
        assert '/' not in subject
2024-10-10 22:54:01 +00:00
@dataclasses.dataclass(frozen=True)
class FileContents:
    """Parsed representation of one daily note.

    The dataclass is frozen, so fields cannot be rebound; the containers
    stored in the fields are ordinary mutable objects.
    """

    # Front-matter metadata of the note.
    frontmatter: dict[str, Any]
    # Markdown blocks located before the events heading.
    blocks_pre_events: list
    # Events parsed from the list under the events heading.
    events: frozenset[Event]
    # Markdown blocks located after the events list.
    blocks_post_events: list
    # Timezone used when parsing and formatting event times.
    timezone: ZoneInfo
2024-10-08 20:57:41 +00:00
2024-10-10 22:54:01 +00:00
@dataclasses.dataclass
class CachedFile:
    """Mutable in-memory copy of a file's bytes plus a pending-write flag."""

    # Raw file contents.
    data: bytes
    # True when ``data`` differs from what is stored on disk.
    is_dirty: bool
2024-10-08 20:57:41 +00:00
# Shared marko parser and Markdown renderer, created once at import time and
# reused for every note that is loaded or saved.
MARKDOWN_PARSER = marko.Markdown()
MARKDOWN_RENDERER = marko.md_renderer.MarkdownRenderer()
2024-10-10 22:54:01 +00:00
# Body layout of a saved daily note: the blocks before the events section,
# the "Events" heading with its list, then the remaining blocks. The leading
# and trailing newlines are part of the template.
FILE_FORMAT = (
    '\n'
    '{blocks_pre_events}\n'
    '## Events\n'
    '{block_events}\n'
    '{blocks_post_events}\n'
)
2024-10-03 21:24:12 +00:00
class ObsidianVault:
    """Access to the daily notes of an Obsidian vault through a write-back cache.

    Files are read from disk at most once and kept in
    ``internal_file_text_cache``. Modifications only touch the cache and mark
    the entry dirty; nothing is written to disk until ``flush_cache`` is
    called.
    """

    def __init__(
        self,
        vault_path: Path,
        read_only: bool | str = 'silent',
        allow_invalid_vault=False,
    ):
        """Open the vault located at ``vault_path``.

        Args:
            vault_path: Root directory of the vault.
            read_only: Any truthy value makes ``flush_cache`` raise.
                NOTE(review): the default ``'silent'`` is a truthy string, so
                a vault created with defaults cannot be flushed -- confirm
                this is intended.
            allow_invalid_vault: Skip the check for the ``.obsidian`` folder
                and tolerate a missing daily notes configuration.

        Raises:
            AssertionError: If the vault is invalid and
                ``allow_invalid_vault`` is false.
        """
        self.vault_path = vault_path
        self.read_only = read_only
        self.internal_file_text_cache: dict[Path, CachedFile] = {}

        # Always define the configuration attributes so they exist even when
        # the configuration file is missing and tolerated.
        self.daily_folder: str | None = None
        self.path_format: str | None = None
        self.template_file_path: str | None = None

        if not allow_invalid_vault:
            assert (self.vault_path / '.obsidian').exists(), 'Not an Obsidian Vault'

        config_path = self.vault_path / '.obsidian' / 'daily-notes.json'
        try:
            with open(config_path, encoding='utf-8') as f:
                daily_notes_config = json.load(f)
        except FileNotFoundError:
            if not allow_invalid_vault:
                # Raised explicitly so the check survives ``python -O``.
                raise AssertionError('Missing daily notes configuration!')
        else:
            self.daily_folder = daily_notes_config['folder']
            self.path_format = daily_notes_config['format']
            self.template_file_path = daily_notes_config['template']

    def get_statistic(
        self,
        date: datetime.date,
        statistic_key: StatisticKey,
    ) -> Any | None:
        """Return the front-matter value stored under ``statistic_key`` for ``date``.

        Returns ``None`` when the key is absent.
        """
        if contents := self._load_date_contents(date):
            return contents.frontmatter.get(statistic_key)
        return None

    def add_statistic(
        self,
        date: datetime.date,
        statistic_key: StatisticKey,
        amount: Any,
    ) -> bool:
        """Set the front-matter value ``statistic_key`` of the note for ``date``.

        ``Decimal`` amounts are converted to ``float``. An ``amount`` of
        ``None`` removes the key.

        Returns:
            True if the note was changed, False if it already held the value.
        """
        # Adjust arguments
        if isinstance(amount, Decimal):
            amount = float(amount)

        # Load contents
        contents = self._load_date_contents(date)
        if contents is None:
            return False

        # Update contents
        if contents.frontmatter.get(statistic_key) == amount:
            return False
        if amount is None:
            contents.frontmatter.pop(statistic_key, None)
        else:
            contents.frontmatter[statistic_key] = amount

        # Save contents
        self._save_date_contents(date, contents)
        return True

    def add_events(self, date: datetime.date, events: list[Event]) -> bool:
        """Merge ``events`` into the note for ``date``.

        Returns:
            True if at least one event was new and the note was updated.
        """
        contents = self._load_date_contents(date)
        if contents is None:
            return False

        # Exit without writing if there were no changes.
        updated_events: frozenset[Event] = contents.events | set(events)
        if contents.events == updated_events:
            return False

        contents = dataclasses.replace(contents, events=updated_events)
        self._save_date_contents(date, contents)
        return True

    def get_events(self, date: datetime.date) -> frozenset[Event]:
        """Return the events recorded in the note for ``date``."""
        contents = self._load_date_contents(date)
        if contents is None:
            return frozenset()
        return contents.events

    def _load_date_contents(self, date: datetime.date) -> FileContents | None:
        """Load and parse the note for ``date``.

        Falls back to the daily note template when the note is missing or
        empty.

        Raises:
            AssertionError: If neither the note nor the template yields text.
        """
        timezone = ZoneInfo('Europe/Copenhagen')  # TODO: Parameterize in an intelligent manner

        file_path = self._date_file_path(date)
        text = self._load_file_text(file_path) or self._load_file_text(
            self._daily_template_path(),
        )
        assert text is not None, 'Neither the daily note nor its template could be read'

        file_frontmatter = frontmatter.loads(text)
        # str() of the loaded post is what gets parsed as Markdown --
        # presumably the body without the front matter; confirm against the
        # frontmatter library.
        ast = MARKDOWN_PARSER.parse(str(file_frontmatter))
        (pre_events, list_block_items, post_events) = find_events_list_block(ast)
        events = frozenset(
            parse_event_string(list_item, date, timezone) for list_item in list_block_items
        )
        return FileContents(file_frontmatter.metadata, pre_events, events, post_events, timezone)

    def _save_date_contents(self, date: datetime.date, contents: FileContents) -> None:
        """Render ``contents`` and store the result in the cache for ``date``."""
        blocks_pre_events = ''.join(
            MARKDOWN_RENDERER.render(b) for b in contents.blocks_pre_events
        )
        blocks_post_events = ''.join(
            MARKDOWN_RENDERER.render(b) for b in contents.blocks_post_events
        )

        # Order by time, then verb, subject and comment. Events without any
        # time sort first by means of the sentinel.
        date_sentinel = datetime.datetime(1900, 1, 1, 1, 1, 1, tzinfo=contents.timezone)
        events = sorted(
            contents.events,
            key=lambda e: (
                e.start_time or e.end_time or date_sentinel,
                e.verb or '',
                e.subject or '',
                e.comment or '',
            ),
        )

        formatted_events = ['- ' + format_event_string(e, tz=contents.timezone) for e in events]
        # Distinct events may render to the same line; keep the first of each.
        formatted_events = list(dict.fromkeys(formatted_events))
        block_events = '\n'.join(formatted_events)

        post = frontmatter.Post(
            content=FILE_FORMAT.format(
                blocks_pre_events=blocks_pre_events,
                blocks_post_events=blocks_post_events,
                block_events=block_events,
            ).strip(),
            **contents.frontmatter,
        )

        self._save_file_text_to_cache(
            self._date_file_path(date),
            frontmatter.dumps(post).encode('utf8'),
        )

    def _save_file_text_to_cache(self, path: Path, text: bytes) -> None:
        """Store ``text`` for ``path`` in the cache and mark the entry dirty."""
        cached_file = self.internal_file_text_cache.get(path)
        if cached_file is None:
            self.internal_file_text_cache[path] = CachedFile(text, True)
        else:
            cached_file.data = text
            cached_file.is_dirty = True

    @staticmethod
    def _with_markdown_suffix(path: Path) -> Path:
        """Return ``path`` ending in ``.md`` without dropping dotted name parts.

        ``Path.with_suffix`` would replace everything after the last dot, so
        a note named ``2024.10.03`` would become ``2024.10.md``.
        """
        if path.suffix == '.md':
            return path
        return path.with_name(path.name + '.md')

    def _date_file_path(self, date: datetime.date) -> Path:
        """Return the path of the daily note for ``date``.

        Only the ``YYYY``, ``MM`` and ``DD`` tokens of the configured format
        are substituted.
        """
        path = (
            self.path_format.replace('YYYY', str(date.year))
            .replace('MM', f'{date.month:02d}')
            .replace('DD', f'{date.day:02d}')
        )
        return self._with_markdown_suffix(self.vault_path / self.daily_folder / path)

    def _daily_template_path(self) -> Path:
        """Return the path of the configured daily note template."""
        return self._with_markdown_suffix(self.vault_path / self.template_file_path)

    def _load_file_text(self, path: Path) -> bytes | None:
        """Return the bytes of ``path``, reading from disk on the first access.

        Returns ``None`` if the file does not exist and is not cached.
        """
        if path not in self.internal_file_text_cache:
            try:
                with open(path, 'rb') as f:
                    self.internal_file_text_cache[path] = CachedFile(f.read(), False)
            except FileNotFoundError:
                return None
        return self.internal_file_text_cache[path].data

    def flush_cache(self) -> None:
        """Write every dirty cache entry to disk.

        Raises:
            RuntimeError: If the vault is read-only.
        """
        if self.read_only:
            msg = 'Read-only ObsidianVault cannot be flushed'
            raise RuntimeError(msg)
        for path, cached_file in self.internal_file_text_cache.items():
            if not cached_file.is_dirty:
                continue
            logger.info('Saving file "%s"', path)
            path.parent.mkdir(exist_ok=True, parents=True)
            with open(path, 'wb') as f:
                f.write(cached_file.data)
            # The entry now matches the disk; do not rewrite it next flush.
            cached_file.is_dirty = False
2024-10-10 22:54:01 +00:00
2024-11-17 16:09:41 +00:00
2024-10-08 20:57:41 +00:00
def find_events_list_block(ast) -> tuple[list, list[str], list]:
    """Split a parsed document around its "Events" heading.

    The first heading whose first inline child is the text "events"
    (compared case-insensitively) marks the section. If the block directly
    after that heading is a list, its items are rendered back to Markdown
    and returned as the event texts.

    Args:
        ast: Parsed document; only its ``children`` list of blocks is used.

    Returns:
        ``(blocks_before, event_texts, blocks_after)``. The heading itself
        and the events list are in neither block list. When there is no
        events heading the result is ``(all_blocks, [], [])``.
    """
    blocks = ast.children
    for block_i, block in enumerate(blocks):
        if not isinstance(block, marko.block.Heading):
            continue

        # Headings that are empty or start with a non-text inline element
        # (emphasis, link, ...) cannot be the events heading; skip them
        # instead of failing on the attribute access.
        first_inline = block.children[0] if block.children else None
        title = getattr(first_inline, 'children', None)
        if not isinstance(title, str) or title.lower() != 'events':
            continue

        next_i = block_i + 1
        events_block = blocks[next_i] if next_i < len(blocks) else None
        if isinstance(events_block, marko.block.List):
            # Skip both the heading and the list.
            offset = 2
            event_texts = [
                MARKDOWN_RENDERER.render_children(li).strip()
                for li in events_block.children
            ]
        else:
            # Heading without a list: skip the heading only.
            offset = 1
            event_texts = []
        return (blocks[:block_i], event_texts, blocks[block_i + offset :])
    return (blocks, [], [])
2024-10-10 22:54:01 +00:00
2024-11-24 16:08:41 +00:00
def format_event_string(event: Event, tz: ZoneInfo) -> str:
    """Render ``event`` as the text of one events-list item.

    An event whose times, subject and verb are all ``None`` is rendered as
    its bare comment. Otherwise the result has the shape
    ``HH:MM[-HH:MM] | verb [[subject]]. comment``, with the times converted
    to ``tz``; the end time is left out when it equals the start time.
    """
    assert event is not None

    structured_fields = (event.start_time, event.end_time, event.subject, event.verb)
    if all(field is None for field in structured_fields):
        return event.comment

    start_part = f'{event.start_time.astimezone(tz):%H:%M}'
    end_part = ''
    if event.end_time and event.end_time != event.start_time:
        end_part = f'-{event.end_time.astimezone(tz):%H:%M}'

    # Stripping removes the trailing space left behind by an empty comment.
    closing_part = (']]. ' + event.comment).strip()

    pieces = [start_part, end_part, ' | ', event.verb, ' [[', event.subject, closing_part]
    return ''.join(pieces)
2024-10-10 21:50:48 +00:00
2024-10-10 22:54:01 +00:00
2024-10-10 21:50:48 +00:00
# Building blocks for the event-line patterns. Each contributes exactly one
# capturing group, except RE_TIME_FORMAT which contributes two (start, end).

# Clock time: HH:MM, optionally followed by :SS, optionally followed by
# fractional seconds. The fraction is optional so that a plain HH:MM:SS is
# matched in full.
RE_TIME = r'(\d\d:\d\d(?::\d\d(?:\.\d+)?)?)'
# A word ending in "ed" or "te".
RE_VERB = r'(\w+(?:ed|te))'
# Markdown link [text](target); captures the text, which may not contain
# ']', ':' or '/'. The target is matched but not captured.
RE_LINK_MD = r'\[([^\]:/]*)\]\((?:[^)]*)\)'
# Wiki link [[folder/name]]; captures the name after an optional folder part.
RE_LINK_WIKI = r'\[\[(?:[^\]:]*\/)?([^\]:/]*)\]\]'
# A start time, optionally followed by "-" and an end time.
RE_TIME_FORMAT = RE_TIME + r'(?:\s*\-\s*' + RE_TIME + r')?'
2024-10-08 20:57:41 +00:00
2024-10-10 22:54:01 +00:00
2024-11-24 16:08:41 +00:00
def parse_event_string(event_str: str, date: datetime.date, timezone: ZoneInfo) -> Event:
    """Parse the text of one events-list item written on ``date``.

    The expected shape is a time (optionally a ``start - end`` range), a
    verb, a Markdown or wiki link naming the subject, and a trailing
    comment. Times are read as wall-clock times in ``timezone`` on ``date``
    and stored converted to UTC; without an end time the start time is used
    for both.

    Text that does not match is logged and returned as an ``Event`` that has
    only its comment set.
    """
    before_link = r'^\s*' + RE_TIME_FORMAT + r'[ :\|-]*' + RE_VERB + r'\s+'
    after_link = r'\.?\s*(.*)$'

    # Markdown links are tried first, then wiki links.
    for link_pattern in (RE_LINK_MD, RE_LINK_WIKI):
        m = re.match(before_link + link_pattern + after_link, event_str)
        if m:
            break
    else:
        logger.info('Could not parse format: %s', event_str)
        return Event(None, None, None, None, event_str)

    start_time = datetime.time.fromisoformat(m.group(1))
    if m.group(2):
        end_time = datetime.time.fromisoformat(m.group(2))
    else:
        end_time = start_time

    start = datetime.datetime.combine(date, start_time, timezone).astimezone(datetime.UTC)
    end = datetime.datetime.combine(date, end_time, timezone).astimezone(datetime.UTC)

    verb, subject, comment = m.group(3), m.group(4), m.group(5)
    return Event(start, end, verb, subject, comment)