1
0
personal-data/obsidian_import/obsidian.py

256 lines
7.9 KiB
Python
Raw Normal View History

2024-10-10 22:54:01 +00:00
import dataclasses
2024-10-03 21:23:47 +00:00
import datetime
import json
2024-10-08 20:57:41 +00:00
import re
2024-10-03 21:24:12 +00:00
from decimal import Decimal
from logging import getLogger
2024-10-03 21:23:47 +00:00
from pathlib import Path
2024-10-03 21:24:12 +00:00
from typing import Any
2024-10-03 21:23:47 +00:00
import frontmatter
2024-10-10 22:54:01 +00:00
import marko
import marko.md_renderer
2024-10-03 21:24:12 +00:00
2024-10-03 21:23:47 +00:00
# Module-level logger, named after this module.
logger = getLogger(__name__)
# Alias for the frontmatter key under which a statistic is stored.
StatisticKey = str
2024-10-10 22:54:01 +00:00
@dataclasses.dataclass(frozen=True)
2024-10-08 20:57:41 +00:00
class Event:
start_time: datetime.time | None
end_time: datetime.time | None
verb: str
subject: str
2024-10-10 21:50:48 +00:00
comment: str
2024-10-08 20:57:41 +00:00
2024-10-10 22:54:01 +00:00
@dataclasses.dataclass(frozen=True)
class FileContents:
    """Parsed representation of one daily note.

    The dataclass is frozen, but the contained dict and lists are still
    mutated in place by ``ObsidianVault`` before the note is saved.
    """

    # Frontmatter metadata of the note (where statistics are stored).
    frontmatter: dict[str, Any]
    # Markdown AST blocks located before the "Events" heading.
    blocks_pre_events: list
    # Events parsed from the list under the "Events" heading.
    events: list[Event]
    # Markdown AST blocks located after the events list.
    blocks_post_events: list
2024-10-10 22:54:01 +00:00
2024-10-08 20:57:41 +00:00
# Shared marko parser; used to turn a note body into a block-level AST.
MARKDOWN_PARSER = marko.Markdown()
# Shared marko renderer; used to turn AST blocks back into Markdown text.
MARKDOWN_RENDERER = marko.md_renderer.MarkdownRenderer()

# Layout of a saved note body: the blocks that preceded the events section,
# the "Events" heading with the event list, then the remaining blocks.
# The formatted result is stripped of surrounding whitespace before saving.
FILE_FORMAT = """
{blocks_pre_events}
## Events
{block_events}
{blocks_post_events}
"""
2024-10-03 21:23:47 +00:00
2024-10-03 21:24:12 +00:00
class ObsidianVault:
    """Read and update the daily notes of an Obsidian vault.

    The daily-note folder, file-name format and template location are taken
    from the vault's ``.obsidian/daily-notes.json``.  A daily note consists of
    frontmatter metadata (the "statistics") and a Markdown body containing an
    ``Events`` section.
    """

    def __init__(self, vault_path: Path, read_only: bool | str = 'silent'):
        """Open the vault rooted at ``vault_path``.

        Args:
            vault_path: Root directory of the vault; must contain ``.obsidian``.
            read_only: When equal to ``'silent'`` (the default),
                ``add_statistic`` and ``add_events`` only log the request and
                return ``False`` without touching any file.

        Raises:
            AssertionError: If ``vault_path`` has no ``.obsidian`` entry.
            FileNotFoundError: If ``.obsidian/daily-notes.json`` is missing.
            KeyError: If the config lacks ``folder``, ``format`` or ``template``.
        """
        self.vault_path = vault_path
        assert (self.vault_path / '.obsidian').exists(), 'Not an Obsidian Vault'

        config_path = self.vault_path / '.obsidian' / 'daily-notes.json'
        with open(config_path, encoding='utf-8') as f:
            daily_notes_config = json.load(f)
        self.daily_folder = daily_notes_config['folder']
        self.path_format = daily_notes_config['format']
        self.template_file_path = daily_notes_config['template']
        # NOTE(review): the write methods only compare this against 'silent';
        # any other value, including True, lets writes through. Confirm that
        # this is the intended meaning of read_only=True.
        self.read_only = read_only

    def get_statistic(
        self,
        date: datetime.date,
        statistic_key: StatisticKey,
    ) -> Any | None:
        """Return the frontmatter value stored under ``statistic_key``.

        Returns ``None`` when the note for ``date`` does not exist or does not
        contain the key.
        """
        contents = self._get_date_contents(date)
        if contents is None:
            return None
        return contents.frontmatter.get(statistic_key)

    def add_statistic(
        self,
        date: datetime.date,
        statistic_key: StatisticKey,
        amount: Any,
    ) -> bool:
        """Store ``amount`` under ``statistic_key`` in the note for ``date``.

        The note is created from the daily template if it does not exist yet.
        Passing ``amount=None`` removes the key.

        Returns:
            ``True`` if the note was rewritten; ``False`` if the vault is in
            ``'silent'`` read-only mode or the stored value already equals
            ``amount``.
        """
        # Decimal values are stored as plain floats (presumably so they
        # serialise as ordinary numbers in the frontmatter).
        if isinstance(amount, Decimal):
            amount = float(amount)

        if self.read_only == 'silent':
            logger.info(
                'Read-only ObsidianVault ignoring add_statistic(%s, "%s", %s)',
                date,
                statistic_key,
                amount,
            )
            return False

        self._create_date_if_not_present(date)
        contents = self._get_date_contents(date)

        # Nothing to do when the stored value (or its absence, for None)
        # already matches.
        if contents.frontmatter.get(statistic_key) == amount:
            return False
        if amount is None:
            contents.frontmatter.pop(statistic_key, None)
        else:
            contents.frontmatter[statistic_key] = amount

        self._save_contents(date, contents)
        return True

    def add_events(self, date: datetime.date, events: list[Event]) -> bool:
        """Append ``events`` to the note for ``date`` and save it.

        The note is created from the daily template if it does not exist yet.
        Duplicate events are dropped when the note is saved.

        Returns:
            ``True`` if the note was rewritten; ``False`` if the vault is in
            ``'silent'`` read-only mode.
        """
        if self.read_only == 'silent':
            logger.info(
                'Read-only ObsidianVault ignoring add_event(%s, "%s", ?)',
                date,
                events,
            )
            return False

        self._create_date_if_not_present(date)
        contents = self._get_date_contents(date)
        contents.events.extend(events)
        self._save_contents(date, contents)
        return True

    def get_events(self, date: datetime.date) -> list[Event]:
        """Return the events of the note for ``date`` (empty if no note)."""
        contents = self._get_date_contents(date)
        if contents is None:
            return []
        return contents.events

    def _get_date_contents(self, date: datetime.date) -> FileContents | None:
        """Load and parse the note for ``date``; ``None`` if it does not exist."""
        try:
            # Notes are saved as UTF-8 (see _save_contents), so read them as
            # UTF-8 too instead of relying on the locale default.
            with open(self._date_file_path(date), encoding='utf-8') as f:
                file_frontmatter = frontmatter.load(f)
        except FileNotFoundError:
            return None

        # str() of the loaded post is parsed as the Markdown body.
        ast = MARKDOWN_PARSER.parse(str(file_frontmatter))
        (pre_events, list_block_items, post_events) = find_events_list_block(ast)
        events = [parse_event_string(list_item) for list_item in list_block_items]
        return FileContents(file_frontmatter.metadata, pre_events, events, post_events)

    def _save_contents(self, date: datetime.date, contents: FileContents) -> None:
        """Render ``contents`` back to Markdown and overwrite the note for ``date``."""
        logger.info('Formatting file "%s"', date)
        blocks_pre_events = ''.join(
            MARKDOWN_RENDERER.render(b) for b in contents.blocks_pre_events
        )
        blocks_post_events = ''.join(
            MARKDOWN_RENDERER.render(b) for b in contents.blocks_post_events
        )
        # unique() drops repeated events while keeping their order.
        block_events = '\n'.join(
            '- ' + format_event_string(e) for e in unique(contents.events)
        )
        text = FILE_FORMAT.format(
            blocks_pre_events=blocks_pre_events,
            blocks_post_events=blocks_post_events,
            block_events=block_events,
        ).strip()

        logger.info('Saving file "%s"', date)
        # frontmatter.dump is handed a binary file object here.
        with open(self._date_file_path(date), 'wb') as f:
            frontmatter.dump(frontmatter.Post(text, **contents.frontmatter), f)

    def _create_date_if_not_present(self, date: datetime.date):
        """Create the note for ``date`` from the daily template if missing."""
        date_file = self._date_file_path(date)
        if date_file.exists():
            return
        logger.info('File "%s" doesn\'t exist, creating...', date)
        template_text = self._daily_template_path().read_text(encoding='utf-8')
        # The formatted note path may point into a folder that does not
        # exist yet.
        date_file.parent.mkdir(parents=True, exist_ok=True)
        date_file.write_text(template_text, encoding='utf-8')

    def _date_file_path(self, date: datetime.date):
        """Return the path of the note file for ``date``.

        Only the ``YYYY``, ``MM`` and ``DD`` tokens of the configured format
        are substituted.
        """
        path = (
            self.path_format.replace('YYYY', str(date.year))
            .replace('MM', f'{date.month:02d}')
            .replace('DD', f'{date.day:02d}')
        )
        return self._with_md_suffix(self.vault_path / self.daily_folder / path)

    def _daily_template_path(self):
        """Return the path of the daily-note template file."""
        return self._with_md_suffix(self.vault_path / self.template_file_path)

    @staticmethod
    def _with_md_suffix(path: Path) -> Path:
        """Return ``path`` ending in ``.md``, appending it when missing.

        ``Path.with_suffix`` is deliberately avoided: it would replace
        everything after the last dot, turning ``2024.10.03`` into
        ``2024.10.md``.
        """
        if path.suffix == '.md':
            return path
        return path.with_name(path.name + '.md')
2024-10-08 20:57:41 +00:00
2024-10-10 22:54:01 +00:00
2024-10-08 20:57:41 +00:00
def _is_events_heading(block) -> bool:
    """Return True if ``block`` is a heading whose text is "events" (any case)."""
    if not isinstance(block, marko.block.Heading) or not block.children:
        # Not a heading, or an empty heading such as a bare "##".
        return False
    # The first inline child is expected to be raw text, whose ``children``
    # attribute is the string itself; other inline elements hold a list.
    title = getattr(block.children[0], 'children', None)
    return isinstance(title, str) and title.lower() == 'events'


def find_events_list_block(ast) -> tuple[list, list[str], list]:
    """Split a parsed note body around its ``Events`` section.

    Args:
        ast: Parsed Markdown document; its ``children`` are the top-level
            blocks.

    Returns:
        A tuple ``(blocks_before, event_texts, blocks_after)``:
        ``blocks_before`` are the blocks preceding the first ``Events``
        heading, ``event_texts`` the rendered and stripped text of each item
        of the list directly following that heading (empty when the next
        block is not a list, or when there is no next block), and
        ``blocks_after`` everything following the heading and its list.
        Without an ``Events`` heading the result is ``(all_blocks, [], [])``.
    """
    blocks = ast.children
    for block_i, block in enumerate(blocks):
        if not _is_events_heading(block):
            continue

        # The heading may be the very last block of the document, in which
        # case there is no following block to inspect.
        next_i = block_i + 1
        events_block = blocks[next_i] if next_i < len(blocks) else None

        if isinstance(events_block, marko.block.List):
            offset = 2
            event_texts = [
                MARKDOWN_RENDERER.render_children(li).strip()
                for li in events_block.children
            ]
        else:
            # No list directly after the heading: only the heading is cut out.
            offset = 1
            event_texts = []

        return (blocks[:block_i], event_texts, blocks[block_i + offset :])
    return (blocks, [], [])
2024-10-10 22:54:01 +00:00
2024-10-08 20:57:41 +00:00
def format_event_string(event: Event) -> str:
    """Render ``event`` as the text of one ``Events`` list item.

    Comment-only events (no times, verb or subject) are rendered as their
    comment verbatim.  Other events are rendered as
    ``HH:MM[-HH:MM] | verb [[subject]]. comment``; the end time is left out
    when it is missing or equal to the start time, and the whole time prefix
    is left out when the event has no start time.
    """
    assert event is not None
    if (
        event.start_time is None
        and event.end_time is None
        and event.subject is None
        and event.verb is None
    ):
        return event.comment

    buf = []
    # Formatting None with '%H:%M' raises TypeError, so the time prefix is
    # only emitted for events that actually have a start time.
    if event.start_time is not None:
        buf.append(f'{event.start_time:%H:%M}')
        if event.end_time is not None and event.end_time != event.start_time:
            buf.append(f'-{event.end_time:%H:%M}')
        buf.append(' | ')
    buf.append(event.verb)
    buf.append(' [[')
    buf.append(event.subject)
    buf.append(']]. ')
    buf.append(event.comment.strip())
    return ''.join(buf)
2024-10-10 21:50:48 +00:00
2024-10-10 22:54:01 +00:00
2024-10-10 21:50:48 +00:00
# Building blocks for the regular expressions used by parse_event_string.

# A time of day: HH:MM, optionally followed by :SS and fractional seconds.
RE_TIME = r'(\d\d:\d\d(?::\d\d(?:\.\d+)?)?)'
# A single word ending in "ed" or "te" (e.g. "Played", "Ate").
RE_VERB = r'(\w+(?:ed|te))'
# A Markdown link "[text](target)"; captures the text.
RE_LINK_MD = r'\[([^\]]*)\]\([^)]*\)'
# A wiki link "[[target]]"; captures the target.
RE_LINK_WIKI = r'\[\[([^\]]*)\]\]'
# A start time, optionally followed by "-" and an end time.
RE_TIME_FORMAT = RE_TIME + r'(?:\s*\-\s*' + RE_TIME + r')?'
2024-10-08 20:57:41 +00:00
2024-10-10 22:54:01 +00:00
2024-10-08 20:57:41 +00:00
def parse_event_string(event_str: str) -> Event:
    """Parse the text of one ``Events`` list item into an ``Event``.

    Recognised shape: a start time, an optional ``-`` and end time, optional
    separator characters (space, ``:``, ``|``, ``-``), a verb, a Markdown or
    wiki link naming the subject, an optional ``.`` and the remaining text as
    comment.  The end time defaults to the start time when absent.

    Text that does not have this shape, or whose times are not valid times of
    day, is returned as a comment-only event (all other fields ``None``).
    """
    # The Markdown link form is tried before the wiki link form.
    for link_pattern in (RE_LINK_MD, RE_LINK_WIKI):
        pattern = (
            r'^\s*'
            + RE_TIME_FORMAT
            + r'[ :\|-]*'
            + RE_VERB
            + r'\s+'
            + link_pattern
            + r'\.?\s*(.*)$'
        )
        m = re.match(pattern, event_str)
        if m is None:
            continue
        try:
            start = datetime.time.fromisoformat(m.group(1))
            end = datetime.time.fromisoformat(m.group(2)) if m.group(2) else start
        except ValueError:
            # Looks like a time but is not one (e.g. "25:00"); keep the item
            # as plain text rather than failing the whole note.
            break
        return Event(start, end, m.group(3), m.group(4), m.group(5))

    logger.info('Could not parse format: %s', event_str)
    return Event(None, None, None, None, event_str)
2024-10-08 20:57:41 +00:00
def unique(ls: list) -> list:
    """Return the distinct items of ``ls`` in order of first appearance.

    Items must be hashable.
    """
    seen = set()
    distinct = []
    for item in ls:
        if item in seen:
            continue
        seen.add(item)
        distinct.append(item)
    return distinct