From 43627f2aa77b16a8b17372e40577bb4a3616c1d6 Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Sun, 17 Nov 2024 16:51:31 +0100 Subject: [PATCH] Added cache layer for texts. Allows for waiting until all changes have been effectuated before saving to disk. --- obsidian_import/__init__.py | 8 ++- obsidian_import/obsidian.py | 106 ++++++++++++++++++------------------ 2 files changed, 59 insertions(+), 55 deletions(-) diff --git a/obsidian_import/__init__.py b/obsidian_import/__init__.py index 519c8bd..43073bb 100644 --- a/obsidian_import/__init__.py +++ b/obsidian_import/__init__.py @@ -41,7 +41,7 @@ def iterate_samples_from_rows(rows: Rows) -> Iterator[ActivitySample]: for event_data in rows: (start_at, end_at) = start_end(event_data, possible_keys) - labels = [Label(k, event_data[k]) for k in possible_keys.misc] + labels = [Label(k, event_data.get(k)) for k in possible_keys.misc if k in event_data] # Create event yield ActivitySample( @@ -243,3 +243,9 @@ def import_data(obsidian_path: Path, dry_run=True): import_watched_series_csv_from_file(vault) import_played_games_csv_from_file(vault) + + num_dirty = len([f for f in vault.internal_file_text_cache.values() if f.is_dirty]) + logger.info('dirty files in cache: %d', num_dirty) + logger.info('clean files in cache: %d', len(vault.internal_file_text_cache) - num_dirty) + if not dry_run: + vault.flush_cache() diff --git a/obsidian_import/obsidian.py b/obsidian_import/obsidian.py index 8ffa90d..90ca967 100644 --- a/obsidian_import/obsidian.py +++ b/obsidian_import/obsidian.py @@ -38,6 +38,12 @@ class FileContents: blocks_post_events: list +@dataclasses.dataclass(frozen=False) +class CachedFile: + data: bytes + is_dirty: bool + + MARKDOWN_PARSER = marko.Markdown() MARKDOWN_RENDERER = marko.md_renderer.MarkdownRenderer() @@ -64,12 +70,14 @@ class ObsidianVault: self.template_file_path = daily_notes_config['template'] self.read_only = read_only + self.internal_file_text_cache: dict[Path, CachedFile] = {} + def get_statistic( self, date: datetime.date, statistic_key: StatisticKey, ) -> Any | None: - if contents := self._get_date_contents(date): + if contents := self._load_date_contents(date): return contents.frontmatter.get(statistic_key) return None @@ -83,19 +91,8 @@ class ObsidianVault: if isinstance(amount, Decimal): amount = float(amount) - # Check for silent - if self.read_only == 'silent': - logger.info( - 'Read-only ObsidianVault ignoring add_statistic(%s, "%s", %s)', - date, - statistic_key, - amount, - ) - return False - # Load contents - self._create_date_if_not_present(date) - contents = self._get_date_contents(date) + contents = self._load_date_contents(date) # Update contents if contents.frontmatter.get(statistic_key) == amount: @@ -106,20 +103,11 @@ class ObsidianVault: del contents.frontmatter[statistic_key] # Save contents - self._save_contents(date, contents) + self._save_date_contents(date, contents) return True def add_events(self, date: datetime.date, events: list[Event]) -> bool: - if self.read_only == 'silent': - logger.info( - 'Read-only ObsidianVault ignoring add_event(%s, "%s", ?)', - date, - events, - ) - if not self.read_only: - self._create_date_if_not_present(date) - - contents = self._get_date_contents(date) + contents = self._load_date_contents(date) if contents is None: return False @@ -129,22 +117,21 @@ class ObsidianVault: return False contents = dataclasses.replace(contents, events=updated_events) - if not self.read_only: - self._save_contents(date, contents) + self._save_date_contents(date, contents) return True def get_events(self, date: datetime.date) -> frozenset[Event]: - contents = self._get_date_contents(date) + contents = self._load_date_contents(date) if contents is None: return frozenset() return contents.events - def _get_date_contents(self, date: datetime.date) -> FileContents | None: - try: - with open(self._date_file_path(date)) as f: - file_frontmatter = frontmatter.load(f) - except FileNotFoundError: - return None + def _load_date_contents(self, date: datetime.date) -> FileContents | None: + file_path = self._date_file_path(date) + text = self._load_file_text(file_path) or self._load_file_text(self._daily_template_path()) + assert text is not None + + file_frontmatter = frontmatter.loads(text) ast = MARKDOWN_PARSER.parse(str(file_frontmatter)) (pre_events, list_block_items, post_events) = find_events_list_block(ast) @@ -153,8 +140,7 @@ class ObsidianVault: ) return FileContents(file_frontmatter.metadata, pre_events, events, post_events) - def _save_contents(self, date: datetime.date, contents: FileContents) -> None: - logger.info('Formatting file "%s"', date) + def _save_date_contents(self, date: datetime.date, contents: FileContents) -> None: blocks_pre_events = ''.join( MARKDOWN_RENDERER.render(b) for b in contents.blocks_pre_events ) @@ -163,31 +149,23 @@ class ObsidianVault: ) events = list(contents.events) - events.sort() + events.sort(key=lambda x: x.subject or '') + events.sort(key=lambda x: x.verb or '') events.sort(key=lambda x: x.start_time or x.end_time or MIDNIGHT) block_events = '\n'.join('- ' + format_event_string(e) for e in events) - text = FILE_FORMAT.format( + self._save_file_text_to_cache(self._date_file_path(date), FILE_FORMAT.format( blocks_pre_events=blocks_pre_events, blocks_post_events=blocks_post_events, block_events=block_events, - ).strip() + ).strip().encode('utf8')) - logger.info('Saving file "%s"', date) - with open(self._date_file_path(date), 'wb') as f: - frontmatter.dump(frontmatter.Post(text, **contents.frontmatter), f) + def _save_file_text_to_cache(self, path: Path, text: bytes) -> None: + if path not in self.internal_file_text_cache: + self.internal_file_text_cache[path] = CachedFile(None, False) + self.internal_file_text_cache[path].data = text + self.internal_file_text_cache[path].is_dirty = True - def _create_date_if_not_present(self, date: datetime.date): - date_file = self._date_file_path(date) - if date_file.exists(): - return - logger.info('File "%s" doesn\'t exist, creating...', date) - with open(self._daily_template_path()) as f: - template_text = f.read() - date_file.parent.mkdir(exist_ok=True, parents=True) - with open(date_file, 'w') as f: - f.write(template_text) - - def _date_file_path(self, date: datetime.date): + def _date_file_path(self, date: datetime.date) -> Path: path = ( self.path_format.replace('YYYY', str(date.year)) .replace('MM', f'{date.month:02d}') @@ -195,9 +173,29 @@ class ObsidianVault: ) return (self.vault_path / self.daily_folder / path).with_suffix('.md') - def _daily_template_path(self): + def _daily_template_path(self) -> Path: return (self.vault_path / self.template_file_path).with_suffix('.md') + def _load_file_text(self, path: Path) -> bytes | None: + if path not in self.internal_file_text_cache: + try: + with open(path, 'rb') as f: + self.internal_file_text_cache[path] = CachedFile(f.read(), False) + except FileNotFoundError: + return None + return self.internal_file_text_cache[path].data + + def flush_cache(self) -> None: + if self.read_only: + msg = 'Read-only ObsidianVault cannot be flushed' + raise RuntimeError(msg) + for path, cached_file in self.internal_file_text_cache.items(): + if cached_file.is_dirty: + logger.info('Saving file "%s"', path) + path.parent.mkdir(exist_ok=True, parents=True) + with open(path, 'wb') as f: + f.write(cached_file.data) + del path, cached_file def find_events_list_block(ast) -> tuple[list, list[str], list]: blocks = ast.children