From a4df23e8ffeeb1977b0b1bc064aa79822916e6e8 Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Sun, 17 Nov 2024 12:04:29 +0100 Subject: [PATCH] More options for recurring import --- libpurple_to_markdown/__init__.py | 32 ++++++++++++++++--- libpurple_to_markdown/__main__.py | 51 +++++++++++++++++++++---------- 2 files changed, 63 insertions(+), 20 deletions(-) diff --git a/libpurple_to_markdown/__init__.py b/libpurple_to_markdown/__init__.py index f150ae8..078c2fd 100644 --- a/libpurple_to_markdown/__init__.py +++ b/libpurple_to_markdown/__init__.py @@ -26,18 +26,42 @@ Matrix. ## Usage -From the repository root: +There are two main import patterns: + +- One-off archival import: For when you have a large set of messages to import + for a service that you don't use very much anymore. +- Recurring import: For when you are still using the service, and want to + import on a recurring basis. + +This program will by default not overwrite existing files, as the user might +have modified them. + +Special consideration must be taken for recurring imports if you expect to be +modifying the resulting files, for example if you are inserting links +using Obsidian's unlinked mentions feature. You might want to use the +`--skip-this-period` flag to avoid importing the current period until it has +become the last period. That way you won't accidentally modify the log, because +it has been finalized. + +### One-off + +This is the recommended command for the one-off case: ```bash python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER ``` -It was made specifically for import into Obsidian, so it might not suite your -purposes, but it shouldn't be too difficult to adjust the formatting code. +### Recurring + +This is the recommended command for the recurring import case: + +```bash +python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER --skip-this-period --period month +``` ## TODO -- [ ] Decode MMS parts and reconstruct image attachments. 
+- [ ] SyncTech: Decode MMS parts and reconstruct image attachments. """ import dataclasses diff --git a/libpurple_to_markdown/__main__.py b/libpurple_to_markdown/__main__.py index 12570a0..ba8f893 100644 --- a/libpurple_to_markdown/__main__.py +++ b/libpurple_to_markdown/__main__.py @@ -43,24 +43,27 @@ def year_quarter_period_key(msg: Message): MAX_AVERAGE_MESSAGES_PER_PERIOD = 120 +PERIOD_KEYS_BY_NAME = { + 'full': (lambda msg: 'full'), + 'year': year_period_key, + 'quarter': year_quarter_period_key, + 'month': year_and_month_period_key, +} -def group_messages_by_period(messages: Iterable[Message]) -> dict[str, list[Message]]: - possible_period_keys = [ - (lambda msg: 'full'), - year_period_key, - year_quarter_period_key, - year_and_month_period_key, - ] - for period_key in possible_period_keys: +def group_messages_by_period(messages: Iterable[Message], period_key: str | None = None) -> dict[str, list[Message]]: + possible_period_keys = PERIOD_KEYS_BY_NAME.values() + if period_key is not None: + possible_period_keys = [PERIOD_KEYS_BY_NAME[period_key]] + for period_key_fn in possible_period_keys: grouped = group_messages(messages, key=period_key) average_num_messages = sum(len(grouped[k]) for k in grouped) / len(grouped) if average_num_messages <= MAX_AVERAGE_MESSAGES_PER_PERIOD: break - del period_key, average_num_messages + del average_num_messages - return grouped + return grouped, period_key_fn def replace_myself(messages: Iterable[Message], myself: str) -> Iterator[Message]: @@ -77,9 +80,13 @@ def parse_args(): parser.add_argument('--synctech', type=Path, dest='synctech_sms_backup_file') parser.add_argument('--output', type=Path) parser.add_argument('--myself', type=str, default='Myself') + parser.add_argument('--overwrite', action='store_true', dest='overwrite_files') + parser.add_argument('--period', dest='period_key', values=list(PERIOD_KEYS_BY_NAME.keys())) + parser.add_argument('--skip-this-period', action='store_true', dest='skip_this_period') return 
parser.parse_args() + def main(): logging.basicConfig() logging.getLogger().setLevel('INFO') @@ -112,7 +119,7 @@ def main(): logger.info(' "%s": Skipped due to too few messages', chat_id) continue - messages_by_period = group_messages_by_period(messages_in_chat) + messages_by_period, period_key_fn = group_messages_by_period(messages_in_chat, args.period_key) logger.info( ' "%s": %d messages, %d periods (%d msg/period avg)', chat_id, @@ -121,22 +128,34 @@ def main(): len(messages_in_chat_original) / len(messages_by_period), ) - for period_key, messages in messages_by_period.items(): + this_period_name = period_key_fn(datetime.datetime.now()) + + for period_key_name, messages in messages_by_period.items(): file_escaped_chat_id = chat_id.replace(' ', '-') output_file = ( - args.output / chat_id / f'{file_escaped_chat_id}-{period_key}.md' + args.output / chat_id / f'{file_escaped_chat_id}-{period_key_name}.md' ) - output_file.parent.mkdir(exist_ok=True) + logger.info('Writing % 5d messages to %s', len(messages), output_file) + + if this_period_name == period_key_name: + logger.info('Skipping due to --skip-this-period: %s', output_file) + continue + if output_file.exists() and not args.overwrite_files: + logger.info('Skipping existing file: %s', output_file) + continue + + # Create folders and file + output_file.parent.mkdir(exist_ok=True) with open(output_file, 'w') as f: f.write( format_messages( messages, - title=f'{chat_id} - {period_key}', + title=f'{chat_id} - {period_key_name}', ), ) - del period_key, messages, output_file + del period_key_name, messages, output_file del chat_id, messages_in_chat_original, messages_in_chat