"""# Markdown Message Conversion.

Conversion script from various messaging formats to markdown.

Supported input formats:

- [Pidgin/Libpurple](https://pidgin.im/) chat program HTML-based logs.
  **This backend is not actively maintained.**
- [SyncTech Backup & Restore](https://www.synctech.com.au/sms-backup-restore/)
  XML-based backup format.

## Motivation

Messaging applications are mostly good at sending real-time messages to other
people, but they generally do not possess any useful archival features. Most
messages are write-once read-once, and the apps were built for this use case.

More and more though, I am attracted to the prospect of archival; of
understanding who I am and who I _was_ when I wrote those messages.

I recently discovered [Obsidian](https://obsidian.md) and liked the prospect of
cross-referencing my notes with my old chat logs. Libpurple uses HTML logs if
you haven't configured it to something else (which I hadn't).

I no longer use IRC or Pidgin as my entire friend group has switched to using
Matrix.

## Usage

From the repository root:

```bash
python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER
```

It was made specifically for import into Obsidian, so it might not suit your
purposes, but it shouldn't be too difficult to adjust the formatting code.

## TODO

- [ ] Decode MMS parts and reconstruct image attachments.
""" import dataclasses import datetime import logging import re from collections.abc import Iterable, Iterator from ._version import __version__ from .data import Message __all__ = ['__version__'] logger = logging.getLogger(__name__) def datetime_sent( chat_start: datetime.datetime, message_sent: datetime.time, ) -> datetime.datetime: naive = datetime.datetime.combine( chat_start.date(), message_sent, chat_start.tzinfo, ) if chat_start.time() > message_sent: naive = naive + datetime.timedelta(days=1) return naive def format_message_as_citation(out: list[str], msg: Message) -> None: out.append(f'{msg.sent_at.date()} {msg.sent_at.time()} [[{msg.sender}]]:') out.append('\n') for line in msg.text.split('\n'): line = re.sub(r'(<[\w ]+>)', r'`\1`', line) line = re.sub(r'(\$\$\$)', r'`\1`', line) out.append(f'> {line}\n') del line out.append('\n') def format_message_as_table(out: list[str], msg: Message) -> None: out.append(f'| {msg.sent_at} | [[{msg.sender}]] | ') for line in msg.text.split('\n'): out.append(f'{line}') del line out.append('|\n') def format_messages(messages: list[Message], title: str) -> str: out = ['# ', title, '\n\n'] as_table = False for msg_idx, msg in enumerate(messages): if msg_idx == 0 or messages[msg_idx - 1].sent_at.date() != msg.sent_at.date(): out.append('---\n') out.append(f'## [[{msg.sent_at.date()}]]\n\n') if as_table: out.append('| sent at | sender | text |\n') out.append('| ------- | ------ | ---- |\n') if as_table: format_message_as_table(out, msg) else: format_message_as_citation(out, msg) del msg return ''.join(out) MSG_ADJACENTCY_DIST = datetime.timedelta(minutes=2) def is_useless_message(msg: Message) -> bool: return msg.sender.endswith('') or msg.sender == '' def filter_useless_messages(messages: Iterable[Message]) -> Iterator[Message]: for msg in messages: if not is_useless_message(msg): yield msg else: print(msg.text) def is_adjacent_messages(first: Message, second: Message) -> bool: return ( first.sender == second.sender and 
second.sent_at - first.sent_at <= MSG_ADJACENTCY_DIST ) def merge_texts(text1: str, text2: str) -> str: punctuated = text1.endswith('.?!,:') # return text1 + (' ' if punctuated else '. ') + text2 return text1 + (' ' if punctuated else '. ') + '\n' + text2 def merge_adjacent_messages(messages: Iterable[Message]) -> list[Message]: out: list[Message] = [] for msg in messages: if out and is_adjacent_messages(out[-1], msg): out[-1] = dataclasses.replace( out[-1], text=merge_texts(out[-1].text, msg.text), ) else: out.append(msg) return out