1
0
libpurple-to-markdown/libpurple_to_markdown/__init__.py

147 lines
4.2 KiB
Python

"""# Markdown Message Conversion.
Conversion script from various messaging formats to markdown.
Supported input formats:
- [Pidgin/Libpurple](https://pidgin.im/) chat program HTML-based logs. **This
backend is not actively maintained.**
- [SyncTech Backup & Restore](https://www.synctech.com.au/sms-backup-restore/)
XML-based backup format.
## Motivation
Messaging applications are mostly good at sending real-time messages to other
people, but they generally do not possess any useful archival features. Most
messages are write-once read-once, and the apps were built for this use case.
More and more, though, I am attracted to the prospect of archival; of
understanding who I am and who I _was_ when I wrote those messages.
I recently discovered [Obsidian](https://obsidian.md) and liked the prospect of
cross-referencing my notes with my old chat logs. Libpurple uses HTML logs if
you haven't configured it to something else (which I hadn't).
I no longer use IRC or Pidgin as my entire friend group have switched to using
Matrix.
## Usage
From the repository root:
```bash
python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER
```
It was made specifically for import into Obsidian, so it might not suit your
purposes, but it shouldn't be too difficult to adjust the formatting code.
"""
import dataclasses
import datetime
import logging
import re
from collections.abc import Iterable, Iterator
from ._version import __version__
from .data import Message
# Public names exposed by a star-import of this package.
__all__ = ['__version__']
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def datetime_sent(
    chat_start: datetime.datetime,
    message_sent: datetime.time,
) -> datetime.datetime:
    """Turn a time-of-day message timestamp into a full datetime.

    The result uses the date and ``tzinfo`` of ``chat_start`` together with
    the time of day given by ``message_sent``. When ``message_sent`` is
    earlier in the day than the time of ``chat_start``, the result is moved
    forward by one day.

    Args:
        chat_start: Datetime at which the chat began.
        message_sent: Time of day at which the message was sent.

    Returns:
        The datetime at which the message was sent.
    """
    on_start_day = datetime.datetime.combine(
        chat_start.date(),
        message_sent,
        chat_start.tzinfo,
    )
    if chat_start.time() > message_sent:
        # The clock wrapped past midnight since the chat started.
        return on_start_day + datetime.timedelta(days=1)
    return on_start_day
def format_message_as_citation(out: list[str], msg: Message) -> None:
    """Append ``msg`` to ``out`` rendered as a Markdown block quote.

    A header fragment with the sent date, sent time and the sender wrapped in
    ``[[...]]`` is appended first, followed by a newline fragment. Every line
    of the message text is then appended as a ``> ``-prefixed quote line, and
    a final newline fragment closes the entry.

    Within each text line, ``<...>`` spans made of word characters and spaces,
    as well as the literal ``$$$``, are wrapped in backticks.

    Args:
        out: List of output fragments; modified in place.
        msg: Message to render; its ``sent_at``, ``sender`` and ``text``
            attributes are read.
    """
    sent_at = msg.sent_at
    out.extend((f'{sent_at.date()} {sent_at.time()} [[{msg.sender}]]:', '\n'))
    for raw_line in msg.text.split('\n'):
        # Tag-like spans are wrapped first, then the ``$$$`` marker.
        quoted = re.sub(
            r'(\$\$\$)',
            r'`\1`',
            re.sub(r'(<[\w ]+>)', r'`\1`', raw_line),
        )
        out.append(f'> {quoted}\n')
    out.append('\n')
def format_message_as_table(out: list[str], msg: Message) -> None:
    """Append ``msg`` to ``out`` rendered as one Markdown table row.

    The row starts with the sent timestamp and the sender wrapped in
    ``[[...]]``. The lines of the message text follow as separate fragments
    with no separator between them, and the row is closed with ``|`` and a
    newline.

    Args:
        out: List of output fragments; modified in place.
        msg: Message to render; its ``sent_at``, ``sender`` and ``text``
            attributes are read.
    """
    row_start = f'| {msg.sent_at} | [[{msg.sender}]] | '
    out.append(row_start)
    # One fragment per text line, exactly as split.
    out.extend(msg.text.split('\n'))
    out.append('|\n')
def format_messages(
    messages: list[Message],
    title: str,
    *,
    as_table: bool = False,
) -> str:
    """Render a list of messages as one Markdown document.

    The document starts with ``title`` as a level-one heading. Whenever the
    sent date of a message differs from that of the message before it (and
    for the first message), a ``---`` rule and a ``## [[date]]`` heading are
    emitted. Each message is then rendered either as a block quote or as a
    table row.

    Args:
        messages: Messages to render, in the order they should appear.
        title: Text of the document heading.
        as_table: When true, render each day as a Markdown table with a
            header row; otherwise (the default) render each message as a
            block quote.

    Returns:
        The complete Markdown text. For an empty ``messages`` list this is
        just the title heading.
    """
    out = ['# ', title, '\n\n']
    previous_date = None
    for msg in messages:
        current_date = msg.sent_at.date()
        if current_date != previous_date:
            # A new day starts a new section.
            out.append('---\n')
            out.append(f'## [[{current_date}]]\n\n')
            if as_table:
                out.append('| sent at | sender | text |\n')
                out.append('| ------- | ------ | ---- |\n')
            previous_date = current_date
        if as_table:
            format_message_as_table(out, msg)
        else:
            format_message_as_citation(out, msg)
    return ''.join(out)
# Largest time gap between two messages from the same sender for them to still
# count as adjacent (compared against in is_adjacent_messages).
MSG_ADJACENTCY_DIST = datetime.timedelta(minutes=2)
def is_useless_message(msg: Message) -> bool:
    """Tell whether ``msg`` should be dropped from the output.

    A message counts as useless when its sender ends with ``<AUTO-REPLY>`` or
    when its sender is the empty string.

    Args:
        msg: Message to inspect; only its ``sender`` attribute is read.

    Returns:
        ``True`` if the message is useless, ``False`` otherwise.
    """
    sender = msg.sender
    if sender.endswith('<AUTO-REPLY>'):
        return True
    return sender == ''
def filter_useless_messages(messages: Iterable[Message]) -> Iterator[Message]:
    """Yield only the messages that are not considered useless.

    Messages for which ``is_useless_message`` returns true are skipped. Each
    skipped message is reported through the module-level ``logger`` at debug
    level rather than printed to standard output, so that using this function
    as a library does not write to the caller's stdout.

    Args:
        messages: Messages to filter.

    Yields:
        The messages that are kept, in their original order.
    """
    for msg in messages:
        if is_useless_message(msg):
            logger.debug(
                'Skipping useless message from %r: %s',
                msg.sender,
                msg.text,
            )
            continue
        yield msg
def is_adjacent_messages(first: Message, second: Message) -> bool:
    """Tell whether ``second`` directly continues ``first``.

    Two messages are adjacent when they have the same sender and ``second``
    was sent no more than ``MSG_ADJACENTCY_DIST`` after ``first``.

    Args:
        first: The earlier message.
        second: The later message.

    Returns:
        ``True`` if the two messages are adjacent, ``False`` otherwise.
    """
    if first.sender != second.sender:
        return False
    gap = second.sent_at - first.sent_at
    return gap <= MSG_ADJACENTCY_DIST
def merge_texts(text1: str, text2: str) -> str:
    """Join two message texts into one, with ``text2`` on a new line.

    If ``text1`` already ends with one of ``.``, ``?``, ``!``, ``,`` or ``:``
    a single space is appended to it; otherwise a period and a space are
    appended. A newline and ``text2`` follow.

    Args:
        text1: Text of the earlier message.
        text2: Text of the later message.

    Returns:
        The combined text.
    """
    # ``str.endswith`` needs a tuple to test several alternative suffixes; a
    # single string would only match that exact multi-character suffix.
    punctuated = text1.endswith(('.', '?', '!', ',', ':'))
    return text1 + (' ' if punctuated else '. ') + '\n' + text2
def merge_adjacent_messages(messages: Iterable[Message]) -> list[Message]:
    """Collapse runs of adjacent messages into single messages.

    Each incoming message is compared with the last message collected so far
    using ``is_adjacent_messages``. If they are adjacent, the collected
    message is replaced by a copy whose text is the ``merge_texts``
    combination of both texts; otherwise the incoming message is collected
    as a new entry.

    Args:
        messages: Messages to process, in order.

    Returns:
        A new list with adjacent messages merged.
    """
    merged: list[Message] = []
    for current in messages:
        if not merged or not is_adjacent_messages(merged[-1], current):
            merged.append(current)
            continue
        previous = merged[-1]
        # All fields other than the text are kept from the earlier message.
        merged[-1] = dataclasses.replace(
            previous,
            text=merge_texts(previous.text, current.text),
        )
    return merged