1
0
libpurple-to-markdown/libpurple_to_markdown/__init__.py

110 lines
3.1 KiB
Python

"""# Markdown Message Conversion.
Conversion script from various messaging formats to markdown.
Supported input formats:
- [Pidgin/Libpurple](https://pidgin.im/) chat program HTML-based logs. **This
backend is not actively maintained.**
- [SyncTech Backup & Restore](https://www.synctech.com.au/sms-backup-restore/)
XML-based backup format.
## Motivation
Messaging applications are mostly good at sending real-time messages to other
people, but they generally do not possess any useful archival features. Most
messages are write-once read-once, and the apps were built for this use case.
More and more though, I am attracted to the prospect of archival; of
understanding who I am and who I _was_ when I wrote those messages.
I recently discovered [Obsidian](https://obsidian.md) and liked the prospect of
cross-referencing my notes with my old chat logs. Libpurple uses HTML logs if
you haven't configured it to something else (which I hadn't).
I no longer use IRC or Pidgin as my entire friend group have switched to using
Matrix.
## Usage
From the repository root:
```bash
python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER
```
It was made specifically for import into Obsidian, so it might not suit your
purposes, but it shouldn't be too difficult to adjust the formatting code.
## TODO
- [ ] Decode MMS parts and reconstruct image attachments.
"""
import dataclasses
import datetime
import logging
from collections.abc import Iterable, Iterator
from ._version import __version__
from .data import Message
# Names exported by a star-import of this module: only the version string.
__all__ = ['__version__']
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
def datetime_sent(
    chat_start: datetime.datetime,
    message_sent: datetime.time,
) -> datetime.datetime:
    """Resolve a bare clock time into a full timestamp relative to a chat start.

    The result carries the date and ``tzinfo`` of ``chat_start`` together
    with the clock time ``message_sent``. When ``message_sent`` is earlier in
    the day than the clock time of ``chat_start``, the message is treated as
    having been sent after midnight and the result is moved one day forward.

    Args:
        chat_start: Moment the conversation started.
        message_sent: Clock time of the message, without a date.

    Returns:
        The combined date and time, using ``chat_start.tzinfo``.
    """
    sent = datetime.datetime.combine(
        chat_start.date(),
        message_sent,
        chat_start.tzinfo,
    )
    # Only a single midnight rollover is accounted for: a clock time before
    # the chat's own start time is placed on the following day.
    crossed_midnight = message_sent < chat_start.time()
    if crossed_midnight:
        return sent + datetime.timedelta(days=1)
    return sent
# Largest time gap between two messages for which `is_adjacent_messages`
# still treats them as adjacent.
MSG_ADJACENTCY_DIST = datetime.timedelta(minutes=2)
def is_useless_message(msg: Message) -> bool:
    """Tell whether ``msg`` is considered useless.

    A message is useless when its sender name ends with the ``<AUTO-REPLY>``
    marker, or when its sender name is the empty string.
    """
    sender = msg.sender
    if sender.endswith('<AUTO-REPLY>'):
        return True
    return sender == ''
def filter_useless_messages(messages: Iterable[Message]) -> Iterator[Message]:
    """Lazily yield the messages that ``is_useless_message`` does not reject.

    The text of each rejected message is reported through the module logger
    at DEBUG level instead of being printed, so that diagnostics never end up
    on standard output and can be enabled or silenced via ``logging``
    configuration.

    Args:
        messages: Messages to filter, consumed one at a time.

    Yields:
        Every message for which ``is_useless_message`` returns a false value,
        in the original order.
    """
    for msg in messages:
        if is_useless_message(msg):
            # Lazy %-formatting: the text is only rendered when DEBUG is on.
            logger.debug('Skipping useless message: %s', msg.text)
            continue
        yield msg
def is_adjacent_messages(first: Message, second: Message) -> bool:
    """Tell whether ``second`` follows ``first`` closely enough to be merged.

    Both messages must have equal senders, and the time from ``first`` to
    ``second`` must not exceed ``MSG_ADJACENTCY_DIST``. The gap is signed
    (``second.sent_at - first.sent_at``), so a ``second`` that predates
    ``first`` also satisfies the time condition.
    """
    if first.sender != second.sender:
        return False
    gap = second.sent_at - first.sent_at
    return gap <= MSG_ADJACENTCY_DIST
# Trailing characters after which `merge_texts` inserts no extra period.
PUNCTUATION = ('.', '?', '!', ',', ':', ';')


def merge_texts(text1: str, text2: str) -> str:
    """Join two message texts into one, placing ``text2`` on a new line.

    If ``text1`` does not already end with one of the ``PUNCTUATION``
    characters, a period is appended to it before the line break.

    An empty operand is skipped entirely: the other text is returned as-is,
    so no lone ``.`` line or dangling newline is produced.

    Args:
        text1: Text of the earlier message.
        text2: Text of the later message.

    Returns:
        The combined text.
    """
    # Nothing to join on one side: return the other side untouched.
    if not text1:
        return text2
    if not text2:
        return text1
    # str.endswith accepts a tuple and matches any of its members.
    separator = '\n' if text1.endswith(PUNCTUATION) else '.\n'
    return text1 + separator + text2
def merge_adjacent_messages(messages: Iterable[Message]) -> list[Message]:
    """Collapse runs of adjacent messages into single messages.

    Each incoming message is compared, via ``is_adjacent_messages``, with the
    most recent entry of the result. If they are adjacent, that entry is
    replaced by a copy whose text is the ``merge_texts`` combination of both;
    otherwise the incoming message starts a new entry.

    ``dataclasses.replace`` overrides only ``text``, so every other field of
    a merged entry comes from the first message of its run.

    Args:
        messages: Messages in the order they should be considered.

    Returns:
        A new list with adjacent messages merged.
    """
    merged: list[Message] = []
    for current in messages:
        if not merged or not is_adjacent_messages(merged[-1], current):
            merged.append(current)
            continue
        previous = merged[-1]
        combined_text = merge_texts(previous.text, current.text)
        merged[-1] = dataclasses.replace(previous, text=combined_text)
    return merged