135 lines
3.9 KiB
Python
135 lines
3.9 KiB
Python
"""# Markdown Message Conversion.
|
|
|
|
Conversion script from various messaging formats to markdown.
|
|
|
|
Supported input formats:
|
|
|
|
- [Pidgin/Libpurple](https://pidgin.im/) chat program HTML-based logs. **This
|
|
backend is not actively maintained.**
|
|
- [SyncTech Backup & Restore](https://www.synctech.com.au/sms-backup-restore/)
|
|
XML-based backup format.
|
|
|
|
## Motivation
|
|
|
|
Messaging applications are mostly good at sending real-time messages to other
|
|
people, but they generally do not possess any useful archival features. Most
|
|
messages are write-once read-once, and the apps were built for this use case.
|
|
More and more, though, I am attracted to the prospect of archival; of
|
|
understanding who I am and who I _was_ when I wrote those messages.
|
|
|
|
I recently discovered [Obsidian](https://obsidian.md) and liked the prospect of
|
|
cross-referencing my notes with my old chat logs. Libpurple uses HTML logs if
|
|
you haven't configured it to something else (which I hadn't).
|
|
|
|
I no longer use IRC or Pidgin as my entire friend group have switched to using
|
|
Matrix.
|
|
|
|
## Usage
|
|
|
|
There are two main import patterns:
|
|
|
|
- One-off archival import: For when you have a large set of messages to import
|
|
for a service that you don't use very much anymore.
|
|
- Recurring import: For when you are still using the service, and want to
|
|
import on a recurring basis.
|
|
|
|
This program will by default not overwrite existing files, as the user might
|
|
have modified them.
|
|
|
|
Special consideration must be taken for recurring imports if you expect to be
|
|
modifying the resulting files, for example if you are inserting links
|
|
using Obsidian's unlinked mentions feature. You might want to use the
|
|
`--skip-this-period` flag to avoid importing the current period until it has
|
|
become the last period. That way you won't accidentally modify the log, because
|
|
it has been finalized.
|
|
|
|
### One-off
|
|
|
|
This is the recommended command for the one-off case:
|
|
|
|
```bash
|
|
python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER
|
|
```
|
|
|
|
### Recurring
|
|
|
|
This is the recommended command for the recurring import case:
|
|
|
|
```bash
|
|
python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER --skip-this-period --period month
|
|
```
|
|
|
|
## TODO
|
|
|
|
- [ ] SyncTech: Decode MMS parts and reconstruct image attachments.
|
|
"""
|
|
|
|
import dataclasses
|
|
import datetime
|
|
import logging
|
|
from collections.abc import Iterable, Iterator
|
|
|
|
from ._version import __version__
|
|
from .data import Message
|
|
|
|
# Names exported by `from <package> import *`: only the version string.
__all__ = ['__version__']
|
|
|
|
# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def datetime_sent(
    chat_start: datetime.datetime,
    message_sent: datetime.time,
) -> datetime.datetime:
    """Resolve a message's time-of-day into a full timestamp.

    The message is placed on the calendar date of ``chat_start`` and is given
    the same ``tzinfo``. If the chat started later in the day than
    ``message_sent``, the message is placed on the following day instead.

    Args:
        chat_start: Moment the chat began; supplies the date and ``tzinfo``.
        message_sent: Wall-clock time at which the message was sent.

    Returns:
        ``message_sent`` on the start date of the chat, or exactly one day
        later when the start time of the chat is after ``message_sent``.
    """
    same_day = datetime.datetime.combine(
        chat_start.date(),
        message_sent,
        tzinfo=chat_start.tzinfo,
    )
    if chat_start.time() > message_sent:
        # A clock time earlier than the chat start is treated as having
        # crossed midnight.
        return same_day + datetime.timedelta(days=1)
    return same_day
|
|
|
|
|
|
# Threshold that `is_adjacent_messages` compares the difference between two
# messages' `sent_at` values against.
MSG_ADJACENTCY_DIST = datetime.timedelta(minutes=2)
|
|
|
|
|
|
def is_useless_message(msg: Message) -> bool:
    """Tell whether a message carries nothing worth archiving.

    A message is considered useless when its sender is the empty string or
    when the sender name ends with the ``<AUTO-REPLY>`` marker.

    Args:
        msg: Message whose ``sender`` field is inspected.

    Returns:
        ``True`` if the message should be discarded, ``False`` otherwise.
    """
    sender = msg.sender
    if sender.endswith('<AUTO-REPLY>'):
        return True
    return sender == ''
|
|
|
|
|
|
def filter_useless_messages(messages: Iterable[Message]) -> Iterator[Message]:
    """Lazily yield the messages worth keeping, in their original order.

    Every message for which `is_useless_message` returns true is dropped.
    The text of each dropped message is reported through the module logger
    at DEBUG level instead of being written to stdout, so callers decide
    whether these diagnostics are shown.

    Args:
        messages: Messages to filter.

    Yields:
        Each message that `is_useless_message` does not flag.
    """
    for msg in messages:
        if is_useless_message(msg):
            logger.debug('Discarding useless message: %s', msg.text)
            continue
        yield msg
|
|
|
|
|
|
def is_adjacent_messages(first: Message, second: Message) -> bool:
    """Tell whether two messages are close enough to be merged.

    Two messages are adjacent when they have the same sender and their
    ``sent_at`` values differ by at most `MSG_ADJACENTCY_DIST`.

    The absolute gap is compared: a signed difference would be negative
    whenever ``second`` was sent before ``first``, and a negative
    ``timedelta`` is always below the threshold, so out-of-order messages
    arbitrarily far apart would wrongly count as adjacent.

    Args:
        first: One of the two messages.
        second: The other message.

    Returns:
        ``True`` if both messages share a sender and lie within the
        adjacency threshold of each other, ``False`` otherwise.
    """
    if first.sender != second.sender:
        return False
    gap = abs(second.sent_at - first.sent_at)
    return gap <= MSG_ADJACENTCY_DIST
|
|
|
|
|
|
# Characters that already terminate a text; `merge_texts` does not append a
# period after a text ending in one of these.
PUNCTUATION = ('.', '?', '!', ',', ':', ';')


def merge_texts(text1: str, text2: str) -> str:
    """Join two message texts into one, each on its own line.

    A period is appended to ``text1`` unless it already ends with one of the
    `PUNCTUATION` characters, and the two texts are separated by a newline.

    If either text is empty the other is returned unchanged. Without this,
    an empty ``text1`` would produce a line holding only ``.``, and an empty
    ``text2`` would leave a trailing newline that makes the next merge emit
    such a line.

    Args:
        text1: Text of the earlier message.
        text2: Text of the later message.

    Returns:
        The combined text.
    """
    if not text1:
        return text2
    if not text2:
        return text1
    terminator = '' if text1.endswith(PUNCTUATION) else '.'
    return text1 + terminator + '\n' + text2
|
|
|
|
|
|
def merge_adjacent_messages(messages: Iterable[Message]) -> list[Message]:
    """Collapse runs of adjacent messages into single messages.

    Each incoming message is compared, via `is_adjacent_messages`, with the
    last message collected so far. When the two are adjacent, that last
    message is replaced by a copy whose ``text`` is the `merge_texts`
    combination of both texts; otherwise the incoming message is appended
    as-is.

    Args:
        messages: Messages to process, in the order they should be merged.

    Returns:
        A new list with adjacent messages merged.
    """
    merged: list[Message] = []
    for current in messages:
        if not merged or not is_adjacent_messages(merged[-1], current):
            merged.append(current)
            continue
        previous = merged[-1]
        combined_text = merge_texts(previous.text, current.text)
        merged[-1] = dataclasses.replace(previous, text=combined_text)
    return merged
|