1
0
This commit is contained in:
Jon Michael Aanes 2024-10-26 15:17:46 +02:00
parent a54cb92d69
commit 384a6a7093
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
2 changed files with 36 additions and 11 deletions

View File

@ -24,11 +24,12 @@ python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER
It was made specifically for import into Obsidian, so it might not suit your
purposes, but it shouldn't be too difficult to adjust.
"""
import dataclasses
import datetime
from collections.abc import Iterator, Iterable
import logging
import re
from collections.abc import Iterable, Iterator
from pathlib import Path
import bs4
@ -53,10 +54,13 @@ class Message:
def datetime_sent(
chat_start: datetime.datetime, message_sent: datetime.time,
chat_start: datetime.datetime,
message_sent: datetime.time,
) -> datetime.datetime:
naive = datetime.datetime.combine(
chat_start.date(), message_sent, chat_start.tzinfo,
chat_start.date(),
message_sent,
chat_start.tzinfo,
)
if chat_start.time() > message_sent:
naive = naive + datetime.timedelta(days=1)
@ -103,7 +107,13 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
# Get sender
if c.b:
assert cur_sender is None
cur_sender = c.b.get_text().strip().removesuffix(':').removeprefix('***').removesuffix('[m]')
cur_sender = (
c.b.get_text()
.strip()
.removesuffix(':')
.removeprefix('***')
.removesuffix('[m]')
)
elif c.name in {None, 'span', 'font'}:
cur_text += c.get_text()
@ -183,9 +193,11 @@ def format_messages(messages: list[Message], title: str) -> str:
# Two-minute time window.
# NOTE(review): presumably the largest gap at which two messages still count
# as adjacent -- confirm against is_adjacent_messages, whose body is not
# fully visible here.
MSG_ADJACENTCY_DIST = datetime.timedelta(minutes=2)
def is_useless_message(msg: Message) -> bool:
    """Report whether a message should be dropped from the export.

    A message is considered useless when its sender name ends with the
    ``<AUTO-REPLY>`` marker, or when the sender is the empty string.
    """
    sender = msg.sender
    if sender.endswith('<AUTO-REPLY>'):
        return True
    return sender == ''
def filter_useless_messages(messages: Iterable[Message]) -> Iterator[Message]:
for msg in messages:
if not is_useless_message(msg):
@ -193,6 +205,7 @@ def filter_useless_messages(messages: Iterable[Message]) -> Iterator[Message]:
else:
print(msg.text)
def is_adjacent_messages(first: Message, second: Message) -> bool:
return (
first.sender == second.sender
@ -211,7 +224,8 @@ def merge_adjacent_messages(messages: Iterable[Message]) -> list[Message]:
for msg in messages:
if out and is_adjacent_messages(out[-1], msg):
out[-1] = dataclasses.replace(
out[-1], text=merge_texts(out[-1].text, msg.text),
out[-1],
text=merge_texts(out[-1].text, msg.text),
)
else:
out.append(msg)

View File

@ -1,21 +1,27 @@
import argparse
import logging
from pathlib import Path
import logging
from . import (format_messages, merge_adjacent_messages,
parse_messages_in_chat_folder, Message, filter_useless_messages)
from . import (
Message,
filter_useless_messages,
format_messages,
merge_adjacent_messages,
parse_messages_in_chat_folder,
)
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def group_messages_by_period(messages: list[Message]) -> dict[str, list[Message]]:
    """Group messages into calendar-month buckets.

    Args:
        messages: Messages to group. Each one must expose a ``sent_at``
            value with ``year`` and ``month`` attributes.

    Returns:
        A dict mapping ``'YYYY-MM'`` keys (month zero-padded to two digits)
        to the messages sent in that month. Keys appear in order of first
        occurrence and every bucket keeps the input order. An empty input
        yields an empty dict.
    """
    by_period: dict[str, list[Message]] = {}
    for msg in messages:
        period_key = f'{msg.sent_at.year}-{msg.sent_at.month:02}'
        # setdefault creates the bucket on first sight of a period.
        by_period.setdefault(period_key, []).append(msg)
    return by_period
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('path', type=Path)
@ -39,9 +45,14 @@ def main():
for period_key, messages in messages_by_period.items():
output_file = args.output / f'{server} - {receipient} - {period_key}.md'
logger.info("Writing % 5d messages to %s", len(messages), output_file)
logger.info('Writing % 5d messages to %s', len(messages), output_file)
with open(output_file, 'w') as f:
f.write(format_messages(messages, title = f'{server} - {receipient} - {period_key}'))
f.write(
format_messages(
messages,
title=f'{server} - {receipient} - {period_key}',
),
)
del period_key, messages, output_file