1
0
This commit is contained in:
Jon Michael Aanes 2024-10-26 15:17:46 +02:00
parent a54cb92d69
commit 384a6a7093
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
2 changed files with 36 additions and 11 deletions

View File

@ -24,11 +24,12 @@ python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER
It was made specifically for import into Obsidian, so it might not suit your It was made specifically for import into Obsidian, so it might not suit your
purposes, but it shouldn't be too difficult to adjust. purposes, but it shouldn't be too difficult to adjust.
""" """
import dataclasses import dataclasses
import datetime import datetime
from collections.abc import Iterator, Iterable
import logging import logging
import re import re
from collections.abc import Iterable, Iterator
from pathlib import Path from pathlib import Path
import bs4 import bs4
@ -53,10 +54,13 @@ class Message:
def datetime_sent( def datetime_sent(
chat_start: datetime.datetime, message_sent: datetime.time, chat_start: datetime.datetime,
message_sent: datetime.time,
) -> datetime.datetime: ) -> datetime.datetime:
naive = datetime.datetime.combine( naive = datetime.datetime.combine(
chat_start.date(), message_sent, chat_start.tzinfo, chat_start.date(),
message_sent,
chat_start.tzinfo,
) )
if chat_start.time() > message_sent: if chat_start.time() > message_sent:
naive = naive + datetime.timedelta(days=1) naive = naive + datetime.timedelta(days=1)
@ -103,7 +107,13 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
# Get sender # Get sender
if c.b: if c.b:
assert cur_sender is None assert cur_sender is None
cur_sender = c.b.get_text().strip().removesuffix(':').removeprefix('***').removesuffix('[m]') cur_sender = (
c.b.get_text()
.strip()
.removesuffix(':')
.removeprefix('***')
.removesuffix('[m]')
)
elif c.name in {None, 'span', 'font'}: elif c.name in {None, 'span', 'font'}:
cur_text += c.get_text() cur_text += c.get_text()
@ -183,9 +193,11 @@ def format_messages(messages: list[Message], title: str) -> str:
MSG_ADJACENTCY_DIST = datetime.timedelta(minutes=2) MSG_ADJACENTCY_DIST = datetime.timedelta(minutes=2)
def is_useless_message(msg: Message) -> bool: def is_useless_message(msg: Message) -> bool:
return msg.sender.endswith('<AUTO-REPLY>') or msg.sender == '' return msg.sender.endswith('<AUTO-REPLY>') or msg.sender == ''
def filter_useless_messages(messages: Iterable[Message]) -> Iterator[Message]: def filter_useless_messages(messages: Iterable[Message]) -> Iterator[Message]:
for msg in messages: for msg in messages:
if not is_useless_message(msg): if not is_useless_message(msg):
@ -193,6 +205,7 @@ def filter_useless_messages(messages: Iterable[Message]) -> Iterator[Message]:
else: else:
print(msg.text) print(msg.text)
def is_adjacent_messages(first: Message, second: Message) -> bool: def is_adjacent_messages(first: Message, second: Message) -> bool:
return ( return (
first.sender == second.sender first.sender == second.sender
@ -211,7 +224,8 @@ def merge_adjacent_messages(messages: Iterable[Message]) -> list[Message]:
for msg in messages: for msg in messages:
if out and is_adjacent_messages(out[-1], msg): if out and is_adjacent_messages(out[-1], msg):
out[-1] = dataclasses.replace( out[-1] = dataclasses.replace(
out[-1], text=merge_texts(out[-1].text, msg.text), out[-1],
text=merge_texts(out[-1].text, msg.text),
) )
else: else:
out.append(msg) out.append(msg)

View File

@ -1,13 +1,18 @@
import argparse import argparse
import logging import logging
from pathlib import Path from pathlib import Path
import logging
from . import (format_messages, merge_adjacent_messages, from . import (
parse_messages_in_chat_folder, Message, filter_useless_messages) Message,
filter_useless_messages,
format_messages,
merge_adjacent_messages,
parse_messages_in_chat_folder,
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def group_messages_by_period(messages: list[Message]) -> dict[str, list[Message]]: def group_messages_by_period(messages: list[Message]) -> dict[str, list[Message]]:
by_period: dict[str, list[Message]] = {} by_period: dict[str, list[Message]] = {}
for msg in messages: for msg in messages:
@ -16,6 +21,7 @@ def group_messages_by_period(messages: list[Message]) -> dict[str, list[Message]
del msg del msg
return by_period return by_period
def parse_args(): def parse_args():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('path', type=Path) parser.add_argument('path', type=Path)
@ -39,9 +45,14 @@ def main():
for period_key, messages in messages_by_period.items(): for period_key, messages in messages_by_period.items():
output_file = args.output / f'{server} - {receipient} - {period_key}.md' output_file = args.output / f'{server} - {receipient} - {period_key}.md'
logger.info("Writing % 5d messages to %s", len(messages), output_file) logger.info('Writing % 5d messages to %s', len(messages), output_file)
with open(output_file, 'w') as f: with open(output_file, 'w') as f:
f.write(format_messages(messages, title = f'{server} - {receipient} - {period_key}')) f.write(
format_messages(
messages,
title=f'{server} - {receipient} - {period_key}',
),
)
del period_key, messages, output_file del period_key, messages, output_file