1
0

Improvements

This commit is contained in:
Jon Michael Aanes 2024-10-26 14:53:31 +02:00
parent 0722dc96e5
commit fa50a6970a
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
2 changed files with 30 additions and 16 deletions

View File

@ -1,5 +1,6 @@
import dataclasses import dataclasses
import datetime import datetime
from collections.abc import Iterator, Iterable
import logging import logging
import re import re
from pathlib import Path from pathlib import Path
@ -32,7 +33,7 @@ def datetime_sent(
chat_start.date(), message_sent, chat_start.tzinfo, chat_start.date(), message_sent, chat_start.tzinfo,
) )
if chat_start.time() > message_sent: if chat_start.time() > message_sent:
naive = naive - datetime.timedelta(days=1) naive = naive + datetime.timedelta(days=1)
return naive return naive
@ -63,9 +64,8 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
cur_sender: str | None = None cur_sender: str | None = None
cur_text: str = '' cur_text: str = ''
if soup.p: if soup.body.p:
logger.warning('File indicates error message?') loglines = soup.body.p.children
return [] # TODO
else: else:
loglines = soup.body.children loglines = soup.body.children
@ -77,7 +77,7 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
# Get sender # Get sender
if c.b: if c.b:
assert cur_sender is None assert cur_sender is None
cur_sender = c.b.get_text().strip().removesuffix(':') cur_sender = c.b.get_text().strip().removesuffix(':').removeprefix('***').removesuffix('[m]')
elif c.name in {None, 'span', 'font'}: elif c.name in {None, 'span', 'font'}:
cur_text += c.get_text() cur_text += c.get_text()
@ -86,6 +86,7 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
cur_text += '<' + c['href'] + '>' cur_text += '<' + c['href'] + '>'
elif c.name == 'br': elif c.name == 'br':
if cur_sender:
messages.append(Message(cur_sent_at, cur_sender, cur_text.strip())) messages.append(Message(cur_sent_at, cur_sender, cur_text.strip()))
cur_sent_at = None cur_sent_at = None
cur_sender = None cur_sender = None
@ -106,23 +107,25 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
def parse_messages_in_chat_folder(chat_folder_path: Path) -> list[Message]:
    """Parse every chat file directly inside *chat_folder_path*.

    Entries are visited in sorted path order so runs are reproducible, each
    one is handed to ``parse_messages_in_chat_file``, and the combined list
    is sorted before it is returned (``Message`` must therefore support
    ordering).

    Returns:
        All parsed messages in sorted order; an empty list when the folder
        contains no chat files.
    """
    messages: list[Message] = []
    for file_path in sorted(chat_folder_path.iterdir()):
        # Only regular files are treated as chat logs; anything else
        # (e.g. a nested directory) is presumably not parseable and is skipped.
        if not file_path.is_file():
            continue
        messages.extend(parse_messages_in_chat_file(file_path))
    messages.sort()
    return messages
# Matches text that gets wrapped in backticks when quoted: "<...>" tokens made
# of word characters/spaces, and the literal "$$$".
_CITATION_LITERAL_RE = re.compile(r'(<[\w ]+>|\$\$\$)')


def format_message_as_citation(out: list[str], msg: Message) -> None:
    """Append *msg* to *out* as a Markdown block quote.

    The first fragment is a header of the form ``<date> <time> [[sender]]:``.
    Every line of the message text then follows as its own ``> `` quoted
    line, and a blank line closes the citation.

    ``<...>`` tokens and ``$$$`` are wrapped in backticks, presumably so a
    Markdown renderer shows them literally instead of interpreting them.

    Args:
        out: List of output fragments; extended in place.
        msg: Message to render; ``sent_at``, ``sender`` and ``text`` are read.
    """
    out.append(f'{msg.sent_at.date()} {msg.sent_at.time()} [[{msg.sender}]]:')
    out.append('\n')
    for raw_line in msg.text.split('\n'):
        quoted = _CITATION_LITERAL_RE.sub(r'`\1`', raw_line)
        out.append(f'> {quoted}\n')
    out.append('\n')
def format_message_as_table(out, msg): def format_message_as_table(out: list[str], msg: Message) -> None:
out.append(f'| {msg.sent_at} | [[{msg.sender}]] | ') out.append(f'| {msg.sent_at} | [[{msg.sender}]] | ')
for line in msg.text.split('\n'): for line in msg.text.split('\n'):
out.append(f'{line}') out.append(f'{line}')
@ -130,8 +133,8 @@ def format_message_as_table(out, msg):
out.append('|\n') out.append('|\n')
def format_messages(messages: list[Message]) -> str: def format_messages(messages: list[Message], title: str) -> str:
out = ['# Chat 2018' '\n\n'] out = ['# ', title, '\n\n']
as_table = False as_table = False
@ -154,8 +157,17 @@ def format_messages(messages: list[Message]) -> str:
MSG_ADJACENTCY_DIST = datetime.timedelta(minutes=2) MSG_ADJACENTCY_DIST = datetime.timedelta(minutes=2)
def is_useless_message(msg: Message) -> bool:
    """Return True when *msg* should be dropped from the output.

    A message counts as useless when its sender is the empty string or when
    the sender name ends with the ``<AUTO-REPLY>`` marker.
    """
    sender = msg.sender
    return sender == '' or sender.endswith('<AUTO-REPLY>')


def filter_useless_messages(messages: Iterable[Message]) -> Iterator[Message]:
    """Lazily yield the messages for which ``is_useless_message`` is false.

    Dropped messages are reported through the module logger at debug level
    rather than printed, so filtering never writes to stdout.
    """
    log = logging.getLogger(__name__)
    for msg in messages:
        if is_useless_message(msg):
            log.debug('Dropping useless message: %s', msg.text)
            continue
        yield msg
def is_adjacent_messages(first: Message, second: Message) -> bool:
return ( return (
first.sender == second.sender first.sender == second.sender
and second.sent_at - first.sent_at <= MSG_ADJACENTCY_DIST and second.sent_at - first.sent_at <= MSG_ADJACENTCY_DIST
@ -168,7 +180,7 @@ def merge_texts(text1: str, text2: str) -> str:
return text1 + (' ' if punctuated else '. ') + '\n' + text2 return text1 + (' ' if punctuated else '. ') + '\n' + text2
def merge_adjacent_messages(messages: list[Message]) -> list[Message]: def merge_adjacent_messages(messages: Iterable[Message]) -> list[Message]:
out = [] out = []
for msg in messages: for msg in messages:
if out and is_adjacent_messages(out[-1], msg): if out and is_adjacent_messages(out[-1], msg):

View File

@ -3,7 +3,8 @@ import logging
from pathlib import Path from pathlib import Path
import logging import logging
from . import (format_messages, merge_adjacent_messages, parse_messages_in_chat_folder, Message) from . import (format_messages, merge_adjacent_messages,
parse_messages_in_chat_folder, Message, filter_useless_messages)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -30,15 +31,16 @@ def main():
receipient = args.path.name receipient = args.path.name
all_messages = parse_messages_in_chat_folder(args.path) all_messages = parse_messages_in_chat_folder(args.path)
all_messages = filter_useless_messages(all_messages)
all_messages = merge_adjacent_messages(all_messages) all_messages = merge_adjacent_messages(all_messages)
messages_by_year = group_messages_by_year(all_messages) messages_by_year = group_messages_by_year(all_messages)
for year, messages in messages_by_year.items(): for year, messages in messages_by_year.items():
output_file = args.output / f'{server} - {receipient} - {year}.md' output_file = args.output / f'{server} - {receipient} - {year}.md'
logger.info("Writing to %s", output_file) logger.info("Writing %d messages to %s", len(messages), output_file)
with open(output_file, 'w') as f: with open(output_file, 'w') as f:
f.write(format_messages(messages)) f.write(format_messages(messages, title = f'{server} - {receipient} - {year}'))
del year, messages, output_file del year, messages, output_file