From 29a3eaa97c732213cefd407846291eee14a470d2 Mon Sep 17 00:00:00 2001
From: Jon Michael Aanes
Date: Sat, 26 Oct 2024 02:08:45 +0200
Subject: [PATCH] Improved formatting

---
Review notes (text between the three-dash line above and the first
"diff --git" line is not applied to the tree):

* NOTE(review): line breaks and indentation were restored from a
  whitespace-collapsed copy of this patch. Indentation depth and the position
  of blank context lines were inferred from Python syntax and the hunk line
  counts; verify the context lines against the target tree before applying.
* merge_texts: str.endswith('.?!,:') only matches that literal five-character
  suffix, so texts ending in a single punctuation mark still had '. '
  appended. The check now passes a tuple of the individual marks.
* merge_texts: the commented-out alternative return statement was replaced by
  a comment explaining why the newline is inserted.
* format_message_as_citation / format_message_as_table: added type
  annotations, in line with format_messages and merge_texts.
* What the patch does: parse_messages_in_chat_file uses None instead of
  'NOT DEFINED' for a missing sender and no longer duplicates the accumulated
  text when appending a link; format_messages emits a horizontal rule and a
  '## [[date]]' heading whenever the message date changes and renders each
  message as a citation block (or as a table row when as_table is set);
  merge_adjacent_messages joins texts through merge_texts.

 libpurple_to_markdown/__init__.py | 46 +++++++++++++++++++++++++++----
 1 file changed, 40 insertions(+), 6 deletions(-)

diff --git a/libpurple_to_markdown/__init__.py b/libpurple_to_markdown/__init__.py
index 6785a14..9b32bf0 100644
--- a/libpurple_to_markdown/__init__.py
+++ b/libpurple_to_markdown/__init__.py
@@ -45,7 +45,7 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
     messages = []
 
     cur_sent_at: datetime.datetime | None = None
-    cur_sender: str = 'NOT DEFINED'
+    cur_sender: str | None = None
     cur_text: str = ''
 
     if soup.p:
@@ -61,18 +61,19 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
 
             # Get sender
             if c.b:
+                assert cur_sender is None
                 cur_sender = c.b.get_text().strip().removesuffix(':')
 
             elif c.name in {None,'span','font'}:
                 cur_text += c.get_text()
 
             elif c.name == 'a':
-                cur_text += cur_text + '<' + c['href'] + '>'
+                cur_text += '<' + c['href'] + '>'
 
             elif c.name == 'br':
                 messages.append(Message(cur_sent_at, cur_sender, cur_text.strip()))
                 cur_sent_at = None
-                cur_sender = 'NOT DEFINED'
+                cur_sender = None
                 cur_text = ''
 
             elif c.name == 'b':
@@ -100,14 +101,42 @@ def parse_messages_in_chat_folder(chat_folder_path: Path) -> list[Message]:
     return messages
 
 
+def format_message_as_citation(out: list[str], msg: Message) -> None:
+    out.append(f'{msg.sent_at.date()} {msg.sent_at.time()} [[{msg.sender}]]:')
+    out.append('\n')
+    for line in msg.text.split('\n'):
+        out.append(f'> {line}\n')
+    del line
+    out.append('\n')
+
+def format_message_as_table(out: list[str], msg: Message) -> None:
+    out.append(f'| {msg.sent_at} | [[{msg.sender}]] | ')
+    for line in msg.text.split('\n'):
+        out.append(f'{line}')
+    del line
+    out.append('|\n')
+
 def format_messages(messages: list[Message]) -> str:
     out = [
         '# Chat 2018'
         '\n\n'
     ]
 
-    for msg in messages:
-        out.append(f'[[{msg.sent_at.date()}]] {msg.sent_at.time()} [[{msg.sender}]]:\n> {msg.text}\n')
+    as_table = False
+
+
+    for msg_idx, msg in enumerate(messages):
+        if msg_idx == 0 or messages[msg_idx-1].sent_at.date() != msg.sent_at.date():
+            out.append('---\n')
+            out.append(f'## [[{msg.sent_at.date()}]]\n\n')
+            if as_table:
+                out.append('| sent at | sender | text |\n')
+                out.append('| ------- | ------ | ---- |\n')
+
+        if as_table:
+            format_message_as_table(out, msg)
+        else:
+            format_message_as_citation(out, msg)
     del msg
 
     return ''.join(out)
@@ -117,11 +146,16 @@ MSG_ADJACENTCY_DIST = datetime.timedelta(minutes=2)
 def is_adjacent_messages(first, second):
     return first.sender == second.sender and second.sent_at - first.sent_at <= MSG_ADJACENTCY_DIST
 
+def merge_texts(text1: str, text2: str) -> str:
+    punctuated = text1.endswith(('.', '?', '!', ',', ':'))
+    # The newline keeps each merged message on its own line in the output.
+    return text1 + (' ' if punctuated else '. ') + '\n' + text2
+
 def merge_adjacent_messages(messages: list[Message]) -> list[Message]:
     out = []
     for msg in messages:
         if out and is_adjacent_messages(out[-1], msg):
-            out[-1] = dataclasses.replace(out[-1], text=out[-1].text + '\n\n' + msg.text)
+            out[-1] = dataclasses.replace(out[-1], text=merge_texts(out[-1].text, msg.text))
         else:
             out.append(msg)
     return out