From 7568b862e2cdba9df5d86e17a4e225f14dc5de55 Mon Sep 17 00:00:00 2001
From: Jon Michael Aanes
Date: Sun, 3 Nov 2024 23:57:15 +0100
Subject: [PATCH] Improved text formatting

---
 libpurple_to_markdown/__init__.py | 5 +++--
 libpurple_to_markdown/markdown.py | 2 ++
 test/test_join.py                 | 9 +++++++++
 3 files changed, 14 insertions(+), 2 deletions(-)
 create mode 100644 test/test_join.py

diff --git a/libpurple_to_markdown/__init__.py b/libpurple_to_markdown/__init__.py
index 5e1996a..56b3b1e 100644
--- a/libpurple_to_markdown/__init__.py
+++ b/libpurple_to_markdown/__init__.py
@@ -88,11 +88,12 @@ def is_adjacent_messages(first: Message, second: Message) -> bool:
         and second.sent_at - first.sent_at <= MSG_ADJACENTCY_DIST
     )
 
+PUNCTUATION = ('.','?','!',',',':',';')
 
 def merge_texts(text1: str, text2: str) -> str:
-    punctuated = text1.endswith('.?!,:')
+    punctuated = text1.endswith(PUNCTUATION)
     # return text1 + (' ' if punctuated else '. ') + text2
-    return text1 + (' ' if punctuated else '. ') + '\n' + text2
+    return text1 + ('' if punctuated else '.') + '\n' + text2
 
 
 def merge_adjacent_messages(messages: Iterable[Message]) -> list[Message]:
diff --git a/libpurple_to_markdown/markdown.py b/libpurple_to_markdown/markdown.py
index 2139e65..de31a8f 100644
--- a/libpurple_to_markdown/markdown.py
+++ b/libpurple_to_markdown/markdown.py
@@ -7,6 +7,8 @@ from .data import Message
 def normalize_line(line: str) -> str:
     line = re.sub(r'(<[\w ]+>)', r'`\1`', line)
     line = re.sub(r'(\$\$\$)', r'`\1`', line)
+    line = re.sub(r'\s\s', ' ', line)
+    line = re.sub(r'\s+([.:;!?])', r'\1', line)
     return line.strip()
 
 def format_message_as_citation(out: list[str], msg: Message) -> None:
diff --git a/test/test_join.py b/test/test_join.py
new file mode 100644
index 0000000..3dc2820
--- /dev/null
+++ b/test/test_join.py
@@ -0,0 +1,9 @@
+import datetime
+
+from libpurple_to_markdown import merge_texts
+
+def test_merge():
+    assert merge_texts('Hello World', 'Hello World.') == 'Hello World.\nHello World.'
+    assert merge_texts('Hello World.', 'Hello World.') == 'Hello World.\nHello World.'
+    assert merge_texts('Hello World?', 'Hello World.') == 'Hello World?\nHello World.'
+