diff --git a/libpurple_to_markdown/__init__.py b/libpurple_to_markdown/__init__.py index 54dc868..c95b1f6 100644 --- a/libpurple_to_markdown/__init__.py +++ b/libpurple_to_markdown/__init__.py @@ -1,15 +1,18 @@ import dataclasses -import re import datetime -from pathlib import Path -import bs4 import logging +import re +from pathlib import Path + +import bs4 from ._version import __version__ + __all__ = ['__version__'] logger = logging.getLogger(__name__) + @dataclasses.dataclass(frozen=True, order=True) class Message: sent_at: datetime.datetime @@ -21,12 +24,18 @@ class Message: assert self.sender is not None assert self.text is not None -def datetime_sent(chat_start: datetime.datetime, message_sent: datetime.time) -> datetime.datetime: - naive = datetime.datetime.combine(chat_start.date(), message_sent, chat_start.tzinfo) + +def datetime_sent( + chat_start: datetime.datetime, message_sent: datetime.time, +) -> datetime.datetime: + naive = datetime.datetime.combine( + chat_start.date(), message_sent, chat_start.tzinfo, + ) if chat_start.time() > message_sent: naive = naive - datetime.timedelta(days=1) return naive + def parse_timestamp(c) -> datetime.time: timestamp_obj = c if c.font is not None: @@ -34,9 +43,12 @@ def parse_timestamp(c) -> datetime.time: m = re.match(r'\((\d+):(\d+):(\d+)\)', timestamp_obj.get_text()) return datetime.time(int(m.group(1)), int(m.group(2)), int(m.group(3))) + def parse_messages_in_chat_file(path: Path) -> list[Message]: logger.info('Parsing %s', path) - chat_start = datetime.datetime.fromisoformat(path.stem.removesuffix('CEST').removesuffix('CET')) + chat_start = datetime.datetime.fromisoformat( + path.stem.removesuffix('CEST').removesuffix('CET'), + ) with open(path) as f: soup = bs4.BeautifulSoup(f, 'lxml') @@ -53,12 +65,12 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]: if soup.p: logger.warning('File indicates error message?') - return [] # TODO + return [] # TODO else: loglines = soup.body.children for c in loglines: - if c.name in {'font','span'} and cur_sent_at is None: + if c.name in {'font', 'span'} and cur_sent_at is None: # Get timestamp cur_sent_at = datetime_sent(chat_start, parse_timestamp(c)) @@ -67,7 +79,7 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]: assert cur_sender is None cur_sender = c.b.get_text().strip().removesuffix(':') - elif c.name in {None,'span','font'}: + elif c.name in {None, 'span', 'font'}: cur_text += c.get_text() elif c.name == 'a': @@ -84,8 +96,7 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]: pass elif c.name in {'h1', 'h3'}: - pass # Ignore log header - + pass # Ignore log header else: assert False, c @@ -93,8 +104,6 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]: return messages - - def parse_messages_in_chat_folder(chat_folder_path: Path) -> list[Message]: messages = [] for file_path in chat_folder_path.iterdir(): @@ -112,6 +121,7 @@ def format_message_as_citation(out, msg): del line out.append('\n') + def format_message_as_table(out, msg): out.append(f'| {msg.sent_at} | [[{msg.sender}]] | ') for line in msg.text.split('\n'): @@ -119,17 +129,14 @@ def format_message_as_table(out, msg): del line out.append('|\n') + def format_messages(messages: list[Message]) -> str: - out = [ - '# Chat 2018' - '\n\n' - ] + out = ['# Chat 2018' '\n\n'] as_table = False - for msg_idx, msg in enumerate(messages): - if msg_idx == 0 or messages[msg_idx-1].sent_at.date() != msg.sent_at.date(): + if msg_idx == 0 or messages[msg_idx - 1].sent_at.date() != msg.sent_at.date(): out.append('---\n') out.append(f'## [[{msg.sent_at.date()}]]\n\n') if as_table: @@ -144,21 +151,30 @@ def format_messages(messages: list[Message]) -> str: return ''.join(out) + MSG_ADJACENTCY_DIST = datetime.timedelta(minutes=2) + def is_adjacent_messages(first, second): - return first.sender == second.sender and second.sent_at - first.sent_at <= MSG_ADJACENTCY_DIST + return ( + first.sender == second.sender + and second.sent_at - first.sent_at <= MSG_ADJACENTCY_DIST + ) + def merge_texts(text1: str, text2: str) -> str: punctuated = text1.endswith('.?!,:') - #return text1 + (' ' if punctuated else '. ') + text2 + # return text1 + (' ' if punctuated else '. ') + text2 return text1 + (' ' if punctuated else '. ') + '\n' + text2 + def merge_adjacent_messages(messages: list[Message]) -> list[Message]: out = [] for msg in messages: if out and is_adjacent_messages(out[-1], msg): - out[-1] = dataclasses.replace(out[-1], text=merge_texts(out[-1].text, msg.text)) + out[-1] = dataclasses.replace( + out[-1], text=merge_texts(out[-1].text, msg.text), + ) else: out.append(msg) return out diff --git a/libpurple_to_markdown/__main__.py b/libpurple_to_markdown/__main__.py index 5b4ea0b..c4ac203 100644 --- a/libpurple_to_markdown/__main__.py +++ b/libpurple_to_markdown/__main__.py @@ -2,14 +2,15 @@ import argparse import logging from pathlib import Path -from . import (parse_messages_in_chat_folder, merge_adjacent_messages, - format_messages) +from . import format_messages, merge_adjacent_messages, parse_messages_in_chat_folder + def parse_args(): parser = argparse.ArgumentParser() parser.add_argument('path', type=Path) return parser.parse_args() + def main(): logging.basicConfig() logging.getLogger().setLevel('INFO') @@ -19,5 +20,6 @@ def main(): messages = merge_adjacent_messages(messages) print(format_messages(messages)) + if __name__ == '__main__': main() diff --git a/test/test_init.py b/test/test_init.py index 0e5e703..bf34bc9 100644 --- a/test/test_init.py +++ b/test/test_init.py @@ -1,4 +1,5 @@ import libpurple_to_markdown + def test_version(): assert libpurple_to_markdown.__version__ is not None