Ruff
This commit is contained in:
parent
241c4f92c5
commit
4afb0f1364
|
@ -1,15 +1,18 @@
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import re
|
|
||||||
import datetime
|
import datetime
|
||||||
from pathlib import Path
|
|
||||||
import bs4
|
|
||||||
import logging
|
import logging
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import bs4
|
||||||
|
|
||||||
from ._version import __version__
|
from ._version import __version__
|
||||||
|
|
||||||
__all__ = ['__version__']
|
__all__ = ['__version__']
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass(frozen=True, order=True)
|
@dataclasses.dataclass(frozen=True, order=True)
|
||||||
class Message:
|
class Message:
|
||||||
sent_at: datetime.datetime
|
sent_at: datetime.datetime
|
||||||
|
@ -21,12 +24,18 @@ class Message:
|
||||||
assert self.sender is not None
|
assert self.sender is not None
|
||||||
assert self.text is not None
|
assert self.text is not None
|
||||||
|
|
||||||
def datetime_sent(
    chat_start: datetime.datetime, message_sent: datetime.time,
) -> datetime.datetime:
    """Resolve a message's time-of-day stamp into a full datetime.

    The message stamp carries no date, so the date (and tzinfo) is borrowed
    from ``chat_start``.

    Args:
        chat_start: Moment the chat began; supplies the date and tzinfo.
        message_sent: Time of day taken from the message's timestamp.

    Returns:
        ``message_sent`` placed on the chat's start date, or on the following
        day when its time of day is earlier than the chat's start time.
    """
    sent_at = datetime.datetime.combine(
        chat_start.date(), message_sent, chat_start.tzinfo,
    )
    # A time of day earlier than the chat start is treated as a midnight
    # rollover, so the message lands on the next calendar day. The previous
    # code subtracted a day, which put the message ~24h *before* the chat.
    # NOTE(review): assumes messages are never stamped before the chat start
    # and that a chat spans less than 24 hours -- confirm against real logs.
    if chat_start.time() > message_sent:
        sent_at += datetime.timedelta(days=1)
    return sent_at
|
||||||
|
|
||||||
|
|
||||||
def parse_timestamp(c) -> datetime.time:
|
def parse_timestamp(c) -> datetime.time:
|
||||||
timestamp_obj = c
|
timestamp_obj = c
|
||||||
if c.font is not None:
|
if c.font is not None:
|
||||||
|
@ -34,9 +43,12 @@ def parse_timestamp(c) -> datetime.time:
|
||||||
m = re.match(r'\((\d+):(\d+):(\d+)\)', timestamp_obj.get_text())
|
m = re.match(r'\((\d+):(\d+):(\d+)\)', timestamp_obj.get_text())
|
||||||
return datetime.time(int(m.group(1)), int(m.group(2)), int(m.group(3)))
|
return datetime.time(int(m.group(1)), int(m.group(2)), int(m.group(3)))
|
||||||
|
|
||||||
|
|
||||||
def parse_messages_in_chat_file(path: Path) -> list[Message]:
|
def parse_messages_in_chat_file(path: Path) -> list[Message]:
|
||||||
logger.info('Parsing %s', path)
|
logger.info('Parsing %s', path)
|
||||||
chat_start = datetime.datetime.fromisoformat(path.stem.removesuffix('CEST').removesuffix('CET'))
|
chat_start = datetime.datetime.fromisoformat(
|
||||||
|
path.stem.removesuffix('CEST').removesuffix('CET'),
|
||||||
|
)
|
||||||
|
|
||||||
with open(path) as f:
|
with open(path) as f:
|
||||||
soup = bs4.BeautifulSoup(f, 'lxml')
|
soup = bs4.BeautifulSoup(f, 'lxml')
|
||||||
|
@ -58,7 +70,7 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
|
||||||
loglines = soup.body.children
|
loglines = soup.body.children
|
||||||
|
|
||||||
for c in loglines:
|
for c in loglines:
|
||||||
if c.name in {'font','span'} and cur_sent_at is None:
|
if c.name in {'font', 'span'} and cur_sent_at is None:
|
||||||
# Get timestamp
|
# Get timestamp
|
||||||
cur_sent_at = datetime_sent(chat_start, parse_timestamp(c))
|
cur_sent_at = datetime_sent(chat_start, parse_timestamp(c))
|
||||||
|
|
||||||
|
@ -67,7 +79,7 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
|
||||||
assert cur_sender is None
|
assert cur_sender is None
|
||||||
cur_sender = c.b.get_text().strip().removesuffix(':')
|
cur_sender = c.b.get_text().strip().removesuffix(':')
|
||||||
|
|
||||||
elif c.name in {None,'span','font'}:
|
elif c.name in {None, 'span', 'font'}:
|
||||||
cur_text += c.get_text()
|
cur_text += c.get_text()
|
||||||
|
|
||||||
elif c.name == 'a':
|
elif c.name == 'a':
|
||||||
|
@ -86,15 +98,12 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
|
||||||
elif c.name in {'h1', 'h3'}:
|
elif c.name in {'h1', 'h3'}:
|
||||||
pass # Ignore log header
|
pass # Ignore log header
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
assert False, c
|
assert False, c
|
||||||
|
|
||||||
return messages
|
return messages
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def parse_messages_in_chat_folder(chat_folder_path: Path) -> list[Message]:
|
def parse_messages_in_chat_folder(chat_folder_path: Path) -> list[Message]:
|
||||||
messages = []
|
messages = []
|
||||||
for file_path in chat_folder_path.iterdir():
|
for file_path in chat_folder_path.iterdir():
|
||||||
|
@ -112,6 +121,7 @@ def format_message_as_citation(out, msg):
|
||||||
del line
|
del line
|
||||||
out.append('\n')
|
out.append('\n')
|
||||||
|
|
||||||
|
|
||||||
def format_message_as_table(out, msg):
|
def format_message_as_table(out, msg):
|
||||||
out.append(f'| {msg.sent_at} | [[{msg.sender}]] | ')
|
out.append(f'| {msg.sent_at} | [[{msg.sender}]] | ')
|
||||||
for line in msg.text.split('\n'):
|
for line in msg.text.split('\n'):
|
||||||
|
@ -119,17 +129,14 @@ def format_message_as_table(out, msg):
|
||||||
del line
|
del line
|
||||||
out.append('|\n')
|
out.append('|\n')
|
||||||
|
|
||||||
|
|
||||||
def format_messages(messages: list[Message]) -> str:
|
def format_messages(messages: list[Message]) -> str:
|
||||||
out = [
|
out = ['# Chat 2018' '\n\n']
|
||||||
'# Chat 2018'
|
|
||||||
'\n\n'
|
|
||||||
]
|
|
||||||
|
|
||||||
as_table = False
|
as_table = False
|
||||||
|
|
||||||
|
|
||||||
for msg_idx, msg in enumerate(messages):
|
for msg_idx, msg in enumerate(messages):
|
||||||
if msg_idx == 0 or messages[msg_idx-1].sent_at.date() != msg.sent_at.date():
|
if msg_idx == 0 or messages[msg_idx - 1].sent_at.date() != msg.sent_at.date():
|
||||||
out.append('---\n')
|
out.append('---\n')
|
||||||
out.append(f'## [[{msg.sent_at.date()}]]\n\n')
|
out.append(f'## [[{msg.sent_at.date()}]]\n\n')
|
||||||
if as_table:
|
if as_table:
|
||||||
|
@ -144,21 +151,30 @@ def format_messages(messages: list[Message]) -> str:
|
||||||
|
|
||||||
return ''.join(out)
|
return ''.join(out)
|
||||||
|
|
||||||
|
|
||||||
# Largest gap between two messages for them to still count as adjacent.
MSG_ADJACENTCY_DIST = datetime.timedelta(minutes=2)


def is_adjacent_messages(first, second):
    """Tell whether ``second`` directly continues ``first``.

    Two messages are adjacent when they share a sender and ``second`` was
    sent no more than ``MSG_ADJACENTCY_DIST`` after ``first``.
    """
    if first.sender != second.sender:
        return False
    elapsed = second.sent_at - first.sent_at
    return elapsed <= MSG_ADJACENTCY_DIST
|
||||||
|
|
||||||
|
|
||||||
def merge_texts(text1: str, text2: str) -> str:
    """Join two message texts into one, placing ``text2`` on a new line.

    If ``text1`` does not already end in one of ``. ? ! , :`` a period is
    appended so the merged text still reads as separate sentences.

    Args:
        text1: Text of the earlier message.
        text2: Text of the later message.

    Returns:
        ``text1``, a separator (``' '`` or ``'. '``), a newline, then
        ``text2``.
    """
    # str.endswith needs a tuple to test alternatives; the single string
    # '.?!,:' would only match that exact five-character suffix.
    punctuated = text1.endswith(('.', '?', '!', ',', ':'))
    return text1 + (' ' if punctuated else '. ') + '\n' + text2
|
||||||
|
|
||||||
|
|
||||||
def merge_adjacent_messages(messages: list[Message]) -> list[Message]:
    """Collapse runs of adjacent messages into single messages.

    Walks ``messages`` in order. A message that ``is_adjacent_messages``
    reports as adjacent to the last kept message is folded into it via
    ``merge_texts``; any other message is kept as a new entry.
    """
    merged = []
    for current in messages:
        if not merged or not is_adjacent_messages(merged[-1], current):
            merged.append(current)
            continue
        previous = merged[-1]
        combined_text = merge_texts(previous.text, current.text)
        # Build a new message via replace() with the combined text rather
        # than mutating the previous one.
        merged[-1] = dataclasses.replace(previous, text=combined_text)
    return merged
|
||||||
|
|
|
@ -2,14 +2,15 @@ import argparse
|
||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from . import (parse_messages_in_chat_folder, merge_adjacent_messages,
|
from . import format_messages, merge_adjacent_messages, parse_messages_in_chat_folder
|
||||||
format_messages)
|
|
||||||
|
|
||||||
def parse_args():
    """Parse the command line and return the resulting namespace.

    The only argument is the positional ``path``, converted to a ``Path``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('path', type=Path)
    namespace = cli.parse_args()
    return namespace
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
logging.basicConfig()
|
logging.basicConfig()
|
||||||
logging.getLogger().setLevel('INFO')
|
logging.getLogger().setLevel('INFO')
|
||||||
|
@ -19,5 +20,6 @@ def main():
|
||||||
messages = merge_adjacent_messages(messages)
|
messages = merge_adjacent_messages(messages)
|
||||||
print(format_messages(messages))
|
print(format_messages(messages))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import libpurple_to_markdown
|
import libpurple_to_markdown
|
||||||
|
|
||||||
|
|
||||||
def test_version():
    """Check that the package exposes a ``__version__`` that is not None."""
    version = libpurple_to_markdown.__version__
    assert version is not None
|
||||||
|
|
Loading…
Reference in New Issue
Block a user