Improvements
This commit is contained in:
parent
0722dc96e5
commit
fa50a6970a
|
@ -1,5 +1,6 @@
|
|||
import dataclasses
|
||||
import datetime
|
||||
from collections.abc import Iterator, Iterable
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
@ -32,7 +33,7 @@ def datetime_sent(
|
|||
chat_start.date(), message_sent, chat_start.tzinfo,
|
||||
)
|
||||
if chat_start.time() > message_sent:
|
||||
naive = naive - datetime.timedelta(days=1)
|
||||
naive = naive + datetime.timedelta(days=1)
|
||||
return naive
|
||||
|
||||
|
||||
|
@ -63,9 +64,8 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
|
|||
cur_sender: str | None = None
|
||||
cur_text: str = ''
|
||||
|
||||
if soup.p:
|
||||
logger.warning('File indicates error message?')
|
||||
return [] # TODO
|
||||
if soup.body.p:
|
||||
loglines = soup.body.p.children
|
||||
else:
|
||||
loglines = soup.body.children
|
||||
|
||||
|
@ -77,7 +77,7 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
|
|||
# Get sender
|
||||
if c.b:
|
||||
assert cur_sender is None
|
||||
cur_sender = c.b.get_text().strip().removesuffix(':')
|
||||
cur_sender = c.b.get_text().strip().removesuffix(':').removeprefix('***').removesuffix('[m]')
|
||||
|
||||
elif c.name in {None, 'span', 'font'}:
|
||||
cur_text += c.get_text()
|
||||
|
@ -86,6 +86,7 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
|
|||
cur_text += '<' + c['href'] + '>'
|
||||
|
||||
elif c.name == 'br':
|
||||
if cur_sender:
|
||||
messages.append(Message(cur_sent_at, cur_sender, cur_text.strip()))
|
||||
cur_sent_at = None
|
||||
cur_sender = None
|
||||
|
@ -106,23 +107,25 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
|
|||
|
||||
def parse_messages_in_chat_folder(chat_folder_path: Path) -> list[Message]:
    """Parse every entry of a chat folder and return all messages, sorted.

    Args:
        chat_folder_path: Directory whose entries are each handed to
            ``parse_messages_in_chat_file``.

    Returns:
        The messages of all entries combined into one list and ordered
        with ``list.sort()``, i.e. by ``Message``'s own comparison.
    """
    messages: list[Message] = []

    # ``Path.iterdir()`` yields entries in arbitrary order.  Visiting them
    # in sorted path order makes parsing (and any warnings it logs)
    # reproducible; because ``list.sort()`` is stable, messages that
    # compare equal also end up in a deterministic order.
    for file_path in sorted(chat_folder_path.iterdir()):
        messages.extend(parse_messages_in_chat_file(file_path))

    messages.sort()
    return messages
|
||||
|
||||
|
||||
def format_message_as_citation(out: list[str], msg: Message) -> None:
    """Append ``msg`` to ``out`` rendered as a quoted citation.

    The rendering is a header ``<date> <time> [[<sender>]]:``, a newline,
    one ``> ``-prefixed line per line of the message text, and a final
    newline.

    Args:
        out: List of output fragments; it is appended to in place.
        msg: Message to render.  Its ``sent_at``, ``sender`` and ``text``
            attributes are read.
    """
    sent_at = msg.sent_at
    out.append(f'{sent_at.date()} {sent_at.time()} [[{msg.sender}]]:')
    out.append('\n')

    for line in msg.text.split('\n'):
        # Wrap ``<words>`` tokens and ``$$$`` in backticks.
        # NOTE(review): presumably so the Markdown renderer shows them
        # literally instead of treating them as markup -- confirm.
        line = re.sub(r'(<[\w ]+>)', r'`\1`', line)
        line = re.sub(r'(\$\$\$)', r'`\1`', line)
        out.append(f'> {line}\n')
    # ``str.split`` always yields at least one element, so ``line`` is
    # bound here even for an empty message text.
    del line

    out.append('\n')
|
||||
|
||||
|
||||
def format_message_as_table(out, msg):
|
||||
def format_message_as_table(out: list[str], msg: Message) -> None:
|
||||
out.append(f'| {msg.sent_at} | [[{msg.sender}]] | ')
|
||||
for line in msg.text.split('\n'):
|
||||
out.append(f'{line}')
|
||||
|
@ -130,8 +133,8 @@ def format_message_as_table(out, msg):
|
|||
out.append('|\n')
|
||||
|
||||
|
||||
def format_messages(messages: list[Message]) -> str:
|
||||
out = ['# Chat 2018' '\n\n']
|
||||
def format_messages(messages: list[Message], title: str) -> str:
|
||||
out = ['# ', title, '\n\n']
|
||||
|
||||
as_table = False
|
||||
|
||||
|
@ -154,8 +157,17 @@ def format_messages(messages: list[Message]) -> str:
|
|||
|
||||
MSG_ADJACENTCY_DIST = datetime.timedelta(minutes=2)
|
||||
|
||||
def is_useless_message(msg: Message) -> bool:
    """Return True when ``msg`` carries nothing worth keeping.

    A message counts as useless when its sender ends with
    ``'<AUTO-REPLY>'`` or when its sender is the empty string.
    """
    sender = msg.sender
    if sender.endswith('<AUTO-REPLY>'):
        return True
    return sender == ''
|
||||
|
||||
def is_adjacent_messages(first, second):
|
||||
def filter_useless_messages(messages: Iterable[Message]) -> Iterator[Message]:
    """Lazily yield the messages that ``is_useless_message`` does not reject.

    Rejected messages are reported through the module logger at DEBUG
    level rather than printed, so importing code does not get unsolicited
    output on stdout.

    Args:
        messages: Messages to filter; consumed one at a time.

    Yields:
        Each message for which ``is_useless_message`` returns False, in
        input order.
    """
    for msg in messages:
        if is_useless_message(msg):
            logger.debug('Dropping useless message: %r', msg.text)
            continue
        yield msg
|
||||
|
||||
def is_adjacent_messages(first: Message, second: Message) -> bool:
|
||||
return (
|
||||
first.sender == second.sender
|
||||
and second.sent_at - first.sent_at <= MSG_ADJACENTCY_DIST
|
||||
|
@ -168,7 +180,7 @@ def merge_texts(text1: str, text2: str) -> str:
|
|||
return text1 + (' ' if punctuated else '. ') + '\n' + text2
|
||||
|
||||
|
||||
def merge_adjacent_messages(messages: list[Message]) -> list[Message]:
|
||||
def merge_adjacent_messages(messages: Iterable[Message]) -> list[Message]:
|
||||
out = []
|
||||
for msg in messages:
|
||||
if out and is_adjacent_messages(out[-1], msg):
|
||||
|
|
|
@ -3,7 +3,8 @@ import logging
|
|||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
from . import (format_messages, merge_adjacent_messages, parse_messages_in_chat_folder, Message)
|
||||
from . import (format_messages, merge_adjacent_messages,
|
||||
parse_messages_in_chat_folder, Message, filter_useless_messages)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -30,15 +31,16 @@ def main():
|
|||
receipient = args.path.name
|
||||
|
||||
all_messages = parse_messages_in_chat_folder(args.path)
|
||||
all_messages = filter_useless_messages(all_messages)
|
||||
all_messages = merge_adjacent_messages(all_messages)
|
||||
|
||||
messages_by_year = group_messages_by_year(all_messages)
|
||||
|
||||
for year, messages in messages_by_year.items():
|
||||
output_file = args.output / f'{server} - {receipient} - {year}.md'
|
||||
logger.info("Writing to %s", output_file)
|
||||
logger.info("Writing %d messages to %s", len(messages), output_file)
|
||||
with open(output_file, 'w') as f:
|
||||
f.write(format_messages(messages))
|
||||
f.write(format_messages(messages, title = f'{server} - {receipient} - {year}'))
|
||||
|
||||
del year, messages, output_file
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user