Improvements
This commit is contained in:
parent
0722dc96e5
commit
fa50a6970a
|
@ -1,5 +1,6 @@
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import datetime
|
import datetime
|
||||||
|
from collections.abc import Iterator, Iterable
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
@ -32,7 +33,7 @@ def datetime_sent(
|
||||||
chat_start.date(), message_sent, chat_start.tzinfo,
|
chat_start.date(), message_sent, chat_start.tzinfo,
|
||||||
)
|
)
|
||||||
if chat_start.time() > message_sent:
|
if chat_start.time() > message_sent:
|
||||||
naive = naive - datetime.timedelta(days=1)
|
naive = naive + datetime.timedelta(days=1)
|
||||||
return naive
|
return naive
|
||||||
|
|
||||||
|
|
||||||
|
@ -63,9 +64,8 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
|
||||||
cur_sender: str | None = None
|
cur_sender: str | None = None
|
||||||
cur_text: str = ''
|
cur_text: str = ''
|
||||||
|
|
||||||
if soup.p:
|
if soup.body.p:
|
||||||
logger.warning('File indicates error message?')
|
loglines = soup.body.p.children
|
||||||
return [] # TODO
|
|
||||||
else:
|
else:
|
||||||
loglines = soup.body.children
|
loglines = soup.body.children
|
||||||
|
|
||||||
|
@ -77,7 +77,7 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
|
||||||
# Get sender
|
# Get sender
|
||||||
if c.b:
|
if c.b:
|
||||||
assert cur_sender is None
|
assert cur_sender is None
|
||||||
cur_sender = c.b.get_text().strip().removesuffix(':')
|
cur_sender = c.b.get_text().strip().removesuffix(':').removeprefix('***').removesuffix('[m]')
|
||||||
|
|
||||||
elif c.name in {None, 'span', 'font'}:
|
elif c.name in {None, 'span', 'font'}:
|
||||||
cur_text += c.get_text()
|
cur_text += c.get_text()
|
||||||
|
@ -86,7 +86,8 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
|
||||||
cur_text += '<' + c['href'] + '>'
|
cur_text += '<' + c['href'] + '>'
|
||||||
|
|
||||||
elif c.name == 'br':
|
elif c.name == 'br':
|
||||||
messages.append(Message(cur_sent_at, cur_sender, cur_text.strip()))
|
if cur_sender:
|
||||||
|
messages.append(Message(cur_sent_at, cur_sender, cur_text.strip()))
|
||||||
cur_sent_at = None
|
cur_sent_at = None
|
||||||
cur_sender = None
|
cur_sender = None
|
||||||
cur_text = ''
|
cur_text = ''
|
||||||
|
@ -106,23 +107,25 @@ def parse_messages_in_chat_file(path: Path) -> list[Message]:
|
||||||
|
|
||||||
def parse_messages_in_chat_folder(chat_folder_path: Path) -> list[Message]:
|
def parse_messages_in_chat_folder(chat_folder_path: Path) -> list[Message]:
|
||||||
messages = []
|
messages = []
|
||||||
for file_path in chat_folder_path.iterdir():
|
for file_path in sorted(chat_folder_path.iterdir()):
|
||||||
messages.extend(parse_messages_in_chat_file(file_path))
|
messages.extend(parse_messages_in_chat_file(file_path))
|
||||||
|
|
||||||
messages.sort()
|
messages.sort()
|
||||||
return messages
|
return messages
|
||||||
|
|
||||||
|
|
||||||
def format_message_as_citation(out, msg):
|
def format_message_as_citation(out: list[str], msg: Message) -> None:
|
||||||
out.append(f'{msg.sent_at.date()} {msg.sent_at.time()} [[{msg.sender}]]:')
|
out.append(f'{msg.sent_at.date()} {msg.sent_at.time()} [[{msg.sender}]]:')
|
||||||
out.append('\n')
|
out.append('\n')
|
||||||
for line in msg.text.split('\n'):
|
for line in msg.text.split('\n'):
|
||||||
|
line = re.sub(r'(<[\w ]+>)', r'`\1`', line)
|
||||||
|
line = re.sub(r'(\$\$\$)', r'`\1`', line)
|
||||||
out.append(f'> {line}\n')
|
out.append(f'> {line}\n')
|
||||||
del line
|
del line
|
||||||
out.append('\n')
|
out.append('\n')
|
||||||
|
|
||||||
|
|
||||||
def format_message_as_table(out, msg):
|
def format_message_as_table(out: list[str], msg: Message) -> None:
|
||||||
out.append(f'| {msg.sent_at} | [[{msg.sender}]] | ')
|
out.append(f'| {msg.sent_at} | [[{msg.sender}]] | ')
|
||||||
for line in msg.text.split('\n'):
|
for line in msg.text.split('\n'):
|
||||||
out.append(f'{line}')
|
out.append(f'{line}')
|
||||||
|
@ -130,8 +133,8 @@ def format_message_as_table(out, msg):
|
||||||
out.append('|\n')
|
out.append('|\n')
|
||||||
|
|
||||||
|
|
||||||
def format_messages(messages: list[Message]) -> str:
|
def format_messages(messages: list[Message], title: str) -> str:
|
||||||
out = ['# Chat 2018' '\n\n']
|
out = ['# ', title, '\n\n']
|
||||||
|
|
||||||
as_table = False
|
as_table = False
|
||||||
|
|
||||||
|
@ -154,8 +157,17 @@ def format_messages(messages: list[Message]) -> str:
|
||||||
|
|
||||||
MSG_ADJACENTCY_DIST = datetime.timedelta(minutes=2)
|
MSG_ADJACENTCY_DIST = datetime.timedelta(minutes=2)
|
||||||
|
|
||||||
|
def is_useless_message(msg: Message) -> bool:
|
||||||
|
return msg.sender.endswith('<AUTO-REPLY>') or msg.sender == ''
|
||||||
|
|
||||||
def is_adjacent_messages(first, second):
|
def filter_useless_messages(messages: Iterable[Message]) -> Iterator[Message]:
|
||||||
|
for msg in messages:
|
||||||
|
if not is_useless_message(msg):
|
||||||
|
yield msg
|
||||||
|
else:
|
||||||
|
print(msg.text)
|
||||||
|
|
||||||
|
def is_adjacent_messages(first: Message, second: Message) -> bool:
|
||||||
return (
|
return (
|
||||||
first.sender == second.sender
|
first.sender == second.sender
|
||||||
and second.sent_at - first.sent_at <= MSG_ADJACENTCY_DIST
|
and second.sent_at - first.sent_at <= MSG_ADJACENTCY_DIST
|
||||||
|
@ -168,7 +180,7 @@ def merge_texts(text1: str, text2: str) -> str:
|
||||||
return text1 + (' ' if punctuated else '. ') + '\n' + text2
|
return text1 + (' ' if punctuated else '. ') + '\n' + text2
|
||||||
|
|
||||||
|
|
||||||
def merge_adjacent_messages(messages: list[Message]) -> list[Message]:
|
def merge_adjacent_messages(messages: Iterable[Message]) -> list[Message]:
|
||||||
out = []
|
out = []
|
||||||
for msg in messages:
|
for msg in messages:
|
||||||
if out and is_adjacent_messages(out[-1], msg):
|
if out and is_adjacent_messages(out[-1], msg):
|
||||||
|
|
|
@ -3,7 +3,8 @@ import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import logging
|
import logging
|
||||||
|
|
||||||
from . import (format_messages, merge_adjacent_messages, parse_messages_in_chat_folder, Message)
|
from . import (format_messages, merge_adjacent_messages,
|
||||||
|
parse_messages_in_chat_folder, Message, filter_useless_messages)
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -30,15 +31,16 @@ def main():
|
||||||
receipient = args.path.name
|
receipient = args.path.name
|
||||||
|
|
||||||
all_messages = parse_messages_in_chat_folder(args.path)
|
all_messages = parse_messages_in_chat_folder(args.path)
|
||||||
|
all_messages = filter_useless_messages(all_messages)
|
||||||
all_messages = merge_adjacent_messages(all_messages)
|
all_messages = merge_adjacent_messages(all_messages)
|
||||||
|
|
||||||
messages_by_year = group_messages_by_year(all_messages)
|
messages_by_year = group_messages_by_year(all_messages)
|
||||||
|
|
||||||
for year, messages in messages_by_year.items():
|
for year, messages in messages_by_year.items():
|
||||||
output_file = args.output / f'{server} - {receipient} - {year}.md'
|
output_file = args.output / f'{server} - {receipient} - {year}.md'
|
||||||
logger.info("Writing to %s", output_file)
|
logger.info("Writing %d messages to %s", len(messages), output_file)
|
||||||
with open(output_file, 'w') as f:
|
with open(output_file, 'w') as f:
|
||||||
f.write(format_messages(messages))
|
f.write(format_messages(messages, title = f'{server} - {receipient} - {year}'))
|
||||||
|
|
||||||
del year, messages, output_file
|
del year, messages, output_file
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user