1
0

Initial commit for crazy log conversion project

This commit is contained in:
Jon Michael Aanes 2024-10-26 00:34:53 +02:00
commit 21c61f7d02
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
3 changed files with 110 additions and 0 deletions

20
.gitignore vendored Normal file
View File

@@ -0,0 +1,20 @@
# Program specific
/output/
/deps/
/secrets
/private_deps/
/data/
/config/
# Python
__pycache__/
/build/
/dist/
*.egg-info/
.mypy_cache/
# Python, Testing
/test/secrets.py
/.coverage
/.hypothesis/
/htmlcov/

View File

View File

@@ -0,0 +1,90 @@
import dataclasses
import re
import datetime
from pathlib import Path
import bs4
@dataclasses.dataclass(frozen=True, order=True)
class Message:
    """A single chat message.

    Instances are immutable (``frozen=True``) and ordered field by field in
    declaration order (``order=True``): by ``sent_at`` first, then
    ``sender``, then ``text``.

    Raises:
        AssertionError: If any field is ``None``.
    """

    sent_at: datetime.datetime  # When the message was sent.
    sender: str  # Name of whoever sent the message.
    text: str  # Body of the message.

    def __post_init__(self):
        # Raised explicitly instead of via ``assert`` so the check is not
        # stripped when Python runs with ``-O``. The exception type stays
        # AssertionError so existing callers see the same failure.
        for field in dataclasses.fields(self):
            if getattr(self, field.name) is None:
                raise AssertionError(f'Message.{field.name} must not be None')
def datetime_sent(chat_start: datetime.datetime, message_sent: datetime.time) -> datetime.datetime:
    """Combine a message's time of day with the date of its chat.

    Only the time of day is known for a message, so the date is taken from
    ``chat_start``. A message cannot be sent before its chat started, so a
    time of day earlier than the chat start's means the conversation crossed
    midnight and the message belongs to the following day.

    Only a single midnight crossing can be detected this way; a chat spanning
    more than 24 hours cannot be dated correctly from times of day alone.

    Args:
        chat_start: Moment the chat started.
        message_sent: Time of day at which the message was sent.

    Returns:
        The moment the message was sent, carrying ``chat_start``'s ``tzinfo``.
    """
    sent_at = datetime.datetime.combine(chat_start.date(), message_sent, chat_start.tzinfo)
    if message_sent < chat_start.time():
        # Wrapped past midnight: move forward one day, never backwards.
        sent_at += datetime.timedelta(days=1)
    return sent_at
# Matches a leading ``(HH:MM:SS)`` timestamp in a message header.
_TIMESTAMP_RE = re.compile(r'\((\d+):(\d+):(\d+)\)')

# Matches a trailing alphabetic time zone abbreviation (e.g. ``CEST``, ``CET``)
# that directly follows a non-letter. A lone ``Z`` is left alone because it is
# a valid ISO 8601 UTC designator.
_TZ_ABBREVIATION_RE = re.compile(r'(?<![A-Za-z])[A-Za-z]{2,}$')


def parse_messages_in_chat_file(path: Path) -> list[Message]:
    """Parse one HTML chat log into messages, in document order.

    The file name without its extension must be an ISO 8601 timestamp of when
    the chat started, optionally followed by a time zone abbreviation such as
    ``CEST``, which is discarded before parsing.

    The direct children of ``<body>`` are read as a flat sequence:

    * ``<font>``: message header. Its nested ``<font>`` holds a
      ``(HH:MM:SS)`` timestamp and its ``<b>`` holds the sender name,
      optionally followed by ``:``.
    * text nodes and ``<a>`` elements: fragments of the message text, joined
      in order. A link is rendered as ``<href>``.
    * ``<br>``: ends the current message.
    * ``<h3>``: log header, ignored.

    Args:
        path: Path of the HTML log file.

    Returns:
        The messages of the file in the order they appear.

    Raises:
        ValueError: If the file name is not a valid timestamp, the document
            has no ``<body>``, or a message header lacks its timestamp or
            sender element.
        AssertionError: If ``<body>`` contains an unexpected element.
    """
    chat_start = datetime.datetime.fromisoformat(_TZ_ABBREVIATION_RE.sub('', path.stem))

    # Hand Beautiful Soup the raw bytes so it detects the document encoding
    # itself instead of relying on the platform's locale encoding, and name
    # the parser so results do not depend on which optional parsers happen to
    # be installed.
    with open(path, 'rb') as f:
        soup = bs4.BeautifulSoup(f, 'html.parser')
    if soup.body is None:
        raise ValueError(f'{path}: no <body> element found')

    messages = []
    cur_sent_at: datetime.datetime | None = None
    cur_sender: str | None = None
    cur_fragments: list[str] = []

    for c in soup.body.children:
        if c.name == 'font':
            if c.font is None or c.b is None:
                raise ValueError(f'{path}: malformed message header: {c}')

            # Get timestamp
            m = _TIMESTAMP_RE.match(c.font.get_text())
            if m is None:
                raise ValueError(f'{path}: no (HH:MM:SS) timestamp in message header: {c}')
            hour, minute, second = (int(group) for group in m.groups())
            cur_sent_at = datetime_sent(chat_start, datetime.time(hour, minute, second))

            # Get sender
            cur_sender = c.b.get_text().strip().removesuffix(':')

            # Anything collected before the header (typically the whitespace
            # between the previous <br> and this element) is not message text.
            cur_fragments = []
        elif c.name is None:
            cur_fragments.append(c.get_text())
        elif c.name == 'a':
            cur_fragments.append('<' + c['href'] + '>')
        elif c.name == 'br':
            # Join every fragment so text surrounding a link is not lost.
            messages.append(Message(cur_sent_at, cur_sender, ''.join(cur_fragments)))
            cur_sent_at = None
            cur_sender = None
            cur_fragments = []
        elif c.name == 'h3':
            pass  # Ignore log header
        else:
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError(c)
    return messages
def parse_messages_in_chat_folder(chat_folder_path: Path) -> list[Message]:
    """Collect the messages of every chat log in a folder, sorted ascending.

    Every entry directly inside ``chat_folder_path`` is handed to
    ``parse_messages_in_chat_file``; the combined result is sorted using the
    ordering defined by ``Message``.

    Args:
        chat_folder_path: Folder whose entries are chat log files.

    Returns:
        All messages from all entries of the folder, in sorted order.
    """
    collected = [
        message
        for log_path in chat_folder_path.iterdir()
        for message in parse_messages_in_chat_file(log_path)
    ]
    return sorted(collected)
def main(chat_folder_path: Path | None = None):
    """Print every message found in a chat log folder, in sorted order.

    Args:
        chat_folder_path: Folder whose entries are parsed as chat logs.
            Defaults to ``Path()``, i.e. the current working directory, which
            is what this function used before the parameter existed.
    """
    if chat_folder_path is None:
        chat_folder_path = Path()
    for message in parse_messages_in_chat_folder(chat_folder_path):
        print(f'({message.sent_at}) {message.sender} : {message.text}')


# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()