Initial commit for crazy log conversion project
This commit is contained in:
commit
21c61f7d02
20
.gitignore
vendored
Normal file
20
.gitignore
vendored
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
# Program specific
|
||||||
|
/output/
|
||||||
|
/deps/
|
||||||
|
/secrets
|
||||||
|
/private_deps/
|
||||||
|
/data/
|
||||||
|
/config/
|
||||||
|
|
||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
/build/
|
||||||
|
/dist/
|
||||||
|
*.egg-info/
|
||||||
|
.mypy_cache/
|
||||||
|
|
||||||
|
# Python, Testing
|
||||||
|
/test/secrets.py
|
||||||
|
/.coverage
|
||||||
|
/.hypothesis/
|
||||||
|
/htmlcov/
|
0
libpurple_to_markdown/__init__.py
Normal file
0
libpurple_to_markdown/__init__.py
Normal file
90
libpurple_to_markdown/__main__.py
Normal file
90
libpurple_to_markdown/__main__.py
Normal file
|
@ -0,0 +1,90 @@
|
||||||
|
|
||||||
|
import dataclasses
|
||||||
|
import re
|
||||||
|
import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
import bs4
|
||||||
|
|
||||||
|
@dataclasses.dataclass(frozen=True, order=True)
class Message:
    """A single chat message extracted from a log file.

    Instances are immutable and compare field by field in declaration order
    (``sent_at``, then ``sender``, then ``text``), so sorting a list of
    messages orders them chronologically first.

    Raises:
        ValueError: If any field is ``None`` at construction time.
    """

    # Moment the message was sent.
    sent_at: datetime.datetime
    # Name of the sender as it appears in the log.
    sender: str
    # Body of the message.
    text: str

    def __post_init__(self):
        """Reject messages that are missing any of their fields."""
        # Raise explicitly instead of using ``assert``: assertions are removed
        # when Python runs with ``-O``, which would let incomplete messages
        # through and only fail later (e.g. while sorting).
        for field in dataclasses.fields(self):
            if getattr(self, field.name) is None:
                raise ValueError(f'Message.{field.name} must not be None')
|
||||||
|
|
||||||
|
def datetime_sent(chat_start: datetime.datetime, message_sent: datetime.time) -> datetime.datetime:
    """Turn a time-of-day taken from a log line into a full timestamp.

    The log lines only carry a wall-clock time, so the date is borrowed from
    ``chat_start``. The result carries the same ``tzinfo`` as ``chat_start``.

    Args:
        chat_start: Moment the chat began.
        message_sent: Wall-clock time at which the message was sent.

    Returns:
        ``message_sent`` on the date of ``chat_start``, or on the following
        day when the wall-clock time is earlier than the start of the chat.
    """
    sent_at = datetime.datetime.combine(chat_start.date(), message_sent, chat_start.tzinfo)
    if message_sent < chat_start.time():
        # A message cannot be sent before its own chat started, so an earlier
        # wall-clock time means the conversation ran past midnight.
        # NOTE(review): a chat spanning more than 24 hours cannot be resolved
        # from the time of day alone; only one rollover is assumed.
        sent_at += datetime.timedelta(days=1)
    return sent_at
|
||||||
|
|
||||||
|
|
||||||
|
def parse_messages_in_chat_file(path: Path) -> list[Message]:
    """Parse one HTML chat log into a list of messages.

    The file name without its extension must be the ISO-formatted start time
    of the chat, optionally followed by a time-zone abbreviation such as
    ``CEST``. Inside the document body, each message is expected to be a
    ``<font>`` header (containing a nested ``<font>`` with a ``(HH:MM:SS)``
    timestamp and a ``<b>`` with the sender), followed by text and/or ``<a>``
    links, and terminated by ``<br>``. ``<h3>`` elements are ignored.

    Args:
        path: Location of the HTML log file.

    Returns:
        The messages in the order in which they appear in the file.

    Raises:
        ValueError: If the file name is not a parseable timestamp, the
            document has no ``<body>``, a message header is malformed, an
            unexpected element is encountered, or a message is incomplete
            when its ``<br>`` is reached (the latter surfaces from
            ``Message`` and may be an ``AssertionError`` depending on how
            ``Message`` validates).
    """
    # Drop any trailing zone abbreviation (CEST, CET, ...), not just CEST;
    # fromisoformat() only understands the numeric UTC offset before it.
    # Two or more letters are required so a lone ISO "Z" suffix is preserved.
    chat_start = datetime.datetime.fromisoformat(re.sub(r'[A-Za-z]{2,}$', '', path.stem))

    # Decode explicitly instead of relying on the platform default.
    # NOTE(review): assumes the logs are UTF-8 -- confirm for very old logs.
    with open(path, encoding='utf-8') as f:
        # Name the parser so the resulting tree does not depend on which
        # optional parser libraries happen to be installed.
        soup = bs4.BeautifulSoup(f, 'html.parser')

    if soup.body is None:
        raise ValueError(f'{path}: document has no <body> element')

    messages = []

    # State of the message currently being assembled.
    sent_at: datetime.datetime | None = None
    sender: str | None = None
    text_parts: list[str] = []

    for node in soup.body.children:
        if node.name == 'font':
            # Message header: timestamp in the nested <font>, sender in <b>.
            if node.font is None or node.b is None:
                raise ValueError(f'{path}: malformed message header {node!r}')

            stamp = node.font.get_text()
            match = re.match(r'\((\d+):(\d+):(\d+)\)', stamp)
            if match is None:
                raise ValueError(f'{path}: unrecognised timestamp {stamp!r}')
            hour, minute, second = map(int, match.groups())
            sent_at = datetime_sent(chat_start, datetime.time(hour, minute, second))

            sender = node.b.get_text().strip().removesuffix(':')

            # Discard anything collected before the header (typically the
            # newline that follows the previous message's <br>).
            text_parts = []

        elif node.name is None:
            # Bare text node: part of the message body.
            text_parts.append(node.get_text())

        elif node.name == 'a':
            # Render links as <url>; accumulate so that text surrounding a
            # link is kept rather than overwritten.
            text_parts.append('<' + node['href'] + '>')

        elif node.name == 'br':
            # End of message. Pass None when no body was seen so that
            # Message's own validation rejects the incomplete entry.
            text = ''.join(text_parts) if text_parts else None
            messages.append(Message(sent_at, sender, text))
            sent_at = None
            sender = None
            text_parts = []

        elif node.name != 'h3':  # <h3> is the log header and is ignored
            # Raise rather than assert so unknown markup is never silently
            # skipped when running with -O.
            raise ValueError(f'{path}: unexpected element {node!r}')

    return messages
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def parse_messages_in_chat_folder(chat_folder_path: Path) -> list[Message]:
    """Collect the messages of every chat log in a folder, sorted.

    Each entry yielded by ``chat_folder_path.iterdir()`` is handed to
    ``parse_messages_in_chat_file``; the combined messages are returned in
    ascending ``Message`` order.

    Args:
        chat_folder_path: Directory whose entries are chat log files.

    Returns:
        A new sorted list with the messages of all files.
    """
    return sorted(
        message
        for log_path in chat_folder_path.iterdir()
        for message in parse_messages_in_chat_file(log_path)
    )
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Print every message found in the current working directory.

    ``Path()`` denotes the current directory; its entries are parsed as chat
    logs and the messages are written to standard output in sorted order, one
    per line.
    """
    chat_folder = Path()
    all_messages = parse_messages_in_chat_folder(chat_folder)

    for message in all_messages:
        line = f'({message.sent_at}) {message.sender} : {message.text}'
        print(line)
|
||||||
|
|
||||||
|
# Run the converter only when the module is executed as a script, not when it
# is imported.
if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue
Block a user