1
0

Ruff
All checks were successful
Run Python tests (through Pytest) / Test (push) Successful in 24s
Verify Python project can be installed, loaded and have version checked / Test (push) Successful in 22s

This commit is contained in:
Jon Michael Aanes 2024-10-31 22:38:22 +01:00
parent 9951348164
commit 577b229a0a
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
4 changed files with 42 additions and 20 deletions

View File

@ -1,8 +1,8 @@
import argparse import argparse
import logging
from pathlib import Path
from collections.abc import Iterable, Iterator
import dataclasses import dataclasses
import logging
from collections.abc import Iterable, Iterator
from pathlib import Path
from . import ( from . import (
filter_useless_messages, filter_useless_messages,
@ -11,10 +11,11 @@ from . import (
merge_adjacent_messages, merge_adjacent_messages,
synctech_sms, synctech_sms,
) )
from .data import Message, MYSELF from .data import MYSELF, Message
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def group_messages(messages: Iterable[Message], key) -> dict[str, list[Message]]: def group_messages(messages: Iterable[Message], key) -> dict[str, list[Message]]:
by_key: dict[str, list[Message]] = {} by_key: dict[str, list[Message]] = {}
for msg in messages: for msg in messages:
@ -26,21 +27,30 @@ def group_messages(messages: Iterable[Message], key) -> dict[str, list[Message]]
def group_messages_by_chat_id(messages: Iterable[Message]) -> dict[str, list[Message]]:
    """Bucket messages by the chat they belong to.

    Delegates to ``group_messages``, keying every message on its ``chat_id``
    attribute, and returns that function's result unchanged.
    """

    def chat_id_of(msg):
        return msg.chat_id

    return group_messages(messages, key=chat_id_of)
def year_and_month_period_key(msg: Message):
    """Return the ``<year>-<month>`` period key for when ``msg`` was sent.

    The month is zero-padded to two digits; the year is rendered as-is.
    """
    sent_at = msg.sent_at
    return '{}-{:02}'.format(sent_at.year, sent_at.month)
def year_period_key(msg: Message):
    """Return the period key for the calendar year in which ``msg`` was sent."""
    sent_year = msg.sent_at.year
    return format(sent_year)
def year_quarter_period_key(msg: Message): def year_quarter_period_key(msg: Message):
quarter = int((msg.sent_at.month - 1) / 3) + 1 quarter = int((msg.sent_at.month - 1) / 3) + 1
return f'{msg.sent_at.year}-Q{quarter:01}' return f'{msg.sent_at.year}-Q{quarter:01}'
# Upper bound on the average number of messages per period.
# NOTE(review): the code that reads this limit is not visible in this view;
# presumably group_messages_by_period uses it to pick a granularity - confirm.
MAX_AVERAGE_MESSAGES_PER_PERIOD = 120
def group_messages_by_period(messages: Iterable[Message]) -> dict[str, list[Message]]:
possible_period_keys = [(lambda msg: 'Full History'), year_period_key, year_quarter_period_key, year_and_month_period_key] def group_messages_by_period(messages: Iterable[Message]) -> dict[str, list[Message]]:
possible_period_keys = [
(lambda msg: 'Full History'),
year_period_key,
year_quarter_period_key,
year_and_month_period_key,
]
for period_key in possible_period_keys: for period_key in possible_period_keys:
grouped = group_messages(messages, key=period_key) grouped = group_messages(messages, key=period_key)
@ -52,6 +62,7 @@ def group_messages_by_period(messages: Iterable[Message]) -> dict[str, list[Mess
return grouped return grouped
def replace_myself(messages: Iterable[Message], myself: str) -> Iterator[Message]: def replace_myself(messages: Iterable[Message], myself: str) -> Iterator[Message]:
for msg in messages: for msg in messages:
if msg.sender == MYSELF: if msg.sender == MYSELF:
@ -59,6 +70,7 @@ def replace_myself(messages: Iterable[Message], myself: str) -> Iterator[Message
else: else:
yield msg yield msg
def parse_args(): def parse_args():
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument('--purple', type=Path, dest='purple_folder') parser.add_argument('--purple', type=Path, dest='purple_folder')
@ -76,7 +88,9 @@ def main():
if args.purple_folder: if args.purple_folder:
all_messages = libpurple.parse_messages_in_chat_folder(args.purple_folder) all_messages = libpurple.parse_messages_in_chat_folder(args.purple_folder)
elif args.synctech_sms_backup_file: elif args.synctech_sms_backup_file:
all_messages = synctech_sms.parse_messages_in_backup_xml_file(args.synctech_sms_backup_file) all_messages = synctech_sms.parse_messages_in_backup_xml_file(
args.synctech_sms_backup_file,
)
else: else:
logger.fatal('No input file given!') logger.fatal('No input file given!')
return return
@ -95,7 +109,13 @@ def main():
for chat_id, messages_in_chat_original in messages_by_chat_id.items(): for chat_id, messages_in_chat_original in messages_by_chat_id.items():
messages_in_chat = merge_adjacent_messages(messages_in_chat_original) messages_in_chat = merge_adjacent_messages(messages_in_chat_original)
messages_by_period = group_messages_by_period(messages_in_chat) messages_by_period = group_messages_by_period(messages_in_chat)
logger.info(' "%s": %d messages, %d periods (%d msg/period avg)', chat_id, len(messages_in_chat_original), len(messages_by_period), len(messages_in_chat_original)/ len(messages_by_period)) logger.info(
' "%s": %d messages, %d periods (%d msg/period avg)',
chat_id,
len(messages_in_chat_original),
len(messages_by_period),
len(messages_in_chat_original) / len(messages_by_period),
)
for period_key, messages in messages_by_period.items(): for period_key, messages in messages_by_period.items():
output_file = args.output / chat_id / f'{period_key}.md' output_file = args.output / chat_id / f'{period_key}.md'

View File

@ -1,7 +1,6 @@
import dataclasses import dataclasses
import datetime import datetime
# Sentinel sender value that message senders are compared against.
# NOTE(review): presumably marks messages sent by the owner of the logs - confirm.
MYSELF = 'MYSELF'

View File

@ -7,6 +7,7 @@ This backend parses the HTML files, focusing on the IRC protocol-style logs.
**This backend is not actively maintained.** **This backend is not actively maintained.**
""" """
import datetime import datetime
import logging import logging
from pathlib import Path from pathlib import Path
@ -74,8 +75,9 @@ def parse_messages_in_chat_file(path: Path, chat_id: str) -> list[Message]:
elif c.name == 'br': elif c.name == 'br':
if cur_sender: if cur_sender:
messages.append(Message(cur_sent_at, cur_sender, messages.append(
cur_text.strip(), chat_id)) Message(cur_sent_at, cur_sender, cur_text.strip(), chat_id),
)
cur_sent_at = None cur_sent_at = None
cur_sender = None cur_sender = None
cur_text = '' cur_text = ''

View File

@ -5,6 +5,7 @@ for Android is a free app for backing up your SMS and MMS messages. It uses an
XML format as backup format, which this backend reads and converts to the XML format as backup format, which this backend reads and converts to the
standardized Message format. standardized Message format.
""" """
import datetime import datetime
import logging import logging
from collections.abc import Iterator from collections.abc import Iterator
@ -12,7 +13,7 @@ from pathlib import Path
import bs4 import bs4
from .data import Message, MYSELF from .data import MYSELF, Message
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -30,6 +31,7 @@ def sms_soup_to_message(soup: bs4.BeautifulSoup) -> Message:
chat_id = 'SMS ' + soup['address'] chat_id = 'SMS ' + soup['address']
return Message(sent_at, sender, text, chat_id=chat_id) return Message(sent_at, sender, text, chat_id=chat_id)
def parse_messages_in_backup_xml_file(path: Path) -> Iterator[Message]: def parse_messages_in_backup_xml_file(path: Path) -> Iterator[Message]:
logger.info('Parsing %s', path) logger.info('Parsing %s', path)
@ -39,4 +41,3 @@ def parse_messages_in_backup_xml_file(path: Path) -> Iterator[Message]:
for sms in soup.find_all('sms'): for sms in soup.find_all('sms'):
yield sms_soup_to_message(sms) yield sms_soup_to_message(sms)
del sms del sms