1
0

Ruff
All checks were successful
Run Python tests (through Pytest) / Test (push) Successful in 24s
Verify Python project can be installed, loaded and have version checked / Test (push) Successful in 22s

This commit is contained in:
Jon Michael Aanes 2024-10-31 22:38:22 +01:00
parent 9951348164
commit 577b229a0a
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
4 changed files with 42 additions and 20 deletions

View File

@ -1,8 +1,8 @@
import argparse
import logging
from pathlib import Path
from collections.abc import Iterable, Iterator
import dataclasses
import logging
from collections.abc import Iterable, Iterator
from pathlib import Path
from . import (
filter_useless_messages,
@ -11,10 +11,11 @@ from . import (
merge_adjacent_messages,
synctech_sms,
)
from .data import Message, MYSELF
from .data import MYSELF, Message
logger = logging.getLogger(__name__)
def group_messages(messages: Iterable[Message], key) -> dict[str, list[Message]]:
by_key: dict[str, list[Message]] = {}
for msg in messages:
@ -26,21 +27,30 @@ def group_messages(messages: Iterable[Message], key) -> dict[str, list[Message]]
def group_messages_by_chat_id(messages: Iterable[Message]) -> dict[str, list[Message]]:
    """Group messages by the chat they belong to.

    Delegates to `group_messages`, using each message's `chat_id` attribute
    as the grouping key.
    """
    return group_messages(messages, key=lambda msg: msg.chat_id)
def year_and_month_period_key(msg: Message) -> str:
    """Return the period key `YYYY-MM` for the month in which `msg` was sent.

    The month is zero-padded to two digits so the keys sort chronologically
    when compared as strings.
    """
    return f'{msg.sent_at.year}-{msg.sent_at.month:02}'
def year_period_key(msg: Message) -> str:
    """Return the period key for the calendar year in which `msg` was sent."""
    return f'{msg.sent_at.year}'
def year_quarter_period_key(msg: Message) -> str:
    """Return the period key `YYYY-Qn` for the quarter in which `msg` was sent.

    Months 1-3 map to Q1, 4-6 to Q2, 7-9 to Q3 and 10-12 to Q4.
    """
    # Floor division keeps the arithmetic in integers; shifting the month to
    # be zero-based first makes each run of three months share one quotient.
    quarter = (msg.sent_at.month - 1) // 3 + 1
    return f'{msg.sent_at.year}-Q{quarter:01}'
# NOTE(review): presumably the average number of messages per period above
# which a finer-grained period key is tried; the code that reads this constant
# is outside the visible lines -- confirm against group_messages_by_period.
MAX_AVERAGE_MESSAGES_PER_PERIOD = 120
def group_messages_by_period(messages: Iterable[Message]) -> dict[str, list[Message]]:
possible_period_keys = [(lambda msg: 'Full History'), year_period_key, year_quarter_period_key, year_and_month_period_key]
def group_messages_by_period(messages: Iterable[Message]) -> dict[str, list[Message]]:
possible_period_keys = [
(lambda msg: 'Full History'),
year_period_key,
year_quarter_period_key,
year_and_month_period_key,
]
for period_key in possible_period_keys:
grouped = group_messages(messages, key=period_key)
@ -52,13 +62,15 @@ def group_messages_by_period(messages: Iterable[Message]) -> dict[str, list[Mess
return grouped
def replace_myself(messages: Iterable[Message], myself: str) -> Iterator[Message]:
    """Yield `messages` with the MYSELF placeholder sender replaced by `myself`.

    Messages whose sender equals `MYSELF` are yielded as copies with `sender`
    set to `myself`; all other messages are yielded unchanged. Each input
    message produces exactly one output message, in the original order.
    """
    for msg in messages:
        if msg.sender == MYSELF:
            # dataclasses.replace builds a new object, so the caller's
            # message is left untouched.
            yield dataclasses.replace(msg, sender=myself)
        else:
            yield msg
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument('--purple', type=Path, dest='purple_folder')
@ -76,7 +88,9 @@ def main():
if args.purple_folder:
all_messages = libpurple.parse_messages_in_chat_folder(args.purple_folder)
elif args.synctech_sms_backup_file:
all_messages = synctech_sms.parse_messages_in_backup_xml_file(args.synctech_sms_backup_file)
all_messages = synctech_sms.parse_messages_in_backup_xml_file(
args.synctech_sms_backup_file,
)
else:
logger.fatal('No input file given!')
return
@ -93,9 +107,15 @@ def main():
del all_messages
for chat_id, messages_in_chat_original in messages_by_chat_id.items():
messages_in_chat = merge_adjacent_messages(messages_in_chat_original )
messages_in_chat = merge_adjacent_messages(messages_in_chat_original)
messages_by_period = group_messages_by_period(messages_in_chat)
logger.info(' "%s": %d messages, %d periods (%d msg/period avg)', chat_id, len(messages_in_chat_original), len(messages_by_period), len(messages_in_chat_original)/ len(messages_by_period))
logger.info(
' "%s": %d messages, %d periods (%d msg/period avg)',
chat_id,
len(messages_in_chat_original),
len(messages_by_period),
len(messages_in_chat_original) / len(messages_by_period),
)
for period_key, messages in messages_by_period.items():
output_file = args.output / chat_id / f'{period_key}.md'

View File

@ -1,7 +1,6 @@
import dataclasses
import datetime
MYSELF = 'MYSELF'

View File

@ -7,6 +7,7 @@ This backend parses the HTML files, focusing on the IRC protocol-style logs.
**This backend is not actively maintained.**
"""
import datetime
import logging
from pathlib import Path
@ -74,8 +75,9 @@ def parse_messages_in_chat_file(path: Path, chat_id: str) -> list[Message]:
elif c.name == 'br':
if cur_sender:
messages.append(Message(cur_sent_at, cur_sender,
cur_text.strip(), chat_id))
messages.append(
Message(cur_sent_at, cur_sender, cur_text.strip(), chat_id),
)
cur_sent_at = None
cur_sender = None
cur_text = ''

View File

@ -5,6 +5,7 @@ for Android is a free app for backing up your SMS and MMS messages. It uses an
XML format as backup format, which this backend reads and converts to the
standardized Message format.
"""
import datetime
import logging
from collections.abc import Iterator
@ -12,23 +13,24 @@ from pathlib import Path
import bs4
from .data import Message, MYSELF
from .data import MYSELF, Message
logger = logging.getLogger(__name__)
def sms_soup_to_message(soup: bs4.BeautifulSoup) -> Message:
    """Convert a single parsed `<sms>` element into a `Message`.

    Reads the `date`, `type`, `address` and `body` attributes, plus the
    optional `contact_name` attribute, from `soup`.

    The sender is `MYSELF` when `type` is `'2'`; otherwise it is the
    `contact_name` if present and non-empty, falling back to `address`.
    The chat id is the address prefixed with `'SMS '`.
    """
    # TODO: Require myself
    # 'date' is scaled down by 1000 before conversion, i.e. it is treated as
    # a millisecond timestamp.
    # NOTE(review): fromtimestamp() without a tz argument returns a naive
    # datetime in the local timezone -- confirm that is what consumers expect.
    sent_at = datetime.datetime.fromtimestamp(int(soup['date']) / 1000)
    if soup['type'] == '2':
        sender = MYSELF
    else:
        sender = soup.get('contact_name') or soup['address']
    text = soup['body']
    chat_id = 'SMS ' + soup['address']
    return Message(sent_at, sender, text, chat_id=chat_id)
def parse_messages_in_backup_xml_file(path: Path) -> Iterator[Message]:
logger.info('Parsing %s', path)
@ -39,4 +41,3 @@ def parse_messages_in_backup_xml_file(path: Path) -> Iterator[Message]:
for sms in soup.find_all('sms'):
yield sms_soup_to_message(sms)
del sms