1
0

Automatically select period key
Some checks failed
Run Python tests (through Pytest) / Test (push) Successful in 24s
Verify Python project can be installed, loaded and have version checked / Test (push) Has been cancelled

This commit is contained in:
Jon Michael Aanes 2024-10-31 22:37:50 +01:00
parent 57cac8daa1
commit 9951348164
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA

View File

@ -1,7 +1,8 @@
import argparse import argparse
import logging import logging
from pathlib import Path from pathlib import Path
from collections.abc import Iterable from collections.abc import Iterable, Iterator
import dataclasses
from . import ( from . import (
filter_useless_messages, filter_useless_messages,
@ -10,32 +11,60 @@ from . import (
merge_adjacent_messages, merge_adjacent_messages,
synctech_sms, synctech_sms,
) )
from .data import Message from .data import Message, MYSELF
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def group_messages(messages: Iterable[Message], key) -> dict[str, list[Message]]:
    """Bucket messages by the value that ``key`` returns for each of them.

    Args:
        messages: Messages to group; iterated exactly once.
        key: Callable taking a single message and returning its group key.

    Returns:
        Mapping from group key to the messages with that key. Both the keys
        and the messages inside each list keep their first-seen order. An
        empty input yields an empty dict.
    """
    by_key: dict[str, list[Message]] = {}
    for msg in messages:
        by_key.setdefault(key(msg), []).append(msg)
    return by_key
def group_messages_by_chat_id(messages: Iterable[Message]) -> dict[str, list[Message]]:
    """Group messages by their ``chat_id`` attribute.

    Thin wrapper around ``group_messages`` using ``msg.chat_id`` as the key.
    """
    return group_messages(messages, key=lambda msg: msg.chat_id)
def year_and_month_period_key(msg: Message) -> str:
    """Return the ``YYYY-MM`` period key for the message's ``sent_at``.

    The month is zero-padded to two digits (e.g. ``2024-03``).
    """
    return f'{msg.sent_at.year}-{msg.sent_at.month:02}'
def year_period_key(msg: Message) -> str:
    """Return the period key for the message's ``sent_at`` year (e.g. ``2024``)."""
    return f'{msg.sent_at.year}'
def year_quarter_period_key(msg: Message) -> str:
    """Return the ``YYYY-Qn`` period key for the message's ``sent_at``.

    Months 1-3 map to Q1, 4-6 to Q2, 7-9 to Q3 and 10-12 to Q4.
    """
    # Integer floor division keeps the arithmetic exact (no float round trip).
    quarter = (msg.sent_at.month - 1) // 3 + 1
    return f'{msg.sent_at.year}-Q{quarter}'
# Threshold used by group_messages_by_period: a period granularity is accepted
# once the average number of messages per period is at most this value.
MAX_AVERAGE_MESSAGES_PER_PERIOD = 120
def group_messages_by_period(messages: Iterable[Message]) -> dict[str, list[Message]]:
    """Group messages into time periods, picking the granularity automatically.

    Candidate groupings are tried from coarsest to finest: the full history
    as a single period, then per year, per quarter and per month. The first
    grouping whose average number of messages per period does not exceed
    ``MAX_AVERAGE_MESSAGES_PER_PERIOD`` is returned. If none qualifies, the
    finest grouping (year and month) is returned.

    Args:
        messages: Messages to group. Any iterable is accepted, including
            one-shot iterators and generators.

    Returns:
        Mapping from period key to the messages in that period; an empty
        dict when there are no messages.
    """
    # Materialise once: the input is walked once per candidate key, and a
    # one-shot iterator would be exhausted after the first pass.
    message_list = list(messages)
    if not message_list:
        # Nothing to group; also avoids dividing by zero groups below.
        return {}

    # Ordered from coarsest to finest granularity.
    possible_period_keys = [
        (lambda msg: 'Full History'),
        year_period_key,
        year_quarter_period_key,
        year_and_month_period_key,
    ]

    grouped: dict[str, list[Message]] = {}
    for period_key in possible_period_keys:
        grouped = group_messages(message_list, key=period_key)
        # Every message lands in exactly one group, so the group sizes sum to
        # the total number of messages.
        average_num_messages = len(message_list) / len(grouped)
        if average_num_messages <= MAX_AVERAGE_MESSAGES_PER_PERIOD:
            break
    return grouped
def replace_myself(messages: Iterable[Message], myself: str) -> Iterator[Message]:
    """Yield the messages with the ``MYSELF`` sender substituted by ``myself``.

    A message whose ``sender`` equals ``MYSELF`` is yielded as a copy made
    with ``dataclasses.replace`` (so it must be a dataclass instance) with
    ``sender`` set to ``myself``; every other message is yielded unchanged.
    The input messages themselves are never mutated.

    Args:
        messages: Messages to process, consumed lazily.
        myself: Sender name to use in place of ``MYSELF``.
    """
    for msg in messages:
        if msg.sender == MYSELF:
            yield dataclasses.replace(msg, sender=myself)
        else:
            yield msg
def parse_args(argv=None) -> argparse.Namespace:
    """Parse the command-line options.

    Args:
        argv: Optional sequence of argument strings to parse. When ``None``
            (the default) argparse reads ``sys.argv[1:]``, exactly as a call
            without arguments did before this parameter existed.

    Returns:
        Namespace with ``purple_folder``, ``synctech_sms_backup_file`` and
        ``output`` (each a ``Path``, or ``None`` when not given) and
        ``myself`` (a ``str``, defaulting to ``'Myself'``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--purple', type=Path, dest='purple_folder')
    parser.add_argument('--synctech', type=Path, dest='synctech_sms_backup_file')
    parser.add_argument('--output', type=Path)
    parser.add_argument('--myself', type=str, default='Myself')
    return parser.parse_args(argv)
@ -52,6 +81,7 @@ def main():
logger.fatal('No input file given!') logger.fatal('No input file given!')
return return
all_messages = replace_myself(all_messages, myself=args.myself)
all_messages = list(all_messages) all_messages = list(all_messages)
logger.info('%d messages after loading', len(all_messages)) logger.info('%d messages after loading', len(all_messages))
@ -64,11 +94,12 @@ def main():
for chat_id, messages_in_chat_original in messages_by_chat_id.items(): for chat_id, messages_in_chat_original in messages_by_chat_id.items():
messages_in_chat = merge_adjacent_messages(messages_in_chat_original ) messages_in_chat = merge_adjacent_messages(messages_in_chat_original )
messages_by_period = group_messages_by_period(messages_in_chat) messages_by_period = group_messages_by_period(messages_in_chat)
logger.info(' "%s": %d messages, %d periods (%d msg/period avg)', chat_id, len(messages_in_chat_original), len(messages_by_period), len(messages_in_chat_original)/ len(messages_by_period))
for period_key, messages in messages_by_period.items(): for period_key, messages in messages_by_period.items():
output_file = args.output / f'{chat_id} - {period_key}.md' output_file = args.output / chat_id / f'{period_key}.md'
output_file.parent.mkdir(exist_ok=True)
logger.info('Writing % 5d messages to %s', len(messages), output_file) logger.info('Writing % 5d messages to %s', len(messages), output_file)
with open(output_file, 'w') as f: with open(output_file, 'w') as f:
f.write( f.write(