Automatically select period key
This commit is contained in:
parent
57cac8daa1
commit
9951348164
|
@ -1,7 +1,8 @@
|
|||
import argparse
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from collections.abc import Iterable
|
||||
from collections.abc import Iterable, Iterator
|
||||
import dataclasses
|
||||
|
||||
from . import (
|
||||
filter_useless_messages,
|
||||
|
@ -10,32 +11,60 @@ from . import (
|
|||
merge_adjacent_messages,
|
||||
synctech_sms,
|
||||
)
|
||||
from .data import Message
|
||||
from .data import Message, MYSELF
|
||||
|
||||
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
||||
|
||||
def group_messages(messages: Iterable[Message], key) -> dict[str, list[Message]]:
    """Group messages into lists keyed by ``key(msg)``.

    Args:
        messages: The messages to group. Iterated exactly once.
        key: Callable taking a single message and returning the key of the
            group that message belongs to.

    Returns:
        A dict mapping each key to the list of messages that produced it.
        Groups appear in order of first occurrence and messages keep their
        input order within each group. Empty input yields an empty dict.
    """
    by_key: dict[str, list[Message]] = {}
    for msg in messages:
        by_key.setdefault(key(msg), []).append(msg)
    # NOTE: there is deliberately no `del msg` here. The loop variable is
    # never bound when `messages` is empty, so deleting it would raise
    # UnboundLocalError on empty input.
    return by_key
|
||||
|
||||
|
||||
def group_messages_by_chat_id(messages: Iterable[Message]) -> dict[str, list[Message]]:
    """Bucket messages by the chat they belong to.

    Delegates to ``group_messages``, using each message's ``chat_id``
    attribute as the grouping key.
    """
    def chat_id_of(message):
        return message.chat_id

    return group_messages(messages, key=chat_id_of)
|
||||
|
||||
def year_and_month_period_key(msg: Message):
    """Return the period key ``YYYY-MM`` for the message's ``sent_at``.

    The month is zero-padded to two digits.
    """
    sent = msg.sent_at
    return '{}-{:02}'.format(sent.year, sent.month)
|
||||
|
||||
def year_period_key(msg: Message):
    """Return the period key for the message's ``sent_at`` year, as a string."""
    sent_year = msg.sent_at.year
    return str(sent_year)
|
||||
|
||||
def year_quarter_period_key(msg: Message):
    """Return the period key ``YYYY-Qn`` for the message's ``sent_at``.

    ``n`` is the calendar quarter: months 1-3 map to Q1, 4-6 to Q2,
    7-9 to Q3 and 10-12 to Q4.
    """
    sent_at = msg.sent_at
    # Floor division keeps this in integer arithmetic; no float round trip.
    quarter = (sent_at.month - 1) // 3 + 1
    return f'{sent_at.year}-Q{quarter}'
|
||||
|
||||
# Threshold on the average number of messages per period.
# group_messages_by_period compares each candidate grouping's average
# against this value and stops at the first grouping that does not exceed it.
MAX_AVERAGE_MESSAGES_PER_PERIOD = 120
|
||||
|
||||
def group_messages_by_period(messages: Iterable[Message]) -> dict[str, list[Message]]:
    """Group messages by time period, choosing the period size automatically.

    Candidate groupings are tried in this order: the full history as a
    single period, per year, per quarter, per month. The first grouping
    whose average number of messages per period does not exceed
    ``MAX_AVERAGE_MESSAGES_PER_PERIOD`` is returned. If none qualifies,
    the last candidate (per month) is returned.

    Args:
        messages: The messages to group. May be a one-shot iterator; it is
            materialised into a list internally.

    Returns:
        A dict mapping period key to the messages in that period. Empty
        input yields an empty dict.
    """
    # The input is grouped once per candidate key below, so a one-shot
    # iterator would be exhausted after the first pass. Materialise it once.
    messages = list(messages)
    if not messages:
        # Nothing to group; this also avoids dividing by zero periods below.
        return {}

    possible_period_keys = [
        (lambda msg: 'Full History'),
        year_period_key,
        year_quarter_period_key,
        year_and_month_period_key,
    ]

    grouped: dict[str, list[Message]] = {}
    for period_key in possible_period_keys:
        grouped = group_messages(messages, key=period_key)
        # Every message lands in exactly one group, so the total across all
        # groups equals len(messages).
        average_num_messages = len(messages) / len(grouped)
        if average_num_messages <= MAX_AVERAGE_MESSAGES_PER_PERIOD:
            break

    return grouped
|
||||
|
||||
def replace_myself(messages: Iterable[Message], myself: str) -> Iterator[Message]:
    """Lazily yield the messages with the ``MYSELF`` sender replaced.

    A message whose ``sender`` equals ``MYSELF`` is yielded as a copy made
    with ``dataclasses.replace`` with ``sender`` set to ``myself``. Every
    other message is yielded as-is.
    """
    for message in messages:
        is_own_message = message.sender == MYSELF
        yield dataclasses.replace(message, sender=myself) if is_own_message else message
|
||||
|
||||
def parse_args():
    """Parse the command-line arguments of this program.

    Recognised options:
        --purple PATH    stored as ``purple_folder``
        --synctech PATH  stored as ``synctech_sms_backup_file``
        --output PATH    stored as ``output``
        --myself NAME    stored as ``myself`` (default ``'Myself'``)

    Returns:
        The ``argparse.Namespace`` produced by parsing ``sys.argv``.
    """
    option_specs = (
        ('--purple', {'type': Path, 'dest': 'purple_folder'}),
        ('--synctech', {'type': Path, 'dest': 'synctech_sms_backup_file'}),
        ('--output', {'type': Path}),
        ('--myself', {'type': str, 'default': 'Myself'}),
    )

    cli = argparse.ArgumentParser()
    for flag, options in option_specs:
        cli.add_argument(flag, **options)
    return cli.parse_args()
|
||||
|
||||
|
||||
|
@ -52,6 +81,7 @@ def main():
|
|||
logger.fatal('No input file given!')
|
||||
return
|
||||
|
||||
all_messages = replace_myself(all_messages, myself=args.myself)
|
||||
all_messages = list(all_messages)
|
||||
logger.info('%d messages after loading', len(all_messages))
|
||||
|
||||
|
@ -64,11 +94,12 @@ def main():
|
|||
|
||||
for chat_id, messages_in_chat_original in messages_by_chat_id.items():
|
||||
messages_in_chat = merge_adjacent_messages(messages_in_chat_original )
|
||||
|
||||
messages_by_period = group_messages_by_period(messages_in_chat)
|
||||
logger.info(' "%s": %d messages, %d periods (%d msg/period avg)', chat_id, len(messages_in_chat_original), len(messages_by_period), len(messages_in_chat_original)/ len(messages_by_period))
|
||||
|
||||
for period_key, messages in messages_by_period.items():
|
||||
output_file = args.output / f'{chat_id} - {period_key}.md'
|
||||
output_file = args.output / chat_id / f'{period_key}.md'
|
||||
output_file.parent.mkdir(exist_ok=True)
|
||||
logger.info('Writing % 5d messages to %s', len(messages), output_file)
|
||||
with open(output_file, 'w') as f:
|
||||
f.write(
|
||||
|
|
Loading…
Reference in New Issue
Block a user