Automatically select period key
This commit is contained in:
parent
57cac8daa1
commit
9951348164
|
@ -1,7 +1,8 @@
|
||||||
import argparse
|
import argparse
|
||||||
import logging
|
import logging
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from collections.abc import Iterable
|
from collections.abc import Iterable, Iterator
|
||||||
|
import dataclasses
|
||||||
|
|
||||||
from . import (
|
from . import (
|
||||||
filter_useless_messages,
|
filter_useless_messages,
|
||||||
|
@ -10,32 +11,60 @@ from . import (
|
||||||
merge_adjacent_messages,
|
merge_adjacent_messages,
|
||||||
synctech_sms,
|
synctech_sms,
|
||||||
)
|
)
|
||||||
from .data import Message
|
from .data import Message, MYSELF
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
def group_messages(messages: Iterable[Message], key) -> dict[str, list[Message]]:
    """Group messages into lists keyed by ``key(msg)``.

    Args:
        messages: Messages to group; iterated exactly once.
        key: Callable that takes a message and returns its group key.

    Returns:
        A dict mapping each key to the messages that produced it. Keys
        appear in first-seen order and every list keeps the input order.
        An empty input yields an empty dict.
    """
    by_key: dict[str, list[Message]] = {}
    for msg in messages:
        by_key.setdefault(key(msg), []).append(msg)
    # Deliberately no trailing `del msg`: the loop variable is never bound
    # when `messages` is empty, so deleting it would raise UnboundLocalError.
    return by_key
|
||||||
|
|
||||||
|
|
||||||
def group_messages_by_chat_id(messages: Iterable[Message]) -> dict[str, list[Message]]:
    """Group messages by their ``chat_id`` attribute.

    Delegates to ``group_messages`` instead of repeating the grouping loop.

    Args:
        messages: Messages to group.

    Returns:
        A dict mapping each chat id to the messages carrying it.
    """
    return group_messages(messages, key=lambda msg: msg.chat_id)


def year_and_month_period_key(msg: Message):
    """Return the monthly period key for ``msg``.

    The key is ``'<year>-<month>'`` built from ``msg.sent_at``, with the
    month zero-padded to two digits (for example ``'2021-03'``).
    """
    return f'{msg.sent_at.year}-{msg.sent_at.month:02}'
|
|
||||||
|
def year_period_key(msg: Message):
    """Return the yearly period key: the year ``msg`` was sent, as a string."""
    sent_at = msg.sent_at
    return str(sent_at.year)
|
||||||
|
|
||||||
|
def year_quarter_period_key(msg: Message):
    """Return the quarterly period key for ``msg``.

    The key is ``'<year>-Q<n>'`` built from ``msg.sent_at``, where ``n`` is
    1 for January-March, 2 for April-June, 3 for July-September and 4 for
    October-December.
    """
    sent_at = msg.sent_at
    # Floor division keeps the arithmetic in integers; no float round-trip.
    quarter = (sent_at.month - 1) // 3 + 1
    return f'{sent_at.year}-Q{quarter}'
|
||||||
|
|
||||||
|
MAX_AVERAGE_MESSAGES_PER_PERIOD = 120
|
||||||
|
|
||||||
def group_messages_by_period(messages: Iterable[Message]) -> dict[str, list[Message]]:
    """Group messages by the coarsest period that keeps groups small enough.

    Candidate period keys are tried from coarsest to finest: the full
    history, per year, per quarter, per month. The first grouping whose
    average group size is at most ``MAX_AVERAGE_MESSAGES_PER_PERIOD`` is
    returned. If none qualifies, the finest (per month) grouping is
    returned.

    Args:
        messages: Messages to group. Any iterable is accepted, including a
            one-shot generator.

    Returns:
        A dict mapping period key to the messages in that period, or an
        empty dict when there are no messages.
    """
    # Materialise once: the candidates below each need a full pass over the
    # messages, and a generator would be exhausted after the first pass.
    messages = list(messages)
    if not messages:
        # Nothing to group; also avoids dividing by zero groups below.
        return {}

    possible_period_keys = [
        (lambda msg: 'Full History'),
        year_period_key,
        year_quarter_period_key,
        year_and_month_period_key,
    ]

    grouped: dict[str, list[Message]] = {}
    for period_key in possible_period_keys:
        grouped = group_messages(messages, key=period_key)
        # Every message lands in exactly one group, so the total is len(messages).
        average_num_messages = len(messages) / len(grouped)
        if average_num_messages <= MAX_AVERAGE_MESSAGES_PER_PERIOD:
            break

    return grouped
|
||||||
|
|
||||||
|
def replace_myself(messages: Iterable[Message], myself: str) -> Iterator[Message]:
    """Lazily yield ``messages`` with the placeholder sender substituted.

    A message whose ``sender`` equals ``MYSELF`` is yielded as a copy made
    with ``dataclasses.replace`` whose ``sender`` is ``myself``; every other
    message is yielded unchanged.
    """
    for message in messages:
        is_placeholder = message.sender == MYSELF
        yield (
            dataclasses.replace(message, sender=myself)
            if is_placeholder
            else message
        )
|
||||||
|
|
||||||
def parse_args(argv=None):
    """Parse the command-line options.

    Args:
        argv: Argument strings to parse. ``None`` (the default) lets
            argparse read ``sys.argv[1:]``, so existing callers are
            unaffected.

    Returns:
        An ``argparse.Namespace`` with ``purple_folder``,
        ``synctech_sms_backup_file`` and ``output`` (each a ``Path``, or
        ``None`` when the option is omitted) and ``myself`` (a string,
        default ``'Myself'``).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--purple', type=Path, dest='purple_folder')
    parser.add_argument('--synctech', type=Path, dest='synctech_sms_backup_file')
    parser.add_argument('--output', type=Path)
    parser.add_argument('--myself', type=str, default='Myself')
    return parser.parse_args(argv)
|
||||||
|
|
||||||
|
|
||||||
|
@ -52,6 +81,7 @@ def main():
|
||||||
logger.fatal('No input file given!')
|
logger.fatal('No input file given!')
|
||||||
return
|
return
|
||||||
|
|
||||||
|
all_messages = replace_myself(all_messages, myself=args.myself)
|
||||||
all_messages = list(all_messages)
|
all_messages = list(all_messages)
|
||||||
logger.info('%d messages after loading', len(all_messages))
|
logger.info('%d messages after loading', len(all_messages))
|
||||||
|
|
||||||
|
@ -64,11 +94,12 @@ def main():
|
||||||
|
|
||||||
for chat_id, messages_in_chat_original in messages_by_chat_id.items():
|
for chat_id, messages_in_chat_original in messages_by_chat_id.items():
|
||||||
messages_in_chat = merge_adjacent_messages(messages_in_chat_original )
|
messages_in_chat = merge_adjacent_messages(messages_in_chat_original )
|
||||||
|
|
||||||
messages_by_period = group_messages_by_period(messages_in_chat)
|
messages_by_period = group_messages_by_period(messages_in_chat)
|
||||||
|
logger.info(' "%s": %d messages, %d periods (%d msg/period avg)', chat_id, len(messages_in_chat_original), len(messages_by_period), len(messages_in_chat_original)/ len(messages_by_period))
|
||||||
|
|
||||||
for period_key, messages in messages_by_period.items():
|
for period_key, messages in messages_by_period.items():
|
||||||
output_file = args.output / f'{chat_id} - {period_key}.md'
|
output_file = args.output / chat_id / f'{period_key}.md'
|
||||||
|
output_file.parent.mkdir(exist_ok=True)
|
||||||
logger.info('Writing % 5d messages to %s', len(messages), output_file)
|
logger.info('Writing % 5d messages to %s', len(messages), output_file)
|
||||||
with open(output_file, 'w') as f:
|
with open(output_file, 'w') as f:
|
||||||
f.write(
|
f.write(
|
||||||
|
|
Loading…
Reference in New Issue
Block a user