1
0

Compare commits

..

No commits in common. "634bd5b03fc3a451764fa2b89c4771e5aebdb6a4" and "4334f55af052ad83b73854f394ff8f1ac1995484" have entirely different histories.

2 changed files with 26 additions and 94 deletions

View File

@ -26,42 +26,18 @@ Matrix.
## Usage ## Usage
There are two main import patterns: From the repository root:
- One-off archival import: For when you have a large set of messages to import
for a service that you don't use very much anymore.
- Recurring import: For when you are still using the service, and want to
import on a recurring basis.
This program will by default not overwrite existing files, as the user might
have modified them.
Special consideration must be taken for recurring imports if you expect to be
modifying the resulting files, for example if you are inserting links
using Obsidian's unlinked mentions feature. You might want to use the
`--skip-this-period` flag to avoid importing the current period until it has
become the last period. That way you won't accidentally modify the log, because
it has been finalized.
### One-off
This is the recommended command for the one-off case:
```bash ```bash
python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER
``` ```
### Recurring It was made specifically for import into Obsidian, so it might not suit your
purposes, but it shouldn't be too difficult to adjust the formatting code.
This is the recommended command for the recurring import case:
```bash
python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER --skip-this-period --period month
```
## TODO ## TODO
- [ ] SyncTech: Decode MMS parts and reconstruct image attachments. - [ ] Decode MMS parts and reconstruct image attachments.
""" """
import dataclasses import dataclasses

View File

@ -1,8 +1,7 @@
import argparse import argparse
import dataclasses import dataclasses
import datetime
import logging import logging
from collections.abc import Callable, Iterable, Iterator, Mapping from collections.abc import Iterable, Iterator
from pathlib import Path from pathlib import Path
from . import ( from . import (
@ -17,19 +16,16 @@ from .markdown import format_messages
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def group_messages( def group_messages(messages: Iterable[Message], key) -> dict[str, list[Message]]:
messages: Iterable[Message],
key_fn: Callable[[Message], str],
) -> dict[str, list[Message]]:
by_key: dict[str, list[Message]] = {} by_key: dict[str, list[Message]] = {}
for msg in messages: for msg in messages:
by_key.setdefault(key_fn(msg), []).append(msg) by_key.setdefault(key(msg), []).append(msg)
del msg del msg
return by_key return by_key
def group_messages_by_chat_id(messages: Iterable[Message]) -> dict[str, list[Message]]: def group_messages_by_chat_id(messages: Iterable[Message]) -> dict[str, list[Message]]:
return group_messages(messages, key_fn=lambda msg: msg.chat_id) return group_messages(messages, key=lambda msg: msg.chat_id)
def year_and_month_period_key(msg: Message): def year_and_month_period_key(msg: Message):
@ -47,38 +43,24 @@ def year_quarter_period_key(msg: Message):
MAX_AVERAGE_MESSAGES_PER_PERIOD = 120 MAX_AVERAGE_MESSAGES_PER_PERIOD = 120
TOO_FEW_MESSAGES_TO_CARE = 2
PERIOD_KEYS_BY_NAME: Mapping[str, Callable[[Message], str]] = { def group_messages_by_period(messages: Iterable[Message]) -> dict[str, list[Message]]:
'full': (lambda msg: 'full'), possible_period_keys = [
'year': year_period_key, (lambda msg: 'full'),
'quarter': year_quarter_period_key, year_period_key,
'month': year_and_month_period_key, year_quarter_period_key,
} year_and_month_period_key,
]
for period_key in possible_period_keys:
def group_messages_by_period( grouped = group_messages(messages, key=period_key)
messages: Iterable[Message],
period_key: str | None = None,
) -> tuple[dict[str, list[Message]], Callable[[Message], str]]:
# Determine key function
possible_period_keys: Iterable[Callable[[Message], str]] = (
PERIOD_KEYS_BY_NAME.values()
)
if period_key is not None:
possible_period_keys = [PERIOD_KEYS_BY_NAME[period_key]]
del period_key
# Group by key
for period_key_fn in possible_period_keys:
grouped = group_messages(messages, key_fn=period_key_fn)
average_num_messages = sum(len(grouped[k]) for k in grouped) / len(grouped) average_num_messages = sum(len(grouped[k]) for k in grouped) / len(grouped)
if average_num_messages <= MAX_AVERAGE_MESSAGES_PER_PERIOD: if average_num_messages <= MAX_AVERAGE_MESSAGES_PER_PERIOD:
break break
del average_num_messages del period_key, average_num_messages
return grouped, period_key_fn return grouped
def replace_myself(messages: Iterable[Message], myself: str) -> Iterator[Message]: def replace_myself(messages: Iterable[Message], myself: str) -> Iterator[Message]:
@ -95,17 +77,6 @@ def parse_args():
parser.add_argument('--synctech', type=Path, dest='synctech_sms_backup_file') parser.add_argument('--synctech', type=Path, dest='synctech_sms_backup_file')
parser.add_argument('--output', type=Path) parser.add_argument('--output', type=Path)
parser.add_argument('--myself', type=str, default='Myself') parser.add_argument('--myself', type=str, default='Myself')
parser.add_argument('--overwrite', action='store_true', dest='overwrite_files')
parser.add_argument(
'--period',
dest='period_key',
choices=list(PERIOD_KEYS_BY_NAME.keys()),
)
parser.add_argument(
'--skip-this-period',
action='store_true',
dest='skip_this_period',
)
return parser.parse_args() return parser.parse_args()
@ -137,14 +108,11 @@ def main():
for chat_id, messages_in_chat_original in messages_by_chat_id.items(): for chat_id, messages_in_chat_original in messages_by_chat_id.items():
messages_in_chat = merge_adjacent_messages(messages_in_chat_original) messages_in_chat = merge_adjacent_messages(messages_in_chat_original)
if len(messages_in_chat) <= TOO_FEW_MESSAGES_TO_CARE: if len(messages_in_chat) <= 2:
logger.info(' "%s": Skipped due to too few messages', chat_id) logger.info(' "%s": Skipped due to too few messages', chat_id)
continue continue
messages_by_period, period_key_fn = group_messages_by_period( messages_by_period = group_messages_by_period(messages_in_chat)
messages_in_chat,
args.period_key,
)
logger.info( logger.info(
' "%s": %d messages, %d periods (%d msg/period avg)', ' "%s": %d messages, %d periods (%d msg/period avg)',
chat_id, chat_id,
@ -153,34 +121,22 @@ def main():
len(messages_in_chat_original) / len(messages_by_period), len(messages_in_chat_original) / len(messages_by_period),
) )
this_period_name = period_key_fn(Message(datetime.datetime.now(), '', '', '')) for period_key, messages in messages_by_period.items():
for period_key_name, messages in messages_by_period.items():
file_escaped_chat_id = chat_id.replace(' ', '-') file_escaped_chat_id = chat_id.replace(' ', '-')
output_file = ( output_file = (
args.output / chat_id / f'{file_escaped_chat_id}-{period_key_name}.md' args.output / chat_id / f'{file_escaped_chat_id}-{period_key}.md'
) )
output_file.parent.mkdir(exist_ok=True)
logger.info('Writing % 5d messages to %s', len(messages), output_file) logger.info('Writing % 5d messages to %s', len(messages), output_file)
if this_period_name == period_key_name:
logger.info('Skipping due to --skip-this-period: %s', output_file)
continue
if output_file.exists() and not args.overwrite_files:
logger.info('Skipping existing file: %s', output_file)
continue
# Create folders and file
output_file.parent.mkdir(exist_ok=True, parents=True)
with open(output_file, 'w') as f: with open(output_file, 'w') as f:
f.write( f.write(
format_messages( format_messages(
messages, messages,
title=f'{chat_id} - {period_key_name}', title=f'{chat_id} - {period_key}',
), ),
) )
del period_key_name, messages, output_file del period_key, messages, output_file
del chat_id, messages_in_chat_original, messages_in_chat del chat_id, messages_in_chat_original, messages_in_chat