1
0

More options for recurring import

This commit is contained in:
Jon Michael Aanes 2024-11-17 12:04:29 +01:00
parent 4334f55af0
commit a4df23e8ff
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
2 changed files with 63 additions and 20 deletions

View File

@ -26,18 +26,42 @@ Matrix.
## Usage ## Usage
From the repository root: There are two main import patterns:
- One-off archival import: For when you have a large set of messages to import
for a service that you don't use very much anymore.
- Recurring import: For when you are still using the service, and want to
import on a recurring basis.
This program will by default not overwrite existing files, as the user might
have modified them.
Special consideration must be taken for recurring imports if you expect to be
modifying the resulting files, for example if you are inserting links
using Obsidian's unlinked mentions feature. You might want to use the
`--skip-this-period` flag to avoid importing the current period until it has
become the last period. That way you won't accidentally modify the log, because
it has been finalized.
### One-off
This is the recommended command for the one-off case:
```bash ```bash
python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER
``` ```
It was made specifically for import into Obsidian, so it might not suit your ### Recurring
purposes, but it shouldn't be too difficult to adjust the formatting code.
This is the recommended command for the recurring import case:
```bash
python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER --skip-this-period --period month
```
## TODO ## TODO
- [ ] Decode MMS parts and reconstruct image attachments. - [ ] SyncTech: Decode MMS parts and reconstruct image attachments.
""" """
import dataclasses import dataclasses

View File

@ -43,24 +43,27 @@ def year_quarter_period_key(msg: Message):
MAX_AVERAGE_MESSAGES_PER_PERIOD = 120 MAX_AVERAGE_MESSAGES_PER_PERIOD = 120
PERIOD_KEYS_BY_NAME = {
'full': (lambda msg: 'full'),
'year': year_period_key,
'quarter': year_quarter_period_key,
'month': year_and_month_period_key,
}
def group_messages_by_period(messages: Iterable[Message]) -> dict[str, list[Message]]:
possible_period_keys = [
(lambda msg: 'full'),
year_period_key,
year_quarter_period_key,
year_and_month_period_key,
]
for period_key in possible_period_keys: def group_messages_by_period(messages: Iterable[Message], period_key: str | None = None) -> dict[str, list[Message]]:
possible_period_keys = PERIOD_KEYS_BY_NAME.values()
if period_key is not None:
possible_period_keys = [PERIOD_KEYS_BY_NAME[period_key]]
for period_key_fn in possible_period_keys:
grouped = group_messages(messages, key=period_key) grouped = group_messages(messages, key=period_key)
average_num_messages = sum(len(grouped[k]) for k in grouped) / len(grouped) average_num_messages = sum(len(grouped[k]) for k in grouped) / len(grouped)
if average_num_messages <= MAX_AVERAGE_MESSAGES_PER_PERIOD: if average_num_messages <= MAX_AVERAGE_MESSAGES_PER_PERIOD:
break break
del period_key, average_num_messages del average_num_messages
return grouped return grouped, period_key_fn
def replace_myself(messages: Iterable[Message], myself: str) -> Iterator[Message]: def replace_myself(messages: Iterable[Message], myself: str) -> Iterator[Message]:
@ -77,9 +80,13 @@ def parse_args():
parser.add_argument('--synctech', type=Path, dest='synctech_sms_backup_file') parser.add_argument('--synctech', type=Path, dest='synctech_sms_backup_file')
parser.add_argument('--output', type=Path) parser.add_argument('--output', type=Path)
parser.add_argument('--myself', type=str, default='Myself') parser.add_argument('--myself', type=str, default='Myself')
parser.add_argument('--overwrite', action='store_true', dest='overwrite_files')
parser.add_argument('--period', dest='period_key', values=list(PERIOD_KEYS_BY_NAME.keys()))
parser.add_argument('--skip-this-period', action='store_true', dest='skip_this_period')
return parser.parse_args() return parser.parse_args()
def main(): def main():
logging.basicConfig() logging.basicConfig()
logging.getLogger().setLevel('INFO') logging.getLogger().setLevel('INFO')
@ -112,7 +119,7 @@ def main():
logger.info(' "%s": Skipped due to too few messages', chat_id) logger.info(' "%s": Skipped due to too few messages', chat_id)
continue continue
messages_by_period = group_messages_by_period(messages_in_chat) messages_by_period, period_key_fn = group_messages_by_period(messages_in_chat, args.period_key)
logger.info( logger.info(
' "%s": %d messages, %d periods (%d msg/period avg)', ' "%s": %d messages, %d periods (%d msg/period avg)',
chat_id, chat_id,
@ -121,22 +128,34 @@ def main():
len(messages_in_chat_original) / len(messages_by_period), len(messages_in_chat_original) / len(messages_by_period),
) )
for period_key, messages in messages_by_period.items(): this_period_name = period_key_fn(datetime.datetime.now())
for period_key_name, messages in messages_by_period.items():
file_escaped_chat_id = chat_id.replace(' ', '-') file_escaped_chat_id = chat_id.replace(' ', '-')
output_file = ( output_file = (
args.output / chat_id / f'{file_escaped_chat_id}-{period_key}.md' args.output / chat_id / f'{file_escaped_chat_id}-{period_key_name}.md'
) )
output_file.parent.mkdir(exist_ok=True)
logger.info('Writing % 5d messages to %s', len(messages), output_file) logger.info('Writing % 5d messages to %s', len(messages), output_file)
if this_period_name == period_key_name:
logger.info('Skipping due to --skip-this-period: %s', output_file)
continue
if output_file.exists() and not args.overwrite_files:
logger.info('Skipping existing file: %s', output_file)
continue
# Create folders and file
output_file.parent.mkdir(exist_ok=True)
with open(output_file, 'w') as f: with open(output_file, 'w') as f:
f.write( f.write(
format_messages( format_messages(
messages, messages,
title=f'{chat_id} - {period_key}', title=f'{chat_id} - {period_key_name}',
), ),
) )
del period_key, messages, output_file del period_key_name, messages, output_file
del chat_id, messages_in_chat_original, messages_in_chat del chat_id, messages_in_chat_original, messages_in_chat