Compare commits
No commits in common. "634bd5b03fc3a451764fa2b89c4771e5aebdb6a4" and "4334f55af052ad83b73854f394ff8f1ac1995484" have entirely different histories.
634bd5b03f
...
4334f55af0
|
@ -26,42 +26,18 @@ Matrix.
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
There are two main import patterns:
|
From the repository root:
|
||||||
|
|
||||||
- One-off archival import: For when you have a large set of messages to import
|
|
||||||
for a service that you don't use very much anymore.
|
|
||||||
- Recurring import: For when you are still using the service, and want to
|
|
||||||
import on a recurring basis.
|
|
||||||
|
|
||||||
This program will by default not overwrite existing files, as the user might
|
|
||||||
have modified them.
|
|
||||||
|
|
||||||
Special consideration must be taken for recurring imports if you expect to be
|
|
||||||
modifying the resulting files, for example if you are inserting links
|
|
||||||
using Obsidian's unlinked mentions feature. You might want to use the
|
|
||||||
`--skip-this-period` flag to avoid importing the current period until it has
|
|
||||||
become the last period. That way you won't accidentally modify the log, because
|
|
||||||
it has been finalized.
|
|
||||||
|
|
||||||
### One-off
|
|
||||||
|
|
||||||
This is the recommended command for the one-off case:
|
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER
|
python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER
|
||||||
```
|
```
|
||||||
|
|
||||||
### Recurring
|
It was made specifically for import into Obsidian, so it might not suit your
|
||||||
|
purposes, but it shouldn't be too difficult to adjust the formatting code.
|
||||||
This is the recommended command for the recurring import case:
|
|
||||||
|
|
||||||
```bash
|
|
||||||
python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER --skip-this-period --period month
|
|
||||||
```
|
|
||||||
|
|
||||||
## TODO
|
## TODO
|
||||||
|
|
||||||
- [ ] SyncTech: Decode MMS parts and reconstruct image attachments.
|
- [ ] Decode MMS parts and reconstruct image attachments.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import dataclasses
|
import dataclasses
|
||||||
|
|
|
@ -1,8 +1,7 @@
|
||||||
import argparse
|
import argparse
|
||||||
import dataclasses
|
import dataclasses
|
||||||
import datetime
|
|
||||||
import logging
|
import logging
|
||||||
from collections.abc import Callable, Iterable, Iterator, Mapping
|
from collections.abc import Iterable, Iterator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from . import (
|
from . import (
|
||||||
|
@ -17,19 +16,16 @@ from .markdown import format_messages
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def group_messages(
|
def group_messages(messages: Iterable[Message], key) -> dict[str, list[Message]]:
|
||||||
messages: Iterable[Message],
|
|
||||||
key_fn: Callable[[Message], str],
|
|
||||||
) -> dict[str, list[Message]]:
|
|
||||||
by_key: dict[str, list[Message]] = {}
|
by_key: dict[str, list[Message]] = {}
|
||||||
for msg in messages:
|
for msg in messages:
|
||||||
by_key.setdefault(key_fn(msg), []).append(msg)
|
by_key.setdefault(key(msg), []).append(msg)
|
||||||
del msg
|
del msg
|
||||||
return by_key
|
return by_key
|
||||||
|
|
||||||
|
|
||||||
def group_messages_by_chat_id(messages: Iterable[Message]) -> dict[str, list[Message]]:
|
def group_messages_by_chat_id(messages: Iterable[Message]) -> dict[str, list[Message]]:
|
||||||
return group_messages(messages, key_fn=lambda msg: msg.chat_id)
|
return group_messages(messages, key=lambda msg: msg.chat_id)
|
||||||
|
|
||||||
|
|
||||||
def year_and_month_period_key(msg: Message):
|
def year_and_month_period_key(msg: Message):
|
||||||
|
@ -47,38 +43,24 @@ def year_quarter_period_key(msg: Message):
|
||||||
|
|
||||||
MAX_AVERAGE_MESSAGES_PER_PERIOD = 120
|
MAX_AVERAGE_MESSAGES_PER_PERIOD = 120
|
||||||
|
|
||||||
TOO_FEW_MESSAGES_TO_CARE = 2
|
|
||||||
|
|
||||||
PERIOD_KEYS_BY_NAME: Mapping[str, Callable[[Message], str]] = {
|
def group_messages_by_period(messages: Iterable[Message]) -> dict[str, list[Message]]:
|
||||||
'full': (lambda msg: 'full'),
|
possible_period_keys = [
|
||||||
'year': year_period_key,
|
(lambda msg: 'full'),
|
||||||
'quarter': year_quarter_period_key,
|
year_period_key,
|
||||||
'month': year_and_month_period_key,
|
year_quarter_period_key,
|
||||||
}
|
year_and_month_period_key,
|
||||||
|
]
|
||||||
|
|
||||||
|
for period_key in possible_period_keys:
|
||||||
def group_messages_by_period(
|
grouped = group_messages(messages, key=period_key)
|
||||||
messages: Iterable[Message],
|
|
||||||
period_key: str | None = None,
|
|
||||||
) -> tuple[dict[str, list[Message]], Callable[[Message], str]]:
|
|
||||||
# Determine key function
|
|
||||||
possible_period_keys: Iterable[Callable[[Message], str]] = (
|
|
||||||
PERIOD_KEYS_BY_NAME.values()
|
|
||||||
)
|
|
||||||
if period_key is not None:
|
|
||||||
possible_period_keys = [PERIOD_KEYS_BY_NAME[period_key]]
|
|
||||||
del period_key
|
|
||||||
|
|
||||||
# Group by key
|
|
||||||
for period_key_fn in possible_period_keys:
|
|
||||||
grouped = group_messages(messages, key_fn=period_key_fn)
|
|
||||||
average_num_messages = sum(len(grouped[k]) for k in grouped) / len(grouped)
|
average_num_messages = sum(len(grouped[k]) for k in grouped) / len(grouped)
|
||||||
if average_num_messages <= MAX_AVERAGE_MESSAGES_PER_PERIOD:
|
if average_num_messages <= MAX_AVERAGE_MESSAGES_PER_PERIOD:
|
||||||
break
|
break
|
||||||
|
|
||||||
del average_num_messages
|
del period_key, average_num_messages
|
||||||
|
|
||||||
return grouped, period_key_fn
|
return grouped
|
||||||
|
|
||||||
|
|
||||||
def replace_myself(messages: Iterable[Message], myself: str) -> Iterator[Message]:
|
def replace_myself(messages: Iterable[Message], myself: str) -> Iterator[Message]:
|
||||||
|
@ -95,17 +77,6 @@ def parse_args():
|
||||||
parser.add_argument('--synctech', type=Path, dest='synctech_sms_backup_file')
|
parser.add_argument('--synctech', type=Path, dest='synctech_sms_backup_file')
|
||||||
parser.add_argument('--output', type=Path)
|
parser.add_argument('--output', type=Path)
|
||||||
parser.add_argument('--myself', type=str, default='Myself')
|
parser.add_argument('--myself', type=str, default='Myself')
|
||||||
parser.add_argument('--overwrite', action='store_true', dest='overwrite_files')
|
|
||||||
parser.add_argument(
|
|
||||||
'--period',
|
|
||||||
dest='period_key',
|
|
||||||
choices=list(PERIOD_KEYS_BY_NAME.keys()),
|
|
||||||
)
|
|
||||||
parser.add_argument(
|
|
||||||
'--skip-this-period',
|
|
||||||
action='store_true',
|
|
||||||
dest='skip_this_period',
|
|
||||||
)
|
|
||||||
return parser.parse_args()
|
return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
@ -137,14 +108,11 @@ def main():
|
||||||
|
|
||||||
for chat_id, messages_in_chat_original in messages_by_chat_id.items():
|
for chat_id, messages_in_chat_original in messages_by_chat_id.items():
|
||||||
messages_in_chat = merge_adjacent_messages(messages_in_chat_original)
|
messages_in_chat = merge_adjacent_messages(messages_in_chat_original)
|
||||||
if len(messages_in_chat) <= TOO_FEW_MESSAGES_TO_CARE:
|
if len(messages_in_chat) <= 2:
|
||||||
logger.info(' "%s": Skipped due to too few messages', chat_id)
|
logger.info(' "%s": Skipped due to too few messages', chat_id)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
messages_by_period, period_key_fn = group_messages_by_period(
|
messages_by_period = group_messages_by_period(messages_in_chat)
|
||||||
messages_in_chat,
|
|
||||||
args.period_key,
|
|
||||||
)
|
|
||||||
logger.info(
|
logger.info(
|
||||||
' "%s": %d messages, %d periods (%d msg/period avg)',
|
' "%s": %d messages, %d periods (%d msg/period avg)',
|
||||||
chat_id,
|
chat_id,
|
||||||
|
@ -153,34 +121,22 @@ def main():
|
||||||
len(messages_in_chat_original) / len(messages_by_period),
|
len(messages_in_chat_original) / len(messages_by_period),
|
||||||
)
|
)
|
||||||
|
|
||||||
this_period_name = period_key_fn(Message(datetime.datetime.now(), '', '', ''))
|
for period_key, messages in messages_by_period.items():
|
||||||
|
|
||||||
for period_key_name, messages in messages_by_period.items():
|
|
||||||
file_escaped_chat_id = chat_id.replace(' ', '-')
|
file_escaped_chat_id = chat_id.replace(' ', '-')
|
||||||
output_file = (
|
output_file = (
|
||||||
args.output / chat_id / f'{file_escaped_chat_id}-{period_key_name}.md'
|
args.output / chat_id / f'{file_escaped_chat_id}-{period_key}.md'
|
||||||
)
|
)
|
||||||
|
output_file.parent.mkdir(exist_ok=True)
|
||||||
logger.info('Writing % 5d messages to %s', len(messages), output_file)
|
logger.info('Writing % 5d messages to %s', len(messages), output_file)
|
||||||
|
|
||||||
if this_period_name == period_key_name:
|
|
||||||
logger.info('Skipping due to --skip-this-period: %s', output_file)
|
|
||||||
continue
|
|
||||||
if output_file.exists() and not args.overwrite_files:
|
|
||||||
logger.info('Skipping existing file: %s', output_file)
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Create folders and file
|
|
||||||
output_file.parent.mkdir(exist_ok=True, parents=True)
|
|
||||||
with open(output_file, 'w') as f:
|
with open(output_file, 'w') as f:
|
||||||
f.write(
|
f.write(
|
||||||
format_messages(
|
format_messages(
|
||||||
messages,
|
messages,
|
||||||
title=f'{chat_id} - {period_key_name}',
|
title=f'{chat_id} - {period_key}',
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
del period_key_name, messages, output_file
|
del period_key, messages, output_file
|
||||||
del chat_id, messages_in_chat_original, messages_in_chat
|
del chat_id, messages_in_chat_original, messages_in_chat
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user