More options for recurring import
This commit is contained in:
parent
4334f55af0
commit
a4df23e8ff
|
@ -26,18 +26,42 @@ Matrix.
|
|||
|
||||
## Usage
|
||||
|
||||
From the repository root:
|
||||
There are two main import patterns:
|
||||
|
||||
- One-off archival import: For when you have a large set of messages to import
|
||||
for a service that you don't use very much anymore.
|
||||
- Recurring import: For when you are still using the service, and want to
|
||||
import on a recurring basis.
|
||||
|
||||
This program will by default not overwrite existing files, as the user might
|
||||
have modified them.
|
||||
|
||||
Special consideration must be taken for recurring imports if you expect to be
|
||||
modifying the resulting files, for example if you are inserting links
|
||||
using Obsidian's unlinked mentions feature. You might want to use the
|
||||
`--skip-this-period` flag to avoid importing the current period until it has
|
||||
become the last period. That way you won't accidentally modify the log, because
|
||||
it has been finalized.
|
||||
|
||||
### One-off
|
||||
|
||||
This is the recommended command for the one-off case:
|
||||
|
||||
```bash
|
||||
python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER
|
||||
```
|
||||
|
||||
It was made specifically for import into Obsidian, so it might not suit your
|
||||
purposes, but it shouldn't be too difficult to adjust the formatting code.
|
||||
### Recurring
|
||||
|
||||
This is the recommended command for the recurring import case:
|
||||
|
||||
```bash
|
||||
python -m libpurple_to_markdown LOG_DIRECTORY --output OUTPUT_FOLDER --skip-this-period --period month
|
||||
```
|
||||
|
||||
## TODO
|
||||
|
||||
- [ ] Decode MMS parts and reconstruct image attachments.
|
||||
- [ ] SyncTech: Decode MMS parts and reconstruct image attachments.
|
||||
"""
|
||||
|
||||
import dataclasses
|
||||
|
|
|
@ -43,24 +43,27 @@ def year_quarter_period_key(msg: Message):
|
|||
|
||||
MAX_AVERAGE_MESSAGES_PER_PERIOD = 120
|
||||
|
||||
PERIOD_KEYS_BY_NAME = {
|
||||
'full': (lambda msg: 'full'),
|
||||
'year': year_period_key,
|
||||
'quarter': year_quarter_period_key,
|
||||
'month': year_and_month_period_key,
|
||||
}
|
||||
|
||||
def group_messages_by_period(
    messages: Iterable[Message],
    period_key: str | None = None,
) -> 'tuple[dict[str, list[Message]], Callable[[Message], str]]':
    """Group messages into periods and report which period key function was used.

    When ``period_key`` is given, it must be one of the names in
    ``PERIOD_KEYS_BY_NAME`` and that granularity is used unconditionally.

    When ``period_key`` is ``None``, the key functions in
    ``PERIOD_KEYS_BY_NAME`` are tried in their declared order and the first
    one whose average group size does not exceed
    ``MAX_AVERAGE_MESSAGES_PER_PERIOD`` wins. If none qualifies, the result of
    the last one tried is returned.

    Args:
        messages: The messages to group. They are passed to ``group_messages``
            once per candidate key function.
        period_key: Name of the period granularity to force, or ``None`` to
            pick one automatically.

    Returns:
        A ``(grouped, period_key_fn)`` tuple: the grouping produced by
        ``group_messages`` and the key function that produced it.

    Raises:
        KeyError: If ``period_key`` is not a key of ``PERIOD_KEYS_BY_NAME``.
    """
    if period_key is not None:
        candidate_key_fns = [PERIOD_KEYS_BY_NAME[period_key]]
    else:
        candidate_key_fns = list(PERIOD_KEYS_BY_NAME.values())

    for period_key_fn in candidate_key_fns:
        # Group with the key *function*; `period_key` is only the optional
        # name used to look that function up.
        grouped = group_messages(messages, key=period_key_fn)
        if not grouped:
            # Nothing to average over; avoid dividing by zero below.
            break
        average_num_messages = sum(len(grouped[k]) for k in grouped) / len(grouped)
        if average_num_messages <= MAX_AVERAGE_MESSAGES_PER_PERIOD:
            break

    return grouped, period_key_fn
|
||||
|
||||
|
||||
def replace_myself(messages: Iterable[Message], myself: str) -> Iterator[Message]:
|
||||
|
@ -77,9 +80,13 @@ def parse_args():
|
|||
parser.add_argument('--synctech', type=Path, dest='synctech_sms_backup_file')
|
||||
parser.add_argument('--output', type=Path)
|
||||
parser.add_argument('--myself', type=str, default='Myself')
|
||||
parser.add_argument('--overwrite', action='store_true', dest='overwrite_files')
|
||||
parser.add_argument('--period', dest='period_key', values=list(PERIOD_KEYS_BY_NAME.keys()))
|
||||
parser.add_argument('--skip-this-period', action='store_true', dest='skip_this_period')
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
|
||||
def main():
|
||||
logging.basicConfig()
|
||||
logging.getLogger().setLevel('INFO')
|
||||
|
@ -112,7 +119,7 @@ def main():
|
|||
logger.info(' "%s": Skipped due to too few messages', chat_id)
|
||||
continue
|
||||
|
||||
messages_by_period = group_messages_by_period(messages_in_chat)
|
||||
messages_by_period, period_key_fn = group_messages_by_period(messages_in_chat, args.period_key)
|
||||
logger.info(
|
||||
' "%s": %d messages, %d periods (%d msg/period avg)',
|
||||
chat_id,
|
||||
|
@ -121,22 +128,34 @@ def main():
|
|||
len(messages_in_chat_original) / len(messages_by_period),
|
||||
)
|
||||
|
||||
for period_key, messages in messages_by_period.items():
|
||||
this_period_name = period_key_fn(datetime.datetime.now())
|
||||
|
||||
for period_key_name, messages in messages_by_period.items():
|
||||
file_escaped_chat_id = chat_id.replace(' ', '-')
|
||||
output_file = (
|
||||
args.output / chat_id / f'{file_escaped_chat_id}-{period_key}.md'
|
||||
args.output / chat_id / f'{file_escaped_chat_id}-{period_key_name}.md'
|
||||
)
|
||||
output_file.parent.mkdir(exist_ok=True)
|
||||
|
||||
logger.info('Writing % 5d messages to %s', len(messages), output_file)
|
||||
|
||||
if this_period_name == period_key_name:
|
||||
logger.info('Skipping due to --skip-this-period: %s', output_file)
|
||||
continue
|
||||
if output_file.exists() and not args.overwrite_files:
|
||||
logger.info('Skipping existing file: %s', output_file)
|
||||
continue
|
||||
|
||||
# Create folders and file
|
||||
output_file.parent.mkdir(exist_ok=True)
|
||||
with open(output_file, 'w') as f:
|
||||
f.write(
|
||||
format_messages(
|
||||
messages,
|
||||
title=f'{chat_id} - {period_key}',
|
||||
title=f'{chat_id} - {period_key_name}',
|
||||
),
|
||||
)
|
||||
|
||||
del period_key, messages, output_file
|
||||
del period_key_name, messages, output_file
|
||||
del chat_id, messages_in_chat_original, messages_in_chat
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user