1
0
libpurple-to-markdown/libpurple_to_markdown/synctech_sms.py

90 lines
2.3 KiB
Python
Raw Permalink Normal View History

"""Backend for SyncTech Backup & Restore.
[SyncTech Backup & Restore](https://www.synctech.com.au/sms-backup-restore/)
for Android is a free app for backing up your SMS and MMS messages. It stores
backups as XML, which this backend reads and converts to the standardized
Message format.
"""
2024-10-31 21:38:22 +00:00
import datetime
import logging
2024-10-31 19:36:38 +00:00
from collections.abc import Iterator
from pathlib import Path
import bs4
2024-10-31 21:38:22 +00:00
from .data import MYSELF, Message
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
2024-11-03 16:15:03 +00:00
2024-11-03 16:14:43 +00:00
def is_named_number(num: str) -> bool:
    """Return whether *num* is a sender name rather than a numeric phone number.

    A single leading ``+`` and all spaces are ignored; if what remains can be
    parsed by ``int()`` the value is treated as a phone number, otherwise
    (including the empty string) it is treated as a name.

    Args:
        num: The raw address string.

    Returns:
        ``True`` if *num* does not parse as an integer, ``False`` if it does.
    """
    digits = num.removeprefix('+').replace(' ', '')
    try:
        int(digits)
    except ValueError:
        return True
    return False
2024-11-03 16:15:03 +00:00
2024-10-31 23:36:30 +00:00
def normalize_phone_number(num: str) -> str:
    """Bring a phone number into a ``+<country code><number>`` form.

    Named senders (as decided by ``is_named_number``) are returned untouched.
    For numeric input, spaces are removed and a leading ``00`` international
    prefix is rewritten to ``+``. After that:

    * an 8-character value gets ``+45`` prepended (presumably a Danish local
      number; +45 is the Danish country code),
    * a value of 10 or more characters without a leading ``+`` gets one,
    * anything else is returned as is.

    Args:
        num: The raw address string.

    Returns:
        The normalized number, or *num* unchanged if it is a named sender.
    """
    if is_named_number(num):
        return num

    compact = num.replace(' ', '')
    if compact.startswith('00'):
        compact = '+' + compact[2:]

    if len(compact) == 8:
        return '+45' + compact
    if len(compact) >= 10 and not compact.startswith('+'):
        return '+' + compact
    return compact
2024-10-31 19:36:38 +00:00
def sms_soup_to_message(soup: bs4.BeautifulSoup) -> Message:
    """Convert one parsed ``<sms>`` element into a ``Message``.

    Reads the ``date``, ``address``, ``type`` and ``body`` attributes (all
    required) and the optional ``contact_name`` attribute.

    * ``date`` is divided by 1000 before being handed to
      ``datetime.fromtimestamp``, i.e. it is treated as epoch milliseconds;
      the resulting datetime is naive local time.
    * The other party is identified by the contact name when available
      (``'(Unknown)'`` counts as unavailable), otherwise by the normalized
      phone number. A named sender address is used as the contact name.
    * A ``type`` of ``'2'`` is attributed to ``MYSELF``; any other value is
      attributed to the other party.

    Args:
        soup: The ``<sms>`` element to convert.

    Returns:
        The message, with a chat id of ``'SMS <other party>'`` (or just
        ``'SMS'`` when the other party cannot be identified).
    """
    # TODO: Require myself
    epoch_seconds = int(soup['date']) / 1000
    sent_at = datetime.datetime.fromtimestamp(epoch_seconds)

    address = normalize_phone_number(soup['address'])
    if is_named_number(address):
        # Alphanumeric sender: the address itself is the name, no number.
        contact_name, phone_num = address, None
    else:
        contact_name, phone_num = soup.get('contact_name') or address, address
    if contact_name == '(Unknown)':
        contact_name = None

    counterpart = contact_name or phone_num
    sender = MYSELF if soup['type'] == '2' else counterpart
    text = soup['body']

    chat_id = ' '.join(part for part in ('SMS', counterpart) if part)
    return Message(sent_at, sender, text, chat_id=chat_id)
2024-11-03 22:58:40 +00:00
def select_newest_file_in_dir(path: Path) -> Path:
    """Return the most recently modified SMS backup file directly inside *path*.

    Only entries whose suffix is ``.xml`` and whose file name contains
    ``sms`` are considered. Subdirectories are not searched.

    Args:
        path: Directory to look in.

    Returns:
        The candidate with the greatest modification time.

    Raises:
        ValueError: If the directory contains no matching file.
    """
    candidates = [
        p for p in path.iterdir() if p.suffix == '.xml' and 'sms' in p.name
    ]
    if not candidates:
        msg = f'No SMS backup XML file found in {path}'
        raise ValueError(msg)
    # Modification time, not access time: atime is bumped by merely reading
    # a file and is not maintained at all on noatime mounts.
    return max(candidates, key=lambda p: p.stat().st_mtime_ns)
2024-10-31 19:36:38 +00:00
def parse_messages_in_backup_xml_file(path: Path) -> Iterator[Message]:
    """Yield a ``Message`` for every ``<sms>`` element in a backup file.

    This is a generator: nothing is read or logged until iteration starts.

    Args:
        path: Either a backup XML file, or a directory, in which case the
            file chosen by ``select_newest_file_in_dir`` is parsed.

    Yields:
        One ``Message`` per ``<sms>`` element, in document order.
    """
    if path.is_dir():
        logger.info('%s is a dir. Finding newest backup in directory', path)
        path = select_newest_file_in_dir(path)
        logger.info('Found: %s', path)

    logger.info('Parsing %s', path)
    # Open in binary mode so the XML parser decodes the bytes according to
    # the document's own encoding declaration instead of the locale default.
    with open(path, 'rb') as f:
        soup = bs4.BeautifulSoup(f, 'lxml-xml')

    # The document is fully parsed above, so the file can already be closed.
    for sms in soup.find_all('sms'):
        yield sms_soup_to_message(sms)