From ff4a975c796def38b1abf34120d4a937ca473584 Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Sat, 17 May 2025 15:45:23 +0200 Subject: [PATCH] Improved support for whitespace in prices --- fin_defs/parse_price.py | 11 +++++------ test/test_parse_price.py | 14 ++++++++++++++ 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/fin_defs/parse_price.py b/fin_defs/parse_price.py index fc9fb5e..1476369 100644 --- a/fin_defs/parse_price.py +++ b/fin_defs/parse_price.py @@ -12,16 +12,14 @@ from .data import ( logger = logging.getLogger(__name__) -RE_PRICE_RAW = r'\b(?:dkk|sek|usd|nok|eur)?\s*([1-9][\d.]*[\d](?:,\d+)?)\s*(?:,-|:-|.-|;-)?\s*(?:(?:kr|kroner|krone|dkk|sek|usd|eur|nok)\b)?\.?' - -RE_PRICE = re.compile(RE_PRICE_RAW, flags=re.IGNORECASE) - -RE_PRODUCT_PRICE_DK = r'-?[1-9]\d{0,2}(?:\.?\d{3})*(?:,\d\d\.?)?' -RE_PRODUCT_PRICE_EN = r'-?[1-9]\d{0,2}(?:,?\d{3})*(?:\.\d\d)?' +RE_PRODUCT_PRICE_DK = r'-?[1-9]\d{0,2}(?:[\.\s]?\d{3})*(?:,\d\d\.?)?' +RE_PRODUCT_PRICE_EN = r'-?[1-9]\d{0,2}(?:[,\s]?\d{3})*(?:\.\d\d)?' 
RE_PRODUCT_PRICE_AMOUNT = r'(?P<amount>' + RE_PRODUCT_PRICE_DK + '|' + RE_PRODUCT_PRICE_EN + ')' +assert re.fullmatch(RE_PRODUCT_PRICE_DK, '1 000 000') def parse_amount(price: str) -> Decimal: + price = re.sub(r'\s', '', price, flags=re.IGNORECASE) if re.fullmatch(RE_PRODUCT_PRICE_DK, price): price = price.replace('.', '').replace(',', '.') else: @@ -109,6 +107,7 @@ def parse_price(text: str, default_currency: Asset) -> AssetAmount | None: elif m := (RE_KR_AMOUNT.fullmatch(text) or RE_AMOUNT_KR.fullmatch(text) or RE_AMOUNT_SUFFIX.fullmatch(text)): code, amount_text = 'DKK', m.group('amount') else: + logger.debug('Unknown format: %s', text) return None currency = ( diff --git a/test/test_parse_price.py b/test/test_parse_price.py index 29be95c..5e579c0 100644 --- a/test/test_parse_price.py +++ b/test/test_parse_price.py @@ -17,6 +17,11 @@ PRICES_PARSABLE = [ (' 100 kr ', dkk(100)), ('349.-', dkk(349)), ('3.000 kr.', dkk(3000)), + ('3.000.000 kr.', dkk(3_000_000)), + ('3.000.000,25 kr.', dkk(3_000_000.25)), + ('3,000 kr.', dkk(3000)), + ('3,000,000 kr.', dkk(3_000_000)), + ('3,000,000.25 kr.', dkk(3_000_000.25)), ('25,00 kr.', dkk(25)), ('300,00 kr.', dkk(300)), ('300kr.', dkk(300)), @@ -25,6 +30,15 @@ PRICES_PARSABLE = [ ('9,99 dkk', dkk('9.99')), ('17900 kr', dkk(17900)), ('650 kr.', dkk(650)), + ('3 650 kr.', dkk(3_650)), + ('3 650 DKK', dkk(3_650)), + ('3 650 650 kr.', dkk(3_650_650)), + ('3 650 650 DKK', dkk(3_650_650)), + ('3 650 kr.', dkk(3_650)), + ('3 650 DKK', dkk(3_650)), + ('3 650 650 kr.', dkk(3_650_650)), + ('3 650 650 DKK', dkk(3_650_650)), + ('2 998 kr', dkk(2998)), ] PRICES_UNPARSABLE = [