Rework parse_price around composed currency/amount regexes.

fin_defs/parse_price.py:
  * add parse_amount(), which normalises DK-style ("1.338,00") and
    EN-style ("1,338.00") amount strings into a Decimal;
  * build RE_SYM_AMOUNT_CODE, RE_AMOUNT_SYM_CODE, RE_AMOUNT_CODE,
    RE_KR_AMOUNT and RE_AMOUNT_KR from CURRENCY_CODES / CURRENCY_SYMBOLS;
    the currency patterns carry the named groups "code" and "sym" that
    parse_price() reads back via m.group('code') / m.group('sym');
  * parse_price() now lower-cases and strips its input, returns zero in
    the default currency for "free", and returns None when no pattern
    matches;
  * remove parse_usd_price().

test/test_parse_price.py:
  * call parse_price() with FiatCurrency.JPY as the default currency and
    add DKK / USD cases.

diff --git a/fin_defs/parse_price.py b/fin_defs/parse_price.py
index d34cb4e..509783e 100644
--- a/fin_defs/parse_price.py
+++ b/fin_defs/parse_price.py
@@ -2,7 +2,7 @@ import logging
 import re
 from decimal import Decimal
 
-from .data import DKK, USD, Asset, AssetAmount, FiatCurrency
+from .data import DKK, USD, Asset, AssetAmount, FiatCurrency, CURRENCY_CODES, CURRENCY_SYMBOLS
 
 logger = logging.getLogger(__name__)
 
@@ -10,6 +10,54 @@ RE_PRICE_RAW = r'\b(?:dkk|sek|usd|nok|eur)?\s*([1-9][\d.]*[\d](?:,\d+)?)\s*(?:,-
 RE_PRICE = re.compile(RE_PRICE_RAW, flags=re.IGNORECASE)
 
 
+RE_PRODUCT_PRICE_DK = r'-?\d{1,3}(?:\.?\d{3})*(?:,\d\d\.?)?'
+RE_PRODUCT_PRICE_EN = r'-?\d{1,3}(?:,?\d{3})*(?:\.\d\d)?'
+RE_PRODUCT_PRICE_AMOUNT = r'(' + RE_PRODUCT_PRICE_DK + '|' + RE_PRODUCT_PRICE_EN + ')'
+
+
+def parse_amount(price: str) -> Decimal:
+    if re.fullmatch(RE_PRODUCT_PRICE_DK, price):
+        price = price.replace('.', '').replace(',', '.')
+    else:
+        price = price.replace(',', '')
+    return Decimal(price)
+
+
+RE_CURRENCY_CODES = '(?P<code>' + '|'.join(re.escape(c) for c in CURRENCY_CODES) + ')'
+
+RE_CURRENCY_SYMBOLS = (
+    r'(?P<sym>[' + ''.join(re.escape(c) for c in CURRENCY_SYMBOLS.values()) + '])'
+)
+
+RE_SYM_AMOUNT_CODE = re.compile(RE_CURRENCY_SYMBOLS
+    + r'\s*'
+    + RE_PRODUCT_PRICE_AMOUNT
+    + r'(?:\s+'
+    + RE_CURRENCY_CODES
+    + r')?', flags=re.IGNORECASE)
+
+RE_AMOUNT_SYM_CODE = re.compile(
+    RE_PRODUCT_PRICE_AMOUNT
+    + r'\s*'
+    + RE_CURRENCY_SYMBOLS
+    + r'(?:\s+'
+    + RE_CURRENCY_CODES
+    + ')?'
+    , flags=re.IGNORECASE)
+
+RE_AMOUNT_CODE = re.compile(
+    RE_PRODUCT_PRICE_AMOUNT + r'\s+' + RE_CURRENCY_CODES
+    , flags=re.IGNORECASE)
+
+RE_KR_AMOUNT = re.compile(
+    r'kr\.?\s*(' + RE_PRODUCT_PRICE_AMOUNT + ')',
+    flags=re.IGNORECASE)
+
+RE_AMOUNT_KR = re.compile(
+    '(' + RE_PRODUCT_PRICE_AMOUNT + r')\s*kr\.?',
+    flags=re.IGNORECASE)
+
+
 def parse_price(text: str, default_currency: Asset) -> AssetAmount | None:
     """
     Attempts to parse price from the given text.
@@ -18,49 +66,24 @@ def parse_price(text: str, default_currency: Asset) -> AssetAmount | None:
     """
     if isinstance(text, AssetAmount):
         return text
-    text = str(text)
+    text = str(text).lower().strip()
 
-    if m := re.match(r'^Kr\s*([\d.]+(?:,\d+))?$', text):
-        return AssetAmount(
-            DKK,
-            Decimal(m.group(1).replace('.', '').replace(',', '.')),
-        )
-    if m := re.match(r'^(\d+)\s*DKK$', text):
-        return AssetAmount(DKK, Decimal(m.group(1)))
+    if text == 'free':
+        return AssetAmount(default_currency, Decimal(0))
 
-    if m := re.match(r'^\$\s*([0-9.]+)(\s+USD)?$', text):
-        return AssetAmount(USD, Decimal(m.group(1)))
+    code, sym, amount_text = None, None, None
 
-    if text.lower().strip() == 'free':
-        return AssetAmount(default_currency, Decimal(0.0))
-
-    text = str(text).strip().lower().removesuffix('.')
-    if m := RE_PRICE.fullmatch(text):
-        currency = default_currency
-        price_tag = m.group(1).replace('.', '').replace(',', '.')  # TODO
-        if text.endswith('dkk') or text.startswith('dkk'):
-            currency = FiatCurrency('DKK')
-        elif text.endswith('sek') or text.startswith('sek'):
-            currency = FiatCurrency('SEK')
-        elif text.endswith('nok') or text.startswith('nok'):
-            currency = FiatCurrency('NOK')
-        elif text.endswith('usd') or text.startswith('usd'):
-            currency = FiatCurrency('USD')
-
-        return AssetAmount(currency, Decimal(price_tag))
-
-    logger.warning('Unknown price format: %s', text)
-    return None
-
-
-def parse_usd_price(s: str | int) -> AssetAmount:
-    assert s is not None
-    if isinstance(s, str) or isinstance(s, int):
-        text = str(s)
+    if m := RE_SYM_AMOUNT_CODE.fullmatch(text):
+        code, sym, amount_text = m.group('code'), m.group('sym'), m.group(2)
+    elif m := RE_AMOUNT_SYM_CODE.fullmatch(text):
+        code, sym, amount_text = m.group('code'), m.group('sym'), m.group(1)
+    elif m := RE_AMOUNT_CODE.fullmatch(text):
+        code, amount_text = m.group('code'), m.group(1)
+    elif m := (RE_KR_AMOUNT.fullmatch(text) or RE_AMOUNT_KR.fullmatch(text)):
+        code, amount_text = 'DKK', m.group(1)
     else:
-        text = s.text_content()
-    text = text.strip().replace(',', '').removeprefix('$')
-    if text in {'-', ''}:
-        return AssetAmount(USD, Decimal(0))  # TODO
-    dollar_amount = Decimal(text)
-    return AssetAmount(USD, dollar_amount)
+        return None
+
+    currency = CURRENCY_CODES[code.upper()] if code else FiatCurrency.from_currency_symbol(sym)
+    assert currency is not None
+    return AssetAmount(currency, parse_amount(amount_text))
diff --git a/test/test_parse_price.py b/test/test_parse_price.py
index 807e45c..fa39886 100644
--- a/test/test_parse_price.py
+++ b/test/test_parse_price.py
@@ -28,9 +28,36 @@ PRICES_UNPARSABLE = [
 
 @pytest.mark.parametrize(('price_string', 'parsed_amount'), PRICES_PARSABLE)
 def test_parse_price(price_string: str, parsed_amount: AssetAmount):
-    result = parse_price(price_string, parsed_amount.asset)
+    result = parse_price(price_string, FiatCurrency.JPY)
     assert result == parsed_amount
 
 @pytest.mark.parametrize('price_string', PRICES_UNPARSABLE)
 def test_parse_unparsable(price_string: str):
     assert parse_price(price_string, USD) is None
+
+
+def parse_asset_amount(text: str) -> AssetAmount:
+    return parse_price(text, FiatCurrency.JPY)
+
+def test_parse_asset_amount_dkk():
+    assert parse_asset_amount('1338 DKK').amount == 1338
+    assert parse_asset_amount('1338,00 DKK').amount == 1338
+    assert parse_asset_amount('13,38 DKK').amount == Decimal('13.38')
+    assert parse_asset_amount('13.38 DKK').amount == Decimal('13.38')
+    assert parse_asset_amount('1338,00. DKK').amount == 1338
+    assert parse_asset_amount('99,00 kr.').amount == 99
+
+    assert parse_asset_amount('kr 825.00').amount == 825
+    assert parse_asset_amount('kr 825.00').asset == DKK
+    assert parse_asset_amount('kr 825,00').amount == 825
+    assert parse_asset_amount('kr 825,00').asset == DKK
+    assert parse_asset_amount('kr. 825.00').amount == 825
+    assert parse_asset_amount('kr. 825.00').asset == DKK
+
+def test_parse_asset_amount_usd():
+    assert parse_asset_amount('$99').amount == 99
+    assert parse_asset_amount('$99').asset == USD
+
+    assert parse_asset_amount('99$ USD').amount == 99
+    assert parse_asset_amount('99$ USD').asset == USD
+