Improved parsing performance
This commit is contained in:
parent
066b8cdac7
commit
bcefee2dad
|
@ -2,7 +2,7 @@ import logging
|
||||||
import re
|
import re
|
||||||
from decimal import Decimal
|
from decimal import Decimal
|
||||||
|
|
||||||
from .data import DKK, USD, Asset, AssetAmount, FiatCurrency
|
from .data import DKK, USD, Asset, AssetAmount, FiatCurrency, CURRENCY_CODES, CURRENCY_SYMBOLS
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
@ -10,6 +10,54 @@ RE_PRICE_RAW = r'\b(?:dkk|sek|usd|nok|eur)?\s*([1-9][\d.]*[\d](?:,\d+)?)\s*(?:,-
|
||||||
|
|
||||||
RE_PRICE = re.compile(RE_PRICE_RAW, flags=re.IGNORECASE)
|
RE_PRICE = re.compile(RE_PRICE_RAW, flags=re.IGNORECASE)
|
||||||
|
|
||||||
|
# Danish-style amount: optional leading '-', digits optionally grouped in
# thousands with '.', and optionally ',' followed by exactly two decimals.
# A single trailing '.' is tolerated after the decimals (e.g. '1338,00.').
RE_PRODUCT_PRICE_DK = r'-?\d{1,3}(?:\.?\d{3})*(?:,\d\d\.?)?'
# English-style amount: optional leading '-', digits optionally grouped in
# thousands with ',', and optionally '.' followed by exactly two decimals.
RE_PRODUCT_PRICE_EN = r'-?\d{1,3}(?:,?\d{3})*(?:\.\d\d)?'
# Either notation wrapped in one capturing group; the Danish form is tried first.
RE_PRODUCT_PRICE_AMOUNT = r'(' + RE_PRODUCT_PRICE_DK + '|' + RE_PRODUCT_PRICE_EN + ')'

# Compiled once at import time so parse_amount does not have to resolve the
# pattern string on every call.
_RE_PRODUCT_PRICE_DK_COMPILED = re.compile(RE_PRODUCT_PRICE_DK)


def parse_amount(price: str) -> Decimal:
    """Convert a Danish- or English-formatted amount string to a ``Decimal``.

    The Danish notation takes precedence: when the whole string matches
    ``RE_PRODUCT_PRICE_DK``, every '.' is dropped and ',' becomes the decimal
    point. Otherwise the string is treated as English notation and every ','
    is dropped. As a consequence ``'1.338'`` is read as 1338, while
    ``'13.38'`` (which cannot be a Danish thousands group) is read as 13.38.

    Args:
        price: The bare amount text, without currency code or symbol.

    Returns:
        The parsed amount.

    Raises:
        decimal.InvalidOperation: If the normalised text is not a valid
            decimal literal.
    """
    if _RE_PRODUCT_PRICE_DK_COMPILED.fullmatch(price):
        # Danish: '.' is a thousands separator (or the tolerated trailing
        # dot), ',' is the decimal separator.
        normalised = price.replace('.', '').replace(',', '.')
    else:
        # English: ',' is a thousands separator, '.' already the decimal point.
        normalised = price.replace(',', '')
    return Decimal(normalised)
|
||||||
|
|
||||||
|
|
||||||
|
# Alternation over every entry yielded by iterating CURRENCY_CODES, exposed as
# the named group ``code``.
RE_CURRENCY_CODES = '(?P<code>' + '|'.join(re.escape(c) for c in CURRENCY_CODES) + ')'

# Character class built from the values of CURRENCY_SYMBOLS, exposed as the
# named group ``sym``.
# NOTE(review): a character class matches a single character, so this
# presumably relies on every symbol being one character long — confirm.
RE_CURRENCY_SYMBOLS = (
    r'(?P<sym>[' + ''.join(re.escape(c) for c in CURRENCY_SYMBOLS.values()) + '])'
)

# '<symbol><amount>[ <code>]'. The symbol is group 1, so the amount captured by
# RE_PRODUCT_PRICE_AMOUNT is group 2.
RE_SYM_AMOUNT_CODE = re.compile(
    RE_CURRENCY_SYMBOLS
    + r'\s*'
    + RE_PRODUCT_PRICE_AMOUNT
    + r'(?:\s+'
    + RE_CURRENCY_CODES
    + r')?',
    flags=re.IGNORECASE,
)

# '<amount><symbol>[ <code>]'. The amount is group 1.
RE_AMOUNT_SYM_CODE = re.compile(
    RE_PRODUCT_PRICE_AMOUNT
    + r'\s*'
    + RE_CURRENCY_SYMBOLS
    + r'(?:\s+'
    + RE_CURRENCY_CODES
    + ')?',
    flags=re.IGNORECASE,
)

# '<amount> <code>'. Whitespace between the two is mandatory; the amount is
# group 1.
RE_AMOUNT_CODE = re.compile(
    RE_PRODUCT_PRICE_AMOUNT + r'\s+' + RE_CURRENCY_CODES,
    flags=re.IGNORECASE,
)

# 'kr<amount>' / 'kr.<amount>' with optional whitespace. The outer parentheses
# add a second group around the one in RE_PRODUCT_PRICE_AMOUNT; group 1 is the
# amount either way.
RE_KR_AMOUNT = re.compile(
    r'kr\.?\s*(' + RE_PRODUCT_PRICE_AMOUNT + ')',
    flags=re.IGNORECASE,
)

# '<amount>kr' / '<amount>kr.' with optional whitespace; group 1 is the amount.
RE_AMOUNT_KR = re.compile(
    '(' + RE_PRODUCT_PRICE_AMOUNT + r')\s*kr\.?',
    flags=re.IGNORECASE,
)
|
||||||
|
|
||||||
|
|
||||||
def parse_price(text: str, default_currency: Asset) -> AssetAmount | None:
|
def parse_price(text: str, default_currency: Asset) -> AssetAmount | None:
|
||||||
"""
|
"""
|
||||||
Attempts to parse price from the given text.
|
Attempts to parse price from the given text.
|
||||||
|
@ -18,49 +66,24 @@ def parse_price(text: str, default_currency: Asset) -> AssetAmount | None:
|
||||||
"""
|
"""
|
||||||
if isinstance(text, AssetAmount):
|
if isinstance(text, AssetAmount):
|
||||||
return text
|
return text
|
||||||
text = str(text)
|
text = str(text).lower().strip()
|
||||||
|
|
||||||
if m := re.match(r'^Kr\s*([\d.]+(?:,\d+))?$', text):
|
if text == 'free':
|
||||||
return AssetAmount(
|
return AssetAmount(default_currency, Decimal(0))
|
||||||
DKK,
|
|
||||||
Decimal(m.group(1).replace('.', '').replace(',', '.')),
|
|
||||||
)
|
|
||||||
if m := re.match(r'^(\d+)\s*DKK$', text):
|
|
||||||
return AssetAmount(DKK, Decimal(m.group(1)))
|
|
||||||
|
|
||||||
if m := re.match(r'^\$\s*([0-9.]+)(\s+USD)?$', text):
|
code, sym, amount_text = None, None, None
|
||||||
return AssetAmount(USD, Decimal(m.group(1)))
|
|
||||||
|
|
||||||
if text.lower().strip() == 'free':
|
if m := RE_SYM_AMOUNT_CODE.fullmatch(text):
|
||||||
return AssetAmount(default_currency, Decimal(0.0))
|
code, sym, amount_text = m.group('code'), m.group('sym'), m.group(2)
|
||||||
|
elif m := RE_AMOUNT_SYM_CODE.fullmatch(text):
|
||||||
text = str(text).strip().lower().removesuffix('.')
|
code, sym, amount_text = m.group('code'), m.group('sym'), m.group(1)
|
||||||
if m := RE_PRICE.fullmatch(text):
|
elif m := RE_AMOUNT_CODE.fullmatch(text):
|
||||||
currency = default_currency
|
code, amount_text = m.group('code'), m.group(1)
|
||||||
price_tag = m.group(1).replace('.', '').replace(',', '.') # TODO
|
elif m := (RE_KR_AMOUNT.fullmatch(text) or RE_AMOUNT_KR.fullmatch(text)):
|
||||||
if text.endswith('dkk') or text.startswith('dkk'):
|
code, amount_text = 'DKK', m.group(1)
|
||||||
currency = FiatCurrency('DKK')
|
|
||||||
elif text.endswith('sek') or text.startswith('sek'):
|
|
||||||
currency = FiatCurrency('SEK')
|
|
||||||
elif text.endswith('nok') or text.startswith('nok'):
|
|
||||||
currency = FiatCurrency('NOK')
|
|
||||||
elif text.endswith('usd') or text.startswith('usd'):
|
|
||||||
currency = FiatCurrency('USD')
|
|
||||||
|
|
||||||
return AssetAmount(currency, Decimal(price_tag))
|
|
||||||
|
|
||||||
logger.warning('Unknown price format: %s', text)
|
|
||||||
return None
|
|
||||||
|
|
||||||
|
|
||||||
def parse_usd_price(s: str | int) -> AssetAmount:
|
|
||||||
assert s is not None
|
|
||||||
if isinstance(s, str) or isinstance(s, int):
|
|
||||||
text = str(s)
|
|
||||||
else:
|
else:
|
||||||
text = s.text_content()
|
return None
|
||||||
text = text.strip().replace(',', '').removeprefix('$')
|
|
||||||
if text in {'-', ''}:
|
currency = CURRENCY_CODES[code.upper()] if code else FiatCurrency.from_currency_symbol(sym)
|
||||||
return AssetAmount(USD, Decimal(0)) # TODO
|
assert currency is not None
|
||||||
dollar_amount = Decimal(text)
|
return AssetAmount(currency, parse_amount(amount_text))
|
||||||
return AssetAmount(USD, dollar_amount)
|
|
||||||
|
|
|
@ -28,9 +28,36 @@ PRICES_UNPARSABLE = [
|
||||||
|
|
||||||
@pytest.mark.parametrize(('price_string', 'parsed_amount'), PRICES_PARSABLE)
|
@pytest.mark.parametrize(('price_string', 'parsed_amount'), PRICES_PARSABLE)
|
||||||
def test_parse_price(price_string: str, parsed_amount: AssetAmount):
|
def test_parse_price(price_string: str, parsed_amount: AssetAmount):
|
||||||
result = parse_price(price_string, parsed_amount.asset)
|
result = parse_price(price_string, FiatCurrency.JPY)
|
||||||
assert result == parsed_amount
|
assert result == parsed_amount
|
||||||
|
|
||||||
@pytest.mark.parametrize('price_string', PRICES_UNPARSABLE)
|
@pytest.mark.parametrize('price_string', PRICES_UNPARSABLE)
|
||||||
def test_parse_unparsable(price_string: str):
|
def test_parse_unparsable(price_string: str):
|
||||||
assert parse_price(price_string, USD) is None
|
assert parse_price(price_string, USD) is None
|
||||||
|
|
||||||
|
|
||||||
|
def parse_asset_amount(text: str) -> AssetAmount:
    """Parse ``text`` via ``parse_price`` with JPY as the default currency.

    NOTE(review): JPY is presumably used because none of the test inputs are
    in yen, so a correct result shows the currency came from the text itself
    rather than from the default — confirm.

    Fails with an explicit assertion message when ``parse_price`` returns
    ``None``, instead of letting callers hit an ``AttributeError`` on the
    result.
    """
    result = parse_price(text, FiatCurrency.JPY)
    assert result is not None, f'could not parse price: {text!r}'
    return result
|
||||||
|
|
||||||
|
def test_parse_asset_amount_dkk():
    """Check amounts written with a DKK code or a 'kr' marker."""
    # Amount followed by the currency code, in Danish and English notation,
    # including a stray trailing dot after the decimals.
    assert parse_asset_amount('1338 DKK').amount == 1338
    assert parse_asset_amount('1338,00 DKK').amount == 1338
    assert parse_asset_amount('13,38 DKK').amount == Decimal('13.38')
    assert parse_asset_amount('13.38 DKK').amount == Decimal('13.38')
    assert parse_asset_amount('1338,00. DKK').amount == 1338
    assert parse_asset_amount('99,00 kr.').amount == 99

    # 'kr' / 'kr.' prefix: both the amount and the resulting asset are checked.
    assert parse_asset_amount('kr 825.00').amount == 825
    assert parse_asset_amount('kr 825.00').asset == DKK
    assert parse_asset_amount('kr 825,00').amount == 825
    assert parse_asset_amount('kr 825,00').asset == DKK
    assert parse_asset_amount('kr. 825.00').amount == 825
    assert parse_asset_amount('kr. 825.00').asset == DKK
|
||||||
|
|
||||||
|
def test_parse_asset_amount_usd():
    """Check dollar amounts with the symbol before or after the number."""
    # Symbol first, no currency code.
    assert parse_asset_amount('$99').amount == 99
    assert parse_asset_amount('$99').asset == USD

    # Symbol after the amount, followed by an explicit currency code.
    assert parse_asset_amount('99$ USD').amount == 99
    assert parse_asset_amount('99$ USD').asset == USD
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user