1
0

Compare commits

..

2 Commits

Author SHA1 Message Date
22aa122388 Ruff
Some checks failed
Python Ruff Code Quality / ruff (push) Failing after 23s
Run Python tests (through Pytest) / Test (push) Failing after 24s
Verify Python project can be installed, loaded and have version checked / Test (push) Successful in 21s
2025-05-14 21:01:50 +02:00
bcefee2dad Improved parsing performance 2025-05-14 21:01:28 +02:00
5 changed files with 121 additions and 48 deletions

View File

@ -17,7 +17,6 @@ Defined hierarchy:
""" """
from ._version import __version__ from ._version import __version__
from .data import * from .data import *
__all__ = [ __all__ = [
@ -27,4 +26,3 @@ __all__ = [
'StockExchange', 'StockExchange',
'__version__', '__version__',
] ]

View File

@ -5,6 +5,7 @@ import re
from collections.abc import Mapping from collections.abc import Mapping
from decimal import Decimal from decimal import Decimal
def parse_id_attr_key_value_pair(attr_datum: str) -> tuple[str, str | int] | None: def parse_id_attr_key_value_pair(attr_datum: str) -> tuple[str, str | int] | None:
attr_datum = attr_datum.strip() attr_datum = attr_datum.strip()
if attr_datum == '': if attr_datum == '':

View File

@ -2,7 +2,13 @@ import logging
import re import re
from decimal import Decimal from decimal import Decimal
from .data import DKK, USD, Asset, AssetAmount, FiatCurrency from .data import (
CURRENCY_CODES,
CURRENCY_SYMBOLS,
Asset,
AssetAmount,
FiatCurrency,
)
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -10,6 +16,61 @@ RE_PRICE_RAW = r'\b(?:dkk|sek|usd|nok|eur)?\s*([1-9][\d.]*[\d](?:,\d+)?)\s*(?:,-
RE_PRICE = re.compile(RE_PRICE_RAW, flags=re.IGNORECASE) RE_PRICE = re.compile(RE_PRICE_RAW, flags=re.IGNORECASE)
# Danish-style amount: optional sign, digits in groups of three with an
# optional '.' between groups, and optionally ',dd' decimals which may be
# followed by a single stray '.' (e.g. '1.338,00' or '1338,00.').
RE_PRODUCT_PRICE_DK = r'-?\d{1,3}(?:\.?\d{3})*(?:,\d\d\.?)?'
# English-style amount: optional sign, digits in groups of three with an
# optional ',' between groups, and optionally '.dd' decimals (e.g. '1,338.00').
RE_PRODUCT_PRICE_EN = r'-?\d{1,3}(?:,?\d{3})*(?:\.\d\d)?'
# Either style wrapped in a single capturing group; the Danish alternative is
# listed (and therefore tried) first.
RE_PRODUCT_PRICE_AMOUNT = r'(' + RE_PRODUCT_PRICE_DK + '|' + RE_PRODUCT_PRICE_EN + ')'


def parse_amount(price: str) -> Decimal:
    """Convert a Danish- or English-formatted amount string to a ``Decimal``.

    If the whole string matches ``RE_PRODUCT_PRICE_DK`` it is treated as
    Danish: every '.' is dropped and ',' becomes the decimal point. Anything
    else is treated as English: every ',' is dropped and the remainder is
    handed to ``Decimal`` unchanged.

    Args:
        price: The amount text, without any currency code or symbol.

    Returns:
        The parsed amount.

    Raises:
        decimal.InvalidOperation: If the normalised text is not a valid
            decimal literal.
    """
    # ``Decimal`` itself tolerates surrounding whitespace, but the Danish
    # ``fullmatch`` does not. Without stripping, a padded Danish amount such
    # as ' 1.000,00 ' would fall through to the English branch and be parsed
    # as 1.00000 instead of 1000.00.
    price = price.strip()
    if re.fullmatch(RE_PRODUCT_PRICE_DK, price):
        price = price.replace('.', '').replace(',', '.')
    else:
        price = price.replace(',', '')
    return Decimal(price)
# Alternation over every entry yielded by iterating CURRENCY_CODES, captured
# as the named group `code`.
RE_CURRENCY_CODES = '(?P<code>{})'.format(
    '|'.join(map(re.escape, CURRENCY_CODES)),
)

# Character class built from the values of CURRENCY_SYMBOLS, captured as the
# named group `sym`.
# NOTE(review): a character class matches one character at a time, so this
# presumably assumes every symbol is a single character - confirm.
RE_CURRENCY_SYMBOLS = '(?P<sym>[{}])'.format(
    ''.join(map(re.escape, CURRENCY_SYMBOLS.values())),
)

# <symbol> <amount> [<code>]: the symbol is group 1, so the amount is group 2.
RE_SYM_AMOUNT_CODE = re.compile(
    rf'{RE_CURRENCY_SYMBOLS}\s*{RE_PRODUCT_PRICE_AMOUNT}(?:\s+{RE_CURRENCY_CODES})?',
    flags=re.IGNORECASE,
)

# <amount> <symbol> [<code>]: the amount is group 1.
RE_AMOUNT_SYM_CODE = re.compile(
    rf'{RE_PRODUCT_PRICE_AMOUNT}\s*{RE_CURRENCY_SYMBOLS}(?:\s+{RE_CURRENCY_CODES})?',
    flags=re.IGNORECASE,
)

# <amount> <code>: at least one whitespace character is required in between.
RE_AMOUNT_CODE = re.compile(
    rf'{RE_PRODUCT_PRICE_AMOUNT}\s+{RE_CURRENCY_CODES}',
    flags=re.IGNORECASE,
)

# 'kr' or 'kr.' before the amount. The extra parentheses wrap the amount's
# own capturing group, so group 1 still holds the amount text.
RE_KR_AMOUNT = re.compile(
    rf'kr\.?\s*({RE_PRODUCT_PRICE_AMOUNT})',
    flags=re.IGNORECASE,
)

# The amount followed by 'kr' or 'kr.'; group 1 holds the amount text.
RE_AMOUNT_KR = re.compile(
    rf'({RE_PRODUCT_PRICE_AMOUNT})\s*kr\.?',
    flags=re.IGNORECASE,
)
def parse_price(text: str, default_currency: Asset) -> AssetAmount | None: def parse_price(text: str, default_currency: Asset) -> AssetAmount | None:
""" """
Attempts to parse price from the given text. Attempts to parse price from the given text.
@ -18,49 +79,26 @@ def parse_price(text: str, default_currency: Asset) -> AssetAmount | None:
""" """
if isinstance(text, AssetAmount): if isinstance(text, AssetAmount):
return text return text
text = str(text) text = str(text).lower().strip()
if m := re.match(r'^Kr\s*([\d.]+(?:,\d+))?$', text): if text == 'free':
return AssetAmount( return AssetAmount(default_currency, Decimal(0))
DKK,
Decimal(m.group(1).replace('.', '').replace(',', '.')),
)
if m := re.match(r'^(\d+)\s*DKK$', text):
return AssetAmount(DKK, Decimal(m.group(1)))
if m := re.match(r'^\$\s*([0-9.]+)(\s+USD)?$', text): code, sym, amount_text = None, None, None
return AssetAmount(USD, Decimal(m.group(1)))
if text.lower().strip() == 'free': if m := RE_SYM_AMOUNT_CODE.fullmatch(text):
return AssetAmount(default_currency, Decimal(0.0)) code, sym, amount_text = m.group('code'), m.group('sym'), m.group(2)
elif m := RE_AMOUNT_SYM_CODE.fullmatch(text):
text = str(text).strip().lower().removesuffix('.') code, sym, amount_text = m.group('code'), m.group('sym'), m.group(1)
if m := RE_PRICE.fullmatch(text): elif m := RE_AMOUNT_CODE.fullmatch(text):
currency = default_currency code, amount_text = m.group('code'), m.group(1)
price_tag = m.group(1).replace('.', '').replace(',', '.') # TODO elif m := (RE_KR_AMOUNT.fullmatch(text) or RE_AMOUNT_KR.fullmatch(text)):
if text.endswith('dkk') or text.startswith('dkk'): code, amount_text = 'DKK', m.group(1)
currency = FiatCurrency('DKK')
elif text.endswith('sek') or text.startswith('sek'):
currency = FiatCurrency('SEK')
elif text.endswith('nok') or text.startswith('nok'):
currency = FiatCurrency('NOK')
elif text.endswith('usd') or text.startswith('usd'):
currency = FiatCurrency('USD')
return AssetAmount(currency, Decimal(price_tag))
logger.warning('Unknown price format: %s', text)
return None
def parse_usd_price(s: str | int) -> AssetAmount:
assert s is not None
if isinstance(s, str) or isinstance(s, int):
text = str(s)
else: else:
text = s.text_content() return None
text = text.strip().replace(',', '').removeprefix('$')
if text in {'-', ''}: currency = (
return AssetAmount(USD, Decimal(0)) # TODO CURRENCY_CODES[code.upper()] if code else FiatCurrency.from_currency_symbol(sym)
dollar_amount = Decimal(text) )
return AssetAmount(USD, dollar_amount) assert currency is not None
return AssetAmount(currency, parse_amount(amount_text))

View File

@ -30,6 +30,7 @@ Defined hierarchy:
PACKAGE_DESCRIPTION_SHORT = """ PACKAGE_DESCRIPTION_SHORT = """
Python library defining base types for financial processing.""".strip() Python library defining base types for financial processing.""".strip()
def parse_version_file(text: str) -> str: def parse_version_file(text: str) -> str:
match = re.match(r'^__version__\s*=\s*(["\'])([\d\.]+)\1$', text) match = re.match(r'^__version__\s*=\s*(["\'])([\d\.]+)\1$', text)
if match is None: if match is None:
@ -37,6 +38,7 @@ def parse_version_file(text: str) -> str:
raise Exception(msg) raise Exception(msg)
return match.group(2) return match.group(2)
with open(PACKAGE_NAME + '/_version.py') as f: with open(PACKAGE_NAME + '/_version.py') as f:
version = parse_version_file(f.read()) version = parse_version_file(f.read())

View File

@ -1,11 +1,15 @@
import pytest
from decimal import Decimal from decimal import Decimal
import pytest
from fin_defs import DKK, USD, AssetAmount, FiatCurrency from fin_defs import DKK, USD, AssetAmount, FiatCurrency
from fin_defs.parse_price import parse_price from fin_defs.parse_price import parse_price
def dkk(amount): def dkk(amount):
return AssetAmount(DKK, Decimal(amount)) return AssetAmount(DKK, Decimal(amount))
PRICES_PARSABLE = [ PRICES_PARSABLE = [
('DKK100', dkk(100)), ('DKK100', dkk(100)),
('100;-', dkk(100)), ('100;-', dkk(100)),
@ -23,14 +27,44 @@ PRICES_PARSABLE = [
] ]
PRICES_UNPARSABLE = [ PRICES_UNPARSABLE = [
'007', '007',
] ]
@pytest.mark.parametrize(('price_string', 'parsed_amount'), PRICES_PARSABLE) @pytest.mark.parametrize(('price_string', 'parsed_amount'), PRICES_PARSABLE)
def test_parse_price(price_string: str, parsed_amount: AssetAmount): def test_parse_price(price_string: str, parsed_amount: AssetAmount):
result = parse_price(price_string, parsed_amount.asset) result = parse_price(price_string, FiatCurrency.JPY)
assert result == parsed_amount assert result == parsed_amount
@pytest.mark.parametrize('price_string', PRICES_UNPARSABLE) @pytest.mark.parametrize('price_string', PRICES_UNPARSABLE)
def test_parse_unparsable(price_string: str): def test_parse_unparsable(price_string: str):
assert parse_price(price_string, USD) is None assert parse_price(price_string, USD) is None
def parse_asset_amount(text: str) -> AssetAmount:
    """Parse ``text`` via ``parse_price`` using JPY as the default currency.

    ``parse_price`` is declared to return ``AssetAmount | None``. This helper
    promises an ``AssetAmount``, so an unparsable input fails here with a
    readable message rather than later as an ``AttributeError`` on ``None``.
    """
    # NOTE(review): JPY is presumably chosen because none of the tested
    # strings denote it, so a silently applied default would be noticeable -
    # confirm.
    result = parse_price(text, FiatCurrency.JPY)
    assert result is not None, f'Could not parse price: {text!r}'
    return result
def test_parse_asset_amount_dkk():
    """Check DKK prices written with a ``DKK`` suffix or a ``kr`` marker."""
    # (input text, expected numeric amount), in the original assertion order.
    amount_cases = [
        ('1338 DKK', 1338),
        ('1338,00 DKK', 1338),
        ('13,38 DKK', Decimal('13.38')),
        ('13.38 DKK', Decimal('13.38')),
        ('1338,00. DKK', 1338),
        ('99,00 kr.', 99),
        ('kr 825.00', 825),
        ('kr 825,00', 825),
        ('kr. 825.00', 825),
    ]
    for price_text, expected_amount in amount_cases:
        assert parse_asset_amount(price_text).amount == expected_amount

    # Inputs with a leading `kr` marker must also resolve to the DKK asset.
    for price_text in ('kr 825.00', 'kr 825,00', 'kr. 825.00'):
        assert parse_asset_amount(price_text).asset == DKK
def test_parse_asset_amount_usd():
    """Check dollar prices with the ``$`` sign before or after the amount."""
    for price_text in ('$99', '99$ USD'):
        assert parse_asset_amount(price_text).amount == 99
        assert parse_asset_amount(price_text).asset == USD