diff --git a/personal_data/fetchers/tavex.py b/personal_data/fetchers/tavex.py index bff1b4d..b3990d6 100644 --- a/personal_data/fetchers/tavex.py +++ b/personal_data/fetchers/tavex.py @@ -4,12 +4,8 @@ By request of my colleague. """ import abc -from decimal import Decimal import dataclasses -import datetime -import logging -import re -import secrets +from decimal import Decimal import bs4 @@ -17,11 +13,13 @@ import personal_data.html_util import personal_data.parse_util from personal_data.data import DeduplicateMode, Scraper + def parse_dkk_price(dkk: str) -> Decimal: print(dkk) if dkk.strip() == '-': return None - return Decimal(dkk.removesuffix(' DKK').replace(',','.')) + return Decimal(dkk.removesuffix(' DKK').replace(',', '.')) + @dataclasses.dataclass(frozen=True) class TavexScraperBase(Scraper): @@ -54,15 +52,24 @@ class TavexScraperBase(Scraper): for soup_row in soup.children: if isinstance(soup_row, bs4.NavigableString): continue - table.append([soup_cell.get_text().strip() for soup_cell in soup_row if not isinstance(soup_cell, bs4.NavigableString)]) + table.append( + [ + soup_cell.get_text().strip() + for soup_cell in soup_row + if not isinstance(soup_cell, bs4.NavigableString) + ], + ) yield { - 'time': NOW, - 'buy': parse_dkk_price(table[1][1]), - 'sell': parse_dkk_price(table[1][2]), - 'spread_percentage': Decimal(table[1][3].removesuffix('%')) if len(table[1]) > 3 else None + 'time': NOW, + 'buy': parse_dkk_price(table[1][1]), + 'sell': parse_dkk_price(table[1][2]), + 'spread_percentage': Decimal(table[1][3].removesuffix('%')) + if len(table[1]) > 3 + else None, } + @dataclasses.dataclass(frozen=True) class TavexScraperGold(TavexScraperBase): dataset_name = 'prices_tavex_gold' @@ -72,6 +79,7 @@ class TavexScraperGold(TavexScraperBase): def page_url() -> str: return 'https://tavex.dk/guld/1oz-canadisk-maple-leaf-guldmont/' + @dataclasses.dataclass(frozen=True) class TavexScraperSilver(TavexScraperBase): dataset_name = 'prices_tavex_silver' diff --git a/personal_data/html_util.py b/personal_data/html_util.py index e00f8bd..91d2b5d 100644 --- a/personal_data/html_util.py +++ b/personal_data/html_util.py @@ -57,21 +57,22 @@ def normalize_soup(soup) -> bytes: text = normalize_soup_lxml(soup).text_content() return normalize_text(text) + def data_attributes_of_element(e): for attr_key in list(e.attrs.keys()): if attr_key.startswith('data-'): yield attr_key + def has_data_attribute(e) -> bool: for attr_key in data_attributes_of_element(e): return True return False -def normalize_soup_slightly(soup, - classes=True, - scripts=True, - comments=True, - data_attributes=True): + +def normalize_soup_slightly( + soup, classes=True, scripts=True, comments=True, data_attributes=True, +): """Perform soup normalization.""" # Little if any content for tag in HTML_TAGS_MOSTLY_CONTENTLESS: diff --git a/personal_data/parse_util.py b/personal_data/parse_util.py index 7ebeca1..868f55f 100644 --- a/personal_data/parse_util.py +++ b/personal_data/parse_util.py @@ -30,7 +30,10 @@ def parse_duration(text: str) -> datetime.timedelta: def parse_response_datetime(response) -> datetime.datetime: - return datetime.datetime.strptime(response.headers['Date'], FORMAT_DATE_HEADER).replace(tzinfo=datetime.UTC) + return datetime.datetime.strptime( + response.headers['Date'], FORMAT_DATE_HEADER, + ).replace(tzinfo=datetime.UTC) + def parse_time(text: str) -> datetime.datetime: text = text.replace('\n', ' ')