"""Scrapes Tavex.dk prices. By request of my colleague. """ import abc import dataclasses from decimal import Decimal import bs4 import personal_data.html_util import personal_data.parse_util from personal_data.data import DeduplicateMode, Scraper def parse_dkk_price(dkk: str) -> Decimal: print(dkk) if dkk.strip() == '-': return None return Decimal(dkk.removesuffix(' DKK').replace(',', '.')) @dataclasses.dataclass(frozen=True) class TavexScraperBase(Scraper): deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS @staticmethod def requires_cfscrape() -> bool: return True @abc.abstractmethod def page_url() -> str: pass def scrape(self): response = self.session.get(self.page_url()) response.raise_for_status() NOW = personal_data.parse_util.parse_response_datetime(response) soup = bs4.BeautifulSoup(response.content, 'lxml') soup_page = personal_data.html_util.normalize_soup_slightly( soup, classes=False, scripts=True, ) soup = soup.select_one('.product-poster__box .product-poster__table') table = [] for soup_row in soup.children: if isinstance(soup_row, bs4.NavigableString): continue table.append( [ soup_cell.get_text().strip() for soup_cell in soup_row if not isinstance(soup_cell, bs4.NavigableString) ], ) yield { 'time': NOW, 'buy': parse_dkk_price(table[1][1]), 'sell': parse_dkk_price(table[1][2]), 'spread_percentage': Decimal(table[1][3].removesuffix('%')) if len(table[1]) > 3 else None, } @dataclasses.dataclass(frozen=True) class TavexScraperGold(TavexScraperBase): dataset_name = 'prices_tavex_gold' deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS @staticmethod def page_url() -> str: return 'https://tavex.dk/guld/1oz-canadisk-maple-leaf-guldmont/' @dataclasses.dataclass(frozen=True) class TavexScraperSilver(TavexScraperBase): dataset_name = 'prices_tavex_silver' deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS @staticmethod def page_url() -> str: return 'https://tavex.dk/solv/1-oz-american-eagle-solvmont-tidligere-argange/'