2024-07-25 11:05:50 +00:00
|
|
|
"""Scrapes Tavex.dk prices.
|
|
|
|
|
|
|
|
By request of my colleague.
|
|
|
|
"""
|
|
|
|
|
|
|
|
import abc
|
|
|
|
import dataclasses
|
2024-07-25 11:06:05 +00:00
|
|
|
from decimal import Decimal
|
2024-07-25 11:05:50 +00:00
|
|
|
|
|
|
|
import bs4
|
|
|
|
|
|
|
|
import personal_data.html_util
|
|
|
|
import personal_data.parse_util
|
|
|
|
from personal_data.data import DeduplicateMode, Scraper
|
|
|
|
|
2024-07-25 11:06:05 +00:00
|
|
|
|
2024-07-25 11:05:50 +00:00
|
|
|
def parse_dkk_price(dkk: str) -> 'Decimal | None':
    """Parse a Danish-formatted DKK price string into a ``Decimal``.

    Args:
        dkk: Price text such as ``'123,45 DKK'``. A lone ``'-'`` (optionally
            surrounded by whitespace) means no price is listed.

    Returns:
        The price as a ``Decimal``, or ``None`` when the text is ``'-'``.

    Raises:
        decimal.InvalidOperation: If the remaining text is not a number.
    """
    text = dkk.strip()
    if text == '-':
        return None

    number = text.removesuffix('DKK')
    # Drop any whitespace (including non-breaking spaces) used between the
    # amount and the currency code or as digit grouping.
    number = ''.join(number.split())
    # Danish notation uses a decimal comma; Decimal expects a dot.
    return Decimal(number.replace(',', '.'))
|
|
|
|
|
2024-07-25 11:05:50 +00:00
|
|
|
|
|
|
|
@dataclasses.dataclass(frozen=True)
class TavexScraperBase(Scraper):
    """Shared scraping logic for a single Tavex.dk product page.

    Subclasses provide the product URL through ``page_url``; ``scrape``
    downloads that page and yields one row with the listed prices.
    """

    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS

    @staticmethod
    def requires_cfscrape() -> bool:
        """Always return ``True``.

        NOTE(review): presumably tells the framework to fetch through
        cfscrape -- confirm against the ``Scraper`` base class.
        """
        return True

    @staticmethod
    @abc.abstractmethod
    def page_url() -> str:
        """Return the URL of the product page to scrape.

        Declared static because it takes no instance argument and the
        subclasses in this module override it with static methods.
        """

    def scrape(self):
        """Fetch the product page and yield a single price sample.

        Yields:
            A dict with the keys ``time``, ``buy``, ``sell`` and
            ``spread_percentage``. ``buy`` and ``sell`` come from
            ``parse_dkk_price``; ``spread_percentage`` is ``None`` when the
            price row has no fourth cell.

        Raises:
            ValueError: If the price table is not present on the page.
        """
        response = self.session.get(self.page_url())
        response.raise_for_status()

        now = personal_data.parse_util.parse_response_datetime(response)

        soup = bs4.BeautifulSoup(response.content, 'lxml')
        # The return value was never used; the call is kept for whatever
        # in-place effect it has on ``soup`` (TODO confirm it mutates).
        personal_data.html_util.normalize_soup_slightly(
            soup,
            classes=False,
            scripts=True,
        )

        price_table = soup.select_one('.product-poster__box .product-poster__table')
        if price_table is None:
            msg = f'Could not find the price table on {self.page_url()}'
            raise ValueError(msg)

        # Collect the text of every element cell, skipping the bare strings
        # (e.g. whitespace) that sit between tags.
        table = []
        for soup_row in price_table.children:
            if isinstance(soup_row, bs4.NavigableString):
                continue
            table.append(
                [
                    soup_cell.get_text().strip()
                    for soup_cell in soup_row
                    if not isinstance(soup_cell, bs4.NavigableString)
                ],
            )

        # Only the second row is read; its cells 1-3 are buy, sell and spread.
        price_row = table[1]
        if len(price_row) > 3:
            spread_percentage = Decimal(price_row[3].removesuffix('%'))
        else:
            spread_percentage = None

        yield {
            'time': now,
            'buy': parse_dkk_price(price_row[1]),
            'sell': parse_dkk_price(price_row[2]),
            'spread_percentage': spread_percentage,
        }
|
|
|
|
|
2024-07-25 11:06:05 +00:00
|
|
|
|
2024-07-25 11:05:50 +00:00
|
|
|
@dataclasses.dataclass(frozen=True)
class TavexScraperGold(TavexScraperBase):
    """Tavex.dk price scraper for the 1 oz Canadian Maple Leaf gold coin."""

    # Same value as on TavexScraperBase, restated explicitly here.
    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
    dataset_name = 'prices_tavex/guld-1oz-canadisk-maple-leaf-guldmont'

    @staticmethod
    def page_url() -> str:
        """Return the Tavex.dk product page scraped for this dataset."""
        product_page = 'https://tavex.dk/guld/1oz-canadisk-maple-leaf-guldmont/'
        return product_page
|
|
|
|
|
2024-07-25 11:06:05 +00:00
|
|
|
|
2024-07-25 11:05:50 +00:00
|
|
|
@dataclasses.dataclass(frozen=True)
class TavexScraperSilver(TavexScraperBase):
    """Tavex.dk price scraper for the 1 oz American Eagle silver coin (earlier years)."""

    # Same value as on TavexScraperBase, restated explicitly here.
    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
    dataset_name = 'prices_tavex/solv-1-oz-american-eagle-solvmont-tidligere-argange'

    @staticmethod
    def page_url() -> str:
        """Return the Tavex.dk product page scraped for this dataset."""
        product_page = 'https://tavex.dk/solv/1-oz-american-eagle-solvmont-tidligere-argange/'
        return product_page
|