1
0

Ruff
Some checks failed
Test Python / Test (push) Failing after 23s

This commit is contained in:
Jon Michael Aanes 2024-07-25 13:06:05 +02:00
parent 9dedb18c4f
commit 231036c14a
3 changed files with 29 additions and 17 deletions

View File

@ -4,12 +4,8 @@ By request of my colleague.
"""
import abc
from decimal import Decimal
import dataclasses
import datetime
import logging
import re
import secrets
from decimal import Decimal
import bs4
@ -17,11 +13,13 @@ import personal_data.html_util
import personal_data.parse_util
from personal_data.data import DeduplicateMode, Scraper
def parse_dkk_price(dkk: str) -> Decimal:
print(dkk)
if dkk.strip() == '-':
return None
return Decimal(dkk.removesuffix(' DKK').replace(',','.'))
return Decimal(dkk.removesuffix(' DKK').replace(',', '.'))
@dataclasses.dataclass(frozen=True)
class TavexScraperBase(Scraper):
@ -54,15 +52,24 @@ class TavexScraperBase(Scraper):
for soup_row in soup.children:
if isinstance(soup_row, bs4.NavigableString):
continue
table.append([soup_cell.get_text().strip() for soup_cell in soup_row if not isinstance(soup_cell, bs4.NavigableString)])
table.append(
[
soup_cell.get_text().strip()
for soup_cell in soup_row
if not isinstance(soup_cell, bs4.NavigableString)
],
)
yield {
'time': NOW,
'buy': parse_dkk_price(table[1][1]),
'sell': parse_dkk_price(table[1][2]),
'spread_percentage': Decimal(table[1][3].removesuffix('%')) if len(table[1]) > 3 else None
'time': NOW,
'buy': parse_dkk_price(table[1][1]),
'sell': parse_dkk_price(table[1][2]),
'spread_percentage': Decimal(table[1][3].removesuffix('%'))
if len(table[1]) > 3
else None,
}
@dataclasses.dataclass(frozen=True)
class TavexScraperGold(TavexScraperBase):
dataset_name = 'prices_tavex_gold'
@ -72,6 +79,7 @@ class TavexScraperGold(TavexScraperBase):
def page_url() -> str:
return 'https://tavex.dk/guld/1oz-canadisk-maple-leaf-guldmont/'
@dataclasses.dataclass(frozen=True)
class TavexScraperSilver(TavexScraperBase):
dataset_name = 'prices_tavex_silver'

View File

@ -57,21 +57,22 @@ def normalize_soup(soup) -> bytes:
text = normalize_soup_lxml(soup).text_content()
return normalize_text(text)
def data_attributes_of_element(e):
    """Yield the name of every attribute of ``e`` that starts with ``data-``.

    The attribute names are copied before iteration begins, so the element's
    attributes may be modified while this generator is being consumed.
    """
    names = tuple(e.attrs.keys())
    yield from (name for name in names if name.startswith('data-'))
def has_data_attribute(e) -> bool:
    """Report whether ``e`` carries at least one ``data-*`` attribute."""
    # Stops at the first match; later attributes are never inspected.
    return any(True for _ in data_attributes_of_element(e))
def normalize_soup_slightly(soup,
classes=True,
scripts=True,
comments=True,
data_attributes=True):
def normalize_soup_slightly(
soup, classes=True, scripts=True, comments=True, data_attributes=True,
):
"""Perform soup normalization."""
# Little if any content
for tag in HTML_TAGS_MOSTLY_CONTENTLESS:

View File

@ -30,7 +30,10 @@ def parse_duration(text: str) -> datetime.timedelta:
def parse_response_datetime(response) -> datetime.datetime:
    """Parse the ``Date`` header of ``response`` into a UTC-aware datetime.

    Args:
        response: Object whose ``headers`` mapping contains a ``'Date'`` entry.

    Returns:
        The header value parsed with ``FORMAT_DATE_HEADER`` and tagged as UTC.

    Raises:
        ValueError: If the header text does not match ``FORMAT_DATE_HEADER``.
    """
    date_header = response.headers['Date']
    parsed = datetime.datetime.strptime(date_header, FORMAT_DATE_HEADER)
    # replace() only attaches the zone; it does not shift the clock time.
    return parsed.replace(tzinfo=datetime.UTC)
def parse_time(text: str) -> datetime.datetime:
text = text.replace('\n', ' ')