
Ruff format

Jon Michael Aanes 2024-04-16 23:00:44 +02:00
parent be24037cdf
commit c521dd35a6
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
3 changed files with 24 additions and 10 deletions

View File

@@ -10,7 +10,7 @@ from frozendict import frozendict
 try:
     import cfscrape
-except Exception:
+except ImportError:
     cfscrape = None
 
 logger = logging.getLogger(__name__)
@@ -28,14 +28,15 @@ csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True)
 logging.basicConfig()
 logger.setLevel('INFO')
 
-def try_value(fn, s: str) -> any:
+def try_value(fn, s: str) -> object:
     try:
         return fn(s)
     except ValueError:
         return None
 
-def to_value(s: str) -> any:
+def to_value(s: str) -> object:
     s = s.strip()
     if len(s) == 0:
         return None
@@ -61,7 +62,7 @@ def extend_csv_file(
 ):
     dicts = []
     try:
-        with open(filename, 'r') as csvfile:
+        with open(filename) as csvfile:
             reader = csv.DictReader(csvfile, dialect=CSV_DIALECT)
             for row in reader:
                 for k in list(row.keys()):
@@ -72,7 +73,6 @@ def extend_csv_file(
         del csvfile
     except FileNotFoundError as e:
         logger.info('Creating file: %s', filename)
-        pass
 
     original_num_dicts = len(dicts)
     dicts += [frozendict(d) for d in new_dicts]
@@ -120,9 +120,11 @@ STANDARD_HEADERS = {
     'Accept-Encoding': 'gzip, deflate, br',
 }
 
 class CachedCfScrape(requests_cache.CacheMixin, cfscrape.CloudflareScraper):
     pass
 
 def get_session(cookiejar, *, with_cfscrape: bool) -> requests.Session:
     assert isinstance(with_cfscrape, bool)
     session = CachedCfScrape('web_cache', cookies=cookiejar)
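A note on the CachedCfScrape class above: requests-cache documents combining its CacheMixin with other requests.Session subclasses, and this file applies that pattern to cfscrape's CloudflareScraper. The sketch below is a minimal, self-contained illustration of that composition, not the repository's actual module; the 'web_cache' cache name is copied from the call shown above, and the plain CachedSession fallback for installs without cfscrape is an assumption added for the example.

import requests
import requests_cache

try:
    import cfscrape  # optional dependency, mirroring the hunk above
except ImportError:
    cfscrape = None

if cfscrape is not None:
    class CachedCfScrape(requests_cache.CacheMixin, cfscrape.CloudflareScraper):
        # Caches responses and solves Cloudflare challenges in one session.
        pass

def make_session() -> requests.Session:
    # Hypothetical helper: fall back to a plain cached session when cfscrape
    # is not installed (this fallback is not part of the diff above).
    cls = CachedCfScrape if cfscrape is not None else requests_cache.CachedSession
    return cls('web_cache')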

View File

@@ -15,18 +15,22 @@ class DeduplicateMode(Enum):
 class Scraper(abc.ABC):
     session: requests.Session
 
     @abc.abstractmethod
     @staticmethod
     def dataset_name() -> str:
         pass
 
     @abc.abstractmethod
     @staticmethod
     def deduplicate_mode() -> DeduplicateMode:
         pass
 
     @abc.abstractmethod
     @staticmethod
     def dataset_format() -> str:
         return 'list-of-dicts'
 
     @abc.abstractmethod
     @staticmethod
     def requires_cfscrape() -> bool:
         return False
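As a reading aid for the Scraper ABC above: a concrete scraper has to override all four abstract static methods before it can be instantiated (the third file in this commit shows one such subclass). The sketch below is a hypothetical subclass written only to illustrate the interface; the name ExampleScraper and the 'example_dataset' string are invented, and the deduplicate_mode return value just picks the enum's first member because the diff does not show which members exist.

class ExampleScraper(Scraper):  # hypothetical subclass, for illustration only
    @staticmethod
    def dataset_name() -> str:
        return 'example_dataset'

    @staticmethod
    def deduplicate_mode() -> DeduplicateMode:
        # The diff does not list the enum's members, so take the first one.
        return next(iter(DeduplicateMode))

    @staticmethod
    def dataset_format() -> str:
        # Must still be overridden: the base method is abstract despite its default body.
        return 'list-of-dicts'

    @staticmethod
    def requires_cfscrape() -> bool:
        return False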

View File

@@ -1,9 +1,9 @@
 import dataclasses
 import datetime
-from collections.abc import Iterator
 import logging
 import re
 import secrets
+from collections.abc import Iterator
 
 import bs4
@@ -34,16 +34,19 @@ assert game_psnprofiles_id_from_url(
     '/trophy/21045-theatrhythm-final-bar-line/19-seasoned-hunter',
 )
 
 def parse_time(text: str) -> datetime.datetime:
     text = text.replace('\n', ' ')
     text = text.strip()
     return datetime.datetime.strptime(text, '%d %b %Y %I:%M:%S %p')
 
 assert parse_time('06 Apr 2024 06:11:42 PM')
 assert parse_time('26 Mar 2024 7:07:01 PM')
 
 MAX_GAME_ITERATIONS = 10
 
 @dataclasses.dataclass(frozen=True)
 class PsnProfilesScraper(Scraper):
     dataset_name = 'games_played_playstation'
@@ -155,14 +158,17 @@ class PsnProfilesScraper(Scraper):
             d['me.last_played_time'] = time_played
             yield d
 
-    def scrape_game_trophies(self, psnprofiles_id: int, game_name: str) -> Iterator[dict]:
+    def scrape_game_trophies(
+        self, psnprofiles_id: int, game_name: str,
+    ) -> Iterator[dict]:
         assert isinstance(psnprofiles_id, int), psnprofiles_id
         assert isinstance(game_name, str), game_name
 
         logger.info('Getting Game Trophies %s', psnprofiles_id)
 
-        url = URL_USER_GAME_TROPHIES.format(psn_id=secrets.PLAYSTATION_PSN_ID,
-                                            game_id=psnprofiles_id)
+        url = URL_USER_GAME_TROPHIES.format(
+            psn_id=secrets.PLAYSTATION_PSN_ID, game_id=psnprofiles_id,
+        )
         response = self.session.get(url)
         response.raise_for_status()
@@ -177,7 +183,9 @@ class PsnProfilesScraper(Scraper):
             redundant.extract()
 
         # Recent trophies.
-        soup_tropies = soup.select('#content.page > .row > div.col-xs div.box table.zebra tr.completed')
+        soup_tropies = soup.select(
+            '#content.page > .row > div.col-xs div.box table.zebra tr.completed',
+        )
         for row in soup_tropies:
            cells = row.find_all('td')
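The reflowed soup.select(...) call above is easier to follow with a concrete input. Below is a small self-contained demonstration of the same CSS selector and of the '%d %b %Y %I:%M:%S %p' timestamp format from parse_time; the HTML snippet is invented for the demo and is not PSNProfiles' real markup.

import datetime

import bs4

# Made-up markup matching the selector's structure, for illustration only.
html = '''
<div id="content" class="page">
  <div class="row"><div class="col-xs"><div class="box">
    <table class="zebra">
      <tr class="completed"><td>Seasoned Hunter</td><td>06 Apr 2024 06:11:42 PM</td></tr>
    </table>
  </div></div></div>
</div>
'''

soup = bs4.BeautifulSoup(html, 'html.parser')
rows = soup.select('#content.page > .row > div.col-xs div.box table.zebra tr.completed')
for row in rows:
    cells = [td.get_text(strip=True) for td in row.find_all('td')]
    earned = datetime.datetime.strptime(cells[1], '%d %b %Y %I:%M:%S %p')
    print(cells[0], earned)  # Seasoned Hunter 2024-04-06 18:11:42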