Ruff format

Jon Michael Aanes 2024-04-16 23:00:44 +02:00
parent be24037cdf
commit c521dd35a6
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
3 changed files with 24 additions and 10 deletions

View File

@@ -10,7 +10,7 @@ from frozendict import frozendict

 try:
     import cfscrape
-except Exception:
+except ImportError:
     cfscrape = None

 logger = logging.getLogger(__name__)
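
Narrowing the guard from `except Exception` to `except ImportError` is more than cosmetic: the broad form would also swallow unrelated failures raised inside cfscrape at import time, leaving `cfscrape = None` with no hint of the real problem. A minimal sketch of the optional-dependency pattern, with a hypothetical helper for the error path:

import logging

try:
    import cfscrape  # optional: only needed for Cloudflare-protected sites
except ImportError:
    cfscrape = None  # missing package disables the feature; other errors still propagate

logger = logging.getLogger(__name__)


def _require_cfscrape() -> None:
    # Hypothetical helper, not part of the commit: fail at use time with a clear message.
    if cfscrape is None:
        raise RuntimeError('cfscrape is required for this scraper')
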
@@ -28,14 +28,15 @@ csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True)
 logging.basicConfig()
 logger.setLevel('INFO')


-def try_value(fn, s: str) -> any:
+def try_value(fn, s: str) -> object:
     try:
         return fn(s)
     except ValueError:
         return None

-def to_value(s: str) -> any:
+
+def to_value(s: str) -> object:
     s = s.strip()
     if len(s) == 0:
         return None
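
Beyond formatting, the annotation change matters: lowercase `any` is the builtin function, not a type, so `-> any` was an invalid annotation; `-> object` (or `typing.Any`) correctly expresses "a parsed value of some type, or None". A short usage sketch of `try_value` as defined above:

def try_value(fn, s: str) -> object:
    try:
        return fn(s)
    except ValueError:
        return None


# Only ValueError is swallowed; anything else is a real bug and propagates.
assert try_value(int, '42') == 42
assert try_value(int, 'not a number') is None
assert try_value(float, '3.14') == 3.14
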
@@ -61,7 +62,7 @@ def extend_csv_file(
 ):
     dicts = []
     try:
-        with open(filename, 'r') as csvfile:
+        with open(filename) as csvfile:
             reader = csv.DictReader(csvfile, dialect=CSV_DIALECT)
             for row in reader:
                 for k in list(row.keys()):
@@ -72,7 +73,6 @@ def extend_csv_file(
         del csvfile
     except FileNotFoundError as e:
         logger.info('Creating file: %s', filename)
-        pass

     original_num_dicts = len(dicts)
     dicts += [frozendict(d) for d in new_dicts]
@@ -120,9 +120,11 @@ STANDARD_HEADERS = {
     'Accept-Encoding': 'gzip, deflate, br',
 }

+
 class CachedCfScrape(requests_cache.CacheMixin, cfscrape.CloudflareScraper):
     pass

+
 def get_session(cookiejar, *, with_cfscrape: bool) -> requests.Session:
     assert isinstance(with_cfscrape, bool)
     session = CachedCfScrape('web_cache', cookies=cookiejar)
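
`CachedCfScrape` relies on cooperative multiple inheritance: `requests_cache.CacheMixin` overrides `Session.send`, so the Cloudflare-capable session transparently persists responses on disk. A usage sketch, assuming both packages are installed and mirroring the `'web_cache'` backend name and constructor call from the diff:

import http.cookiejar

cookiejar = http.cookiejar.CookieJar()  # stand-in for the project's cookie jar
session = CachedCfScrape('web_cache', cookies=cookiejar)

response = session.get('https://example.org/')
response.raise_for_status()

# requests-cache marks replayed responses, so a repeat hit skips the network.
assert session.get('https://example.org/').from_cache
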

View File

@@ -15,18 +15,22 @@ class DeduplicateMode(Enum):
 class Scraper(abc.ABC):
     session: requests.Session

+    @abc.abstractmethod
     @staticmethod
     def dataset_name() -> str:
         pass

+    @abc.abstractmethod
     @staticmethod
     def deduplicate_mode() -> DeduplicateMode:
         pass

+    @abc.abstractmethod
     @staticmethod
     def dataset_format() -> str:
         return 'list-of-dicts'

+    @abc.abstractmethod
     @staticmethod
     def requires_cfscrape() -> bool:
         return False
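
The four added decorators turn `Scraper` into an enforced contract: instantiating a subclass that misses one of these hooks now raises `TypeError` instead of silently inheriting a stub. One caveat worth flagging: the Python docs recommend applying `abc.abstractmethod` as the innermost decorator when stacking it with `@staticmethod`. A sketch of the enforced contract in that documented order, with a hypothetical subclass:

import abc


class Scraper(abc.ABC):
    @staticmethod
    @abc.abstractmethod  # innermost, per the abc documentation
    def dataset_name() -> str: ...


class ExampleScraper(Scraper):  # hypothetical subclass for illustration
    @staticmethod
    def dataset_name() -> str:
        return 'example_dataset'


ExampleScraper()  # OK: the abstract hook is implemented


class Incomplete(Scraper):
    pass


try:
    Incomplete()
except TypeError as e:
    print(e)  # can't instantiate abstract class Incomplete ...
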

View File

@@ -1,9 +1,9 @@
 import dataclasses
 import datetime
-from collections.abc import Iterator
 import logging
 import re
 import secrets
+from collections.abc import Iterator

 import bs4
 
@@ -34,16 +34,19 @@ assert game_psnprofiles_id_from_url(
     '/trophy/21045-theatrhythm-final-bar-line/19-seasoned-hunter',
 )

+
 def parse_time(text: str) -> datetime.datetime:
     text = text.replace('\n', ' ')
     text = text.strip()
     return datetime.datetime.strptime(text, '%d %b %Y %I:%M:%S %p')

+
 assert parse_time('06 Apr 2024 06:11:42 PM')
 assert parse_time('26 Mar 2024 7:07:01 PM')

 MAX_GAME_ITERATIONS = 10

+
 @dataclasses.dataclass(frozen=True)
 class PsnProfilesScraper(Scraper):
     dataset_name = 'games_played_playstation'
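
The two asserts above pin down a subtle point: strptime's `%I` matches both zero-padded ('06') and unpadded ('7') hours, so one format string covers both timestamp spellings PSNProfiles emits. A quick check of the parsed values, as a self-contained sketch:

import datetime


def parse_time(text: str) -> datetime.datetime:
    text = text.replace('\n', ' ').strip()
    return datetime.datetime.strptime(text, '%d %b %Y %I:%M:%S %p')


# %p disambiguates the 12-hour %I field; both spellings parse to the expected clock time.
assert parse_time('06 Apr 2024 06:11:42 PM') == datetime.datetime(2024, 4, 6, 18, 11, 42)
assert parse_time('26 Mar 2024 7:07:01 PM') == datetime.datetime(2024, 3, 26, 19, 7, 1)
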
@@ -155,14 +158,17 @@ class PsnProfilesScraper(Scraper):
             d['me.last_played_time'] = time_played
             yield d

-    def scrape_game_trophies(self, psnprofiles_id: int, game_name: str) -> Iterator[dict]:
+    def scrape_game_trophies(
+        self, psnprofiles_id: int, game_name: str,
+    ) -> Iterator[dict]:
         assert isinstance(psnprofiles_id, int), psnprofiles_id
         assert isinstance(game_name, str), game_name

         logger.info('Getting Game Trophies %s', psnprofiles_id)

-        url = URL_USER_GAME_TROPHIES.format(psn_id=secrets.PLAYSTATION_PSN_ID,
-                                            game_id=psnprofiles_id)
+        url = URL_USER_GAME_TROPHIES.format(
+            psn_id=secrets.PLAYSTATION_PSN_ID, game_id=psnprofiles_id,
+        )

         response = self.session.get(url)
         response.raise_for_status()
@@ -177,7 +183,9 @@ class PsnProfilesScraper(Scraper):
             redundant.extract()

         # Recent trophies.
-        soup_tropies = soup.select('#content.page > .row > div.col-xs div.box table.zebra tr.completed')
+        soup_tropies = soup.select(
+            '#content.page > .row > div.col-xs div.box table.zebra tr.completed',
+        )

         for row in soup_tropies:
             cells = row.find_all('td')
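
The reflowed `soup.select` call is pure formatting, but the selector itself does real filtering: only table rows marked `completed` (earned trophies) are picked up. A self-contained sketch against a stand-in snippet of PSNProfiles-like markup:

import bs4

# Tiny stand-in document; the real page structure is assumed from the selector.
html = '''
<div id="content" class="page"><div class="row"><div class="col-xs">
<div class="box"><table class="zebra">
<tr class="completed"><td>Trophy A</td></tr>
<tr><td>Trophy B (not earned)</td></tr>
</table></div></div></div></div>
'''
soup = bs4.BeautifulSoup(html, 'html.parser')
rows = soup.select(
    '#content.page > .row > div.col-xs div.box table.zebra tr.completed',
)
assert len(rows) == 1  # only the completed trophy row matches
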