From c521dd35a6b0b5d198d0a4338010e9dcebac88ba Mon Sep 17 00:00:00 2001
From: Jon Michael Aanes
Date: Tue, 16 Apr 2024 23:00:44 +0200
Subject: [PATCH] Ruff format

---
 personal_data/__init__.py             | 12 +++++++-----
 personal_data/data.py                 |  4 ++++
 personal_data/fetchers/psnprofiles.py | 18 +++++++++++++-----
 3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/personal_data/__init__.py b/personal_data/__init__.py
index e6da075..c9bee9a 100644
--- a/personal_data/__init__.py
+++ b/personal_data/__init__.py
@@ -10,7 +10,7 @@ from frozendict import frozendict
 
 try:
     import cfscrape
-except Exception:
+except ImportError:
     cfscrape = None
 
 logger = logging.getLogger(__name__)
@@ -28,14 +28,15 @@ csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True)
 logging.basicConfig()
 logger.setLevel('INFO')
 
-def try_value(fn, s: str) -> any:
+
+def try_value(fn, s: str) -> object:
     try:
         return fn(s)
     except ValueError:
         return None
 
 
-def to_value(s: str) -> any:
+def to_value(s: str) -> object:
     s = s.strip()
     if len(s) == 0:
         return None
@@ -61,7 +62,7 @@
 ):
     dicts = []
     try:
-        with open(filename, 'r') as csvfile:
+        with open(filename) as csvfile:
             reader = csv.DictReader(csvfile, dialect=CSV_DIALECT)
             for row in reader:
                 for k in list(row.keys()):
@@ -72,7 +73,6 @@
         del csvfile
     except FileNotFoundError as e:
         logger.info('Creating file: %s', filename)
-        pass
 
     original_num_dicts = len(dicts)
     dicts += [frozendict(d) for d in new_dicts]
@@ -120,9 +120,11 @@ STANDARD_HEADERS = {
     'Accept-Encoding': 'gzip, deflate, br',
 }
 
+
 class CachedCfScrape(requests_cache.CacheMixin, cfscrape.CloudflareScraper):
     pass
 
+
 def get_session(cookiejar, *, with_cfscrape: bool) -> requests.Session:
     assert isinstance(with_cfscrape, bool)
     session = CachedCfScrape('web_cache', cookies=cookiejar)
diff --git a/personal_data/data.py b/personal_data/data.py
index 0bddc51..5ab54a4 100644
--- a/personal_data/data.py
+++ b/personal_data/data.py
@@ -15,18 +15,22 @@ class DeduplicateMode(Enum):
 class Scraper(abc.ABC):
     session: requests.Session
+
     @abc.abstractmethod
     @staticmethod
     def dataset_name() -> str:
         pass
+
     @abc.abstractmethod
     @staticmethod
     def deduplicate_mode() -> DeduplicateMode:
         pass
+
     @abc.abstractmethod
     @staticmethod
     def dataset_format() -> str:
         return 'list-of-dicts'
+
     @abc.abstractmethod
     @staticmethod
     def requires_cfscrape() -> bool:
         return False
diff --git a/personal_data/fetchers/psnprofiles.py b/personal_data/fetchers/psnprofiles.py
index e5858a5..70145d9 100644
--- a/personal_data/fetchers/psnprofiles.py
+++ b/personal_data/fetchers/psnprofiles.py
@@ -1,9 +1,9 @@
 import dataclasses
 import datetime
-from collections.abc import Iterator
 import logging
 import re
 import secrets
+from collections.abc import Iterator
 
 import bs4
 
@@ -34,16 +34,19 @@ assert game_psnprofiles_id_from_url(
     '/trophy/21045-theatrhythm-final-bar-line/19-seasoned-hunter',
 )
 
+
 def parse_time(text: str) -> datetime.datetime:
     text = text.replace('\n', ' ')
     text = text.strip()
     return datetime.datetime.strptime(text, '%d %b %Y %I:%M:%S %p')
 
+
 assert parse_time('06 Apr 2024 06:11:42 PM')
 assert parse_time('26 Mar 2024 7:07:01 PM')
 
 MAX_GAME_ITERATIONS = 10
 
+
 @dataclasses.dataclass(frozen=True)
 class PsnProfilesScraper(Scraper):
     dataset_name = 'games_played_playstation'
@@ -155,14 +158,17 @@ class PsnProfilesScraper(Scraper):
             d['me.last_played_time'] = time_played
             yield d
 
-    def scrape_game_trophies(self, psnprofiles_id: int, game_name: str) -> Iterator[dict]:
+    def scrape_game_trophies(
+        self, psnprofiles_id: int, game_name: str,
+    ) -> Iterator[dict]:
         assert isinstance(psnprofiles_id, int), psnprofiles_id
         assert isinstance(game_name, str), game_name
 
         logger.info('Getting Game Trophies %s', psnprofiles_id)
 
-        url = URL_USER_GAME_TROPHIES.format(psn_id=secrets.PLAYSTATION_PSN_ID,
-                                            game_id=psnprofiles_id)
+        url = URL_USER_GAME_TROPHIES.format(
+            psn_id=secrets.PLAYSTATION_PSN_ID, game_id=psnprofiles_id,
+        )
 
         response = self.session.get(url)
         response.raise_for_status()
@@ -177,7 +183,9 @@ class PsnProfilesScraper(Scraper):
             redundant.extract()
 
         # Recent trophies.
-        soup_tropies = soup.select('#content.page > .row > div.col-xs div.box table.zebra tr.completed')
+        soup_tropies = soup.select(
+            '#content.page > .row > div.col-xs div.box table.zebra tr.completed',
+        )
         for row in soup_tropies:
             cells = row.find_all('td')
 
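
Note for reviewers (commentary below the last hunk, not part of the commit):
the hunks above are formatter output rather than hand edits. Assuming ruff's
defaults (Black-compatible style, 88-column lines), two rules account for
most of the diff: definitions gain the blank lines PEP 8 expects (two around
top-level defs, one between methods), and a call that overflows the line
limit is split after its opening bracket and given a trailing comma so it
stays split on future runs. A minimal sketch of the second rule, using the
names from the psnprofiles.py hunk:

    # Before: a single line well past the 88-column limit.
    url = URL_USER_GAME_TROPHIES.format(psn_id=secrets.PLAYSTATION_PSN_ID, game_id=psnprofiles_id)

    # After `ruff format`: split after the opening parenthesis, with a
    # "magic" trailing comma appended to the last argument.
    url = URL_USER_GAME_TROPHIES.format(
        psn_id=secrets.PLAYSTATION_PSN_ID, game_id=psnprofiles_id,
    )

The remaining one-line changes (`except ImportError`, `open(filename)`, the
`object` return annotations, the dropped `pass`, the re-sorted import) look
like ruff lint autofixes rather than pure formatting, bundled into the same
commit.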