Ruff format
parent be24037cdf
commit c521dd35a6
@@ -10,7 +10,7 @@ from frozendict import frozendict
 try:
     import cfscrape
-except Exception:
+except ImportError:
     cfscrape = None
 
 
 logger = logging.getLogger(__name__)
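Catching ImportError instead of a blanket Exception means the fallback only fires when cfscrape is genuinely missing; unrelated failures inside the package now surface instead of silently disabling the feature. A minimal sketch of the optional-dependency pattern, assuming a hypothetical needs_cfscrape helper (not part of this diff; cfscrape.create_scraper is that library's documented entry point):

    try:
        import cfscrape  # optional dependency
    except ImportError:
        cfscrape = None  # sentinel: feature unavailable


    def needs_cfscrape():
        # Hypothetical helper: fail loudly only when the optional
        # dependency is actually used, not at import time.
        if cfscrape is None:
            raise RuntimeError('cfscrape is required for this scraper')
        return cfscrape.create_scraper()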
@@ -28,14 +28,15 @@ csv.register_dialect(CSV_DIALECT, lineterminator='\n', skipinitialspace=True)
 logging.basicConfig()
 logger.setLevel('INFO')
 
-def try_value(fn, s: str) -> any:
+
+def try_value(fn, s: str) -> object:
     try:
         return fn(s)
     except ValueError:
         return None
 
 
-def to_value(s: str) -> any:
+def to_value(s: str) -> object:
     s = s.strip()
     if len(s) == 0:
         return None
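Beyond formatting, the any-to-object change fixes a real annotation bug: any is the builtin function, not a type, so `-> any:` is misleading and rejected by type checkers, while object (the base of every Python type) is a valid, if loose, annotation. A quick self-contained check of the intended behaviour, assuming try_value is used to coerce CSV cell strings into typed values:

    def try_value(fn, s: str) -> object:
        try:
            return fn(s)
        except ValueError:
            return None


    assert try_value(int, '42') == 42       # parse succeeds
    assert try_value(int, 'n/a') is None    # ValueError swallowed
    assert try_value(float, '3.14') == 3.14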
@@ -61,7 +62,7 @@ def extend_csv_file(
 ):
     dicts = []
     try:
-        with open(filename, 'r') as csvfile:
+        with open(filename) as csvfile:
             reader = csv.DictReader(csvfile, dialect=CSV_DIALECT)
             for row in reader:
                 for k in list(row.keys()):
@@ -72,7 +73,6 @@ def extend_csv_file(
         del csvfile
     except FileNotFoundError as e:
         logger.info('Creating file: %s', filename)
-        pass
 
     original_num_dicts = len(dicts)
     dicts += [frozendict(d) for d in new_dicts]
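The dropped pass was a no-op: pass is only needed when a block would otherwise be empty, and logger.info already fills this except body. For contrast, a sketch of the one case where it is required:

    try:
        import tomllib  # stdlib in Python 3.11+
    except ImportError:
        pass  # required here: the except block would otherwise be empty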
@@ -120,9 +120,11 @@ STANDARD_HEADERS = {
     'Accept-Encoding': 'gzip, deflate, br',
 }
 
+
 class CachedCfScrape(requests_cache.CacheMixin, cfscrape.CloudflareScraper):
     pass
 
+
 def get_session(cookiejar, *, with_cfscrape: bool) -> requests.Session:
     assert isinstance(with_cfscrape, bool)
     session = CachedCfScrape('web_cache', cookies=cookiejar)
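CachedCfScrape is cooperative-inheritance composition: requests_cache.CacheMixin intercepts request dispatch to add transparent response caching, then defers via the MRO to cfscrape.CloudflareScraper, itself a requests.Session subclass. A minimal sketch, assuming requests-cache's documented CacheMixin API; the cache name becomes the SQLite file prefix under the default backend:

    import cfscrape
    import requests_cache


    class CachedCfScrape(requests_cache.CacheMixin, cfscrape.CloudflareScraper):
        """A Cloudflare-capable session whose responses are cached."""


    session = CachedCfScrape('web_cache')  # default backend: web_cache.sqlite
    # session.get(...) now consults the cache before touching the network.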
@@ -15,18 +15,22 @@ class DeduplicateMode(Enum):
 class Scraper(abc.ABC):
     session: requests.Session
 
+    @abc.abstractmethod
     @staticmethod
     def dataset_name() -> str:
         pass
 
+    @abc.abstractmethod
     @staticmethod
     def deduplicate_mode() -> DeduplicateMode:
         pass
 
+    @abc.abstractmethod
     @staticmethod
     def dataset_format() -> str:
         return 'list-of-dicts'
 
+    @abc.abstractmethod
     @staticmethod
     def requires_cfscrape() -> bool:
         return False
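The new @abc.abstractmethod decorators make the Scraper interface enforceable: instantiating a subclass that forgets an override now fails at construction time. One caveat worth flagging: the Python docs recommend applying abstractmethod as the innermost decorator when combining it with staticmethod, the reverse of the order in this hunk. A sketch of the documented ordering, with a hypothetical ExampleScraper standing in for a real subclass:

    import abc


    class Scraper(abc.ABC):
        @staticmethod
        @abc.abstractmethod
        def dataset_name() -> str:
            """Return the dataset this scraper writes to."""


    class ExampleScraper(Scraper):
        @staticmethod
        def dataset_name() -> str:
            return 'example'


    ExampleScraper()  # fine: all abstract methods overridden
    # Scraper() would raise TypeError: abstract method dataset_name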
@@ -1,9 +1,9 @@
 import dataclasses
 import datetime
-from collections.abc import Iterator
 import logging
 import re
 import secrets
+from collections.abc import Iterator
 
 import bs4
 
@@ -34,16 +34,19 @@ assert game_psnprofiles_id_from_url(
     '/trophy/21045-theatrhythm-final-bar-line/19-seasoned-hunter',
 )
 
+
 def parse_time(text: str) -> datetime.datetime:
     text = text.replace('\n', ' ')
     text = text.strip()
     return datetime.datetime.strptime(text, '%d %b %Y %I:%M:%S %p')
 
+
 assert parse_time('06 Apr 2024 06:11:42 PM')
 assert parse_time('26 Mar 2024 7:07:01 PM')
 
 MAX_GAME_ITERATIONS = 10
 
+
 @dataclasses.dataclass(frozen=True)
 class PsnProfilesScraper(Scraper):
     dataset_name = 'games_played_playstation'
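The module-level assert parse_time(...) calls act as import-time smoke tests for the strptime format string, and they pin down a subtlety: %I (12-hour clock) accepts both the zero-padded '06' and the unpadded '7', so both sample timestamps must parse. A standalone check of the same format with the expected 24-hour conversions:

    import datetime

    FMT = '%d %b %Y %I:%M:%S %p'  # e.g. '06 Apr 2024 06:11:42 PM'

    # %I plus %p converts to 24-hour time; strptime tolerates missing padding.
    assert datetime.datetime.strptime('06 Apr 2024 06:11:42 PM', FMT).hour == 18
    assert datetime.datetime.strptime('26 Mar 2024 7:07:01 PM', FMT).hour == 19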
@@ -155,14 +158,17 @@ class PsnProfilesScraper(Scraper):
             d['me.last_played_time'] = time_played
             yield d
 
-    def scrape_game_trophies(self, psnprofiles_id: int, game_name: str) -> Iterator[dict]:
+    def scrape_game_trophies(
+        self, psnprofiles_id: int, game_name: str,
+    ) -> Iterator[dict]:
         assert isinstance(psnprofiles_id, int), psnprofiles_id
         assert isinstance(game_name, str), game_name
 
         logger.info('Getting Game Trophies %s', psnprofiles_id)
 
-        url = URL_USER_GAME_TROPHIES.format(psn_id=secrets.PLAYSTATION_PSN_ID,
-                                            game_id=psnprofiles_id)
+        url = URL_USER_GAME_TROPHIES.format(
+            psn_id=secrets.PLAYSTATION_PSN_ID, game_id=psnprofiles_id,
+        )
         response = self.session.get(url)
         response.raise_for_status()
 
@@ -177,7 +183,9 @@ class PsnProfilesScraper(Scraper):
             redundant.extract()
 
         # Recent trophies.
-        soup_tropies = soup.select('#content.page > .row > div.col-xs div.box table.zebra tr.completed')
+        soup_tropies = soup.select(
+            '#content.page > .row > div.col-xs div.box table.zebra tr.completed',
+        )
         for row in soup_tropies:
             cells = row.find_all('td')
 