Compare commits


3 Commits

SHA1        Message                                    Date
ce89103c32  Ruff                                       2024-10-25 00:42:25 +02:00
            Some checks failed:
            Run Python tests (through Pytest) / Test (push): Failing after 34s
            Verify Python project can be installed, loaded and have version checked / Test (push): Successful in 29s
3b908e17ee  PSN: Fixed parsing of missing timestamps   2024-10-25 00:42:13 +02:00
83208fd6ef  Fix caching                                2024-10-24 23:51:55 +02:00
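
The "Fix caching" commit reworks _setup_cache (see the hunk further down), which registers per-URL cache lifetimes and a request rate limit on the scraper's session through the project's requests_util.setup_limiter helper. The helper itself is not part of this diff; the following is only a rough sketch of the per-URL expiry it implies, assuming a requests-cache style backend and an illustrative URL_API_ROOT value:

import datetime

import requests_cache  # assumption: a requests-cache style backend; the project's
                        # requests_util.setup_limiter helper is not shown in this diff

URL_API_ROOT = 'https://psnprofiles.com'  # illustrative value, not taken from the diff

# Roughly what _setup_cache configures: a short cache lifetime for the API root
# and a much longer one for trophy pages. The real helper also throttles the
# API root to five requests per minute, which this sketch leaves out.
session = requests_cache.CachedSession(
    'psnprofiles',
    expire_after=datetime.timedelta(hours=1),
    urls_expire_after={
        '*psnprofiles.com/trophies/*': datetime.timedelta(days=14),
    },
)

response = session.get(URL_API_ROOT)
response.raise_for_status()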


@@ -1,5 +1,4 @@
import dataclasses
import datetime
import logging
import re
from collections.abc import Iterator
@@ -27,7 +26,7 @@ def game_psnprofiles_id_from_url(relative_url: str) -> int:
    return int(result)

MAX_NUMBER_GAMES_TO_PARSE = 1000
MAX_NUMBER_GAMES_TO_PARSE = 10000

@dataclasses.dataclass(frozen=True)
@@ -46,29 +45,18 @@ class PsnProfilesScraper(Scraper):
        logger.info('Found %d games from overview', len(games_rows))

        SCRAPE_FROM_OVERVIEW = False
        if SCRAPE_FROM_OVERVIEW:
            yield from games_rows

        idx = 0
        for game_id, game_name in games_ids.items():
        for idx, (game_id, game_name) in enumerate(reversed(games_ids.items())):
            yield from self._scrape_game_trophies(game_id, game_name)
            del game_id
            idx += 1
            if idx >= MAX_NUMBER_GAMES_TO_PARSE:
                break
        logger.info('Found all trophies for playstation games')

    def _setup_cache(self):
        requests_util.setup_limiter(
            self.session,
            URL_API_ROOT,
            per_minute=5,
            expire_after=datetime.timedelta(hours=1),
        )
        requests_util.setup_limiter(
            self.session,
            URL_API_ROOT + '/trophies/',
            expire_after=datetime.timedelta(days=14),
        )

    def _scrape_games_overview(self) -> Iterator[dict]:
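
The loop rewrite above drops the hand-maintained idx counter in favour of enumerate, walks the games in reverse insertion order (dict views support reversed() from Python 3.8 on), and stops once MAX_NUMBER_GAMES_TO_PARSE games have been handled. A self-contained sketch of roughly the same capping pattern, using itertools.islice and placeholder data rather than the scraper's real inputs:

import itertools

MAX_NUMBER_GAMES_TO_PARSE = 10000


def scrape_game_trophies(game_id: int, game_name: str) -> list[str]:
    # Placeholder for the scraper's real per-game trophy request and parsing.
    return [f'{game_name} (game {game_id}): example trophy']


games_ids = {12345: 'Example Game A', 67890: 'Example Game B'}  # illustrative data

# Newest-inserted entries first, capped much like the enumerate/break in the diff
# (dict views support reversed() since Python 3.8).
capped = itertools.islice(reversed(games_ids.items()), MAX_NUMBER_GAMES_TO_PARSE)
for game_id, game_name in capped:
    for trophy in scrape_game_trophies(game_id, game_name):
        print(trophy)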
@@ -87,53 +75,6 @@ class PsnProfilesScraper(Scraper):
            if len(games_on_page) == 0:
                return

    def _scrape_games_overview_old(self) -> Iterator[dict]:
        # Request to get overview
        logger.info('Getting Overview')
        url = URL_PROFILE.format(psn_id=secrets.PLAYSTATION_PSN_ID)
        response = self.session.get(url)
        response.raise_for_status()

        now = parse_util.parse_response_datetime(response)

        # Parse data
        soup = bs4.BeautifulSoup(response.content, 'lxml')
        soup = personal_data.html_util.normalize_soup_slightly(soup, classes=False)

        yield from self._iterate_games_from_recent_tropies(soup, now)
        yield from self._iterate_games_from_games_table(soup)

    def _iterate_games_from_recent_tropies(self, soup, now) -> Iterator[dict]:
        soup_recent_tropies = soup.select('ul#recent-trophies > li')
        assert len(soup_recent_tropies) > 0

        for row in soup_recent_tropies:
            cells = row.select_one('.info .box td').find_all('div')
            trophy_name = cells[0].get_text().strip()
            trophy_desc = cells[1].get_text().strip()
            game_name = cells[2].a.extract().get_text().strip()
            psnprofiles_id = game_psnprofiles_id_from_url(cells[0].find('a')['href'])
            trophy_icon = row.find(class_='icon').find('img')['src']

            gotten_at = (
                cells[2].get_text().strip().removesuffix(' in').removesuffix(' ago')
            )
            gotten_at = parse_util.parse_duration(gotten_at)
            time_acquired = now - gotten_at

            yield {
                'game.name': game_name,
                'me.last_played_time': time_acquired.date(),
                # Trophy Data
                'trophy.name': trophy_name,
                'trophy.desc': trophy_desc,
                'trophy.icon': trophy_icon,
                'psnprofiles.game_id': psnprofiles_id,
            }

            del row, cells, time_acquired

    def _iterate_games_from_games_table(self, soup) -> Iterator[dict]:
        # Games table
        table_rows = soup.find(id='gamesTable').find_all('tr')
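
The removed _iterate_games_from_recent_tropies converted relative labels such as "3 days ago" into absolute times by parsing the duration and subtracting it from the response timestamp. A standalone sketch of that technique; parse_duration here is an illustrative stand-in, not the project's parse_util.parse_duration:

import datetime
import re

# Illustrative stand-in for parse_util.parse_duration: understands simple
# '<number> <unit>' forms such as '3 days' or '5 hours'.
_DURATION_RE = re.compile(r'(?P<amount>\d+)\s+(?P<unit>second|minute|hour|day|week)s?')


def parse_duration(text: str) -> datetime.timedelta:
    match = _DURATION_RE.search(text)
    if match is None:
        raise ValueError(f'unrecognized duration: {text!r}')
    return datetime.timedelta(**{match.group('unit') + 's': int(match.group('amount'))})


# Mirrors the removed code: strip the ' in'/' ago' suffix, parse the duration,
# and anchor it against the time the page was served.
now = datetime.datetime(2024, 10, 25, 0, 42, tzinfo=datetime.timezone.utc)  # example anchor
label = '3 days ago'
gotten_at = parse_duration(label.removesuffix(' in').removesuffix(' ago'))
time_acquired = now - gotten_at
print(time_acquired.date())  # 2024-10-22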
@@ -226,6 +167,8 @@ class PsnProfilesScraper(Scraper):
            trophy_icon = cells[0].img['src']

            if 'Missing\nTimestamp' in cells[2].get_text().strip():
                continue

            cells[2].span.span.nobr.sup.extract()
            gotten_at = parse_util.parse_time(cells[2].get_text())
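
This last hunk is the "PSN: Fixed parsing of missing timestamps" fix: trophy rows whose timestamp cell only reads "Missing Timestamp" are skipped before the footnote marker is stripped and the remaining text is parsed. A minimal sketch of the same guard against illustrative HTML; parse_time is a stand-in for parse_util.parse_time, and the markup is simplified compared to the real page:

import datetime

import bs4

# Illustrative rows: one trophy with a timestamp plus a footnote marker, one
# marked as missing (the real page structure is more involved).
HTML = '''
<table>
  <tr><td>Trophy A</td><td>icon-a.png</td><td><span><span>25th Oct 2024<sup>1</sup></span></span></td></tr>
  <tr><td>Trophy B</td><td>icon-b.png</td><td>Missing
Timestamp</td></tr>
</table>
'''


def parse_time(text: str) -> datetime.datetime:
    # Stand-in for parse_util.parse_time, which is not part of this diff.
    return datetime.datetime.strptime(text.strip(), '%dth %b %Y')


soup = bs4.BeautifulSoup(HTML, 'html.parser')  # the scraper itself uses 'lxml'
for row in soup.find_all('tr'):
    cells = row.find_all('td')
    # Same guard as the new code: skip rows without a usable timestamp.
    if 'Missing\nTimestamp' in cells[2].get_text().strip():
        continue
    # Drop the footnote marker before parsing, as the diff does with
    # cells[2].span.span.nobr.sup.extract().
    if cells[2].sup is not None:
        cells[2].sup.extract()
    print(cells[0].get_text(), parse_time(cells[2].get_text()))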