PSN: Fixed parsing of missing timestamps
parent 83208fd6ef
commit 3b908e17ee
@@ -27,7 +27,7 @@ def game_psnprofiles_id_from_url(relative_url: str) -> int:
     return int(result)
 
 
-MAX_NUMBER_GAMES_TO_PARSE = 1000
+MAX_NUMBER_GAMES_TO_PARSE = 10000
 
 
 @dataclasses.dataclass(frozen=True)
@@ -46,17 +46,12 @@ class PsnProfilesScraper(Scraper):
 
         logger.info('Found %d games from overview', len(games_rows))
 
-        SCRAPE_FROM_OVERVIEW = False
-        if SCRAPE_FROM_OVERVIEW:
-            yield from games_rows
-
-        idx = 0
-        for game_id, game_name in games_ids.items():
+        for idx, (game_id, game_name) in enumerate(reversed(games_ids.items())):
             yield from self._scrape_game_trophies(game_id, game_name)
             del game_id
-            idx += 1
             if idx >= MAX_NUMBER_GAMES_TO_PARSE:
                 break
+        logger.info('Found all trophies for playstation games')
 
     def _setup_cache(self):
         requests_util.setup_limiter(
@@ -81,53 +76,6 @@ class PsnProfilesScraper(Scraper):
         if len(games_on_page) == 0:
             return
 
-    def _scrape_games_overview_old(self) -> Iterator[dict]:
-        # Request to get overview
-        logger.info('Getting Overview')
-        url = URL_PROFILE.format(psn_id=secrets.PLAYSTATION_PSN_ID)
-        response = self.session.get(url)
-        response.raise_for_status()
-
-        now = parse_util.parse_response_datetime(response)
-
-        # Parse data
-        soup = bs4.BeautifulSoup(response.content, 'lxml')
-        soup = personal_data.html_util.normalize_soup_slightly(soup, classes=False)
-
-        yield from self._iterate_games_from_recent_tropies(soup, now)
-        yield from self._iterate_games_from_games_table(soup)
-
-    def _iterate_games_from_recent_tropies(self, soup, now) -> Iterator[dict]:
-        soup_recent_tropies = soup.select('ul#recent-trophies > li')
-        assert len(soup_recent_tropies) > 0
-        for row in soup_recent_tropies:
-            cells = row.select_one('.info .box td').find_all('div')
-
-            trophy_name = cells[0].get_text().strip()
-            trophy_desc = cells[1].get_text().strip()
-            game_name = cells[2].a.extract().get_text().strip()
-
-            psnprofiles_id = game_psnprofiles_id_from_url(cells[0].find('a')['href'])
-            trophy_icon = row.find(class_='icon').find('img')['src']
-
-            gotten_at = (
-                cells[2].get_text().strip().removesuffix(' in').removesuffix(' ago')
-            )
-            gotten_at = parse_util.parse_duration(gotten_at)
-            time_acquired = now - gotten_at
-
-            yield {
-                'game.name': game_name,
-                'me.last_played_time': time_acquired.date(),
-                # Trophy Data
-                'trophy.name': trophy_name,
-                'trophy.desc': trophy_desc,
-                'trophy.icon': trophy_icon,
-                'psnprofiles.game_id': psnprofiles_id,
-            }
-
-            del row, cells, time_acquired
-
     def _iterate_games_from_games_table(self, soup) -> Iterator[dict]:
         # Games table
         table_rows = soup.find(id='gamesTable').find_all('tr')
@@ -220,6 +168,9 @@ class PsnProfilesScraper(Scraper):
 
             trophy_icon = cells[0].img['src']
 
+            if 'Missing\nTimestamp' in cells[2].get_text().strip():
+                continue
+
             cells[2].span.span.nobr.sup.extract()
             gotten_at = parse_util.parse_time(cells[2].get_text())
 
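For context on the fix: some trophy rows on psnprofiles.com show a "Missing Timestamp" label instead of a date, and the previous code went on to unwrap and parse that cell as a time; the commit guards against this by skipping such rows. Below is a minimal, self-contained sketch of that guard. The HTML snippets, the html.parser backend, and the has_timestamp helper are illustrative assumptions made up for the example; only the 'Missing\nTimestamp' membership check is taken from the commit.

# Minimal sketch of the guard added in this commit (assumed, simplified HTML).
import bs4

# Stand-ins for a psnprofiles.com trophy row; only the third cell matters here.
TIMESTAMPED_ROW = (
    '<tr><td><img src="icon.png"/></td><td>Trophy name</td>'
    '<td><span><span><nobr>1st Jan 2024<sup>2:34:56 PM</sup></nobr></span></span></td></tr>'
)
MISSING_TS_ROW = (
    '<tr><td><img src="icon.png"/></td><td>Trophy name</td>'
    '<td>Missing\nTimestamp</td></tr>'
)


def has_timestamp(row_html: str) -> bool:
    """Return False when the date cell only carries the 'Missing Timestamp' label."""
    cells = bs4.BeautifulSoup(row_html, 'html.parser').find_all('td')
    # Same membership test the commit adds before unwrapping and parsing the timestamp.
    return 'Missing\nTimestamp' not in cells[2].get_text().strip()


print(has_timestamp(TIMESTAMPED_ROW))  # True  -> the date cell would be parsed
print(has_timestamp(MISSING_TS_ROW))   # False -> the row is skipped, as in the commit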