diff --git a/personal_data/fetchers/psnprofiles.py b/personal_data/fetchers/psnprofiles.py index b342e44..eb596f6 100644 --- a/personal_data/fetchers/psnprofiles.py +++ b/personal_data/fetchers/psnprofiles.py @@ -27,7 +27,7 @@ def game_psnprofiles_id_from_url(relative_url: str) -> int: return int(result) -MAX_NUMBER_GAMES_TO_PARSE = 1000 +MAX_NUMBER_GAMES_TO_PARSE = 10000 @dataclasses.dataclass(frozen=True) @@ -46,17 +46,12 @@ class PsnProfilesScraper(Scraper): logger.info('Found %d games from overview', len(games_rows)) - SCRAPE_FROM_OVERVIEW = False - if SCRAPE_FROM_OVERVIEW: - yield from games_rows - - idx = 0 - for game_id, game_name in games_ids.items(): + for idx, (game_id, game_name) in enumerate(reversed(games_ids.items())): yield from self._scrape_game_trophies(game_id, game_name) del game_id - idx += 1 if idx >= MAX_NUMBER_GAMES_TO_PARSE: break + logger.info('Found all trophies for playstation games') def _setup_cache(self): requests_util.setup_limiter( @@ -81,53 +76,6 @@ class PsnProfilesScraper(Scraper): if len(games_on_page) == 0: return - def _scrape_games_overview_old(self) -> Iterator[dict]: - # Request to get overview - logger.info('Getting Overview') - url = URL_PROFILE.format(psn_id=secrets.PLAYSTATION_PSN_ID) - response = self.session.get(url) - response.raise_for_status() - - now = parse_util.parse_response_datetime(response) - - # Parse data - soup = bs4.BeautifulSoup(response.content, 'lxml') - soup = personal_data.html_util.normalize_soup_slightly(soup, classes=False) - - yield from self._iterate_games_from_recent_tropies(soup, now) - yield from self._iterate_games_from_games_table(soup) - - def _iterate_games_from_recent_tropies(self, soup, now) -> Iterator[dict]: - soup_recent_tropies = soup.select('ul#recent-trophies > li') - assert len(soup_recent_tropies) > 0 - for row in soup_recent_tropies: - cells = row.select_one('.info .box td').find_all('div') - - trophy_name = cells[0].get_text().strip() - trophy_desc = cells[1].get_text().strip() - game_name = cells[2].a.extract().get_text().strip() - - psnprofiles_id = game_psnprofiles_id_from_url(cells[0].find('a')['href']) - trophy_icon = row.find(class_='icon').find('img')['src'] - - gotten_at = ( - cells[2].get_text().strip().removesuffix(' in').removesuffix(' ago') - ) - gotten_at = parse_util.parse_duration(gotten_at) - time_acquired = now - gotten_at - - yield { - 'game.name': game_name, - 'me.last_played_time': time_acquired.date(), - # Trophy Data - 'trophy.name': trophy_name, - 'trophy.desc': trophy_desc, - 'trophy.icon': trophy_icon, - 'psnprofiles.game_id': psnprofiles_id, - } - - del row, cells, time_acquired - def _iterate_games_from_games_table(self, soup) -> Iterator[dict]: # Games table table_rows = soup.find(id='gamesTable').find_all('tr') @@ -220,6 +168,9 @@ class PsnProfilesScraper(Scraper): trophy_icon = cells[0].img['src'] + + if 'Missing\nTimestamp' in cells[2].get_text().strip(): + continue cells[2].span.span.nobr.sup.extract() gotten_at = parse_util.parse_time(cells[2].get_text())