From b648983ff2eda0c12101fa6f13a68f9c719f8ed4 Mon Sep 17 00:00:00 2001
From: Jon Michael Aanes
Date: Wed, 9 Oct 2024 14:24:14 +0200
Subject: [PATCH] Steam: Automatic detection of appids for recent games. (Still missing all games, but that requires more permissions)

---
Note: this copy was re-flowed from a whitespace-collapsed dump. Indentation
and the positions of blank context lines were inferred from the hunk line
counts and may differ from the original commit; verify before applying.

 personal_data/fetchers/steam_community.py | 28 +++++++++++++++++++----
 personal_data/parse_util.py               | 10 +++++++-
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/personal_data/fetchers/steam_community.py b/personal_data/fetchers/steam_community.py
index 8369acc..08c126f 100644
--- a/personal_data/fetchers/steam_community.py
+++ b/personal_data/fetchers/steam_community.py
@@ -15,6 +15,7 @@ logger = logging.getLogger(__name__)
 
 URL_SITE_ROOT = 'https://steamcommunity.com/'
 URL_GAME_ACHIVEMENTS = URL_SITE_ROOT + 'id/{username}/stats/appid/{appid}'
+URL_USER_RECENT_ACTIVITY = URL_SITE_ROOT + 'id/{username}'
 
 FORMAT_DATE_HEADER = '%d/%m/%YYYY'
 
@@ -25,11 +26,30 @@ class SteamAchievementScraper(Scraper):
     deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
 
     def scrape(self) -> Iterator[dict[str, Any]]:
-        yield from self.scrape_app(105600)
+        username = secrets.STEAM_USERNAME
+        for appid in self.determine_appids_from_recent_activity(username):
+            yield from self.scrape_app(username, appid)
 
-    def scrape_app(self, appid: int) -> Iterator[dict[str, Any]]:
+    def determine_appids_from_recent_activity(self, username: str) -> Iterator[int]:
+        url = URL_USER_RECENT_ACTIVITY.format(
+            username=username,
+        )
+        response = self.session.get(url)
+        response.raise_for_status()
+
+        soup = html_util.normalize_soup_slightly(
+            bs4.BeautifulSoup(response.content, 'lxml'),
+            classes=False,
+        )
+
+        for entry_a in soup.select('.recent_games .recent_game .game_info_cap a'):
+            href = entry_a['href']
+            appid = int(href.split('/')[-1])
+            yield appid
+
+    def scrape_app(self, username: str, appid: int) -> Iterator[dict[str, Any]]:
         url = URL_GAME_ACHIVEMENTS.format(
-            username=secrets.STEAM_USERNAME,
+            username=username,
             appid=appid,
         )
         response = self.session.get(url)
@@ -41,7 +61,7 @@ class SteamAchievementScraper(Scraper):
         soup = bs4.BeautifulSoup(response.content, 'lxml')
 
         game_name: str = re.match(
-            r'Steam Community :: (.+) :: Jmaa', soup.head.title.get_text(),
+            r'Steam Community :: (.+) :: .*', soup.head.title.get_text(),
         ).group(1)
 
         soup = html_util.normalize_soup_slightly(
diff --git a/personal_data/parse_util.py b/personal_data/parse_util.py
index 5e68379..148621b 100644
--- a/personal_data/parse_util.py
+++ b/personal_data/parse_util.py
@@ -36,7 +36,8 @@ def parse_response_datetime(response) -> datetime.datetime:
     ).replace(tzinfo=datetime.UTC)
 
 
-LOCAL_TIMEZONE = datetime.datetime.now(datetime.UTC).astimezone().tzinfo
+NOW = datetime.datetime.now(datetime.UTC)
+LOCAL_TIMEZONE = NOW.astimezone().tzinfo
 
 
 def try_parse(text: str, fmt: str) -> datetime.datetime | None:
@@ -55,6 +56,13 @@ def parse_time(text: str) -> datetime.datetime:
 
     time = try_parse(text, '%d %b %Y %I:%M:%S %p')
     time = time or try_parse(text, '%d %b, %Y @ %I:%M%p')
+    if time is None and (m := try_parse(text, '%d %b @ %I:%M%p')):
+        time = m.replace(year = NOW.year)
+
+    assert time is not None, 'Could not parse format'
+
+    if time.tzinfo is None:
+        time = time.replace(tzinfo=LOCAL_TIMEZONE )
 
     assert time.tzinfo is not None, time
     return time