Steam: Automatic detection of appids for recently played games. (Still missing the full list of games, since that requires more permissions)
All checks were successful: Test Python / Test (push) succeeded in 32s.

Jon Michael Aanes 2024-10-09 14:24:14 +02:00
parent c74920e478
commit b648983ff2
2 changed files with 33 additions and 5 deletions

Changed file 1: Steam achievement scraper

@@ -15,6 +15,7 @@ logger = logging.getLogger(__name__)
 URL_SITE_ROOT = 'https://steamcommunity.com/'
 URL_GAME_ACHIVEMENTS = URL_SITE_ROOT + 'id/{username}/stats/appid/{appid}'
+URL_USER_RECENT_ACTIVITY = URL_SITE_ROOT + 'id/{username}'

 FORMAT_DATE_HEADER = '%d/%m/%YYYY'
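
For reference, the new constant resolves to a user's public profile page, whose recent-games section is walked by the scraper below. A minimal sketch ('Jmaa' is only an illustrative vanity username, taken from the old hard-coded regex):

    URL_SITE_ROOT = 'https://steamcommunity.com/'
    URL_USER_RECENT_ACTIVITY = URL_SITE_ROOT + 'id/{username}'

    # 'Jmaa' is an example vanity name, not a requirement of the scraper.
    print(URL_USER_RECENT_ACTIVITY.format(username='Jmaa'))
    # https://steamcommunity.com/id/Jmaa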
@@ -25,11 +26,30 @@ class SteamAchievementScraper(Scraper):
     deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS

     def scrape(self) -> Iterator[dict[str, Any]]:
-        yield from self.scrape_app(105600)
+        username = secrets.STEAM_USERNAME
+        for appid in self.determine_appids_from_recent_activity(username):
+            yield from self.scrape_app(username, appid)

-    def scrape_app(self, appid: int) -> Iterator[dict[str, Any]]:
+    def determine_appids_from_recent_activity(self, username: str) -> Iterator[int]:
+        url = URL_USER_RECENT_ACTIVITY.format(
+            username=username,
+        )
+        response = self.session.get(url)
+        response.raise_for_status()
+
+        soup = html_util.normalize_soup_slightly(
+            bs4.BeautifulSoup(response.content, 'lxml'),
+            classes=False,
+        )
+
+        for entry_a in soup.select('.recent_games .recent_game .game_info_cap a'):
+            href = entry_a['href']
+            appid = int(href.split('/')[-1])
+            yield appid
+
+    def scrape_app(self, username: str, appid: int) -> Iterator[dict[str, Any]]:
         url = URL_GAME_ACHIVEMENTS.format(
-            username=secrets.STEAM_USERNAME,
+            username=username,
             appid=appid,
         )
         response = self.session.get(url)
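
The new determine_appids_from_recent_activity method assumes each recent-game link ends in the numeric app id. A small sketch of just that parsing step, using a made-up href (the exact markup of the profile page is an assumption here; for example, a trailing slash would break the int() call):

    # Hypothetical href from a '.recent_games ... .game_info_cap a' element.
    href = 'https://steamcommunity.com/app/105600'
    appid = int(href.split('/')[-1])  # keep only the last path segment
    assert appid == 105600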
@@ -41,7 +61,7 @@ class SteamAchievementScraper(Scraper):
         soup = bs4.BeautifulSoup(response.content, 'lxml')

         game_name: str = re.match(
-            r'Steam Community :: (.+) :: Jmaa', soup.head.title.get_text(),
+            r'Steam Community :: (.+) :: .*', soup.head.title.get_text(),
         ).group(1)

         soup = html_util.normalize_soup_slightly(
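
The regex tweak drops the hard-coded profile name 'Jmaa' from the page-title pattern so the scraper works for any account. A quick check with an invented title string (105600, the previously hard-coded app id, is Terraria):

    import re

    # Invented title following the 'Steam Community :: <game> :: <user>' shape.
    title = 'Steam Community :: Terraria :: SomeOtherUser'
    game_name = re.match(r'Steam Community :: (.+) :: .*', title).group(1)
    assert game_name == 'Terraria'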

Changed file 2: datetime parsing helpers

@@ -36,7 +36,8 @@ def parse_response_datetime(response) -> datetime.datetime:
     ).replace(tzinfo=datetime.UTC)


-LOCAL_TIMEZONE = datetime.datetime.now(datetime.UTC).astimezone().tzinfo
+NOW = datetime.datetime.now(datetime.UTC)
+LOCAL_TIMEZONE = NOW.astimezone().tzinfo


 def try_parse(text: str, fmt: str) -> datetime.datetime | None:
@@ -55,6 +56,13 @@ def parse_time(text: str) -> datetime.datetime:
     time = try_parse(text, '%d %b %Y %I:%M:%S %p')
     time = time or try_parse(text, '%d %b, %Y @ %I:%M%p')
+    if time is None and (m := try_parse(text, '%d %b @ %I:%M%p')):
+        time = m.replace(year=NOW.year)
+
+    assert time is not None, 'Could not parse format'
+
+    if time.tzinfo is None:
+        time = time.replace(tzinfo=LOCAL_TIMEZONE)

     assert time.tzinfo is not None, time
     return time
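
Taken together, the two hunks let parse_time accept Steam's year-less timestamps and attach the local timezone to naive results. A self-contained sketch of the updated behaviour, with try_parse re-implemented as a minimal stand-in for the repository's helper (which may differ):

    import datetime

    NOW = datetime.datetime.now(datetime.UTC)
    LOCAL_TIMEZONE = NOW.astimezone().tzinfo


    def try_parse(text: str, fmt: str) -> datetime.datetime | None:
        # Stand-in helper: return None when the format does not match.
        try:
            return datetime.datetime.strptime(text, fmt)
        except ValueError:
            return None


    def parse_time(text: str) -> datetime.datetime:
        time = try_parse(text, '%d %b %Y %I:%M:%S %p')
        time = time or try_parse(text, '%d %b, %Y @ %I:%M%p')
        if time is None and (m := try_parse(text, '%d %b @ %I:%M%p')):
            time = m.replace(year=NOW.year)  # year-less timestamps mean "this year"

        assert time is not None, 'Could not parse format'

        if time.tzinfo is None:
            time = time.replace(tzinfo=LOCAL_TIMEZONE)

        assert time.tzinfo is not None, time
        return time


    # No year and no timezone in the input; both get filled in.
    print(parse_time('9 Oct @ 2:24pm'))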