1
0

Steam: Automatic detection of appids for recent games. (Detection of all games is still missing, as that requires more permissions.)
All checks were successful
Test Python / Test (push) Successful in 32s

This commit is contained in:
Jon Michael Aanes 2024-10-09 14:24:14 +02:00
parent c74920e478
commit b648983ff2
2 changed files with 33 additions and 5 deletions

View File

@ -15,6 +15,7 @@ logger = logging.getLogger(__name__)
# Root of the Steam community site; the URL templates below are built on it.
URL_SITE_ROOT = 'https://steamcommunity.com/'
# Per-game stats page URL template; format with ``username`` and ``appid``.
URL_GAME_ACHIVEMENTS = URL_SITE_ROOT + 'id/{username}/stats/appid/{appid}'
# User profile page URL template; format with ``username``.
URL_USER_RECENT_ACTIVITY = URL_SITE_ROOT + 'id/{username}'
# NOTE(review): in strptime/strftime, '%Y' is already the 4-digit year, so
# '%YYYY' means '%Y' followed by a literal 'YYY' -- confirm this is intended.
FORMAT_DATE_HEADER = '%d/%m/%YYYY'
@ -25,11 +26,30 @@ class SteamAchievementScraper(Scraper):
deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
def scrape(self) -> Iterator[dict[str, Any]]:
yield from self.scrape_app(105600)
username = secrets.STEAM_USERNAME
for appid in self.determine_appids_from_recent_activity(username):
yield from self.scrape_app(username, appid)
def scrape_app(self, appid: int) -> Iterator[dict[str, Any]]:
def determine_appids_from_recent_activity(self, username: str) -> Iterator[int]:
    """Yield the appids of the games linked from the user's recent-games list.

    Fetches the page at ``URL_USER_RECENT_ACTIVITY`` for ``username`` and
    reads every link matched by the recent-games CSS selector.

    Args:
        username: Name substituted into ``URL_USER_RECENT_ACTIVITY``.

    Yields:
        The integer appid parsed from each matched link, in document order.

    Raises:
        ValueError: If a matched link does not end in a numeric path segment.
        Any error raised by ``response.raise_for_status()`` is propagated.
    """
    url = URL_USER_RECENT_ACTIVITY.format(username=username)
    response = self.session.get(url)
    response.raise_for_status()

    soup = html_util.normalize_soup_slightly(
        bs4.BeautifulSoup(response.content, 'lxml'),
        classes=False,
    )

    for entry_a in soup.select('.recent_games .recent_game .game_info_cap a'):
        href = entry_a['href']
        # Take the last numeric path segment, tolerating a trailing slash and
        # any query string or fragment; a bare split('/')[-1] would hand
        # int() an empty string or '105600?snr=...' in those cases.
        m = re.search(r'(?:^|/)(\d+)/?(?:[?#].*)?$', href)
        if m is None:
            raise ValueError(f'Could not determine appid from link: {href!r}')
        yield int(m.group(1))
def scrape_app(self, username: str, appid: int) -> Iterator[dict[str, Any]]:
url = URL_GAME_ACHIVEMENTS.format(
username=secrets.STEAM_USERNAME,
username=username,
appid=appid,
)
response = self.session.get(url)
@ -41,7 +61,7 @@ class SteamAchievementScraper(Scraper):
soup = bs4.BeautifulSoup(response.content, 'lxml')
game_name: str = re.match(
r'Steam Community :: (.+) :: Jmaa', soup.head.title.get_text(),
r'Steam Community :: (.+) :: .*', soup.head.title.get_text(),
).group(1)
soup = html_util.normalize_soup_slightly(

View File

@ -36,7 +36,8 @@ def parse_response_datetime(response) -> datetime.datetime:
).replace(tzinfo=datetime.UTC)
LOCAL_TIMEZONE = datetime.datetime.now(datetime.UTC).astimezone().tzinfo
NOW = datetime.datetime.now(datetime.UTC)
LOCAL_TIMEZONE = NOW.astimezone().tzinfo
def try_parse(text: str, fmt: str) -> datetime.datetime | None:
@ -55,6 +56,13 @@ def parse_time(text: str) -> datetime.datetime:
time = try_parse(text, '%d %b %Y %I:%M:%S %p')
time = time or try_parse(text, '%d %b, %Y @ %I:%M%p')
if time is None and (m := try_parse(text, '%d %b @ %I:%M%p')):
time = m.replace(year = NOW.year)
assert time is not None, 'Could not parse format'
if time.tzinfo is None:
time = time.replace(tzinfo=LOCAL_TIMEZONE )
assert time.tzinfo is not None, time
return time