1
0
personal-data/personal_data/fetchers/steam_community.py

71 lines
2.2 KiB
Python

import dataclasses
import datetime
import logging
import re
import bs4
from typing import Any
from collections.abc import Iterator
from ..data import DeduplicateMode, Scraper
from .. import secrets, parse_util, html_util
logger = logging.getLogger(__name__)

# Root of the Steam Community site; the URL template below is built on it.
URL_SITE_ROOT = 'https://steamcommunity.com/'

# Achievement/stats page of one game for one profile. Fill in with
# `str.format(username=..., appid=...)`.
URL_GAME_ACHIVEMENTS = URL_SITE_ROOT+'id/{username}/stats/appid/{appid}'

# strftime/strptime pattern for day/month/year dates, e.g. "25/12/2023".
# `%Y` already denotes the full four-digit year; the former '%d/%m/%YYYY'
# appended a literal "YYY" to formatted output and made parsing of ordinary
# dates fail.
FORMAT_DATE_HEADER = '%d/%m/%Y'
@dataclasses.dataclass(frozen=True)
class SteamAchievementScraper(Scraper):
    """Scrape unlocked achievements from Steam Community game stats pages.

    Each yielded row describes one unlocked achievement ("trophy") together
    with the name of the game it belongs to and the time it was unlocked.
    """

    dataset_name = 'games_played_TODO'
    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS

    def scrape(self) -> Iterator[dict[str, Any]]:
        """Yield achievement rows for the single hard-coded app id 105600."""
        yield from self.scrape_app(105600)

    def scrape_app(self, appid: int) -> Iterator[dict[str, Any]]:
        """Yield one row per unlocked achievement of the game `appid`.

        The page for `secrets.STEAM_USERNAME` is fetched through
        `self.session` (presumably supplied by the `Scraper` base class --
        verify there). Rows without an `.achieveUnlockTime` element, i.e.
        achievements that carry no unlock time, are skipped.

        Raises:
            ValueError: If the page title does not have the expected
                "Steam Community :: <game> :: Jmaa" shape.
            Exception: Whatever `response.raise_for_status()` raises for an
                unsuccessful HTTP response.
        """
        url = URL_GAME_ACHIVEMENTS.format(
            username=secrets.STEAM_USERNAME,
            appid=appid,
        )
        response = self.session.get(url)
        response.raise_for_status()

        # NOTE(review): the result is not used by this method. The call is
        # kept in case the helper validates the response -- confirm against
        # parse_util and drop it if it is a pure getter.
        _response_time = parse_util.parse_response_datetime(response)

        # Parse data
        soup = bs4.BeautifulSoup(response.content, 'lxml')

        # The game name is only available from the page title, which also
        # embeds the (hard-coded) profile name.
        page_title: str = soup.head.title.get_text()
        title_match = re.match(r'Steam Community :: (.+) :: Jmaa', page_title)
        if title_match is None:
            msg = f'Unexpected Steam Community page title: {page_title!r}'
            raise ValueError(msg)
        game_name: str = title_match.group(1)

        # classes=False: the CSS selectors below rely on class names.
        soup = html_util.normalize_soup_slightly(
            soup,
            classes=False,
        )

        for entry in soup.select('.achieveRow'):
            trophy_name: str = entry.select_one('h3').get_text()
            trophy_desc: str = entry.select_one('h5').get_text()
            # `tag.src` would search for a child <src> element (and yield
            # None); the image URL lives in the `src` *attribute*.
            trophy_icon = entry.select_one('img').get('src')

            unlock_tag = entry.select_one('.achieveUnlockTime')
            if unlock_tag is None:
                continue

            time_acquired_text: str = (
                unlock_tag.get_text().strip().removeprefix('Unlocked ')
            )
            time_acquired: datetime.datetime = parse_util.parse_time(
                time_acquired_text,
            )

            yield {
                'game.name': game_name,
                'me.last_played_time': time_acquired,
                # Trophy Data
                'trophy.name': trophy_name,
                'trophy.desc': trophy_desc,
                'trophy.icon': trophy_icon,
            }