From 263b0b284364cb89b6dfa3ed30382e739747904b Mon Sep 17 00:00:00 2001
From: Jon Michael Aanes
Date: Sun, 1 Sep 2024 16:55:45 +0200
Subject: [PATCH] Implemented partial Steam support

---
 personal_data/fetchers/steam_community.py | 80 +++++++++++++++++++++++
 personal_data/parse_util.py               | 29 ++++++++-
 personal_data/secrets.py                  |  3 +
 test/test_deduplicate.py                  |  4 +-
 test/test_parsing.py                      |  2 +-
 5 files changed, 112 insertions(+), 6 deletions(-)
 create mode 100644 personal_data/fetchers/steam_community.py

diff --git a/personal_data/fetchers/steam_community.py b/personal_data/fetchers/steam_community.py
new file mode 100644
index 0000000..a975e33
--- /dev/null
+++ b/personal_data/fetchers/steam_community.py
@@ -0,0 +1,80 @@
+import dataclasses
+import datetime
+import logging
+import re
+import bs4
+from typing import Any
+from collections.abc import Iterator
+
+from ..data import DeduplicateMode, Scraper
+from .. import secrets, parse_util, html_util
+
+logger = logging.getLogger(__name__)
+
+URL_SITE_ROOT = 'https://steamcommunity.com/'
+
+URL_GAME_ACHIVEMENTS = URL_SITE_ROOT+'id/{username}/stats/appid/{appid}'
+
+FORMAT_DATE_HEADER = '%d/%m/%YYYY'
+
+
+@dataclasses.dataclass(frozen=True)
+class SteamAchievementScraper(Scraper):
+    """Scrapes unlocked achievements from Steam Community stats pages."""
+
+    dataset_name = 'games_played_TODO'
+    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
+
+    def scrape(self) -> Iterator[dict[str, Any]]:
+        # Partial support: only this one hard-coded app id is scraped so far.
+        yield from self.scrape_app(105600)
+
+    def scrape_app(self, appid: int) -> Iterator[dict[str, Any]]:
+        """Yield one row for each achievement of appid that has an unlock time.
+
+        Raises ValueError if the page title does not have the expected form.
+        """
+        url = URL_GAME_ACHIVEMENTS.format(
+            username=secrets.STEAM_USERNAME,
+            appid=appid,
+        )
+        response = self.session.get(url)
+        response.raise_for_status()
+
+        # Parse data
+        soup = bs4.BeautifulSoup(response.content, 'lxml')
+
+        # Expected title: 'Steam Community :: <game> :: <profile name>'.
+        title: str = soup.head.title.get_text()
+        title_match = re.match(r'Steam Community :: (.+) :: .+', title)
+        if title_match is None:
+            msg = f'Unexpected page title: {title!r}'
+            raise ValueError(msg)
+        game_name: str = title_match.group(1)
+
+        soup = html_util.normalize_soup_slightly(
+            soup,
+            classes=False,
+        )
+
+        for entry in soup.select('.achieveRow'):
+            trophy_name: str = entry.select_one('h3').get_text()
+            trophy_desc: str = entry.select_one('h5').get_text()
+            # Tag.src would look up a child <src> tag; read the attribute.
+            trophy_icon: str | None = entry.select_one('img').get('src')
+
+            # No unlock time; presumably the achievement is still locked.
+            time_acquired_html: bs4.Tag | None = entry.select_one('.achieveUnlockTime')
+            if time_acquired_html is None:
+                continue
+            time_acquired_text: str = time_acquired_html.get_text().strip().removeprefix('Unlocked ')
+            time_acquired: datetime.datetime = parse_util.parse_time(time_acquired_text)
+
+            yield {
+                'game.name': game_name,
+                'me.last_played_time': time_acquired,
+                # Trophy Data
+                'trophy.name': trophy_name,
+                'trophy.desc': trophy_desc,
+                'trophy.icon': trophy_icon,
+            }
diff --git a/personal_data/parse_util.py b/personal_data/parse_util.py
index d23c4a0..4f42bc2 100644
--- a/personal_data/parse_util.py
+++ b/personal_data/parse_util.py
@@ -36,14 +36,37 @@ def parse_response_datetime(response) -> datetime.datetime:
     ).replace(tzinfo=datetime.UTC)
 
 
-LOCAL_TIMEZONE = datetime.datetime.now(datetime.timezone.utc).astimezone().tzinfo
+LOCAL_TIMEZONE = datetime.datetime.now(datetime.UTC).astimezone().tzinfo
+
+
+def try_parse(text: str, fmt: str) -> datetime.datetime | None:
+    """Parse text using fmt, returning None if text does not match.
+
+    Results without timezone information are assumed to be in local time.
+    """
+    try:
+        time = datetime.datetime.strptime(text, fmt)
+    except ValueError:
+        return None
+    if time.tzinfo is None:
+        time = time.replace(tzinfo=LOCAL_TIMEZONE)
+    return time
 
 
 def parse_time(text: str) -> datetime.datetime:
+    """Parse a timestamp written in any of the supported formats.
+
+    Raises ValueError if no format matches.
+    """
     text = text.replace('\n', ' ')
     text = text.strip()
-    time = datetime.datetime.strptime(text, '%d %b %Y %I:%M:%S %p')
-    time = time.replace(tzinfo=LOCAL_TIMEZONE)
+
+    time = try_parse(text, '%d %b %Y %I:%M:%S %p')
+    time = time or try_parse(text, '%d %b, %Y @ %I:%M%p')
+    if time is None:
+        msg = f'Unknown time format: {text!r}'
+        raise ValueError(msg)
+
     assert time.tzinfo is not None, time
     return time
 
diff --git a/personal_data/secrets.py b/personal_data/secrets.py
index a56c1d5..83af2d5 100644
--- a/personal_data/secrets.py
+++ b/personal_data/secrets.py
@@ -15,6 +15,9 @@ PLAYSTATION_PSN_ID = load_secret('PLAYSTATION_PSN_ID')
 # Partisia Blockchain
 PBC_ACCOUNT_ADDRESS = load_secret('PBC_ACCOUNT_ADDRESS')
 
+# Steam
+STEAM_USERNAME = load_secret('STEAM_USERNAME')
+
 # Kucoin
 KUCOIN_KEY = load_secret('KUCOIN_KEY')
 KUCOIN_SECRET = load_secret('KUCOIN_SECRET')
diff --git a/test/test_deduplicate.py b/test/test_deduplicate.py
index 2dabd59..10250f9 100644
--- a/test/test_deduplicate.py
+++ b/test/test_deduplicate.py
@@ -60,7 +60,7 @@ LIST_2 = [
             27,
             31,
             134506,
-            tzinfo=datetime.timezone.utc,
+            tzinfo=datetime.UTC,
         ),
         'weight.kg': Decimal('73.6'),
     },
@@ -75,7 +75,7 @@ LIST_2 = [
             36,
             9,
             590355,
-            tzinfo=datetime.timezone.utc,
+            tzinfo=datetime.UTC,
         ),
         'weight.kg': Decimal('74.7'),
     },
diff --git a/test/test_parsing.py b/test/test_parsing.py
index b082da9..ce6d15c 100644
--- a/test/test_parsing.py
+++ b/test/test_parsing.py
@@ -8,7 +8,7 @@ from personal_data.main import to_value
 PARSE_MAPPINGS = [
     (
         '2024-04-28 21:35:40+00:00',
-        datetime.datetime(2024, 4, 28, 21, 35, 40, tzinfo=datetime.timezone.utc),
+        datetime.datetime(2024, 4, 28, 21, 35, 40, tzinfo=datetime.UTC),
     ),
     (
         '0003791e9f5f3691b8bbbe0d12a7ae9c3f2e89db38',