"""Scraper for high scores stored in a local StepMania/ITGmania profile.

Parses the profile's ``Stats.xml`` file and yields one row per recorded
(non-disqualified) high score.
"""
# Origin: commit c11f48a7a62e852f423a438db2bea9315d767293 ("Stepmania scraper"),
# which created personal_data/fetchers/stepmania.py.

import dataclasses
import datetime
import logging
import zoneinfo
from collections.abc import Iterator, Mapping
from decimal import Decimal
from pathlib import Path

import bs4

from personal_data.data import DeduplicateMode, Scraper

from .. import secrets

logger = logging.getLogger(__name__)

# TODO: Hard-coded, user-specific location of the profile statistics file.
STATS_FILE_PATH: Path = Path(
    '/home/jmaa/.itgmania/Save/LocalProfiles/00000000/Stats.xml',
)


def _child_text(element: bs4.Tag, name: str) -> str:
    """Return the stripped text of the first descendant of *element* named *name*.

    Raises:
        ValueError: If *element* has no descendant with that tag name.
    """
    child = element.select_one(name)
    if child is None:
        msg = f'Missing <{name}> element in <{element.name}>'
        raise ValueError(msg)
    return child.get_text().strip()


@dataclasses.dataclass(frozen=True)
class Stepmania(Scraper):
    """Scraper that reads high scores from ``STATS_FILE_PATH``."""

    dataset_name = 'stepmania'
    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
    deduplicate_ignore_columns = []

    def scrape(self) -> Iterator[Mapping[str, object]]:
        """Yield one mapping per non-disqualified high score in the stats file.

        Each mapping has the keys ``song.name``, ``song.pack``,
        ``song.difficulty``, ``play.start`` (timezone-aware, UTC),
        ``play.duration`` (``timedelta``), ``score.score`` and the judgement
        counts ``score.w1`` .. ``score.w5`` and ``score.miss``.

        Raises:
            OSError: If the stats file cannot be opened.
            ValueError: If a high score entry lacks an expected child element
                or contains a value that cannot be parsed.
        """
        timezone = zoneinfo.ZoneInfo(
            'Europe/Copenhagen',
        )  # TODO: Parameterize in an intelligent manner

        logger.debug('Reading Stepmania stats from %s', STATS_FILE_PATH)

        # Read as bytes so the XML parser honours the encoding declared in the
        # document instead of relying on the locale's default text encoding.
        with STATS_FILE_PATH.open('rb') as f:
            soup = bs4.BeautifulSoup(f, 'lxml-xml')

        for score in soup.select('SongScores Song HighScoreList HighScore'):
            # Walk up from the HighScore: its parent is the HighScoreList, the
            # next level carries the 'Difficulty' attribute, and the level
            # above that is the Song element carrying the 'Dir' attribute.
            chart = score.parent.parent
            song = chart.parent
            song_path = Path(song['Dir'].removesuffix('/'))

            if _child_text(score, 'Disqualified') != '0':
                logger.warning('Ignored disqualified')
                continue

            # NOTE(review): the stored timestamp is presumably naive local
            # time; it is interpreted in `timezone` and normalised to UTC.
            play_start = datetime.datetime.fromisoformat(
                _child_text(score, 'DateTime'),
            )
            play_start = play_start.replace(tzinfo=timezone).astimezone(
                datetime.UTC,
            )

            play_seconds = float(_child_text(score, 'SurviveSeconds'))

            yield {
                'song.name': song_path.stem,
                'song.pack': song_path.parent.stem,
                'song.difficulty': chart['Difficulty'],
                'play.start': play_start,
                'play.duration': datetime.timedelta(seconds=play_seconds),
                # Keys below previously had a stray trailing ':' on some
                # entries ('score.score:', 'score.w1:', ...); normalised so
                # all score columns follow the same naming scheme.
                'score.score': float(_child_text(score, 'PercentDP')),
                'score.w1': int(_child_text(score, 'W1')),
                'score.w2': int(_child_text(score, 'W2')),
                'score.w3': int(_child_text(score, 'W3')),
                'score.w4': int(_child_text(score, 'W4')),
                'score.w5': int(_child_text(score, 'W5')),
                'score.miss': int(_child_text(score, 'Miss')),
            }