1
0
personal-data/personal_data/fetchers/stepmania.py
2025-03-15 21:54:56 +01:00

65 lines
2.3 KiB
Python

import dataclasses
import datetime
import logging
import zoneinfo
from collections.abc import Iterator, Mapping
from pathlib import Path
import bs4
from personal_data.data import DeduplicateMode, Scraper
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Path of the Stats.xml file that the Stepmania scraper opens and parses.
# NOTE(review): hard-coded to a single home directory and profile id —
# presumably this should become configurable; confirm with the owner.
STATS_FILE_PATH: Path = Path(
'/home/jmaa/.itgmania/Save/LocalProfiles/00000000/Stats.xml',
)
def _child_text(element: bs4.Tag, tag_name: str) -> str:
    """Return the stripped text of the first ``tag_name`` descendant.

    Raises:
        AttributeError: if ``element`` has no such descendant.
    """
    return element.select_one(tag_name).get_text().strip()


@dataclasses.dataclass(frozen=True)
class Stepmania(Scraper):
    """Scraper yielding one row per high score stored in ``Stats.xml``.

    The file at ``STATS_FILE_PATH`` is parsed as XML and every
    ``HighScore`` element matched by the selector
    ``SongScores Song HighScoreList HighScore`` becomes one row, except
    scores whose ``Disqualified`` element is not ``0``.
    """

    dataset_name = 'stepmania'
    deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
    deduplicate_ignore_columns = []

    def scrape(self) -> Iterator[Mapping[str, object]]:
        """Yield one mapping per non-disqualified high score.

        Each mapping holds the song name and pack (taken from the last
        two components of the song's ``Dir`` attribute), the difficulty,
        the grade, the play start converted to UTC, the play duration,
        the ``PercentDP`` score and the ``W1``..``W5``/``Miss`` counts.

        Raises:
            OSError: if ``STATS_FILE_PATH`` cannot be opened.
            AttributeError: if a score lacks one of the expected elements.
            ValueError: if a timestamp or number cannot be parsed.
        """
        timezone = zoneinfo.ZoneInfo(
            'Europe/Copenhagen',
        )  # TODO: Parameterize in an intelligent manner

        # Read as bytes so the parser determines the encoding from the
        # document rather than from the process locale.
        with STATS_FILE_PATH.open('rb') as f:
            soup = bs4.BeautifulSoup(f, 'lxml-xml')

        for score in soup.select('SongScores Song HighScoreList HighScore'):
            # The grandparent of the score carries the 'Difficulty'
            # attribute; its parent in turn carries the song's 'Dir'.
            steps = score.parent.parent
            song = steps.parent
            song_path = Path(song['Dir'].removesuffix('/'))

            if _child_text(score, 'Disqualified') != '0':
                logger.warning('Ignored disqualified')
                continue

            # The stored timestamp has no zone of its own; it is
            # interpreted as wall-clock time in ``timezone`` and then
            # normalised to UTC.
            play_start = (
                datetime.datetime.fromisoformat(_child_text(score, 'DateTime'))
                .replace(tzinfo=timezone)
                .astimezone(datetime.UTC)
            )
            play_seconds = float(_child_text(score, 'SurviveSeconds'))

            yield {
                # ``.name`` rather than ``.stem``: these are directory
                # names, and ``.stem`` would cut everything after the
                # last dot (e.g. 'Mr. Saturn' -> 'Mr').
                'song.name': song_path.name,
                'song.pack': song_path.parent.name,
                'song.difficulty': steps['Difficulty'],
                'song.grade': _child_text(score, 'Grade'),
                'play.start': play_start,
                'play.duration': datetime.timedelta(seconds=play_seconds),
                'score.score': float(_child_text(score, 'PercentDP')),
                'score.w1': int(_child_text(score, 'W1')),
                'score.w2': int(_child_text(score, 'W2')),
                'score.w3': int(_child_text(score, 'W3')),
                'score.w4': int(_child_text(score, 'W4')),
                'score.w5': int(_child_text(score, 'W5')),
                'score.miss': int(_child_text(score, 'Miss')),
            }