Improved type parsing
This commit is contained in:
parent
c11f48a7a6
commit
2c2421adf3
|
@ -36,6 +36,15 @@ def try_value(fn: Callable[[str], T], s: str) -> T | None:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def parse_timedelta(text: str) -> datetime.timedelta:
|
||||||
|
if t := try_value(lambda t: datetime.datetime.strptime(t, '%H:%M:%S.%f'), text):
|
||||||
|
return datetime.timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
|
||||||
|
elif t := try_value(lambda t: datetime.datetime.strptime(t, '%H:%M:%S'), text):
|
||||||
|
return datetime.timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
|
||||||
|
else:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def csv_str_to_value(
|
def csv_str_to_value(
|
||||||
s: str,
|
s: str,
|
||||||
) -> (
|
) -> (
|
||||||
|
@ -60,6 +69,8 @@ def csv_str_to_value(
|
||||||
return v_date
|
return v_date
|
||||||
if (v_datetime := try_value(datetime.datetime.fromisoformat, s)) is not None:
|
if (v_datetime := try_value(datetime.datetime.fromisoformat, s)) is not None:
|
||||||
return v_datetime
|
return v_datetime
|
||||||
|
if (v_timedelta := parse_timedelta(s)) is not None:
|
||||||
|
return v_timedelta
|
||||||
if s.startswith(('http://', 'https://')):
|
if s.startswith(('http://', 'https://')):
|
||||||
return urllib.parse.urlparse(s)
|
return urllib.parse.urlparse(s)
|
||||||
if s.lower() == 'false':
|
if s.lower() == 'false':
|
||||||
|
@ -103,13 +114,21 @@ class PossibleKeys:
|
||||||
misc: list[str]
|
misc: list[str]
|
||||||
|
|
||||||
|
|
||||||
|
def is_duration_key(k, v):
    """Tell whether the pair ``(k, v)`` looks like a duration field.

    A pair qualifies when the value's type and the key's text agree:

    * a ``Decimal`` value whose key contains ``'duration_seconds'``, or
    * a ``datetime.timedelta`` value whose key contains ``'duration'``.

    Args:
        k: The field name to inspect.
        v: The value stored under that field.

    Returns:
        ``True`` if either rule above matches, otherwise ``False``.
    """
    # Each rule pairs a value type with the substring its key must contain.
    rules = (
        (Decimal, 'duration_seconds'),
        (datetime.timedelta, 'duration'),
    )
    return any(
        isinstance(v, value_type) and marker in k
        for value_type, marker in rules
    )
|
||||||
|
|
||||||
|
|
||||||
def determine_possible_keys(event_data: dict[str, Any]) -> PossibleKeys:
|
def determine_possible_keys(event_data: dict[str, Any]) -> PossibleKeys:
|
||||||
# Select data
|
# Select data
|
||||||
time_keys = [k for k, v in event_data.items() if isinstance(v, datetime.date)]
|
time_keys = [k for k, v in event_data.items() if isinstance(v, datetime.date)]
|
||||||
duration_keys = [
|
duration_keys = [
|
||||||
k
|
k
|
||||||
for k, v in event_data.items()
|
for k, v in event_data.items()
|
||||||
if isinstance(v, Decimal) and 'duration_seconds' in k
|
if is_duration_key(k,v)
|
||||||
]
|
]
|
||||||
name_keys = [k for k, v in event_data.items() if isinstance(v, str)]
|
name_keys = [k for k, v in event_data.items() if isinstance(v, str)]
|
||||||
image_keys = [
|
image_keys = [
|
||||||
|
@ -152,12 +171,16 @@ def start_end(
|
||||||
|
|
||||||
if keys.time_start and keys.duration:
|
if keys.time_start and keys.duration:
|
||||||
start = sample[keys.time_start[0]]
|
start = sample[keys.time_start[0]]
|
||||||
duration = datetime.timedelta(seconds=float(sample[keys.duration[0]]))
|
duration = sample[keys.duration[0]]
|
||||||
|
if not isinstance(duration, datetime.timedelta):
|
||||||
|
duration = datetime.timedelta(seconds=float(duration))
|
||||||
return (start, start + duration)
|
return (start, start + duration)
|
||||||
|
|
||||||
if keys.time_end and keys.duration:
|
if keys.time_end and keys.duration:
|
||||||
end = sample[keys.time_end[0]]
|
end = sample[keys.time_end[0]]
|
||||||
duration = datetime.timedelta(seconds=float(sample[keys.duration[0]]))
|
duration = sample[keys.duration[0]]
|
||||||
|
if not isinstance(duration, datetime.timedelta):
|
||||||
|
duration = datetime.timedelta(seconds=float(duration))
|
||||||
return (end - duration, end)
|
return (end - duration, end)
|
||||||
|
|
||||||
if keys.time_start:
|
if keys.time_start:
|
||||||
|
|
|
@ -34,6 +34,7 @@ class Stepmania(Scraper):
|
||||||
|
|
||||||
# Derp
|
# Derp
|
||||||
for score in soup.select('SongScores Song HighScoreList HighScore'):
|
for score in soup.select('SongScores Song HighScoreList HighScore'):
|
||||||
|
print(score.parent.parent)
|
||||||
song = score.parent.parent.parent
|
song = score.parent.parent.parent
|
||||||
song_path = Path(song ['Dir'].removesuffix('/'))
|
song_path = Path(song ['Dir'].removesuffix('/'))
|
||||||
|
|
||||||
|
@ -51,12 +52,13 @@ class Stepmania(Scraper):
|
||||||
'song.name': song_path.stem,
|
'song.name': song_path.stem,
|
||||||
'song.pack': song_path.parent.stem,
|
'song.pack': song_path.parent.stem,
|
||||||
'song.difficulty': score.parent.parent['Difficulty'],
|
'song.difficulty': score.parent.parent['Difficulty'],
|
||||||
|
'song.grade': score.select_one('Grade').get_text(),
|
||||||
'play.start': play_start,
|
'play.start': play_start,
|
||||||
'play.duration': datetime.timedelta(seconds=play_seconds),
|
'play.duration': datetime.timedelta(seconds=play_seconds),
|
||||||
'score.score:': float(score.select_one('PercentDP').get_text()),
|
'score.score': float(score.select_one('PercentDP').get_text()),
|
||||||
'score.w1:': int(score.select_one('W1').get_text()),
|
'score.w1': int(score.select_one('W1').get_text()),
|
||||||
'score.w2:': int(score.select_one('W2').get_text()),
|
'score.w2': int(score.select_one('W2').get_text()),
|
||||||
'score.w3:': int(score.select_one('W3').get_text()),
|
'score.w3': int(score.select_one('W3').get_text()),
|
||||||
'score.w4': int(score.select_one('W4').get_text()),
|
'score.w4': int(score.select_one('W4').get_text()),
|
||||||
'score.w5': int(score.select_one('W5').get_text()),
|
'score.w5': int(score.select_one('W5').get_text()),
|
||||||
'score.miss': int(score.select_one('Miss').get_text()),
|
'score.miss': int(score.select_one('Miss').get_text()),
|
||||||
|
|
Loading…
Reference in New Issue
Block a user