1
0
personal-data/scripts/download_simfiles.py

93 lines
2.6 KiB
Python
Raw Normal View History

2025-02-01 23:35:45 +00:00
import sys
import bs4
import zipfile
import subprocess
import csv
import requests
from pathlib import Path
import personal_data.csv_import
import personal_data.main
import dataclasses
2025-02-02 00:03:04 +00:00
import logging
logger = logging.getLogger(__name__)
2025-02-01 23:35:45 +00:00
@dataclasses.dataclass
class Result:
    """A single hit from the simfile search results table.

    Attributes:
        title: Stripped text of the first cell of the result row.
        id: Numeric simfile id extracted from the link in the first cell.
        levels: Stripped text of the second cell of the result row.
    """

    title: str
    id: int
    levels: str
# Session object shared by the search requests in this script; it is created
# once, at import time.
# NOTE(review): the meaning of the empty list argument is defined by
# personal_data.main.get_session -- confirm there.
SESSION = personal_data.main.get_session(
    [],
    with_cfscrape=False,
    ignore_cache=False,
)
def parse_results(response) -> list[Result]:
    """Extract search hits from a simfile search response.

    Every ``table tbody tr`` row of the response HTML is inspected. A row
    becomes a ``Result`` when it has at least two ``td`` cells and its first
    cell contains a link whose ``href`` is ``viewsimfile.php?simfileid=<int>``.
    Rows that do not fit this shape are skipped instead of aborting the parse.

    Args:
        response: Object exposing the HTML document as ``response.text``.

    Returns:
        The parsed results in document order; empty when no row qualifies.
    """
    link_prefix = 'viewsimfile.php?simfileid='
    soup = bs4.BeautifulSoup(response.text, 'lxml')
    results = []
    for tr in soup.select('table tbody tr'):
        cells = tr.select('td')
        # Both the title cell and the levels cell are required; indexing a
        # shorter row would raise IndexError.
        if len(cells) < 2:
            continue
        link = cells[0].a
        if link is None:
            continue
        # An anchor without href yields '' here, which fails int() below and
        # is skipped like any other unparsable link.
        raw_id = link.get('href', '').removeprefix(link_prefix)
        try:
            simfile_id = int(raw_id)
        except ValueError:
            logger.debug('Skipping row with unrecognised link %r', raw_id)
            continue
        title = cells[0].get_text().strip()
        levels = cells[1].get_text().strip()
        results.append(Result(title, simfile_id, levels))
    return results
def search_for_song(song_data) -> Result | None:
    """Look up a song on the simfile search endpoint.

    The search is attempted twice: first with both title and artist, then
    with the title alone (empty artist). The first hit of the first
    non-empty result list is returned.

    Args:
        song_data: Mapping providing ``'song.name_eng'`` and ``'song.artist'``.

    Returns:
        The first matching ``Result``, or ``None`` (after logging a warning)
        when neither search produced any result.
    """
    search_url = 'https://zenius-i-vanisher.com/v5.2/simfiles_search_ajax.php'
    song_title = song_data['song.name_eng']
    # Title + artist first, then fall back to a title-only query.
    for artist in (song_data['song.artist'], ''):
        response = SESSION.post(
            search_url,
            data={
                'songtitle': song_title,
                'songartist': artist,
            },
        )
        hits = parse_results(response)
        if hits:
            return hits[0]

    logger.warning('No results for %s', song_title)
    return None
def download_song(song_data, output_dir: Path):
    """Search for a song and download its simfile archive with ``curl``.

    Nothing is downloaded when the search finds no match or when the target
    zip already exists in ``output_dir``.

    Args:
        song_data: Mapping describing the song; passed to ``search_for_song``.
        output_dir: Existing directory that receives the zip file.

    Raises:
        subprocess.CalledProcessError: If ``curl`` exits with a non-zero
            status (``--fail`` makes HTTP errors count as failures).
    """
    song_result = search_for_song(song_data)
    if song_result is None:
        return

    # The title is scraped from a web page; a path separator in it would make
    # the target point into a subdirectory that does not exist.
    safe_title = song_result.title.replace('/', '_').replace('\\', '_')
    path_zip = output_dir / f'zenius-{song_result.id}-{safe_title}.zip'
    if path_zip.exists():
        logger.warning('Skipping existing file')
        return

    logger.warning('Downloading to %s', path_zip)
    url = f'https://zenius-i-vanisher.com/v5.2/download.php?type=ddrsimfile&simfileid={song_result.id}'

    # Download under a temporary name and rename only on success. Otherwise an
    # interrupted transfer would leave a truncated zip at path_zip, and the
    # existence check above would skip it on every later run.
    path_partial = path_zip.with_name(path_zip.name + '.part')
    cmd = ['curl', '-L', '--fail', url, '-o', str(path_partial)]
    try:
        subprocess.run(cmd, check=True, capture_output=True)
        path_partial.replace(path_zip)
    finally:
        # No-op after a successful rename; removes leftovers after a failure.
        path_partial.unlink(missing_ok=True)
2025-02-01 23:35:45 +00:00
def main():
    """Download a simfile for every song listed in the songs CSV.

    Reads ``./output/myanimelist_songs.csv``, makes sure ``./output/songs``
    exists, and calls ``download_song`` once per loaded row.
    """
    songs_csv = Path('./output/myanimelist_songs.csv')
    download_dir = Path('./output/songs')
    download_dir.mkdir(parents=True, exist_ok=True)

    for entry in personal_data.csv_import.load_csv_file(songs_csv):
        logger.warning('Trying to download %s', entry['song.name_eng'])
        download_song(entry, download_dir)
# Run the downloader only when this file is executed as a script.
if __name__ == '__main__':
    main()