# Origin: scripts/download_simfiles.py, added by commit
# 19c7eb37de68c4ecf3d05e44ebb3ff5d26ed9c7a ("Download zips"),
# Jon Michael Aanes, Sun, 2 Feb 2025 00:35:45 +0100.
"""Download simfile zips from zenius-i-vanisher.com for a CSV of songs.

For every row loaded from ``./output/myanimelist_songs.csv`` the script
searches the Zenius-I-Vanisher simfile index, downloads the first hit as a
zip archive into ``./output/songs`` and extracts it there.
"""

import csv
import dataclasses
import re
import subprocess
import sys
import zipfile
from pathlib import Path

import bs4
import requests

import personal_data.csv_import
import personal_data.main

_SEARCH_URL = 'https://zenius-i-vanisher.com/v5.2/simfiles_search_ajax.php'
_SIMFILE_HREF_PREFIX = 'viewsimfile.php?simfileid='

# Path separators and control characters must never end up in a file name
# that is built from scraped text.
_UNSAFE_FILENAME_CHARS = re.compile(r'[/\\\x00-\x1f]')


@dataclasses.dataclass
class Result:
    """One row of the simfile search result table."""

    # Text of the first table cell.
    title: str
    # Numeric simfile id taken from the link in the first table cell.
    id: int
    # Text of the second table cell.
    levels: str


# Shared session used for all search requests; its behaviour is configured by
# personal_data.main.get_session.
SESSION = personal_data.main.get_session(
    [],
    with_cfscrape=False,
    ignore_cache=False,
)


def _safe_filename_part(text: str) -> str:
    """Return *text* with path separators and control characters replaced by ``_``."""
    return _UNSAFE_FILENAME_CHARS.sub('_', text)


def parse_results(response) -> list[Result]:
    """Parse the HTML of a search response into a list of results.

    Args:
        response: Object whose ``text`` attribute holds the HTML returned by
            the search endpoint.

    Returns:
        One ``Result`` per table row that has at least two cells and a link
        in its first cell. The ``Nothing found.`` placeholder row and rows
        without that structure are skipped.

    Raises:
        ValueError: If a result link does not end in an integer simfile id.
    """
    soup = bs4.BeautifulSoup(response.text, 'lxml')

    results = []
    for row in soup.select('table tbody tr'):
        if row.get_text().strip() == 'Nothing found.':
            continue

        cells = row.select('td')
        if len(cells) < 2:
            # Not a result row (too few columns).
            continue

        link = cells[0].a
        href = link.get('href') if link is not None else None
        if href is None:
            # Without a link there is no simfile id to download.
            continue

        title = cells[0].get_text().strip()
        simfile_id = href.removeprefix(_SIMFILE_HREF_PREFIX)
        levels = cells[1].get_text().strip()
        results.append(Result(title, int(simfile_id), levels))
    return results


def search_for_song(song_data) -> Result | None:
    """Search for a song and return the first hit.

    The search is first done by title and artist; if that yields nothing it
    is repeated by title only.

    Args:
        song_data: Mapping-like row that is indexed with the keys
            ``'song.name_eng'`` and ``'song.artist'``.

    Returns:
        The first ``Result`` of the first query that found anything, or
        ``None`` when no query found a match.
    """
    title = song_data['song.name_eng']
    artist = song_data['song.artist']

    # An already-empty artist would make the fallback query a duplicate.
    artists = [artist] if artist == '' else [artist, '']

    for artist_query in artists:
        response = SESSION.post(
            _SEARCH_URL,
            data={
                'songtitle': title,
                'songartist': artist_query,
            },
        )
        if results := parse_results(response):
            return results[0]
    return None


def download_song(song_data, output_dir: Path) -> None:
    """Download and extract the simfile zip for one song.

    Does nothing when the search finds no match.

    Args:
        song_data: Row passed on to ``search_for_song``.
        output_dir: Directory that receives both the zip file and its
            extracted contents.

    Raises:
        subprocess.CalledProcessError: If ``curl`` exits with a non-zero
            status.
        zipfile.BadZipFile: If the downloaded file is not a valid zip.
    """
    song_result = search_for_song(song_data)
    if song_result is None:
        return

    # The title comes from scraped HTML, so it is sanitised before it becomes
    # part of a path; otherwise a '/' in a title would point outside
    # output_dir (or into a directory that does not exist).
    safe_title = _safe_filename_part(song_result.title)
    path_zip = output_dir / f'zenius-{song_result.id}-{safe_title}.zip'

    url = f'https://zenius-i-vanisher.com/v5.2/download.php?type=ddrsimfile&simfileid={song_result.id}'

    cmd = ['curl', '-L', '--fail', url, '-o', path_zip]
    subprocess.run(cmd, check=True, capture_output=True)

    with zipfile.ZipFile(path_zip, 'r') as zip_ref:
        zip_ref.extractall(output_dir)


def main() -> None:
    """Download simfiles for every song in ``./output/myanimelist_songs.csv``."""
    csv_path = Path('./output/myanimelist_songs.csv')
    output_path = Path('./output/songs')
    output_path.mkdir(exist_ok=True, parents=True)

    songs = personal_data.csv_import.load_csv_file(csv_path)
    for song in songs:
        download_song(song, output_path)


if __name__ == '__main__':
    main()