From a27ffe6ddb1ee5ed8c545667e1a7e8e63ac3f2f7 Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Tue, 6 May 2025 00:47:30 +0200 Subject: [PATCH] Move most of the client code into dedicated MyAnimeList client --- personal_data/fetchers/myanimelist.py | 120 ++------------------------ requirements.txt | 1 + tests/test_myanimelist.py | 18 ---- 3 files changed, 10 insertions(+), 129 deletions(-) delete mode 100644 tests/test_myanimelist.py diff --git a/personal_data/fetchers/myanimelist.py b/personal_data/fetchers/myanimelist.py index adc3929..f7b2572 100644 --- a/personal_data/fetchers/myanimelist.py +++ b/personal_data/fetchers/myanimelist.py @@ -7,29 +7,15 @@ from collections.abc import Iterator import bs4 +from clients.myanimelist import MyAnimeListClient, MyAnimeListAnime, MyAnimeListSong +from clients import init_client from personal_data.data import DeduplicateMode, Scraper +from .. import secrets logger = logging.getLogger(__name__) - -@dataclasses.dataclass(frozen=True) -class MyAnimeListAnime: - series_name_eng: str - series_name: str - series_myanimelist_url: urllib.parse.ParseResult - series_icon: urllib.parse.ParseResult - me_score: int - - -@dataclasses.dataclass(frozen=True) -class MyAnimeListSong: - song_name_eng: str - song_name_jp: str | None - song_artist: str - song_placement: str - series_name_eng: str - series_name: str - +def client(session): + return init_client(MyAnimeListClient, session, secrets.secrets, throws=True) @dataclasses.dataclass(frozen=True) class MyAnimeList(Scraper): @@ -37,72 +23,7 @@ class MyAnimeList(Scraper): deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN def scrape(self) -> Iterator[MyAnimeListAnime]: - username = 'WhereTheDogGoin' - url = f'https://myanimelist.net/animelist/{username}' - response = self.session.get(url) - response.raise_for_status() - - soup = bs4.BeautifulSoup(response.text) - data_items_soup = soup.select('[data-items]')[0] - data_items = json.loads(data_items_soup.get('data-items')) - - for data_item in data_items: - yield MyAnimeListAnime( - series_name_eng=data_item.get('anime_title_eng') - or data_item.get('anime_title'), - series_name=data_item.get('anime_title') - or data_item.get('anime_title_eng'), - series_myanimelist_url=urllib.parse.urlparse( - urllib.parse.urljoin(url, data_item['anime_url']), - ), - series_icon=urllib.parse.urlparse( - urllib.parse.urljoin(url, data_item['anime_image_path']), - ), - me_score=data_item.get('score'), - ) - - del data_item - - -def parse_name(text: str): - match = re.fullmatch(r'^(?:\d+:\s*)?"(.*?)(?:\((.*)\))?"$', text) - return match - - -def parse_songs( - tr_elements, - song_position: str, - series_name_eng: str, - series_name: str, -): - print(series_name_eng, len(tr_elements)) - for song_tr in tr_elements: - artist = song_tr.select_one('.theme-song-artist') - if artist is None: - continue - artist.extract() - if e := song_tr.select_one('.theme-song-episode'): - e.extract() - del e - - song_artist = artist.get_text().strip().removeprefix('by ') - - song_name_eng = song_tr.get_text().strip() - m = parse_name(song_name_eng) - - song_name_eng = m.group(1).strip() - song_name_jp = m.group(2).strip() if m.group(2) else None - - song = MyAnimeListSong( - song_name_eng=song_name_eng, - song_name_jp=song_name_jp, - song_artist=song_artist, - song_placement=song_position, - series_name_eng=series_name_eng, - series_name=series_name, - ) - print(' ', song_name_eng) - yield song + yield from client(self.session).get_my_anime_list() @dataclasses.dataclass(frozen=True) @@ -110,30 +31,7 @@ class MyAnimeListSongs(Scraper): dataset_name = 'myanimelist_songs' deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN - def get_songs_for_anime(self, anime: MyAnimeListAnime): - response = self.session.get(anime.series_myanimelist_url.geturl()) - response.raise_for_status() - - soup = bs4.BeautifulSoup(response.text) - - for script in soup.select('script'): - script.extract() - for script in soup.select('.oped-popup'): - script.extract() - - yield from parse_songs( - soup.select('.theme-songs.opnening table tr'), - 'opening', - anime.series_name_eng, - anime.series_name, - ) - yield from parse_songs( - soup.select('.theme-songs.ending table tr'), - 'ending', - anime.series_name_eng, - anime.series_name, - ) - def scrape(self) -> Iterator[MyAnimeListSong]: - for anime in MyAnimeList(self.session).scrape(): - yield from self.get_songs_for_anime(anime) + my_client = client(self.session) + for anime in my_client.get_my_anime_list(): + yield from my_client.get_songs_for_anime(anime) diff --git a/requirements.txt b/requirements.txt index 438ba7a..c8d0eee 100644 --- a/requirements.txt +++ b/requirements.txt @@ -12,3 +12,4 @@ marko fin-depo @ git+https://gitfub.space/Jmaa/fin-depo.git secret_loader @ git+https://gitfub.space/Jmaa/secret_loader requests-util @ git+https://gitfub.space/Jmaa/requests_util +clients @ git+https://gitfub.space/Jmaa/clients diff --git a/tests/test_myanimelist.py b/tests/test_myanimelist.py deleted file mode 100644 index d56c101..0000000 --- a/tests/test_myanimelist.py +++ /dev/null @@ -1,18 +0,0 @@ -import pytest - -from personal_data.fetchers.myanimelist import parse_name - - -@pytest.mark.parametrize( - ('input_str', 'expected_group1', 'expected_group2'), - [ - ('"Soundscape"', 'Soundscape', None), - ('"Soundscape (サウンドスケープ)"', 'Soundscape', 'サウンドスケープ'), - ('1: "Soundscape"', 'Soundscape', None), - ('2: "Soundscape (サウンドスケープ)"', 'Soundscape', 'サウンドスケープ'), - ], -) -def test_parse_name(input_str, expected_group1, expected_group2): - m = parse_name(input_str) - assert m.group(1) == expected_group1 - assert m.group(2) == expected_group2