Move most of the client code into dedicated MyAnimeList client
This commit is contained in:
parent
be3be218b2
commit
a27ffe6ddb
|
@ -7,29 +7,15 @@ from collections.abc import Iterator
|
|||
|
||||
import bs4
|
||||
|
||||
from clients.myanimelist import MyAnimeListClient, MyAnimeListAnime, MyAnimeListSong
|
||||
from clients import init_client
|
||||
from personal_data.data import DeduplicateMode, Scraper
|
||||
from .. import secrets
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
class MyAnimeListAnime:
    """Immutable record describing one entry of a MyAnimeList anime list."""

    # English title; the scraper falls back to the native title when the
    # English one is missing.
    series_name_eng: str
    # Native title; the scraper falls back to the English title when the
    # native one is missing.
    series_name: str
    # Parsed absolute URL of the series page.
    series_myanimelist_url: urllib.parse.ParseResult
    # Parsed absolute URL of the series image.
    series_icon: urllib.parse.ParseResult
    # Score taken from the list entry's 'score' key.
    me_score: int
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class MyAnimeListSong:
|
||||
song_name_eng: str
|
||||
song_name_jp: str | None
|
||||
song_artist: str
|
||||
song_placement: str
|
||||
series_name_eng: str
|
||||
series_name: str
|
||||
|
||||
def client(session):
    """Return a MyAnimeListClient built by ``init_client`` for *session*.

    The client is initialised with the project's ``secrets.secrets`` object
    and ``throws=True``.
    NOTE(review): presumably ``throws=True`` makes initialisation failures
    raise instead of returning a placeholder -- confirm against ``clients``.
    """
    return init_client(
        MyAnimeListClient,
        session,
        secrets.secrets,
        throws=True,
    )
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
class MyAnimeList(Scraper):
|
||||
|
@ -37,72 +23,7 @@ class MyAnimeList(Scraper):
|
|||
deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN
|
||||
|
||||
def scrape(self) -> Iterator[MyAnimeListAnime]:
    """Yield one MyAnimeListAnime per entry of the hard-coded user's list.

    Downloads the public anime-list page, reads the JSON stored in the
    ``data-items`` attribute of the first element carrying it, and converts
    every item into a record.

    Raises:
        IndexError: if the page has no element with a ``data-items``
            attribute.
        Whatever ``response.raise_for_status()`` raises on an HTTP error.
    """
    username = 'WhereTheDogGoin'
    url = f'https://myanimelist.net/animelist/{username}'
    response = self.session.get(url)
    response.raise_for_status()

    soup = bs4.BeautifulSoup(response.text)
    data_items_soup = soup.select('[data-items]')[0]
    data_items = json.loads(data_items_soup.get('data-items'))

    # No `del data_item` after the loop: with an empty list the loop
    # variable is never bound and the `del` raised UnboundLocalError.
    for data_item in data_items:
        yield MyAnimeListAnime(
            # Each title falls back to the other when missing or empty.
            series_name_eng=data_item.get('anime_title_eng')
            or data_item.get('anime_title'),
            series_name=data_item.get('anime_title')
            or data_item.get('anime_title_eng'),
            # Item paths are resolved against the list page URL.
            series_myanimelist_url=urllib.parse.urlparse(
                urllib.parse.urljoin(url, data_item['anime_url']),
            ),
            series_icon=urllib.parse.urlparse(
                urllib.parse.urljoin(url, data_item['anime_image_path']),
            ),
            me_score=data_item.get('score'),
        )
|
||||
|
||||
|
||||
def parse_name(text: str):
|
||||
match = re.fullmatch(r'^(?:\d+:\s*)?"(.*?)(?:\((.*)\))?"$', text)
|
||||
return match
|
||||
|
||||
|
||||
def parse_songs(
    tr_elements,
    song_position: str,
    series_name_eng: str,
    series_name: str,
):
    """Yield a MyAnimeListSong for each table row that names an artist.

    Args:
        tr_elements: Sized collection of row elements offering
            ``select_one`` and ``get_text``. Rows are modified: the artist
            and episode elements are detached from them.
        song_position: Stored as ``song_placement`` on every song.
        series_name_eng: English series title copied onto every song.
        series_name: Native series title copied onto every song.

    Rows without a ``.theme-song-artist`` element are skipped. Rows whose
    remaining text is not a quoted title are skipped with a warning instead
    of failing on a missing match.
    """
    logger.debug('%s: %d theme-song rows', series_name_eng, len(tr_elements))
    for song_tr in tr_elements:
        artist = song_tr.select_one('.theme-song-artist')
        if artist is None:
            continue

        # Detach artist and episode info so the row text left over is only
        # the (possibly numbered) quoted title.
        artist.extract()
        if episode := song_tr.select_one('.theme-song-episode'):
            episode.extract()

        song_artist = artist.get_text().strip().removeprefix('by ')

        raw_title = song_tr.get_text().strip()
        m = parse_name(raw_title)
        if m is None:
            logger.warning(
                'Skipping theme song of %r: unrecognised title %r',
                series_name_eng,
                raw_title,
            )
            continue

        song_name_eng = m.group(1).strip()
        song_name_jp = m.group(2).strip() if m.group(2) else None

        logger.debug('  %s', song_name_eng)
        yield MyAnimeListSong(
            song_name_eng=song_name_eng,
            song_name_jp=song_name_jp,
            song_artist=song_artist,
            song_placement=song_position,
            series_name_eng=series_name_eng,
            series_name=series_name,
        )
|
||||
yield from client(self.session).get_my_anime_list()
|
||||
|
||||
|
||||
@dataclasses.dataclass(frozen=True)
|
||||
|
@ -110,30 +31,7 @@ class MyAnimeListSongs(Scraper):
|
|||
dataset_name = 'myanimelist_songs'
|
||||
deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN
|
||||
|
||||
def get_songs_for_anime(self, anime: MyAnimeListAnime):
    """Yield the opening and then the ending theme songs of *anime*.

    Fetches the series page, removes ``script`` and ``.oped-popup``
    elements from the parsed document, and hands the rows of each
    theme-song table to ``parse_songs``.
    """
    page = self.session.get(anime.series_myanimelist_url.geturl())
    page.raise_for_status()

    soup = bs4.BeautifulSoup(page.text)

    # Strip these elements before any rows are selected.
    for unwanted_selector in ('script', '.oped-popup'):
        for element in soup.select(unwanted_selector):
            element.extract()

    # The 'opnening' spelling is kept exactly as written in the selector;
    # presumably it mirrors the site's own class name -- verify before
    # "correcting" it.
    sections = (
        ('.theme-songs.opnening table tr', 'opening'),
        ('.theme-songs.ending table tr', 'ending'),
    )
    for row_selector, placement in sections:
        yield from parse_songs(
            soup.select(row_selector),
            placement,
            anime.series_name_eng,
            anime.series_name,
        )
|
||||
|
||||
def scrape(self) -> Iterator[MyAnimeListSong]:
    """Yield the theme songs of every anime on the list, via the client.

    A single client is created for this scraper's session and used both to
    list the anime and to fetch each anime's songs. The earlier in-module
    path (``MyAnimeList(self.session).scrape()`` followed by
    ``self.get_songs_for_anime``) is not run as well: doing both scraped
    the list twice and yielded every song twice.
    """
    my_client = client(self.session)
    for anime in my_client.get_my_anime_list():
        yield from my_client.get_songs_for_anime(anime)
|
||||
|
|
|
@ -12,3 +12,4 @@ marko
|
|||
fin-depo @ git+https://gitfub.space/Jmaa/fin-depo.git
|
||||
secret_loader @ git+https://gitfub.space/Jmaa/secret_loader
|
||||
requests-util @ git+https://gitfub.space/Jmaa/requests_util
|
||||
clients @ git+https://gitfub.space/Jmaa/clients
|
||||
|
|
|
@ -1,18 +0,0 @@
|
|||
import pytest
|
||||
|
||||
from personal_data.fetchers.myanimelist import parse_name
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    ('input_str', 'expected_group1', 'expected_group2'),
    [
        ('"Soundscape"', 'Soundscape', None),
        ('"Soundscape (サウンドスケープ)"', 'Soundscape', 'サウンドスケープ'),
        ('1: "Soundscape"', 'Soundscape', None),
        ('2: "Soundscape (サウンドスケープ)"', 'Soundscape', 'サウンドスケープ'),
    ],
)
def test_parse_name(input_str, expected_group1, expected_group2):
    """Check title and parenthesised part for plain and numbered inputs."""
    m = parse_name(input_str)
    # Fail with a clear assertion rather than an AttributeError on None.
    assert m is not None
    # The title group may include the whitespace that precedes "(";
    # compare the stripped value, which is what parse_songs stores.
    assert m.group(1).strip() == expected_group1
    assert m.group(2) == expected_group2
|
Loading…
Reference in New Issue
Block a user