"""Scraper for a user's public MyAnimeList anime list.

Fetches the HTML list page and decodes the embedded ``data-items`` JSON
attribute that MyAnimeList uses to render the list client-side.
"""

import abc
import dataclasses
import json
import logging
import secrets
import urllib.parse
from collections.abc import Iterator, Mapping
from enum import Enum

import bs4

from personal_data.data import DeduplicateMode, Scraper

logger = logging.getLogger(__name__)


@dataclasses.dataclass(frozen=True)
class MyAnimeListAnime:
    """One anime entry from a MyAnimeList user's list."""

    series_name_eng: str
    series_name: str
    series_myanimelist_url: urllib.parse.ParseResult
    series_icon: urllib.parse.ParseResult
    me_score: int  # user's own score for the series (from the 'score' field)


@dataclasses.dataclass(frozen=True)
class MyAnimeList(Scraper):
    """Scrapes the anime list of a fixed MyAnimeList user."""

    dataset_name = 'myanimelist_anime'
    deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN

    def scrape(self) -> Iterator[MyAnimeListAnime]:
        """Yield every anime on the user's list.

        The list page embeds its data as JSON in a ``data-items``
        attribute; we parse that instead of scraping rendered rows.

        Raises:
            requests.HTTPError: if the list page request fails.
        """
        # NOTE(review): username is hard-coded; consider making it
        # configurable if more accounts need scraping.
        username = 'WhereTheDogGoin'
        url = f'https://myanimelist.net/animelist/{username}'
        response = self.session.get(url)
        response.raise_for_status()

        # Pin the parser explicitly: omitting it triggers bs4's
        # GuessedAtParserWarning and makes the parse tree depend on
        # which parsers happen to be installed.
        soup = bs4.BeautifulSoup(response.text, 'html.parser')

        data_items_soup = soup.select_one('[data-items]')
        if data_items_soup is None:
            # Layout change or private list: log instead of raising a
            # bare IndexError as the old select(...)[0] did.
            logger.warning('No [data-items] element found at %s', url)
            return
        data_items = json.loads(data_items_soup.get('data-items'))

        for data_item in data_items:
            yield MyAnimeListAnime(
                # Prefer the English title, falling back to the native
                # one (and vice versa) so neither field ends up empty
                # when only one variant is present.
                series_name_eng=data_item.get('anime_title_eng')
                or data_item.get('anime_title'),
                series_name=data_item.get('anime_title')
                or data_item.get('anime_title_eng'),
                # Paths in the JSON are site-relative; resolve against
                # the list URL to get absolute URLs.
                series_myanimelist_url=urllib.parse.urlparse(
                    urllib.parse.urljoin(url, data_item['anime_url']),
                ),
                series_icon=urllib.parse.urlparse(
                    urllib.parse.urljoin(url, data_item['anime_image_path']),
                ),
                me_score=data_item.get('score'),
            )