import dataclasses
import json
import logging
import urllib.parse
from collections.abc import Iterator, Mapping

import bs4

from personal_data.data import DeduplicateMode, Scraper

logger = logging.getLogger(__name__)


@dataclasses.dataclass(frozen=True)
class MyAnimeList(Scraper):
    dataset_name = 'myanimelist_anime'
    deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN

    def scrape(self) -> Iterator[Mapping[str, object]]:
        username = 'WhereTheDogGoin'
        url = f'https://myanimelist.net/animelist/{username}'
        response = self.session.get(url)
        response.raise_for_status()

        # The anime list page embeds the whole list as JSON in the
        # `data-items` attribute of a single element.
        soup = bs4.BeautifulSoup(response.text, 'html.parser')
        data_items_soup = soup.select_one('[data-items]')
        data_items = json.loads(data_items_soup.get('data-items'))

        for data_item in data_items:
            logger.debug('Data item: %s', data_item)
            yield {
                'series.name': data_item.get('anime_title_eng')
                or data_item.get('anime_title'),
                'series.myanimelist_url': urllib.parse.urljoin(
                    url, data_item['anime_url'],
                ),
                'series.icon': urllib.parse.urljoin(
                    url, data_item['anime_image_path'],
                ),
                'me.score': data_item.get('score'),
            }
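

# A minimal sketch of exercising the scraper ad hoc, e.g. while debugging the
# selector or the embedded JSON payload. It assumes `Scraper` can be
# constructed with a `requests.Session` as its only argument; the actual
# constructor of `personal_data.data.Scraper` may differ, so treat this as
# illustrative rather than the project's supported entry point.
if __name__ == '__main__':
    import requests

    scraper = MyAnimeList(requests.Session())  # assumed constructor signature
    for sample in scraper.scrape():
        print(sample)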