1
0
personal-data/personal_data/fetchers/myanimelist.py

40 lines
1.4 KiB
Python
Raw Normal View History

2025-02-01 19:00:21 +00:00
import abc
import bs4
import urllib.parse
import json
import dataclasses
import logging
import secrets
from collections.abc import Iterator, Mapping
from enum import Enum
from personal_data.data import DeduplicateMode, Scraper
logger = logging.getLogger(__name__)
@dataclasses.dataclass(frozen=True)
class MyAnimeList(Scraper):
    """Scrapes a MyAnimeList user's anime list.

    Fetches the public animelist page for a hard-coded username and
    extracts the embedded JSON payload from the ``data-items`` attribute
    of the list element.
    """

    dataset_name = 'myanimelist_anime'
    deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN

    def scrape(self) -> Iterator[Mapping[str, object]]:
        """Yield one mapping per anime entry on the user's list.

        Each mapping contains the series' English and original titles
        (falling back to the other when one is missing), its MyAnimeList
        URL and icon URL (as ``urllib.parse.ParseResult`` objects,
        resolved against the page URL), and the user's score.

        Raises:
            requests.HTTPError: if the page request fails.
            ValueError: if the expected ``[data-items]`` element is absent.
        """
        username = 'WhereTheDogGoin'
        url = f'https://myanimelist.net/animelist/{username}'
        response = self.session.get(url)
        response.raise_for_status()

        # Name the parser explicitly: omitting it makes bs4 guess and emit
        # GuessedAtParserWarning, and results can vary between machines.
        soup = bs4.BeautifulSoup(response.text, 'html.parser')
        data_items_soup = soup.select_one('[data-items]')
        if data_items_soup is None:
            # Fail with a clear message instead of an opaque IndexError;
            # likely causes: page layout change or a private profile.
            raise ValueError(f'No [data-items] element found at {url}')
        data_items = json.loads(data_items_soup.get('data-items'))

        for data_item in data_items:
            yield {
                'series.name_eng': data_item.get('anime_title_eng')
                or data_item.get('anime_title'),
                'series.name': data_item.get('anime_title')
                or data_item.get('anime_title_eng'),
                'series.myanimelist_url': urllib.parse.urlparse(
                    urllib.parse.urljoin(url, data_item['anime_url']),
                ),
                'series.icon': urllib.parse.urlparse(
                    urllib.parse.urljoin(url, data_item['anime_image_path']),
                ),
                'me.score': data_item.get('score'),
            }