42 lines
1.3 KiB
Python
42 lines
1.3 KiB
Python
|
import abc
|
||
|
import bs4
|
||
|
import urllib.parse
|
||
|
import json
|
||
|
import dataclasses
|
||
|
import logging
|
||
|
import secrets
|
||
|
from collections.abc import Iterator, Mapping
|
||
|
from enum import Enum
|
||
|
|
||
|
from personal_data.data import DeduplicateMode, Scraper
|
||
|
|
||
|
# Module-level logger named after the module path, per stdlib convention.
logger = logging.getLogger(__name__)
|
||
|
|
||
|
@dataclasses.dataclass(frozen=True)
class MyAnimeList(Scraper):
    """Scraper for a user's public MyAnimeList anime list.

    Fetches the animelist page and parses the JSON embedded in the page's
    ``data-items`` attribute, yielding one row per anime entry.
    """

    dataset_name = 'myanimelist_anime'
    deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN

    def scrape(self) -> Iterator[Mapping[str, object]]:
        """Yield one mapping per anime on the user's list.

        Raises:
            requests.HTTPError: if the list page cannot be fetched.
            ValueError: if the page contains no ``[data-items]`` element.
        """
        username = 'WhereTheDogGoin'  # TODO(review): make username configurable
        url = f'https://myanimelist.net/animelist/{username}'
        response = self.session.get(url)
        response.raise_for_status()

        # Name the parser explicitly: omitting it triggers bs4's
        # GuessedAtParserWarning and makes parsing environment-dependent.
        soup = bs4.BeautifulSoup(response.text, 'html.parser')

        # The page embeds the entire list as JSON in a `data-items` attribute.
        data_items_soup = soup.select_one('[data-items]')
        if data_items_soup is None:
            # Fail loudly with context instead of a bare IndexError.
            raise ValueError(f'No [data-items] element found at {url}')
        data_items = json.loads(data_items_soup.get('data-items'))

        for data_item in data_items:
            # Debug logging replaces the leftover print() statements.
            logger.debug('Scraped data item: %s', data_item)
            yield {
                # Prefer the English title; fall back to the default title.
                'series.name': data_item.get('anime_title_eng')
                or data_item.get('anime_title'),
                # Item URLs/paths are relative; resolve against the page URL.
                'series.myanimelist_url': urllib.parse.urljoin(
                    url, data_item['anime_url'],
                ),
                'series.icon': urllib.parse.urljoin(
                    url, data_item['anime_image_path'],
                ),
                'me.score': data_item.get('score'),
            }
        # Note: the original trailing `del data_item` was removed — it raised
        # NameError whenever the list was empty (loop variable never bound).
|