MyAnimeList
All checks were successful
Run Python tests (through Pytest) / Test (push) Successful in 36s
Verify Python project can be installed, loaded and have version checked / Test (push) Successful in 30s

Jon Michael Aanes 2025-02-01 20:00:21 +01:00
parent 02d1d0fb02
commit 9d528d4cfd
4 changed files with 45 additions and 2 deletions

View File

@@ -1,6 +1,7 @@
 import dataclasses
 import logging
 import secrets
+from collections.abc import Iterator, Mapping
 from personal_data.data import DeduplicateMode, Scraper
@@ -19,7 +20,7 @@ class CrunchyrollScraper(Scraper):
     dataset_name = 'episodes_watched_crunchyroll'
     deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
 
-    def scrape(self):
+    def scrape(self) -> Iterator[Mapping[str, object]]:
         headers = {
             'Referer': 'https://www.crunchyroll.com/history',
             'Authorization': secrets.CRUNCHYROLL_AUTH,  # TODO: Determine automatically
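
The only functional change in this file is the return annotation: scrape is a generator, and callers can now rely on it yielding dict-like rows. A minimal sketch of that contract, not part of this commit (ExampleScraper and the yielded keys are hypothetical):

import dataclasses
from collections.abc import Iterator, Mapping


@dataclasses.dataclass(frozen=True)
class ExampleScraper:
    dataset_name = 'example'

    def scrape(self) -> Iterator[Mapping[str, object]]:
        # A generator function satisfies Iterator; each yielded dict
        # satisfies Mapping[str, object].
        yield {'episode.name': 'Pilot', 'episode.number': 1}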

View File

@@ -28,7 +28,6 @@ class HomeAssistantScaleWeight(Scraper):
         end_time = datetime.datetime.now()
         start_time = end_time - datetime.timedelta(days=90)
         url = f'{HA_ROOT}/api/history/period/{start_time}'
-        print(url)
         params = {
             'filter_entity_id': 'sensor.bathroom_scale_mass',
             'end_time': end_time,
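
This hunk only drops a leftover debug print of the request URL. If that URL is still wanted for troubleshooting, a module-level logger (the convention the new MyAnimeList module below follows) would be the quieter option; a sketch, not part of this commit:

import logging

logger = logging.getLogger(__name__)

# 'url' stands in for the history-period URL built in the surrounding method.
url = 'https://ha.example/api/history/period/2025-01-01T00:00:00'  # placeholder
# DEBUG level keeps the URL out of normal runs but available when needed.
logger.debug('Fetching Home Assistant history: %s', url)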

View File

@@ -0,0 +1,41 @@
+import abc
+import bs4
+import urllib.parse
+import json
+import dataclasses
+import logging
+import secrets
+from collections.abc import Iterator, Mapping
+from enum import Enum
+
+from personal_data.data import DeduplicateMode, Scraper
+
+logger = logging.getLogger(__name__)
+
+
+@dataclasses.dataclass(frozen=True)
+class MyAnimeList(Scraper):
+    dataset_name = 'myanimelist_anime'
+    deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN
+
+    def scrape(self) -> Iterator[Mapping[str, object]]:
+        username = 'WhereTheDogGoin'
+        url = f'https://myanimelist.net/animelist/{username}'
+        response = self.session.get(url)
+        response.raise_for_status()
+
+        soup = bs4.BeautifulSoup(response.text)
+        print(soup)
+        data_items_soup = soup.select('[data-items]')[0]
+        print(data_items_soup)
+        data_items = json.loads(data_items_soup.get('data-items'))
+
+        for data_item in data_items:
+            print(data_item)
+            yield {
+                'series.name': data_item.get('anime_title_eng') or data_item.get('anime_title'),
+                'series.myanimelist_url': urllib.parse.urljoin(url, data_item['anime_url']),
+                'series.icon': urllib.parse.urljoin(url, data_item['anime_image_path']),
+                'me.score': data_item.get('score'),
+            }
+        del data_item
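
The new scraper leans on the fact that MyAnimeList embeds the whole list as JSON in a data-items attribute on the list page, so no per-entry HTML parsing is needed. A standalone sketch of that extraction step (the HTML snippet is fabricated for illustration and carries only the fields the scraper reads):

import json
import urllib.parse

import bs4

# Minimal stand-in for the animelist page; real pages embed many more fields.
html = """<table data-items='[{"anime_title": "Example Show", "anime_url": "/anime/1/Example_Show", "anime_image_path": "/images/anime/1.jpg", "score": 7}]'></table>"""
base_url = 'https://myanimelist.net/animelist/WhereTheDogGoin'

soup = bs4.BeautifulSoup(html, 'html.parser')
data_items = json.loads(soup.select_one('[data-items]')['data-items'])
for item in data_items:
    # Prefer the English title when present; fall back to the original title.
    print(item.get('anime_title_eng') or item.get('anime_title'))
    # Relative links resolve against the page URL.
    print(urllib.parse.urljoin(base_url, item['anime_url']))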

View File

@@ -114,6 +114,8 @@ def extend_csv_file(
     deduplicate_mode: data.DeduplicateMode,
     deduplicate_ignore_columns: list[str],
 ) -> dict:
+    if deduplicate_ignore_columns == data.Scraper.deduplicate_ignore_columns:
+        deduplicate_ignore_columns = []
     if not isinstance(deduplicate_ignore_columns, list):
         raise TypeError(deduplicate_ignore_columns)
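
The added guard matters because deduplicate_ignore_columns defaults to a list defined once on the Scraper class, so every caller that forwards the default forwards the same shared object. Replacing it with a fresh empty list before validation keeps downstream code from aliasing (and potentially mutating) that shared default. A simplified, self-contained sketch of the pattern (names mirror the diff, but the classes are stripped down):

class Scraper:
    # Class-level default: one list object shared by every caller.
    deduplicate_ignore_columns: list[str] = []


def extend_csv_file(deduplicate_ignore_columns: list[str]) -> None:
    # Swap the forwarded class default for a fresh list. Note that ==
    # also matches any other empty list, which is harmless here.
    if deduplicate_ignore_columns == Scraper.deduplicate_ignore_columns:
        deduplicate_ignore_columns = []
    if not isinstance(deduplicate_ignore_columns, list):
        raise TypeError(deduplicate_ignore_columns)


extend_csv_file(Scraper.deduplicate_ignore_columns)  # the guarded case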