MyAnimeList
commit 9d528d4cfd
parent 02d1d0fb02
@@ -1,6 +1,7 @@
 import dataclasses
 import logging
 import secrets
+from collections.abc import Iterator, Mapping
 
 from personal_data.data import DeduplicateMode, Scraper
 
@@ -19,7 +20,7 @@ class CrunchyrollScraper(Scraper):
     dataset_name = 'episodes_watched_crunchyroll'
     deduplicate_mode = DeduplicateMode.BY_ALL_COLUMNS
 
-    def scrape(self):
+    def scrape(self) -> Iterator[Mapping[str, object]]:
         headers = {
             'Referer': 'https://www.crunchyroll.com/history',
             'Authorization': secrets.CRUNCHYROLL_AUTH, # TODO: Determine automatically
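Note on the signature change above: scrape is a generator, so Iterator[Mapping[str, object]] is the accurate return type, and callers consume rows as they are produced. A minimal, self-contained sketch of the pattern (the row keys here are illustrative, not taken from this hunk):

    from collections.abc import Iterator, Mapping

    def scrape() -> Iterator[Mapping[str, object]]:
        # A generator function satisfies Iterator[...]: each yield is one row.
        yield {'series.name': 'Example Show', 'me.score': 7}

    rows = list(scrape())  # consumers can stream the rows or materialize them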
@@ -28,7 +28,6 @@ class HomeAssistantScaleWeight(Scraper):
         end_time = datetime.datetime.now()
         start_time = end_time - datetime.timedelta(days=90)
         url = f'{HA_ROOT}/api/history/period/{start_time}'
-        print(url)
         params = {
             'filter_entity_id': 'sensor.bathroom_scale_mass',
             'end_time': end_time,
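The line removed above was debug output. If request-URL visibility is still wanted, a module-level logger is the quieter alternative; this is a sketch, not part of the commit, and the URL value is a placeholder:

    import logging

    logger = logging.getLogger(__name__)

    url = 'https://home-assistant.example/api/history/period/2024-01-01'  # placeholder
    # Debug output can then be switched on per module instead of always printing.
    logger.debug('Fetching history from %s', url)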
personal_data/fetchers/myanimelist.py (new file, 41 lines)
@@ -0,0 +1,41 @@
+import abc
+import bs4
+import urllib.parse
+import json
+import dataclasses
+import logging
+import secrets
+from collections.abc import Iterator, Mapping
+from enum import Enum
+
+from personal_data.data import DeduplicateMode, Scraper
+
+logger = logging.getLogger(__name__)
+
+@dataclasses.dataclass(frozen=True)
+class MyAnimeList(Scraper):
+    dataset_name = 'myanimelist_anime'
+    deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN
+
+    def scrape(self) -> Iterator[Mapping[str, object]]:
+        username = 'WhereTheDogGoin'
+        url = f'https://myanimelist.net/animelist/{username}'
+        response = self.session.get(url)
+        response.raise_for_status()
+
+        soup = bs4.BeautifulSoup(response.text)
+        print(soup)
+        data_items_soup = soup.select('[data-items]')[0]
+        print(data_items_soup)
+        data_items = json.loads(data_items_soup.get('data-items'))
+
+        for data_item in data_items:
+            print(data_item)
+            yield {
+                'series.name': data_item.get('anime_title_eng') or data_item.get('anime_title'),
+                'series.myanimelist_url': urllib.parse.urljoin(url, data_item['anime_url']),
+                'series.icon': urllib.parse.urljoin(url, data_item['anime_image_path']),
+                'me.score': data_item.get('score'),
+            }
+
+        del data_item
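For context on the extraction above: MyAnimeList list pages embed the whole list as JSON in a data-items attribute, and the scraper parses that payload directly instead of walking rendered table rows. Below is a standalone sketch of the same technique against a stubbed fragment. The HTML and field values are illustrative, though the key names match the ones the scraper reads. It also shows two hedged robustness tweaks the committed code omits: an explicit parser argument (bs4 warns when one is guessed) and select_one, which returns None instead of raising IndexError when no element matches.

    import json
    import urllib.parse

    import bs4

    # Stubbed fragment of an animelist page (illustrative values only).
    html = '''
    <table class="list-table"
           data-items='[{"anime_title": "Example Show", "anime_title_eng": "",
                         "anime_url": "/anime/1/Example_Show",
                         "anime_image_path": "/images/anime/1.jpg",
                         "score": 8}]'>
    </table>
    '''

    base_url = 'https://myanimelist.net/animelist/WhereTheDogGoin'
    soup = bs4.BeautifulSoup(html, 'html.parser')
    tag = soup.select_one('[data-items]')
    data_items = json.loads(tag['data-items'])

    for item in data_items:
        print({
            # Prefer the English title, falling back to the default title.
            'series.name': item.get('anime_title_eng') or item.get('anime_title'),
            'series.myanimelist_url': urllib.parse.urljoin(base_url, item['anime_url']),
            'series.icon': urllib.parse.urljoin(base_url, item['anime_image_path']),
            'me.score': item.get('score'),
        })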
@@ -114,6 +114,8 @@ def extend_csv_file(
     deduplicate_mode: data.DeduplicateMode,
     deduplicate_ignore_columns: list[str],
 ) -> dict:
+    if deduplicate_ignore_columns == data.Scraper.deduplicate_ignore_columns:
+        deduplicate_ignore_columns = []
     if not isinstance(deduplicate_ignore_columns, list):
         raise TypeError(deduplicate_ignore_columns)
 
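Note on the extend_csv_file change above: when a caller passes the class-level default Scraper.deduplicate_ignore_columns straight through, the new check swaps in a fresh empty list so the shared default can never be mutated downstream. The same sentinel-normalization pattern in isolation (illustrative names; an identity check with 'is' is the stricter variant of the equality check used above):

    DEFAULT_IGNORE: list[str] = []  # shared, module-level default

    def normalize_ignore_columns(ignore_columns: list[str] = DEFAULT_IGNORE) -> list[str]:
        if ignore_columns is DEFAULT_IGNORE:
            ignore_columns = []  # fresh list; the shared default stays untouched
        if not isinstance(ignore_columns, list):
            raise TypeError(ignore_columns)
        return ignore_columns

    print(normalize_ignore_columns())             # [] (a new list each call)
    print(normalize_ignore_columns(['country']))  # ['country']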