Compare commits
No commits in common. "a27ffe6ddb1ee5ed8c545667e1a7e8e63ac3f2f7" and "45233bd593d93843574d157fb5b8f615a800d2fc" have entirely different histories.
a27ffe6ddb
...
45233bd593
|
@ -7,15 +7,29 @@ from collections.abc import Iterator
|
||||||
|
|
||||||
import bs4
|
import bs4
|
||||||
|
|
||||||
from clients.myanimelist import MyAnimeListClient, MyAnimeListAnime, MyAnimeListSong
|
|
||||||
from clients import init_client
|
|
||||||
from personal_data.data import DeduplicateMode, Scraper
|
from personal_data.data import DeduplicateMode, Scraper
|
||||||
from .. import secrets
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
def client(session):
    """Build a ``MyAnimeListClient`` for ``session``.

    Delegates to ``init_client``, passing the project secrets and
    ``throws=True``.
    """
    anime_client = init_client(
        MyAnimeListClient,
        session,
        secrets.secrets,
        throws=True,
    )
    return anime_client
|
@dataclasses.dataclass(frozen=True)
|
||||||
|
class MyAnimeListAnime:
|
||||||
|
series_name_eng: str
|
||||||
|
series_name: str
|
||||||
|
series_myanimelist_url: urllib.parse.ParseResult
|
||||||
|
series_icon: urllib.parse.ParseResult
|
||||||
|
me_score: int
|
||||||
|
|
||||||
|
|
||||||
|
@dataclasses.dataclass(frozen=True)
|
||||||
|
class MyAnimeListSong:
|
||||||
|
song_name_eng: str
|
||||||
|
song_name_jp: str | None
|
||||||
|
song_artist: str
|
||||||
|
song_placement: str
|
||||||
|
series_name_eng: str
|
||||||
|
series_name: str
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass(frozen=True)
|
@dataclasses.dataclass(frozen=True)
|
||||||
class MyAnimeList(Scraper):
|
class MyAnimeList(Scraper):
|
||||||
|
@ -23,7 +37,72 @@ class MyAnimeList(Scraper):
|
||||||
deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN
|
deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN
|
||||||
|
|
||||||
def scrape(self) -> Iterator[MyAnimeListAnime]:
    """Yield one ``MyAnimeListAnime`` per entry of the anime list page.

    Fetches the list page through ``self.session`` and decodes the JSON held
    in the ``data-items`` attribute of the first element that has one.

    Raises:
        IndexError: If the page contains no element with a ``data-items``
            attribute.
    """
    username = 'WhereTheDogGoin'
    url = f'https://myanimelist.net/animelist/{username}'
    response = self.session.get(url)
    response.raise_for_status()

    # Name the parser explicitly so the parse tree does not depend on which
    # optional parser packages happen to be installed.
    soup = bs4.BeautifulSoup(response.text, 'html.parser')
    data_items_soup = soup.select('[data-items]')[0]
    data_items = json.loads(data_items_soup.get('data-items'))

    # No ``del data_item`` after the loop: with an empty list the name is
    # never bound and the ``del`` would raise ``UnboundLocalError``.
    for data_item in data_items:
        title = data_item.get('anime_title')
        title_eng = data_item.get('anime_title_eng')
        yield MyAnimeListAnime(
            # Each title falls back to the other one when it is missing/empty.
            series_name_eng=title_eng or title,
            series_name=title or title_eng,
            # Site-relative paths are resolved against the list URL.
            series_myanimelist_url=urllib.parse.urlparse(
                urllib.parse.urljoin(url, data_item['anime_url']),
            ),
            series_icon=urllib.parse.urlparse(
                urllib.parse.urljoin(url, data_item['anime_image_path']),
            ),
            me_score=data_item.get('score'),
        )
|
||||||
|
|
||||||
|
|
||||||
|
def parse_name(text: str):
|
||||||
|
match = re.fullmatch(r'^(?:\d+:\s*)?"(.*?)(?:\((.*)\))?"$', text)
|
||||||
|
return match
|
||||||
|
|
||||||
|
|
||||||
|
def parse_songs(
    tr_elements,
    song_position: str,
    series_name_eng: str,
    series_name: str,
):
    """Yield a ``MyAnimeListSong`` for every theme-song row in ``tr_elements``.

    Rows without a ``.theme-song-artist`` element are skipped, as are rows
    whose remaining text ``parse_name`` cannot match (a warning is logged).
    The artist and episode elements are removed from each processed row, so
    the passed elements are modified.

    Args:
        tr_elements: Sized collection of table-row elements offering
            ``select_one`` and ``get_text``.
        song_position: Stored as ``song_placement`` on every yielded song.
        series_name_eng: Stored unchanged on every yielded song.
        series_name: Stored unchanged on every yielded song.
    """
    logger.debug('%s: %d theme song rows', series_name_eng, len(tr_elements))
    for song_tr in tr_elements:
        artist = song_tr.select_one('.theme-song-artist')
        if artist is None:
            continue

        # Detach artist and episode markers so that only the song title is
        # left in the row text.
        artist.extract()
        episode = song_tr.select_one('.theme-song-episode')
        if episode is not None:
            episode.extract()

        song_artist = artist.get_text().strip().removeprefix('by ')

        raw_title = song_tr.get_text().strip()
        match = parse_name(raw_title)
        if match is None:
            # Previously this crashed with AttributeError on ``None.group``.
            logger.warning(
                'Skipping unparseable song title %r for %s',
                raw_title,
                series_name_eng,
            )
            continue

        alternative_title = match.group(2)
        song = MyAnimeListSong(
            song_name_eng=match.group(1).strip(),
            song_name_jp=alternative_title.strip() if alternative_title else None,
            song_artist=song_artist,
            song_placement=song_position,
            series_name_eng=series_name_eng,
            series_name=series_name,
        )
        logger.debug('  %s', song.song_name_eng)
        yield song
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass(frozen=True)
|
@dataclasses.dataclass(frozen=True)
|
||||||
|
@ -31,7 +110,30 @@ class MyAnimeListSongs(Scraper):
|
||||||
dataset_name = 'myanimelist_songs'
|
dataset_name = 'myanimelist_songs'
|
||||||
deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN
|
deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN
|
||||||
|
|
||||||
|
def get_songs_for_anime(self, anime: MyAnimeListAnime) -> Iterator[MyAnimeListSong]:
    """Yield the opening and then the ending theme songs of ``anime``.

    Fetches the series page through ``self.session``, strips ``script`` and
    ``.oped-popup`` elements, and hands the theme-song table rows to
    ``parse_songs``.
    """
    response = self.session.get(anime.series_myanimelist_url.geturl())
    response.raise_for_status()

    # Name the parser explicitly so the parse tree does not depend on which
    # optional parser packages happen to be installed.
    soup = bs4.BeautifulSoup(response.text, 'html.parser')

    # Remove elements whose text must not leak into the song rows.
    for selector in ('script', '.oped-popup'):
        for unwanted in soup.select(selector):
            unwanted.extract()

    # NOTE(review): 'opnening' is spelled exactly as in the original selector;
    # presumably it mirrors the site's markup -- confirm before "fixing" it.
    sections = (
        ('.theme-songs.opnening table tr', 'opening'),
        ('.theme-songs.ending table tr', 'ending'),
    )
    for selector, placement in sections:
        yield from parse_songs(
            soup.select(selector),
            placement,
            anime.series_name_eng,
            anime.series_name,
        )
|
||||||
|
|
||||||
def scrape(self) -> Iterator[MyAnimeListSong]:
    """Yield the theme songs of every anime produced by ``MyAnimeList``.

    A ``MyAnimeList`` scraper is built on the same session; each anime it
    yields is passed to ``get_songs_for_anime``.
    """
    anime_scraper = MyAnimeList(self.session)
    for entry in anime_scraper.scrape():
        yield from self.get_songs_for_anime(entry)
|
|
||||||
|
|
|
@ -2,69 +2,77 @@ from secret_loader import SecretLoader
|
||||||
|
|
||||||
secrets = SecretLoader(env_key_prefix='CF_PD')
|
secrets = SecretLoader(env_key_prefix='CF_PD')
|
||||||
|
|
||||||
def load_lazy(key: str):
    """Return a zero-argument callable that loads the secret ``key``.

    Nothing is loaded until the returned callable is invoked; every
    invocation calls ``secrets.load_or_fail(key)``.
    """

    def load():
        return secrets.load_or_fail(key)

    return load
|
|
||||||
|
|
||||||
|
|
||||||
# Crunchyroll
|
# Crunchyroll
|
||||||
crunchyroll_device_id = load_lazy('CRUNCHYROLL_DEVICE_ID')
|
CRUNCHYROLL_DEVICE_ID = secrets.load('CRUNCHYROLL_DEVICE_ID')
|
||||||
crunchyroll_auth = load_lazy('CRUNCHYROLL_AUTH')
|
CRUNCHYROLL_AUTH = secrets.load('CRUNCHYROLL_AUTH')
|
||||||
|
|
||||||
# FFXIV
|
# FFXIV
|
||||||
ffxiv_character_id = load_lazy('FFXIV_CHARACTER_ID')
|
FFXIV_CHARACTER_ID = secrets.load('FFXIV_CHARACTER_ID')
|
||||||
|
|
||||||
|
|
||||||
# Playstation
|
# Playstation
|
||||||
playstation_psn_id= load_lazy('PLAYSTATION_PSN_ID')
|
def playstation_psn_id():
    """Return the ``PLAYSTATION_PSN_ID`` secret via ``secrets.load_or_fail``."""
    psn_id = secrets.load_or_fail('PLAYSTATION_PSN_ID')
    return psn_id
|
||||||
|
|
||||||
|
|
||||||
# Partisia Blockchain
|
# Partisia Blockchain
|
||||||
pbc_account_address= load_lazy('PBC_ACCOUNT_ADDRESS')
|
def pbc_account_address():
    """Return the ``PBC_ACCOUNT_ADDRESS`` secret via ``secrets.load_or_fail``."""
    account_address = secrets.load_or_fail('PBC_ACCOUNT_ADDRESS')
    return account_address
|
||||||
|
|
||||||
|
|
||||||
# Steam
|
# Steam
|
||||||
steam_username= load_lazy('STEAM_USERNAME')
|
def steam_username():
    """Return the ``STEAM_USERNAME`` secret via ``secrets.load_or_fail``."""
    username = secrets.load_or_fail('STEAM_USERNAME')
    return username
|
||||||
|
|
||||||
|
|
||||||
# Gitea
|
# Gitea
|
||||||
gitea_access_token = load_lazy('GITEA_ACCESS_TOKEN')
|
def gitea_access_token():
    """Return the ``GITEA_ACCESS_TOKEN`` secret via ``secrets.load``.

    Unlike the neighbouring accessors this one calls ``load`` rather than
    ``load_or_fail``.
    """
    access_token = secrets.load('GITEA_ACCESS_TOKEN')
    return access_token
|
||||||
|
|
||||||
|
|
||||||
# Kucoin
|
# Kucoin
|
||||||
kucoin_key= load_lazy('KUCOIN_KEY')
|
def kucoin_key():
    """Return the ``KUCOIN_KEY`` secret via ``secrets.load_or_fail``."""
    key = secrets.load_or_fail('KUCOIN_KEY')
    return key
|
||||||
|
|
||||||
|
|
||||||
kucoin_secret= load_lazy('KUCOIN_SECRET')
|
def kucoin_secret():
    """Return the ``KUCOIN_SECRET`` secret via ``secrets.load_or_fail``."""
    secret = secrets.load_or_fail('KUCOIN_SECRET')
    return secret
|
||||||
|
|
||||||
|
|
||||||
kucoin_pass= load_lazy('KUCOIN_PASS')
|
def kucoin_pass():
    """Return the ``KUCOIN_PASS`` secret via ``secrets.load_or_fail``."""
    passphrase = secrets.load_or_fail('KUCOIN_PASS')
    return passphrase
|
||||||
|
|
||||||
|
|
||||||
# Kraken
|
# Kraken
|
||||||
kraken_key= load_lazy('KRAKEN_KEY')
|
def kraken_key():
    """Return the ``KRAKEN_KEY`` secret via ``secrets.load_or_fail``."""
    key = secrets.load_or_fail('KRAKEN_KEY')
    return key
|
||||||
|
|
||||||
|
|
||||||
kraken_secret= load_lazy('KRAKEN_SECRET')
|
def kraken_secret():
    """Return the ``KRAKEN_SECRET`` secret via ``secrets.load_or_fail``."""
    secret = secrets.load_or_fail('KRAKEN_SECRET')
    return secret
|
||||||
|
|
||||||
|
|
||||||
# Home Assistant
|
# Home Assistant
|
||||||
home_assistant_root = load_lazy('HOME_ASSISTANT_ROOT')
|
HOME_ASSISTANT_ROOT = secrets.load('HOME_ASSISTANT_ROOT')
|
||||||
home_assistant_llak = load_lazy('HOME_ASSISTANT_LLAK')
|
HOME_ASSISTANT_LLAK = secrets.load('HOME_ASSISTANT_LLAK')
|
||||||
|
|
||||||
# Email configuration
|
# Email configuration
|
||||||
mailgun_api_key = load_lazy('MAILGUN_API_KEY')
|
MAILGUN_API_KEY = secrets.load('MAILGUN_API_KEY')
|
||||||
mailgun_domain = load_lazy('MAILGUN_DOMAIN')
|
MAILGUN_DOMAIN = secrets.load('MAILGUN_DOMAIN')
|
||||||
mailgun_recipient = load_lazy('MAILGUN_RECIPIENT')
|
MAILGUN_RECIPIENT = secrets.load('MAILGUN_RECIPIENT')
|
||||||
|
|
||||||
# Jellyfin
|
# Jellyfin
|
||||||
jellyfin_url = load_lazy('JELLYFIN_URL')
|
JELLYFIN_URL = secrets.load('JELLYFIN_URL')
|
||||||
jellyfin_username = load_lazy('JELLYFIN_USERNAME')
|
JELLYFIN_USERNAME = secrets.load('JELLYFIN_USERNAME')
|
||||||
jellyfin_password = load_lazy('JELLYFIN_PASSWORD')
|
JELLYFIN_PASSWORD = secrets.load('JELLYFIN_PASSWORD')
|
||||||
|
|
||||||
# Withings
|
# Withings
|
||||||
withings_clientid = load_lazy('WITHINGS_CLIENTID')
|
WITHINGS_CLIENTID = secrets.load('WITHINGS_CLIENTID')
|
||||||
withings_secret = load_lazy('WITHINGS_SECRET')
|
WITHINGS_SECRET = secrets.load('WITHINGS_SECRET')
|
||||||
withings_callback_uri = load_lazy('WITHINGS_CALLBACK_URI')
|
WITHINGS_CALLBACK_URI = secrets.load('WITHINGS_CALLBACK_URI')
|
||||||
|
|
||||||
|
|
||||||
# Other
|
# Other
|
||||||
wanikani_api_key = load_lazy('WANIKANI_API_KEY')
|
def wanikani_api_key():
    """Return the ``WANIKANI_API_KEY`` secret via ``secrets.load_or_fail``."""
    api_key = secrets.load_or_fail('WANIKANI_API_KEY')
    return api_key
|
||||||
|
|
|
@ -12,4 +12,3 @@ marko
|
||||||
fin-depo @ git+https://gitfub.space/Jmaa/fin-depo.git
|
fin-depo @ git+https://gitfub.space/Jmaa/fin-depo.git
|
||||||
secret_loader @ git+https://gitfub.space/Jmaa/secret_loader
|
secret_loader @ git+https://gitfub.space/Jmaa/secret_loader
|
||||||
requests-util @ git+https://gitfub.space/Jmaa/requests_util
|
requests-util @ git+https://gitfub.space/Jmaa/requests_util
|
||||||
clients @ git+https://gitfub.space/Jmaa/clients
|
|
||||||
|
|
18
tests/test_myanimelist.py
Normal file
18
tests/test_myanimelist.py
Normal file
|
@ -0,0 +1,18 @@
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from personal_data.fetchers.myanimelist import parse_name
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    ('input_str', 'expected_group1', 'expected_group2'),
    [
        ('"Soundscape"', 'Soundscape', None),
        ('"Soundscape (サウンドスケープ)"', 'Soundscape', 'サウンドスケープ'),
        ('1: "Soundscape"', 'Soundscape', None),
        ('2: "Soundscape (サウンドスケープ)"', 'Soundscape', 'サウンドスケープ'),
    ],
)
def test_parse_name(input_str, expected_group1, expected_group2):
    """Check both capture groups of ``parse_name`` for quoted song titles.

    Covers titles with and without a numeric prefix and with and without a
    parenthesised alternative title.
    """
    parsed = parse_name(input_str)
    title = parsed.group(1)
    alternative_title = parsed.group(2)
    assert title == expected_group1
    assert alternative_title == expected_group2
|
Loading…
Reference in New Issue
Block a user