1
0

Compare commits

..

No commits in common. "a27ffe6ddb1ee5ed8c545667e1a7e8e63ac3f2f7" and "45233bd593d93843574d157fb5b8f615a800d2fc" have entirely different histories.

4 changed files with 164 additions and 37 deletions

View File

@ -7,15 +7,29 @@ from collections.abc import Iterator
import bs4 import bs4
from clients.myanimelist import MyAnimeListClient, MyAnimeListAnime, MyAnimeListSong
from clients import init_client
from personal_data.data import DeduplicateMode, Scraper from personal_data.data import DeduplicateMode, Scraper
from .. import secrets
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
def client(session):
return init_client(MyAnimeListClient, session, secrets.secrets, throws=True) @dataclasses.dataclass(frozen=True)
class MyAnimeListAnime:
    # One entry of a MyAnimeList anime list, as built by MyAnimeList.scrape()
    # from a single item of the page's `data-items` JSON.

    # English title; scrape() falls back to 'anime_title' when
    # 'anime_title_eng' is missing or empty.
    series_name_eng: str
    # Original title; scrape() falls back to 'anime_title_eng' when
    # 'anime_title' is missing or empty.
    series_name: str
    # Parsed URL of the series page ('anime_url' joined onto the list URL).
    series_myanimelist_url: urllib.parse.ParseResult
    # Parsed URL of the series image ('anime_image_path' joined onto the
    # list URL).
    series_icon: urllib.parse.ParseResult
    # Value of the item's 'score' key.
    # NOTE(review): scrape() reads it with .get(), so this may be None if the
    # key is absent — confirm against real data.
    me_score: int
@dataclasses.dataclass(frozen=True)
class MyAnimeListSong:
    # One theme song of an anime series, as built by parse_songs().

    # Title inside the quotes, before any parenthesised part.
    song_name_eng: str
    # Parenthesised part of the quoted title, or None when there is none.
    # NOTE(review): presumably the Japanese title — confirm against site data.
    song_name_jp: str | None
    # Artist text with a leading 'by ' removed.
    song_artist: str
    # Placement label passed in by the caller; get_songs_for_anime() uses
    # 'opening' and 'ending'.
    song_placement: str
    # Series titles copied from the anime the song belongs to.
    series_name_eng: str
    series_name: str
@dataclasses.dataclass(frozen=True) @dataclasses.dataclass(frozen=True)
class MyAnimeList(Scraper): class MyAnimeList(Scraper):
@ -23,7 +37,72 @@ class MyAnimeList(Scraper):
deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN
def scrape(self) -> Iterator[MyAnimeListAnime]: def scrape(self) -> Iterator[MyAnimeListAnime]:
yield from client(self.session).get_my_anime_list() username = 'WhereTheDogGoin'
url = f'https://myanimelist.net/animelist/{username}'
response = self.session.get(url)
response.raise_for_status()
soup = bs4.BeautifulSoup(response.text)
data_items_soup = soup.select('[data-items]')[0]
data_items = json.loads(data_items_soup.get('data-items'))
for data_item in data_items:
yield MyAnimeListAnime(
series_name_eng=data_item.get('anime_title_eng')
or data_item.get('anime_title'),
series_name=data_item.get('anime_title')
or data_item.get('anime_title_eng'),
series_myanimelist_url=urllib.parse.urlparse(
urllib.parse.urljoin(url, data_item['anime_url']),
),
series_icon=urllib.parse.urlparse(
urllib.parse.urljoin(url, data_item['anime_image_path']),
),
me_score=data_item.get('score'),
)
del data_item
def parse_name(text: str):
match = re.fullmatch(r'^(?:\d+:\s*)?"(.*?)(?:\((.*)\))?"$', text)
return match
def parse_songs(
    tr_elements,
    song_position: str,
    series_name_eng: str,
    series_name: str,
) -> Iterator[MyAnimeListSong]:
    """Yield one MyAnimeListSong per parseable theme-song table row.

    Rows without a ``.theme-song-artist`` element are skipped. Rows whose
    remaining text does not match the shape expected by ``parse_name`` are
    logged and skipped, so one odd row no longer aborts the whole scrape.

    Note that this mutates the rows: the artist and episode elements are
    removed from each row so that the row's remaining text is just the title.

    Args:
        tr_elements: Sized collection of row elements supporting
            ``select_one`` and ``get_text`` (the result of a bs4 ``select``).
        song_position: Placement label stored on every yielded song.
        series_name_eng: English series title stored on every yielded song.
        series_name: Original series title stored on every yielded song.
    """
    logger.debug('%s: %d theme-song rows', series_name_eng, len(tr_elements))
    for song_tr in tr_elements:
        artist = song_tr.select_one('.theme-song-artist')
        if artist is None:
            continue
        # Detach artist and episode info so get_text() below returns only
        # the title portion of the row.
        artist.extract()
        episode = song_tr.select_one('.theme-song-episode')
        if episode is not None:
            episode.extract()

        song_artist = artist.get_text().strip().removeprefix('by ')
        raw_name = song_tr.get_text().strip()
        m = parse_name(raw_name)
        if m is None:
            logger.warning(
                'Skipping %s song of %s with unparseable title: %r',
                song_position,
                series_name_eng,
                raw_name,
            )
            continue

        song_name_eng = m.group(1).strip()
        song_name_jp = m.group(2).strip() if m.group(2) else None
        logger.debug('  %s', song_name_eng)
        yield MyAnimeListSong(
            song_name_eng=song_name_eng,
            song_name_jp=song_name_jp,
            song_artist=song_artist,
            song_placement=song_position,
            series_name_eng=series_name_eng,
            series_name=series_name,
        )
@dataclasses.dataclass(frozen=True) @dataclasses.dataclass(frozen=True)
@ -31,7 +110,30 @@ class MyAnimeListSongs(Scraper):
dataset_name = 'myanimelist_songs' dataset_name = 'myanimelist_songs'
deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN deduplicate_mode = DeduplicateMode.BY_FIRST_COLUMN
def get_songs_for_anime(self, anime: MyAnimeListAnime):
    """Fetch the series page of *anime* and yield its theme songs.

    Opening songs are yielded first, then ending songs. Raises whatever
    ``raise_for_status()`` raises when the page request fails.
    """
    page = self.session.get(anime.series_myanimelist_url.geturl())
    page.raise_for_status()
    document = bs4.BeautifulSoup(page.text)

    # Remove scripts and popups before any text is read from the tree.
    # NOTE(review): presumably so their text does not leak into song titles.
    for unwanted in ('script', '.oped-popup'):
        for node in document.select(unwanted):
            node.extract()

    # NOTE(review): 'opnening' looks like a typo but appears to mirror the
    # class name used in the site's own markup — confirm before "fixing" it.
    sections = (
        ('.theme-songs.opnening table tr', 'opening'),
        ('.theme-songs.ending table tr', 'ending'),
    )
    for row_selector, placement in sections:
        yield from parse_songs(
            document.select(row_selector),
            placement,
            anime.series_name_eng,
            anime.series_name,
        )
def scrape(self) -> Iterator[MyAnimeListSong]: def scrape(self) -> Iterator[MyAnimeListSong]:
my_client = client(self.session) for anime in MyAnimeList(self.session).scrape():
for anime in my_client.get_my_anime_list(): yield from self.get_songs_for_anime(anime)
yield from my_client.get_songs_for_anime(anime)

View File

@ -2,69 +2,77 @@ from secret_loader import SecretLoader
secrets = SecretLoader(env_key_prefix='CF_PD') secrets = SecretLoader(env_key_prefix='CF_PD')
def load_lazy(key: str):
    """Return a zero-argument callable that loads the secret *key* on demand.

    Nothing is looked up when this function is called; each call of the
    returned callable delegates to ``secrets.load_or_fail(key)``.
    """

    def load():
        return secrets.load_or_fail(key)

    return load
# Crunchyroll # Crunchyroll
crunchyroll_device_id = load_lazy('CRUNCHYROLL_DEVICE_ID') CRUNCHYROLL_DEVICE_ID = secrets.load('CRUNCHYROLL_DEVICE_ID')
crunchyroll_auth = load_lazy('CRUNCHYROLL_AUTH') CRUNCHYROLL_AUTH = secrets.load('CRUNCHYROLL_AUTH')
# FFXIV # FFXIV
ffxiv_character_id = load_lazy('FFXIV_CHARACTER_ID') FFXIV_CHARACTER_ID = secrets.load('FFXIV_CHARACTER_ID')
# Playstation # Playstation
playstation_psn_id= load_lazy('PLAYSTATION_PSN_ID') def playstation_psn_id():
return secrets.load_or_fail('PLAYSTATION_PSN_ID')
# Partisia Blockchain # Partisia Blockchain
pbc_account_address= load_lazy('PBC_ACCOUNT_ADDRESS') def pbc_account_address():
return secrets.load_or_fail('PBC_ACCOUNT_ADDRESS')
# Steam # Steam
steam_username= load_lazy('STEAM_USERNAME') def steam_username():
return secrets.load_or_fail('STEAM_USERNAME')
# Gitea # Gitea
gitea_access_token = load_lazy('GITEA_ACCESS_TOKEN') def gitea_access_token():
return secrets.load('GITEA_ACCESS_TOKEN')
# Kucoin # Kucoin
kucoin_key= load_lazy('KUCOIN_KEY') def kucoin_key():
return secrets.load_or_fail('KUCOIN_KEY')
kucoin_secret= load_lazy('KUCOIN_SECRET') def kucoin_secret():
return secrets.load_or_fail('KUCOIN_SECRET')
kucoin_pass= load_lazy('KUCOIN_PASS') def kucoin_pass():
return secrets.load_or_fail('KUCOIN_PASS')
# Kraken # Kraken
kraken_key= load_lazy('KRAKEN_KEY') def kraken_key():
return secrets.load_or_fail('KRAKEN_KEY')
kraken_secret= load_lazy('KRAKEN_SECRET') def kraken_secret():
return secrets.load_or_fail('KRAKEN_SECRET')
# Home Assistant # Home Assistant
home_assistant_root = load_lazy('HOME_ASSISTANT_ROOT') HOME_ASSISTANT_ROOT = secrets.load('HOME_ASSISTANT_ROOT')
home_assistant_llak = load_lazy('HOME_ASSISTANT_LLAK') HOME_ASSISTANT_LLAK = secrets.load('HOME_ASSISTANT_LLAK')
# Email configuration # Email configuration
mailgun_api_key = load_lazy('MAILGUN_API_KEY') MAILGUN_API_KEY = secrets.load('MAILGUN_API_KEY')
mailgun_domain = load_lazy('MAILGUN_DOMAIN') MAILGUN_DOMAIN = secrets.load('MAILGUN_DOMAIN')
mailgun_recipient = load_lazy('MAILGUN_RECIPIENT') MAILGUN_RECIPIENT = secrets.load('MAILGUN_RECIPIENT')
# Jellyfin # Jellyfin
jellyfin_url = load_lazy('JELLYFIN_URL') JELLYFIN_URL = secrets.load('JELLYFIN_URL')
jellyfin_username = load_lazy('JELLYFIN_USERNAME') JELLYFIN_USERNAME = secrets.load('JELLYFIN_USERNAME')
jellyfin_password = load_lazy('JELLYFIN_PASSWORD') JELLYFIN_PASSWORD = secrets.load('JELLYFIN_PASSWORD')
# Withings # Withings
withings_clientid = load_lazy('WITHINGS_CLIENTID') WITHINGS_CLIENTID = secrets.load('WITHINGS_CLIENTID')
withings_secret = load_lazy('WITHINGS_SECRET') WITHINGS_SECRET = secrets.load('WITHINGS_SECRET')
withings_callback_uri = load_lazy('WITHINGS_CALLBACK_URI') WITHINGS_CALLBACK_URI = secrets.load('WITHINGS_CALLBACK_URI')
# Other # Other
wanikani_api_key = load_lazy('WANIKANI_API_KEY') def wanikani_api_key():
return secrets.load_or_fail('WANIKANI_API_KEY')

View File

@ -12,4 +12,3 @@ marko
fin-depo @ git+https://gitfub.space/Jmaa/fin-depo.git fin-depo @ git+https://gitfub.space/Jmaa/fin-depo.git
secret_loader @ git+https://gitfub.space/Jmaa/secret_loader secret_loader @ git+https://gitfub.space/Jmaa/secret_loader
requests-util @ git+https://gitfub.space/Jmaa/requests_util requests-util @ git+https://gitfub.space/Jmaa/requests_util
clients @ git+https://gitfub.space/Jmaa/clients

18
tests/test_myanimelist.py Normal file
View File

@ -0,0 +1,18 @@
import pytest
from personal_data.fetchers.myanimelist import parse_name
@pytest.mark.parametrize(
    ('input_str', 'expected_group1', 'expected_group2'),
    [
        ('"Soundscape"', 'Soundscape', None),
        ('"Soundscape (サウンドスケープ)"', 'Soundscape', 'サウンドスケープ'),
        ('1: "Soundscape"', 'Soundscape', None),
        ('2: "Soundscape (サウンドスケープ)"', 'Soundscape', 'サウンドスケープ'),
    ],
)
def test_parse_name(input_str, expected_group1, expected_group2):
    """Check the title and parenthesised part extracted by parse_name."""
    m = parse_name(input_str)
    # parse_name returns None for non-matching input; assert that explicitly
    # so a regression reports the offending input instead of an
    # AttributeError on None.
    assert m is not None, f'parse_name did not match {input_str!r}'
    assert m.group(1) == expected_group1
    assert m.group(2) == expected_group2