# personal-data/personal_data/fetchers/crunchyroll.py

import secrets
import functools
import logging

from personal_data.data import Scraper, DeduplicateMode

logger = logging.getLogger(__name__)

# Base URL that every Crunchyroll endpoint below is built from.
API_ROOT = 'https://www.crunchyroll.com'

# Token endpoint; scrape_watched_last POSTs here to obtain an access token.
API_URL_TOKEN = f'{API_ROOT}/auth/v1/token'

# Account endpoint. Not referenced by the code visible in this module.
API_URL_ME = f'{API_ROOT}/accounts/v1/me'

# Watch-history endpoint template. The literal ``{account_uuid}`` placeholder
# is filled in later with ``str.format``; the page size (100) and locale
# (en-US) are fixed in the query string.
API_URL_WATCH_LIST = (
    f'{API_ROOT}/content/v2/'
    '{account_uuid}/watch-history?page_size=100&locale=en-US'
)

def scrape_watched_last(session):
    """Yield one flat record per entry of the Crunchyroll watch history.

    This is a generator, so no request is sent until the first item is
    pulled. It then:

    1. POSTs to ``API_URL_TOKEN`` (sending the session's cookies) to obtain
       an access token and the account id.
    2. GETs ``API_URL_WATCH_LIST`` for that account, authorised with the
       token. Only this single request is made; no further pages are
       followed.
    3. Yields one dict per element of the response's ``data`` list.

    Args:
        session: HTTP session object providing ``post``, ``get`` and
            ``cookies`` (presumably a ``requests.Session``-like object that
            already carries the Crunchyroll login cookies -- confirm against
            the caller).

    Yields:
        dict: Play timestamp, series/season/episode details and Crunchyroll
        identifiers for one history entry, keyed by dotted column names.

    Raises:
        Whatever ``raise_for_status()`` raises for an HTTP error status.
        KeyError: If an expected field is absent from a JSON response.
    """
    request_headers = {
        'Referer': 'https://www.crunchyroll.com/history',
        # TODO: Determine automatically
        'Authorization': secrets.CRUNCHYROLL_AUTH,
    }

    # Step 1: obtain an access token; the same response carries the account id.
    logger.info('Getting Access Token')
    token_form = {
        # TODO: Determine automatically
        'device_id': secrets.CRUNCHYROLL_DEVICE_ID,
        'device_type': 'Firefox on Linux',
        'grant_type': 'etp_rt_cookie',
    }
    token_response = session.post(
        API_URL_TOKEN,
        headers=request_headers,
        cookies=session.cookies,
        data=token_form,
    )
    token_response.raise_for_status()
    token_info = token_response.json()

    # Replace the initial credential with the freshly issued token.
    token_type = token_info['token_type']
    access_token = token_info['access_token']
    request_headers['Authorization'] = f'{token_type} {access_token}'
    account_uuid = token_info['account_id']

    logger.info('        Account UUID: %s', account_uuid)

    # Step 2: fetch the watch history for that account.
    logger.info('Getting Watchlist')
    history_url = API_URL_WATCH_LIST.format(account_uuid=account_uuid)
    history_response = session.get(history_url, headers=request_headers)
    history_response.raise_for_status()

    # Step 3: flatten every history entry into a single-level record.
    watch_history = history_response.json()['data']
    logger.info('        Watchlist length: %d', len(watch_history))

    for entry in watch_history:
        # Read in the same order the record is laid out, so a missing field
        # surfaces as the same KeyError regardless of how lookups are grouped.
        played_at = entry['date_played']
        panel = entry['panel']
        metadata = panel['episode_metadata']

        yield {
            # Sorting fields
            'datetime_played': played_at,

            # Important fields
            'series.title': metadata['series_title'],
            'season.number': metadata['season_number'],
            'episode.number': metadata['episode'],
            'episode.name': panel['title'],

            # Secondary fields
            'episode.language': metadata['audio_locale'],
            'episode.duration_ms': metadata['duration_ms'],
            # The ratings sequence is collapsed into one space-separated string.
            'episode.maturity_ratings': ' '.join(metadata['maturity_ratings']),
            'season.title': metadata['season_title'],
            'fully_watched': entry['fully_watched'],

            # Identifiers
            'episode.crunchyroll_id': entry['id'],
            'series.crunchyroll_id': entry['parent_id'],
        }

# Scrapers exposed by this module. The single entry wraps
# scrape_watched_last; the string is presumably the name of the resulting
# dataset (confirm against the Scraper definition), and deduplication is
# configured to DeduplicateMode.BY_ALL_COLUMNS.
SCRAPERS = [
    Scraper(
        scrape_watched_last,
        'episodes_watched_crunchyroll',
        deduplicate_mode=DeduplicateMode.BY_ALL_COLUMNS,
    ),
]