"""Scraper for the Crunchyroll watch history of the logged-in account."""

import secrets
import functools
import logging

from personal_data.data import Scraper

logger = logging.getLogger(__name__)

API_ROOT = 'https://www.crunchyroll.com'

API_URL_TOKEN = API_ROOT + '/auth/v1/token'
API_URL_ME = API_ROOT + '/accounts/v1/me'
API_URL_WATCH_LIST = API_ROOT + '/content/v2/{account_uuid}/watch-history?page_size=100&locale=en-US'


def _flatten_episode(entry):
    """Turn one watch-history entry into a flat, dotted-key dictionary.

    The keys are read in the same order in which they are emitted, so a
    missing key raises ``KeyError`` for the first absent field.
    """
    played_at = entry['date_played']
    panel = entry['panel']
    metadata = panel['episode_metadata']
    return {
        # Sorting fields
        'datetime_played': played_at,

        # Important fields
        'series.title': metadata['series_title'],
        'season.number': metadata['season_number'],
        'episode.number': metadata['episode'],
        'episode.name': panel['title'],

        # Secondary fields
        'episode.language': metadata['audio_locale'],
        'episode.duration_ms': metadata['duration_ms'],
        # NOTE(review): the separator is a space followed by a line break,
        # exactly as it appears in the file as received -- confirm that this
        # is intended rather than a plain space.
        'episode.maturity_ratings': ' \n'.join(metadata['maturity_ratings']),
        'season.title': metadata['season_title'],
        'fully_watched': entry['fully_watched'],

        # Identifiers
        'episode.crunchyroll_id': entry['id'],
        'series.crunchyroll_id': entry['parent_id'],
    }


def scrape_watched_last(session):
    """Yield one flat dictionary per entry of the account's watch history.

    First exchanges the session cookies for an access token, then requests
    the watch history (a single page of at most 100 entries, per the
    ``page_size`` in ``API_URL_WATCH_LIST``) for the account named in the
    token response.

    Args:
        session: HTTP session exposing ``post``, ``get`` and ``cookies``
            (presumably a ``requests.Session`` or compatible -- confirm
            against the caller).

    Yields:
        dict: Played timestamp, series/season/episode details and the
        Crunchyroll identifiers of each watched episode.

    Raises:
        Whatever ``response.raise_for_status()`` raises when either request
        fails, and ``KeyError`` when a response lacks an expected field.
    """
    request_headers = {
        'Referer': 'https://www.crunchyroll.com/history',
        'Authorization': secrets.CRUNCHYROLL_AUTH,  # TODO: Determine automatically
    }

    # Step 1: obtain an access token; its payload also carries the account id.
    logger.info('Getting Access Token')
    token_form = {
        "device_id": secrets.CRUNCHYROLL_DEVICE_ID,  # TODO: Determine automatically
        "device_type": "Firefox on Linux",
        "grant_type": "etp_rt_cookie",
    }
    token_response = session.post(
        API_URL_TOKEN,
        headers=request_headers,
        cookies=session.cookies,
        data=token_form,
    )
    token_response.raise_for_status()
    token_payload = token_response.json()

    # Subsequent requests authenticate with the freshly issued token.
    request_headers['Authorization'] = (
        f"{token_payload['token_type']} {token_payload['access_token']}"
    )
    account_uuid = token_payload['account_id']
    logger.info(' Account UUID: %s', account_uuid)

    # Step 2: fetch the watch history for that account.
    logger.info('Getting Watchlist')
    history_url = API_URL_WATCH_LIST.format(account_uuid=account_uuid)
    history_response = session.get(history_url, headers=request_headers)
    history_response.raise_for_status()

    # Step 3: flatten every history entry into an output row.
    history_entries = history_response.json()['data']
    logger.info(' Watchlist length: %d', len(history_entries))

    for entry in history_entries:
        yield _flatten_episode(entry)


SCRAPERS = [
    Scraper(scrape_watched_last, 'episodes_watched_crunchyroll', deduplicate=True),
]