diff --git a/__main__.py b/__main__.py
index c2a341e..0874e04 100644
--- a/__main__.py
+++ b/__main__.py
@@ -11,26 +11,33 @@ import logging
 logger = logging.getLogger(__name__)
 
 import playstation
+import crunchyroll
 
 def determine_scrapers():
     scrapers = []
-    scrapers += playstation.SCRAPERS
+    #scrapers += playstation.SCRAPERS
+    scrapers += crunchyroll.SCRAPERS
     return scrapers
 
 def extend_csv_file(filename, new_dicts , deduplicate = False):
     dicts = []
-    with open(filename, 'r') as csvfile:
-        reader = csv.DictReader(csvfile)
-        for row in reader:
-            dicts .append(frozendict(row))
-        del csvfile
+    try:
+        with open(filename, 'r') as csvfile:
+            reader = csv.DictReader(csvfile)
+            for row in reader:
+                dicts.append(frozendict(row))
+            del csvfile
+    except FileNotFoundError as e:
+        logger.info('Creating file: %s', filename)
+        pass
 
     dicts += [frozendict(d) for d in new_dicts]
     del new_dicts
 
-    if deduplicate:
-        dicts = list(set(dicts))
-    fieldnames = dicts[0].keys()
+    fieldnames = list(dicts[0].keys())
+
+    if deduplicate:
+        dicts = sorted(set(dicts), key = lambda d: d[fieldnames[0]])
 
     csvfile_in_memory = io.StringIO()
     writer = csv.DictWriter(csvfile_in_memory, fieldnames=fieldnames)
@@ -44,10 +51,16 @@ def extend_csv_file(filename, new_dicts , deduplicate = False):
         csvfile.write(output_csv)
     del csvfile
 
+STANDARD_HEADERS = {
+    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0',
+    "Accept": "application/json, text/plain, */*",
+    'Accept-Language': 'en-US,en;q=0.5',
+    'Accept-Encoding': 'gzip, deflate, br',
+}
+
 def main():
     cookiejar = browsercookie.firefox()
-    logger.warning('Got cookiejar from firefox')
-    print('From cookiejar')
+    logger.warning('Got cookiejar from firefox: %s cookies', len(cookiejar))
 
     session = requests_cache.CachedSession('web_cache', cookies = cookiejar)
     for cookie in cookiejar:
diff --git a/crunchyroll.py b/crunchyroll.py
new file mode 100644
index 0000000..5c2d301
--- /dev/null
+++ b/crunchyroll.py
@@ -0,0 +1,68 @@
+from data import Scraper
+import secrets
+import functools
+import logging
+
+logger = logging.getLogger(__name__)
+
+API_ROOT = 'https://www.crunchyroll.com'
+API_URL_TOKEN = API_ROOT + '/auth/v1/token'
+API_URL_ME = API_ROOT + '/accounts/v1/me'
+API_URL_WATCH_LIST = API_ROOT + '/content/v2/{account_uuid}/watch-history?page_size=100&locale=en-US'
+
+def scrape_watched_last(session):
+    headers = {
+        'Referer': 'https://www.crunchyroll.com/history',
+        'Authorization': secrets.CRUNCHYROLL_AUTH, # TODO: Determine automatically
+    }
+
+    # Request to get account UUID
+    logger.info('Getting Access Token')
+    response = session.post(API_URL_TOKEN, headers = headers, cookies = session.cookies, data = {
+        "device_id": secrets.CRUNCHYROLL_DEVICE_ID, # TODO: Determine automatically
+        "device_type": "Firefox on Linux",
+        "grant_type": "etp_rt_cookie"
+    })
+
+    response.raise_for_status()
+    data_me = response.json()
+    headers['Authorization'] = '{} {}'.format(data_me['token_type'], data_me['access_token'])
+    account_uuid = data_me['account_id']
+
+    logger.info(' Account UUID: %s', account_uuid)
+
+    # Request to get watch history
+    logger.info('Getting Watchlist')
+    response = session.get(API_URL_WATCH_LIST.format(account_uuid = account_uuid), headers = headers)
+    response.raise_for_status()
+
+    # Parse data
+    episodes_data = response.json()['data']
+    logger.info(' Watchlist length: %d', len(episodes_data))
+
+    for episode_data in episodes_data:
+        yield {
+            # Sorting fields
+            'datetime_played': episode_data['date_played'],
+
+            # Important fields
+            'series.title': episode_data['panel']['episode_metadata']['series_title'],
+            'season.number': episode_data['panel']['episode_metadata']['season_number'],
+            'episode.number': episode_data['panel']['episode_metadata']['episode'],
+            'episode.name': episode_data['panel']['title'],
+
+            # Secondary fields
+            'episode.language': episode_data['panel']['episode_metadata']['audio_locale'],
+            'episode.duration_ms': episode_data['panel']['episode_metadata']['duration_ms'],
+            'episode.maturity_ratings': ' '.join(episode_data['panel']['episode_metadata']['maturity_ratings']),
+            'season.title': episode_data['panel']['episode_metadata']['season_title'],
+            'fully_watched': episode_data['fully_watched'],
+
+            # Identifiers
+            'episode.crunchyroll_id': episode_data['id'],
+            'series.crunchyroll_id': episode_data['parent_id'],
+        }
+
+SCRAPERS = [
+    Scraper(scrape_watched_last, 'episodes_watched_crunchyroll', deduplicate = True)
+]
diff --git a/playstation.py b/playstation.py
index 9ea173b..3cb157f 100644
--- a/playstation.py
+++ b/playstation.py
@@ -32,17 +32,8 @@ def scrape_played_last(session):
         "apollographql-client-name": "my-playstation",
         "apollographql-client-version": "0.1.0-20230720235210-hotfix-1-g1e9f07ff",
         "X-PSN-Request-Id": "8ad64653-d8b5-4941-b565-b5536c9853df",
-        "Sec-Fetch-Dest": "empty",
-        "Sec-Fetch-Mode": "cors",
-        "Sec-Fetch-Site": "same-site",
-        "Pragma": "no-cache",
-        "Cache-Control": "no-cache",
-        'Accept-Encoding': 'gzip, deflate, br',
         'Referer': 'https://library.playstation.com/',
         'Origin': 'https://library.playstation.com',
-        'DNT': '1',
-        'Connection': 'keep-alive',
-        'TE': 'trailers',
     }
     result = session.get(URL_RECENTLY_PLAYED_API, headers = headers)
     result.raise_for_status()
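
The reworked deduplication in extend_csv_file leans on two details: frozendict rows are hashable, so set() collapses exact duplicates, and sorting by the first fieldname (datetime_played for the Crunchyroll rows) makes the output order deterministic instead of the arbitrary set order the old list(set(dicts)) produced. Note that dicts[0] still assumes at least one row exists. A minimal standalone sketch of that behaviour, assuming the frozendict package and with illustrative example rows:

from frozendict import frozendict

rows = [
    frozendict({'datetime_played': '2024-02-01T10:00Z', 'episode.name': 'Pilot'}),
    frozendict({'datetime_played': '2024-02-01T10:00Z', 'episode.name': 'Pilot'}),  # exact duplicate
    frozendict({'datetime_played': '2024-01-15T09:00Z', 'episode.name': 'Finale'}),
]

fieldnames = list(rows[0].keys())
deduplicated = sorted(set(rows), key=lambda d: d[fieldnames[0]])
assert len(deduplicated) == 2  # duplicate row collapsed by set()
assert deduplicated[0]['episode.name'] == 'Finale'  # earliest datetime_played sorts first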
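
STANDARD_HEADERS is introduced in __main__.py but not yet consumed by either scraper in these hunks, while the playstation.py hunk trims exactly the kind of generic browser headers it contains; presumably the scrapers will pick it up later. A hedged sketch of one way to wire that together, where merge_headers is an illustrative name, not code from this PR:

def merge_headers(extra_headers):
    # Hypothetical helper: shared browser headers first, then the
    # scraper-specific ones, which win on any key collision.
    headers = dict(STANDARD_HEADERS)  # copy; leave the module-level dict untouched
    headers.update(extra_headers)
    return headers

A scraper could then pass headers = merge_headers(headers) into session.get and keep only its API-specific entries locally.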
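
The row construction in scrape_watched_last indexes several levels deep into episode_data['panel']['episode_metadata'] and will raise KeyError the first time Crunchyroll omits a field. If that proves brittle, a defensive variant is straightforward; this is a sketch using only field names from the new file, with None standing in for absent values:

def episode_row(episode_data):
    # Sketch, not PR code: tolerate missing keys instead of raising KeyError.
    panel = episode_data.get('panel', {})
    metadata = panel.get('episode_metadata', {})
    return {
        'datetime_played': episode_data.get('date_played'),
        'series.title': metadata.get('series_title'),
        'season.number': metadata.get('season_number'),
        'episode.number': metadata.get('episode'),
        'episode.name': panel.get('title'),
        'episode.crunchyroll_id': episode_data.get('id'),
        'series.crunchyroll_id': episode_data.get('parent_id'),
    }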