From efa066a133a9a7ee9df4e23670df5b2971dc5d7d Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Sun, 28 Jan 2024 21:01:50 +0100 Subject: [PATCH] Playstation WIP --- __main__.py | 12 ++++++++++-- playstation.py | 23 ++++++++++++++++++++--- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/__main__.py b/__main__.py index 381514b..c2a341e 100644 --- a/__main__.py +++ b/__main__.py @@ -6,6 +6,9 @@ import datetime import io import browsercookie from frozendict import frozendict +import logging + +logger = logging.getLogger(__name__) import playstation @@ -15,7 +18,6 @@ def determine_scrapers(): return scrapers def extend_csv_file(filename, new_dicts , deduplicate = False): - dicts = [] with open(filename, 'r') as csvfile: reader = csv.DictReader(csvfile) @@ -44,9 +46,15 @@ def extend_csv_file(filename, new_dicts , deduplicate = False): def main(): cookiejar = browsercookie.firefox() - exit(1) + logger.warning('Got cookiejar from firefox') + print('From cookiejar') + session = requests_cache.CachedSession('web_cache', cookies = cookiejar) + for cookie in cookiejar: + session.cookies.set_cookie(cookie) + for scraper in determine_scrapers(): + logger.warning('Running scraper: %s', scraper.dataset_name) result_rows = list(scraper.scraper(session)) extend_csv_file('output/'+scraper.dataset_name, result_rows, deduplicate = scraper.deduplicate) diff --git a/playstation.py b/playstation.py index b9d0bf4..9ea173b 100644 --- a/playstation.py +++ b/playstation.py @@ -1,9 +1,27 @@ from data import Scraper import secrets +import logging + +logger = logging.getLogger(__name__) + +URL_RECENTLY_PLAYED_HTML = 'https://library.playstation.com/recently-played' +URL_RECENTLY_PLAYED_API = "https://web.np.playstation.com/api/graphql/v1/op?operationName=getUserGameList&variables=%7B%22limit%22%3A50%2C%22categories%22%3A%22ps4_game%2Cps5_native_game%22%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%22e0136f81d7d1fb6be58238c574e9a46e1c0cc2f7f6977a08a5a46f224523a004%22%7D%7D" def scrape_played_last(session): - url = "https://web.np.playstation.com/api/graphql/v1/op?operationName=getUserGameList&variables=%7B%22limit%22%3A50%2C%22categories%22%3A%22ps4_game%2Cps5_native_game%22%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%22e0136f81d7d1fb6be58238c574e9a46e1c0cc2f7f6977a08a5a46f224523a004%22%7D%7D" + # Initial request to trigger cookie. + logger.warning('Trying to trigger initial cookie usage') + + response = session.get(URL_RECENTLY_PLAYED_HTML, cookies = session.cookies) + response.raise_for_status() + + print('From herp') + for cookie in session.cookies: + print(' ', cookie.domain, cookie) + exit(1) + + # Now trigger API call. + logger.warning('Trying to fetch data from API') headers = { "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0", "Accept": "application/json", @@ -19,7 +37,6 @@ def scrape_played_last(session): "Sec-Fetch-Site": "same-site", "Pragma": "no-cache", "Cache-Control": "no-cache", - #"Cookie": secrets.PLAYSTATION_COM_COOKIES, 'Accept-Encoding': 'gzip, deflate, br', 'Referer': 'https://library.playstation.com/', 'Origin': 'https://library.playstation.com', @@ -27,7 +44,7 @@ def scrape_played_last(session): 'Connection': 'keep-alive', 'TE': 'trailers', } - result = session.get(url, headers = headers) + result = session.get(URL_RECENTLY_PLAYED_API, headers = headers) result.raise_for_status() print(result.json())