1
0

PlayStation WIP

This commit is contained in:
Jon Michael Aanes 2024-01-28 21:01:50 +01:00
parent 2563303896
commit efa066a133
2 changed files with 30 additions and 5 deletions

View File

@ -6,6 +6,9 @@ import datetime
import io import io
import browsercookie import browsercookie
from frozendict import frozendict from frozendict import frozendict
import logging
logger = logging.getLogger(__name__)
import playstation import playstation
@ -15,7 +18,6 @@ def determine_scrapers():
return scrapers return scrapers
def extend_csv_file(filename, new_dicts , deduplicate = False): def extend_csv_file(filename, new_dicts , deduplicate = False):
dicts = [] dicts = []
with open(filename, 'r') as csvfile: with open(filename, 'r') as csvfile:
reader = csv.DictReader(csvfile) reader = csv.DictReader(csvfile)
@ -44,9 +46,15 @@ def extend_csv_file(filename, new_dicts , deduplicate = False):
def main(): def main():
cookiejar = browsercookie.firefox() cookiejar = browsercookie.firefox()
exit(1) logger.warning('Got cookiejar from firefox')
print('From cookiejar')
session = requests_cache.CachedSession('web_cache', cookies = cookiejar) session = requests_cache.CachedSession('web_cache', cookies = cookiejar)
for cookie in cookiejar:
session.cookies.set_cookie(cookie)
for scraper in determine_scrapers(): for scraper in determine_scrapers():
logger.warning('Running scraper: %s', scraper.dataset_name)
result_rows = list(scraper.scraper(session)) result_rows = list(scraper.scraper(session))
extend_csv_file('output/'+scraper.dataset_name, result_rows, extend_csv_file('output/'+scraper.dataset_name, result_rows,
deduplicate = scraper.deduplicate) deduplicate = scraper.deduplicate)

View File

@ -1,9 +1,27 @@
from data import Scraper from data import Scraper
import secrets import secrets
import logging
logger = logging.getLogger(__name__)
URL_RECENTLY_PLAYED_HTML = 'https://library.playstation.com/recently-played'
URL_RECENTLY_PLAYED_API = "https://web.np.playstation.com/api/graphql/v1/op?operationName=getUserGameList&variables=%7B%22limit%22%3A50%2C%22categories%22%3A%22ps4_game%2Cps5_native_game%22%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%22e0136f81d7d1fb6be58238c574e9a46e1c0cc2f7f6977a08a5a46f224523a004%22%7D%7D"
def scrape_played_last(session): def scrape_played_last(session):
url = "https://web.np.playstation.com/api/graphql/v1/op?operationName=getUserGameList&variables=%7B%22limit%22%3A50%2C%22categories%22%3A%22ps4_game%2Cps5_native_game%22%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%22e0136f81d7d1fb6be58238c574e9a46e1c0cc2f7f6977a08a5a46f224523a004%22%7D%7D" # Initial request to trigger cookie.
logger.warning('Trying to trigger initial cookie usage')
response = session.get(URL_RECENTLY_PLAYED_HTML, cookies = session.cookies)
response.raise_for_status()
print('From herp')
for cookie in session.cookies:
print(' ', cookie.domain, cookie)
exit(1)
# Now trigger API call.
logger.warning('Trying to fetch data from API')
headers = { headers = {
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0", "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/119.0",
"Accept": "application/json", "Accept": "application/json",
@ -19,7 +37,6 @@ def scrape_played_last(session):
"Sec-Fetch-Site": "same-site", "Sec-Fetch-Site": "same-site",
"Pragma": "no-cache", "Pragma": "no-cache",
"Cache-Control": "no-cache", "Cache-Control": "no-cache",
#"Cookie": secrets.PLAYSTATION_COM_COOKIES,
'Accept-Encoding': 'gzip, deflate, br', 'Accept-Encoding': 'gzip, deflate, br',
'Referer': 'https://library.playstation.com/', 'Referer': 'https://library.playstation.com/',
'Origin': 'https://library.playstation.com', 'Origin': 'https://library.playstation.com',
@ -27,7 +44,7 @@ def scrape_played_last(session):
'Connection': 'keep-alive', 'Connection': 'keep-alive',
'TE': 'trailers', 'TE': 'trailers',
} }
result = session.get(url, headers = headers) result = session.get(URL_RECENTLY_PLAYED_API, headers = headers)
result.raise_for_status() result.raise_for_status()
print(result.json()) print(result.json())