"""Run the configured scrapers and append their rows to per-dataset CSV files."""

import requests
import requests_cache
import csv
import datetime
import io
import browsercookie
from frozendict import frozendict
import logging

logger = logging.getLogger(__name__)

import personal_data.fetchers.playstation
import personal_data.fetchers.crunchyroll


def determine_scrapers():
    """Return the list of scraper definitions that should be run."""
    scrapers = []
    #scrapers += personal_data.fetchers.playstation.SCRAPERS
    scrapers += personal_data.fetchers.crunchyroll.SCRAPERS
    return scrapers


def extend_csv_file(filename, new_dicts, deduplicate=False):
    """Append ``new_dicts`` to the CSV at ``filename``, rewriting the file.

    Existing rows (if the file exists) are loaded first so old and new rows
    are written back together.  Column order is taken from the first row.
    When ``deduplicate`` is true, duplicate rows are dropped and the result
    is sorted by the first column.
    """
    dicts = []
    try:
        # newline='' is required by the csv module for correct round-tripping
        # of embedded newlines and to avoid platform newline translation.
        with open(filename, 'r', newline='') as csvfile:
            for row in csv.DictReader(csvfile):
                # frozendict rows are hashable, enabling set()-based dedup.
                dicts.append(frozendict(row))
    except FileNotFoundError:
        logger.info('Creating file: %s', filename)

    dicts += [frozendict(d) for d in new_dicts]
    if not dicts:
        # Nothing to write; guard against IndexError on dicts[0] below.
        logger.info('No rows to write for: %s', filename)
        return

    fieldnames = list(dicts[0].keys())
    if deduplicate:
        dicts = sorted(set(dicts), key=lambda d: d[fieldnames[0]])

    # Serialize to memory first so the file on disk is only replaced once
    # the full output has been produced successfully.
    csvfile_in_memory = io.StringIO()
    writer = csv.DictWriter(csvfile_in_memory, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(dicts)
    output_csv = csvfile_in_memory.getvalue()

    with open(filename, 'w', newline='') as csvfile:
        csvfile.write(output_csv)


# Browser-like headers so scraped sites treat us as a normal Firefox client.
STANDARD_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:122.0) Gecko/20100101 Firefox/122.0',
    "Accept": "application/json, text/plain, */*",
    'Accept-Language': 'en-US,en;q=0.5',
    'Accept-Encoding': 'gzip, deflate, br',
}


def main():
    """Build a cached, cookie-carrying session and run every scraper."""
    cookiejar = browsercookie.firefox()
    logger.warning('Got cookiejar from firefox: %s cookies', len(cookiejar))

    # NOTE(review): requests_cache forwards unknown kwargs to the cache
    # backend, so `cookies=` here is likely a no-op — the cookies are
    # installed on the session in the loop below anyway.  Confirm intent.
    session = requests_cache.CachedSession('web_cache', cookies=cookiejar)
    for cookie in cookiejar:
        session.cookies.set_cookie(cookie)

    for scraper in determine_scrapers():
        logger.warning('Running scraper: %s', scraper.dataset_name)
        result_rows = list(scraper.scraper(session))
        extend_csv_file(
            'output/' + scraper.dataset_name,
            result_rows,
            deduplicate=scraper.deduplicate,
        )


if __name__ == '__main__':
    main()