import requests
import requests_cache
import csv
import datetime
import io

from frozendict import frozendict

import playstation


def determine_scrapers():
    scrapers = []
    scrapers += playstation.SCRAPERS
    return scrapers


def extend_csv_file(filename, new_dicts, deduplicate=False):
    # Read the existing rows; frozendicts are hashable, so they can be
    # deduplicated via a set later.
    dicts = []
    with open(filename, 'r') as csvfile:
        reader = csv.DictReader(csvfile)
        for row in reader:
            dicts.append(frozendict(row))
    del csvfile

    # Append the new rows, optionally dropping duplicates.
    dicts += [frozendict(d) for d in new_dicts]
    del new_dicts
    if deduplicate:
        dicts = list(set(dicts))

    # Write the combined rows to an in-memory buffer first...
    fieldnames = dicts[0].keys()
    csvfile_in_memory = io.StringIO()
    writer = csv.DictWriter(csvfile_in_memory, fieldnames=fieldnames)
    writer.writeheader()
    for d in dicts:
        writer.writerow(d)
    output_csv = csvfile_in_memory.getvalue()
    del writer, csvfile_in_memory

    # ...then overwrite the original file in one go.
    with open(filename, 'w') as csvfile:
        csvfile.write(output_csv)
    del csvfile


def main():
    # Cache HTTP responses on disk so repeated runs don't re-fetch pages.
    session = requests_cache.CachedSession('web_cache')
    for scraper in determine_scrapers():
        result_rows = list(scraper.scraper(session))
        extend_csv_file(
            'output/' + scraper.dataset_name,
            result_rows,
            deduplicate=scraper.deduplicate,
        )


if __name__ == '__main__':
    main()