From 216feee34b679510e94a62d49b1f9257c19fc5af Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Mon, 22 Jul 2024 16:58:42 +0200 Subject: [PATCH] Output fetchers in help --- personal_data/__main__.py | 4 +++- personal_data/main.py | 8 ++++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/personal_data/__main__.py b/personal_data/__main__.py index b10424c..2edcb50 100644 --- a/personal_data/__main__.py +++ b/personal_data/__main__.py @@ -7,7 +7,9 @@ from personal_data.notification import NotificationType def parse_arguments(): available_scraper_names = personal_data.main.available_scraper_names() - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser( + epilog='Available fetchers: ' + ' '.join(available_scraper_names), + ) parser.add_argument( 'fetchers', metavar='FETCHER', diff --git a/personal_data/main.py b/personal_data/main.py index c427358..10a016d 100644 --- a/personal_data/main.py +++ b/personal_data/main.py @@ -80,7 +80,7 @@ def equals_without_fields( def deduplicate_by_ignoring_certain_fields( dicts: list[dict], - deduplicate_ignore_columns: list[str], + deduplicate_ignore_columns: Iterable[str], ) -> list[dict]: """Removes duplicates that occur when ignoring certain columns. @@ -104,6 +104,8 @@ def deduplicate_dicts( deduplicate_mode: personal_data.data.DeduplicateMode, deduplicate_ignore_columns: list[str], ) -> tuple[Sequence[dict], list[str]]: + assert isinstance(deduplicate_ignore_columns, list), deduplicate_ignore_columns + fieldnames = [] for d in dicts: for k in d.keys(): @@ -143,6 +145,8 @@ def extend_csv_file( deduplicate_mode: personal_data.data.DeduplicateMode, deduplicate_ignore_columns: list[str], ) -> dict: + assert isinstance(deduplicate_ignore_columns, list), deduplicate_ignore_columns + dicts = [] try: with open(filename) as csvfile: @@ -292,7 +296,7 @@ def main( f'output/{scraper.dataset_name}.csv', result_rows, deduplicate_mode=scraper.deduplicate_mode, - deduplicate_ignore_columns=scraper.deduplicate_ignore_columns, + deduplicate_ignore_columns=scraper.deduplicate_ignore_columns(), ) logger.info('Scraper done: %s', scraper.dataset_name)