1
0

Output fetchers in help

This commit is contained in:
Jon Michael Aanes 2024-07-22 16:58:42 +02:00
parent 6027f37c3c
commit 216feee34b
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
2 changed files with 9 additions and 3 deletions

View File

@@ -7,7 +7,9 @@ from personal_data.notification import NotificationType
def parse_arguments():
available_scraper_names = personal_data.main.available_scraper_names()
parser = argparse.ArgumentParser()
parser = argparse.ArgumentParser(
epilog='Available fetchers: ' + ' '.join(available_scraper_names),
)
parser.add_argument(
'fetchers',
metavar='FETCHER',

View File

@@ -80,7 +80,7 @@ def equals_without_fields(
def deduplicate_by_ignoring_certain_fields(
dicts: list[dict],
deduplicate_ignore_columns: list[str],
deduplicate_ignore_columns: Iterable[str],
) -> list[dict]:
"""Removes duplicates that occur when ignoring certain columns.
@@ -104,6 +104,8 @@ def deduplicate_dicts(
deduplicate_mode: personal_data.data.DeduplicateMode,
deduplicate_ignore_columns: list[str],
) -> tuple[Sequence[dict], list[str]]:
assert isinstance(deduplicate_ignore_columns, list), deduplicate_ignore_columns
fieldnames = []
for d in dicts:
for k in d.keys():
@@ -143,6 +145,8 @@ def extend_csv_file(
deduplicate_mode: personal_data.data.DeduplicateMode,
deduplicate_ignore_columns: list[str],
) -> dict:
assert isinstance(deduplicate_ignore_columns, list), deduplicate_ignore_columns
dicts = []
try:
with open(filename) as csvfile:
@@ -292,7 +296,7 @@ def main(
f'output/{scraper.dataset_name}.csv',
result_rows,
deduplicate_mode=scraper.deduplicate_mode,
deduplicate_ignore_columns=scraper.deduplicate_ignore_columns,
deduplicate_ignore_columns=scraper.deduplicate_ignore_columns(),
)
logger.info('Scraper done: %s', scraper.dataset_name)