From 62db705b3e98f41528dfb6b882ddd601aa46f867 Mon Sep 17 00:00:00 2001
From: Jon Michael Aanes
Date: Sat, 18 May 2024 21:52:22 +0200
Subject: [PATCH] Normalize and deduplicate

---

Notes:
    normalize_dict() builds a frozendict from a row: every value is passed
    as str(value) through to_value(), and keys whose converted value is
    None are dropped. extend_csv_file() now applies it to the new rows
    (previously they were wrapped with frozendict() directly) before the
    combined rows are handed to deduplicate_dicts().

    test/test_deduplicate.py exercises deduplicate_dicts() with
    DeduplicateMode.NONE and DeduplicateMode.ONLY_LATEST.

    Review: normalize_dict() evaluates to_value(str(v)) twice per item;
    consider binding the result once in a follow-up change.

 personal_data/main.py    |  8 +++++++-
 test/test_deduplicate.py | 29 +++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)
 create mode 100644 test/test_deduplicate.py

diff --git a/personal_data/main.py b/personal_data/main.py
index 50b940a..6cce2fe 100644
--- a/personal_data/main.py
+++ b/personal_data/main.py
@@ -99,6 +99,12 @@ def deduplicate_dicts(
     return dicts, fieldnames
 
 
+def normalize_dict(d: dict) -> frozendict:
+    return frozendict(
+        {k: to_value(str(v)) for k, v in d.items() if to_value(str(v)) is not None},
+    )
+
+
 def extend_csv_file(
     filename: str,
     new_dicts: list[dict],
@@ -123,7 +129,7 @@ def extend_csv_file(
         logger.info('Creating file: %s', filename)
 
     original_num_dicts = len(dicts)
-    dicts += [frozendict(d) for d in new_dicts]
+    dicts += [normalize_dict(d) for d in new_dicts]
     del new_dicts
 
     dicts, fieldnames = deduplicate_dicts(
diff --git a/test/test_deduplicate.py b/test/test_deduplicate.py
new file mode 100644
index 0000000..b484aa5
--- /dev/null
+++ b/test/test_deduplicate.py
@@ -0,0 +1,29 @@
+from frozendict import frozendict
+
+from personal_data.data import DeduplicateMode
+from personal_data.main import deduplicate_dicts
+
+LIST = [
+    frozendict({'a': 1, 'b': 2, 't': 300}),
+    frozendict({'a': 1, 'b': 2, 't': 301}),
+    frozendict({'a': 1, 'b': 2, 't': 302}),
+    frozendict({'a': 1, 'b': 2, 't': 303}),
+]
+
+
+def test_no_deduplicate():
+    ls, fields = deduplicate_dicts(LIST, DeduplicateMode.NONE, [])
+    assert fields == ['a', 'b', 't']
+    assert ls == LIST
+
+
+def test_only_latest_no_fields():
+    ls, fields = deduplicate_dicts(LIST, DeduplicateMode.ONLY_LATEST, [])
+    assert fields == ['a', 'b', 't']
+    assert ls == LIST
+
+
+def test_only_latest():
+    ls, fields = deduplicate_dicts(LIST, DeduplicateMode.ONLY_LATEST, ['t'])
+    assert fields == ['a', 'b', 't']
+    assert ls == [frozendict({'a': 1, 'b': 2, 't': 300})]