Normalize and deduplicate
This commit is contained in:
parent
34ba384265
commit
62db705b3e
|
@ -99,6 +99,12 @@ def deduplicate_dicts(
|
|||
return dicts, fieldnames
|
||||
|
||||
|
||||
def normalize_dict(d: dict) -> frozendict:
    """Return a frozen copy of ``d`` with each value re-parsed from its string form.

    Every value is converted with ``str`` and handed to ``to_value``; entries
    for which ``to_value`` returns ``None`` are left out of the result.

    Args:
        d: Mapping of field names to raw values.

    Returns:
        A ``frozendict`` holding the parsed, non-``None`` values under their
        original keys.
    """
    normalized = {}
    for key, raw in d.items():
        # Parse once and reuse the result for both the filter and the stored
        # value, instead of calling to_value twice per item.
        parsed = to_value(str(raw))
        if parsed is not None:
            normalized[key] = parsed
    return frozendict(normalized)
|
||||
|
||||
|
||||
def extend_csv_file(
|
||||
filename: str,
|
||||
new_dicts: list[dict],
|
||||
|
@ -123,7 +129,7 @@ def extend_csv_file(
|
|||
logger.info('Creating file: %s', filename)
|
||||
|
||||
original_num_dicts = len(dicts)
|
||||
dicts += [frozendict(d) for d in new_dicts]
|
||||
dicts += [normalize_dict(d) for d in new_dicts]
|
||||
del new_dicts
|
||||
|
||||
dicts, fieldnames = deduplicate_dicts(
|
||||
|
|
29
test/test_deduplicate.py
Normal file
29
test/test_deduplicate.py
Normal file
|
@ -0,0 +1,29 @@
|
|||
from frozendict import frozendict
|
||||
|
||||
from personal_data.data import DeduplicateMode
|
||||
from personal_data.main import deduplicate_dicts
|
||||
|
||||
# Four rows that agree on 'a' and 'b' and differ only in 't' (300..303).
LIST = [frozendict({'a': 1, 'b': 2, 't': t}) for t in range(300, 304)]
|
||||
|
||||
|
||||
def test_no_deduplicate():
    """DeduplicateMode.NONE is expected to return every input row unchanged."""
    result, fieldnames = deduplicate_dicts(LIST, DeduplicateMode.NONE, [])
    assert fieldnames == ['a', 'b', 't']
    assert result == LIST
|
||||
|
||||
|
||||
def test_only_latest_no_fields():
    """ONLY_LATEST with an empty third argument is expected to keep all rows."""
    result, fieldnames = deduplicate_dicts(
        LIST,
        DeduplicateMode.ONLY_LATEST,
        [],
    )
    assert fieldnames == ['a', 'b', 't']
    assert result == LIST
|
||||
|
||||
|
||||
def test_only_latest():
    """ONLY_LATEST with ['t'] as third argument is expected to keep one row.

    The expected survivor is the row with ``t == 300``.
    """
    result, fieldnames = deduplicate_dicts(
        LIST,
        DeduplicateMode.ONLY_LATEST,
        ['t'],
    )
    expected_rows = [frozendict({'a': 1, 'b': 2, 't': 300})]
    assert fieldnames == ['a', 'b', 't']
    assert result == expected_rows
|
Loading…
Reference in New Issue
Block a user