Normalize and deduplicate
This commit is contained in:
parent
34ba384265
commit
62db705b3e
|
@ -99,6 +99,12 @@ def deduplicate_dicts(
|
||||||
return dicts, fieldnames
|
return dicts, fieldnames
|
||||||
|
|
||||||
|
|
||||||
|
def normalize_dict(d: dict) -> frozendict:
    """Return an immutable, normalized copy of ``d``.

    Every value is stringified with ``str`` and passed through ``to_value``.
    Entries whose converted value is ``None`` are dropped; the remaining
    key/value pairs keep their original order.

    Args:
        d: Mapping of field names to raw values.

    Returns:
        A ``frozendict`` holding the converted, non-``None`` values.
    """
    normalized = {}
    for key, raw in d.items():
        # Convert once and reuse the result for both the None-filter and the
        # stored value, instead of calling to_value twice per item.
        value = to_value(str(raw))
        if value is not None:
            normalized[key] = value
    return frozendict(normalized)
|
||||||
|
|
||||||
|
|
||||||
def extend_csv_file(
|
def extend_csv_file(
|
||||||
filename: str,
|
filename: str,
|
||||||
new_dicts: list[dict],
|
new_dicts: list[dict],
|
||||||
|
@ -123,7 +129,7 @@ def extend_csv_file(
|
||||||
logger.info('Creating file: %s', filename)
|
logger.info('Creating file: %s', filename)
|
||||||
|
|
||||||
original_num_dicts = len(dicts)
|
original_num_dicts = len(dicts)
|
||||||
dicts += [frozendict(d) for d in new_dicts]
|
dicts += [normalize_dict(d) for d in new_dicts]
|
||||||
del new_dicts
|
del new_dicts
|
||||||
|
|
||||||
dicts, fieldnames = deduplicate_dicts(
|
dicts, fieldnames = deduplicate_dicts(
|
||||||
|
|
29
test/test_deduplicate.py
Normal file
29
test/test_deduplicate.py
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
from frozendict import frozendict
|
||||||
|
|
||||||
|
from personal_data.data import DeduplicateMode
|
||||||
|
from personal_data.main import deduplicate_dicts
|
||||||
|
|
||||||
|
# Four rows that are identical in 'a' and 'b' and differ only in 't' (300..303).
LIST = [frozendict({'a': 1, 'b': 2, 't': t}) for t in range(300, 304)]
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_deduplicate():
    """DeduplicateMode.NONE must hand back every row of LIST unchanged."""
    rows, columns = deduplicate_dicts(LIST, DeduplicateMode.NONE, [])
    assert columns == ['a', 'b', 't']
    assert rows == LIST
|
||||||
|
|
||||||
|
|
||||||
|
def test_only_latest_no_fields():
    """ONLY_LATEST with an empty third argument must keep all rows of LIST."""
    rows, columns = deduplicate_dicts(LIST, DeduplicateMode.ONLY_LATEST, [])
    assert columns == ['a', 'b', 't']
    assert rows == LIST
|
||||||
|
|
||||||
|
|
||||||
|
def test_only_latest():
    """ONLY_LATEST with ['t'] as third argument must reduce LIST to its first row."""
    rows, columns = deduplicate_dicts(LIST, DeduplicateMode.ONLY_LATEST, ['t'])
    assert columns == ['a', 'b', 't']
    expected_rows = [frozendict({'a': 1, 'b': 2, 't': 300})]
    assert rows == expected_rows
|
Loading…
Reference in New Issue
Block a user