1
0
personal-data/test/test_deduplicate.py

65 lines
2.0 KiB
Python

from frozendict import frozendict
import datetime
from decimal import Decimal
from personal_data.data import DeduplicateMode
from personal_data.main import deduplicate_dicts
# Shared fixture: two groups of rows that are identical apart from 't'
# (a=0/b=12 appears twice, a=1/b=2 appears four times).
LIST = [
    frozendict({'a': a, 'b': b, 't': t})
    for a, b, t in (
        (0, 12, 300),
        (0, 12, 301),
        (1, 2, 300),
        (1, 2, 301),
        (1, 2, 302),
        (1, 2, 303),
    )
]
def test_no_fields_to_ignore():
    """With an empty ignore list, every mode must return LIST unchanged."""
    for dedup_mode in DeduplicateMode:
        rows, columns = deduplicate_dicts(LIST, dedup_mode, [])
        # The reported field names are expected in this exact order.
        assert columns == ['a', 'b', 't']
        # No row may be dropped when nothing is ignored.
        assert rows == LIST
def test_only_latest():
    """ONLY_LATEST with 't' ignored is expected to keep just the first three rows."""
    rows, columns = deduplicate_dicts(
        LIST,
        DeduplicateMode.ONLY_LATEST,
        ['t'],
    )
    assert columns == ['a', 'b', 't']
    surviving_rows = LIST[:3]
    assert rows == surviving_rows
def test_all_fields():
    """BY_ALL_COLUMNS with 't' ignored is expected to keep one row per (a, b) pair."""
    rows, columns = deduplicate_dicts(
        LIST,
        DeduplicateMode.BY_ALL_COLUMNS,
        ['t'],
    )
    assert columns == ['a', 'b', 't']
    print(rows)
    # Each surviving row is the one with t == 300 for its (a, b) pair.
    survivors = [
        frozendict({'a': 0, 'b': 12, 't': 300}),
        frozendict({'a': 1, 'b': 2, 't': 300}),
    ]
    assert rows == survivors
def test_all_fields_for_duplicated_list():
    """BY_ALL_COLUMNS must give the same result when the input list is repeated.

    Feeds ``LIST + LIST`` (every row present twice) with 't' ignored and
    expects exactly one row per (a, b) pair.

    This test has its own name: reusing ``test_all_fields`` would rebind that
    module-level name, and pytest would then collect only one of the two tests.
    """
    ls, fields = deduplicate_dicts(
        LIST + LIST,
        DeduplicateMode.BY_ALL_COLUMNS,
        ['t'],
    )
    assert fields == ['a', 'b', 't']
    # Shown by pytest only when the test fails (captured stdout).
    print(ls)
    assert ls == [
        frozendict({'a': 0, 'b': 12, 't': 300}),
        frozendict({'a': 1, 'b': 2, 't': 300}),
    ]
# Fixture: two weight samples with different timestamps and different weights.
LIST_2 = [
    frozendict({'weight.sample_time': sample_time, 'weight.kg': Decimal(kg)})
    for sample_time, kg in (
        (
            datetime.datetime(
                2024, 5, 28, 6, 27, 31, 134506, tzinfo=datetime.timezone.utc,
            ),
            '73.6',
        ),
        (
            datetime.datetime(
                2024, 6, 1, 7, 36, 9, 590355, tzinfo=datetime.timezone.utc,
            ),
            '74.7',
        ),
    )
]
def test_deduplicate_weight():
    """Two different weight samples must both survive BY_ALL_COLUMNS."""
    rows, columns = deduplicate_dicts(
        LIST_2,
        DeduplicateMode.BY_ALL_COLUMNS,
        [],
    )
    assert columns == ['weight.sample_time', 'weight.kg']
    # Nothing is ignored and the rows differ, so the input comes back as-is.
    assert rows == LIST_2
def test_deduplicate_weight_2():
    """Repeating LIST_2 must collapse back to LIST_2 under BY_ALL_COLUMNS."""
    doubled_input = LIST_2 + LIST_2
    rows, columns = deduplicate_dicts(
        doubled_input,
        DeduplicateMode.BY_ALL_COLUMNS,
        [],
    )
    assert columns == ['weight.sample_time', 'weight.kg']
    # Exact duplicates are expected to be removed, leaving the original two rows.
    assert rows == LIST_2