1
0
personal-data/test/test_deduplicate.py

96 lines
2.4 KiB
Python

import datetime
from decimal import Decimal
from frozendict import frozendict
from personal_data.data import DeduplicateMode
from personal_data.main import deduplicate_dicts
# Shared fixture: rows fall into two (a, b) groups -- (0, 12) and (1, 2) --
# and rows within a group differ only in their 't' value.
LIST = [
    frozendict({'a': a, 'b': b, 't': t})
    for a, b, t in (
        (0, 12, 300),
        (0, 12, 301),
        (1, 2, 300),
        (1, 2, 301),
        (1, 2, 302),
        (1, 2, 303),
    )
]
def test_no_fields_to_ignore():
    """Check that every DeduplicateMode returns LIST unchanged when the
    list of fields to ignore is empty.
    """
    for dedup_mode in DeduplicateMode:
        deduplicated, field_names = deduplicate_dicts(LIST, dedup_mode, [])
        assert field_names == ['a', 'b', 't']
        assert deduplicated == LIST
def test_only_latest():
    """Check ONLY_LATEST with 't' ignored: the expected result is the
    first three rows of LIST.
    """
    expected_rows = LIST[:3]
    deduplicated, field_names = deduplicate_dicts(
        LIST,
        DeduplicateMode.ONLY_LATEST,
        ['t'],
    )
    assert field_names == ['a', 'b', 't']
    assert deduplicated == expected_rows
def test_all_fields():
    """Check BY_ALL_COLUMNS with 't' ignored: the expected result keeps a
    single row (the one with t=300) for each distinct (a, b) pair in LIST.
    """
    expected_rows = [
        frozendict({'a': 0, 'b': 12, 't': 300}),
        frozendict({'a': 1, 'b': 2, 't': 300}),
    ]
    deduplicated, field_names = deduplicate_dicts(
        LIST,
        DeduplicateMode.BY_ALL_COLUMNS,
        ['t'],
    )
    assert field_names == ['a', 'b', 't']
    print(deduplicated)
    assert deduplicated == expected_rows
def test_all_fields_duplicated_input():
    """Check BY_ALL_COLUMNS with 't' ignored on LIST concatenated with itself.

    The expected result keeps a single row (the one with t=300) for each
    distinct (a, b) pair, even though every input row appears twice.

    This test has its own name on purpose: reusing a name already bound to
    another test in this module would rebind it, and pytest would only
    collect the later definition.
    """
    doubled_input = LIST + LIST
    deduplicated, field_names = deduplicate_dicts(
        doubled_input,
        DeduplicateMode.BY_ALL_COLUMNS,
        ['t'],
    )
    assert field_names == ['a', 'b', 't']
    assert deduplicated == [
        frozendict({'a': 0, 'b': 12, 't': 300}),
        frozendict({'a': 1, 'b': 2, 't': 300}),
    ]
# Fixture of two distinct weight samples with timezone-aware (UTC)
# timestamps and Decimal weights.
LIST_2 = [
    frozendict({
        'weight.sample_time': datetime.datetime(
            year=2024, month=5, day=28,
            hour=6, minute=27, second=31, microsecond=134506,
            tzinfo=datetime.UTC,
        ),
        'weight.kg': Decimal('73.6'),
    }),
    frozendict({
        'weight.sample_time': datetime.datetime(
            year=2024, month=6, day=1,
            hour=7, minute=36, second=9, microsecond=590355,
            tzinfo=datetime.UTC,
        ),
        'weight.kg': Decimal('74.7'),
    }),
]
def test_deduplicate_weight():
    """Check BY_ALL_COLUMNS with no ignored fields returns LIST_2 unchanged
    (its two rows are already distinct).
    """
    deduplicated, field_names = deduplicate_dicts(
        LIST_2,
        DeduplicateMode.BY_ALL_COLUMNS,
        [],
    )
    assert field_names == ['weight.sample_time', 'weight.kg']
    assert deduplicated == LIST_2
def test_deduplicate_weight_2():
    """Check BY_ALL_COLUMNS with no ignored fields on LIST_2 concatenated
    with itself: the expected result is LIST_2, i.e. each repeated row
    appears once.
    """
    doubled_input = LIST_2 + LIST_2
    deduplicated, field_names = deduplicate_dicts(
        doubled_input,
        DeduplicateMode.BY_ALL_COLUMNS,
        [],
    )
    assert field_names == ['weight.sample_time', 'weight.kg']
    assert deduplicated == LIST_2