diff --git a/personal_data/util.py b/personal_data/util.py
index 2185520..e1d1003 100644
--- a/personal_data/util.py
+++ b/personal_data/util.py
@@ -58,6 +58,7 @@ def csv_str_to_value(
         return None
     return s
 
+
 def csv_safe_value(v: object) -> str:
     if isinstance(v, urllib.parse.ParseResult):
         return v.geturl()
@@ -91,6 +92,8 @@ def deduplicate_by_ignoring_certain_fields(
         for idx2, second in enumerate(dicts[idx1 + 1 :], idx1 + 1):
             if equals_without_fields(first, second, deduplicate_ignore_columns):
                 to_remove.add(idx2)
+            del idx2, second
+        del idx1, first
 
     to_remove = sorted(to_remove)
     while to_remove:
@@ -100,10 +103,10 @@ def deduplicate_by_ignoring_certain_fields(
 
 
 def deduplicate_dicts(
-    dicts: Sequence[dict],
+    dicts: Sequence[dict[str,typing.Any] | frozendict[str,typing.Any]],
     deduplicate_mode: data.DeduplicateMode,
     deduplicate_ignore_columns: list[str],
-) -> tuple[Sequence[dict], list[str]]:
+) -> tuple[Sequence[dict[str,typing.Any]], list[str]]:
     assert isinstance(deduplicate_ignore_columns, list), deduplicate_ignore_columns
 
     fieldnames = []
@@ -133,7 +136,7 @@ def deduplicate_dicts(
     return dicts, fieldnames
 
 
-def normalize_dict(d: dict) -> frozendict:
+def normalize_dict(d: dict[str,typing.Any]) -> frozendict[str,typing.Any]:
     return frozendict(
         {k: csv_str_to_value(str(v)) for k, v in d.items() if csv_str_to_value(str(v)) is not None},
     )