1
0
personal-data/personal_data/activity.py

125 lines
3.4 KiB
Python
Raw Permalink Normal View History

import dataclasses
import datetime
2024-10-14 17:43:34 +00:00
from collections.abc import Iterator, Sequence
# Category name '__'.
# NOTE(review): presumably marks labels that should be hidden from output;
# the constant is not used in the visible part of this file - confirm against callers.
HIDDEN_LABEL_CATEGORY = '__'
2024-10-14 17:43:34 +00:00
# Duration assumed for a sample whose start is unknown: its start is estimated
# as this long before its end (see heuristically_realize_samples).
DEFAULT_ESTIMATED_DURATION = datetime.timedelta(hours=1)
2024-10-13 13:20:30 +00:00
@dataclasses.dataclass(frozen=True, order=True)
class Label:
    """Immutable ``(category, label)`` pair used to tag activity samples.

    Instances are hashable and ordered field by field, category first.
    """

    category: str
    label: str

    def __post_init__(self):
        category, label = self.category, self.label
        # Both fields are mandatory.
        assert category is not None
        # ':' is not allowed inside a category name.
        # NOTE(review): presumably ':' is reserved as a separator elsewhere - confirm.
        assert ':' not in category
        assert label is not None
2024-10-13 13:20:30 +00:00
@dataclasses.dataclass(frozen=True, order=True)
class ActivitySample:
    """A labelled span of activity whose start and/or end may be unknown.

    Attributes:
        labels: Labels attached to this sample.
        start_at: When the activity started, or ``None`` if unknown.
        end_at: When the activity ended, or ``None`` if unknown.
    """

    labels: Sequence[Label]
    start_at: datetime.datetime | None
    end_at: datetime.datetime | None

    def __post_init__(self):
        # The ordering can only be checked when both ends are known.
        if self.start_at is not None and self.end_at is not None:
            assert self.start_at <= self.end_at

    def single_label_with_category(self, category: str) -> str | None:
        """Return the text of the first label in ``category``.

        Args:
            category: Category name to look for among ``labels``.

        Returns:
            The ``label`` text of the first matching label, or ``None`` when
            no label belongs to ``category``.
        """
        return next(
            (label.label for label in self.labels if label.category == category),
            None,
        )
@dataclasses.dataclass(frozen=True, order=True)
class RealizedActivitySample(ActivitySample):
    """An ``ActivitySample`` whose start and end are both known.

    The fields are redeclared without ``None`` in their types, and
    construction asserts that both are set and correctly ordered.
    """

    start_at: datetime.datetime
    end_at: datetime.datetime

    def __post_init__(self):
        begin, finish = self.start_at, self.end_at
        assert begin is not None
        assert finish is not None
        # A realized sample may be instantaneous but never runs backwards.
        assert begin <= finish
2024-10-14 17:43:34 +00:00
def heuristically_realize_samples(
    samples: list[ActivitySample],
) -> Iterator[RealizedActivitySample]:
    """Yield a realized copy of every sample, ordered by ``end_at``.

    A sample without ``start_at`` gets an estimated one: ``end_at`` minus
    ``DEFAULT_ESTIMATED_DURATION``, but never earlier than the end of the
    previously yielded sample, so an estimated sample never overlaps its
    predecessor. A ``start_at`` that is already set is passed through as is.

    NOTE(review): an explicit ``start_at`` is not clamped, so samples that
    already overlap in the input still overlap in the output - confirm
    whether that is intended.

    The caller's list is not modified; a sorted copy is iterated.

    Args:
        samples: Samples to realize. Every sample must have ``end_at`` set.

    Yields:
        One ``RealizedActivitySample`` per input sample.
    """
    # TODO: Allow end_at is None
    previous_sample_end = None
    for sample in sorted(samples, key=lambda x: x.end_at):
        end_at = sample.end_at

        if previous_sample_end is None:
            # Seed with the Unix epoch as the "end of the previous sample",
            # matching the first sample's timezone awareness so the
            # comparisons below are valid.
            epoch_tz = datetime.UTC if end_at.tzinfo else None
            previous_sample_end = datetime.datetime.fromtimestamp(0, epoch_tz)

        assert previous_sample_end <= end_at, 'Iterating in incorrect order'

        start_at = sample.start_at
        if start_at is None:
            # Estimate the start, but do not reach back into the previous sample.
            start_at = max(previous_sample_end, end_at - DEFAULT_ESTIMATED_DURATION)

        yield RealizedActivitySample(
            labels=sample.labels,
            end_at=end_at,
            start_at=start_at,
        )

        previous_sample_end = end_at
2024-10-14 18:17:41 +00:00
def mergable_labels(a: Sequence[Label], b: Sequence[Label]) -> Sequence[Label]:
    """Return the labels that occur in both ``a`` and ``b``.

    Each shared label appears exactly once, in the order of its first
    occurrence in ``a``, so the result is the same on every run rather than
    depending on set iteration order.

    Args:
        a: First label sequence; determines the order of the result.
        b: Second label sequence.

    Returns:
        A new list with the labels common to both sequences.
    """
    shared = set(b)
    # dict.fromkeys de-duplicates ``a`` while keeping first-seen order.
    return [label for label in dict.fromkeys(a) if label in shared]
def merge_adjacent_samples(
    samples: list[RealizedActivitySample],
    group_category: str,
    *,
    max_interval_between_samples: datetime.timedelta = datetime.timedelta(
        minutes=5,
    ),
) -> list[RealizedActivitySample]:
    """Merge consecutive samples that belong to the same group.

    Samples are processed in order of ``start_at``. A sample is merged into
    the one before it when both have the same label in ``group_category``
    and the gap between them is below ``max_interval_between_samples``
    (overlapping samples have a negative gap and therefore qualify). Two
    samples that both lack a label in ``group_category`` compare equal and
    are treated as the same group.

    A merged sample keeps only the labels shared by its parts, starts at the
    earlier start and ends at the later of the two ends, so merging a sample
    that ends early never shortens the span.

    The caller's list is not modified; a sorted copy is iterated.

    Args:
        samples: Realized samples to merge.
        group_category: Label category whose value decides group membership.
        max_interval_between_samples: Gap at or above which two samples are
            kept separate. Defaults to five minutes.

    Returns:
        A new list of samples ordered by ``start_at``.
    """

    def can_merge(
        before: RealizedActivitySample,
        after: RealizedActivitySample,
    ) -> bool:
        before_group = before.single_label_with_category(group_category)
        after_group = after.single_label_with_category(group_category)
        if before_group != after_group:
            return False
        return (after.start_at - before.end_at) < max_interval_between_samples

    merged: list[RealizedActivitySample] = []
    for sample in sorted(samples, key=lambda s: s.start_at):
        if merged and can_merge(merged[-1], sample):
            previous = merged[-1]
            # TODO: Merge/strip attributes?
            merged[-1] = RealizedActivitySample(
                labels=mergable_labels(previous.labels, sample.labels),
                start_at=previous.start_at,
                # ``sample`` may end before ``previous`` does when they overlap.
                end_at=max(previous.end_at, sample.end_at),
            )
        else:
            merged.append(sample)
    return merged