1
0

Filtering and heuristic is now done outside of output format

This commit is contained in:
Jon Michael Aanes 2024-08-26 00:14:12 +02:00
parent 927b603f27
commit 0d4d7bad12
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
4 changed files with 73 additions and 34 deletions

View File

@ -29,21 +29,52 @@ import argparse
import datetime
import logging
import sys
from collections.abc import Iterator
from pathlib import Path
from .data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, WorkSample
from .data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, WorkSample, RealizedWorkSample
from .format import cli
from .source import git_repo
logger = logging.getLogger(__name__)
DEFAULT_EST_TIME = datetime.timedelta(hours=1)
DEFAULT_ESTIMATED_DURATION = datetime.timedelta(hours=1)
ZERO_DURATION = datetime.timedelta(seconds=0)
HOUR = datetime.timedelta(hours=1)
MINUTE = datetime.timedelta(minutes=1)
def filter_samples(samples: list[WorkSample], sample_filter: set[str]) -> list[WorkSample]:
    """Keep only the samples that carry at least one label from the filter.

    This mirrors the filtering previously done inside the report generator,
    where a sample was skipped when none of its labels was in the filter.

    Args:
        samples: Samples to select from; each must expose a ``labels``
            iterable of strings.
        sample_filter: Labels to select by. Must be non-empty; callers that
            want no filtering should simply not call this function.

    Returns:
        A new list with the matching samples, in their original order.
    """
    assert len(sample_filter) > 0
    # Build the lookup set once instead of once per sample.
    wanted = frozenset(sample_filter)
    # A sample matches when it shares at least one label with the filter.
    return [s for s in samples if not wanted.isdisjoint(s.labels)]
def heuristically_realize_samples(samples: list[WorkSample]) -> Iterator[RealizedWorkSample]:
"""Secret sauce.
Guarentees that:
* No samples overlap.
"""
previous_sample_end = datetime.datetime.fromtimestamp(0, datetime.UTC)
for sample in samples:
end_at = sample.end_at
assert previous_sample_end <= end_at, 'Iterating in incorrect order'
# TODO: Allow end_at is None
start_at = sample.start_at
if start_at is None:
estimated_duration: datetime.timedelta = DEFAULT_ESTIMATED_DURATION
start_at = max(previous_sample_end, end_at - estimated_duration)
del estimated_duration
yield RealizedWorkSample(labels=sample.labels, end_at=end_at, start_at=start_at)
previous_sample_end = sample.end_at
del sample
def parse_arguments():
parser = argparse.ArgumentParser()
parser.add_argument(
@ -69,14 +100,24 @@ def main():
args = parse_arguments()
shared_time_stamps: set[WorkSample] = set()
shared_time_stamps_set: set[WorkSample] = set()
for repo_path in args.repositories:
logger.warning('Visit %s', repo_path)
shared_time_stamps |= set(
shared_time_stamps_set |= set(
git_repo.iterate_samples_from_git_repository(repo_path),
)
shared_time_stamps = sorted(shared_time_stamps)
shared_time_stamps = sorted(shared_time_stamps_set, key = lambda s: s.end_at)
del shared_time_stamps_set
for t in cli.generate_report(shared_time_stamps, sample_filter=args.sample_filter):
sample_filter = args.sample_filter
if len(sample_filter) != 0:
logger.warning('Filtering %s samples', len(shared_time_stamps))
shared_time_stamps = filter_samples(shared_time_stamps, sample_filter)
logger.warning('Filtered down to %s samples', len(shared_time_stamps))
logger.warning('Realizing %s samples', len(shared_time_stamps))
shared_time_stamps = heuristically_realize_samples(shared_time_stamps)
for t in cli.generate_report(shared_time_stamps):
sys.stdout.write(t)

View File

@ -8,5 +8,11 @@ HIDDEN_LABEL_TOTAL = HIDDEN_LABEL_PREFIX + 'TOTAL'
@dataclasses.dataclass(frozen=True, order=True)
class WorkSample:
registered_at: datetime.datetime
labels: Sequence[str]
start_at: datetime.datetime | None
end_at: datetime.datetime | None
@dataclasses.dataclass(frozen=True, order=True)
class RealizedWorkSample(WorkSample):
start_at: datetime.datetime
end_at: datetime.datetime

View File

@ -1,9 +1,8 @@
import datetime
from collections.abc import Iterator
from ..data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, WorkSample
from ..data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, RealizedWorkSample
DEFAULT_EST_TIME = datetime.timedelta(hours=1)
ZERO_DURATION = datetime.timedelta(seconds=0)
HOUR = datetime.timedelta(hours=1)
MINUTE = datetime.timedelta(minutes=1)
@ -37,31 +36,20 @@ def fmt_line(label_type: str, label: str, total_time: datetime.timedelta) -> str
def generate_report(
samples: list[WorkSample],
sample_filter=frozenset(),
samples: list[RealizedWorkSample],
) -> Iterator[str]:
LABEL_FILTER = {}
# Time spent per label
time_per_label: dict[str, datetime.timedelta] = {}
years_per_label: dict[str, set[int]] = {}
prev_time = datetime.datetime.fromtimestamp(0, datetime.UTC)
for sample in samples:
est_time: datetime.timedelta = DEFAULT_EST_TIME
est_time = min(sample.registered_at - prev_time, est_time)
if len(sample_filter) == 0:
pass
elif not set(sample.labels).intersection(sample_filter):
continue
duration = sample.end_at - sample.start_at
for label in sample.labels:
time_per_label.setdefault(label, ZERO_DURATION)
time_per_label[label] += est_time
years_per_label.setdefault(label, set()).add(sample.registered_at.year)
time_per_label[label] += duration
years_per_label.setdefault(label, set()).add(sample.end_at.year)
prev_time = sample.registered_at
del sample, est_time
del sample, duration
time_and_label = [(duration, label) for label, duration in time_per_label.items()]
time_and_label.sort(reverse=True)
@ -75,9 +63,6 @@ def generate_report(
label_type, label = label_and_type.split(':', 1)
if len(LABEL_FILTER) > 0 and label_type not in LABEL_FILTER:
continue
yield fmt_line(label_type, label, total_time)
yield ' ('
yield fmt_year_ranges(years_per_label.get(label_and_type, []))

View File

@ -37,14 +37,21 @@ def get_samples_from_project(repo: git.Repo) -> Iterator[WorkSample]:
labels = [HIDDEN_LABEL_TOTAL]
labels.append('project:' + project_name)
labels.append('author:' + commit.author.email)
authored_date = datetime.datetime.fromtimestamp(commit.authored_date, tz=datetime.UTC)
committed_date = datetime.datetime.fromtimestamp(commit.committed_date, tz=datetime.UTC)
yield WorkSample(
datetime.datetime.fromtimestamp(commit.authored_date, tz=datetime.UTC),
tuple(labels),
)
yield WorkSample(
datetime.datetime.fromtimestamp(commit.committed_date, tz=datetime.UTC),
tuple(labels),
labels = tuple(labels),
start_at = None,
end_at = authored_date,
)
if authored_date != committed_date:
yield WorkSample(
labels = tuple(labels),
start_at = None,
end_at = committed_date,
)
del labels