From 0d4d7bad1247e70e73e5accb478ba09de3265a8f Mon Sep 17 00:00:00 2001 From: Jon Michael Aanes Date: Mon, 26 Aug 2024 00:14:12 +0200 Subject: [PATCH] Filtering and heuristic is now done outside of output format --- git_time_tracker/__init__.py | 53 +++++++++++++++++++++++++---- git_time_tracker/data.py | 8 ++++- git_time_tracker/format/cli.py | 27 ++++----------- git_time_tracker/source/git_repo.py | 19 +++++++---- 4 files changed, 73 insertions(+), 34 deletions(-) diff --git a/git_time_tracker/__init__.py b/git_time_tracker/__init__.py index ef88615..e6243f3 100644 --- a/git_time_tracker/__init__.py +++ b/git_time_tracker/__init__.py @@ -29,21 +29,52 @@ import argparse import datetime import logging import sys +from collections.abc import Iterator from pathlib import Path -from .data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, WorkSample +from .data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, WorkSample, RealizedWorkSample from .format import cli from .source import git_repo logger = logging.getLogger(__name__) -DEFAULT_EST_TIME = datetime.timedelta(hours=1) +DEFAULT_ESTIMATED_DURATION = datetime.timedelta(hours=1) ZERO_DURATION = datetime.timedelta(seconds=0) HOUR = datetime.timedelta(hours=1) MINUTE = datetime.timedelta(minutes=1) +def filter_samples(samples: list[WorkSample], sample_filter: set[str]) -> list[WorkSample]: + assert len(sample_filter) > 0 + return [s for s in samples if set(s.labels).intersection(sample_filter)]  # Keep only samples sharing a label with the filter, as generate_report did before. + +def heuristically_realize_samples(samples: list[WorkSample]) -> Iterator[RealizedWorkSample]: + """Secret sauce. + + Guarantees that: + * No samples overlap. 
+ """ + + previous_sample_end = datetime.datetime.fromtimestamp(0, datetime.UTC) + for sample in samples: + end_at = sample.end_at + + assert previous_sample_end <= end_at, 'Iterating in incorrect order' + + # TODO: Allow end_at is None + + start_at = sample.start_at + if start_at is None: + estimated_duration: datetime.timedelta = DEFAULT_ESTIMATED_DURATION + start_at = max(previous_sample_end, end_at - estimated_duration) + del estimated_duration + + yield RealizedWorkSample(labels=sample.labels, end_at=end_at, start_at=start_at) + + previous_sample_end = sample.end_at + del sample + def parse_arguments(): parser = argparse.ArgumentParser() parser.add_argument( @@ -69,14 +100,24 @@ def main(): args = parse_arguments() - shared_time_stamps: set[WorkSample] = set() + shared_time_stamps_set: set[WorkSample] = set() for repo_path in args.repositories: logger.warning('Visit %s', repo_path) - shared_time_stamps |= set( + shared_time_stamps_set |= set( git_repo.iterate_samples_from_git_repository(repo_path), ) - shared_time_stamps = sorted(shared_time_stamps) + shared_time_stamps = sorted(shared_time_stamps_set, key = lambda s: s.end_at) + del shared_time_stamps_set - for t in cli.generate_report(shared_time_stamps, sample_filter=args.sample_filter): + sample_filter = args.sample_filter + if len(sample_filter) != 0: + logger.warning('Filtering %s samples', len(shared_time_stamps)) + shared_time_stamps = filter_samples(shared_time_stamps, sample_filter) + logger.warning('Filtered down to %s samples', len(shared_time_stamps)) + + logger.warning('Realizing %s samples', len(shared_time_stamps)) + shared_time_stamps = heuristically_realize_samples(shared_time_stamps) + + for t in cli.generate_report(shared_time_stamps): sys.stdout.write(t) diff --git a/git_time_tracker/data.py b/git_time_tracker/data.py index ca91455..1c99782 100644 --- a/git_time_tracker/data.py +++ b/git_time_tracker/data.py @@ -8,5 +8,11 @@ HIDDEN_LABEL_TOTAL = HIDDEN_LABEL_PREFIX + 'TOTAL' 
@dataclasses.dataclass(frozen=True, order=True) class WorkSample: - registered_at: datetime.datetime labels: Sequence[str] + start_at: datetime.datetime | None + end_at: datetime.datetime | None + +@dataclasses.dataclass(frozen=True, order=True) +class RealizedWorkSample(WorkSample): + start_at: datetime.datetime + end_at: datetime.datetime diff --git a/git_time_tracker/format/cli.py b/git_time_tracker/format/cli.py index 227dd4d..bab846f 100644 --- a/git_time_tracker/format/cli.py +++ b/git_time_tracker/format/cli.py @@ -1,9 +1,8 @@ import datetime from collections.abc import Iterator -from ..data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, WorkSample +from ..data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, RealizedWorkSample -DEFAULT_EST_TIME = datetime.timedelta(hours=1) ZERO_DURATION = datetime.timedelta(seconds=0) HOUR = datetime.timedelta(hours=1) MINUTE = datetime.timedelta(minutes=1) @@ -37,31 +36,20 @@ def fmt_line(label_type: str, label: str, total_time: datetime.timedelta) -> str def generate_report( - samples: list[WorkSample], - sample_filter=frozenset(), + samples: list[RealizedWorkSample], ) -> Iterator[str]: - LABEL_FILTER = {} - # Time spent per label time_per_label: dict[str, datetime.timedelta] = {} years_per_label: dict[str, set[int]] = {} - prev_time = datetime.datetime.fromtimestamp(0, datetime.UTC) for sample in samples: - est_time: datetime.timedelta = DEFAULT_EST_TIME - est_time = min(sample.registered_at - prev_time, est_time) - - if len(sample_filter) == 0: - pass - elif not set(sample.labels).intersection(sample_filter): - continue + duration = sample.end_at - sample.start_at for label in sample.labels: time_per_label.setdefault(label, ZERO_DURATION) - time_per_label[label] += est_time - years_per_label.setdefault(label, set()).add(sample.registered_at.year) + time_per_label[label] += duration + years_per_label.setdefault(label, set()).add(sample.end_at.year) - prev_time = sample.registered_at - del sample, est_time + del sample, 
duration time_and_label = [(duration, label) for label, duration in time_per_label.items()] time_and_label.sort(reverse=True) @@ -75,9 +63,6 @@ def generate_report( label_type, label = label_and_type.split(':', 1) - if len(LABEL_FILTER) > 0 and label_type not in LABEL_FILTER: - continue - yield fmt_line(label_type, label, total_time) yield ' (' yield fmt_year_ranges(years_per_label.get(label_and_type, [])) diff --git a/git_time_tracker/source/git_repo.py b/git_time_tracker/source/git_repo.py index dbe22f0..1cb9ab6 100644 --- a/git_time_tracker/source/git_repo.py +++ b/git_time_tracker/source/git_repo.py @@ -37,14 +37,21 @@ def get_samples_from_project(repo: git.Repo) -> Iterator[WorkSample]: labels = [HIDDEN_LABEL_TOTAL] labels.append('project:' + project_name) labels.append('author:' + commit.author.email) + + authored_date = datetime.datetime.fromtimestamp(commit.authored_date, tz=datetime.UTC) + committed_date = datetime.datetime.fromtimestamp(commit.committed_date, tz=datetime.UTC) + yield WorkSample( - datetime.datetime.fromtimestamp(commit.authored_date, tz=datetime.UTC), - tuple(labels), - ) - yield WorkSample( - datetime.datetime.fromtimestamp(commit.committed_date, tz=datetime.UTC), - tuple(labels), + labels = tuple(labels), + start_at = None, + end_at = authored_date, ) + if authored_date != committed_date: + yield WorkSample( + labels = tuple(labels), + start_at = None, + end_at = committed_date, + ) del labels