Filtering and heuristics are now done outside of the output format
This commit is contained in:
parent
927b603f27
commit
0d4d7bad12
|
@ -29,21 +29,52 @@ import argparse
|
|||
import datetime
|
||||
import logging
|
||||
import sys
|
||||
from collections.abc import Iterator
|
||||
from pathlib import Path
|
||||
|
||||
from .data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, WorkSample
|
||||
from .data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, WorkSample, RealizedWorkSample
|
||||
from .format import cli
|
||||
from .source import git_repo
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
DEFAULT_EST_TIME = datetime.timedelta(hours=1)
|
||||
DEFAULT_ESTIMATED_DURATION = datetime.timedelta(hours=1)
|
||||
ZERO_DURATION = datetime.timedelta(seconds=0)
|
||||
HOUR = datetime.timedelta(hours=1)
|
||||
MINUTE = datetime.timedelta(minutes=1)
|
||||
|
||||
|
||||
def filter_samples(samples: list[WorkSample], sample_filter: set[str]) -> list[WorkSample]:
    """Keep only the samples that carry at least one label from the filter.

    Args:
        samples: Samples to select from; the input list is not modified.
        sample_filter: Labels to select on. A sample is kept when any of its
            labels appears here. An empty filter selects every sample.

    Returns:
        A new list with the selected samples, in their original order.
    """
    # Build the lookup set once instead of once per sample.
    wanted = frozenset(sample_filter)
    if not wanted:
        # No filter requested: nothing to exclude.
        return list(samples)
    # Keep samples that share a label with the filter (i.e. are NOT disjoint).
    return [s for s in samples if not wanted.isdisjoint(s.labels)]
|
||||
|
||||
def heuristically_realize_samples(samples: list[WorkSample]) -> Iterator[RealizedWorkSample]:
    """Give every sample a concrete start time, yielding realized samples.

    Samples must be supplied in non-decreasing ``end_at`` order. A sample
    without a ``start_at`` is assumed to have lasted
    ``DEFAULT_ESTIMATED_DURATION``. Every start time, estimated or supplied,
    is moved forward if needed so that it is not earlier than the end of the
    previously yielded sample.

    Guarantees that:

    * No samples overlap.

    Args:
        samples: Samples ordered by ``end_at``.

    Yields:
        One ``RealizedWorkSample`` per input sample, in input order.

    Raises:
        AssertionError: If a sample ends before the sample preceding it.
    """
    # The Unix epoch (UTC) serves as the initial "previous end".
    previous_sample_end = datetime.datetime.fromtimestamp(0, datetime.UTC)

    for sample in samples:
        end_at = sample.end_at

        # TODO: Allow end_at is None
        assert previous_sample_end <= end_at, 'Iterating in incorrect order'

        start_at = sample.start_at
        if start_at is None:
            # No recorded start: fall back to the default estimated duration.
            start_at = end_at - DEFAULT_ESTIMATED_DURATION

        # Clamp both estimated and explicit starts; without this an explicit
        # start_at could reach back into the previous sample.
        start_at = max(previous_sample_end, start_at)

        yield RealizedWorkSample(labels=sample.labels, end_at=end_at, start_at=start_at)

        previous_sample_end = end_at
|
||||
|
||||
def parse_arguments():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
|
@ -69,14 +100,24 @@ def main():
|
|||
|
||||
args = parse_arguments()
|
||||
|
||||
shared_time_stamps: set[WorkSample] = set()
|
||||
shared_time_stamps_set: set[WorkSample] = set()
|
||||
for repo_path in args.repositories:
|
||||
logger.warning('Visit %s', repo_path)
|
||||
shared_time_stamps |= set(
|
||||
shared_time_stamps_set |= set(
|
||||
git_repo.iterate_samples_from_git_repository(repo_path),
|
||||
)
|
||||
|
||||
shared_time_stamps = sorted(shared_time_stamps)
|
||||
shared_time_stamps = sorted(shared_time_stamps_set, key = lambda s: s.end_at)
|
||||
del shared_time_stamps_set
|
||||
|
||||
for t in cli.generate_report(shared_time_stamps, sample_filter=args.sample_filter):
|
||||
sample_filter = args.sample_filter
|
||||
if len(sample_filter) != 0:
|
||||
logger.warning('Filtering %s samples', len(shared_time_stamps))
|
||||
shared_time_stamps = filter_samples(shared_time_stamps, sample_filter)
|
||||
logger.warning('Filtered down to %s samples', len(shared_time_stamps))
|
||||
|
||||
logger.warning('Realizing %s samples', len(shared_time_stamps))
|
||||
shared_time_stamps = heuristically_realize_samples(shared_time_stamps)
|
||||
|
||||
for t in cli.generate_report(shared_time_stamps):
|
||||
sys.stdout.write(t)
|
||||
|
|
|
@ -8,5 +8,11 @@ HIDDEN_LABEL_TOTAL = HIDDEN_LABEL_PREFIX + 'TOTAL'
|
|||
|
||||
@dataclasses.dataclass(frozen=True, order=True)
|
||||
class WorkSample:
|
||||
registered_at: datetime.datetime
|
||||
labels: Sequence[str]
|
||||
start_at: datetime.datetime | None
|
||||
end_at: datetime.datetime | None
|
||||
|
||||
@dataclasses.dataclass(frozen=True, order=True)
class RealizedWorkSample(WorkSample):
    """A ``WorkSample`` whose start and end are annotated as always present."""

    # Redeclared from WorkSample without the ``| None`` alternative.
    start_at: datetime.datetime
    end_at: datetime.datetime
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
import datetime
|
||||
from collections.abc import Iterator
|
||||
|
||||
from ..data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, WorkSample
|
||||
from ..data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, RealizedWorkSample
|
||||
|
||||
DEFAULT_EST_TIME = datetime.timedelta(hours=1)
|
||||
ZERO_DURATION = datetime.timedelta(seconds=0)
|
||||
HOUR = datetime.timedelta(hours=1)
|
||||
MINUTE = datetime.timedelta(minutes=1)
|
||||
|
@ -37,31 +36,20 @@ def fmt_line(label_type: str, label: str, total_time: datetime.timedelta) -> str
|
|||
|
||||
|
||||
def generate_report(
|
||||
samples: list[WorkSample],
|
||||
sample_filter=frozenset(),
|
||||
samples: list[RealizedWorkSample],
|
||||
) -> Iterator[str]:
|
||||
LABEL_FILTER = {}
|
||||
|
||||
# Time spent per label
|
||||
time_per_label: dict[str, datetime.timedelta] = {}
|
||||
years_per_label: dict[str, set[int]] = {}
|
||||
prev_time = datetime.datetime.fromtimestamp(0, datetime.UTC)
|
||||
for sample in samples:
|
||||
est_time: datetime.timedelta = DEFAULT_EST_TIME
|
||||
est_time = min(sample.registered_at - prev_time, est_time)
|
||||
|
||||
if len(sample_filter) == 0:
|
||||
pass
|
||||
elif not set(sample.labels).intersection(sample_filter):
|
||||
continue
|
||||
duration = sample.end_at - sample.start_at
|
||||
|
||||
for label in sample.labels:
|
||||
time_per_label.setdefault(label, ZERO_DURATION)
|
||||
time_per_label[label] += est_time
|
||||
years_per_label.setdefault(label, set()).add(sample.registered_at.year)
|
||||
time_per_label[label] += duration
|
||||
years_per_label.setdefault(label, set()).add(sample.end_at.year)
|
||||
|
||||
prev_time = sample.registered_at
|
||||
del sample, est_time
|
||||
del sample, duration
|
||||
|
||||
time_and_label = [(duration, label) for label, duration in time_per_label.items()]
|
||||
time_and_label.sort(reverse=True)
|
||||
|
@ -75,9 +63,6 @@ def generate_report(
|
|||
|
||||
label_type, label = label_and_type.split(':', 1)
|
||||
|
||||
if len(LABEL_FILTER) > 0 and label_type not in LABEL_FILTER:
|
||||
continue
|
||||
|
||||
yield fmt_line(label_type, label, total_time)
|
||||
yield ' ('
|
||||
yield fmt_year_ranges(years_per_label.get(label_and_type, []))
|
||||
|
|
|
@ -37,14 +37,21 @@ def get_samples_from_project(repo: git.Repo) -> Iterator[WorkSample]:
|
|||
labels = [HIDDEN_LABEL_TOTAL]
|
||||
labels.append('project:' + project_name)
|
||||
labels.append('author:' + commit.author.email)
|
||||
|
||||
authored_date = datetime.datetime.fromtimestamp(commit.authored_date, tz=datetime.UTC)
|
||||
committed_date = datetime.datetime.fromtimestamp(commit.committed_date, tz=datetime.UTC)
|
||||
|
||||
yield WorkSample(
|
||||
datetime.datetime.fromtimestamp(commit.authored_date, tz=datetime.UTC),
|
||||
tuple(labels),
|
||||
)
|
||||
yield WorkSample(
|
||||
datetime.datetime.fromtimestamp(commit.committed_date, tz=datetime.UTC),
|
||||
tuple(labels),
|
||||
labels = tuple(labels),
|
||||
start_at = None,
|
||||
end_at = authored_date,
|
||||
)
|
||||
if authored_date != committed_date:
|
||||
yield WorkSample(
|
||||
labels = tuple(labels),
|
||||
start_at = None,
|
||||
end_at = committed_date,
|
||||
)
|
||||
del labels
|
||||
|
||||
|
||||
|
|
Reference in New Issue
Block a user