Filtering and heuristics are now done outside of the output format
This commit is contained in:
parent
927b603f27
commit
0d4d7bad12
|
@ -29,21 +29,52 @@ import argparse
|
||||||
import datetime
|
import datetime
|
||||||
import logging
|
import logging
|
||||||
import sys
|
import sys
|
||||||
|
from collections.abc import Iterator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from .data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, WorkSample
|
from .data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, WorkSample, RealizedWorkSample
|
||||||
from .format import cli
|
from .format import cli
|
||||||
from .source import git_repo
|
from .source import git_repo
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
DEFAULT_EST_TIME = datetime.timedelta(hours=1)
|
DEFAULT_ESTIMATED_DURATION = datetime.timedelta(hours=1)
|
||||||
ZERO_DURATION = datetime.timedelta(seconds=0)
|
ZERO_DURATION = datetime.timedelta(seconds=0)
|
||||||
HOUR = datetime.timedelta(hours=1)
|
HOUR = datetime.timedelta(hours=1)
|
||||||
MINUTE = datetime.timedelta(minutes=1)
|
MINUTE = datetime.timedelta(minutes=1)
|
||||||
|
|
||||||
|
|
||||||
|
def filter_samples(samples: "list[WorkSample]", sample_filter: "set[str]") -> "list[WorkSample]":
    """Return the samples that carry at least one of the labels in ``sample_filter``.

    This mirrors the filtering that previously lived inside the report
    generator: a sample is kept when its labels intersect the filter and
    dropped otherwise.  An empty filter means "no filtering", so every sample
    is returned (as a new list).

    Args:
        samples: Samples to filter; input order is preserved.
        sample_filter: Labels to select on.

    Returns:
        A new list with the matching samples.
    """
    if not sample_filter:
        # Empty filter selects everything, matching the old in-report behaviour.
        return list(samples)

    # Normalise once so any iterable of labels (set, list, tuple) is accepted.
    wanted = frozenset(sample_filter)
    return [s for s in samples if not wanted.isdisjoint(s.labels)]
|
||||||
|
|
||||||
|
def heuristically_realize_samples(samples: list[WorkSample]) -> Iterator[RealizedWorkSample]:
    """Give every sample a concrete start time.

    Samples are consumed in the order given, which must be ascending by
    ``end_at``.  A sample without a ``start_at`` is assumed to have started
    ``DEFAULT_ESTIMATED_DURATION`` before it ended, but never before the
    previous sample ended.

    Guarantees that:
    * Samples with an estimated start never overlap the preceding sample.
      NOTE(review): an explicitly supplied ``start_at`` is passed through
      unchanged, so the no-overlap property relies on sources not providing
      overlapping explicit starts -- confirm.
    """
    # Epoch acts as "no previous sample" for the first iteration.
    last_end = datetime.datetime.fromtimestamp(0, datetime.UTC)

    for current in samples:
        end_at = current.end_at
        assert last_end <= end_at, 'Iterating in incorrect order'

        # TODO: Allow end_at is None

        start_at = (
            current.start_at
            if current.start_at is not None
            # Estimate the start, clamped so it cannot reach into the previous sample.
            else max(last_end, end_at - DEFAULT_ESTIMATED_DURATION)
        )

        yield RealizedWorkSample(labels=current.labels, end_at=end_at, start_at=start_at)

        last_end = end_at
|
||||||
|
|
||||||
def parse_arguments():
|
def parse_arguments():
|
||||||
parser = argparse.ArgumentParser()
|
parser = argparse.ArgumentParser()
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
|
@ -69,14 +100,24 @@ def main():
|
||||||
|
|
||||||
args = parse_arguments()
|
args = parse_arguments()
|
||||||
|
|
||||||
shared_time_stamps: set[WorkSample] = set()
|
shared_time_stamps_set: set[WorkSample] = set()
|
||||||
for repo_path in args.repositories:
|
for repo_path in args.repositories:
|
||||||
logger.warning('Visit %s', repo_path)
|
logger.warning('Visit %s', repo_path)
|
||||||
shared_time_stamps |= set(
|
shared_time_stamps_set |= set(
|
||||||
git_repo.iterate_samples_from_git_repository(repo_path),
|
git_repo.iterate_samples_from_git_repository(repo_path),
|
||||||
)
|
)
|
||||||
|
|
||||||
shared_time_stamps = sorted(shared_time_stamps)
|
shared_time_stamps = sorted(shared_time_stamps_set, key = lambda s: s.end_at)
|
||||||
|
del shared_time_stamps_set
|
||||||
|
|
||||||
for t in cli.generate_report(shared_time_stamps, sample_filter=args.sample_filter):
|
sample_filter = args.sample_filter
|
||||||
|
if len(sample_filter) != 0:
|
||||||
|
logger.warning('Filtering %s samples', len(shared_time_stamps))
|
||||||
|
shared_time_stamps = filter_samples(shared_time_stamps, sample_filter)
|
||||||
|
logger.warning('Filtered down to %s samples', len(shared_time_stamps))
|
||||||
|
|
||||||
|
logger.warning('Realizing %s samples', len(shared_time_stamps))
|
||||||
|
shared_time_stamps = heuristically_realize_samples(shared_time_stamps)
|
||||||
|
|
||||||
|
for t in cli.generate_report(shared_time_stamps):
|
||||||
sys.stdout.write(t)
|
sys.stdout.write(t)
|
||||||
|
|
|
@ -8,5 +8,11 @@ HIDDEN_LABEL_TOTAL = HIDDEN_LABEL_PREFIX + 'TOTAL'
|
||||||
|
|
||||||
@dataclasses.dataclass(frozen=True, order=True)
|
@dataclasses.dataclass(frozen=True, order=True)
|
||||||
class WorkSample:
|
class WorkSample:
|
||||||
registered_at: datetime.datetime
|
|
||||||
labels: Sequence[str]
|
labels: Sequence[str]
|
||||||
|
start_at: datetime.datetime | None
|
||||||
|
end_at: datetime.datetime | None
|
||||||
|
|
||||||
|
@dataclasses.dataclass(frozen=True, order=True)
class RealizedWorkSample(WorkSample):
    """A ``WorkSample`` whose start and end are both concrete datetimes.

    Re-declares the inherited ``start_at`` / ``end_at`` fields with
    non-optional annotations; the field set and their order are unchanged.
    """

    # Narrowed from ``datetime.datetime | None`` on the base class.
    start_at: datetime.datetime
    end_at: datetime.datetime
|
||||||
|
|
|
@ -1,9 +1,8 @@
|
||||||
import datetime
|
import datetime
|
||||||
from collections.abc import Iterator
|
from collections.abc import Iterator
|
||||||
|
|
||||||
from ..data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, WorkSample
|
from ..data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, RealizedWorkSample
|
||||||
|
|
||||||
DEFAULT_EST_TIME = datetime.timedelta(hours=1)
|
|
||||||
ZERO_DURATION = datetime.timedelta(seconds=0)
|
ZERO_DURATION = datetime.timedelta(seconds=0)
|
||||||
HOUR = datetime.timedelta(hours=1)
|
HOUR = datetime.timedelta(hours=1)
|
||||||
MINUTE = datetime.timedelta(minutes=1)
|
MINUTE = datetime.timedelta(minutes=1)
|
||||||
|
@ -37,31 +36,20 @@ def fmt_line(label_type: str, label: str, total_time: datetime.timedelta) -> str
|
||||||
|
|
||||||
|
|
||||||
def generate_report(
|
def generate_report(
|
||||||
samples: list[WorkSample],
|
samples: list[RealizedWorkSample],
|
||||||
sample_filter=frozenset(),
|
|
||||||
) -> Iterator[str]:
|
) -> Iterator[str]:
|
||||||
LABEL_FILTER = {}
|
|
||||||
|
|
||||||
# Time spent per label
|
# Time spent per label
|
||||||
time_per_label: dict[str, datetime.timedelta] = {}
|
time_per_label: dict[str, datetime.timedelta] = {}
|
||||||
years_per_label: dict[str, set[int]] = {}
|
years_per_label: dict[str, set[int]] = {}
|
||||||
prev_time = datetime.datetime.fromtimestamp(0, datetime.UTC)
|
|
||||||
for sample in samples:
|
for sample in samples:
|
||||||
est_time: datetime.timedelta = DEFAULT_EST_TIME
|
duration = sample.end_at - sample.start_at
|
||||||
est_time = min(sample.registered_at - prev_time, est_time)
|
|
||||||
|
|
||||||
if len(sample_filter) == 0:
|
|
||||||
pass
|
|
||||||
elif not set(sample.labels).intersection(sample_filter):
|
|
||||||
continue
|
|
||||||
|
|
||||||
for label in sample.labels:
|
for label in sample.labels:
|
||||||
time_per_label.setdefault(label, ZERO_DURATION)
|
time_per_label.setdefault(label, ZERO_DURATION)
|
||||||
time_per_label[label] += est_time
|
time_per_label[label] += duration
|
||||||
years_per_label.setdefault(label, set()).add(sample.registered_at.year)
|
years_per_label.setdefault(label, set()).add(sample.end_at.year)
|
||||||
|
|
||||||
prev_time = sample.registered_at
|
del sample, duration
|
||||||
del sample, est_time
|
|
||||||
|
|
||||||
time_and_label = [(duration, label) for label, duration in time_per_label.items()]
|
time_and_label = [(duration, label) for label, duration in time_per_label.items()]
|
||||||
time_and_label.sort(reverse=True)
|
time_and_label.sort(reverse=True)
|
||||||
|
@ -75,9 +63,6 @@ def generate_report(
|
||||||
|
|
||||||
label_type, label = label_and_type.split(':', 1)
|
label_type, label = label_and_type.split(':', 1)
|
||||||
|
|
||||||
if len(LABEL_FILTER) > 0 and label_type not in LABEL_FILTER:
|
|
||||||
continue
|
|
||||||
|
|
||||||
yield fmt_line(label_type, label, total_time)
|
yield fmt_line(label_type, label, total_time)
|
||||||
yield ' ('
|
yield ' ('
|
||||||
yield fmt_year_ranges(years_per_label.get(label_and_type, []))
|
yield fmt_year_ranges(years_per_label.get(label_and_type, []))
|
||||||
|
|
|
@ -37,13 +37,20 @@ def get_samples_from_project(repo: git.Repo) -> Iterator[WorkSample]:
|
||||||
labels = [HIDDEN_LABEL_TOTAL]
|
labels = [HIDDEN_LABEL_TOTAL]
|
||||||
labels.append('project:' + project_name)
|
labels.append('project:' + project_name)
|
||||||
labels.append('author:' + commit.author.email)
|
labels.append('author:' + commit.author.email)
|
||||||
|
|
||||||
|
authored_date = datetime.datetime.fromtimestamp(commit.authored_date, tz=datetime.UTC)
|
||||||
|
committed_date = datetime.datetime.fromtimestamp(commit.committed_date, tz=datetime.UTC)
|
||||||
|
|
||||||
yield WorkSample(
|
yield WorkSample(
|
||||||
datetime.datetime.fromtimestamp(commit.authored_date, tz=datetime.UTC),
|
labels = tuple(labels),
|
||||||
tuple(labels),
|
start_at = None,
|
||||||
|
end_at = authored_date,
|
||||||
)
|
)
|
||||||
|
if authored_date != committed_date:
|
||||||
yield WorkSample(
|
yield WorkSample(
|
||||||
datetime.datetime.fromtimestamp(commit.committed_date, tz=datetime.UTC),
|
labels = tuple(labels),
|
||||||
tuple(labels),
|
start_at = None,
|
||||||
|
end_at = committed_date,
|
||||||
)
|
)
|
||||||
del labels
|
del labels
|
||||||
|
|
||||||
|
|
Reference in New Issue
Block a user