1
0

Merged git-time-tracker into personal-data
All checks were successful
Test Python / Test (push) Successful in 31s

This commit is contained in:
Jon Michael Aanes 2024-10-13 14:31:23 +02:00
parent ecab909851
commit 4f851b21b5
Signed by: Jmaa
SSH Key Fingerprint: SHA256:Ab0GfHGCblESJx7JRE4fj4bFy/KRpeLhi41y4pF3sNA
11 changed files with 546 additions and 75 deletions

View File

@ -0,0 +1,193 @@
"""# Git-based Time Tracker.
Quick and dirty time tracker on git histories.
Uses the simple heuristic that each commit takes precisely one hour of work.
A commit is automatically trimmed to less than one hour if the previous commit
occurred less than an hour before it.
Usage:
```
python -m git_time_tracker --git-repo REPO1 REPO2...
```
# Obligatory
This tool reports:
```
project Jmaa/git-time-tracker.git 3h 33m (2024)
```
And the ([Hamster](https://github.com/projecthamster/hamster)) manual time tracker reports:
![](docs/obligatory-hamster.png)
"""
import argparse
import datetime
import logging
import sys
from collections.abc import Iterator
from pathlib import Path
from .data import (
HIDDEN_LABEL_PREFIX,
HIDDEN_LABEL_TOTAL,
RealizedWorkSample,
WorkSample,
)
from .format import cli, icalendar
from .source import csv_file, git_repo
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Duration assumed for a sample that has no explicit start time.
DEFAULT_ESTIMATED_DURATION = datetime.timedelta(hours=1)
# Convenience durations.
# NOTE(review): not referenced in the visible part of this module.
ZERO_DURATION = datetime.timedelta(seconds=0)
HOUR = datetime.timedelta(hours=1)
MINUTE = datetime.timedelta(minutes=1)
def filter_samples(
    samples: list[WorkSample],
    sample_filter: set[str],
) -> list[WorkSample]:
    """Keep the samples that carry at least one of the requested labels.

    The relative order of the surviving samples is unchanged.
    `sample_filter` must not be empty.
    """
    assert len(sample_filter) > 0
    return [
        candidate
        for candidate in samples
        if not set(candidate.labels).isdisjoint(sample_filter)
    ]
def heuristically_realize_samples(
    samples: list[WorkSample],
) -> Iterator[RealizedWorkSample]:
    """Secret sauce: give every sample a concrete start time.

    Samples must be supplied in ascending `end_at` order. A sample without a
    start time is assumed to have lasted `DEFAULT_ESTIMATED_DURATION`, but its
    start is never placed before the end of the previous sample. A sample
    that already has a start time keeps it as-is.

    Guarantees that:
    * No estimated sample overlaps the sample before it.
    """
    latest_end = None
    for sample in samples:
        # TODO: Allow end_at is None
        finished = sample.end_at
        if latest_end is None:
            # Seed with the Unix epoch, timezone-aware only when the first
            # sample is, so that the comparisons below are well-defined.
            epoch_tz = datetime.UTC if finished.tzinfo else None
            latest_end = datetime.datetime.fromtimestamp(0, epoch_tz)
        assert latest_end <= finished, 'Iterating in incorrect order'

        began = sample.start_at
        if began is None:
            began = max(latest_end, finished - DEFAULT_ESTIMATED_DURATION)

        yield RealizedWorkSample(labels=sample.labels, end_at=finished, start_at=began)
        latest_end = finished
def parse_arguments():
    """Parse the command line.

    Options:
    * `--git-repo`, `--csv-file`, `--filter`: repeatable, each taking one or
      more values that are accumulated into a list.
    * `--format`: either `cli_report` (default) or `icalendar`.
    * `--out`: output path handed to the icalendar writer.
    """
    parser = argparse.ArgumentParser()

    # The three accumulating options differ only in flag, value type and
    # destination attribute.
    accumulating_options = (
        ('--git-repo', Path, 'repositories'),
        ('--csv-file', Path, 'csv_files'),
        ('--filter', str, 'sample_filter'),
    )
    for flag, value_type, destination in accumulating_options:
        parser.add_argument(
            flag,
            action='extend',
            nargs='+',
            type=value_type,
            dest=destination,
            default=[],
        )

    parser.add_argument(
        '--format',
        action='store',
        type=str,
        dest='format_mode',
        default='cli_report',
        choices=['cli_report', 'icalendar'],
    )
    parser.add_argument(
        '--out',
        action='store',
        type=Path,
        dest='output_file',
        default='output/samples.ics',
    )
    return parser.parse_args()
def load_samples(args) -> set[WorkSample]:
    """Gather the distinct samples from every source named on the command line.

    Reads `args.repositories` (git repositories) and `args.csv_files`
    (CSV files); duplicates across sources collapse because the result is a
    set.
    """
    collected: set[WorkSample] = set()

    # Git repositories
    for repo_path in args.repositories:
        logger.warning('Determine commits from %s', repo_path)
        collected.update(git_repo.iterate_samples_from_git_repository(repo_path))

    # CSV Files
    for csv_path in args.csv_files:
        logger.warning('Load samples from %s', csv_path)
        collected.update(csv_file.iterate_samples_from_csv_file(csv_path))

    return collected
def main():
    """Command-line entry point: load, sort, filter, realize and output samples."""
    logging.basicConfig()
    args = parse_arguments()

    # Determine samples, ordered by the time they ended.
    ordered = sorted(load_samples(args), key=lambda s: s.end_at)

    # Filter samples, but only when at least one filter label was given.
    wanted_labels = args.sample_filter
    if len(wanted_labels) != 0:
        logger.warning('Filtering %s samples', len(ordered))
        ordered = filter_samples(ordered, wanted_labels)
        logger.warning('Filtered down to %s samples', len(ordered))

    # Heuristic samples
    logger.warning('Realizing %s samples', len(ordered))
    realized = list(heuristically_realize_samples(ordered))

    # Output format
    output_mode = args.format_mode
    if output_mode == 'cli_report':
        for fragment in cli.generate_report(realized):
            sys.stdout.write(fragment)
    elif output_mode == 'icalendar':
        icalendar.generate_icalendar_file(realized, file=args.output_file)

View File

@ -0,0 +1,4 @@
from git_time_tracker import main
# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()

View File

@ -0,0 +1 @@
# Version string of the package.
__version__ = '0.1.19'

19
git_time_tracker/data.py Normal file
View File

@ -0,0 +1,19 @@
import dataclasses
import datetime
from collections.abc import Sequence
# Labels starting with this prefix are bookkeeping labels that the output
# formats skip instead of displaying.
HIDDEN_LABEL_PREFIX = '__'
# Hidden label under which the overall total is accumulated.
HIDDEN_LABEL_TOTAL = HIDDEN_LABEL_PREFIX + 'TOTAL'
@dataclasses.dataclass(frozen=True, order=True)
class WorkSample:
    """A labelled piece of work whose start and/or end time may be unknown.

    Instances are immutable and ordered field by field. They are hashable
    only if `labels` is hashable (e.g. a tuple).
    """

    # Labels attached to the sample, e.g. 'project:...' or 'author:...'.
    labels: Sequence[str]
    # When the work started; None if unknown.
    start_at: datetime.datetime | None
    # When the work ended; None if unknown.
    end_at: datetime.datetime | None
@dataclasses.dataclass(frozen=True, order=True)
class RealizedWorkSample(WorkSample):
    """A work sample whose start and end times are both known."""

    # Narrows the inherited optional fields to required datetimes.
    start_at: datetime.datetime
    end_at: datetime.datetime

View File

@ -0,0 +1 @@
"""Submodule containing output formats."""

View File

@ -0,0 +1,77 @@
import datetime
from collections.abc import Iterator
from ..data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, RealizedWorkSample
# Starting value when accumulating time per label.
ZERO_DURATION = datetime.timedelta(seconds=0)
# Units used to split a duration into whole hours and minutes.
HOUR = datetime.timedelta(hours=1)
MINUTE = datetime.timedelta(minutes=1)
def fmt_year_ranges_internal(years: list[int]) -> Iterator[str]:
    """Yield the pieces of a compact year listing such as ``2020-2022,2024``.

    Consecutive years collapse into ``first-last``; separate runs are
    divided by commas. The input does not need to be sorted.
    """
    ordered = sorted(years)
    final_position = len(ordered) - 1
    for position, current in enumerate(ordered):
        follows_previous = position > 0 and ordered[position - 1] == current - 1
        precedes_next = (
            position < final_position and ordered[position + 1] == current + 1
        )
        if follows_previous and precedes_next:
            # Interior of a run: represented by the dash already emitted.
            continue
        yield str(current)
        if precedes_next:
            yield '-'
        elif position < final_position:
            yield ','
def fmt_year_ranges(years: list[int]) -> str:
    """Return the compact year listing for `years` as a single string."""
    pieces = fmt_year_ranges_internal(years)
    return ''.join(pieces)
def fmt_line(label_type: str, label: str, total_time: datetime.timedelta) -> str:
    """Format one report row: label type, label, then whole hours and minutes."""
    whole_hours = int(total_time / HOUR)
    leftover = total_time - whole_hours * HOUR
    whole_minutes = int(leftover / MINUTE)
    return f' {label_type:10} {label:40} {whole_hours:-4d}h {whole_minutes:-2d}m'
def generate_report(
    samples: list[RealizedWorkSample],
) -> Iterator[str]:
    """Yield the text fragments of the per-label time report.

    Each visible label gets one row with its accumulated time and the years
    in which its samples ended, rows being ordered by descending time. Hidden
    labels are left out of the rows; the final row shows the time accumulated
    under the hidden total label.
    """
    # Time spent per label
    totals: dict[str, datetime.timedelta] = {}
    active_years: dict[str, set[int]] = {}
    for sample in samples:
        elapsed = sample.end_at - sample.start_at
        for label in sample.labels:
            totals[label] = totals.get(label, ZERO_DURATION) + elapsed
            active_years.setdefault(label, set()).add(sample.end_at.year)

    ranked = sorted(
        ((total, label) for label, total in totals.items()),
        reverse=True,
    )

    separator = '-' * 66
    yield separator
    yield '\n'

    for total_time, label_and_type in ranked:
        if label_and_type.startswith(HIDDEN_LABEL_PREFIX):
            continue
        label_type, label = label_and_type.split(':', 1)
        yield fmt_line(label_type, label, total_time)
        yield ' ('
        yield fmt_year_ranges(active_years.get(label_and_type, []))
        yield ')'
        yield '\n'

    yield separator
    yield '\n'
    yield fmt_line('', 'TOTAL', totals.get(HIDDEN_LABEL_TOTAL, ZERO_DURATION))
    yield '\n'

View File

@ -0,0 +1,67 @@
import datetime
import icalendar
from ..data import HIDDEN_LABEL_PREFIX, RealizedWorkSample
# Convenience durations.
# NOTE(review): not referenced in the visible part of this module.
ZERO_DURATION = datetime.timedelta(seconds=0)
HOUR = datetime.timedelta(hours=1)
MINUTE = datetime.timedelta(minutes=1)
def create_title(sample: RealizedWorkSample) -> tuple[str, str]:
    """Derive an event title and description from a sample's labels.

    Hidden labels and ``author:`` labels are ignored. The value of the first
    remaining label becomes the title; every later label is listed in full
    (``type:value``), one per line, in the description.

    Returns:
        A ``(title, description)`` pair; either part may be empty.
    """
    title_parts = []
    description_lines = []
    for label_and_type in sample.labels:
        if label_and_type.startswith((HIDDEN_LABEL_PREFIX, 'author:')):
            continue
        if not title_parts:
            # Split on the first colon only, so values that contain colons
            # themselves (URLs, git@host:path remotes) stay intact; a label
            # without any colon is used whole instead of raising IndexError.
            title_parts.append(label_and_type.split(':', 1)[-1])
        else:
            description_lines.append(label_and_type)
    return ' '.join(title_parts), '\n'.join(description_lines)
def generate_calendar(
    samples: list[RealizedWorkSample],
) -> icalendar.Calendar:
    """Build an iCalendar calendar holding one event per sample.

    Each event spans the sample's start and end time, takes its summary and
    description from `create_title`, and gets one organizer entry per
    ``author:`` label.
    """
    # DTSTAMP is a required VEVENT property; every event of one export
    # shares the moment the calendar was generated.
    # NOTE(review): RFC 5545 also requires a UID per event, which is still
    # not emitted here.
    generated_at = datetime.datetime.now(tz=datetime.UTC)

    cal = icalendar.Calendar()
    cal.add('prodid', '-//personal_data_calendar//example.org//')
    cal.add('version', '2.0')

    for sample in samples:
        title, description = create_title(sample)

        # Create event
        event = icalendar.Event()
        event.add('summary', title)
        event.add('description', description)
        event.add('dtstart', sample.start_at)
        event.add('dtend', sample.end_at)
        event.add('dtstamp', generated_at)

        for label_and_type in sample.labels:
            if label_and_type.startswith('author:'):
                event.add(
                    'organizer',
                    'mailto:' + label_and_type.removeprefix('author:'),
                )

        cal.add_component(event)

    return cal
def generate_icalendar_file(
    samples: list[RealizedWorkSample],
    file: str,
) -> None:
    """Render `samples` as an iCalendar document and write it to `file`."""
    document = generate_calendar(samples)
    with open(file, 'wb') as output:
        serialized = document.to_ical()
        output.write(serialized)

View File

@ -0,0 +1 @@
"""Submodule containing input formats."""

View File

@ -0,0 +1,115 @@
import datetime
import urllib.parse
from typing import Any
from collections.abc import Iterator
from decimal import Decimal
from pathlib import Path
import dataclasses
from personal_data.util import load_csv_file
from ..data import WorkSample
@dataclasses.dataclass
class PossibleKeys:
    """Column names of a row, grouped by the role they may play in a sample."""

    # Date/datetime columns whose name contains "start" (case-insensitive).
    time_start: list[str]
    # Date/datetime columns whose name contains "end" or "stop"
    # (case-insensitive).
    time_end: list[str]
    # Decimal columns whose name contains "duration_seconds".
    duration: list[str]
    # Columns holding plain strings.
    name: list[str]
    # Columns holding parsed URLs (urllib.parse.ParseResult).
    image: list[str]
    # Every column that is neither a date/datetime nor an image column.
    misc: list[str]
def determine_possible_keys(event_data: dict[str, Any]) -> PossibleKeys:
    """Classify the columns of one row by the type of the value they hold.

    Date/datetime columns are further split into start and end columns by
    their name. Columns that hold neither a date nor a parsed URL end up in
    `misc`, keeping their original order.
    """
    time_keys = []
    duration_keys = []
    name_keys = []
    image_keys = []
    misc_keys = []

    # Select data
    for key, value in event_data.items():
        is_time = isinstance(value, datetime.date)
        is_image = isinstance(value, urllib.parse.ParseResult)
        if is_time:
            time_keys.append(key)
        if isinstance(value, Decimal) and 'duration_seconds' in key:
            duration_keys.append(key)
        if isinstance(value, str):
            name_keys.append(key)
        if is_image:
            image_keys.append(key)
        if not (is_time or is_image):
            misc_keys.append(key)

    time_start_keys = []
    time_end_keys = []
    for key in time_keys:
        lowered = key.lower()
        if 'start' in lowered:
            time_start_keys.append(key)
        if 'end' in lowered or 'stop' in lowered:
            time_end_keys.append(key)

    return PossibleKeys(
        time_start=time_start_keys,
        time_end=time_end_keys,
        duration=duration_keys,
        name=name_keys,
        image=image_keys,
        misc=misc_keys,
    )
def start_end(
    sample: dict[str, Any],
    keys: PossibleKeys,
) -> tuple[datetime.datetime | None, datetime.datetime | None]:
    """Extract the ``(start, end)`` pair of a row.

    Only the first start, end and duration column are considered. An explicit
    end column wins over a duration; a duration is only used together with a
    start. Whatever cannot be determined is returned as None.
    """
    has_start = bool(keys.time_start)
    has_end = bool(keys.time_end)

    if not has_start:
        if has_end:
            return (None, sample[keys.time_end[0]])
        return (None, None)

    began = sample[keys.time_start[0]]
    if has_end:
        return (began, sample[keys.time_end[0]])
    if keys.duration:
        elapsed = datetime.timedelta(seconds=float(sample[keys.duration[0]]))
        return (began, began + elapsed)
    return (began, None)
def iterate_samples_from_dicts(rows: list[dict[str, Any]]) -> Iterator[WorkSample]:
    """Turn rows into work samples, one sample per row.

    The role of each column is determined once, from the middle row, and
    then applied to every row. Each sample is labelled ``column:value`` for
    every column that is neither a date/datetime nor an image column.

    `rows` must be non-empty and the representative row must contain at
    least one start or end time column.
    """
    assert len(rows) > 0

    # Hopefully select a useful representative.
    representative = rows[len(rows) // 2]
    possible_keys = determine_possible_keys(representative)
    assert len(possible_keys.time_start) + len(possible_keys.time_end) >= 1

    for event_data in rows:
        start_at, end_at = start_end(event_data, possible_keys)
        labels = tuple(f'{k}:{event_data[k]}' for k in possible_keys.misc)

        # Create event
        yield WorkSample(
            labels=labels,
            start_at=start_at,
            end_at=end_at,
        )
def iterate_samples_from_csv_file(file_path: Path) -> Iterator[WorkSample]:
    """Load a CSV file and yield one work sample per row.

    All rows are converted before the first sample is yielded.
    """
    rows = load_csv_file(file_path)
    loaded = [*iterate_samples_from_dicts(rows)]
    assert len(loaded) > 0, 'Did not found any samples'
    for sample in loaded:
        yield sample

View File

@ -0,0 +1,68 @@
import datetime
import logging
from collections.abc import Iterator
from pathlib import Path
import git
from ..data import HIDDEN_LABEL_TOTAL, WorkSample
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def determine_default_branch(repo: git.Repo) -> str:
    """Return ``'main'`` if that revision resolves in `repo`, else ``'master'``.

    The fallback is best-effort: ``'master'`` is returned without checking
    that it exists.
    """
    try:
        repo.commit('main')
    except Exception:
        # Any lookup failure means there is no usable 'main'. Deliberately
        # not a bare `except`, so KeyboardInterrupt and SystemExit propagate.
        return 'master'
    return 'main'
def determine_project_name(repo: git.Repo) -> str:
    """Derive a project name for `repo`.

    With at least one remote, the name is the URL of the remote called
    ``origin`` (or of the first remote when none has that name), minus a
    leading ``git@gitfub.space:``. Without remotes it is the name of the
    working tree directory.
    """
    remotes = repo.remotes
    if len(remotes) > 0:
        try:
            remote = remotes.origin
        except AttributeError:
            # Remotes exist, but none is named "origin".
            remote = remotes[0]
        return remote.url.removeprefix('git@gitfub.space:')
    return Path(repo.working_tree_dir).name
def get_samples_from_project(repo: git.Repo) -> Iterator[WorkSample]:
    """Yield work samples for every commit on the repository's default branch.

    Each commit yields a sample ending at its authored time and, when the
    committed time differs, a second sample ending at the committed time.
    Samples carry the hidden total label plus ``project:`` and ``author:``
    labels and have no start time.
    """
    project_name = determine_project_name(repo)
    assert project_name is not None

    # TODO: Branch on main or master or default
    # The result is unused.
    # NOTE(review): presumably kept so that a repository without a
    # resolvable HEAD fails early - confirm.
    repo.commit()

    branch = determine_default_branch(repo)
    for commit in repo.iter_commits(branch):
        labels = (
            HIDDEN_LABEL_TOTAL,
            'project:' + project_name,
            'author:' + commit.author.email,
        )
        authored = datetime.datetime.fromtimestamp(
            commit.authored_date,
            tz=datetime.UTC,
        )
        committed = datetime.datetime.fromtimestamp(
            commit.committed_date,
            tz=datetime.UTC,
        )

        yield WorkSample(labels=labels, start_at=None, end_at=authored)
        if authored != committed:
            yield WorkSample(labels=labels, start_at=None, end_at=committed)
def iterate_samples_from_git_repository(repo_path: Path) -> Iterator[WorkSample]:
    """Yield the work samples of the git repository at `repo_path`.

    A path that is not a git repository is logged and produces no samples.
    """
    try:
        repository = git.Repo(repo_path)
        yield from get_samples_from_project(repository)
    except git.exc.InvalidGitRepositoryError:
        logger.warning('Ignoring non-repo %s', repo_path)

View File

@ -1,75 +0,0 @@
import argparse
import datetime
import urllib.parse
import icalendar
from personal_data.util import load_csv_file
# Timezone-aware timestamp captured once at import time; used as the
# 'created' and 'dtstamp' value of every generated event.
NOW = datetime.datetime.now(tz=datetime.UTC)
def parse_arguments():
    """Parse the two positional arguments: the data folder and the output file."""
    parser = argparse.ArgumentParser()
    for positional in ('data_folder', 'output_file'):
        parser.add_argument(positional)
    return parser.parse_args()
def generate_calendar(rows: list[dict]) -> icalendar.Calendar:
    """Build an iCalendar calendar with one 30-minute event per dated row.

    The first date/datetime value of a row is the event start; rows without
    one are skipped. The first two string values form the title, the
    remaining ones the description, and the first parsed URL (if any) is
    attached as the image.
    """
    max_title_parts = 2

    cal = icalendar.Calendar()
    cal.add('prodid', '-//personal_data_calendar//example.org//')
    cal.add('version', '2.0')

    for event_data in rows:
        # Select data
        values = list(event_data.values())
        date = next((v for v in values if isinstance(v, datetime.date)), None)
        if date is None:
            continue
        texts = [v for v in values if isinstance(v, str)]
        image = next(
            (v for v in values if isinstance(v, urllib.parse.ParseResult)),
            None,
        )

        title = ': '.join(texts[:max_title_parts])
        description = '\n\n'.join(texts[max_title_parts:])

        # Create event
        event = icalendar.Event()
        for property_name, property_value in (
            ('summary', title),
            ('description', description),
            ('dtstart', date),
            ('dtend', date + datetime.timedelta(minutes=30)),
            ('created', NOW),
            ('dtstamp', NOW),
        ):
            event.add(property_name, property_value)
        if image:
            event.add('image', image.geturl())

        cal.add_component(event)

    return cal
def main():
    """Convert the PlayStation games CSV of the data folder into an iCalendar file."""
    args = parse_arguments()
    csv_path = args.data_folder + '/games_played_playstation.csv'
    calendar = generate_calendar(load_csv_file(csv_path))
    with open(args.output_file, 'wb') as output:
        output.write(calendar.to_ical())
# Run the conversion only when this file is executed as a script.
if __name__ == '__main__':
    main()