def filter_samples(
    samples: list[WorkSample],
    sample_filter: set[str],
) -> list[WorkSample]:
    """Keep only the samples that carry at least one of the requested labels.

    Args:
        samples: Samples to inspect; their relative order is preserved.
        sample_filter: Labels to look for. Must be non-empty (enforced with
            an ``assert``).

    Returns:
        A new list holding every sample whose ``labels`` share at least one
        entry with ``sample_filter``.
    """
    assert len(sample_filter) > 0
    # Build the lookup set once instead of once per sample.
    wanted = frozenset(sample_filter)
    return [
        candidate
        for candidate in samples
        if not wanted.isdisjoint(candidate.labels)
    ]
def parse_arguments():
    """Parse the command line (``sys.argv``) into an ``argparse.Namespace``.

    Resulting attributes:
        repositories: paths given via ``--git-repo`` (repeatable).
        csv_files: paths given via ``--csv-file`` (repeatable).
        sample_filter: labels given via ``--filter`` (repeatable).
        format_mode: ``'cli_report'`` (default) or ``'icalendar'``.
        output_file: path given via ``--out``.
    """
    parser = argparse.ArgumentParser()

    # Repeatable multi-value options; each one accumulates into its own list.
    list_options = (
        ('--git-repo', Path, 'repositories'),
        ('--csv-file', Path, 'csv_files'),
        ('--filter', str, 'sample_filter'),
    )
    for flag, value_type, destination in list_options:
        parser.add_argument(
            flag,
            action='extend',
            nargs='+',
            type=value_type,
            dest=destination,
            default=[],
        )

    parser.add_argument(
        '--format',
        dest='format_mode',
        action='store',
        type=str,
        choices=['cli_report', 'icalendar'],
        default='cli_report',
    )
    # argparse runs string defaults through ``type``, so the default below is
    # delivered as a ``Path`` just like a user-supplied value.
    parser.add_argument(
        '--out',
        dest='output_file',
        action='store',
        type=Path,
        default='output/samples.ics',
    )
    return parser.parse_args()
+ ) + del csv_path + + return shared_time_stamps_set + + +def main(): + logging.basicConfig() + + args = parse_arguments() + + # Determine samples + shared_time_stamps_set = load_samples(args) + + # Sort samples + shared_time_stamps = sorted(shared_time_stamps_set, key=lambda s: s.end_at) + del shared_time_stamps_set + + # Filter samples + sample_filter = args.sample_filter + if len(sample_filter) != 0: + logger.warning('Filtering %s samples', len(shared_time_stamps)) + shared_time_stamps = filter_samples(shared_time_stamps, sample_filter) + logger.warning('Filtered down to %s samples', len(shared_time_stamps)) + + # Heuristic samples + logger.warning('Realizing %s samples', len(shared_time_stamps)) + shared_time_stamps = list(heuristically_realize_samples(shared_time_stamps)) + + # Output format + if args.format_mode == 'cli_report': + for t in cli.generate_report(shared_time_stamps): + sys.stdout.write(t) + elif args.format_mode == 'icalendar': + icalendar.generate_icalendar_file( + shared_time_stamps, + file=args.output_file, + ) diff --git a/git_time_tracker/__main__.py b/git_time_tracker/__main__.py new file mode 100644 index 0000000..7224f22 --- /dev/null +++ b/git_time_tracker/__main__.py @@ -0,0 +1,4 @@ +from git_time_tracker import main + +if __name__ == '__main__': + main() diff --git a/git_time_tracker/_version.py b/git_time_tracker/_version.py new file mode 100644 index 0000000..d83c2a9 --- /dev/null +++ b/git_time_tracker/_version.py @@ -0,0 +1 @@ +__version__ = '0.1.19' diff --git a/git_time_tracker/data.py b/git_time_tracker/data.py new file mode 100644 index 0000000..1b0860a --- /dev/null +++ b/git_time_tracker/data.py @@ -0,0 +1,19 @@ +import dataclasses +import datetime +from collections.abc import Sequence + +HIDDEN_LABEL_PREFIX = '__' +HIDDEN_LABEL_TOTAL = HIDDEN_LABEL_PREFIX + 'TOTAL' + + +@dataclasses.dataclass(frozen=True, order=True) +class WorkSample: + labels: Sequence[str] + start_at: datetime.datetime | None + end_at: datetime.datetime | 
None + + +@dataclasses.dataclass(frozen=True, order=True) +class RealizedWorkSample(WorkSample): + start_at: datetime.datetime + end_at: datetime.datetime diff --git a/git_time_tracker/format/__init__.py b/git_time_tracker/format/__init__.py new file mode 100644 index 0000000..75e6e4e --- /dev/null +++ b/git_time_tracker/format/__init__.py @@ -0,0 +1 @@ +"""Submodule containing output formats.""" diff --git a/git_time_tracker/format/cli.py b/git_time_tracker/format/cli.py new file mode 100644 index 0000000..bab846f --- /dev/null +++ b/git_time_tracker/format/cli.py @@ -0,0 +1,77 @@ +import datetime +from collections.abc import Iterator + +from ..data import HIDDEN_LABEL_PREFIX, HIDDEN_LABEL_TOTAL, RealizedWorkSample + +ZERO_DURATION = datetime.timedelta(seconds=0) +HOUR = datetime.timedelta(hours=1) +MINUTE = datetime.timedelta(minutes=1) + + +def fmt_year_ranges_internal(years: list[int]) -> Iterator[str]: + years = sorted(years) + for idx, year in enumerate(years): + at_end = idx == len(years) - 1 + range_before = idx > 0 and years[idx - 1] == year - 1 + range_after = not at_end and years[idx + 1] == year + 1 + + if not range_before or not range_after: + yield str(year) + + if not at_end: + if not range_before and range_after: + yield '-' + elif not range_after: + yield ',' + + +def fmt_year_ranges(years: list[int]) -> str: + return ''.join(list(fmt_year_ranges_internal(years))) + + +def fmt_line(label_type: str, label: str, total_time: datetime.timedelta) -> str: + hours = int(total_time / HOUR) + minutes = int((total_time - hours * HOUR) / MINUTE) + return f' {label_type:10} {label:40} {hours:-4d}h {minutes:-2d}m' + + +def generate_report( + samples: list[RealizedWorkSample], +) -> Iterator[str]: + # Time spent per label + time_per_label: dict[str, datetime.timedelta] = {} + years_per_label: dict[str, set[int]] = {} + for sample in samples: + duration = sample.end_at - sample.start_at + + for label in sample.labels: + time_per_label.setdefault(label, 
def create_title(sample: RealizedWorkSample) -> tuple[str, str]:
    """Derive an event title and description from a sample's labels.

    Hidden labels (those starting with ``HIDDEN_LABEL_PREFIX``) and
    ``author:`` labels are skipped. The value part of the first remaining
    label becomes the title; every later remaining label is kept verbatim
    (``type:value``) as one line of the description.

    Args:
        sample: The sample whose ``labels`` are inspected.

    Returns:
        ``(title, description)``. Both are empty strings when no label
        qualifies.
    """
    title = None
    description_lines = []
    for label_and_type in sample.labels:
        if label_and_type.startswith((HIDDEN_LABEL_PREFIX, 'author:')):
            continue
        if title is None:
            # Split on the first colon only, so values that themselves
            # contain ':' (URLs, timestamps) are kept whole; a label without
            # any colon is used as-is instead of raising IndexError.
            title = label_and_type.split(':', 1)[-1]
        else:
            description_lines.append(label_and_type)
    return title or '', '\n'.join(description_lines)
def generate_icalendar_file(
    samples: list[RealizedWorkSample],
    file: str,
) -> None:
    """Serialize *samples* as an iCalendar document and write it to *file*.

    Args:
        samples: Realized samples, converted with ``generate_calendar``.
        file: Destination passed to ``open``; it is opened in binary write
            mode, so existing content is replaced.
    """
    cal = generate_calendar(samples)
    with open(file, mode='wb') as destination:
        payload = cal.to_ical()
        destination.write(payload)
def start_end(
    sample: dict[str, Any],
    keys: PossibleKeys,
) -> tuple[datetime.datetime | None, datetime.datetime | None]:
    """Work out the ``(start, end)`` pair of one row.

    Only the first entry of each key list in *keys* is consulted. Preference
    order:

    1. a start column and an end column: both are read directly;
    2. a start column and a duration column: end is start plus that many
       seconds;
    3. a start column only: end is ``None``;
    4. an end column only: start is ``None``;
    5. neither: ``(None, None)``.

    Args:
        sample: The row, mapping column name to value.
        keys: Candidate column names; ``time_start``, ``time_end`` and
            ``duration`` are used.
    """
    if keys.time_start:
        begin = sample[keys.time_start[0]]
        if keys.time_end:
            return begin, sample[keys.time_end[0]]
        if keys.duration:
            seconds = float(sample[keys.duration[0]])
            return begin, begin + datetime.timedelta(seconds=seconds)
        return begin, None

    if keys.time_end:
        return None, sample[keys.time_end[0]]
    return None, None
def determine_default_branch(repo: git.Repo) -> str:
    """Guess the name of the repository's default branch.

    Probes for a revision called ``main``; if that lookup fails for any
    ordinary reason, ``master`` is assumed. The fallback is not verified.

    Args:
        repo: Repository to inspect; only ``repo.commit`` is called.

    Returns:
        ``'main'`` when ``repo.commit('main')`` succeeds, else ``'master'``.
    """
    try:
        repo.commit('main')
    except Exception:
        # Deliberately broad: any lookup failure means "no usable main".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate so the user can still abort the run.
        return 'master'
    return 'main'
def iterate_samples_from_git_repository(repo_path: Path) -> Iterator[WorkSample]:
    """Yield the work samples of the git repository at *repo_path*.

    Paths that cannot be opened as a repository are skipped with a warning
    instead of aborting the run, so one bad argument does not discard the
    samples of the other repositories.

    Args:
        repo_path: Location handed to ``git.Repo``.

    Yields:
        Every sample produced by ``get_samples_from_project``; nothing when
        the path is skipped.
    """
    try:
        yield from get_samples_from_project(git.Repo(repo_path))
    except git.exc.InvalidGitRepositoryError:
        logger.warning('Ignoring non-repo %s', repo_path)
    except git.exc.NoSuchPathError:
        # git.Repo raises this (not InvalidGitRepositoryError) when the path
        # does not exist at all; treat it the same way as a non-repo.
        logger.warning('Ignoring missing path %s', repo_path)
possible_time_keys else None - image = event_data[possible_image_keys[0]] if possible_image_keys else None - - if date is None: - continue - - title = ': '.join(event_data[k] for k in possible_name_keys[:max_title_parts]) - description = '\n\n'.join( - event_data[k] for k in possible_name_keys[max_title_parts:] - ) - - # Create event - event = icalendar.Event() - event.add('summary', title) - event.add('description', description) - event.add('dtstart', date) - event.add('dtend', date + datetime.timedelta(minutes=30)) - event.add('created', NOW) - event.add('dtstamp', NOW) - if image: - event.add('image', image.geturl()) - cal.add_component(event) - del event - - return cal - - -def main(): - args = parse_arguments() - - dicts = load_csv_file(args.data_folder + '/games_played_playstation.csv') - - calendar = generate_calendar(dicts) - - with open(args.output_file, 'wb') as f: - f.write(calendar.to_ical()) - - -if __name__ == '__main__': - main()