"""Derive work samples from the commit history of git repositories."""

import datetime
import logging
from collections.abc import Iterator
from pathlib import Path

import git

from ..data import HIDDEN_LABEL_TOTAL, WorkSample

logger = logging.getLogger(__name__)


def determine_default_branch(repo: git.Repo) -> str:
    """Return ``'main'`` if that revision resolves in *repo*, else ``'master'``.

    ``'master'`` is purely a fallback; it is not checked for existence.
    """
    try:
        repo.commit('main')
    except (git.exc.BadName, git.exc.BadObject, ValueError, IndexError):
        # Only revision-lookup failures mean "there is no main"; anything
        # else (e.g. KeyboardInterrupt) must propagate.
        return 'master'
    return 'main'


def determine_project_name(repo: git.Repo) -> str:
    """Return a human-readable project name for *repo*.

    The URL of the ``origin`` remote is used when such a remote exists, with
    a leading ``git@gitfub.space:`` stripped. Otherwise the name of the
    repository directory is used: the working tree, or the git directory for
    repositories without a working tree.
    """
    for remote in repo.remotes:
        if remote.name == 'origin':
            return remote.url.removeprefix('git@gitfub.space:')
    # No remote called 'origin' (possibly no remotes at all).
    return Path(repo.working_tree_dir or repo.git_dir).name


def get_samples_from_project(repo: git.Repo) -> Iterator[WorkSample]:
    """Yield two work samples for every commit on the default branch.

    For each commit, one sample is stamped with the authored time and one
    with the committed time (both as timezone-aware UTC datetimes). Both
    carry the same labels: ``HIDDEN_LABEL_TOTAL``, ``project:<name>`` and
    ``author:<email>``.
    """
    # The project label is identical for every commit, so build it once.
    project_label = 'project:' + determine_project_name(repo)

    # TODO: Branch on main or master or default
    # NOTE(review): the result is unused; this looks like it only verifies
    # that HEAD resolves to a commit -- confirm before removing.
    repo.commit()

    for commit in repo.iter_commits(determine_default_branch(repo)):
        labels = (
            HIDDEN_LABEL_TOTAL,
            project_label,
            'author:' + commit.author.email,
        )
        for timestamp in (commit.authored_date, commit.committed_date):
            yield WorkSample(
                datetime.datetime.fromtimestamp(timestamp, tz=datetime.UTC),
                labels,
            )


def iterate_samples_from_git_repository(repo_path: Path) -> Iterator[WorkSample]:
    """Yield work samples from the git repository at *repo_path*.

    If *repo_path* is not a git repository, a warning is logged and nothing
    is yielded.
    """
    try:
        repo = git.Repo(repo_path)
    except git.exc.InvalidGitRepositoryError:
        logger.warning('Ignoring non-repo %s', repo_path)
        return

    # Close the repository once iteration finishes (or is abandoned) so the
    # resources it holds are released.
    with repo:
        yield from get_samples_from_project(repo)