aider-gitea/aider_gitea/__init__.py

"""Aider Gitea.

A code automation tool that integrates Gitea with Aider to automatically solve issues.

This program monitors your [Gitea](https://about.gitea.com/) repository for issues with the 'aider' label.
When such an issue is found, it:

1. Creates a new branch.
2. Invokes [Aider](https://aider.chat/) to solve the issue using a Large-Language Model.
3. Runs tests and code quality checks.
4. Creates a pull request with the solution.

Inspired by [the AI workflows](https://github.com/oscoreio/ai-workflows/)
project.

## Usage

An application token must be supplied for the `gitea_token` secret. This must
have the following permissions:

- `read:issue`: To be able to read issues on the specified repository.
- `write:repository`: To be able to create pull requests.
- `read:user`: Needed to iterate all user's repositories.

### Command Line

```bash
# Run with default settings
python -m aider_gitea

# Specify custom repository and owner
python -m aider_gitea --owner myorg --repo myproject

# Use a custom Gitea URL
python -m aider_gitea --gitea-url https://gitea.example.com

# Specify a different base branch
python -m aider_gitea --base-branch develop
```

### Python API

```python
from pathlib import Path

from aider_gitea import RepositoryConfig, solve_issue_in_repository

# Solve an issue programmatically
repository_config = RepositoryConfig(
    gitea_url="https://gitea.example.com",
    owner="myorg",
    repo="myproject",
    base_branch="main",
)

solve_issue_in_repository(
    repository_config,
    Path("/path/to/repo"),
    "issue-123-fix-bug",
    "Fix critical bug",
    "The application crashes when processing large files",
    "123",
    gitea_client,  # an already-configured Gitea API client
)
```

### Environment Configuration

The tool uses environment variables for sensitive information:
- `GITEA_TOKEN`: Your Gitea API token
- `LLM_API_KEY`: API key for the language model used by Aider
"""

import dataclasses
import logging
import re
import shlex
import subprocess
import sys
import tempfile
from pathlib import Path

from . import secrets
from ._version import __version__  # noqa: F401
from .seen_issues_db import SeenIssuesDB

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


@dataclasses.dataclass(frozen=True)
class RepositoryConfig:
    """Immutable description of the Gitea repository to operate on."""

    # Base URL of the Gitea instance, e.g. ``https://gitea.example.com``.
    gitea_url: str
    # Owner (user or organisation) of the repository.
    owner: str
    # Name of the repository.
    repo: str
    # Branch that work branches start from and that pull requests target.
    base_branch: str

    def repo_url(self) -> str:
        """Return the SSH-style clone URL of the repository.

        The URL is assembled as ``<gitea_url>:<owner>/<repo>.git`` and every
        ``https://`` occurrence in it is then replaced with ``git@``.
        """
        https_style = f'{self.gitea_url}:{self.owner}/{self.repo}.git'
        return https_style.replace('https://', 'git@')


@dataclasses.dataclass(frozen=True)
class IssueResolution:
    """Outcome of an attempt to resolve an issue."""

    # True when the attempt got as far as creating a pull request.
    success: bool
    # Web URL of the pull request, when one was created.
    pull_request_url: str | None = None
    # Identifier (number) of the pull request, when one was created.
    pull_request_id: str | None = None


def generate_branch_name(issue_number: str, issue_title: str) -> str:
    """Build a git branch name of the form ``issue-<number>-<slug>``.

    The slug is the issue title with every character other than ASCII
    letters, digits and spaces stripped, lowercased, and with whitespace
    runs turned into single dashes.

    Args:
        issue_number: The issue number to embed in the branch name.
        issue_title: The issue title that the slug is derived from.

    Returns:
        The dash-joined branch name; just ``issue-<number>`` when the title
        contains no usable characters.
    """
    slug_words = re.sub(r'[^0-9a-zA-Z ]+', '', issue_title).lower().split()
    return '-'.join(['issue', str(issue_number)] + slug_words)


def bash_cmd(*commands: str) -> str:
    """Build a ``bash -c`` command line that runs ``commands`` in sequence.

    The commands are joined with ``;`` behind a leading ``set -e`` so that the
    script stops at the first failing command. The script is quoted with
    ``shlex.quote`` so that commands which themselves contain quotes
    (including the result of another ``bash_cmd`` call) survive as a single
    argument to ``bash -c``.

    Args:
        *commands: Shell commands to run, in order.

    Returns:
        A shell command line of the form ``bash -c '<script>'``.
    """
    script = ';'.join(('set -e', *commands))
    return f'bash -c {shlex.quote(script)}'


# Test command: create a virtualenv, install the project in editable mode and
# run pytest on the ``test`` directory. Passed to aider via ``--test-cmd`` and
# also run directly when a fresh clone is set up.
AIDER_TEST = bash_cmd(
    'virtualenv venv',
    'source venv/bin/activate',
    'pip install -e .',
    'pytest test',
)

# Ruff formatting plus auto-fixing, performed twice back to back.
# NOTE(review): presumably the second pass cleans up after fixes applied by
# the first one — confirm.
RUFF_FORMAT_AND_AUTO_FIX = bash_cmd(
    'ruff format',
    'ruff check --fix --ignore RUF022 --ignore PGH004',
    'ruff format',
    'ruff check --fix --ignore RUF022 --ignore PGH004',
)

# Lint command passed to aider via ``--lint-cmd``: auto-fix first, then format
# and run a final ``ruff check`` without ``--fix``.
# RUFF_FORMAT_AND_AUTO_FIX is already a complete ``bash -c`` command line, so
# this nests one bash invocation inside another; whether that works depends on
# how bash_cmd quotes its script.
AIDER_LINT = bash_cmd(
    RUFF_FORMAT_AND_AUTO_FIX,
    'ruff format',
    'ruff check --ignore RUF022 --ignore PGH004',
)


# Prompt template passed to aider via ``--message``; ``{issue}`` is replaced
# with the issue text.
LLM_MESSAGE_FORMAT = (
    """{issue}\nDo not wait for explicit approval before working on code changes."""
)

# Model passed to aider via ``--model``. A name starting with ``ollama/``
# additionally enables ``--auto-lint`` (see create_aider_command).
# CODE_MODEL = 'ollama/gemma3:4b'
CODE_MODEL = 'o4-mini'
# Model run through ``ollama run`` (without its ``ollama/`` prefix) to
# summarise and judge changesets.
EVALUATOR_MODEL = 'ollama/gemma3:27b'


def create_aider_command(issue: str) -> list[str]:
    """Assemble the argv list used to invoke aider on ``issue``.

    Args:
        issue: The issue text; it is substituted into ``LLM_MESSAGE_FORMAT``
            and passed to aider via ``--message``.

    Returns:
        The command as a list of arguments, starting with ``'aider'``.
    """
    # ``--no-check-update`` is currently not passed.
    command = [
        'aider',
        '--chat-language',
        'english',
        '--no-stream',
        '--no-analytics',
        '--test-cmd',
        AIDER_TEST,
        '--lint-cmd',
        AIDER_LINT,
        '--auto-test',
        '--no-auto-lint',
        '--yes',
    ]

    # One ``--api-key <key>`` pair per configured LLM key.
    command.extend(
        argument for key in secrets.llm_api_keys() for argument in ('--api-key', key)
    )

    # Currently disabled options: ``--read CONVENTIONS.md`` and ``--architect``.
    command.append('--cache-prompts')

    if CODE_MODEL:
        command.extend(['--model', CODE_MODEL])
        if CODE_MODEL.startswith('ollama/'):
            # NOTE(review): this comes after ``--no-auto-lint``; presumably the
            # later flag wins for ollama models — confirm.
            command.append('--auto-lint')

    command.extend(['--message', LLM_MESSAGE_FORMAT.format(issue=issue)])
    return command


def get_commit_messages(cwd: Path, base_branch: str, current_branch: str) -> list[str]:
    """Return the subjects of commits on ``current_branch`` not in ``base_branch``.

    Args:
        cwd: The repository path to run git in.
        base_branch: The branch (or commit) to compare against.
        current_branch: The branch (or commit) whose extra commits are listed.

    Returns:
        The commit subject lines, oldest first. The list is empty when the
        range contains no commits or when ``git log`` exits with an error.
    """
    try:
        result = subprocess.run(
            ['git', 'log', f'{base_branch}..{current_branch}', '--pretty=format:%s'],
            check=True,
            cwd=cwd,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError:
        logger.exception('Failed to get commit messages on branch %s', current_branch)
        return []

    # splitlines() yields [] for empty output, whereas split('\n') would yield
    # [''] and make an empty range look like it contains one commit.
    subjects = result.stdout.strip().splitlines()
    # git log prints the newest commit first; callers want chronological order.
    subjects.reverse()
    return subjects


def get_diff(cwd: Path, base_branch: str, current_branch: str) -> str:
    """Return the output of ``git diff <base_branch>..<current_branch>``.

    Args:
        cwd: The repository path to run git in.
        base_branch: The left-hand side of the diff range.
        current_branch: The right-hand side of the diff range.

    Returns:
        The diff text with surrounding whitespace stripped.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    diff_command = [
        'git',
        'diff',
        f'{base_branch}..{current_branch}',
        '--pretty=format:%s',
    ]
    completed = subprocess.run(
        diff_command,
        cwd=cwd,
        text=True,
        capture_output=True,
        check=True,
    )
    return completed.stdout.strip()


def push_changes(
    repository_config: RepositoryConfig,
    cwd: Path,
    branch_name: str,
    issue_number: str,
    issue_title: str,
    gitea_client,
) -> IssueResolution:
    """Push ``branch_name`` and open a pull request for it.

    Nothing is pushed when the branch has no commits beyond the base branch.

    Args:
        repository_config: Repository owner, name and base branch.
        cwd: Path of the local clone.
        branch_name: The branch to push; it becomes the pull request head.
        issue_number: Issue number referenced in the pull request body.
        issue_title: Used as the pull request title.
        gitea_client: Client object providing ``create_pull_request``.

    Returns:
        ``IssueResolution(False)`` when there was nothing to push, otherwise a
        successful resolution carrying the pull request URL and number as
        reported by the client (``None`` for values missing in the response).
    """
    base_branch = repository_config.base_branch

    # Check if there are any commits on the branch before pushing
    if not has_commits_on_branch(cwd, base_branch, branch_name):
        logger.info('No commits made on branch %s, skipping push', branch_name)
        return IssueResolution(False)

    # Build the PR description: issue reference plus one bullet per commit.
    commit_messages = get_commit_messages(cwd, base_branch, branch_name)
    description_lines = [f'This pull request resolves #{issue_number}', '']
    if commit_messages:
        description_lines += ['## Commit Messages', '']
        description_lines += [f'- {message}' for message in commit_messages]
    description = '\n'.join(description_lines) + '\n'

    # First push the branch without creating a PR. The push is forced, so an
    # existing remote branch of the same name is overwritten.
    run_cmd(['git', 'push', 'origin', branch_name, '--force'], cwd)

    # Then create the PR with the aider label
    pr_response = gitea_client.create_pull_request(
        owner=repository_config.owner,
        repo=repository_config.repo,
        title=issue_title,
        body=description,
        head=branch_name,
        base=base_branch,
        labels=['aider'],
    )

    # Keyword arguments keep the URL and the number in their matching fields;
    # a missing number stays None instead of becoming the string 'None'.
    pr_number = pr_response.get('number')
    return IssueResolution(
        success=True,
        pull_request_url=pr_response.get('html_url'),
        pull_request_id=None if pr_number is None else str(pr_number),
    )


def has_commits_on_branch(cwd: Path, base_branch: str, current_branch: str) -> bool:
    """Check whether ``current_branch`` has commits that ``base_branch`` lacks.

    The commits are counted with ``git rev-list --count`` rather than by
    inspecting a list of commit messages, so an empty range is reliably
    reported as "no commits".

    Args:
        cwd: The repository path to run git in.
        base_branch: The branch (or commit) to compare against.
        current_branch: The branch (or commit) to check for extra commits.

    Returns:
        True if at least one commit is reachable from ``current_branch`` but
        not from ``base_branch``; False otherwise, including when git could
        not be run or its output could not be parsed.
    """
    try:
        result = subprocess.run(
            ['git', 'rev-list', '--count', f'{base_branch}..{current_branch}'],
            check=True,
            cwd=cwd,
            capture_output=True,
            text=True,
        )
        return int(result.stdout.strip()) > 0
    except (OSError, subprocess.SubprocessError, ValueError):
        # Best effort: a failed check is treated as "no commits".
        logger.exception('Failed to check commits on branch %s', current_branch)
        return False


def run_cmd(cmd: list[str], cwd: Path | None = None, check=True) -> bool:
    """Run a shell command and return its success status.

    Args:
        cmd: The command to run as a list of strings.
        cwd: The directory to run the command in.
        check: Whether to raise an exception if the command fails.

    Returns:
        True if the command succeeded, False otherwise.
    """
    result = subprocess.run(cmd, check=check, cwd=cwd)
    return result.returncode == 0


def issue_solution_round(repository_path: Path, issue_content: str) -> bool:
    """Run one aider pass on the repository, followed by a ruff clean-up commit.

    Args:
        repository_path: Path of the local clone that aider should work in.
        issue_content: The task description handed to aider.

    Returns:
        False if the aider process exited with a non-zero status (nothing
        further is done in that case), True otherwise.
    """
    aider_command = create_aider_command(issue_content)

    # The command line carries ``--api-key`` values; mask them so that
    # credentials never end up in the output or logs.
    redacted_command = [
        '<redacted>' if previous == '--api-key' else argument
        for previous, argument in zip(['', *aider_command], aider_command)
    ]
    logger.debug('Running aider: %s', redacted_command)

    if not run_cmd(aider_command, repository_path, check=False):
        return False

    # Auto-fix standard code quality stuff after aider. The ruff run and the
    # commit are best effort (the commit fails when there is nothing to commit).
    run_cmd(['bash', '-c', RUFF_FORMAT_AND_AUTO_FIX], repository_path, check=False)
    run_cmd(['git', 'add', '.'], repository_path)
    run_cmd(['git', 'commit', '-m', 'Ruff after aider'], repository_path, check=False)

    return True


def run_ollama(cwd: Path, texts: list[str]) -> str:
    """Feed ``texts`` to the evaluator model via ``ollama run`` and return its output.

    Args:
        cwd: Working directory for the ollama process.
        texts: Prompt fragments; they are joined with newlines and written to
            the process's standard input.

    Returns:
        Everything the process wrote to standard output. Standard error is
        captured and discarded, and the exit status is not checked.
    """
    model_name = EVALUATOR_MODEL.removeprefix('ollama/')
    cmd = ['ollama', 'run', model_name]
    print(cmd)
    completed = subprocess.run(
        cmd,
        cwd=cwd,
        input='\n'.join(texts),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    print(completed.stdout)
    return completed.stdout


def parse_yes_no_answer(text: str) -> bool | None:
    text = text.lower().strip()
    words = text.split('\n \t.,?-')
    print(words)
    if words[-1] in {'yes', 'agree'}:
        return True
    if words[-1] in {'no', 'disagree'}:
        return False
    return None


def run_ollama_and_get_yes_or_no(cwd, initial_texts: list[str]) -> bool:
    """Query the evaluator model until its answer parses as yes or no.

    The prompt starts as ``initial_texts`` plus a request to think the answer
    through. After every reply that cannot be parsed, the reply and a reminder
    to answer "yes" or "no" are appended and the model is queried again.
    There is no limit on the number of attempts.

    Args:
        cwd: Working directory for the ollama process.
        initial_texts: The initial prompt fragments; the list is not modified.

    Returns:
        True for a yes answer, False for a no answer.
    """
    conversation = [*initial_texts, 'Think through your answer.']
    while True:
        reply = run_ollama(cwd, conversation)
        verdict = parse_yes_no_answer(reply)
        if verdict is None:
            conversation += [reply, 'Please answer either "yes" or "no".']
            continue
        return verdict

def verify_solution(
    repository_path: Path,
    issue_content: str,
    base_branch: str = 'main',
) -> bool:
    """Ask the evaluator model whether the current changes solve the issue.

    The diff between ``base_branch`` and ``HEAD`` is first summarised by the
    model; the summary and the issue text are then submitted with a yes/no
    question.

    Args:
        repository_path: Path of the local clone.
        issue_content: The issue text that the changes are judged against.
        base_branch: The branch that ``HEAD`` is diffed against. Defaults to
            ``'main'``; pass the repository's actual base branch when it
            differs.

    Returns:
        True if the model answers yes, False if it answers no.
    """
    summary = run_ollama(
        repository_path,
        [
            'Concisely summarize following changeset',
            get_diff(repository_path, base_branch, 'HEAD'),
        ],
    )

    return run_ollama_and_get_yes_or_no(
        repository_path,
        [
            'Does this changeset accomplish the entire task?',
            '# Change set',
            summary,
            '# Issue',
            issue_content,
        ],
    )


def get_head_commit_hash(repository_path: Path) -> str:
    """Return the commit hash that ``HEAD`` points to.

    Args:
        repository_path: Path of the repository to query.

    Returns:
        The output of ``git rev-parse HEAD`` with whitespace stripped.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    completed = subprocess.run(
        ['git', 'rev-parse', 'HEAD'],
        cwd=repository_path,
        capture_output=True,
        text=True,
        check=True,
    )
    return completed.stdout.strip()


def solve_issue_in_repository(
    repository_config: RepositoryConfig,
    repository_path: Path,
    branch_name: str,
    issue_title: str,
    issue_description: str,
    issue_number: str,
    gitea_client,
) -> IssueResolution:
    """Clone the repository and let aider work on one issue until it is judged solved.

    After cloning and preparing a work branch, aider rounds are repeated: each
    round runs aider, pushes the branch and creates a pull request, and then
    asks the evaluator model whether the issue is solved. The loop has no
    iteration limit; it ends when the evaluator answers yes or when a round
    fails.

    Args:
        repository_config: Repository location, owner, name and base branch.
        repository_path: Directory to clone the repository into.
        branch_name: Name of the work branch to create.
        issue_title: Title of the issue; also used as the pull request title.
        issue_description: Body of the issue.
        issue_number: Issue number, used in log messages and the PR body.
        gitea_client: Client object used to create the pull request.

    Returns:
        The resolution returned by ``push_changes`` once the evaluator accepts
        the solution, or an unsuccessful ``IssueResolution`` when aider fails,
        produces no commits, or nothing could be pushed.
    """
    logger.info('### %s #####', issue_title)

    # Setup repository
    run_cmd(['git', 'clone', repository_config.repo_url(), repository_path])
    # Run the test command once on the fresh clone; with run_cmd's default
    # check=True a failing run raises and aborts the whole attempt.
    run_cmd(['bash', '-c', AIDER_TEST], repository_path)
    run_cmd(['git', 'checkout', repository_config.base_branch], repository_path)
    run_cmd(['git', 'checkout', '-b', branch_name], repository_path)

    # Run initial ruff pass before aider
    run_cmd(['bash', '-c', RUFF_FORMAT_AND_AUTO_FIX], repository_path, check=False)
    run_cmd(['git', 'add', '.'], repository_path)
    # check=False: the commit fails when ruff changed nothing.
    run_cmd(['git', 'commit', '-m', 'Initial ruff pass'], repository_path, check=False)

    # Run aider
    issue_content = f'# {issue_title}\n{issue_description}'

    while True:
        # Save the commit hash after ruff but before aider
        pre_aider_commit = get_head_commit_hash(repository_path)

        # Run aider
        aider_did_not_crash = issue_solution_round(repository_path, issue_content)
        if not aider_did_not_crash:
            logger.error('Aider invocation failed for issue #%s', issue_number)
            return IssueResolution(False)

        # Check if aider made any changes beyond the initial ruff pass
        # (compares HEAD against the hash recorded at the start of this round).
        if not has_commits_on_branch(repository_path, pre_aider_commit, 'HEAD'):
            logger.error(
                'Aider did not make any changes beyond the initial ruff pass for issue #%s',
                issue_number,
            )
            return IssueResolution(False)

        # Push changes and create/update the pull request on every iteration
        resolution = push_changes(
            repository_config,
            repository_path,
            branch_name,
            issue_number,
            issue_title,
            gitea_client,
        )
        if not resolution.success:
            return resolution

        # Verify whether this is a satisfactory solution; otherwise go for
        # another aider round with the same issue text.
        if verify_solution(repository_path, issue_content):
            return resolution


def solve_issues_in_repository(
    repository_config: RepositoryConfig,
    client,
    seen_issues_db,
):
    """Attempt to solve every not-yet-seen issue returned by the client.

    Each issue is worked on in its own temporary clone. After a successful
    resolution the pull request's review comments and failing pipelines are
    handled in that same clone, and the issue is then recorded in
    ``seen_issues_db`` together with the pull request information.

    The process exits with status 1 if the issues cannot be retrieved.

    Args:
        repository_config: Repository location, owner, name and base branch.
        client: The Gitea client instance.
        seen_issues_db: Database of previously processed issues.
    """
    try:
        issues = client.get_issues(repository_config.owner, repository_config.repo)
    except Exception:
        logger.exception('Failed to retrieve issues')
        sys.exit(1)

    if not issues:
        logger.info('No issues found for %s', repository_config.repo)
        return

    for issue in issues:
        issue_url = issue.get('web_url')
        issue_number = issue.get('number')
        issue_description = issue.get('body', '')
        title = issue.get('title', f'Issue {issue_number}')
        if seen_issues_db.has_seen(issue_url):
            logger.info('Skipping already processed issue #%s: %s', issue_number, title)
            continue

        branch_name = generate_branch_name(issue_number, title)

        # Everything that needs the clone must happen inside this block: the
        # directory is deleted as soon as the context manager exits.
        with tempfile.TemporaryDirectory() as temporary_directory:
            repository_path = Path(temporary_directory)
            issue_resolution = solve_issue_in_repository(
                repository_config,
                repository_path,
                branch_name,
                title,
                issue_description,
                issue_number,
                client,
            )
            if not issue_resolution.success:
                continue

            # Handle unresolved pull request comments
            handle_pr_comments(
                repository_config,
                issue_resolution.pull_request_id,
                branch_name,
                repository_path,
                client,
                seen_issues_db,
                issue_url,
            )

            # Handle failing pipelines
            handle_failing_pipelines(
                repository_config,
                issue_resolution.pull_request_id,
                branch_name,
                repository_path,
                client,
            )

        seen_issues_db.mark_as_seen(issue_url, str(issue_number))
        seen_issues_db.update_pr_info(
            issue_url,
            issue_resolution.pull_request_id,
            issue_resolution.pull_request_url,
        )
        logger.info(
            'Stored PR #%s information for issue #%s',
            issue_resolution.pull_request_id,
            issue_number,
        )


def handle_pr_comments(
    repository_config,
    pr_number,
    branch_name,
    repository_path,
    client,
    seen_issues_db,
    issue_url,
):
    """Fetch the comments of a pull request and let aider address each one.

    For every comment, a few lines of the commented file are collected as
    context when the comment names a readable file. Aider is then run on the
    comment, and the result is committed and pushed. Commit and push are best
    effort: their failures are ignored.

    Args:
        repository_config: Repository owner and name.
        pr_number: Identifier of the pull request whose comments are fetched.
        branch_name: Branch that the changes are pushed to.
        repository_path: Path of the local clone.
        client: Client object providing ``get_pull_request_comments``.
        seen_issues_db: Currently unused.
        issue_url: Currently unused.
    """
    comments = client.get_pull_request_comments(
        repository_config.owner,
        repository_config.repo,
        pr_number,
    )
    for comment in comments:
        path = comment.get('path')
        line = comment.get('line') or comment.get('position') or 0

        # Comments that are not attached to a file carry no path; they simply
        # get no file context.
        context = ''
        if path:
            try:
                lines = (repository_path / path).read_text().splitlines()
                # Assuming ``line`` is 1-based, this window spans two lines on
                # either side of the commented line, clamped to the file.
                start = max(0, line - 3)
                end = min(len(lines), line + 2)
                context = '\n'.join(lines[start:end])
            except (OSError, ValueError, TypeError):
                # Best effort: continue without context, but leave a trace.
                logger.warning(
                    'Could not read context for comment on %s',
                    path,
                    exc_info=True,
                )

        body = comment.get('body', '')
        issue = (
            f'Resolve the following reviewer comment:\n{body}\n\n'
            f'File: {path}\n\nContext:\n{context}'
        )
        # invoke aider on the comment context
        issue_solution_round(repository_path, issue)
        # commit and push changes for this comment
        run_cmd(['git', 'add', path or '.'], repository_path, check=False)
        run_cmd(
            ['git', 'commit', '-m', f'Resolve comment {comment.get("id")}'],
            repository_path,
            check=False,
        )
        run_cmd(['git', 'push', 'origin', branch_name], repository_path, check=False)

def handle_failing_pipelines(
    repository_config: RepositoryConfig,
    pr_number: str,
    branch_name: str,
    repository_path: Path,
    client,
) -> None:
    """Let aider work on failing pipeline runs of a pull request until none remain.

    For each failed run, the last 100 lines of its log are handed to aider,
    and the result is committed and pushed (best effort).

    NOTE(review): the failed runs are queried again immediately after each
    batch, with no delay and no iteration limit — confirm that the client
    stops reporting a run as failed once a fix has been pushed.

    Args:
        repository_config: Repository owner and name.
        pr_number: Identifier of the pull request whose pipelines are checked.
        branch_name: Branch that the fixes are pushed to.
        repository_path: Path of the local clone.
        client: Client object providing ``get_failed_pipelines`` and
            ``get_pipeline_log``.
    """
    owner, repo = repository_config.owner, repository_config.repo
    while failed_runs := client.get_failed_pipelines(owner, repo, pr_number):
        for run_id in failed_runs:
            log = client.get_pipeline_log(owner, repo, run_id)
            # Only the tail of the log is passed on as context.
            tail = '\n'.join(log.strip().split('\n')[-100:])
            issue = f'Resolve the following failing pipeline run {run_id}:\n\n{tail}'
            issue_solution_round(repository_path, issue)
            run_cmd(['git', 'add', '.'], repository_path, check=False)
            run_cmd(
                ['git', 'commit', '-m', f'Resolve pipeline {run_id}'],
                repository_path,
                check=False,
            )
            run_cmd(['git', 'push', 'origin', branch_name], repository_path, check=False)