# aider-gitea/aider_gitea/__init__.py

"""Aider Gitea.

A code automation tool that integrates Gitea with Aider to automatically solve issues.

This program monitors your [Gitea](https://about.gitea.com/) repository for issues with the 'aider' label.
When such an issue is found, it:

1. Creates a new branch.
2. Invokes [Aider](https://aider.chat/) to solve the issue using a Large-Language Model.
3. Runs tests and code quality checks.
4. Creates a pull request with the solution.

Inspired by [the AI workflows](https://github.com/oscoreio/ai-workflows/)
project.

## Usage

An application token must be supplied for the `gitea_token` secret. This must
have the following permissions:

- `read:issue`: To be able to read issues on the specified repository.
- `write:repository`: To be able to create pull requests.
- `read:user`: Needed to iterate all user's repositories.

### Command Line

```bash
# Run with default settings
python -m aider_gitea

# Specify custom repository and owner
python -m aider_gitea --owner myorg --repo myproject

# Use a custom Gitea URL
python -m aider_gitea --gitea-url https://gitea.example.com

# Specify a different base branch
python -m aider_gitea --base-branch develop
```

### Python API

```python
from pathlib import Path

from aider_gitea import RepositoryConfig, solve_issue_in_repository

# Solve an issue programmatically
repository_config = RepositoryConfig(
    gitea_url="https://gitea.example.com",
    owner="myorg",
    repo="myproject",
    base_branch="main",
)

# `gitea_client` is an already constructed Gitea API client; it is used to
# create the pull request once the branch has been pushed.
solve_issue_in_repository(
    repository_config,
    Path("/path/to/repo"),
    "issue-123-fix-bug",
    "Fix critical bug",
    "The application crashes when processing large files",
    "123",
    gitea_client,
)
```

### Environment Configuration

The tool uses environment variables for sensitive information:
- `GITEA_TOKEN`: Your Gitea API token
- `LLM_API_KEY`: API key for the language model used by Aider
"""

import dataclasses
import logging
import re
import shlex
import subprocess
import sys
import tempfile
from pathlib import Path

from . import secrets
from ._version import __version__  # noqa: F401

logger = logging.getLogger(__name__)


@dataclasses.dataclass(frozen=True)
class RepositoryConfig:
    """Identify a Gitea repository and the branch pull requests are based on."""

    # Base URL of the Gitea instance, e.g. ``https://gitea.example.com``.
    gitea_url: str
    # Owner (user or organisation) of the repository.
    owner: str
    # Repository name, without the ``.git`` suffix.
    repo: str
    # Branch that new work branches off from and that pull requests target.
    base_branch: str

    def repo_url(self) -> str:
        """Return the SSH-style clone URL for the repository.

        The ``https://`` scheme of ``gitea_url`` is rewritten to ``git@`` and
        the owner/repo path is appended after a colon, giving
        ``git@<host>:<owner>/<repo>.git``.

        Returns:
            The clone URL as a string.
        """
        # A trailing slash on the base URL would otherwise end up in front of
        # the colon (``git@host/:owner/repo.git``), which git cannot resolve.
        base_url = self.gitea_url.rstrip('/')
        return f'{base_url}:{self.owner}/{self.repo}.git'.replace(
            'https://',
            'git@',
        )


@dataclasses.dataclass(frozen=True)
class IssueResolution:
    """Outcome of an attempt to resolve a single issue.

    Failure paths in this module construct it as ``IssueResolution(False)``,
    which leaves both pull request fields at ``None``.
    """

    # True when a solution was produced, pushed and a pull request was opened.
    success: bool
    # Web URL of the created pull request, or None when there is none.
    pull_request_url: str | None = None
    # Identifier (number) of the created pull request as a string, or None.
    pull_request_id: str | None = None


def generate_branch_name(issue_number: str, issue_title: str) -> str:
    """Build a branch name of the form ``issue-<number>-<title-words>``.

    Every character of the title that is not an ASCII letter, digit or space
    is dropped. The remainder is lowercased and split on whitespace, and the
    resulting words are joined with dashes after the ``issue-<number>`` prefix.

    Args:
        issue_number: The issue number to embed in the branch name.
        issue_title: The issue title to derive the readable suffix from.

    Returns:
        The dash-separated branch name.
    """
    cleaned_title = re.sub(r'[^0-9a-zA-Z ]+', '', issue_title).lower()
    title_words = cleaned_title.split()
    return '-'.join(['issue', str(issue_number)] + title_words)


def bash_cmd(*commands: str) -> str:
    """Combine shell commands into a single ``bash -c`` command line.

    The commands are joined with ``;`` behind a leading ``set -e`` so that the
    script stops at the first failing command.

    Args:
        *commands: Shell commands to run in order.

    Returns:
        A command line string of the form ``bash -c '<script>'``.
    """
    script = ';'.join(('set -e', *commands))
    # Quote the script properly: wrapping it in bare double quotes breaks as
    # soon as a command contains a double quote itself, which is the case when
    # the result of one bash_cmd() call is nested inside another.
    return 'bash -c ' + shlex.quote(script)


# Command that creates a virtualenv in ``venv``, installs the project in
# editable mode and runs pytest on the ``test`` directory. It is passed to
# aider as ``--test-cmd`` and is also run once right after cloning.
AIDER_TEST = bash_cmd(
    'virtualenv venv',
    'source venv/bin/activate',
    'pip install -e .',
    'pytest test',
)

# Command that formats the code and applies ruff's automatic fixes. The
# format/fix pair is run twice, presumably so that code rewritten by the
# first round of fixes gets formatted and re-checked as well.
RUFF_FORMAT_AND_AUTO_FIX = bash_cmd(
    'ruff format',
    'ruff check --fix --ignore RUF022 --ignore PGH004',
    'ruff format',
    'ruff check --fix --ignore RUF022 --ignore PGH004',
)

# Lint command passed to aider as ``--lint-cmd``: apply the automatic fixes
# first, then format and run a final check without ``--fix``.
AIDER_LINT = bash_cmd(
    RUFF_FORMAT_AND_AUTO_FIX,
    'ruff format',
    'ruff check --ignore RUF022 --ignore PGH004',
)


# Prompt template handed to aider via ``--message``. The ``{issue}``
# placeholder is filled in with ``str.format`` by create_aider_command, so any
# literal braces added to this text would have to be doubled.
LLM_MESSAGE_FORMAT = """
{issue}

# Solution Details

For code tasks:

1. Create a plan for how to solve the issue.
2. Write unit tests that proves that your solution works.
3. Then, solve the issue by writing the required code.

# Code Quality Pass

4. Perform a code quality pass to ensure all conventions from CONVENTIONS.md are followed and that ruff reports no issues. Improve code quality by adding early error validation, enforcing strict behavior, and avoiding code duplication.

Key focus areas:
- Dataclasses should be marked frozen.
- Re-use functions to avoid code duplication.
- Add early error validation.
- Enforce strict behavior.
"""

# Optional model name forwarded to aider as ``--model``. When falsy (the
# default, None) no ``--model`` argument is added to the aider command.
MODEL = None


def create_aider_command(issue: str) -> list[str]:
    """Assemble the argument list used to invoke aider for one issue.

    Args:
        issue: The issue text; it is substituted into LLM_MESSAGE_FORMAT and
            handed to aider through ``--message``.

    Returns:
        The aider command as a list of arguments, starting with ``'aider'``.
    """
    # Feature toggles, kept as named flags so they are easy to flip.
    cache_prompts = True
    architect_mode = False

    message = LLM_MESSAGE_FORMAT.format(issue=issue)
    command = ['aider']
    command += ['--chat-language', 'english']
    command += ['--no-stream', '--no-analytics']
    command += ['--test-cmd', AIDER_TEST]
    command += ['--lint-cmd', AIDER_LINT]
    command += ['--auto-test', '--no-auto-lint']
    command += ['--read', 'CONVENTIONS.md']
    command += ['--message', message]
    command += ['--yes']

    # One ``--api-key`` argument pair per configured LLM API key.
    for api_key in secrets.llm_api_keys():
        command.extend(['--api-key', api_key])

    if cache_prompts:
        command.append('--cache-prompts')

    if architect_mode:
        command.append('--architect')

    if MODEL:
        command.extend(['--model', MODEL])

    return command


def get_commit_messages(cwd: Path, base_branch: str, current_branch: str) -> list[str]:
    """Get the subjects of commits on the current branch that are not on the base branch.

    Args:
        cwd: The current working directory (repository path).
        base_branch: The name of the base branch to compare against.
        current_branch: The name of the current branch to check for commits.

    Returns:
        The commit subject lines, oldest first. The list is empty when the
        branch has no commits beyond the base branch or when ``git log`` fails.
    """
    try:
        result = subprocess.run(
            ['git', 'log', f'{base_branch}..{current_branch}', '--pretty=format:%s'],
            check=True,
            cwd=cwd,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError:
        logger.exception('Failed to get commit messages on branch %s', current_branch)
        return []

    output = result.stdout.strip()
    if not output:
        # ''.split('\n') would yield [''], which callers would mistake for a
        # single commit with an empty message.
        return []

    # git log prints the newest commit first; reverse into chronological order.
    return output.split('\n')[::-1]


def push_changes(
    repository_config: RepositoryConfig,
    cwd: Path,
    branch_name: str,
    issue_number: str,
    issue_title: str,
    gitea_client,
) -> IssueResolution:
    """Push the work branch and open a pull request for it.

    Args:
        repository_config: Repository the pull request is created in; its
            base branch is the pull request target.
        cwd: Path of the local clone.
        branch_name: Branch to push and use as the pull request head.
        issue_number: Number of the issue the pull request resolves.
        issue_title: Issue title, reused as the pull request title.
        gitea_client: Client object providing ``create_pull_request``.

    Returns:
        ``IssueResolution(False)`` when the branch has no commits beyond the
        base branch; otherwise a successful resolution carrying the pull
        request URL and number taken from the client's response.

    Raises:
        subprocess.CalledProcessError: If ``git push`` fails.
    """
    # Check if there are any commits on the branch before pushing
    if not has_commits_on_branch(cwd, repository_config.base_branch, branch_name):
        logger.info('No commits made on branch %s, skipping push', branch_name)
        return IssueResolution(False)

    # Get commit messages for PR description
    commit_messages = get_commit_messages(
        cwd,
        repository_config.base_branch,
        branch_name,
    )
    description = f'This pull request resolves #{issue_number}\n\n'

    if commit_messages:
        description += '## Commit Messages\n\n'
        description += ''.join(f'- {message}\n' for message in commit_messages)

    # First push the branch without creating a PR
    run_cmd(['git', 'push', 'origin', branch_name, '--force'], cwd)

    # Then create the PR with the aider label
    pr_response = gitea_client.create_pull_request(
        owner=repository_config.owner,
        repo=repository_config.repo,
        title=issue_title,
        body=description,
        head=branch_name,
        base=repository_config.base_branch,
        labels=['aider'],
    )

    # Pass the fields by keyword: IssueResolution declares the URL before the
    # id, so positional arguments in (number, url) order would swap them.
    pr_number = pr_response.get('number')
    return IssueResolution(
        success=True,
        pull_request_url=pr_response.get('html_url'),
        # Keep a missing number as None instead of the string 'None'.
        pull_request_id=None if pr_number is None else str(pr_number),
    )


def has_commits_on_branch(cwd: Path, base_branch: str, current_branch: str) -> bool:
    """Check if there are any commits on the current branch that aren't in the base branch.

    Args:
        cwd: The current working directory (repository path).
        base_branch: The name of the base branch to compare against.
        current_branch: The name of the current branch to check for commits.

    Returns:
        True if there are commits on the current branch not in the base branch,
        False otherwise. False is also returned when the check itself fails.
    """
    try:
        commit_messages = get_commit_messages(cwd, base_branch, current_branch)
        # Ignore blank entries: empty ``git log`` output split on newlines
        # gives a list holding one empty string, which is not a commit.
        return any(message.strip() for message in commit_messages)
    except Exception:
        # Best effort: a failed check is reported as "no commits" so that
        # nothing gets pushed.
        logger.exception('Failed to check commits on branch %s', current_branch)
        return False


def run_cmd(cmd: list[str], cwd: Path | None = None, check=True) -> bool:
    """Run a shell command and return its success status.

    Args:
        cmd: The command to run as a list of strings.
        cwd: The directory to run the command in.
        check: Whether to raise an exception if the command fails.

    Returns:
        True if the command succeeded, False otherwise.
    """
    result = subprocess.run(cmd, check=check, cwd=cwd)
    return result.returncode == 0


# Testing switch: when True, solve_issue_in_repository does not invoke aider,
# treats the run as successful and skips its "no files changed" check.
SKIP_AIDER = False


def solve_issue_in_repository(
    repository_config: RepositoryConfig,
    tmpdirname: Path,
    branch_name: str,
    issue_title: str,
    issue_description: str,
    issue_number: str,
    gitea_client,
) -> IssueResolution:
    """Clone the repository, let aider work on one issue and push the result.

    Steps, in order: clone into ``tmpdirname``, run AIDER_TEST, check out the
    base branch and create ``branch_name``, commit an initial ruff pass, run
    aider with the issue text, commit a second ruff pass, and finally push the
    branch and open a pull request via push_changes.

    Args:
        repository_config: Repository to clone and its base branch.
        tmpdirname: Directory the repository is cloned into.
        branch_name: Name of the work branch to create.
        issue_title: Title of the issue; also used as the pull request title.
        issue_description: Body text of the issue.
        issue_number: Number of the issue being solved.
        gitea_client: Client object handed on to push_changes.

    Returns:
        ``IssueResolution(False)`` when aider exits unsuccessfully or when no
        files differ from the commit recorded before aider ran; otherwise
        whatever push_changes returns.

    Raises:
        subprocess.CalledProcessError: If one of the commands executed with
            ``check=True`` (clone, AIDER_TEST, checkouts, ``git add``,
            ``git rev-parse``, ``git diff``, and the push inside push_changes)
            fails.
    """
    logger.info('### %s #####', issue_title)

    # Setup repository
    run_cmd(['git', 'clone', repository_config.repo_url(), tmpdirname])
    # Runs with check=True, so a failure here aborts the whole attempt.
    run_cmd(['bash', '-c', AIDER_TEST], tmpdirname)
    run_cmd(['git', 'checkout', repository_config.base_branch], tmpdirname)
    run_cmd(['git', 'checkout', '-b', branch_name], tmpdirname)

    # Run initial ruff pass before aider
    run_cmd(['bash', '-c', RUFF_FORMAT_AND_AUTO_FIX], tmpdirname, check=False)
    run_cmd(['git', 'add', '.'], tmpdirname)
    # check=False: the commit is allowed to fail, presumably because there is
    # nothing to commit when ruff changed no files.
    run_cmd(['git', 'commit', '-m', 'Initial ruff pass'], tmpdirname, check=False)

    # Save the commit hash after ruff but before aider
    result = subprocess.run(
        ['git', 'rev-parse', 'HEAD'],
        check=True,
        cwd=tmpdirname,
        capture_output=True,
        text=True,
    )
    pre_aider_commit = result.stdout.strip()

    # Run aider
    issue_content = f'# {issue_title}\n{issue_description}'
    if not SKIP_AIDER:
        succeeded = run_cmd(
            create_aider_command(issue_content),
            tmpdirname,
            check=False,
        )
    else:
        logger.warning('Skipping aider command (for testing)')
        succeeded = True
    if not succeeded:
        logger.error('Aider invocation failed for issue #%s', issue_number)
        return IssueResolution(False)

    # Auto-fix standard code quality stuff after aider
    run_cmd(['bash', '-c', RUFF_FORMAT_AND_AUTO_FIX], tmpdirname, check=False)
    run_cmd(['git', 'add', '.'], tmpdirname)
    run_cmd(['git', 'commit', '-m', 'Ruff after aider'], tmpdirname, check=False)

    # Check if aider made any changes beyond the initial ruff pass
    result = subprocess.run(
        ['git', 'diff', pre_aider_commit, 'HEAD', '--name-only'],
        check=True,
        cwd=tmpdirname,
        capture_output=True,
        text=True,
    )
    files_changed = result.stdout.strip()

    # With SKIP_AIDER set, an empty diff is expected and does not stop the run.
    if not files_changed and not SKIP_AIDER:
        logger.info(
            'Aider did not make any changes beyond the initial ruff pass for issue #%s',
            issue_number,
        )
        return IssueResolution(False)

    # Push changes
    return push_changes(
        repository_config,
        tmpdirname,
        branch_name,
        issue_number,
        issue_title,
        gitea_client,
    )


def solve_issues_in_repository(
    repository_config: RepositoryConfig,
    client,
    seen_issues_db,
):
    """Attempt to solve every not yet processed issue of a repository.

    The issues come from ``client.get_issues``; any filtering (presumably to
    open issues carrying the 'aider' label) happens in the client, not here.
    Each issue is solved in its own temporary directory. Only issues that are
    resolved successfully are marked as seen, so unsuccessful ones are
    attempted again on a later call.

    If the issues cannot be retrieved, the error is logged and the process
    exits with status 1.

    Args:
        repository_config: The repository whose issues are processed.
        client: The Gitea client instance.
        seen_issues_db: Database of previously processed issues.
    """
    try:
        issues = client.get_issues(repository_config.owner, repository_config.repo)
    except Exception:
        logger.exception('Failed to retrieve issues')
        sys.exit(1)

    if not issues:
        logger.info('No issues found for %s', repository_config.repo)
        return

    for issue in issues:
        # The issue URL serves as the key in the seen-issues database.
        issue_url = issue.get('web_url')
        issue_number = issue.get('number')
        issue_description = issue.get('body', '')
        title = issue.get('title', f'Issue {issue_number}')
        if seen_issues_db.has_seen(issue_url):
            logger.info('Skipping already processed issue #%s: %s', issue_number, title)
            continue

        branch_name = generate_branch_name(issue_number, title)
        # Work in a throwaway clone that is removed when the block exits.
        with tempfile.TemporaryDirectory() as tmpdirname:
            issue_resolution = solve_issue_in_repository(
                repository_config,
                Path(tmpdirname),
                branch_name,
                title,
                issue_description,
                issue_number,
                client,
            )

        if issue_resolution.success:
            seen_issues_db.mark_as_seen(issue_url, str(issue_number))
            seen_issues_db.update_pr_info(
                issue_url,
                issue_resolution.pull_request_id,
                issue_resolution.pull_request_url,
            )
            logger.info(
                'Stored PR #%s information for issue #%s',
                issue_resolution.pull_request_id,
                issue_number,
            )