aider-gitea/aider_gitea/__init__.py

627 lines
18 KiB
Python

"""Aider Gitea.
A code automation tool that integrates Gitea with Aider to automatically solve issues.
This program monitors your [Gitea](https://about.gitea.com/) repository for issues with the 'aider' label.
When such an issue is found, it:
1. Creates a new branch.
2. Invokes [Aider](https://aider.chat/) to solve the issue using a Large-Language Model.
3. Runs tests and code quality checks.
4. Creates a pull request with the solution.
Inspired by [the AI workflows](https://github.com/oscoreio/ai-workflows/)
project.
## Usage
An application token must be supplied for the `gitea_token` secret. This must
have the following permissions:
- `read:issue`: To be able to read issues on the specified repository.
- `write:repository`: To be able to create pull requests.
- `read:user`: Needed to iterate all user's repositories.
### Command Line
```bash
# Run with default settings
python -m aider_gitea
# Specify custom repository and owner
python -m aider_gitea --owner myorg --repo myproject
# Use a custom Gitea URL
python -m aider_gitea --gitea-url https://gitea.example.com
# Specify a different base branch
python -m aider_gitea --base-branch develop
```
### Python API
```python
from pathlib import Path
from aider_gitea import RepositoryConfig, solve_issue_in_repository
# Solve an issue programmatically
repository_config = RepositoryConfig(
    gitea_url="https://gitea.example.com",
    owner="myorg",
    repo="myproject",
    base_branch="main",
)
solve_issue_in_repository(
    repository_config,
    Path("/path/to/repo"),
    "issue-123-fix-bug",
    "Fix critical bug",
    "The application crashes when processing large files",
    "123",
    gitea_client,  # client object providing `create_pull_request`
)
```
### Environment Configuration
The tool uses environment variables for sensitive information:
- `GITEA_TOKEN`: Your Gitea API token
- `LLM_API_KEY`: API key for the language model used by Aider
"""
import dataclasses
import logging
import re
import shlex
import subprocess
import sys
import tempfile
from pathlib import Path

from . import secrets
from ._version import __version__ # noqa: F401
from .seen_issues_db import SeenIssuesDB
logger = logging.getLogger(__name__)
@dataclasses.dataclass(frozen=True)
class RepositoryConfig:
    """Immutable description of the Gitea repository being worked on."""

    # Base URL of the Gitea instance, e.g. ``https://gitea.example.com``.
    gitea_url: str
    # Owner (user or organisation) of the repository.
    owner: str
    # Name of the repository, without the ``.git`` suffix.
    repo: str
    # Branch that work branches start from and that pull requests target.
    base_branch: str

    def repo_url(self) -> str:
        """Return the clone URL derived from this configuration.

        The URL is assembled as ``<gitea_url>:<owner>/<repo>.git`` and every
        ``https://`` occurrence in it is then rewritten to ``git@``.
        """
        location = f'{self.gitea_url}:{self.owner}/{self.repo}.git'
        return location.replace('https://', 'git@')
@dataclasses.dataclass(frozen=True)
class IssueResolution:
    """Immutable outcome of an attempt to resolve one issue.

    Fields are accepted positionally in declaration order: ``success``,
    then ``pull_request_url``, then ``pull_request_id``.
    """

    # Whether the attempt succeeded.
    success: bool
    # URL of the associated pull request, if any.
    pull_request_url: str | None = None
    # Identifier of the associated pull request, if any.
    pull_request_id: str | None = None
def generate_branch_name(issue_number: str, issue_title: str) -> str:
    """Derive a git branch name from an issue number and title.

    Every character of the title that is not an ASCII letter, digit or space
    is dropped; the remainder is lowercased and its whitespace-separated
    words are joined with dashes behind an ``issue-<number>`` prefix.

    Args:
        issue_number: The issue number to include in the branch name.
        issue_title: The issue title to sanitize and include in the branch name.

    Returns:
        The branch name, e.g. ``issue-12-fix-login-bug``.
    """
    cleaned_title = re.sub(r'[^0-9a-zA-Z ]+', '', issue_title).lower()
    return '-'.join(['issue', str(issue_number), *cleaned_title.split()])
def bash_cmd(*commands: str) -> str:
    """Build a ``bash -c`` command line that runs *commands* in sequence.

    The commands are joined with ``;`` behind a leading ``set -e`` so the
    script aborts at the first failing command. The resulting script is
    shell-quoted as a single argument, so commands that themselves contain
    quotes (including the output of another ``bash_cmd`` call) survive
    intact when the returned string is parsed by a shell.

    Args:
        *commands: Shell commands to run, in order.

    Returns:
        A single shell command string of the form ``bash -c '<script>'``.
    """
    script = ';'.join(('set -e', *commands))
    return 'bash -c ' + shlex.quote(script)
# Test command: create and activate a virtualenv, install the project in
# editable mode and run pytest on the `test` directory.
AIDER_TEST = bash_cmd(
    'virtualenv venv',
    'source venv/bin/activate',
    'pip install -e .',
    'pytest test',
)
# Ruff formatting plus auto-fix; the format/fix pair is issued twice.
RUFF_FORMAT_AND_AUTO_FIX = bash_cmd(
    'ruff format',
    'ruff check --fix --ignore RUF022 --ignore PGH004',
    'ruff format',
    'ruff check --fix --ignore RUF022 --ignore PGH004',
)
# Lint command: the auto-fix pass, then a final format and a non-fixing check.
# NOTE(review): RUFF_FORMAT_AND_AUTO_FIX is already a complete `bash -c ...`
# string and is nested here as a single command -- confirm that bash_cmd's
# quoting survives this nesting.
AIDER_LINT = bash_cmd(
    RUFF_FORMAT_AND_AUTO_FIX,
    'ruff format',
    'ruff check --ignore RUF022 --ignore PGH004',
)
# Prompt template for aider; `{issue}` is replaced with the issue text.
LLM_MESSAGE_FORMAT = (
    """{issue}\nDo not wait for explicit approval before working on code changes."""
)
# CODE_MODEL = 'ollama/gemma3:4b'
# Model passed to aider via `--model`.
CODE_MODEL = 'o4-mini'
# Model run through `ollama run` to summarise and judge a changeset.
EVALUATOR_MODEL = 'ollama/gemma3:27b'
def create_aider_command(issue: str) -> list[str]:
    """Build the argument list for a non-interactive aider invocation.

    Args:
        issue: Text describing the task; it is substituted into
            ``LLM_MESSAGE_FORMAT`` and passed via ``--message``.

    Returns:
        The aider command as a list of arguments suitable for
        ``subprocess.run``.
    """
    # Not passed at present: --no-check-update, --read CONVENTIONS.md, --architect.
    command = [
        'aider',
        '--chat-language',
        'english',
        '--no-stream',
        '--no-analytics',
        '--test-cmd',
        AIDER_TEST,
        '--lint-cmd',
        AIDER_LINT,
        '--auto-test',
        '--no-auto-lint',
        '--yes',
    ]
    for key in secrets.llm_api_keys():
        command += ['--api-key', key]
    command.append('--cache-prompts')
    if CODE_MODEL:
        command += ['--model', CODE_MODEL]
        # Ollama models get automatic linting switched back on; this flag
        # comes after the earlier --no-auto-lint on the command line.
        if CODE_MODEL.startswith('ollama/'):
            command.append('--auto-lint')
    command += ['--message', LLM_MESSAGE_FORMAT.format(issue=issue)]
    return command
def get_commit_messages(cwd: Path, base_branch: str, current_branch: str) -> list[str]:
    """Get the subjects of commits on the current branch that are not on the base branch.

    Args:
        cwd: The current working directory (repository path).
        base_branch: The name of the base branch to compare against.
        current_branch: The name of the current branch to check for commits.

    Returns:
        The commit subject lines, oldest first. The list is empty when there
        are no such commits or when ``git log`` fails.
    """
    try:
        result = subprocess.run(
            ['git', 'log', f'{base_branch}..{current_branch}', '--pretty=format:%s'],
            check=True,
            cwd=cwd,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError:
        logger.exception('Failed to get commit messages on branch %s', current_branch)
        return []
    output = result.stdout.strip()
    # ''.split('\n') is [''], which would look like one (empty) commit.
    if not output:
        return []
    # git log lists the newest commit first; callers want chronological order.
    return output.split('\n')[::-1]
def get_diff(cwd: Path, base_branch: str, current_branch: str) -> str:
    """Return the output of ``git diff <base_branch>..<current_branch>``.

    Args:
        cwd: The repository directory to run git in.
        base_branch: Left-hand side of the revision range.
        current_branch: Right-hand side of the revision range.

    Returns:
        The diff text with surrounding whitespace stripped.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    diff_command = [
        'git',
        'diff',
        f'{base_branch}..{current_branch}',
        '--pretty=format:%s',
    ]
    completed = subprocess.run(
        diff_command,
        cwd=cwd,
        check=True,
        capture_output=True,
        text=True,
    )
    return completed.stdout.strip()
def push_changes(
    repository_config: RepositoryConfig,
    cwd: Path,
    branch_name: str,
    issue_number: str,
    issue_title: str,
    gitea_client,
) -> IssueResolution:
    """Force-push the work branch and open a pull request for it.

    Args:
        repository_config: Repository the pull request is created in.
        cwd: Path of the local clone.
        branch_name: Branch to push and use as the pull request head.
        issue_number: Issue referenced in the pull request description.
        issue_title: Used as the pull request title.
        gitea_client: Client object providing ``create_pull_request``.

    Returns:
        An unsuccessful ``IssueResolution`` when the branch has no commits
        beyond the base branch; otherwise a successful one carrying the pull
        request's URL and number.

    Raises:
        subprocess.CalledProcessError: If ``git push`` fails.
    """
    base_branch = repository_config.base_branch

    # Check if there are any commits on the branch before pushing
    if not has_commits_on_branch(cwd, base_branch, branch_name):
        logger.info('No commits made on branch %s, skipping push', branch_name)
        return IssueResolution(False)

    # Get commit messages for PR description
    commit_messages = get_commit_messages(cwd, base_branch, branch_name)
    description = f'This pull request resolves #{issue_number}\n\n'
    if commit_messages:
        description += '## Commit Messages\n\n'
        description += ''.join(f'- {message}\n' for message in commit_messages)

    # First push the branch without creating a PR
    run_cmd(['git', 'push', 'origin', branch_name, '--force'], cwd)

    # Then create the PR with the aider label
    pr_response = gitea_client.create_pull_request(
        owner=repository_config.owner,
        repo=repository_config.repo,
        title=issue_title,
        body=description,
        head=branch_name,
        base=base_branch,
        labels=['aider'],
    )

    # Keyword arguments keep the URL and the number in their own fields;
    # IssueResolution declares pull_request_url before pull_request_id.
    return IssueResolution(
        True,
        pull_request_url=pr_response.get('html_url'),
        pull_request_id=str(pr_response.get('number')),
    )
def has_commits_on_branch(cwd: Path, base_branch: str, current_branch: str) -> bool:
    """Check if there are any commits on the current branch that aren't in the base branch.

    Args:
        cwd: The current working directory (repository path).
        base_branch: The name of the base branch to compare against.
        current_branch: The name of the current branch to check for commits.

    Returns:
        True if there are commits on the current branch not in the base branch,
        False otherwise (including when the check itself fails).
    """
    try:
        commit_messages = get_commit_messages(cwd, base_branch, current_branch)
    except Exception:
        logger.exception('Failed to check commits on branch %s', current_branch)
        return False
    # Blank entries do not count: splitting empty `git log` output yields a
    # single empty string, which must not be mistaken for a commit.
    return any(message.strip() for message in commit_messages)
def run_cmd(cmd: list[str], cwd: Path | None = None, check=True) -> bool:
"""Run a shell command and return its success status.
Args:
cmd: The command to run as a list of strings.
cwd: The directory to run the command in.
check: Whether to raise an exception if the command fails.
Returns:
True if the command succeeded, False otherwise.
"""
result = subprocess.run(cmd, check=check, cwd=cwd)
return result.returncode == 0
def _redact_api_keys(command: list[str]) -> list[str]:
    """Return a copy of *command* with each value following ``--api-key`` masked."""
    redacted = []
    mask_next = False
    for argument in command:
        if mask_next:
            redacted.append('<redacted>')
            mask_next = False
        else:
            redacted.append(argument)
            mask_next = argument == '--api-key'
    return redacted


def issue_solution_round(repository_path, issue_content):
    """Run aider once on *issue_content*, then apply and commit ruff fixes.

    Args:
        repository_path: Path of the local clone aider runs in.
        issue_content: Task text handed to aider.

    Returns:
        False if aider exited with a non-zero status (nothing further is
        done in that case), True otherwise.

    Raises:
        subprocess.CalledProcessError: If ``git add`` fails.
    """
    # Primary Aider command
    aider_command = create_aider_command(issue_content)
    # The command line carries API keys; never echo them verbatim.
    print(_redact_api_keys(aider_command))
    aider_did_not_crash = run_cmd(
        aider_command,
        repository_path,
        check=False,
    )
    if not aider_did_not_crash:
        return aider_did_not_crash
    # Auto-fix standard code quality stuff after aider
    run_cmd(['bash', '-c', RUFF_FORMAT_AND_AUTO_FIX], repository_path, check=False)
    run_cmd(['git', 'add', '.'], repository_path)
    # The commit is allowed to fail, e.g. when ruff changed nothing.
    run_cmd(['git', 'commit', '-m', 'Ruff after aider'], repository_path, check=False)
    return True
def run_ollama(cwd: Path, texts: list[str]) -> str:
    """Feed *texts* to the evaluator model through ``ollama run``.

    The texts are joined with newlines and written to the process's standard
    input. The command and the model's standard output are both printed.

    Args:
        cwd: Directory to run ollama in.
        texts: Prompt fragments, sent in order.

    Returns:
        Everything the process wrote to standard output.
    """
    command = ['ollama', 'run', EVALUATOR_MODEL.removeprefix('ollama/')]
    print(command)
    completed = subprocess.run(
        command,
        cwd=cwd,
        input='\n'.join(texts),
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
    )
    print(completed.stdout)
    return completed.stdout
def parse_yes_no_answer(text: str) -> bool | None:
text = text.lower().strip()
words = text.split('\n \t.,?-')
print(words)
if words[-1] in {'yes', 'agree'}:
return True
if words[-1] in {'no', 'disagree'}:
return False
return None
def run_ollama_and_get_yes_or_no(cwd, initial_texts: list[str]) -> bool:
    """Query the evaluator model until its reply parses as yes or no.

    The prompt starts as *initial_texts* plus a request to think the answer
    through. After each reply that cannot be parsed, the reply and a reminder
    to answer "yes" or "no" are appended and the model is queried again.
    There is no limit on the number of attempts.

    Args:
        cwd: Directory to run ollama in.
        initial_texts: Prompt fragments; the list itself is not modified.

    Returns:
        The parsed verdict.
    """
    conversation = [*initial_texts, 'Think through your answer.']
    while True:
        reply = run_ollama(cwd, conversation)
        verdict = parse_yes_no_answer(reply)
        if verdict is None:
            conversation += [reply, 'Please answer either "yes" or "no".']
            continue
        return verdict
def verify_solution(
    repository_path: Path,
    issue_content: str,
    base_branch: str = 'main',
) -> bool:
    """Ask the evaluator model whether the current changes solve the issue.

    The diff between *base_branch* and ``HEAD`` is first summarised by the
    model; the summary and the issue text are then put to the model as a
    yes/no question.

    Args:
        repository_path: Path of the local clone.
        issue_content: Text of the issue being solved.
        base_branch: Branch the diff is taken against. Defaults to ``main``.

    Returns:
        True if the model answers that the changeset accomplishes the task,
        False otherwise.

    Raises:
        subprocess.CalledProcessError: If ``git diff`` fails.
    """
    summary = run_ollama(
        repository_path,
        [
            'Concisely summarize following changeset',
            get_diff(repository_path, base_branch, 'HEAD'),
        ],
    )
    return run_ollama_and_get_yes_or_no(
        repository_path,
        [
            'Does this changeset accomplish the entire task?',
            '# Change set',
            summary,
            '# Issue',
            issue_content,
        ],
    )
def get_head_commit_hash(repository_path: Path) -> str:
    """Return the output of ``git rev-parse HEAD`` for the given clone.

    Args:
        repository_path: Path of the local clone.

    Returns:
        The command's standard output with surrounding whitespace stripped.

    Raises:
        subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    completed = subprocess.run(
        ['git', 'rev-parse', 'HEAD'],
        cwd=repository_path,
        check=True,
        capture_output=True,
        text=True,
    )
    return completed.stdout.strip()
def solve_issue_in_repository(
    repository_config: RepositoryConfig,
    repository_path: Path,
    branch_name: str,
    issue_title: str,
    issue_description: str,
    issue_number: str,
    gitea_client,
) -> IssueResolution:
    """Clone the repository and run aider on one issue until the evaluator accepts the result.

    Args:
        repository_config: Repository to clone and open the pull request in.
        repository_path: Directory the repository is cloned into.
        branch_name: Name of the work branch to create.
        issue_title: Title of the issue; also passed on as the pull request title.
        issue_description: Body of the issue.
        issue_number: Number of the issue, used in logs and passed to ``push_changes``.
        gitea_client: Client object passed through to ``push_changes``.

    Returns:
        An unsuccessful ``IssueResolution`` when aider fails, when no commits
        are found after an aider round, or when ``push_changes`` reports
        failure; otherwise the resolution returned by ``push_changes`` once
        ``verify_solution`` answers yes.

    Raises:
        subprocess.CalledProcessError: If cloning, the initial test command,
            a checkout or ``git add`` fails (these run with ``check=True``).
    """
    logger.info('### %s #####', issue_title)
    # Setup repository
    run_cmd(['git', 'clone', repository_config.repo_url(), repository_path])
    # Runs AIDER_TEST once in the fresh clone before any changes are made.
    run_cmd(['bash', '-c', AIDER_TEST], repository_path)
    run_cmd(['git', 'checkout', repository_config.base_branch], repository_path)
    run_cmd(['git', 'checkout', '-b', branch_name], repository_path)
    # Run initial ruff pass before aider
    run_cmd(['bash', '-c', RUFF_FORMAT_AND_AUTO_FIX], repository_path, check=False)
    run_cmd(['git', 'add', '.'], repository_path)
    # The commit is allowed to fail (check=False), e.g. when ruff changed nothing.
    run_cmd(['git', 'commit', '-m', 'Initial ruff pass'], repository_path, check=False)
    # Run aider
    issue_content = f'# {issue_title}\n{issue_description}'
    # Repeats aider rounds with no upper bound until one of the returns below fires.
    while True:
        # Save the commit hash after ruff but before aider
        pre_aider_commit = get_head_commit_hash(repository_path)
        # Run aider
        aider_did_not_crash = issue_solution_round(repository_path, issue_content)
        if not aider_did_not_crash:
            logger.error('Aider invocation failed for issue #%s', issue_number)
            return IssueResolution(False)
        # Check if aider made any changes beyond the initial ruff pass
        if not has_commits_on_branch(repository_path, pre_aider_commit, 'HEAD'):
            logger.error(
                'Aider did not make any changes beyond the initial ruff pass for issue #%s',
                issue_number,
            )
            return IssueResolution(False)
        # Push changes and create/update the pull request on every iteration
        # NOTE(review): push_changes calls create_pull_request each time --
        # confirm the client tolerates an already existing pull request.
        resolution = push_changes(
            repository_config,
            repository_path,
            branch_name,
            issue_number,
            issue_title,
            gitea_client,
        )
        if not resolution.success:
            return resolution
        # Verify whether this is a satisfactory solution
        if verify_solution(repository_path, issue_content):
            return resolution
def solve_issues_in_repository(
    repository_config: RepositoryConfig,
    client,
    seen_issues_db,
):
    """Process the issues returned by the client for one repository.

    Each issue not yet recorded in ``seen_issues_db`` is solved in a fresh
    temporary clone. On success, pull request comments and failing pipelines
    are handled and the issue is recorded together with its pull request.
    The process exits with status 1 if the issues cannot be retrieved.

    Args:
        repository_config: Repository whose issues are processed.
        client: The Gitea client instance.
        seen_issues_db: Database of previously processed issues.
    """
    # NOTE(review): presumably get_issues returns only open issues with the
    # 'aider' label; no filtering is visible in this function -- confirm.
    try:
        issues = client.get_issues(repository_config.owner, repository_config.repo)
    except Exception:
        logger.exception('Failed to retrieve issues')
        sys.exit(1)
    if not issues:
        logger.info('No issues found for %s', repository_config.repo)
        return
    for issue in issues:
        # The issue URL is the key used by the seen-issues database.
        issue_url = issue.get('web_url')
        issue_number = issue.get('number')
        issue_description = issue.get('body', '')
        title = issue.get('title', f'Issue {issue_number}')
        if seen_issues_db.has_seen(issue_url):
            logger.info('Skipping already processed issue #%s: %s', issue_number, title)
            continue
        branch_name = generate_branch_name(issue_number, title)
        # The clone lives in a temporary directory that is removed afterwards;
        # the follow-up handlers below still need it, so they run inside.
        with tempfile.TemporaryDirectory() as repository_path:
            issue_resolution = solve_issue_in_repository(
                repository_config,
                Path(repository_path),
                branch_name,
                title,
                issue_description,
                issue_number,
                client,
            )
            if issue_resolution.success:
                # Handle unresolved pull request comments
                handle_pr_comments(
                    repository_config,
                    issue_resolution.pull_request_id,
                    branch_name,
                    Path(repository_path),
                    client,
                    seen_issues_db,
                    issue_url,
                )
                # Handle failing pipelines
                handle_failing_pipelines(
                    repository_config,
                    issue_resolution.pull_request_id,
                    branch_name,
                    Path(repository_path),
                    client,
                )
                # Record the issue and its pull request.
                seen_issues_db.mark_as_seen(issue_url, str(issue_number))
                seen_issues_db.update_pr_info(
                    issue_url,
                    issue_resolution.pull_request_id,
                    issue_resolution.pull_request_url,
                )
                logger.info(
                    'Stored PR #%s information for issue #%s',
                    issue_resolution.pull_request_id,
                    issue_number,
                )
def _comment_context(file_path, line) -> str:
    """Return the lines around *line* in *file_path*, or '' if they cannot be read."""
    try:
        lines = file_path.read_text().splitlines()
        start = max(0, line - 3)
        end = min(len(lines), line + 2)
        return '\n'.join(lines[start:end])
    except (OSError, ValueError, TypeError):
        # Best effort: a missing/unreadable file or an unusable line number
        # just means the prompt carries no context.
        return ''


def handle_pr_comments(
    repository_config,
    pr_number,
    branch_name,
    repository_path,
    client,
    seen_issues_db,
    issue_url,
):
    """Fetch PR comments and resolve each of them via aider.

    For every comment an aider round is run with the comment body and, when
    the comment names a file, a few lines of that file around the commented
    line. The result is committed and pushed after each comment.

    Args:
        repository_config: Repository the pull request belongs to.
        pr_number: Identifier of the pull request whose comments are fetched.
        branch_name: Branch that is pushed after each comment.
        repository_path: Path of the local clone.
        client: Client object providing ``get_pull_request_comments``.
        seen_issues_db: Not used by this function.
        issue_url: Not used by this function.
    """
    comments = client.get_pull_request_comments(
        repository_config.owner,
        repository_config.repo,
        pr_number,
    )
    for comment in comments:
        path = comment.get('path')
        body = comment.get('body', '')
        if path:
            line = comment.get('line') or comment.get('position') or 0
            context = _comment_context(repository_path / path, line)
            issue = (
                f'Resolve the following reviewer comment:\n{body}\n\n'
                f'File: {path}\n\nContext:\n{context}'
            )
        else:
            # Comments without a file path carry no file context; joining
            # the repository path with None would raise a TypeError.
            issue = f'Resolve the following reviewer comment:\n{body}'
        # invoke aider on the comment context
        issue_solution_round(repository_path, issue)
        # commit and push changes for this comment
        run_cmd(['git', 'add', path or '.'], repository_path, check=False)
        run_cmd(
            ['git', 'commit', '-m', f'Resolve comment {comment.get("id")}'],
            repository_path,
            check=False,
        )
        run_cmd(['git', 'push', 'origin', branch_name], repository_path, check=False)
def handle_failing_pipelines(
    repository_config: RepositoryConfig,
    pr_number: str,
    branch_name: str,
    repository_path: Path,
    client,
) -> None:
    """Run aider on each failing pipeline of a pull request until none are reported.

    The failed runs are fetched repeatedly. For each one, the last 100 lines
    of its log are handed to aider, and the result is committed and pushed.

    Args:
        repository_config: Repository the pull request belongs to.
        pr_number: Identifier of the pull request.
        branch_name: Branch that is pushed after each run is handled.
        repository_path: Path of the local clone.
        client: Client object providing ``get_failed_pipelines`` and
            ``get_pipeline_log``.
    """
    owner = repository_config.owner
    repo = repository_config.repo
    while failed_runs := client.get_failed_pipelines(owner, repo, pr_number):
        for run_id in failed_runs:
            log = client.get_pipeline_log(owner, repo, run_id)
            # Only the tail of the log is passed on.
            tail = log.strip().split('\n')[-100:]
            context = '\n'.join(tail)
            issue = f'Resolve the following failing pipeline run {run_id}:\n\n{context}'
            issue_solution_round(repository_path, issue)
            run_cmd(['git', 'add', '.'], repository_path, check=False)
            commit_command = ['git', 'commit', '-m', f'Resolve pipeline {run_id}']
            run_cmd(commit_command, repository_path, check=False)
            push_command = ['git', 'push', 'origin', branch_name]
            run_cmd(push_command, repository_path, check=False)