aider-gitea/aider_gitea/seen_issues_db.py
Jon Michael Aanes fc85c982b9
Some checks failed
Run Python tests (through Pytest) / Test (push) Successful in 25s
Verify Python project can be installed, loaded and have version checked / Test (push) Has been cancelled
Ruff
2025-04-13 18:42:40 +02:00

97 lines
2.9 KiB
Python

"""
Database module for tracking previously processed issues.
This module provides functionality to track which issues have already been processed
by the system to avoid duplicate processing. It uses a simple SQLite database to
store hashes of seen issues for efficient lookup.
"""
import os
import sqlite3
from hashlib import sha256
# Default on-disk location of the database, relative to the working directory.
DEFAULT_DB_PATH = 'output/seen_issues.db'


class SeenIssuesDB:
    """
    Database handler for tracking processed issues.

    This class manages a SQLite database that stores SHA-256 hashes of issues
    that have already been processed. It provides methods to mark issues as
    seen and check if an issue has been seen before, helping to prevent
    duplicate processing.

    Instances can be used as context managers, in which case the underlying
    connection is closed when the ``with`` block exits.

    Attributes:
        conn: SQLite database connection
    """

    def __init__(self, db_path=DEFAULT_DB_PATH):
        """
        Initialize the database connection.

        The parent directory of ``db_path`` is created if it is missing, so
        that the default location works on a fresh checkout. The
        ``seen_issues`` table is created if it does not exist yet.

        Args:
            db_path: Path to the SQLite database file. Defaults to
                'output/seen_issues.db'. Special SQLite names such as
                ':memory:' are passed through untouched.
        """
        self._ensure_parent_dir(db_path)
        self.conn = sqlite3.connect(db_path)
        self._create_table()

    @staticmethod
    def _ensure_parent_dir(db_path) -> None:
        """
        Create the directory that will contain the database file, if any.

        Args:
            db_path: The database target that is about to be handed to
                ``sqlite3.connect``.
        """
        # 'file:' targets may be SQLite URIs; never derive directories from them.
        if isinstance(db_path, str) and db_path.startswith('file:'):
            return
        parent = os.path.dirname(os.fspath(db_path))
        # ':memory:', '' (temporary database) and bare filenames have no
        # directory component, so there is nothing to create for them.
        if not parent:
            return
        try:
            os.makedirs(parent, exist_ok=True)
        except OSError:
            # Best effort only: if the directory cannot be created, let
            # sqlite3.connect raise its usual error so that callers keep
            # seeing the same exception type as before.
            pass

    def __enter__(self):
        """
        Enter a ``with`` block.

        Returns:
            This instance.
        """
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """
        Close the connection when leaving a ``with`` block.

        Exceptions raised inside the block are not suppressed.
        """
        self.close()

    def close(self) -> None:
        """
        Close the underlying SQLite connection.

        The instance must not be used for further queries afterwards.
        """
        self.conn.close()

    def _create_table(self):
        """
        Create the seen_issues table if it doesn't exist.

        Creates a table with a single column for storing issue hashes.
        """
        # Using the connection as a context manager commits on success and
        # rolls back if the statement raises.
        with self.conn:
            self.conn.execute("""
                CREATE TABLE IF NOT EXISTS seen_issues (
                    issue_hash TEXT PRIMARY KEY
                )
            """)

    def mark_as_seen(self, issue_text: str):
        """
        Mark an issue as seen in the database.

        Computes a hash of the issue text and stores it in the database.
        If the issue has already been marked as seen, this operation has no
        effect.

        Args:
            issue_text: The text content of the issue to mark as seen.
        """
        issue_hash = self._compute_hash(issue_text)
        with self.conn:
            # INSERT OR IGNORE makes re-marking a known issue a no-op instead
            # of a primary-key violation.
            self.conn.execute(
                'INSERT OR IGNORE INTO seen_issues (issue_hash) VALUES (?)',
                (issue_hash,),
            )

    def has_seen(self, issue_text: str) -> bool:
        """
        Check if an issue has been seen before.

        Computes a hash of the issue text and checks if it exists in the
        database.

        Args:
            issue_text: The text content of the issue to check.

        Returns:
            True if the issue has been seen before, False otherwise.
        """
        issue_hash = self._compute_hash(issue_text)
        cursor = self.conn.execute(
            'SELECT 1 FROM seen_issues WHERE issue_hash = ?',
            (issue_hash,),
        )
        return cursor.fetchone() is not None

    def _compute_hash(self, text: str) -> str:
        """
        Compute a SHA-256 hash of the given text.

        Args:
            text: The text to hash; it is encoded as UTF-8 before hashing.

        Returns:
            A hexadecimal string representation of the hash.
        """
        return sha256(text.encode('utf-8')).hexdigest()