diff --git a/aider_gitea/seen_issues_db.py b/aider_gitea/seen_issues_db.py
index 19de052..e6ff176 100644
--- a/aider_gitea/seen_issues_db.py
+++ b/aider_gitea/seen_issues_db.py
@@ -1,3 +1,11 @@
+"""
+Database module for tracking previously processed issues.
+
+This module provides functionality to track which issues have already been processed
+by the system to avoid duplicate processing. It uses a simple SQLite database to
+store SHA-256 hashes of seen issue texts for efficient lookup.
+"""
+
 import sqlite3
 from hashlib import sha256
 
@@ -5,11 +13,33 @@ DEFAULT_DB_PATH = 'output/seen_issues.db'
 
 
 class SeenIssuesDB:
+    """
+    Database handler for tracking processed issues.
+
+    This class manages a SQLite database that stores hashes of issues that have
+    already been processed. It provides methods to mark issues as seen and check
+    if an issue has been seen before, helping to prevent duplicate processing.
+
+    Attributes:
+        conn: Open sqlite3 connection shared by all methods of this instance.
+    """
+
     def __init__(self, db_path=DEFAULT_DB_PATH):
+        """
+        Open the SQLite database and create the seen_issues table if needed.
+
+        Args:
+            db_path: Path to the SQLite database file. Defaults to DEFAULT_DB_PATH ('output/seen_issues.db').
+        """
         self.conn = sqlite3.connect(db_path)
         self._create_table()
 
     def _create_table(self):
+        """
+        Create the seen_issues table if it doesn't exist.
+
+        Creates a table with a single column for storing issue hashes.
+        """
         with self.conn:
             self.conn.execute("""
                 CREATE TABLE IF NOT EXISTS seen_issues (
@@ -18,6 +48,15 @@ class SeenIssuesDB:
             """)
 
     def mark_as_seen(self, issue_text: str):
+        """
+        Mark an issue as seen in the database.
+
+        Computes a SHA-256 hash of the issue text and stores it in the database.
+        If the issue has already been marked as seen, this operation has no effect.
+
+        Args:
+            issue_text: The text content of the issue to mark as seen.
+        """
         issue_hash = self._compute_hash(issue_text)
         with self.conn:
             self.conn.execute(
@@ -26,6 +65,17 @@ class SeenIssuesDB:
             )
 
     def has_seen(self, issue_text: str) -> bool:
+        """
+        Check if an issue has been seen before.
+
+        Computes a SHA-256 hash of the issue text and looks it up in the seen_issues table.
+
+        Args:
+            issue_text: The text content of the issue to check.
+
+        Returns:
+            True if the issue has been seen before, False otherwise.
+        """
         issue_hash = self._compute_hash(issue_text)
         cursor = self.conn.execute(
             'SELECT 1 FROM seen_issues WHERE issue_hash = ?',
@@ -34,4 +84,13 @@ class SeenIssuesDB:
         return cursor.fetchone() is not None
 
     def _compute_hash(self, text: str) -> str:
+        """
+        Compute the SHA-256 hash of the UTF-8 encoding of the given text.
+
+        Args:
+            text: The text to hash.
+
+        Returns:
+            The 64-character hexadecimal digest of the hash.
+        """
         return sha256(text.encode('utf-8')).hexdigest()