docs: Add comprehensive docstrings to seen_issues_db module

This commit is contained in:
Jon Michael Aanes (aider) 2025-04-13 18:42:36 +02:00
parent 107ee6b55c
commit 8d46519784

View File

@@ -1,3 +1,11 @@
"""
Database module for tracking previously processed issues.
This module provides functionality to track which issues have already been processed
by the system to avoid duplicate processing. It uses a simple SQLite database to
store hashes of seen issues for efficient lookup.
"""
import sqlite3 import sqlite3
from hashlib import sha256 from hashlib import sha256
@@ -5,11 +13,33 @@ DEFAULT_DB_PATH = 'output/seen_issues.db'
class SeenIssuesDB: class SeenIssuesDB:
"""
Database handler for tracking processed issues.
This class manages a SQLite database that stores hashes of issues that have
already been processed. It provides methods to mark issues as seen and check
if an issue has been seen before, helping to prevent duplicate processing.
Attributes:
conn: SQLite database connection
"""
def __init__(self, db_path=DEFAULT_DB_PATH):
    """
    Open the SQLite database and make sure the schema exists.

    Args:
        db_path: Path to the SQLite database file, passed straight to
            ``sqlite3.connect``. Defaults to ``DEFAULT_DB_PATH``
            ('output/seen_issues.db').
    """
    self.conn = sqlite3.connect(db_path)
    # Create the seen_issues table up front so later queries can rely on it.
    self._create_table()
def _create_table(self): def _create_table(self):
"""
Create the seen_issues table if it doesn't exist.
Creates a table with a single column for storing issue hashes.
"""
with self.conn: with self.conn:
self.conn.execute(""" self.conn.execute("""
CREATE TABLE IF NOT EXISTS seen_issues ( CREATE TABLE IF NOT EXISTS seen_issues (
@@ -18,6 +48,15 @@ class SeenIssuesDB:
""") """)
def mark_as_seen(self, issue_text: str): def mark_as_seen(self, issue_text: str):
"""
Mark an issue as seen in the database.
Computes a hash of the issue text and stores it in the database.
If the issue has already been marked as seen, this operation has no effect.
Args:
issue_text: The text content of the issue to mark as seen.
"""
issue_hash = self._compute_hash(issue_text) issue_hash = self._compute_hash(issue_text)
with self.conn: with self.conn:
self.conn.execute( self.conn.execute(
@@ -26,6 +65,17 @@ class SeenIssuesDB:
) )
def has_seen(self, issue_text: str) -> bool: def has_seen(self, issue_text: str) -> bool:
"""
Check if an issue has been seen before.
Computes a hash of the issue text and checks if it exists in the database.
Args:
issue_text: The text content of the issue to check.
Returns:
True if the issue has been seen before, False otherwise.
"""
issue_hash = self._compute_hash(issue_text) issue_hash = self._compute_hash(issue_text)
cursor = self.conn.execute( cursor = self.conn.execute(
'SELECT 1 FROM seen_issues WHERE issue_hash = ?', 'SELECT 1 FROM seen_issues WHERE issue_hash = ?',
@@ -34,4 +84,13 @@ class SeenIssuesDB:
return cursor.fetchone() is not None return cursor.fetchone() is not None
def _compute_hash(self, text: str) -> str: def _compute_hash(self, text: str) -> str:
"""
Compute a SHA-256 hash of the given text.
Args:
text: The text to hash.
Returns:
A hexadecimal string representation of the hash.
"""
return sha256(text.encode('utf-8')).hexdigest() return sha256(text.encode('utf-8')).hexdigest()