Compare commits
No commits in common. "fc85c982b9f2508159ee0085f61b3f7236ef4d44" and "107ee6b55c1aa9098fcca52734c1c066e0cc3fc1" have entirely different histories.
fc85c982b9
...
107ee6b55c
|
@ -1,11 +1,3 @@
|
||||||
"""
|
|
||||||
Database module for tracking previously processed issues.
|
|
||||||
|
|
||||||
This module provides functionality to track which issues have already been processed
|
|
||||||
by the system to avoid duplicate processing. It uses a simple SQLite database to
|
|
||||||
store hashes of seen issues for efficient lookup.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import sqlite3
|
import sqlite3
|
||||||
from hashlib import sha256
|
from hashlib import sha256
|
||||||
|
|
||||||
|
@ -13,33 +5,11 @@ DEFAULT_DB_PATH = 'output/seen_issues.db'
|
||||||
|
|
||||||
|
|
||||||
class SeenIssuesDB:
|
class SeenIssuesDB:
|
||||||
"""
|
|
||||||
Database handler for tracking processed issues.
|
|
||||||
|
|
||||||
This class manages a SQLite database that stores hashes of issues that have
|
|
||||||
already been processed. It provides methods to mark issues as seen and check
|
|
||||||
if an issue has been seen before, helping to prevent duplicate processing.
|
|
||||||
|
|
||||||
Attributes:
|
|
||||||
conn: SQLite database connection
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, db_path=DEFAULT_DB_PATH):
|
def __init__(self, db_path=DEFAULT_DB_PATH):
|
||||||
"""
|
|
||||||
Initialize the database connection.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
db_path: Path to the SQLite database file. Defaults to 'output/seen_issues.db'.
|
|
||||||
"""
|
|
||||||
self.conn = sqlite3.connect(db_path)
|
self.conn = sqlite3.connect(db_path)
|
||||||
self._create_table()
|
self._create_table()
|
||||||
|
|
||||||
def _create_table(self):
|
def _create_table(self):
|
||||||
"""
|
|
||||||
Create the seen_issues table if it doesn't exist.
|
|
||||||
|
|
||||||
Creates a table with a single column for storing issue hashes.
|
|
||||||
"""
|
|
||||||
with self.conn:
|
with self.conn:
|
||||||
self.conn.execute("""
|
self.conn.execute("""
|
||||||
CREATE TABLE IF NOT EXISTS seen_issues (
|
CREATE TABLE IF NOT EXISTS seen_issues (
|
||||||
|
@ -48,15 +18,6 @@ class SeenIssuesDB:
|
||||||
""")
|
""")
|
||||||
|
|
||||||
def mark_as_seen(self, issue_text: str):
|
def mark_as_seen(self, issue_text: str):
|
||||||
"""
|
|
||||||
Mark an issue as seen in the database.
|
|
||||||
|
|
||||||
Computes a hash of the issue text and stores it in the database.
|
|
||||||
If the issue has already been marked as seen, this operation has no effect.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
issue_text: The text content of the issue to mark as seen.
|
|
||||||
"""
|
|
||||||
issue_hash = self._compute_hash(issue_text)
|
issue_hash = self._compute_hash(issue_text)
|
||||||
with self.conn:
|
with self.conn:
|
||||||
self.conn.execute(
|
self.conn.execute(
|
||||||
|
@ -65,17 +26,6 @@ class SeenIssuesDB:
|
||||||
)
|
)
|
||||||
|
|
||||||
def has_seen(self, issue_text: str) -> bool:
|
def has_seen(self, issue_text: str) -> bool:
|
||||||
"""
|
|
||||||
Check if an issue has been seen before.
|
|
||||||
|
|
||||||
Computes a hash of the issue text and checks if it exists in the database.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
issue_text: The text content of the issue to check.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
True if the issue has been seen before, False otherwise.
|
|
||||||
"""
|
|
||||||
issue_hash = self._compute_hash(issue_text)
|
issue_hash = self._compute_hash(issue_text)
|
||||||
cursor = self.conn.execute(
|
cursor = self.conn.execute(
|
||||||
'SELECT 1 FROM seen_issues WHERE issue_hash = ?',
|
'SELECT 1 FROM seen_issues WHERE issue_hash = ?',
|
||||||
|
@ -84,13 +34,4 @@ class SeenIssuesDB:
|
||||||
return cursor.fetchone() is not None
|
return cursor.fetchone() is not None
|
||||||
|
|
||||||
def _compute_hash(self, text: str) -> str:
|
def _compute_hash(self, text: str) -> str:
|
||||||
"""
|
|
||||||
Compute a SHA-256 hash of the given text.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
text: The text to hash.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
A hexadecimal string representation of the hash.
|
|
||||||
"""
|
|
||||||
return sha256(text.encode('utf-8')).hexdigest()
|
return sha256(text.encode('utf-8')).hexdigest()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user