Files
egutierrez 9fd0ca9cac feat: funciones Python infra y tipos Python (core, datascience, infra)
Infra: cache_to_file, cache_to_sqlite, http_download_file, http_get_json,
http_post_json, read_file_with_encoding, safe_extract_zip, scan_directory,
setup_logger, normalize_zip_filenames.
Tipos: 30+ tipos core (agent_action, context, task, message, parse_result...),
6 tipos datascience (entity_candidate, extraction_result...), 2 tipos infra.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 17:11:43 +02:00

143 lines
4.8 KiB
Python

"""Cache key-value persistido en SQLite con TTL y lazy eviction."""
import json
import sqlite3
import threading
import time
from typing import Callable
class CacheStore:
    """SQLite-backed key-value cache with per-entry TTL and namespaces.

    Values are serialized with ``json.dumps`` and stored in a single ``cache``
    table keyed by ``(namespace, key)``. Expired rows of the current namespace
    are deleted lazily whenever the store is read (``get``, ``get_or_set``,
    ``stats``).

    Each thread uses its own SQLite connection, and every operation holds an
    instance-level lock while it touches the database and the hit/miss
    counters.
    """

    # Sentinel returned by ``_lookup`` so that a cached JSON ``null`` can be
    # told apart from a key that is absent or expired.
    _MISSING = object()

    _schema = """
        CREATE TABLE IF NOT EXISTS cache (
            namespace TEXT NOT NULL,
            key TEXT NOT NULL,
            value TEXT NOT NULL,
            created_at REAL NOT NULL,
            expires_at REAL,
            PRIMARY KEY (namespace, key)
        );
    """

    def __init__(self, db_path: str, namespace: str = "default") -> None:
        """Open the database at *db_path* and make sure the cache table exists.

        Args:
            db_path: Path handed to ``sqlite3.connect``.
            namespace: Namespace that scopes every key handled by this store.
        """
        self._db_path = db_path
        self._namespace = namespace
        self._hits = 0
        self._misses = 0
        self._lock = threading.Lock()
        self._local = threading.local()
        self._init_db()

    def _conn(self) -> sqlite3.Connection:
        """Return the calling thread's connection, opening it on first use."""
        conn = getattr(self._local, "conn", None)
        if conn is None:
            conn = sqlite3.connect(self._db_path, check_same_thread=False)
            try:
                conn.execute("PRAGMA journal_mode=WAL")
                # CREATE TABLE IF NOT EXISTS is idempotent; running it for
                # every new connection keeps a connection reopened after
                # close() usable.
                with conn:
                    conn.execute(self._schema)
            except sqlite3.Error:
                conn.close()
                raise
            self._local.conn = conn
        return conn

    def _init_db(self) -> None:
        """Eagerly open this thread's connection, which also creates the table."""
        self._conn()

    def _evict_expired(self, conn: sqlite3.Connection) -> None:
        """Delete expired rows of the current namespace (lazy eviction).

        The caller is responsible for committing the transaction.
        """
        conn.execute(
            "DELETE FROM cache WHERE namespace = ? AND expires_at IS NOT NULL AND expires_at <= ?",
            (self._namespace, time.time()),
        )

    def _lookup(self, key: str) -> object:
        """Return the decoded value for *key*, or ``_MISSING`` when absent.

        Evicts expired rows first and updates the hit/miss counters.
        """
        with self._lock:
            conn = self._conn()
            # ``with conn`` commits on success and rolls back on error.
            with conn:
                self._evict_expired(conn)
            row = conn.execute(
                "SELECT value FROM cache WHERE namespace = ? AND key = ?",
                (self._namespace, key),
            ).fetchone()
            if row is None:
                self._misses += 1
                return self._MISSING
            self._hits += 1
            return json.loads(row[0])

    def get(self, key: str) -> object:
        """Return the cached value, or ``None`` if it is missing or expired.

        A stored JSON ``null`` is also returned as ``None``.
        """
        value = self._lookup(key)
        return None if value is self._MISSING else value

    def set(self, key: str, value: object, ttl: float = 0) -> None:
        """Store *value* under *key*, replacing any existing entry.

        Args:
            key: Cache key within the store's namespace.
            value: Any object accepted by ``json.dumps``.
            ttl: Lifetime in seconds; zero or a negative number means the
                entry never expires.

        Raises:
            TypeError: If *value* is not JSON-serializable.
        """
        now = time.time()
        expires_at = (now + ttl) if ttl > 0 else None
        payload = json.dumps(value)
        with self._lock:
            conn = self._conn()
            with conn:
                conn.execute(
                    """
                    INSERT INTO cache (namespace, key, value, created_at, expires_at)
                    VALUES (?, ?, ?, ?, ?)
                    ON CONFLICT(namespace, key) DO UPDATE SET
                        value = excluded.value,
                        created_at = excluded.created_at,
                        expires_at = excluded.expires_at
                    """,
                    (self._namespace, key, payload, now, expires_at),
                )

    def delete(self, key: str) -> bool:
        """Remove *key* from the namespace. Return True if a row was deleted."""
        with self._lock:
            conn = self._conn()
            with conn:
                cursor = conn.execute(
                    "DELETE FROM cache WHERE namespace = ? AND key = ?",
                    (self._namespace, key),
                )
            return cursor.rowcount > 0

    def clear(self) -> int:
        """Remove every entry of the namespace. Return the number of rows deleted."""
        with self._lock:
            conn = self._conn()
            with conn:
                cursor = conn.execute(
                    "DELETE FROM cache WHERE namespace = ?",
                    (self._namespace,),
                )
            return cursor.rowcount

    def stats(self) -> dict:
        """Return ``{"hits", "misses", "size"}`` for this store.

        ``hits`` and ``misses`` are the in-memory counters of this instance;
        ``size`` is the number of rows in the namespace after evicting
        expired ones.
        """
        with self._lock:
            conn = self._conn()
            with conn:
                self._evict_expired(conn)
            # An aggregate without GROUP BY always yields exactly one row.
            (size,) = conn.execute(
                "SELECT COUNT(*) FROM cache WHERE namespace = ?",
                (self._namespace,),
            ).fetchone()
            return {"hits": self._hits, "misses": self._misses, "size": size}

    def get_or_set(self, key: str, factory: Callable[[], object], ttl: float = 0) -> object:
        """Return the cached value, or call ``factory()`` and cache its result.

        A cached ``None`` counts as present, so *factory* is not called again
        for it. The lookup and the store are two separate locked steps:
        concurrent callers may each invoke *factory* for the same missing key.

        Args:
            key: Cache key within the store's namespace.
            factory: Zero-argument callable producing the value on a miss.
            ttl: Lifetime in seconds applied when the value is stored.
        """
        value = self._lookup(key)
        if value is self._MISSING:
            value = factory()
            self.set(key, value, ttl)
        return value

    def close(self) -> None:
        """Close the calling thread's connection, if one is open.

        Safe to call more than once. A later operation on the same thread
        opens a new connection.
        """
        conn = getattr(self._local, "conn", None)
        if conn is not None:
            del self._local.conn
            conn.close()
def cache_to_sqlite(db_path: str, namespace: str = "default") -> CacheStore:
    """Build a CacheStore backed by a SQLite file.

    Args:
        db_path: Path to the SQLite file (created if it does not exist).
        namespace: Logical namespace inside the database.

    Returns:
        A CacheStore exposing get/set/delete/clear/stats/get_or_set.
    """
    store = CacheStore(db_path, namespace=namespace)
    return store