9fd0ca9cac
Infra: cache_to_file, cache_to_sqlite, http_download_file, http_get_json, http_post_json, read_file_with_encoding, safe_extract_zip, scan_directory, setup_logger, normalize_zip_filenames. Tipos: 30+ tipos core (agent_action, context, task, message, parse_result...), 6 tipos datascience (entity_candidate, extraction_result...), 2 tipos infra. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
136 lines
4.6 KiB
Python
136 lines
4.6 KiB
Python
"""Cache key-value donde cada entry es un archivo JSON en disco."""
|
|
|
|
import hashlib
|
|
import json
|
|
import os
|
|
import threading
|
|
import time
|
|
|
|
|
|
class FileCache:
|
|
"""Cache key-value respaldado en archivos JSON, con metadata sidecar .meta."""
|
|
|
|
def __init__(self, cache_dir: str, namespace: str = "default") -> None:
|
|
self._base = os.path.join(cache_dir, namespace)
|
|
self._hits = 0
|
|
self._misses = 0
|
|
self._lock = threading.Lock()
|
|
os.makedirs(self._base, exist_ok=True)
|
|
|
|
def _hash_key(self, key: str) -> str:
|
|
return hashlib.sha256(key.encode("utf-8")).hexdigest()
|
|
|
|
def _value_path(self, hashed: str) -> str:
|
|
return os.path.join(self._base, f"{hashed}.json")
|
|
|
|
def _meta_path(self, hashed: str) -> str:
|
|
return os.path.join(self._base, f"{hashed}.meta")
|
|
|
|
def _is_expired(self, meta: dict) -> bool:
|
|
expires_at = meta.get("expires_at")
|
|
if expires_at is None:
|
|
return False
|
|
return time.time() >= expires_at
|
|
|
|
def _load_meta(self, hashed: str) -> dict | None:
|
|
path = self._meta_path(hashed)
|
|
if not os.path.exists(path):
|
|
return None
|
|
with open(path, "r", encoding="utf-8") as f:
|
|
return json.load(f)
|
|
|
|
def get(self, key: str) -> object:
|
|
"""Retorna el valor o None si no existe o esta expirado."""
|
|
hashed = self._hash_key(key)
|
|
with self._lock:
|
|
meta = self._load_meta(hashed)
|
|
if meta is None:
|
|
self._misses += 1
|
|
return None
|
|
if self._is_expired(meta):
|
|
self._delete_files(hashed)
|
|
self._misses += 1
|
|
return None
|
|
value_path = self._value_path(hashed)
|
|
if not os.path.exists(value_path):
|
|
self._misses += 1
|
|
return None
|
|
with open(value_path, "r", encoding="utf-8") as f:
|
|
self._hits += 1
|
|
return json.load(f)
|
|
|
|
def set(self, key: str, value: object, ttl: float = 0) -> None:
|
|
"""Almacena un valor. ttl en segundos; 0 = sin expiracion."""
|
|
hashed = self._hash_key(key)
|
|
now = time.time()
|
|
expires_at = (now + ttl) if ttl > 0 else None
|
|
meta = {"created_at": now, "expires_at": expires_at, "original_key": key}
|
|
with self._lock:
|
|
with open(self._value_path(hashed), "w", encoding="utf-8") as f:
|
|
json.dump(value, f)
|
|
with open(self._meta_path(hashed), "w", encoding="utf-8") as f:
|
|
json.dump(meta, f)
|
|
|
|
def _delete_files(self, hashed: str) -> bool:
|
|
vp = self._value_path(hashed)
|
|
mp = self._meta_path(hashed)
|
|
deleted = False
|
|
if os.path.exists(vp):
|
|
os.remove(vp)
|
|
deleted = True
|
|
if os.path.exists(mp):
|
|
os.remove(mp)
|
|
deleted = True
|
|
return deleted
|
|
|
|
def delete(self, key: str) -> bool:
|
|
"""Elimina una entrada. Retorna True si existia."""
|
|
hashed = self._hash_key(key)
|
|
with self._lock:
|
|
return self._delete_files(hashed)
|
|
|
|
def clear(self) -> int:
|
|
"""Elimina todas las entradas del namespace. Retorna pares eliminados."""
|
|
with self._lock:
|
|
count = 0
|
|
if not os.path.isdir(self._base):
|
|
return 0
|
|
for fname in os.listdir(self._base):
|
|
if fname.endswith(".json"):
|
|
count += 1
|
|
fpath = os.path.join(self._base, fname)
|
|
os.remove(fpath)
|
|
return count
|
|
|
|
def stats(self) -> dict:
|
|
"""Retorna estadisticas del store: hits, misses y size actual."""
|
|
with self._lock:
|
|
if not os.path.isdir(self._base):
|
|
size = 0
|
|
else:
|
|
size = sum(
|
|
1 for f in os.listdir(self._base) if f.endswith(".json")
|
|
)
|
|
return {"hits": self._hits, "misses": self._misses, "size": size}
|
|
|
|
def get_or_set(self, key: str, factory: callable, ttl: float = 0) -> object:
|
|
"""Retorna el valor cacheado o llama factory() y lo almacena."""
|
|
value = self.get(key)
|
|
if value is None:
|
|
value = factory()
|
|
self.set(key, value, ttl)
|
|
return value
|
|
|
|
|
|
def cache_to_file(cache_dir: str, namespace: str = "default") -> FileCache:
    """Build a FileCache that keeps its entries as JSON files on disk.

    Args:
        cache_dir: Root directory under which the cache files are stored.
        namespace: Logical subdirectory inside ``cache_dir``.

    Returns:
        A FileCache exposing get/set/delete/clear/stats/get_or_set.
    """
    file_cache = FileCache(cache_dir=cache_dir, namespace=namespace)
    return file_cache
|