Files
fn_registry/python/functions/infra/cache_to_file.py
T
egutierrez 9fd0ca9cac feat: funciones Python infra y tipos Python (core, datascience, infra)
Infra: cache_to_file, cache_to_sqlite, http_download_file, http_get_json,
http_post_json, read_file_with_encoding, safe_extract_zip, scan_directory,
setup_logger, normalize_zip_filenames.
Tipos: 30+ tipos core (agent_action, context, task, message, parse_result...),
6 tipos datascience (entity_candidate, extraction_result...), 2 tipos infra.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-05 17:11:43 +02:00

136 lines
4.6 KiB
Python

"""Cache key-value donde cada entry es un archivo JSON en disco."""
import hashlib
import json
import os
import threading
import time
class FileCache:
"""Cache key-value respaldado en archivos JSON, con metadata sidecar .meta."""
def __init__(self, cache_dir: str, namespace: str = "default") -> None:
self._base = os.path.join(cache_dir, namespace)
self._hits = 0
self._misses = 0
self._lock = threading.Lock()
os.makedirs(self._base, exist_ok=True)
def _hash_key(self, key: str) -> str:
return hashlib.sha256(key.encode("utf-8")).hexdigest()
def _value_path(self, hashed: str) -> str:
return os.path.join(self._base, f"{hashed}.json")
def _meta_path(self, hashed: str) -> str:
return os.path.join(self._base, f"{hashed}.meta")
def _is_expired(self, meta: dict) -> bool:
expires_at = meta.get("expires_at")
if expires_at is None:
return False
return time.time() >= expires_at
def _load_meta(self, hashed: str) -> dict | None:
path = self._meta_path(hashed)
if not os.path.exists(path):
return None
with open(path, "r", encoding="utf-8") as f:
return json.load(f)
def get(self, key: str) -> object:
"""Retorna el valor o None si no existe o esta expirado."""
hashed = self._hash_key(key)
with self._lock:
meta = self._load_meta(hashed)
if meta is None:
self._misses += 1
return None
if self._is_expired(meta):
self._delete_files(hashed)
self._misses += 1
return None
value_path = self._value_path(hashed)
if not os.path.exists(value_path):
self._misses += 1
return None
with open(value_path, "r", encoding="utf-8") as f:
self._hits += 1
return json.load(f)
def set(self, key: str, value: object, ttl: float = 0) -> None:
"""Almacena un valor. ttl en segundos; 0 = sin expiracion."""
hashed = self._hash_key(key)
now = time.time()
expires_at = (now + ttl) if ttl > 0 else None
meta = {"created_at": now, "expires_at": expires_at, "original_key": key}
with self._lock:
with open(self._value_path(hashed), "w", encoding="utf-8") as f:
json.dump(value, f)
with open(self._meta_path(hashed), "w", encoding="utf-8") as f:
json.dump(meta, f)
def _delete_files(self, hashed: str) -> bool:
vp = self._value_path(hashed)
mp = self._meta_path(hashed)
deleted = False
if os.path.exists(vp):
os.remove(vp)
deleted = True
if os.path.exists(mp):
os.remove(mp)
deleted = True
return deleted
def delete(self, key: str) -> bool:
"""Elimina una entrada. Retorna True si existia."""
hashed = self._hash_key(key)
with self._lock:
return self._delete_files(hashed)
def clear(self) -> int:
"""Elimina todas las entradas del namespace. Retorna pares eliminados."""
with self._lock:
count = 0
if not os.path.isdir(self._base):
return 0
for fname in os.listdir(self._base):
if fname.endswith(".json"):
count += 1
fpath = os.path.join(self._base, fname)
os.remove(fpath)
return count
def stats(self) -> dict:
"""Retorna estadisticas del store: hits, misses y size actual."""
with self._lock:
if not os.path.isdir(self._base):
size = 0
else:
size = sum(
1 for f in os.listdir(self._base) if f.endswith(".json")
)
return {"hits": self._hits, "misses": self._misses, "size": size}
def get_or_set(self, key: str, factory: callable, ttl: float = 0) -> object:
"""Retorna el valor cacheado o llama factory() y lo almacena."""
value = self.get(key)
if value is None:
value = factory()
self.set(key, value, ttl)
return value
def cache_to_file(cache_dir: str, namespace: str = "default") -> FileCache:
    """Build a FileCache that persists its entries as JSON files on disk.

    Args:
        cache_dir: Root directory where the cache files are stored.
        namespace: Logical subdirectory inside ``cache_dir``.

    Returns:
        A FileCache exposing get/set/delete/clear/stats/get_or_set.
    """
    store = FileCache(cache_dir=cache_dir, namespace=namespace)
    return store