feat: funciones Python infra y tipos Python (core, datascience, infra)
Infra: cache_to_file, cache_to_sqlite, http_download_file, http_get_json, http_post_json, read_file_with_encoding, safe_extract_zip, scan_directory, setup_logger, normalize_zip_filenames. Tipos: 30+ tipos core (agent_action, context, task, message, parse_result...), 6 tipos datascience (entity_candidate, extraction_result...), 2 tipos infra. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,135 @@
|
||||
"""Cache key-value donde cada entry es un archivo JSON en disco."""
|
||||
|
||||
import hashlib
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
import time
|
||||
|
||||
|
||||
class FileCache:
|
||||
"""Cache key-value respaldado en archivos JSON, con metadata sidecar .meta."""
|
||||
|
||||
def __init__(self, cache_dir: str, namespace: str = "default") -> None:
|
||||
self._base = os.path.join(cache_dir, namespace)
|
||||
self._hits = 0
|
||||
self._misses = 0
|
||||
self._lock = threading.Lock()
|
||||
os.makedirs(self._base, exist_ok=True)
|
||||
|
||||
def _hash_key(self, key: str) -> str:
|
||||
return hashlib.sha256(key.encode("utf-8")).hexdigest()
|
||||
|
||||
def _value_path(self, hashed: str) -> str:
|
||||
return os.path.join(self._base, f"{hashed}.json")
|
||||
|
||||
def _meta_path(self, hashed: str) -> str:
|
||||
return os.path.join(self._base, f"{hashed}.meta")
|
||||
|
||||
def _is_expired(self, meta: dict) -> bool:
|
||||
expires_at = meta.get("expires_at")
|
||||
if expires_at is None:
|
||||
return False
|
||||
return time.time() >= expires_at
|
||||
|
||||
def _load_meta(self, hashed: str) -> dict | None:
|
||||
path = self._meta_path(hashed)
|
||||
if not os.path.exists(path):
|
||||
return None
|
||||
with open(path, "r", encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
|
||||
def get(self, key: str) -> object:
|
||||
"""Retorna el valor o None si no existe o esta expirado."""
|
||||
hashed = self._hash_key(key)
|
||||
with self._lock:
|
||||
meta = self._load_meta(hashed)
|
||||
if meta is None:
|
||||
self._misses += 1
|
||||
return None
|
||||
if self._is_expired(meta):
|
||||
self._delete_files(hashed)
|
||||
self._misses += 1
|
||||
return None
|
||||
value_path = self._value_path(hashed)
|
||||
if not os.path.exists(value_path):
|
||||
self._misses += 1
|
||||
return None
|
||||
with open(value_path, "r", encoding="utf-8") as f:
|
||||
self._hits += 1
|
||||
return json.load(f)
|
||||
|
||||
def set(self, key: str, value: object, ttl: float = 0) -> None:
|
||||
"""Almacena un valor. ttl en segundos; 0 = sin expiracion."""
|
||||
hashed = self._hash_key(key)
|
||||
now = time.time()
|
||||
expires_at = (now + ttl) if ttl > 0 else None
|
||||
meta = {"created_at": now, "expires_at": expires_at, "original_key": key}
|
||||
with self._lock:
|
||||
with open(self._value_path(hashed), "w", encoding="utf-8") as f:
|
||||
json.dump(value, f)
|
||||
with open(self._meta_path(hashed), "w", encoding="utf-8") as f:
|
||||
json.dump(meta, f)
|
||||
|
||||
def _delete_files(self, hashed: str) -> bool:
|
||||
vp = self._value_path(hashed)
|
||||
mp = self._meta_path(hashed)
|
||||
deleted = False
|
||||
if os.path.exists(vp):
|
||||
os.remove(vp)
|
||||
deleted = True
|
||||
if os.path.exists(mp):
|
||||
os.remove(mp)
|
||||
deleted = True
|
||||
return deleted
|
||||
|
||||
def delete(self, key: str) -> bool:
|
||||
"""Elimina una entrada. Retorna True si existia."""
|
||||
hashed = self._hash_key(key)
|
||||
with self._lock:
|
||||
return self._delete_files(hashed)
|
||||
|
||||
def clear(self) -> int:
|
||||
"""Elimina todas las entradas del namespace. Retorna pares eliminados."""
|
||||
with self._lock:
|
||||
count = 0
|
||||
if not os.path.isdir(self._base):
|
||||
return 0
|
||||
for fname in os.listdir(self._base):
|
||||
if fname.endswith(".json"):
|
||||
count += 1
|
||||
fpath = os.path.join(self._base, fname)
|
||||
os.remove(fpath)
|
||||
return count
|
||||
|
||||
def stats(self) -> dict:
|
||||
"""Retorna estadisticas del store: hits, misses y size actual."""
|
||||
with self._lock:
|
||||
if not os.path.isdir(self._base):
|
||||
size = 0
|
||||
else:
|
||||
size = sum(
|
||||
1 for f in os.listdir(self._base) if f.endswith(".json")
|
||||
)
|
||||
return {"hits": self._hits, "misses": self._misses, "size": size}
|
||||
|
||||
def get_or_set(self, key: str, factory: callable, ttl: float = 0) -> object:
|
||||
"""Retorna el valor cacheado o llama factory() y lo almacena."""
|
||||
value = self.get(key)
|
||||
if value is None:
|
||||
value = factory()
|
||||
self.set(key, value, ttl)
|
||||
return value
|
||||
|
||||
|
||||
def cache_to_file(cache_dir: str, namespace: str = "default") -> FileCache:
    """Build a cache whose entries are stored as JSON files on disk.

    Args:
        cache_dir: Root directory under which the cache files are stored.
        namespace: Logical subdirectory inside ``cache_dir``.

    Returns:
        A FileCache exposing get/set/delete/clear/stats/get_or_set.
    """
    cache = FileCache(cache_dir, namespace=namespace)
    return cache
|
||||
Reference in New Issue
Block a user