"""Carga (y cachea) un modelo GLiNER2 (NER+RE joint en una sola pasada). LICENSE: Apache 2.0 — uso comercial permitido. Modelo por defecto: fastino/gliner2-large-v1 """ from __future__ import annotations from typing import Any # Cache global: (model_name, device) -> instancia GLiNER2 _MODEL_CACHE: dict[tuple[str, str], Any] = {} def _resolve_device(device: str) -> str: """Resuelve 'auto' a 'cuda' o 'cpu' segun disponibilidad de torch.""" if device != "auto": return device try: import torch except ImportError: return "cpu" return "cuda" if torch.cuda.is_available() else "cpu" def gliner2_load_model( model_name: str = "fastino/gliner2-large-v1", device: str = "auto", ) -> Any: """Load (and cache) a GLiNER2 model. GLiNER2 extracts entities AND relations in a single forward pass using a joint schema (entities + relation_labels). This is ~2x faster than running GLiNER + GLiREL separately for co-occurring entities. Returns model instance with .extract() and .create_schema() methods. LICENSE: Apache 2.0 — commercial use OK. Args: model_name: HuggingFace Hub model ID. Default: fastino/gliner2-large-v1. device: 'auto' uses CUDA if available, else CPU. 'cpu', 'cuda', 'cuda:N'. Returns: GLiNER2 instance cached by (model_name, device). """ resolved = _resolve_device(device) key = (model_name, resolved) if key in _MODEL_CACHE: return _MODEL_CACHE[key] from gliner2 import GLiNER2 # type: ignore[import] m = GLiNER2.from_pretrained(model_name) if hasattr(m, "to") and resolved != "cpu": try: m.to(resolved) except Exception: pass # Fallback to CPU silently _MODEL_CACHE[key] = m return m