"""Load (and cache) the REBEL model for English relation extraction."""

from __future__ import annotations

from typing import Any

# Process-wide cache: model_name -> (tokenizer, model).
_MODEL_CACHE: dict[str, tuple[Any, Any]] = {}


def rebel_load_model(
    model_name: str = "Babelscape/rebel-large",
) -> tuple[Any, Any]:
    """Return the REBEL ``(tokenizer, model)`` pair, loading it at most once.

    The pair is built with ``AutoTokenizer.from_pretrained`` and
    ``AutoModelForSeq2SeqLM.from_pretrained``; the model is switched to
    evaluation mode with ``model.eval()`` before it is stored in the
    module-level cache. Later calls with the same ``model_name`` return
    the cached pair without touching ``transformers`` again.

    Notes carried over from the original author (not verifiable from this
    code): REBEL is a BART-based seq2seq relation-extraction model of about
    1.5 GB, trained on English Wikipedia (KELM), that produces
    (head, relation, tail) triplets from English text only. The first call
    downloads the weights from the HuggingFace Hub.

    NOTE(review): the original docstring stated "Apache 2.0 - commercial
    use permitted". Confirm this against the model card on the Hub before
    relying on it; the upstream card appears to list a non-commercial
    Creative Commons license instead.

    Args:
        model_name: HuggingFace Hub model ID. Defaults to
            ``"Babelscape/rebel-large"``.

    Returns:
        Tuple ``(tokenizer, model)``, with the model in eval mode.

    Raises:
        ImportError: if ``transformers`` cannot be imported.
        OSError: presumably propagated from ``from_pretrained`` when the
            model cannot be downloaded or read from disk.
    """
    # Fast path: this model name has already been loaded in this process.
    try:
        return _MODEL_CACHE[model_name]
    except KeyError:
        pass

    # Imported lazily so that importing this module does not require the
    # optional ``transformers`` dependency.
    try:
        from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
    except ImportError as exc:
        message = (
            "transformers no esta instalado. Instalalo con "
            "`uv pip install transformers` o `uv pip install -e '.[nlp]'`."
        )
        raise ImportError(message) from exc

    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
    model.eval()  # inference only: disable dropout and similar layers

    # Cache only after both loads succeeded, so a failed load is retried.
    entry = (tokenizer, model)
    _MODEL_CACHE[model_name] = entry
    return entry