# fn_registry/python/functions/datascience/mrebel_base_load_model.py

"""Load (and cache) the mREBEL-base model (fast variant, 250M params)."""

from __future__ import annotations

from typing import Any

from python.functions.datascience.mrebel_load_model import mrebel_load_model


def mrebel_base_load_model(
    model_name: str = "Babelscape/mrebel-base",
    src_lang: str = "es_XX",
    tgt_lang: str = "tp_XX",
) -> tuple[Any, Any]:
    """Return the mREBEL-base tokenizer/model pair.

    Convenience entry point that forwards its arguments, by keyword, to
    ``mrebel_load_model`` and hands back whatever that function returns.
    The only difference from calling the generic loader directly is the
    default checkpoint: the base variant (250M params, ~900 MB), which
    trades some recall on complex sentences for faster inference than the
    large variant. Loading and caching are handled by the delegate.

    LICENSE NOTICE: Babelscape/mrebel-base is distributed under
    CC BY-NC-SA 4.0 (Creative Commons Non-Commercial Share-Alike). Do NOT
    ship it in commercial products; swap in a different model instead.

    Args:
        model_name: HuggingFace Hub model ID; the base checkpoint by default.
        src_lang: Source language code handed to the mBART tokenizer.
        tgt_lang: Target language token for the decoder (``"tp_XX"`` for
            mREBEL).

    Returns:
        The ``(tokenizer, model)`` tuple produced by ``mrebel_load_model``,
        ready for inference.

    Raises:
        ImportError: if ``transformers`` is not installed (raised by the
            delegate).
        OSError: if the model cannot be downloaded or loaded from disk
            (raised by the delegate).
    """
    # Forward by keyword so this wrapper does not depend on the positional
    # order of the underlying loader's parameters.
    loader_kwargs = {
        "model_name": model_name,
        "src_lang": src_lang,
        "tgt_lang": tgt_lang,
    }
    tokenizer_and_model = mrebel_load_model(**loader_kwargs)
    return tokenizer_and_model