"""Entity and relation extraction in a single pass with GLiNER2."""

from __future__ import annotations

import time
from typing import Any


def extract_graph_gliner2(
    text: str,
    entity_labels: list[str],
    relation_labels: list | dict,
    model: Any,
    threshold: float = 0.3,
    include_confidence: bool = False,
) -> dict:
    """Extract entities and relations using GLiNER2 with one schema pass.

    High-level wrapper over the GLiNER2 API. Builds the schema, runs the
    extraction and normalizes the result into a flat dict. It does NOT apply
    post-filtering or coreference resolution; that is left to the caller
    (via filter_relations_by_entity_types and merge_entity_aliases).

    Args:
        text: Text to analyze. Recommended: <= 1500 chars (pre-chunked).
        entity_labels: Entity type names,
            e.g. ["person", "organization", "location"].
        relation_labels: Relation types, either a list of names or a dict
            {label: description}, e.g. ["works_at", "ceo_of"] or
            {"works_at": "person works at organization"}.
        model: GLiNER2 instance loaded with gliner2_load_model. It must
            provide create_schema() and extract().
        threshold: Confidence threshold (0-1). 0.3 is the value validated
            empirically in the analysis notebooks.
        include_confidence: If True, the model returns per-entity and
            per-relation scores (GLiNER2 internal format).

    Returns:
        A dict with three keys:
            "entities": {type: [name, ...]} as returned by the model, or {}
                when the model result has no such key.
            "relation_extraction": {rel_type: [(head, tail), ...]} as
                returned by the model, or {} when the key is absent.
            "elapsed_s": seconds spent inside model.extract(), rounded to
                3 decimals. Schema construction is not included.
    """
    schema = model.create_schema().entities(entity_labels).relations(relation_labels)

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # (or turn negative) by wall-clock adjustments while the model runs.
    started = time.perf_counter()
    raw = model.extract(
        text,
        schema=schema,
        threshold=threshold,
        include_confidence=include_confidence,
    )
    elapsed = round(time.perf_counter() - started, 3)

    return {
        "entities": raw.get("entities", {}),
        "relation_extraction": raw.get("relation_extraction", {}),
        "elapsed_s": elapsed,
    }