chore: initial sync — gliner+glirel benchmark notebooks

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-04 23:44:11 +02:00
commit b8c760d004
49 changed files with 47850 additions and 0 deletions
@@ -0,0 +1,291 @@
+<!DOCTYPE html>
+<html lang="es">
+<head>
+<meta charset="utf-8">
+<title>GLiNER2 Playground — graph_explorer</title>
+<script src="/static/graphology.umd.min.js"></script>
+<script src="/static/sigma.min.js"></script>
+<style>
+  * { box-sizing: border-box; margin: 0; padding: 0; }
+  html, body { height: 100%; font-family: -apple-system, "Segoe UI", Roboto, sans-serif;
+                background: #181a1f; color: #ddd; }
+  .app { display: grid; grid-template-columns: 420px 1fr; height: 100%; gap: 0; }
+  .left { padding: 16px; border-right: 1px solid #2a2d34; display: flex; flex-direction: column; gap: 12px; overflow-y: auto; }
+  h1 { font-size: 14px; font-weight: 600; letter-spacing: 0.02em; color: #fff; }
+  h1 .badge { background: #2c2f3a; color: #9aa0ad; padding: 2px 8px; border-radius: 4px;
+              font-size: 11px; margin-left: 8px; font-weight: 400; }
+  textarea { width: 100%; height: 320px; padding: 10px; font-family: ui-monospace, monospace;
+              font-size: 12px; line-height: 1.45; background: #14161b; color: #d8dadf;
+              border: 1px solid #2a2d34; border-radius: 6px; resize: vertical; }
+  textarea:focus { outline: none; border-color: #3d6cb8; }
+  .controls { display: flex; gap: 8px; align-items: center; }
+  button { background: #3d6cb8; color: #fff; border: none; padding: 8px 14px;
+            border-radius: 6px; font-weight: 600; cursor: pointer; font-size: 13px; }
+  button:hover { background: #4d7cc8; }
+  button:disabled { background: #555; cursor: not-allowed; }
+  label { font-size: 12px; color: #9aa0ad; display: flex; align-items: center; gap: 6px; }
+  input[type="number"] { width: 60px; padding: 4px 6px; background: #14161b; color: #d8dadf;
+                          border: 1px solid #2a2d34; border-radius: 4px; font-size: 12px; }
+  .kpis { display: grid; grid-template-columns: 1fr 1fr; gap: 8px; margin-top: 4px; }
+  .kpi { background: #14161b; border: 1px solid #2a2d34; border-radius: 6px;
+          padding: 10px 12px; }
+  .kpi .num { font-size: 28px; font-weight: 700; color: #fff; }
+  .kpi .lbl { font-size: 11px; color: #9aa0ad; text-transform: uppercase; letter-spacing: 0.06em; }
+  .kpi.full { grid-column: span 2; }
+  .legend { display: flex; gap: 12px; flex-wrap: wrap; font-size: 11px; }
+  .legend-item { display: flex; align-items: center; gap: 4px; }
+  .swatch { width: 10px; height: 10px; border-radius: 50%; border: 1px solid #fff3; }
+  .right { background: #0e1015; position: relative; }
+  #graph { width: 100%; height: 100%; }
+  .empty-msg { position: absolute; inset: 0; display: flex; align-items: center;
+                justify-content: center; color: #4c5060; font-size: 14px; pointer-events: none; }
+  details { background: #14161b; border: 1px solid #2a2d34; border-radius: 6px; padding: 8px 10px;
+            font-size: 11px; color: #9aa0ad; }
+  details summary { cursor: pointer; color: #d8dadf; font-weight: 500; }
+  details pre { margin-top: 6px; font-size: 10px; line-height: 1.4; max-height: 280px; overflow: auto;
+                color: #d8dadf; font-family: ui-monospace, "JetBrains Mono", monospace;
+                background: #0e1015; padding: 6px; border-radius: 4px; white-space: pre; }
+  details[open] summary { color: #fff; margin-bottom: 4px; }
+  .examples { display: flex; flex-direction: column; gap: 4px; }
+  .examples a { color: #9aa0ad; font-size: 11px; cursor: pointer; padding: 4px 6px;
+                 background: #14161b; border: 1px solid #2a2d34; border-radius: 4px; text-decoration: none; }
+  .examples a:hover { background: #1e2027; color: #d8dadf; }
+</style>
+</head>
+<body>
+<div class="app">
+  <div class="left">
+    <h1>GLiNER2 Playground <span class="badge">graph_explorer</span></h1>
+
+    <textarea id="input" placeholder="Pega aqui un texto en castellano (sector empresarial, OSINT, legal...)"></textarea>
+
+    <div class="controls">
+      <button id="btn">Procesar</button>
+      <label>threshold
+        <input id="threshold" type="number" value="0.3" step="0.05" min="0.1" max="0.9">
+      </label>
+      <span id="status" style="font-size: 11px; color: #6c7080;"></span>
+    </div>
+
+    <div class="kpis">
+      <div class="kpi"><div class="num" id="kpi-nodes">—</div><div class="lbl">nodos</div></div>
+      <div class="kpi"><div class="num" id="kpi-edges">—</div><div class="lbl">relaciones</div></div>
+      <div class="kpi full"><div class="num" id="kpi-time">—</div><div class="lbl">tiempo (s)</div></div>
+    </div>
+
+    <div class="legend">
+      <div class="legend-item"><div class="swatch" style="background:#5DA5DA"></div>person</div>
+      <div class="legend-item"><div class="swatch" style="background:#F17CB0"></div>organization</div>
+      <div class="legend-item"><div class="swatch" style="background:#60BD68"></div>location</div>
+    </div>
+
+    <div class="examples">
+      <a data-ex="corp">📰 Ej: corporate ES (Pablo Isla / Inditex)</a>
+      <a data-ex="osint">🔒 Ej: OSINT ES (APT-29 / CozyBear)</a>
+      <a data-ex="banking">🏦 Ej: banca ES (BBVA / Sabadell / OPA)</a>
+    </div>
+
+    <details>
+      <summary>Stack aplicado</summary>
+      <pre>1. snake_case verbal labels
+2. threshold (configurable)
+3. post-filter typed (head_type, tail_type)
+4. coreferencia normalize+substring
+5. chunking automatico > 1500 chars
+6. layout server-side (networkx spring_layout)
+7. render: sigma.js + graphology</pre>
+    </details>
+
+    <details open>
+      <summary>Relaciones extraidas (texto)</summary>
+      <pre id="relations-text">(corre una extraccion para verlo)</pre>
+    </details>
+
+    <details>
+      <summary>Entidades extraidas por tipo</summary>
+      <pre id="entities-text">(corre una extraccion para verlo)</pre>
+    </details>
+
+    <details>
+      <summary>JSON completo</summary>
+      <pre id="raw-json">(corre una extraccion para verlo)</pre>
+    </details>
+
+    <details>
+      <summary>Relaciones descartadas por filtro typed</summary>
+      <pre id="dropped">(corre una extraccion para verlo)</pre>
+    </details>
+  </div>
+  <div class="right">
+    <div id="graph"></div>
+    <div class="empty-msg" id="empty">Pega un texto y pulsa Procesar</div>
+  </div>
+</div>
+
+<script>
+// Filter out benign ResizeObserver warnings (vis-network used to trigger them; sigma may too).
+window.addEventListener('error', e => {
+  if (e.message && e.message.includes('ResizeObserver')) {
+    // Keep listeners registered after this one from seeing the event.
+    e.stopImmediatePropagation();
+    // The return value of an addEventListener listener is ignored, so
+    // `return false` never suppressed anything; preventDefault() is what
+    // marks the error as handled.
+    e.preventDefault();
+  }
+});
+
+// Node fill color per entity type; the first three match the legend swatches,
+// '?' is the entry for nodes without a known type.
+const TYPE_COLOR = { person:'#5DA5DA', organization:'#F17CB0', location:'#60BD68', '?':'#888' };
+
+// Sample input texts, keyed by the data-ex attribute of the links in .examples.
+const EXAMPLES = {
+  corp: `Pablo Isla, expresidente de Inditex, ha sido nombrado consejero de Telefonica. La operacion fue anunciada por el presidente Jose Maria Alvarez-Pallete en Madrid el pasado lunes. Inditex factura mas de 30.000 millones anuales y tiene su sede en Arteixo, A Coruna. En paralelo, Iberdrola y Endesa firmaron un acuerdo de colaboracion en proyectos eolicos en Galicia. El presidente de Iberdrola, Ignacio Galan, se reunio con la CEO de Endesa, Marina Serrano, en Bilbao. El BBVA, presidido por Carlos Torres, mostro interes en participar en la financiacion del proyecto. Su sede central esta en Bilbao.`,
+  osint: `El 15 de agosto de 2024, el grupo APT-29 (atribuido a Rusia) lanzo una campana de phishing contra empresas energeticas espanolas. El servidor de comando y control 185.220.101.45 conectaba con sistemas internos de Iberdrola via TLS. El malware utilizado, identificado como CozyBear, exploto la vulnerabilidad CVE-2024-21412 en Microsoft Defender. El operador @phantomzero reivindico el ataque en un foro de la dark web. El analista Carlos Garcia, del CCN-CERT, publico un informe tecnico. Telefonica Tech alerto a sus clientes sobre indicadores de compromiso adicionales en el dominio cloudfront-cdn[.]net.`,
+  banking: `BBVA, presidido por Carlos Torres, anuncio en mayo de 2024 una OPA hostil sobre Banco Sabadell. Onur Genc, consejero delegado del banco desde 2018, lidero el proceso desde la sede central en Bilbao. Cesar Gonzalez-Bueno, CEO de Sabadell, defendio la independencia junto con su presidente Josep Oliu. Banco Santander, dirigido por Ana Botin, sigue siendo el primer banco espanol. CaixaBank, presidida por Jose Ignacio Goirigolzarri y con sede en Valencia, completo la fusion con Bankia. El Banco de Espana, gobernado por Pablo Hernandez de Cos, supervisa el sector. Luis de Guindos, vicepresidente del Banco Central Europeo, fue ministro de Economia en el gobierno de Mariano Rajoy.`
+};
+
+// Clicking an example link loads the matching sample text into the textarea
+// (an empty string when the link's data-ex key has no entry in EXAMPLES).
+for (const link of document.querySelectorAll('.examples a')) {
+  link.onclick = () => {
+    const sample = EXAMPLES[link.dataset.ex];
+    document.getElementById('input').value = sample || '';
+  };
+}
+
+// Active Sigma instance, kept so it can be killed before a new graph is drawn.
+let renderer = null;
+
+/**
+ * Draw an /extract response inside the #graph container.
+ *
+ * Reads `data.nodes` ({id, label, type, x, y}) and `data.edges`
+ * ({from, to, label}). Any previous Sigma instance is killed first. When
+ * there are no nodes, the #empty overlay is shown instead of a graph.
+ *
+ * @param {object} data Parsed JSON body of the /extract response.
+ */
+function renderGraph(data) {
+  const empty = document.getElementById('empty');
+  const container = document.getElementById('graph');
+
+  if (typeof graphology === 'undefined' || typeof Sigma === 'undefined') {
+    empty.textContent = 'Sigma o graphology no cargaron — verifica /static/';
+    return;
+  }
+
+  if (!data.nodes || !data.nodes.length) {
+    empty.style.display = 'flex';
+    empty.textContent = 'Sin nodos extraidos';
+    if (renderer) { renderer.kill(); renderer = null; }
+    return;
+  }
+
+  empty.style.display = 'none';
+
+  // Build the graph in graphology (the UMD bundle may expose the class in several ways).
+  const Graph = graphology.Graph || graphology.default || graphology;
+  const g = new Graph({ multi: false, type: 'directed', allowSelfLoops: false });
+
+  data.nodes.forEach(n => {
+    if (!g.hasNode(n.id)) {
+      g.addNode(n.id, {
+        label: n.label,
+        // Falls back to a random position when the coordinate is missing (or 0).
+        x: n.x || Math.random() * 10,
+        y: n.y || Math.random() * 10,
+        size: 10,
+        color: TYPE_COLOR[n.type] || '#888',
+      });
+    }
+  });
+
+  // `edges` may be absent from the response; treat that as "no edges".
+  (data.edges || []).forEach((e, i) => {
+    if (!g.hasNode(e.from) || !g.hasNode(e.to)) return;
+    if (e.from === e.to) return;
+    const label = e.label || '';
+
+    if (g.hasEdge(e.from, e.to)) {
+      // The graph is not a multigraph, so a second relation between the same
+      // ordered pair cannot be its own edge. Append its label to the existing
+      // edge instead of silently dropping the relation.
+      const key = g.edge(e.from, e.to);
+      const current = g.getEdgeAttribute(key, 'label') || '';
+      if (label && !current.split(' / ').includes(label)) {
+        g.setEdgeAttribute(key, 'label', current ? `${current} / ${label}` : label);
+      }
+      return;
+    }
+
+    g.addEdgeWithKey(`e${i}`, e.from, e.to, {
+      label,
+      size: 1.5,
+      color: '#666',
+      type: 'arrow',
+    });
+  });
+
+  // Re-instantiate the renderer on a clean container.
+  if (renderer) { renderer.kill(); renderer = null; }
+  container.innerHTML = '';
+  renderer = new Sigma(g, container, {
+    renderEdgeLabels: true,
+    defaultEdgeType: 'arrow',
+    edgeLabelSize: 9,
+    edgeLabelColor: { color: '#aaa' },
+    labelColor: { color: '#fff' },
+    labelSize: 12,
+    labelDensity: 1.0,
+    labelGridCellSize: 80,
+    labelRenderedSizeThreshold: 6,
+    minCameraRatio: 0.05,
+    maxCameraRatio: 6,
+  });
+}
+
+// "Procesar" click: POST the text to /extract, then fill the KPIs, the text
+// panels and the graph from the response. The button is disabled while the
+// request is in flight and re-enabled in `finally`.
+document.getElementById('btn').onclick = async () => {
+  const text = document.getElementById('input').value.trim();
+  if (!text) { alert('Pega algo de texto'); return; }
+  // An empty or invalid threshold field parses to NaN, which JSON.stringify
+  // would send as null; fall back to the field's initial value instead.
+  const parsedThreshold = parseFloat(document.getElementById('threshold').value);
+  const threshold = Number.isFinite(parsedThreshold) ? parsedThreshold : 0.3;
+  const btn = document.getElementById('btn');
+  const status = document.getElementById('status');
+  btn.disabled = true;
+  // Rough progress hint: one chunk per 1500 characters, ~1.5 s per chunk.
+  const estChunks = Math.max(1, Math.ceil(text.length / 1500));
+  status.textContent = estChunks > 1
+    ? `procesando ${estChunks} chunks (~${(estChunks * 1.5).toFixed(0)}s)…`
+    : 'procesando...';
+  try {
+    const res = await fetch('/extract', {
+      method: 'POST', headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ text, threshold }),
+    });
+    // Parse defensively: an error response may not carry a JSON body, and the
+    // HTTP status is more useful to the user than a JSON parse error.
+    const data = await res.json().catch(() => null);
+    if (!res.ok || !data) {
+      // The endpoint reports its own errors under `error`; FastAPI request
+      // validation errors arrive under `detail`.
+      const reason = data && (data.error || (data.detail && JSON.stringify(data.detail)));
+      throw new Error(reason || `extract failed (HTTP ${res.status})`);
+    }
+    document.getElementById('kpi-nodes').textContent = data.n_nodes;
+    document.getElementById('kpi-edges').textContent = data.n_edges;
+    document.getElementById('kpi-time').textContent = data.elapsed_s + 's';
+
+    // Relations as text, padded into columns for readability.
+    const relsText = (data.edges || []).length
+      ? (() => {
+          const padFrom = Math.max(...data.edges.map(e => e.from.length));
+          const padKind = Math.max(...data.edges.map(e => (e.label || '').length));
+          return data.edges.map(e =>
+            `${e.from.padEnd(padFrom)}  --[${(e.label || '').padEnd(padKind)}]-->  ${e.to}`
+          ).join('\n');
+        })()
+      : '(sin relaciones — prueba a bajar threshold o cambiar el texto)';
+    document.getElementById('relations-text').textContent = relsText;
+
+    // Entities grouped by type ('?' when a node has no type).
+    const byType = {};
+    (data.nodes || []).forEach(n => {
+      const t = n.type || '?';
+      if (!byType[t]) byType[t] = [];
+      byType[t].push(n.id);
+    });
+    document.getElementById('entities-text').textContent =
+      Object.keys(byType).sort().map(t =>
+        `${t} (${byType[t].length}):\n  ${byType[t].sort().join(', ')}`
+      ).join('\n\n') || '(sin entidades)';
+
+    // Full JSON (pretty-printed); node coordinates are left out.
+    document.getElementById('raw-json').textContent = JSON.stringify({
+      n_nodes: data.n_nodes,
+      n_edges: data.n_edges,
+      n_chunks: data.n_chunks,
+      n_dropped_typed: data.n_dropped_typed,
+      elapsed_s: data.elapsed_s,
+      nodes: (data.nodes || []).map(n => ({ id: n.id, type: n.type })),
+      edges: data.edges,
+    }, null, 2);
+
+    // Relations rejected by the server-side typed filter.
+    document.getElementById('dropped').textContent = (data.dropped || []).length
+      ? data.dropped.map(d => `${d.from} (${d.head_type}) -[${d.kind}]-> ${d.to} (${d.tail_type})`).join('\n')
+      : '(ninguna — el filtro typed no descarto nada)';
+    const chunkInfo = data.n_chunks > 1 ? ` · ${data.n_chunks} chunks` : '';
+    status.textContent = `${data.n_nodes} nodos · ${data.n_edges} aristas · ${data.elapsed_s}s${chunkInfo}`;
+    renderGraph(data);
+  } catch (e) {
+    console.error('[playground] extract failed:', e);
+    alert('Error: ' + e.message);
+    status.textContent = 'error';
+  } finally {
+    btn.disabled = false;
+  }
+};
+
+// Ctrl+Enter or Meta+Enter inside the textarea triggers the "Procesar" button.
+document.getElementById('input').addEventListener('keydown', evt => {
+  const modifierHeld = evt.ctrlKey || evt.metaKey;
+  if (!modifierHeld || evt.key !== 'Enter') return;
+  document.getElementById('btn').click();
+});
+</script>
+</body>
+</html>
@@ -0,0 +1,264 @@
+"""Playground server — GLiNER2 + post-filter typed sobre cualquier texto.
+
+Aplica las recetas del notebook 08:
+  - snake_case verbal labels
+  - threshold=0.3
+  - post-filter por (head_type, tail_type)
+  - coreference simple normalize+substring
+
+Run:
+  cd playground && ../.venv/bin/python3 server.py
+Luego: http://localhost:7878
+"""
+from __future__ import annotations
+
+import os
+import re
+import sys
+import time
+import warnings
+from collections import defaultdict
+from pathlib import Path
+
+# Silence every Python warning for the whole process and turn off the Hugging Face
+# Hub progress bars (setdefault keeps a value already present in the environment).
+warnings.filterwarnings("ignore")
+os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
+
+# sys.path cleanup (same workaround documented in notebook 08): drop every entry
+# located under _pf, then make sure _pf itself is on the path (inserted first if missing).
+# NOTE(review): _pf is a hard-coded absolute path to one developer's home directory.
+_pf = "/home/lucas/fn_registry/python/functions"
+sys.path = [p for p in sys.path if not p.startswith(_pf + "/")]
+if _pf not in sys.path:
+    sys.path.insert(0, _pf)
+
+from fastapi import FastAPI
+from fastapi.responses import FileResponse, JSONResponse
+from fastapi.staticfiles import StaticFiles
+from pydantic import BaseModel
+from gliner2 import GLiNER2
+
+# Directory that contains this file; index.html and static/ are resolved against it.
+HERE = Path(__file__).resolve().parent
+
+# ── load the model once, at import time ──
+print("[load] GLiNER2-large-v1 (CPU)...", flush=True)
+t0 = time.time()
+MODEL = GLiNER2.from_pretrained("fastino/gliner2-large-v1")
+print(f"[load] done in {time.time()-t0:.1f}s", flush=True)
+
+# ── recipes from notebook 08 ──
+# Entity types requested from the model.
+ENTITY_LABELS = ["person", "organization", "location"]
+# Relation types requested from the model (snake_case verbal labels).
+RELATION_LABELS = [
+    "works_at", "located_in", "ceo_of", "president_of",
+    "headquartered_in", "agreement_with", "subsidiary_of", "founded_by",
+]
+# Typed post-filter: relation type -> (allowed head entity types, allowed tail entity types).
+ALLOWED = {
+    "works_at":         (["person"],                              ["organization"]),
+    "ceo_of":           (["person"],                              ["organization"]),
+    "president_of":     (["person"],                              ["organization"]),
+    "headquartered_in": (["organization"],                        ["location"]),
+    "located_in":       (["organization", "person", "location"],  ["location"]),
+    "agreement_with":   (["organization"],                        ["organization"]),
+    "subsidiary_of":    (["organization"],                        ["organization"]),
+    "founded_by":       (["organization"],                        ["person"]),
+}
+
+
+def normalize_name(s: str) -> str:
+    """Return a comparison key for an entity name.
+
+    Removes the punctuation characters ``. , ; : " ' ` ( ) [ ]``, collapses
+    every whitespace run to a single space, trims both ends and lowercases.
+    """
+    without_punct = re.sub(r"[\.,;:\"'`()\[\]]", "", s.strip())
+    single_spaced = re.sub(r"\s+", " ", without_punct)
+    return single_spaced.strip().lower()
+
+
+def merge_aliases(names: list[str]) -> dict[str, str]:
+    """Map every name in *names* to the canonical spelling of its entity.
+
+    Two passes:
+
+    1. Names with the same ``normalize_name`` key are grouped; the longest
+       spelling (ties broken by string order) is the group's canonical name.
+    2. A canonical name whose normalized form (at least 4 characters) occurs
+       as whole words inside another canonical name's normalized form is
+       absorbed by that other name.
+
+    Args:
+        names: Entity names as extracted; duplicates are allowed.
+
+    Returns:
+        Dict from each distinct input name to its final canonical name.
+    """
+    norm_groups: dict[str, list[str]] = defaultdict(list)
+    for n in names:
+        norm_groups[normalize_name(n)].append(n)
+
+    canonical: dict[str, str] = {}
+    for group in norm_groups.values():
+        winner = max(group, key=lambda x: (len(x), x))
+        for n in group:
+            canonical[n] = winner
+
+    # Longest first. Equal lengths are ordered alphabetically so the outcome
+    # does not depend on set iteration order, which changes between runs
+    # because of string hash randomization.
+    canon_set = sorted(set(canonical.values()), key=lambda x: (-len(x), x))
+
+    # Normalize each canonical name once instead of inside the nested loop.
+    norms = {c: normalize_name(c) for c in canon_set}
+
+    absorbed: dict[str, str] = {}
+    for long_n in canon_set:
+        long_norm = norms[long_n]
+        for short_n in canon_set:
+            if short_n == long_n or short_n in absorbed:
+                continue
+            short_norm = norms[short_n]
+            # Very short names would match inside unrelated names.
+            if len(short_norm) < 4:
+                continue
+            if re.search(r"\b" + re.escape(short_norm) + r"\b", long_norm):
+                absorbed[short_n] = long_n
+
+    def final_name(name: str) -> str:
+        # Follow absorption chains (A -> B -> C) to the last name. Each step
+        # moves to a strictly longer normalized form, so this terminates.
+        while name in absorbed:
+            name = absorbed[name]
+        return name
+
+    return {orig: final_name(canon) for orig, canon in canonical.items()}
+
+
+def filter_typed(rels: dict, name_to_type: dict, allowed: dict) -> tuple[list, list]:
+    """Split relations into those that satisfy the type constraints and those that do not.
+
+    Args:
+        rels: Relation type -> iterable of ``(head, tail)`` name pairs.
+        name_to_type: Lowercased, stripped entity name -> entity type.
+        allowed: Relation type -> ``(allowed head types, allowed tail types)``.
+            A relation type missing from this mapping is never filtered.
+
+    Returns:
+        ``(keep, drop)``: two lists of dicts with the keys ``from``, ``kind``,
+        ``to``, ``head_type`` and ``tail_type``. A type is ``None`` when the
+        name is not in *name_to_type*.
+    """
+    kept: list = []
+    dropped: list = []
+    for rel_type, pairs in rels.items():
+        head_types, tail_types = allowed.get(rel_type, (None, None))
+        unconstrained = head_types is None
+        for head, tail in pairs:
+            head_type = name_to_type.get(head.lower().strip())
+            tail_type = name_to_type.get(tail.lower().strip())
+            record = {
+                "from": head,
+                "kind": rel_type,
+                "to": tail,
+                "head_type": head_type,
+                "tail_type": tail_type,
+            }
+            passes = unconstrained or (head_type in head_types and tail_type in tail_types)
+            target = kept if passes else dropped
+            target.append(record)
+    return kept, dropped
+
+
+def chunk_text(text: str, max_chars: int = 1500, overlap_sentences: int = 2):
+    """Split *text* into sentence-aligned chunks with a sliding overlap.
+
+    Sentences are split after ``.``, ``!`` or ``?`` followed by whitespace.
+    Each chunk after the first starts with the last *overlap_sentences*
+    sentences of the previous chunk, provided those plus the next new
+    sentence fit in *max_chars*. Every chunk takes at least one new sentence,
+    so a single sentence longer than *max_chars* becomes an oversized chunk.
+
+    Returns:
+        List of chunk strings (sentences joined by one space); empty when
+        *text* contains no non-blank sentence.
+    """
+    pieces = re.split(r"(?<=[\.!?])\s+", text)
+    sentences = [p.strip() for p in pieces if p.strip()]
+    total = len(sentences)
+
+    def cost(sentence: str) -> int:
+        # Length of the sentence plus the joining space.
+        return len(sentence) + 1
+
+    chunks = []
+    pos = 0
+    while pos < total:
+        window: list[str] = []
+        used = 0
+
+        # Carry the tail of the previous chunk over, if it leaves room for new text.
+        if chunks and overlap_sentences > 0:
+            carried = chunks[-1][-overlap_sentences:]
+            carried_cost = sum(cost(s) for s in carried)
+            if carried_cost + cost(sentences[pos]) <= max_chars:
+                window = list(carried)
+                used = carried_cost
+
+        # Always consume one new sentence so the loop makes progress.
+        window.append(sentences[pos])
+        used += cost(sentences[pos])
+        pos += 1
+
+        # Then keep adding sentences while they fit.
+        while pos < total and used + cost(sentences[pos]) <= max_chars:
+            window.append(sentences[pos])
+            used += cost(sentences[pos])
+            pos += 1
+
+        chunks.append(window)
+    return [" ".join(w) for w in chunks]
+
+
+def extract_graph(text: str, threshold: float = 0.3, max_chars_per_chunk: int = 1500) -> dict:
+    """Extract entities and relations from *text* and return a laid-out graph.
+
+    Steps: split long text into chunks, run ``MODEL.extract`` on each chunk,
+    drop relations whose endpoint types are not permitted by ``ALLOWED``,
+    merge name aliases, and compute node positions with networkx
+    ``spring_layout``.
+
+    Args:
+        text: Input text.
+        threshold: Forwarded to ``MODEL.extract`` as ``threshold``.
+        max_chars_per_chunk: Texts longer than this are split with ``chunk_text``.
+
+    Returns:
+        Dict with ``elapsed_s``, ``n_chunks``, ``n_nodes``, ``n_edges``,
+        ``n_dropped_typed``, ``nodes`` (id, label, type, x, y), ``edges``
+        (from, to, label) and ``dropped`` (at most the first 10 rejected relations).
+    """
+    schema = MODEL.create_schema().entities(ENTITY_LABELS).relations(RELATION_LABELS)
+
+    # Automatic chunking when the text is long
+    if len(text) <= max_chars_per_chunk:
+        chunks = [text]
+    else:
+        chunks = chunk_text(text, max_chars=max_chars_per_chunk, overlap_sentences=2)
+    print(f"[extract] {len(text)}c → {len(chunks)} chunks", flush=True)
+
+    t0 = time.time()
+
+    # Deduplicated accumulators
+    name_to_type: dict = {}        # name_lower → type (first seen wins)
+    name_canonical: dict = {}      # name_lower → original casing
+    raw_relations: dict = {}       # rel_type → list of (h, t)
+
+    for idx, chunk in enumerate(chunks):
+        r = MODEL.extract(chunk, schema=schema, threshold=threshold)
+        for typ, names in r["entities"].items():
+            for n in names:
+                key = n.lower().strip()
+                if not key:
+                    continue
+                if key not in name_to_type:
+                    name_to_type[key] = typ
+                    name_canonical[key] = n.strip()
+                # same key seen again with a longer spelling: keep the longer one
+                elif len(n.strip()) > len(name_canonical[key]):
+                    name_canonical[key] = n.strip()
+        for rt, pairs in r["relation_extraction"].items():
+            bucket = raw_relations.setdefault(rt, [])
+            for h, t in pairs:
+                bucket.append((h.strip(), t.strip()))
+        if (idx + 1) % 10 == 0:
+            print(f"[extract] chunk {idx+1}/{len(chunks)}  ents acum={len(name_to_type)}  rels acum={sum(len(v) for v in raw_relations.values())}", flush=True)
+
+    # Typed post-filter
+    keep, drop = filter_typed(raw_relations, name_to_type, ALLOWED)
+
+    # Coreference: alias map over the canonical names
+    original_names = list(name_canonical.values())
+    alias = merge_aliases(original_names)
+
+    def resolve(name: str) -> str:
+        # The alias map is keyed by the stored canonical spelling, so go through
+        # name_canonical first. Otherwise a relation endpoint that differs only
+        # in casing from the stored entity would become a separate node.
+        canon = name_canonical.get(name.lower().strip(), name)
+        return alias.get(canon, canon)
+
+    # Build nodes with aliases applied (first type seen for a merged name wins)
+    nodes_dict: dict = {}
+    for key, typ in name_to_type.items():
+        canon_resolved = resolve(name_canonical[key])
+        if canon_resolved not in nodes_dict:
+            nodes_dict[canon_resolved] = typ
+
+    # Build edges, deduplicated after alias resolution
+    edges_set: set = set()
+    for e in keep:
+        h_canon = resolve(e["from"])
+        t_canon = resolve(e["to"])
+        if h_canon == t_canon:
+            continue
+        if h_canon not in nodes_dict:
+            nodes_dict[h_canon] = e.get("head_type") or "?"
+        if t_canon not in nodes_dict:
+            nodes_dict[t_canon] = e.get("tail_type") or "?"
+        edges_set.add((h_canon, e["kind"], t_canon))
+
+    # Sorted so the response does not depend on set iteration order.
+    edges = sorted(edges_set)
+
+    # Server-side layout (sigma only renders)
+    import networkx as nx
+    G = nx.DiGraph()
+    G.add_nodes_from(nodes_dict)
+    for h, k, t in edges:
+        G.add_edge(h, t, kind=k)
+    if G.number_of_nodes() > 0:
+        try:
+            pos = nx.spring_layout(G, k=2.0, iterations=80, seed=42)
+        except Exception:
+            # Deliberate best effort: a layout failure must not fail the request.
+            pos = {n: (0.0, 0.0) for n in G.nodes}
+    else:
+        pos = {}
+
+    elapsed = time.time() - t0
+    print(f"[extract] done {elapsed:.2f}s  nodos={len(nodes_dict)}  aristas={len(edges)}", flush=True)
+
+    return {
+        "elapsed_s": round(elapsed, 2),
+        "n_chunks": len(chunks),
+        "n_nodes": len(nodes_dict),
+        "n_edges": len(edges),
+        "n_dropped_typed": len(drop),
+        "nodes": [
+            {"id": n, "label": n, "type": t,
+             "x": float(pos.get(n, (0.0, 0.0))[0]),
+             "y": float(pos.get(n, (0.0, 0.0))[1])}
+            for n, t in nodes_dict.items()
+        ],
+        "edges": [{"from": h, "to": t, "label": k} for h, k, t in edges],
+        "dropped": drop[:10],
+    }
+
+
+# ── API ──
+# FastAPI application; files under HERE/static are served at /static.
+app = FastAPI(title="GLiNER2 Playground")
+app.mount("/static", StaticFiles(directory=HERE / "static"), name="static")
+
+
+class ExtractReq(BaseModel):
+    """Request body of POST /extract."""
+
+    # Text to run the extraction on.
+    text: str
+    # Threshold forwarded to the extraction; defaults to 0.3.
+    threshold: float = 0.3
+
+
+@app.get("/")
+def index():
+    """Serve the playground page (index.html next to this file)."""
+    page = HERE / "index.html"
+    return FileResponse(page)
+
+
+@app.post("/extract")
+def extract(req: ExtractReq):
+    """Run the extraction on ``req.text``; answer 400 when the text is blank."""
+    has_content = bool(req.text.strip())
+    if has_content:
+        return extract_graph(req.text, threshold=req.threshold)
+    return JSONResponse({"error": "empty text"}, status_code=400)
+
+
+# Script entry point: serve the app with uvicorn on port 7878.
+if __name__ == "__main__":
+    import uvicorn
+    print("\nServing at http://localhost:7878\n", flush=True)
+    # NOTE(review): host "0.0.0.0" listens on every network interface, not only
+    # the localhost address printed above — confirm that exposure is intended.
+    uvicorn.run(app, host="0.0.0.0", port=7878, log_level="warning")