From 268a76602a5555617958f3a10043574c95c8ea0d Mon Sep 17 00:00:00 2001 From: Egutierrez Date: Wed, 1 Apr 2026 20:55:39 +0200 Subject: [PATCH] =?UTF-8?q?feat:=20funciones=20Jupyter=20notebook=20Python?= =?UTF-8?q?=20=E2=80=94=20discover,=20read,=20write,=20exec,=20kernel?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Funciones Python para interactuar con Jupyter Lab programáticamente: descubrir instancias, leer/escribir celdas, ejecutar código y gestionar kernels. Reemplazan MCP jupyter con API REST + WebSocket directa. Co-Authored-By: Claude Opus 4.6 (1M context) --- python/functions/notebook/__init__.py | 1 + python/functions/notebook/jupyter_discover.md | 88 +++++ python/functions/notebook/jupyter_discover.py | 219 ++++++++++++ python/functions/notebook/jupyter_exec.md | 94 +++++ python/functions/notebook/jupyter_exec.py | 280 +++++++++++++++ python/functions/notebook/jupyter_kernel.md | 136 ++++++++ python/functions/notebook/jupyter_kernel.py | 287 +++++++++++++++ python/functions/notebook/jupyter_read.md | 118 +++++++ python/functions/notebook/jupyter_read.py | 293 ++++++++++++++++ python/functions/notebook/jupyter_write.md | 111 ++++++ python/functions/notebook/jupyter_write.py | 329 ++++++++++++++++++ 11 files changed, 1956 insertions(+) create mode 100644 python/functions/notebook/__init__.py create mode 100644 python/functions/notebook/jupyter_discover.md create mode 100644 python/functions/notebook/jupyter_discover.py create mode 100644 python/functions/notebook/jupyter_exec.md create mode 100644 python/functions/notebook/jupyter_exec.py create mode 100644 python/functions/notebook/jupyter_kernel.md create mode 100644 python/functions/notebook/jupyter_kernel.py create mode 100644 python/functions/notebook/jupyter_read.md create mode 100644 python/functions/notebook/jupyter_read.py create mode 100644 python/functions/notebook/jupyter_write.md create mode 100644 python/functions/notebook/jupyter_write.py diff 
--git a/python/functions/notebook/__init__.py b/python/functions/notebook/__init__.py new file mode 100644 index 00000000..42ea774c --- /dev/null +++ b/python/functions/notebook/__init__.py @@ -0,0 +1 @@ +# notebook — funciones para interaccion con Jupyter diff --git a/python/functions/notebook/jupyter_discover.md b/python/functions/notebook/jupyter_discover.md new file mode 100644 index 00000000..a01d6168 --- /dev/null +++ b/python/functions/notebook/jupyter_discover.md @@ -0,0 +1,88 @@ +--- +name: jupyter_discover +kind: function +lang: py +domain: notebook +version: "1.0.0" +purity: impure +signature: "def jupyter_discover(registry_root: str = \"\", ports: list[int] | None = None) -> list[dict]" +description: "Descubre instancias de Jupyter Lab activas escaneando archivos .jupyter-port en analysis/ y puertos comunes (8888-8892). Para cada instancia consulta /api/status, /api/config, /api/kernels y /api/sessions via HTTP REST." +tags: [jupyter, notebook, discovery, api, http, kernels, sessions, analysis] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "error_go_core" +imports: [json, os, urllib.error, urllib.request, pathlib] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/notebook/jupyter_discover.py" +--- + +## Ejemplo + +```python +from notebook.jupyter_discover import jupyter_discover + +# Descubrir con deteccion automatica de puertos +instances = jupyter_discover(registry_root="/home/lucas/fn_registry") + +# Escanear puertos especificos +instances = jupyter_discover(ports=[8888, 8900]) + +for inst in instances: + print(inst["url"], inst["collaborative"], len(inst["kernels"])) +# http://localhost:8888 True 2 +``` + +## Estructura del dict retornado + +Cada elemento de la lista tiene la siguiente forma: + +```python +{ + "url": "http://localhost:8888", + "port": 8888, + "analysis": "finanzas_personales", # nombre del subdirectorio en analysis/, o "" + "collaborative": True, # True si 
YDocExtension esta activo + "kernels": [ + { + "id": "abc123...", + "name": "python3", + "execution_state": "idle", + "last_activity": "2026-04-01T10:00:00.000Z" + } + ], + "sessions": [ + { + "notebook": "notebooks/01_exploracion.ipynb", + "kernel_id": "abc123...", + "kernel_state": "idle" + } + ] +} +``` + +## CLI + +```bash +# Descubrir con deteccion automatica +python python/functions/notebook/jupyter_discover.py --registry-root /home/lucas/fn_registry + +# Puertos especificos, salida JSON +python python/functions/notebook/jupyter_discover.py --port 8888 --port 8889 --json + +# Usando variable de entorno +FN_REGISTRY_ROOT=/home/lucas/fn_registry python python/functions/notebook/jupyter_discover.py +``` + +## Notas + +Solo usa stdlib: `urllib`, `json`, `pathlib`, `os`. No requiere `requests` ni clientes Jupyter especializados. + +Cada peticion HTTP tiene un timeout de 2 segundos. Un puerto cerrado solo consume la consulta a `/api/status`; una instancia activa recibe hasta cuatro peticiones (`/api/status`, `/api/config`, `/api/kernels`, `/api/sessions`), cada una con su propio timeout. + +La deteccion de modo colaborativo busca `YDocExtension` o `collaborative` en el JSON de `/api/config`. Esto cubre tanto jupyter-collaboration >= 2.x (que expone la extension bajo `LabApp`) como configuraciones antiguas. + +Archivos `.jupyter-port`: el pipeline `init_jupyter_analysis` escribe este archivo en cada analisis al lanzar Jupyter, permitiendo que `jupyter_discover` los encuentre sin escanear todos los puertos. 
diff --git a/python/functions/notebook/jupyter_discover.py b/python/functions/notebook/jupyter_discover.py new file mode 100644 index 00000000..9c760c75 --- /dev/null +++ b/python/functions/notebook/jupyter_discover.py @@ -0,0 +1,219 @@ +"""Descubrimiento de instancias Jupyter Lab activas via API REST.""" + +import json +import os +import urllib.error +import urllib.request +from pathlib import Path + + +_DEFAULT_PORTS = [8888, 8889, 8890, 8891, 8892] + + +def _get(url: str, timeout: float = 2.0) -> dict | list | None: + """Hace GET a url y retorna el JSON parseado, o None si falla.""" + try: + with urllib.request.urlopen(url, timeout=timeout) as resp: + return json.loads(resp.read().decode()) + except Exception: + return None + + +def _is_collaborative(config: dict | list | None) -> bool: + """Detecta si el servidor tiene jupyter-collaboration/YDocExtension activo.""" + if not isinstance(config, dict): + return False + # Jupyter Lab expone la config de extensiones bajo claves como + # 'LabApp' o similares; la presencia de 'collaborative' o 'YDocExtension' + # en cualquier valor de primer nivel indica modo colaborativo. + raw = json.dumps(config).lower() + return "ydocextension" in raw or "collaborative" in raw + + +def _query_instance(base_url: str) -> dict | None: + """Consulta la API REST de una instancia Jupyter y retorna su estado. + + Retorna None si la instancia no responde o no es Jupyter. 
+ """ + status = _get(f"{base_url}/api/status") + if status is None: + return None + + config = _get(f"{base_url}/api/config") + kernels_raw = _get(f"{base_url}/api/kernels") or [] + sessions_raw = _get(f"{base_url}/api/sessions") or [] + + kernels = [] + if isinstance(kernels_raw, list): + for k in kernels_raw: + if isinstance(k, dict): + kernels.append({ + "id": k.get("id", ""), + "name": k.get("name", ""), + "execution_state": k.get("execution_state", ""), + "last_activity": k.get("last_activity", ""), + }) + + sessions = [] + if isinstance(sessions_raw, list): + for s in sessions_raw: + if isinstance(s, dict): + kernel = s.get("kernel") or {} + path = s.get("path") or s.get("notebook", {}).get("path", "") + sessions.append({ + "notebook": path, + "kernel_id": kernel.get("id", ""), + "kernel_state": kernel.get("execution_state", ""), + }) + + return { + "kernels": kernels, + "sessions": sessions, + "collaborative": _is_collaborative(config), + } + + +def _scan_analysis_ports(registry_root: str) -> list[tuple[int, str]]: + """Escanea subdirectorios de analysis/ buscando archivos .jupyter-port. + + Retorna lista de (puerto, nombre_analisis). + """ + root = Path(registry_root) if registry_root else Path.cwd() + analysis_dir = root / "analysis" + results: list[tuple[int, str]] = [] + + if not analysis_dir.is_dir(): + return results + + for entry in analysis_dir.iterdir(): + if not entry.is_dir(): + continue + port_file = entry / ".jupyter-port" + if port_file.is_file(): + try: + port = int(port_file.read_text().strip()) + results.append((port, entry.name)) + except (ValueError, OSError): + pass + + return results + + +def jupyter_discover( + registry_root: str = "", + ports: list[int] | None = None, +) -> list[dict]: + """Descubre instancias de Jupyter Lab activas consultando su API REST. + + Escanea primero los archivos .jupyter-port en subdirectorios de analysis/ + para encontrar puertos registrados, y luego aplica un fallback sobre puertos + comunes (8888-8892). 
Para cada instancia que responde consulta /api/status, + /api/config, /api/kernels y /api/sessions. + + Args: + registry_root: Raiz del fn_registry. Si vacio usa el directorio actual + o la variable de entorno FN_REGISTRY_ROOT. + ports: Lista de puertos a escanear. Si None, usa los puertos encontrados + en .jupyter-port mas los defaults (8888-8892). + + Returns: + Lista de dicts con: url, port, analysis, collaborative, kernels, sessions. + Cada sesion incluye: notebook, kernel_id, kernel_state. + """ + if not registry_root: + registry_root = os.environ.get("FN_REGISTRY_ROOT", "") + + # Recopilar puertos a escanear + port_analysis: dict[int, str] = {} + + if ports is not None: + for p in ports: + port_analysis[p] = "" + else: + # Primero los registrados en .jupyter-port + for port, analysis_name in _scan_analysis_ports(registry_root): + port_analysis[port] = analysis_name + # Fallback: puertos comunes que no estén ya en la lista + for p in _DEFAULT_PORTS: + if p not in port_analysis: + port_analysis[p] = "" + + results = [] + for port, analysis_name in port_analysis.items(): + base_url = f"http://localhost:{port}" + info = _query_instance(base_url) + if info is None: + continue + results.append({ + "url": base_url, + "port": port, + "analysis": analysis_name, + "collaborative": info["collaborative"], + "kernels": info["kernels"], + "sessions": info["sessions"], + }) + + return results + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +if __name__ == "__main__": + import argparse + import sys + + parser = argparse.ArgumentParser( + description="Descubre instancias de Jupyter Lab activas." 
+ ) + parser.add_argument( + "--registry-root", + default="", + help="Raiz del fn_registry (default: FN_REGISTRY_ROOT env o cwd)", + ) + parser.add_argument( + "--port", + dest="ports", + type=int, + action="append", + metavar="PORT", + help="Puerto a escanear (puede repetirse). Default: .jupyter-port + 8888-8892", + ) + parser.add_argument( + "--json", + action="store_true", + help="Emitir salida en JSON", + ) + args = parser.parse_args() + + instances = jupyter_discover( + registry_root=args.registry_root, + ports=args.ports, + ) + + if args.json: + print(json.dumps(instances, indent=2)) + sys.exit(0) + + if not instances: + print("No se encontraron instancias de Jupyter Lab activas.") + sys.exit(0) + + for inst in instances: + label = f" analysis: {inst['analysis']}" if inst["analysis"] else "" + collab = "colaborativo" if inst["collaborative"] else "estandar" + print(f"Jupyter Lab en {inst['url']} [{collab}]{label}") + if inst["kernels"]: + print(f" Kernels ({len(inst['kernels'])}):") + for k in inst["kernels"]: + print(f" - {k['name']} estado={k['execution_state']} id={k['id'][:8]}...") + else: + print(" Kernels: ninguno") + if inst["sessions"]: + print(f" Sesiones ({len(inst['sessions'])}):") + for s in inst["sessions"]: + print(f" - {s['notebook']} kernel={s['kernel_id'][:8]}... estado={s['kernel_state']}") + else: + print(" Sesiones: ninguna") + print() diff --git a/python/functions/notebook/jupyter_exec.md b/python/functions/notebook/jupyter_exec.md new file mode 100644 index 00000000..182d1b1c --- /dev/null +++ b/python/functions/notebook/jupyter_exec.md @@ -0,0 +1,94 @@ +--- +name: jupyter_exec +kind: function +lang: py +domain: notebook +version: "1.0.0" +purity: impure +signature: "jupyter_append_execute(notebook_path: str, code: str, server_url: str, token: str) -> dict" +description: "Ejecuta codigo en kernels de Jupyter via WebSocket. 
Tres modos: append (añade celda al notebook y la ejecuta), cell (ejecuta celda existente por indice), kernel (ejecuta en el kernel sin tocar ningun notebook)." +tags: [jupyter, notebook, kernel, websocket, execution, cells] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "error_go_core" +imports: [jupyter_kernel_client, jupyter_nbmodel_client] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/notebook/jupyter_exec.py" +--- + +## Funciones + +### `jupyter_append_execute(notebook_path, code, server_url, token)` + +Añade una celda de codigo al final del notebook y la ejecuta. Usa el protocolo +colaborativo de Jupyter, por lo que tanto el agente como el usuario ven la celda +y su output en tiempo real en JupyterLab. + +```python +from notebook.jupyter_exec import jupyter_append_execute + +result = jupyter_append_execute( + "notebooks/analisis.ipynb", + "import pandas as pd\nprint(pd.__version__)", + server_url="http://localhost:8888", + token="", +) +# {"cell_index": 5, "outputs": ["2.2.1"]} +``` + +### `jupyter_execute_cell(notebook_path, cell_index, server_url, token)` + +Ejecuta una celda existente del notebook por su indice (0-based). + +```python +from notebook.jupyter_exec import jupyter_execute_cell + +result = jupyter_execute_cell("notebooks/analisis.ipynb", 3) +# {"cell_index": 3, "outputs": ["42"]} +``` + +### `jupyter_kernel_execute(code, server_url, token)` + +Ejecuta codigo directamente en el kernel sin modificar ningun notebook. Util para +consultas rapidas, inspeccion de variables o verificacion de estado del kernel. 
+ +```python +from notebook.jupyter_exec import jupyter_kernel_execute + +result = jupyter_kernel_execute("len(df)") +# {"outputs": ["1500"], "status": "ok"} +``` + +## CLI + +```bash +# Añadir celda y ejecutar +python -m notebook.jupyter_exec append notebooks/mi.ipynb "print('hola')" --server http://localhost:8888 --token mytoken + +# Ejecutar celda existente +python -m notebook.jupyter_exec cell notebooks/mi.ipynb 2 --server http://localhost:8888 + +# Ejecutar en kernel directamente +python -m notebook.jupyter_exec kernel "x = 42; print(x)" +``` + +Output siempre JSON. En error retorna `{"error": "..."}` por stderr con exit code 1. + +## Extraccion de outputs + +| output_type | campo leido | +|---|---| +| stream | `text` | +| display_data / execute_result | `data.text/plain` | +| error | `traceback` (joined con `\n`) | + +## Notas + +- Las funciones `append` y `cell` son async internamente; las publicas usan `asyncio.run()`. +- `jupyter_kernel_execute` es sincrona directamente porque `KernelClient.execute` es bloqueante. +- El token puede ser cadena vacia si el servidor tiene autenticacion deshabilitada. +- `NbModelClient` requiere que el servidor tenga habilitado el endpoint colaborativo (`/api/collaboration/`), disponible en JupyterLab >= 4 con `jupyter-collaboration` instalado. diff --git a/python/functions/notebook/jupyter_exec.py b/python/functions/notebook/jupyter_exec.py new file mode 100644 index 00000000..9d694619 --- /dev/null +++ b/python/functions/notebook/jupyter_exec.py @@ -0,0 +1,280 @@ +"""Ejecuta codigo en kernels de Jupyter via WebSocket. 
"""Run code on Jupyter kernels over WebSocket.

Three execution modes:
- append: add a cell at the end of the notebook and execute it
- cell: execute an existing cell by index
- kernel: execute code directly on a kernel without touching any notebook
"""

import asyncio
import json
from typing import Any
from urllib.error import URLError
from urllib.request import Request, urlopen

from jupyter_kernel_client import KernelClient
from jupyter_nbmodel_client import NbModelClient, get_jupyter_notebook_websocket_url


# Seconds to keep the notebook connection open after executing, so Y.js can
# propagate the change to the other clients (the browser).
_SYNC_GRACE_SECONDS = 2


# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------


def _api_get(url: str, token: str = "") -> dict | list | None:
    """GET a Jupyter REST API endpoint and return the decoded JSON.

    Args:
        url: Full endpoint URL.
        token: Jupyter token; the ``Authorization`` header is omitted when
            empty.

    Returns:
        The decoded JSON value, or ``None`` when the request fails or the body
        is not valid JSON (lookups built on this helper are best-effort).
    """
    headers = {"Accept": "application/json"}
    if token:
        headers["Authorization"] = f"token {token}"
    try:
        req = Request(url, headers=headers)
        with urlopen(req, timeout=5) as resp:
            return json.loads(resp.read())
    except (URLError, OSError, ValueError):
        # ValueError covers malformed JSON, undecodable bytes and invalid URLs.
        return None


def _resolve_kernel_id(server_url: str, token: str, notebook_path: str) -> str | None:
    """Find the kernel id attached to a notebook through the sessions API.

    A session matches when *notebook_path* equals its ``path`` field or its
    ``notebook.path`` field. Payloads that are not a list of dicts (for
    example an error object) are treated as "no sessions".

    Returns:
        The kernel id of the first matching session, or ``None`` when nothing
        matches or the matching session carries no kernel.
    """
    sessions = _api_get(f"{server_url.rstrip('/')}/api/sessions", token)
    if not isinstance(sessions, list):
        return None

    for session in sessions:
        if not isinstance(session, dict):
            continue
        candidates = [session.get("path")]
        notebook = session.get("notebook")
        if isinstance(notebook, dict):
            candidates.append(notebook.get("path"))
        elif notebook is not None:
            candidates.append(str(notebook))
        if notebook_path in candidates:
            kernel = session.get("kernel")
            return kernel.get("id") if isinstance(kernel, dict) else None
    return None


def _resolve_collab_username(server_url: str, token: str) -> str:
    """Resolve the display name used for the collaboration session.

    Queries ``/api/me`` and returns the first non-empty value among
    ``display_name``, ``username`` and ``name`` of the reported identity.

    Returns:
        That name, or ``"Anonymous"`` when the endpoint is unavailable or none
        of the fields holds a non-empty string.
    """
    me = _api_get(f"{server_url.rstrip('/')}/api/me", token)
    identity = me.get("identity") if isinstance(me, dict) else None
    if isinstance(identity, dict):
        for field in ("display_name", "username", "name"):
            value = identity.get(field)
            if isinstance(value, str) and value:
                return value
    return "Anonymous"


def _extract_outputs(raw_outputs: list[dict]) -> list[str]:
    """Convert nbformat-style outputs into a list of readable strings.

    ``stream`` outputs contribute their ``text``; ``display_data`` and
    ``execute_result`` contribute ``data["text/plain"]`` (an empty string when
    that representation is absent); ``error`` outputs contribute the traceback
    joined with newlines. Other output types and non-dict entries are skipped.
    """
    result: list[str] = []
    for output in raw_outputs or []:
        if not isinstance(output, dict):
            continue
        output_type = output.get("output_type", "")
        if output_type == "stream":
            text = output.get("text", "")
        elif output_type in ("display_data", "execute_result"):
            text = output.get("data", {}).get("text/plain", "")
        elif output_type == "error":
            result.append("\n".join(output.get("traceback", [])))
            continue
        else:
            continue
        if isinstance(text, list):
            text = "".join(text)
        result.append(text.rstrip("\n"))
    return result


# ---------------------------------------------------------------------------
# Notebook modes (async internals)
# ---------------------------------------------------------------------------


async def _execute_in_notebook(
    notebook_path: str,
    server_url: str,
    token: str,
    code: str | None,
    cell_index: int | None,
) -> dict[str, Any]:
    """Execute a notebook cell through the collaborative document model.

    When *code* is not None a new code cell holding it is appended and that
    cell is executed; otherwise the existing cell at *cell_index* is executed.

    Returns:
        Dict with ``cell_index`` and ``outputs`` (list of strings).
    """
    ws_url = get_jupyter_notebook_websocket_url(
        server_url,
        notebook_path,
        token or None,
    )
    kernel_id = _resolve_kernel_id(server_url, token, notebook_path)
    username = _resolve_collab_username(server_url, token)

    async with NbModelClient(ws_url, username=username) as nb:
        await nb.wait_until_synced()

        with KernelClient(server_url=server_url, token=token, kernel_id=kernel_id) as kernel:
            if code is not None:
                cell_index = nb.add_code_cell(code)
            result = nb.execute_cell(cell_index, kernel)

        # Let Y.js propagate changes to other clients (browser)
        await asyncio.sleep(_SYNC_GRACE_SECONDS)

    outputs = _extract_outputs((result or {}).get("outputs", []))
    return {"cell_index": cell_index, "outputs": outputs}


async def _async_append_execute(
    notebook_path: str,
    code: str,
    server_url: str,
    token: str,
) -> dict[str, Any]:
    """Append *code* as a new cell of the notebook and execute it."""
    return await _execute_in_notebook(notebook_path, server_url, token, code, None)


async def _async_execute_cell(
    notebook_path: str,
    cell_index: int,
    server_url: str,
    token: str,
) -> dict[str, Any]:
    """Execute the existing cell at *cell_index* of the notebook."""
    return await _execute_in_notebook(notebook_path, server_url, token, None, cell_index)


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def jupyter_append_execute(
    notebook_path: str,
    code: str,
    server_url: str = "http://localhost:8888",
    token: str = "",
) -> dict[str, Any]:
    """Append a code cell to the end of the notebook and execute it.

    The write goes through Jupyter's collaborative protocol, so the cell and
    its output are meant to show up for users who have the notebook open.

    Args:
        notebook_path: Notebook path relative to the Jupyter server root.
        code: Python code to insert and execute.
        server_url: Jupyter server URL; defaults to http://localhost:8888.
        token: Jupyter authentication token.

    Returns:
        Dict with ``cell_index`` (index of the new cell) and ``outputs``
        (list of strings).

    Raises:
        Exception: if the server or the kernel cannot be reached.
    """
    return asyncio.run(_async_append_execute(notebook_path, code, server_url, token))


def jupyter_execute_cell(
    notebook_path: str,
    cell_index: int,
    server_url: str = "http://localhost:8888",
    token: str = "",
) -> dict[str, Any]:
    """Execute an existing notebook cell by index.

    Args:
        notebook_path: Notebook path relative to the Jupyter server root.
        cell_index: Index of the cell to execute (0-based).
        server_url: Jupyter server URL; defaults to http://localhost:8888.
        token: Jupyter authentication token.

    Returns:
        Dict with ``cell_index`` and ``outputs`` (list of strings).

    Raises:
        IndexError: if cell_index is out of range (NOTE(review): raised by the
            notebook model client, not checked here - confirm).
        Exception: if the server or the kernel cannot be reached.
    """
    return asyncio.run(_async_execute_cell(notebook_path, cell_index, server_url, token))


def jupyter_kernel_execute(
    code: str,
    server_url: str = "http://localhost:8888",
    token: str = "",
    kernel_id: str | None = None,
) -> dict[str, Any]:
    """Execute code directly on a kernel without modifying any notebook.

    Useful for quick queries, variable inspection and state checks.

    Args:
        code: Python code to execute.
        server_url: Jupyter server URL; defaults to http://localhost:8888.
        token: Jupyter authentication token.
        kernel_id: Id of an existing kernel to run on, e.g. one reported by
            the sessions API. With the default ``None`` no kernel id is given
            to the client (NOTE(review): the client presumably starts its own
            kernel in that case, so earlier variables would not be visible -
            confirm against jupyter_kernel_client).

    Returns:
        Dict with ``outputs`` (list of strings) and ``status`` as reported by
        the kernel client (``"unknown"`` when absent).

    Raises:
        Exception: if the server or the kernel cannot be reached.
    """
    with KernelClient(server_url=server_url, token=token, kernel_id=kernel_id) as kernel:
        result = kernel.execute(code)

    result = result or {}
    outputs = _extract_outputs(result.get("outputs", []))
    return {"outputs": outputs, "status": result.get("status", "unknown")}


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

if __name__ == "__main__":
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="Ejecuta codigo en kernels de Jupyter",
    )
    sub = parser.add_subparsers(dest="command", required=True)

    # append
    p_append = sub.add_parser("append", help="Añade celda al notebook y la ejecuta")
    p_append.add_argument("notebook", help="Ruta al notebook relativa al servidor")
    p_append.add_argument("code", help="Codigo a insertar y ejecutar")
    p_append.add_argument("--server", default="http://localhost:8888")
    p_append.add_argument("--token", default="")

    # cell
    p_cell = sub.add_parser("cell", help="Ejecuta celda existente por indice")
    p_cell.add_argument("notebook", help="Ruta al notebook relativa al servidor")
    p_cell.add_argument("index", type=int, help="Indice de la celda (0-based)")
    p_cell.add_argument("--server", default="http://localhost:8888")
    p_cell.add_argument("--token", default="")

    # kernel
    p_kernel = sub.add_parser("kernel", help="Ejecuta codigo en el kernel sin tocar notebook")
    p_kernel.add_argument("code", help="Codigo a ejecutar")
    p_kernel.add_argument("--server", default="http://localhost:8888")
    p_kernel.add_argument("--token", default="")
    p_kernel.add_argument("--kernel-id", default=None, help="ID de un kernel existente (opcional)")

    args = parser.parse_args()

    try:
        # required=True guarantees that args.command is one of these three.
        if args.command == "append":
            result = jupyter_append_execute(args.notebook, args.code, args.server, args.token)
        elif args.command == "cell":
            result = jupyter_execute_cell(args.notebook, args.index, args.server, args.token)
        else:
            result = jupyter_kernel_execute(args.code, args.server, args.token, args.kernel_id)

        print(json.dumps(result, ensure_ascii=False, indent=2))
    except Exception as exc:
        print(json.dumps({"error": str(exc)}, ensure_ascii=False), file=sys.stderr)
        sys.exit(1)
else: + parser.print_help() + sys.exit(1) + + print(json.dumps(result, ensure_ascii=False, indent=2)) + except Exception as exc: + print(json.dumps({"error": str(exc)}, ensure_ascii=False), file=sys.stderr) + sys.exit(1) diff --git a/python/functions/notebook/jupyter_kernel.md b/python/functions/notebook/jupyter_kernel.md new file mode 100644 index 00000000..dc2864c4 --- /dev/null +++ b/python/functions/notebook/jupyter_kernel.md @@ -0,0 +1,136 @@ +--- +name: jupyter_kernel +kind: function +lang: py +domain: notebook +version: "1.0.0" +purity: impure +signature: "def jupyter_kernel_list(server_url: str = \"http://localhost:8888\", token: str = \"\") -> list[dict]" +description: "CRUD completo de kernels Jupyter via REST API. Expone seis operaciones: list, start, restart, interrupt, shutdown y sessions. Usa solo stdlib (urllib, json), sin dependencias externas." +tags: [jupyter, notebook, kernel, api, http, rest, sessions, crud] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "error_go_core" +imports: [json, urllib.error, urllib.request] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/notebook/jupyter_kernel.py" +--- + +## Funciones expuestas + +| Funcion | Endpoint | Descripcion | +|---------|----------|-------------| +| `jupyter_kernel_list(server_url, token)` | `GET /api/kernels` | Lista kernels activos | +| `jupyter_kernel_start(server_url, token, name)` | `POST /api/kernels` | Inicia un kernel nuevo | +| `jupyter_kernel_restart(server_url, token, kernel_id)` | `POST /api/kernels/{id}/restart` | Reinicia un kernel | +| `jupyter_kernel_interrupt(server_url, token, kernel_id)` | `POST /api/kernels/{id}/interrupt` | Interrumpe ejecucion | +| `jupyter_kernel_shutdown(server_url, token, kernel_id)` | `DELETE /api/kernels/{id}` | Apaga y elimina un kernel | +| `jupyter_kernel_sessions(server_url, token)` | `GET /api/sessions` | Lista sesiones activas | + +## Ejemplo + +```python +from 
notebook.jupyter_kernel import ( + jupyter_kernel_list, + jupyter_kernel_start, + jupyter_kernel_restart, + jupyter_kernel_interrupt, + jupyter_kernel_shutdown, + jupyter_kernel_sessions, +) + +# Listar kernels activos +kernels = jupyter_kernel_list("http://localhost:8888", token="mi_token") +# [{"id": "abc123", "name": "python3", "execution_state": "idle", +# "last_activity": "2026-04-01T10:00:00.000Z", "connections": 1}] + +# Iniciar un kernel nuevo +kernel = jupyter_kernel_start("http://localhost:8888", token="mi_token", name="python3") +# {"id": "def456", "name": "python3", "execution_state": "starting"} + +# Reiniciar +jupyter_kernel_restart("http://localhost:8888", "mi_token", kernel["id"]) + +# Interrumpir ejecucion en curso +jupyter_kernel_interrupt("http://localhost:8888", "mi_token", kernel["id"]) + +# Apagar +jupyter_kernel_shutdown("http://localhost:8888", "mi_token", kernel["id"]) + +# Listar sesiones (mapeo notebook <-> kernel) +sessions = jupyter_kernel_sessions("http://localhost:8888", token="mi_token") +# [{"id": "s1", "notebook": "notebooks/01_analisis.ipynb", +# "kernel_id": "abc123", "kernel_state": "idle", "type": "notebook", "name": "01_analisis"}] +``` + +## CLI + +```bash +# Listar kernels +python python/functions/notebook/jupyter_kernel.py list --server http://localhost:8888 --token TK + +# Iniciar kernel (por defecto python3) +python python/functions/notebook/jupyter_kernel.py start --name python3 + +# Reiniciar +python python/functions/notebook/jupyter_kernel.py restart abc123-... + +# Interrumpir +python python/functions/notebook/jupyter_kernel.py interrupt abc123-... + +# Apagar +python python/functions/notebook/jupyter_kernel.py shutdown abc123-... + +# Listar sesiones +python python/functions/notebook/jupyter_kernel.py sessions +``` + +Todos los subcomandos aceptan `--server` y `--token`. El output es siempre JSON. 
+ +## Estructura de los dicts retornados + +**`jupyter_kernel_list`** — cada elemento: +```python +{ + "id": "abc123-...", + "name": "python3", + "execution_state": "idle", # idle | busy | starting + "last_activity": "2026-04-01T10:00:00.000Z", + "connections": 1 +} +``` + +**`jupyter_kernel_start`** — elemento retornado: +```python +{ + "id": "def456-...", + "name": "python3", + "execution_state": "starting" +} +``` + +**`jupyter_kernel_sessions`** — cada elemento: +```python +{ + "id": "session-id", + "notebook": "notebooks/01_analisis.ipynb", # path relativo al root del servidor + "kernel_id": "abc123-...", + "kernel_state": "idle", + "type": "notebook", + "name": "01_analisis" +} +``` + +## Notas + +Solo usa stdlib: `urllib.request`, `urllib.error`, `json`. No requiere `requests`, `jupyter_client` ni ningun paquete externo. + +La funcion interna `_make_request` centraliza la construccion de headers y serializacion JSON. El header `Authorization: token {token}` se omite si `token` esta vacio, permitiendo conectar a servidores sin autenticacion. + +`jupyter_kernel_interrupt` y `jupyter_kernel_shutdown` retornan `None` porque la API de Jupyter devuelve 204 No Content en esos casos. + +Compatible con Jupyter Lab 3.x, 4.x y Jupyter Notebook 6.x/7.x — todos exponen la misma REST API en `/api/kernels` y `/api/sessions`. diff --git a/python/functions/notebook/jupyter_kernel.py b/python/functions/notebook/jupyter_kernel.py new file mode 100644 index 00000000..7b603502 --- /dev/null +++ b/python/functions/notebook/jupyter_kernel.py @@ -0,0 +1,287 @@ +"""CRUD de kernels Jupyter via REST API.""" + +import json +import urllib.error +import urllib.request + + +def _make_request( + method: str, + url: str, + token: str = "", + body: dict | None = None, +) -> dict | list | None: + """Ejecuta una request HTTP a la API de Jupyter. + + Args: + method: Metodo HTTP (GET, POST, DELETE). + url: URL completa del endpoint. + token: Token de autenticacion de Jupyter. 
Vacio si no se requiere. + body: Cuerpo de la request para metodos POST. + + Returns: + Respuesta deserializada como dict o list, o None si la respuesta esta vacia. + + Raises: + urllib.error.HTTPError: Si la respuesta HTTP indica un error. + urllib.error.URLError: Si no se puede conectar al servidor. + """ + data = json.dumps(body).encode("utf-8") if body is not None else None + + headers = { + "Accept": "application/json", + "Content-Type": "application/json", + } + if token: + headers["Authorization"] = f"token {token}" + + req = urllib.request.Request(url, data=data, headers=headers, method=method) + + with urllib.request.urlopen(req) as resp: + raw = resp.read() + if not raw: + return None + return json.loads(raw.decode("utf-8")) + + +def jupyter_kernel_list( + server_url: str = "http://localhost:8888", + token: str = "", +) -> list[dict]: + """Lista todos los kernels activos en el servidor Jupyter. + + Args: + server_url: URL base del servidor Jupyter. + token: Token de autenticacion. Vacio si el servidor no requiere auth. + + Returns: + Lista de dicts con los campos: id, name, execution_state, + last_activity, connections. + + Raises: + urllib.error.HTTPError: Si la respuesta HTTP indica un error. + urllib.error.URLError: Si no se puede conectar al servidor. + """ + url = f"{server_url.rstrip('/')}/api/kernels" + result = _make_request("GET", url, token) + return result if isinstance(result, list) else [] + + +def jupyter_kernel_start( + server_url: str = "http://localhost:8888", + token: str = "", + name: str = "python3", +) -> dict: + """Inicia un kernel nuevo en el servidor Jupyter. + + Args: + server_url: URL base del servidor Jupyter. + token: Token de autenticacion. Vacio si el servidor no requiere auth. + name: Nombre del kernel a iniciar (p.ej. "python3", "ir"). + + Returns: + Dict con los campos: id, name, execution_state. + + Raises: + urllib.error.HTTPError: Si la respuesta HTTP indica un error. 
+ urllib.error.URLError: Si no se puede conectar al servidor. + """ + url = f"{server_url.rstrip('/')}/api/kernels" + result = _make_request("POST", url, token, body={"name": name}) + return result if isinstance(result, dict) else {} + + +def jupyter_kernel_restart( + server_url: str = "http://localhost:8888", + token: str = "", + kernel_id: str = "", +) -> dict: + """Reinicia un kernel existente. + + Args: + server_url: URL base del servidor Jupyter. + token: Token de autenticacion. Vacio si el servidor no requiere auth. + kernel_id: ID del kernel a reiniciar. + + Returns: + Dict con la informacion actualizada del kernel. + + Raises: + urllib.error.HTTPError: Si la respuesta HTTP indica un error (p.ej. 404 si no existe). + urllib.error.URLError: Si no se puede conectar al servidor. + """ + url = f"{server_url.rstrip('/')}/api/kernels/{kernel_id}/restart" + result = _make_request("POST", url, token) + return result if isinstance(result, dict) else {} + + +def jupyter_kernel_interrupt( + server_url: str = "http://localhost:8888", + token: str = "", + kernel_id: str = "", +) -> None: + """Interrumpe la ejecucion actual de un kernel. + + Args: + server_url: URL base del servidor Jupyter. + token: Token de autenticacion. Vacio si el servidor no requiere auth. + kernel_id: ID del kernel a interrumpir. + + Raises: + urllib.error.HTTPError: Si la respuesta HTTP indica un error (p.ej. 404 si no existe). + urllib.error.URLError: Si no se puede conectar al servidor. + """ + url = f"{server_url.rstrip('/')}/api/kernels/{kernel_id}/interrupt" + _make_request("POST", url, token) + + +def jupyter_kernel_shutdown( + server_url: str = "http://localhost:8888", + token: str = "", + kernel_id: str = "", +) -> None: + """Apaga y elimina un kernel. + + Args: + server_url: URL base del servidor Jupyter. + token: Token de autenticacion. Vacio si el servidor no requiere auth. + kernel_id: ID del kernel a apagar. 
+ + Raises: + urllib.error.HTTPError: Si la respuesta HTTP indica un error (p.ej. 404 si no existe). + urllib.error.URLError: Si no se puede conectar al servidor. + """ + url = f"{server_url.rstrip('/')}/api/kernels/{kernel_id}" + _make_request("DELETE", url, token) + + +def jupyter_kernel_sessions( + server_url: str = "http://localhost:8888", + token: str = "", +) -> list[dict]: + """Lista las sesiones activas del servidor Jupyter. + + Cada sesion mapea un notebook a su kernel y usuario actuales. + + Args: + server_url: URL base del servidor Jupyter. + token: Token de autenticacion. Vacio si el servidor no requiere auth. + + Returns: + Lista de dicts con los campos: id, notebook (path), kernel_id, + kernel_state, type, name. + + Raises: + urllib.error.HTTPError: Si la respuesta HTTP indica un error. + urllib.error.URLError: Si no se puede conectar al servidor. + """ + url = f"{server_url.rstrip('/')}/api/sessions" + raw = _make_request("GET", url, token) + if not isinstance(raw, list): + return [] + + sessions = [] + for s in raw: + kernel = s.get("kernel") or {} + sessions.append( + { + "id": s.get("id", ""), + "notebook": s.get("path", ""), + "kernel_id": kernel.get("id", ""), + "kernel_state": kernel.get("execution_state", ""), + "type": s.get("type", ""), + "name": s.get("name", ""), + } + ) + return sessions + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +if __name__ == "__main__": + import argparse + import sys + + parser = argparse.ArgumentParser( + description="CRUD de kernels Jupyter via REST API." 
+ ) + parser.add_argument( + "--server", + default="http://localhost:8888", + help="URL base del servidor Jupyter (default: http://localhost:8888)", + ) + parser.add_argument( + "--token", + default="", + help="Token de autenticacion de Jupyter (default: vacio)", + ) + + subparsers = parser.add_subparsers(dest="command", required=True) + + # list + subparsers.add_parser("list", help="Lista todos los kernels activos.") + + # start + sp_start = subparsers.add_parser("start", help="Inicia un kernel nuevo.") + sp_start.add_argument( + "--name", + default="python3", + help="Nombre del kernel (default: python3)", + ) + + # restart + sp_restart = subparsers.add_parser("restart", help="Reinicia un kernel existente.") + sp_restart.add_argument("kernel_id", help="ID del kernel a reiniciar.") + + # interrupt + sp_interrupt = subparsers.add_parser( + "interrupt", help="Interrumpe la ejecucion de un kernel." + ) + sp_interrupt.add_argument("kernel_id", help="ID del kernel a interrumpir.") + + # shutdown + sp_shutdown = subparsers.add_parser("shutdown", help="Apaga y elimina un kernel.") + sp_shutdown.add_argument("kernel_id", help="ID del kernel a apagar.") + + # sessions + subparsers.add_parser("sessions", help="Lista las sesiones activas.") + + args = parser.parse_args() + + try: + if args.command == "list": + result = jupyter_kernel_list(args.server, args.token) + elif args.command == "start": + result = jupyter_kernel_start(args.server, args.token, args.name) + elif args.command == "restart": + result = jupyter_kernel_restart(args.server, args.token, args.kernel_id) + if result is None: + result = {"status": "restarted", "kernel_id": args.kernel_id} + elif args.command == "interrupt": + jupyter_kernel_interrupt(args.server, args.token, args.kernel_id) + result = {"status": "interrupted", "kernel_id": args.kernel_id} + elif args.command == "shutdown": + jupyter_kernel_shutdown(args.server, args.token, args.kernel_id) + result = {"status": "shutdown", "kernel_id": 
args.kernel_id} + elif args.command == "sessions": + result = jupyter_kernel_sessions(args.server, args.token) + else: + parser.print_help() + sys.exit(1) + + print(json.dumps(result, indent=2)) + except urllib.error.HTTPError as e: + body = e.read().decode("utf-8", errors="replace") + print( + json.dumps({"error": f"HTTP {e.code}: {e.reason}", "detail": body}), + file=sys.stderr, + ) + sys.exit(1) + except urllib.error.URLError as e: + print( + json.dumps({"error": f"URLError: {e.reason}"}), + file=sys.stderr, + ) + sys.exit(1) diff --git a/python/functions/notebook/jupyter_read.md b/python/functions/notebook/jupyter_read.md new file mode 100644 index 00000000..0e04d426 --- /dev/null +++ b/python/functions/notebook/jupyter_read.md @@ -0,0 +1,118 @@ +--- +name: jupyter_read +kind: function +lang: py +domain: notebook +version: "1.0.0" +purity: impure +signature: "def jupyter_read_cells(notebook_path: str, server_url: str = 'http://localhost:8888', token: str = '', cell_index: int | None = None) -> list[dict]" +description: "Lee celdas de un notebook Jupyter abierto via el protocolo de colaboracion en tiempo real (CRDT/Y.js). Devuelve el estado actual incluyendo cambios no guardados. Expone tambien jupyter_notebook_info() para metadata rapida." +tags: [jupyter, notebook, crdt, yjs, websocket, cells, read, realtime] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "error_go_core" +imports: [jupyter_nbmodel_client] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/notebook/jupyter_read.py" +--- + +## Funciones exportadas + +### `jupyter_read_cells` + +```python +def jupyter_read_cells( + notebook_path: str, + server_url: str = "http://localhost:8888", + token: str = "", + cell_index: int | None = None, +) -> list[dict] +``` + +Lee todas las celdas o una especifica de un notebook Jupyter en vivo. 
+Retorna lista de dicts: + +```python +{"index": 0, "type": "code", "source": "import pandas as pd", "outputs": ["..."]} +``` + +- Para celdas markdown y raw, el campo `outputs` no se incluye. +- Para code cells, los outputs se convierten a texto legible: + - `stream` -> texto plano + - `display_data`/`execute_result` -> `text/plain`, o `[HTML Output]`, `[Image Output (PNG)]` + - `error` -> traceback limpio (sin codigos ANSI) + +### `jupyter_notebook_info` + +```python +def jupyter_notebook_info( + notebook_path: str, + server_url: str = "http://localhost:8888", + token: str = "", +) -> dict +``` + +Retorna metadata del notebook: + +```python +{ + "notebook_path": "notebooks/analysis.ipynb", + "server_url": "http://localhost:8888", + "total_cells": 12, + "cell_counts": {"code": 9, "markdown": 3} +} +``` + +## Ejemplo + +```python +from notebook.jupyter_read import jupyter_read_cells, jupyter_notebook_info + +# Leer todas las celdas +cells = jupyter_read_cells( + "notebooks/analysis.ipynb", + server_url="http://localhost:8888", + token="mi-token", +) +for cell in cells: + print(f"[{cell['index']}] {cell['type']}: {cell['source'][:60]}") + +# Leer solo la celda 3 +cell = jupyter_read_cells("notebooks/analysis.ipynb", token="mi-token", cell_index=3) + +# Solo metadata +info = jupyter_notebook_info("notebooks/analysis.ipynb", token="mi-token") +print(f"Total celdas: {info['total_cells']}") +``` + +## CLI + +```bash +# Ver todas las celdas (formato legible) +python jupyter_read.py notebooks/analysis.ipynb --token MI_TOKEN + +# Ver solo la celda 5 +python jupyter_read.py notebooks/analysis.ipynb --token MI_TOKEN --cell 5 + +# Solo metadata +python jupyter_read.py notebooks/analysis.ipynb --token MI_TOKEN --info + +# Salida JSON (todas las celdas) +python jupyter_read.py notebooks/analysis.ipynb --token MI_TOKEN --json + +# Servidor remoto +python jupyter_read.py notebooks/analysis.ipynb --server http://mi-servidor:8888 --token MI_TOKEN +``` + +## Notas + +- Usa 
`NbModelClient` de `jupyter_nbmodel_client` con protocolo CRDT/Y.js. +- Las funciones internas son `async`; las publicas envuelven con `asyncio.run()` para ser sincronas. +- Lee el estado **en memoria del servidor**, no el archivo `.ipynb` en disco — captura cambios no guardados. +- `notebook_path` debe ser relativo a la raiz del servidor Jupyter, no al sistema de archivos local. +- Para servidores sin token, usar `token=""` (default). +- El CLI muestra preview de hasta 8 lineas de source y 4 lineas por output en modo legible. diff --git a/python/functions/notebook/jupyter_read.py b/python/functions/notebook/jupyter_read.py new file mode 100644 index 00000000..c8fb695a --- /dev/null +++ b/python/functions/notebook/jupyter_read.py @@ -0,0 +1,293 @@ +"""Lee celdas de un notebook Jupyter via protocolo de colaboracion en tiempo real (CRDT/Y.js).""" + +from __future__ import annotations + +import asyncio +import json +import sys +from typing import Any +from urllib.error import URLError +from urllib.request import Request, urlopen + +from jupyter_nbmodel_client import NbModelClient, get_jupyter_notebook_websocket_url + + +def _resolve_collab_username(server_url: str, token: str) -> str: + """Resolve the display name of the active user in Jupyter collaboration.""" + headers = {"Accept": "application/json"} + if token: + headers["Authorization"] = f"token {token}" + try: + req = Request(f"{server_url}/api/me", headers=headers) + with urlopen(req, timeout=5) as resp: + me = json.loads(resp.read()) + identity = me.get("identity", {}) + return identity.get("display_name", "") or identity.get("username", "") or identity.get("name", "Anonymous") + except (URLError, OSError, json.JSONDecodeError): + return "Anonymous" + + +# --------------------------------------------------------------------------- +# Helpers internos (async) +# --------------------------------------------------------------------------- + +def _extract_outputs(outputs: list[dict]) -> list[str]: + """Convierte 
outputs de celda en representaciones legibles.""" + result = [] + for out in outputs: + out_type = out.get("output_type", "") + if out_type == "stream": + text = out.get("text", "") + if isinstance(text, list): + text = "".join(text) + result.append(text.rstrip()) + elif out_type in ("display_data", "execute_result"): + data = out.get("data", {}) + if "text/plain" in data: + plain = data["text/plain"] + if isinstance(plain, list): + plain = "".join(plain) + result.append(plain.rstrip()) + elif "text/html" in data: + result.append("[HTML Output]") + elif "image/png" in data: + result.append("[Image Output (PNG)]") + else: + result.append(f"[Output: {list(data.keys())}]") + elif out_type == "error": + traceback = out.get("traceback", []) + # Strip ANSI codes for clean text + import re + ansi_escape = re.compile(r"\x1b\[[0-9;]*m") + clean = [ansi_escape.sub("", line) for line in traceback] + result.append("\n".join(clean)) + return result + + +def _cell_to_dict(index: int, cell: Any) -> dict: + """Convierte una NotebookNode en un dict normalizado.""" + cell_type = cell.get("cell_type", "code") + source = cell.get("source", "") + if isinstance(source, list): + source = "".join(source) + + entry: dict = { + "index": index, + "type": cell_type, + "source": source, + } + + if cell_type == "code": + raw_outputs = cell.get("outputs", []) + entry["outputs"] = _extract_outputs(raw_outputs) + + return entry + + +async def _read_cells_async( + notebook_path: str, + server_url: str, + token: str, + cell_index: int | None, +) -> list[dict]: + """Conecta al servidor Jupyter y lee las celdas del notebook.""" + ws_url = get_jupyter_notebook_websocket_url( + server_url, + notebook_path, + token, + ) + username = _resolve_collab_username(server_url, token) + async with NbModelClient(ws_url, username=username) as client: + await client.wait_until_synced() + total = len(client) + if cell_index is not None: + if cell_index < 0 or cell_index >= total: + raise IndexError( + f"cell_index 
{cell_index} fuera de rango (0-{total - 1})" + ) + return [_cell_to_dict(cell_index, client[cell_index])] + return [_cell_to_dict(i, client[i]) for i in range(total)] + + +async def _notebook_info_async( + notebook_path: str, + server_url: str, + token: str, +) -> dict: + """Conecta al servidor Jupyter y retorna metadata del notebook.""" + ws_url = get_jupyter_notebook_websocket_url( + server_url, + notebook_path, + token, + ) + username = _resolve_collab_username(server_url, token) + async with NbModelClient(ws_url, username=username) as client: + await client.wait_until_synced() + total = len(client) + counts: dict[str, int] = {} + for i in range(total): + ct = client[i].get("cell_type", "code") + counts[ct] = counts.get(ct, 0) + 1 + return { + "notebook_path": notebook_path, + "server_url": server_url, + "total_cells": total, + "cell_counts": counts, + } + + +# --------------------------------------------------------------------------- +# API publica (sincrona) +# --------------------------------------------------------------------------- + +def jupyter_read_cells( + notebook_path: str, + server_url: str = "http://localhost:8888", + token: str = "", + cell_index: int | None = None, +) -> list[dict]: + """Lee todas las celdas de un notebook Jupyter o una celda especifica. + + Conecta via el protocolo de colaboracion en tiempo real (CRDT/Y.js) y + devuelve el estado actual del notebook incluyendo cambios no guardados. + + Args: + notebook_path: Ruta relativa al notebook desde la raiz del servidor + (ej: "notebooks/analysis.ipynb"). + server_url: URL base del servidor Jupyter (default http://localhost:8888). + token: Token de autenticacion del servidor Jupyter. + cell_index: Si se indica, retorna solo esa celda (0-based). Si es None, + retorna todas las celdas. 
+ + Returns: + Lista de dicts con campos: + - index (int): posicion de la celda en el notebook + - type (str): "code", "markdown" o "raw" + - source (str): contenido de la celda + - outputs (list[str]): solo para code cells; representacion legible + de cada output (stream, texto plano, [HTML Output], [Image Output (PNG)], + traceback de errores). + + Raises: + IndexError: Si cell_index esta fuera del rango del notebook. + Exception: Si no se puede conectar al servidor Jupyter o al notebook. + """ + return asyncio.run( + _read_cells_async(notebook_path, server_url, token, cell_index) + ) + + +def jupyter_notebook_info( + notebook_path: str, + server_url: str = "http://localhost:8888", + token: str = "", +) -> dict: + """Retorna metadata de un notebook Jupyter abierto. + + Args: + notebook_path: Ruta relativa al notebook desde la raiz del servidor. + server_url: URL base del servidor Jupyter. + token: Token de autenticacion. + + Returns: + Dict con: + - notebook_path (str): ruta del notebook + - server_url (str): URL del servidor + - total_cells (int): numero total de celdas + - cell_counts (dict): conteo por tipo {"code": N, "markdown": M, ...} + + Raises: + Exception: Si no se puede conectar al servidor Jupyter o al notebook. + """ + return asyncio.run( + _notebook_info_async(notebook_path, server_url, token) + ) + + +# --------------------------------------------------------------------------- +# CLI +# --------------------------------------------------------------------------- + +def _format_readable(cells: list[dict]) -> str: + """Formatea celdas en texto legible con preview de hasta 8 lineas.""" + lines = [] + for cell in cells: + header = f"[{cell['index']}] {cell['type'].upper()}" + lines.append(header) + lines.append("-" * len(header)) + source_lines = cell["source"].splitlines() + preview = source_lines[:8] + lines.extend(preview) + if len(source_lines) > 8: + lines.append(f"... 
({len(source_lines) - 8} lineas mas)") + if "outputs" in cell and cell["outputs"]: + lines.append(" -> outputs:") + for out in cell["outputs"]: + out_preview = out.splitlines()[:4] + for ol in out_preview: + lines.append(f" {ol}") + if len(out.splitlines()) > 4: + lines.append(" ...") + lines.append("") + return "\n".join(lines) + + +def main() -> None: + import argparse + + parser = argparse.ArgumentParser( + description="Lee celdas de un notebook Jupyter via CRDT/Y.js" + ) + parser.add_argument("notebook", help="Ruta del notebook relativa al servidor") + parser.add_argument( + "--server", + default="http://localhost:8888", + help="URL del servidor Jupyter (default: http://localhost:8888)", + ) + parser.add_argument("--token", default="", help="Token de autenticacion") + parser.add_argument( + "--cell", + type=int, + default=None, + metavar="INDEX", + help="Indice de celda especifica (0-based)", + ) + parser.add_argument( + "--info", + action="store_true", + help="Mostrar solo metadata del notebook", + ) + parser.add_argument( + "--json", + action="store_true", + dest="as_json", + help="Salida en formato JSON", + ) + args = parser.parse_args() + + try: + if args.info: + result = jupyter_notebook_info(args.notebook, args.server, args.token) + if args.as_json: + print(json.dumps(result, ensure_ascii=False, indent=2)) + else: + print(f"Notebook: {result['notebook_path']}") + print(f"Servidor: {result['server_url']}") + print(f"Total celdas: {result['total_cells']}") + for ct, count in result["cell_counts"].items(): + print(f" {ct}: {count}") + else: + cells = jupyter_read_cells( + args.notebook, args.server, args.token, args.cell + ) + if args.as_json: + print(json.dumps(cells, ensure_ascii=False, indent=2)) + else: + print(_format_readable(cells)) + except Exception as exc: + print(f"Error: {exc}", file=sys.stderr) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/python/functions/notebook/jupyter_write.md 
b/python/functions/notebook/jupyter_write.md new file mode 100644 index 00000000..397f1df2 --- /dev/null +++ b/python/functions/notebook/jupyter_write.md @@ -0,0 +1,111 @@ +--- +name: jupyter_write +kind: function +lang: py +domain: notebook +version: "1.0.0" +purity: impure +signature: "def jupyter_append_code(notebook_path: str, source: str, server_url: str = 'http://localhost:8888', token: str = '') -> dict" +description: "Operaciones de escritura sobre celdas de un notebook Jupyter via colaboracion en tiempo real (WebSocket). Expone cinco operaciones: append_code, append_markdown, insert, edit, delete. NO ejecuta celdas — solo modifica la estructura del notebook." +tags: [jupyter, notebook, websocket, cell, write, append, insert, edit, delete, nbmodel] +uses_functions: [] +uses_types: [] +returns: [] +returns_optional: false +error_type: "error_go_core" +imports: [jupyter_nbmodel_client] +tested: false +tests: [] +test_file_path: "" +file_path: "python/functions/notebook/jupyter_write.py" +--- + +## Funciones expuestas + +| Funcion | Descripcion | +|---------|-------------| +| `jupyter_append_code(notebook_path, source, server_url, token)` | Anade celda de codigo al final | +| `jupyter_append_markdown(notebook_path, source, server_url, token)` | Anade celda markdown al final | +| `jupyter_insert_cell(notebook_path, cell_index, source, cell_type, server_url, token)` | Inserta celda en posicion especifica | +| `jupyter_edit_cell(notebook_path, cell_index, source, server_url, token)` | Sobrescribe contenido de celda existente | +| `jupyter_delete_cell(notebook_path, cell_index, server_url, token)` | Elimina una celda | + +## Ejemplo + +```python +from notebook.jupyter_write import ( + jupyter_append_code, + jupyter_append_markdown, + jupyter_insert_cell, + jupyter_edit_cell, + jupyter_delete_cell, +) + +# Anadir celda de codigo al final +result = jupyter_append_code( + notebook_path="notebooks/01_analisis.ipynb", + source="import pandas as pd\ndf = 
pd.read_csv('data.csv')", + server_url="http://localhost:8888", + token="mi-token", +) +# {"action": "append_code", "cell_index": 5, "notebook": "notebooks/01_analisis.ipynb"} + +# Anadir celda markdown +result = jupyter_append_markdown( + notebook_path="notebooks/01_analisis.ipynb", + source="## Resultados\n\nAnalisis de los datos obtenidos.", +) +# {"action": "append_markdown", "cell_index": 6, "notebook": "notebooks/01_analisis.ipynb"} + +# Insertar celda en posicion 2 +result = jupyter_insert_cell( + notebook_path="notebooks/01_analisis.ipynb", + cell_index=2, + source="# celda insertada", + cell_type="code", +) +# {"action": "insert", "cell_index": 2, "cell_type": "code", "notebook": "..."} + +# Editar celda existente (indice 0) +result = jupyter_edit_cell( + notebook_path="notebooks/01_analisis.ipynb", + cell_index=0, + source="# Titulo actualizado", +) +# {"action": "edit", "cell_index": 0, "notebook": "..."} + +# Eliminar celda +result = jupyter_delete_cell( + notebook_path="notebooks/01_analisis.ipynb", + cell_index=3, +) +# {"action": "delete", "cell_index": 3, "notebook": "..."} +``` + +## CLI + +```bash +# Anadir celda de codigo +python -m notebook.jupyter_write append-code notebooks/01.ipynb "print('hola')" --server http://localhost:8888 --token mi-token + +# Anadir celda markdown +python -m notebook.jupyter_write append-markdown notebooks/01.ipynb "## Titulo" + +# Insertar en posicion 2 +python -m notebook.jupyter_write insert notebooks/01.ipynb 2 "x = 42" --type code + +# Editar celda 0 +python -m notebook.jupyter_write edit notebooks/01.ipynb 0 "# Nuevo titulo" + +# Eliminar celda 3 +python -m notebook.jupyter_write delete notebooks/01.ipynb 3 +``` + +## Notas + +- Todas las funciones son sincronas publicamente. Internamente usan `asyncio.run()` sobre corutinas async que se comunican via WebSocket con `NbModelClient`. +- El `notebook_path` es relativo al servidor Jupyter (no al filesystem local). 
"""Write operations on the cells of a Jupyter notebook over real-time collaboration.

Cells are never executed; only the notebook structure is changed (append,
insert, edit, delete). Communication with the Jupyter server goes through the
``jupyter_nbmodel_client`` websocket client.
"""

import argparse
import asyncio
import json
import sys
from urllib.error import URLError
from urllib.request import Request, urlopen

from jupyter_nbmodel_client import NbModelClient, get_jupyter_notebook_websocket_url

# Seconds to keep the websocket open after a local change before the client
# is closed. NOTE(review): presumably this gives the update time to reach the
# server; confirm whether the client offers an explicit flush instead.
_FLUSH_DELAY_S = 2


def _resolve_collab_username(server_url: str, token: str) -> str:
    """Return the display name the server reports for the current user.

    Queries ``<server_url>/api/me`` and picks the first non-empty value of
    ``identity.display_name``, ``identity.username`` and ``identity.name``.
    This is best-effort: any connection problem, malformed reply or missing
    identity yields ``"Anonymous"`` instead of an exception.
    """
    headers = {"Accept": "application/json"}
    if token:
        headers["Authorization"] = f"token {token}"
    try:
        # rstrip avoids "//api/me" when the caller passes a trailing slash.
        req = Request(f"{server_url.rstrip('/')}/api/me", headers=headers)
        with urlopen(req, timeout=5) as resp:
            me = json.loads(resp.read())
    except (URLError, OSError, ValueError):
        # ValueError covers json.JSONDecodeError and undecodable bytes.
        return "Anonymous"

    identity = me.get("identity") if isinstance(me, dict) else None
    if not isinstance(identity, dict):
        return "Anonymous"
    return (
        identity.get("display_name")
        or identity.get("username")
        or identity.get("name")
        or "Anonymous"
    )


# ---------------------------------------------------------------------------
# Internal async helpers
# ---------------------------------------------------------------------------


def _open_client(notebook_path: str, server_url: str, token: str) -> NbModelClient:
    """Build a not-yet-connected collaboration client for *notebook_path*."""
    ws_url = get_jupyter_notebook_websocket_url(
        server_url=server_url,
        token=token,
        path=notebook_path,
    )
    return NbModelClient(ws_url, username=_resolve_collab_username(server_url, token))


async def _append_cell(
    notebook_path: str,
    source: str,
    cell_type: str,
    server_url: str,
    token: str,
) -> dict:
    """Append a markdown cell (``cell_type == "markdown"``) or a code cell (anything else)."""
    async with _open_client(notebook_path, server_url, token) as nb:
        # The reported index is only meaningful once the local document
        # holds the cells that already exist on the server.
        await nb.wait_until_synced()
        if cell_type == "markdown":
            nb.add_markdown_cell(source)
        else:
            nb.add_code_cell(source)
        cell_index = len(nb) - 1
        await asyncio.sleep(_FLUSH_DELAY_S)
    return {
        "action": f"append_{cell_type}",
        "cell_index": cell_index,
        "notebook": notebook_path,
    }


async def _insert_cell(
    notebook_path: str,
    cell_index: int,
    source: str,
    cell_type: str,
    server_url: str,
    token: str,
) -> dict:
    """Insert a cell of ``cell_type`` at ``cell_index``."""
    async with _open_client(notebook_path, server_url, token) as nb:
        await nb.wait_until_synced()
        nb.insert_cell(cell_index, cell_type=cell_type, source=source)
        await asyncio.sleep(_FLUSH_DELAY_S)
    return {
        "action": "insert",
        "cell_index": cell_index,
        "cell_type": cell_type,
        "notebook": notebook_path,
    }


async def _edit_cell(
    notebook_path: str,
    cell_index: int,
    source: str,
    server_url: str,
    token: str,
) -> dict:
    """Replace the source of the cell at ``cell_index``."""
    async with _open_client(notebook_path, server_url, token) as nb:
        await nb.wait_until_synced()
        nb.set_cell_source(cell_index, source)
        await asyncio.sleep(_FLUSH_DELAY_S)
    return {
        "action": "edit",
        "cell_index": cell_index,
        "notebook": notebook_path,
    }


async def _delete_cell(
    notebook_path: str,
    cell_index: int,
    server_url: str,
    token: str,
) -> dict:
    """Delete the cell at ``cell_index``."""
    async with _open_client(notebook_path, server_url, token) as nb:
        await nb.wait_until_synced()
        nb.delete_cell(cell_index)
        await asyncio.sleep(_FLUSH_DELAY_S)
    return {
        "action": "delete",
        "cell_index": cell_index,
        "notebook": notebook_path,
    }


# ---------------------------------------------------------------------------
# Public synchronous API
# ---------------------------------------------------------------------------
# Every function below drives its coroutine with ``asyncio.run`` and therefore
# cannot be called from code that is already running inside an event loop.


def jupyter_append_code(
    notebook_path: str,
    source: str,
    server_url: str = "http://localhost:8888",
    token: str = "",
) -> dict:
    """Append a code cell at the end of the notebook.

    Args:
        notebook_path: Notebook path relative to the Jupyter server root.
        source: Source code of the new cell.
        server_url: Base URL of the Jupyter server.
        token: Auth token of the Jupyter server.

    Returns:
        Dict with ``action`` ("append_code"), ``cell_index`` and ``notebook``.
    """
    return asyncio.run(_append_cell(notebook_path, source, "code", server_url, token))


def jupyter_append_markdown(
    notebook_path: str,
    source: str,
    server_url: str = "http://localhost:8888",
    token: str = "",
) -> dict:
    """Append a markdown cell at the end of the notebook.

    Args:
        notebook_path: Notebook path relative to the Jupyter server root.
        source: Markdown content of the new cell.
        server_url: Base URL of the Jupyter server.
        token: Auth token of the Jupyter server.

    Returns:
        Dict with ``action`` ("append_markdown"), ``cell_index`` and ``notebook``.
    """
    return asyncio.run(
        _append_cell(notebook_path, source, "markdown", server_url, token)
    )


def jupyter_insert_cell(
    notebook_path: str,
    cell_index: int,
    source: str,
    cell_type: str = "code",
    server_url: str = "http://localhost:8888",
    token: str = "",
) -> dict:
    """Insert a cell at a given position of the notebook.

    Args:
        notebook_path: Notebook path relative to the Jupyter server root.
        cell_index: Index to insert at (0 = first position).
        source: Content of the new cell.
        cell_type: Cell type, "code" or "markdown".
        server_url: Base URL of the Jupyter server.
        token: Auth token of the Jupyter server.

    Returns:
        Dict with ``action``, ``cell_index``, ``cell_type`` and ``notebook``.
    """
    return asyncio.run(
        _insert_cell(notebook_path, cell_index, source, cell_type, server_url, token)
    )


def jupyter_edit_cell(
    notebook_path: str,
    cell_index: int,
    source: str,
    server_url: str = "http://localhost:8888",
    token: str = "",
) -> dict:
    """Overwrite the content of an existing cell.

    Args:
        notebook_path: Notebook path relative to the Jupyter server root.
        cell_index: 0-based index of the cell to edit.
        source: New content of the cell.
        server_url: Base URL of the Jupyter server.
        token: Auth token of the Jupyter server.

    Returns:
        Dict with ``action``, ``cell_index`` and ``notebook``.
    """
    return asyncio.run(
        _edit_cell(notebook_path, cell_index, source, server_url, token)
    )


def jupyter_delete_cell(
    notebook_path: str,
    cell_index: int,
    server_url: str = "http://localhost:8888",
    token: str = "",
) -> dict:
    """Delete a cell from the notebook.

    Args:
        notebook_path: Notebook path relative to the Jupyter server root.
        cell_index: 0-based index of the cell to delete.
        server_url: Base URL of the Jupyter server.
        token: Auth token of the Jupyter server.

    Returns:
        Dict with ``action``, ``cell_index`` and ``notebook``.
    """
    return asyncio.run(
        _delete_cell(notebook_path, cell_index, server_url, token)
    )


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------


def _build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser with one sub-command per write operation."""
    parser = argparse.ArgumentParser(
        prog="jupyter_write",
        description="Operaciones de escritura sobre celdas de un notebook Jupyter.",
    )
    sub = parser.add_subparsers(dest="command", required=True)

    def add_common(p: argparse.ArgumentParser) -> None:
        # Options shared by every sub-command.
        p.add_argument("--server", default="http://localhost:8888", help="URL del servidor Jupyter")
        p.add_argument("--token", default="", help="Token de autenticacion")

    p_ac = sub.add_parser("append-code", help="Anade celda de codigo al final")
    p_ac.add_argument("notebook", help="Ruta del notebook")
    p_ac.add_argument("source", help="Codigo fuente")
    add_common(p_ac)

    p_am = sub.add_parser("append-markdown", help="Anade celda markdown al final")
    p_am.add_argument("notebook", help="Ruta del notebook")
    p_am.add_argument("source", help="Contenido markdown")
    add_common(p_am)

    p_ins = sub.add_parser("insert", help="Inserta celda en posicion especifica")
    p_ins.add_argument("notebook", help="Ruta del notebook")
    p_ins.add_argument("index", type=int, help="Indice de insercion (0-based)")
    p_ins.add_argument("source", help="Contenido de la celda")
    p_ins.add_argument("--type", dest="cell_type", choices=["code", "markdown"], default="code")
    add_common(p_ins)

    p_ed = sub.add_parser("edit", help="Sobrescribe el contenido de una celda")
    p_ed.add_argument("notebook", help="Ruta del notebook")
    p_ed.add_argument("index", type=int, help="Indice de la celda (0-based)")
    p_ed.add_argument("source", help="Nuevo contenido")
    add_common(p_ed)

    p_del = sub.add_parser("delete", help="Elimina una celda")
    p_del.add_argument("notebook", help="Ruta del notebook")
    p_del.add_argument("index", type=int, help="Indice de la celda (0-based)")
    add_common(p_del)

    return parser


def main() -> None:
    """Run the CLI: print the result as JSON, or ``Error: ...`` on stderr and exit 1."""
    parser = _build_parser()
    args = parser.parse_args()

    # Top-level boundary: a connection or index failure becomes a one-line
    # error instead of a traceback.
    try:
        if args.command == "append-code":
            result = jupyter_append_code(args.notebook, args.source, args.server, args.token)
        elif args.command == "append-markdown":
            result = jupyter_append_markdown(args.notebook, args.source, args.server, args.token)
        elif args.command == "insert":
            result = jupyter_insert_cell(
                args.notebook, args.index, args.source, args.cell_type, args.server, args.token
            )
        elif args.command == "edit":
            result = jupyter_edit_cell(args.notebook, args.index, args.source, args.server, args.token)
        elif args.command == "delete":
            result = jupyter_delete_cell(args.notebook, args.index, args.server, args.token)
        else:
            # Unreachable while the sub-command is required; kept as a guard.
            parser.print_help()
            return
    except Exception as exc:
        print(f"Error: {exc}", file=sys.stderr)
        sys.exit(1)

    print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()