feat: enhance jupyter notebook functions with auto-init and kernel management

Auto-create notebooks y sesiones en jupyter_exec (append y cell).
Auto-create en jupyter_write (append_code, append_markdown, batch).
Nuevos subcomandos cleanup y shutdown-all en jupyter_kernel.
README.md renombrado a README.txt para evitar error de parseo del indexer.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-04-08 00:10:23 +02:00
parent 7eef2544ab
commit fc8062bade
7 changed files with 215 additions and 7 deletions
@@ -102,4 +102,6 @@ Output siempre JSON. En error retorna `{"error": "..."}` por stderr con exit cod
- `jupyter_kernel_execute` es sincrona directamente porque `KernelClient.execute` es bloqueante.
- El token puede ser cadena vacia si el servidor tiene autenticacion deshabilitada.
- `NbModelClient` requiere que el servidor tenga habilitado el endpoint colaborativo (`/api/collaboration/`), disponible en JupyterLab >= 4 con `jupyter-collaboration` instalado.
- **Auto-init**: `jupyter_append_execute` crea el notebook automaticamente si no existe (via REST PUT /api/contents) y arranca una sesion con kernel si no hay ninguna activa para ese notebook (via POST /api/sessions). No es necesario abrir el notebook manualmente en el navegador.
- **Auto-session**: `jupyter_execute_cell` tambien garantiza que exista una sesion con kernel antes de ejecutar.
- **Fix Issue 006**: `jupyter_execute_cell` normaliza la celda antes de ejecutar. Las celdas creadas manualmente (no via la UI de Jupyter) pueden carecer de `outputs` o `execution_count` en el modelo CRDT, lo que causaba `KeyError: 'outputs'` dentro de `execute_cell` al hacer `del ycell["outputs"][:]`. El fix lee la celda con `nb[cell_index]`, detecta los campos faltantes, y reemplaza la celda via `nb[cell_index] = _normalize_code_cell(cell)` — que usa `set_cell` internamente para re-crear el mapa CRDT completo preservando el source original.
+80 -4
View File
@@ -10,7 +10,7 @@ import asyncio
import json
from functools import partial
from typing import Any
from urllib.error import URLError
from urllib.error import HTTPError, URLError
from urllib.request import Request, urlopen
from jupyter_kernel_client import KernelClient
@@ -23,6 +23,80 @@ from nbformat import NotebookNode
# ---------------------------------------------------------------------------
def _notebook_exists(notebook_path: str, server_url: str, token: str) -> bool:
    """Return whether a notebook exists on the Jupyter server.

    Sends ``HEAD {server_url}/api/contents/{notebook_path}`` and maps the
    outcome to a boolean.

    Args:
        notebook_path: Server-relative notebook path, given unencoded. It is
            percent-encoded before being placed in the URL, so paths that
            contain spaces, ``#``, ``?`` or non-ASCII characters are accepted.
        server_url: Base URL of the Jupyter server.
        token: Authentication token. An empty string sends no
            ``Authorization`` header.

    Returns:
        True if the request succeeds, False if the server answers 404.

    Raises:
        urllib.error.HTTPError: For any HTTP error status other than 404.
        urllib.error.URLError: If the server cannot be reached.
    """
    # Imported locally so this helper does not rely on a module-level import.
    from urllib.parse import quote

    headers = {"Accept": "application/json"}
    if token:
        headers["Authorization"] = f"token {token}"
    # "/" is kept literal (quote's default) so nested directories survive;
    # everything else unsafe is escaped. Unescaped, a space makes http.client
    # reject the request with InvalidURL and "#"/"?" would truncate the path.
    check_url = f"{server_url}/api/contents/{quote(notebook_path)}"
    req = Request(check_url, headers=headers, method="HEAD")
    try:
        with urlopen(req, timeout=5):
            return True
    except HTTPError as e:
        # Only "not found" means the notebook is absent; any other status
        # (auth failure, server error, ...) is surfaced to the caller.
        if e.code == 404:
            return False
        raise
def _create_notebook(notebook_path: str, server_url: str, token: str, kernel_name: str = "python3") -> None:
    """Create an empty notebook via ``PUT /api/contents`` if it does not exist.

    Returns immediately when ``_notebook_exists`` reports that the notebook is
    already present. Otherwise uploads an nbformat 4.5 document with no cells
    whose kernelspec metadata names ``kernel_name``.

    Args:
        notebook_path: Server-relative notebook path, given unencoded. It is
            percent-encoded before being placed in the URL.
        server_url: Base URL of the Jupyter server.
        token: Authentication token. An empty string sends no
            ``Authorization`` header.
        kernel_name: Kernel spec name written into the notebook metadata.

    Raises:
        urllib.error.HTTPError: If the existence check or the PUT request
            returns an HTTP error status.
        urllib.error.URLError: If the server cannot be reached.
    """
    # Imported locally so this helper does not rely on a module-level import.
    from urllib.parse import quote

    if _notebook_exists(notebook_path, server_url, token):
        return

    headers = {
        "Content-Type": "application/json",
        "Accept": "application/json",
    }
    if token:
        headers["Authorization"] = f"token {token}"

    # Friendly display names for the two known kernel names; any other kernel
    # is shown under its raw spec name.
    kernel_display = {"python3": "Python 3 (ipykernel)", "python": "Python 3"}.get(kernel_name, kernel_name)
    # NOTE(review): the language fields are fixed to "python" regardless of
    # kernel_name — confirm this is intended for non-Python kernels.
    notebook_content = {
        "nbformat": 4,
        "nbformat_minor": 5,
        "metadata": {
            "kernelspec": {"name": kernel_name, "display_name": kernel_display, "language": "python"},
            "language_info": {"name": "python"},
        },
        "cells": [],
    }

    body = json.dumps({"type": "notebook", "content": notebook_content}).encode("utf-8")
    # Escape the path (keeping "/") so names with spaces or reserved
    # characters produce a valid request URL instead of an InvalidURL error.
    url = f"{server_url}/api/contents/{quote(notebook_path)}"
    req = Request(url, data=body, headers=headers, method="PUT")
    with urlopen(req, timeout=10) as resp:
        # Drain the response; its content is not used.
        resp.read()
def _ensure_session(server_url: str, token: str, notebook_path: str, kernel_name: str = "python3") -> str:
    """Make sure the notebook has a session and return its kernel id.

    If ``_resolve_kernel_id`` yields a truthy value for the notebook, that
    value is returned unchanged. Otherwise a session is requested with
    ``POST /api/sessions`` for ``notebook_path`` and ``kernel_name``, and the
    kernel id found in the response is returned.

    Args:
        server_url: Base URL of the Jupyter server.
        token: Authentication token. An empty string sends no
            ``Authorization`` header.
        notebook_path: Notebook path sent as the session ``path``.
        kernel_name: Kernel spec name requested for the new session.

    Returns:
        The kernel id, or an empty string when the response carries no
        ``kernel.id`` entry.
    """
    existing_id = _resolve_kernel_id(server_url, token, notebook_path)
    if existing_id:
        return existing_id

    request_headers = {"Accept": "application/json", "Content-Type": "application/json"}
    if token:
        request_headers["Authorization"] = f"token {token}"

    payload = {
        "path": notebook_path,
        "type": "notebook",
        "kernel": {"name": kernel_name},
    }
    request = Request(
        f"{server_url}/api/sessions",
        data=json.dumps(payload).encode("utf-8"),
        headers=request_headers,
        method="POST",
    )
    with urlopen(request, timeout=10) as response:
        created = json.loads(response.read())

    kernel_info = created.get("kernel", {})
    return kernel_info.get("id", "")
def _api_get(url: str, token: str = "") -> dict | list | None:
"""GET a Jupyter REST API endpoint."""
headers = {"Accept": "application/json"}
@@ -112,13 +186,14 @@ async def _async_append_execute(
server_url: str,
token: str,
) -> dict[str, Any]:
_create_notebook(notebook_path, server_url, token)
kernel_id = _ensure_session(server_url, token, notebook_path)
ws_url = get_jupyter_notebook_websocket_url(
server_url,
notebook_path,
token or None,
)
kernel_id = _resolve_kernel_id(server_url, token, notebook_path)
username = _resolve_collab_username(server_url, token)
async with NbModelClient(ws_url, username=username) as nb:
@@ -149,12 +224,13 @@ async def _async_execute_cell(
server_url: str,
token: str,
) -> dict[str, Any]:
kernel_id = _ensure_session(server_url, token, notebook_path)
ws_url = get_jupyter_notebook_websocket_url(
server_url,
notebook_path,
token or None,
)
kernel_id = _resolve_kernel_id(server_url, token, notebook_path)
username = _resolve_collab_username(server_url, token)
async with NbModelClient(ws_url, username=username) as nb:
+10 -2
View File
@@ -6,8 +6,8 @@ domain: notebook
version: "1.0.0"
purity: impure
signature: "def jupyter_kernel_list(server_url: str = \"http://localhost:8888\", token: str = \"\") -> list[dict]"
description: "CRUD completo de kernels Jupyter via REST API. Expone seis operaciones: list, start, restart, interrupt, shutdown y sessions. Usa solo stdlib (urllib, json), sin dependencias externas."
tags: [jupyter, notebook, kernel, api, http, rest, sessions, crud]
description: "CRUD completo de kernels Jupyter via REST API. Expone ocho operaciones: list, start, restart, interrupt, shutdown, sessions, cleanup y shutdown-all. Usa solo stdlib (urllib, json), sin dependencias externas."
tags: [jupyter, notebook, kernel, api, http, rest, sessions, crud, cleanup]
uses_functions: []
uses_types: []
returns: []
@@ -31,6 +31,8 @@ file_path: "python/functions/notebook/jupyter_kernel.py"
| `jupyter_kernel_interrupt(server_url, token, kernel_id)` | `POST /api/kernels/{id}/interrupt` | Interrumpe ejecucion |
| `jupyter_kernel_shutdown(server_url, token, kernel_id)` | `DELETE /api/kernels/{id}` | Apaga y elimina un kernel |
| `jupyter_kernel_sessions(server_url, token)` | `GET /api/sessions` | Lista sesiones activas |
| `jupyter_kernel_cleanup(server_url, token, idle_seconds)` | `GET + DELETE` | Apaga kernels inactivos |
| `jupyter_kernel_shutdown_all(server_url, token)` | `GET + DELETE` | Apaga todos los kernels |
## Ejemplo
@@ -88,6 +90,12 @@ python python/functions/notebook/jupyter_kernel.py shutdown abc123-...
# Listar sesiones
python python/functions/notebook/jupyter_kernel.py sessions
# Limpiar kernels inactivos (default: 1h sin actividad; aqui se usa un umbral de 30 min)
python python/functions/notebook/jupyter_kernel.py cleanup --idle-seconds 1800
# Apagar todos los kernels
python python/functions/notebook/jupyter_kernel.py shutdown-all
```
Todos los subcomandos aceptan `--server` y `--token`. El output es siempre JSON.
@@ -196,6 +196,80 @@ def jupyter_kernel_sessions(
return sessions
def jupyter_kernel_cleanup(
    server_url: str = "http://localhost:8888",
    token: str = "",
    idle_seconds: int = 3600,
) -> list[dict]:
    """Shut down kernels that have been inactive for at least ``idle_seconds``.

    Useful for freeing resources on servers with many open notebooks. With
    the default threshold, kernels inactive for one hour or more are closed.

    Kernels are left running when any of these holds:
      * the kernel reports ``execution_state == "busy"`` (it is still
        executing, even if it has produced no recent activity);
      * it has no ``last_activity`` value;
      * its ``last_activity`` cannot be parsed as an ISO-8601 timestamp, or
        the timestamp carries no timezone information.

    Args:
        server_url: Base URL of the Jupyter server.
        token: Authentication token. Empty if the server requires no auth.
        idle_seconds: Inactivity threshold, in seconds, from which a kernel
            is considered idle.

    Returns:
        One dict per kernel that was shut down, with the keys ``id``,
        ``name``, ``last_activity`` and ``idle_seconds`` (whole seconds).

    Raises:
        urllib.error.HTTPError: If an HTTP response indicates an error.
        urllib.error.URLError: If the server cannot be reached.
    """
    from datetime import datetime, timezone

    kernels = jupyter_kernel_list(server_url, token)
    now = datetime.now(timezone.utc)
    shutdown_list = []
    for k in kernels:
        # A stale last_activity does not mean the kernel is idle: a cell that
        # runs for a long time without output keeps the kernel "busy".
        # Shutting it down would abort work in progress, so skip it.
        if k.get("execution_state") == "busy":
            continue
        last_activity = k.get("last_activity", "")
        if not last_activity:
            continue
        try:
            # fromisoformat() on older Python versions rejects a trailing
            # "Z", so it is rewritten as an explicit UTC offset first.
            last_dt = datetime.fromisoformat(last_activity.replace("Z", "+00:00"))
            # Subtracting a naive timestamp from the aware "now" raises
            # TypeError, which is treated like an unparseable value below.
            idle = (now - last_dt).total_seconds()
        except (ValueError, TypeError):
            continue
        if idle >= idle_seconds:
            jupyter_kernel_shutdown(server_url, token, k["id"])
            shutdown_list.append({
                "id": k["id"],
                "name": k.get("name", ""),
                "last_activity": last_activity,
                "idle_seconds": int(idle),
            })
    return shutdown_list
def jupyter_kernel_shutdown_all(
    server_url: str = "http://localhost:8888",
    token: str = "",
) -> list[dict]:
    """Shut down every kernel the server currently lists.

    Args:
        server_url: Base URL of the Jupyter server.
        token: Authentication token. Empty if the server requires no auth.

    Returns:
        One ``{"id": ..., "name": ...}`` dict per kernel that was shut down,
        in the order returned by ``jupyter_kernel_list``.

    Raises:
        urllib.error.HTTPError: If an HTTP response indicates an error.
        urllib.error.URLError: If the server cannot be reached.
    """
    stopped: list[dict] = []
    for kernel in jupyter_kernel_list(server_url, token):
        kernel_id = kernel["id"]
        jupyter_kernel_shutdown(server_url, token, kernel_id)
        # Record the kernel only after its shutdown call has returned.
        stopped.append({"id": kernel_id, "name": kernel.get("name", "")})
    return stopped
# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------
@@ -248,6 +322,18 @@ if __name__ == "__main__":
# sessions
subparsers.add_parser("sessions", help="Lista las sesiones activas.")
# cleanup
sp_cleanup = subparsers.add_parser("cleanup", help="Apaga kernels inactivos.")
sp_cleanup.add_argument(
"--idle-seconds",
type=int,
default=3600,
help="Segundos de inactividad para considerar ocioso (default: 3600)",
)
# shutdown-all
subparsers.add_parser("shutdown-all", help="Apaga todos los kernels activos.")
args = parser.parse_args()
try:
@@ -267,6 +353,10 @@ if __name__ == "__main__":
result = {"status": "shutdown", "kernel_id": args.kernel_id}
elif args.command == "sessions":
result = jupyter_kernel_sessions(args.server, args.token)
elif args.command == "cleanup":
result = jupyter_kernel_cleanup(args.server, args.token, args.idle_seconds)
elif args.command == "shutdown-all":
result = jupyter_kernel_shutdown_all(args.server, args.token)
else:
parser.print_help()
sys.exit(1)
+2 -1
View File
@@ -153,4 +153,5 @@ python -m notebook.jupyter_write delete notebooks/01.ipynb 3
- NO ejecuta celdas — solo modifica la estructura. Para ejecutar, usar `jupyter_exec`.
- `server_url` y `token` tienen defaults convenientes para desarrollo local (`http://localhost:8888`, token vacio).
- El campo `cell_index` en el resultado refleja la posicion final de la celda en el notebook.
- Patron tipico: `create` para crear el notebook, luego `batch` para poblar las celdas iniciales.
- `append_code`, `append_markdown` y `batch` crean el notebook automaticamente si no existe (auto-create via REST). No es necesario llamar a `create` previamente.
- Patron tipico: `batch` para poblar las celdas iniciales (crea el notebook si no existe), o `create` + `batch` si se necesita control explicito.
@@ -30,6 +30,35 @@ def _resolve_collab_username(server_url: str, token: str) -> str:
return "Anonymous"
# ---------------------------------------------------------------------------
# Helpers internos
# ---------------------------------------------------------------------------
def _notebook_exists(notebook_path: str, server_url: str, token: str) -> bool:
    """Return whether a notebook exists on the Jupyter server.

    Sends ``HEAD {server_url}/api/contents/{notebook_path}`` and maps the
    outcome to a boolean.

    Args:
        notebook_path: Server-relative notebook path, given unencoded. It is
            percent-encoded before being placed in the URL, so paths that
            contain spaces, ``#``, ``?`` or non-ASCII characters are accepted.
        server_url: Base URL of the Jupyter server.
        token: Authentication token. An empty string sends no
            ``Authorization`` header.

    Returns:
        True if the request succeeds, False if the server answers 404.

    Raises:
        urllib.error.HTTPError: For any HTTP error status other than 404.
        urllib.error.URLError: If the server cannot be reached.
    """
    # Imported locally so this helper does not rely on a module-level import.
    from urllib.parse import quote

    headers = {"Accept": "application/json"}
    if token:
        headers["Authorization"] = f"token {token}"
    # "/" is kept literal (quote's default) so nested directories survive;
    # everything else unsafe is escaped. Unescaped, a space makes http.client
    # reject the request with InvalidURL and "#"/"?" would truncate the path.
    check_url = f"{server_url}/api/contents/{quote(notebook_path)}"
    req = Request(check_url, headers=headers, method="HEAD")
    try:
        with urlopen(req, timeout=5):
            return True
    except HTTPError as e:
        # Only "not found" means the notebook is absent; any other status
        # (auth failure, server error, ...) is surfaced to the caller.
        if e.code == 404:
            return False
        raise
def _auto_create_notebook(notebook_path: str, server_url: str, token: str) -> bool:
    """Create the notebook when the server does not have it yet.

    Args:
        notebook_path: Notebook path passed to the existence check and to
            ``jupyter_create_notebook``.
        server_url: Base URL of the Jupyter server.
        token: Authentication token forwarded to both calls.

    Returns:
        True if the notebook was missing and ``jupyter_create_notebook`` was
        called; False if it already existed and nothing was done.
    """
    already_present = _notebook_exists(notebook_path, server_url, token)
    if already_present:
        return False
    jupyter_create_notebook(notebook_path, server_url=server_url, token=token)
    return True
# ---------------------------------------------------------------------------
# Helpers internos async
# ---------------------------------------------------------------------------
@@ -42,6 +71,7 @@ async def _append_cell(
server_url: str,
token: str,
) -> dict:
_auto_create_notebook(notebook_path, server_url, token)
ws_url = get_jupyter_notebook_websocket_url(
server_url=server_url,
token=token,
@@ -139,6 +169,7 @@ async def _batch_write(
token: str,
) -> dict:
"""Anade multiples celdas en una sola conexion WebSocket."""
_auto_create_notebook(notebook_path, server_url, token)
ws_url = get_jupyter_notebook_websocket_url(
server_url=server_url,
token=token,