chore: sync from fn-registry agent

This commit is contained in:
fn-registry agent
2026-05-14 02:06:42 +02:00
commit 480dd958e5
17 changed files with 4510 additions and 0 deletions
+40
View File
@@ -0,0 +1,40 @@
# JUPYTER HABILITADO EN ESTE ANALISIS
## Reglas OBLIGATORIAS para Claude
### 1. CODIGO INMUTABLE — NUNCA MODIFICAR CELDAS EXISTENTES
- **PROHIBIDO** usar NotebookEdit para reemplazar celdas existentes
- **SIEMPRE** anadir celdas NUEVAS al final del notebook
- Si hay un error en una celda, crear celda nueva con la correccion
- El historial de trabajo debe quedar intacto para trazabilidad
### 2. PROGRAMACION FUNCIONAL OBLIGATORIA
- **Funciones puras**: sin efectos secundarios, mismo input -> mismo output
- **Inmutabilidad**: nunca mutar datos, crear copias transformadas
- **Composicion**: funciones pequenas que se combinan
- Preferir: `map`, `filter`, `reduce`, list comprehensions
- Evitar: loops con mutacion, `global`, modificar argumentos in-place
### 3. SIEMPRE usar MCP jupyter para ejecutar codigo Python
- Las ejecuciones se ven en tiempo real en Jupyter Lab del usuario
- Compartimos variables y estado del kernel
- **NUNCA usar bash para ejecutar Python en este analisis**
### 4. Verificar Jupyter activo ANTES de ejecutar
- Si no esta activo: pedir al usuario que ejecute `./run-jupyter-lab.sh`
### 5. Gestion de notebooks
- Notebooks en la carpeta `notebooks/` o subcarpetas
- Si un notebook tiene >50 celdas, crear uno nuevo
- Nombrar descriptivamente: `01_exploracion.ipynb`, `02_limpieza.ipynb`
### 6. Gestion de Python
- **SIEMPRE usar `uv`** para gestionar dependencias
- Anadir paquetes con `uv add nombre_paquete`
### 7. Acceso al fn_registry
- `FN_REGISTRY_ROOT` apunta a la raiz del registry
- Para importar funciones Python: `sys.path.insert(0, os.path.join(os.environ["FN_REGISTRY_ROOT"], "python", "functions"))`
- Para consultar registry.db: usar la CLI `sqlite3` o `import sqlite3` en Python, con la ruta `$FN_REGISTRY_ROOT/registry.db`
Binary file not shown.
@@ -0,0 +1,100 @@
"""
fn_registry kernel startup
Autoconfigura acceso al registry en cada notebook.
Generado por write_jupyter_registry_kernel (fn_registry).
"""
import os
import sys
import sqlite3
from pathlib import Path
# ── FN_REGISTRY_ROOT ────────────────────────────────────────
# Priority: env var > hardcoded path > automatic discovery
def _discover_registry_root():
    """Locate the fn_registry root directory.

    Lookup order:
      1. The ``FN_REGISTRY_ROOT`` environment variable, when set and
         non-empty (returned as a resolved path).
      2. The hardcoded default location, when it contains ``registry.db``.
      3. The current working directory or one of its ancestors, whichever
         first contains ``registry.db`` (at most ten directories are checked).

    Falls back to the hardcoded default when nothing is found, whether or not
    that directory actually exists.
    """
    default_root = Path("/home/lucas/fn_registry")

    from_env = os.environ.get("FN_REGISTRY_ROOT")
    if from_env:
        return Path(from_env).resolve()

    if (default_root / "registry.db").exists():
        return default_root

    # Walk up from the CWD until a directory holding registry.db shows up.
    candidate = Path.cwd()
    checked = 0
    while checked < 10:
        if (candidate / "registry.db").exists():
            return candidate
        parent = candidate.parent
        if parent == candidate:  # filesystem root reached, nowhere left to go
            break
        candidate = parent
        checked += 1
    return default_root
FN_REGISTRY_ROOT = _discover_registry_root()
# Export the resolved root so code run in the notebook sees the same location.
os.environ["FN_REGISTRY_ROOT"] = str(FN_REGISTRY_ROOT)

# ── sys.path: import Python functions from the registry ─────
_python_functions = FN_REGISTRY_ROOT / "python" / "functions"
if _python_functions.exists():
    for _domain in sorted(_python_functions.iterdir()):
        # Only directories count as domains; names starting with "_" are skipped.
        if not _domain.is_dir() or _domain.name.startswith("_"):
            continue
        _path = str(_domain)
        if _path not in sys.path:
            sys.path.insert(0, _path)

# Also the parent directory, for per-domain imports: from core import filter_list
_pf = str(_python_functions)
if _pf not in sys.path:
    sys.path.insert(0, _pf)

# ── fn_query: query registry.db from the notebook ───────────
_REGISTRY_DB = FN_REGISTRY_ROOT / "registry.db"
def fn_query(sql, params=()):
    """Run a SQL query against registry.db and return the resulting rows.

    A new connection is opened for every call and is always closed before
    returning, including when the query raises.

    Args:
        sql: SQL statement to execute.
        params: values bound to the statement's placeholders.

    Returns:
        A list with one dict per row, keyed by column name.

    Raises:
        FileNotFoundError: if registry.db does not exist at the expected path.

    Examples:
        fn_query("SELECT id, description FROM functions WHERE domain = ?", ("finance",))
        fn_query("SELECT id FROM functions_fts WHERE functions_fts MATCH ?", ("slice*",))
    """
    if not _REGISTRY_DB.exists():
        raise FileNotFoundError(f"registry.db no encontrado en {_REGISTRY_DB}")

    connection = sqlite3.connect(str(_REGISTRY_DB))
    connection.row_factory = sqlite3.Row
    try:
        cursor = connection.execute(sql, params)
        return list(map(dict, cursor.fetchall()))
    finally:
        connection.close()
def fn_search(term):
    """Search registry functions and types by name or description.

    The term is matched as a prefix against the ``name`` and ``description``
    columns of the FTS indexes. It is handed to FTS5 as a quoted string
    literal, so characters that carry meaning in the FTS5 query syntax
    (hyphens, colons, quotes, parentheses, operators) are treated as plain
    text instead of raising a syntax error or being parsed as operators.

    Args:
        term: text to look for; surrounding whitespace is ignored.

    Returns:
        A dict with the keys "functions" and "types", each holding a list of
        row dicts ordered by name. A blank term yields two empty lists.

    Examples:
        fn_search("slice")
        fn_search("finance")
    """
    text = str(term).strip()
    if not text:
        # An empty prefix is not a valid FTS5 expression; nothing to search for.
        return {"functions": [], "types": []}

    # FTS5 string literal: wrap in double quotes, double any embedded quote.
    quoted = '"' + text.replace('"', '""') + '"'
    fts_term = f"name:{quoted}* OR description:{quoted}*"

    functions = fn_query(
        "SELECT id, kind, purity, lang, description FROM functions "
        "WHERE id IN (SELECT id FROM functions_fts WHERE functions_fts MATCH ?) "
        "ORDER BY name", (fts_term,)
    )
    types = fn_query(
        "SELECT id, algebraic, lang, description FROM types "
        "WHERE id IN (SELECT id FROM types_fts WHERE types_fts MATCH ?) "
        "ORDER BY name", (fts_term,)
    )
    return {"functions": functions, "types": types}
def fn_code(function_id):
    """Return the source code stored in the registry for a function.

    Args:
        function_id: registry id of the function.

    Returns:
        The value of the ``code`` column for that id.

    Raises:
        KeyError: if no function with that id exists.

    Example:
        print(fn_code("filter_list_py_core"))
    """
    matches = fn_query("SELECT code FROM functions WHERE id = ?", (function_id,))
    if matches:
        return matches[0]["code"]
    raise KeyError(f"Funcion no encontrada: {function_id}")
# ── Welcome banner ──────────────────────────────────────────
# Printed when this startup script runs: shows the resolved registry root,
# whether registry.db exists there, and the helpers defined above.
print(f"fn_registry conectado: {FN_REGISTRY_ROOT}")
print(f" registry.db: {'OK' if _REGISTRY_DB.exists() else 'NO ENCONTRADO'}")
print(f" Python functions: {_pf}")
print(f" Helpers: fn_query(), fn_search(), fn_code()")
+1
View File
@@ -0,0 +1 @@
8889
+7
View File
@@ -0,0 +1,7 @@
{
"c1085b1e-4f62-4837-ae69-9d08b917dc85": {
"version": "2.4.0",
"created_at": "2026-05-13T22:42:24.017631+00:00",
"document_version": "2.0.0"
}
}
Binary file not shown.
+12
View File
@@ -0,0 +1,12 @@
{
"mcpServers": {
"jupyter": {
"command": "/home/lucas/fn_registry/projects/fn_monitoring/analysis/domain_coverage_gaps/.venv/bin/python",
"args": ["-m", "jupyter_mcp_server.server"],
"env": {
"SERVER_URL": "http://localhost:8889",
"TOKEN": ""
}
}
}
}
+1
View File
@@ -0,0 +1 @@
3.13
View File
+17
View File
@@ -0,0 +1,17 @@
---
name: domain_coverage_gaps
lang: py
domain: datascience
description: "Mapa de funciones interesantes por dominio + gap analysis para trading/scraping/quant/realtime/IA gen"
tags: []
uses_functions: []
uses_types: []
framework: "jupyterlab"
entry_point: "notebooks/main.ipynb"
dir_path: "projects/fn_monitoring/analysis/domain_coverage_gaps"
repo_url: ""
---
## Notas
Mapa de funciones interesantes por dominio + gap analysis para trading/scraping/quant/realtime/IA gen
+6
View File
@@ -0,0 +1,6 @@
def main():
    """Print the project's greeting line to stdout."""
    greeting = "Hello from domain-coverage-gaps!"
    print(greeting)


# Run the greeting only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
@@ -0,0 +1,533 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "95651c14",
"metadata": {},
"source": [
"# 02 — Gap analysis: 8 temas\n",
"\n",
"Para cada tema: **(A) lo que YA tenemos**, **(B) lo que falta**, **(C) primer paso** (funciones concretas a delegar a `fn-constructor`).\n",
"\n",
"Temas: trading · scraping_web · analisis_quantitativo · monitorizacion_realtime · generacion_imagenes_ia · generacion_texto_ia · generacion_audio · audio_realtime_voiceconversion."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8f0bbd20",
"metadata": {},
"outputs": [],
"source": [
"import os, sqlite3, pandas as pd\n",
"ROOT = os.environ['FN_REGISTRY_ROOT']\n",
"conn = sqlite3.connect(f'file:{ROOT}/registry.db?mode=ro', uri=True)\n",
"pd.set_option('display.max_colwidth', 120)\n",
"\n",
"def show(ids, title=''):\n",
" if not ids: print(f'{title}: (vacio)'); return None\n",
" qm = ','.join('?'*len(ids))\n",
" df = pd.read_sql_query(\n",
" f\"SELECT id, lang, purity, description FROM functions WHERE id IN ({qm})\",\n",
" conn, params=ids)\n",
" if title: print(f'=== {title} ({len(df)}/{len(ids)}) ===')\n",
" return df\n",
"\n",
"def fts(q, limit=15):\n",
" return pd.read_sql_query(\n",
" '''SELECT f.id, f.lang, f.purity, f.description\n",
" FROM functions_fts JOIN functions f ON f.id = functions_fts.id\n",
" WHERE functions_fts MATCH ? ORDER BY rank LIMIT ?''',\n",
" conn, params=[q, limit])"
]
},
{
"cell_type": "markdown",
"id": "e367b10d",
"metadata": {},
"source": [
"---\n",
"## 1) trading\n",
"\n",
"**Lo que tenemos** — `finance` ya cubre indicators + OHLCV + persistencia y un simulador de mercado.\n",
"\n",
"**Falta** para un stack de trading real: conectores exchange (REST + WS) por venue concreto, libro de ordenes, ejecucion paper/real, gestion de riesgo, backtester vectorizado, sizing/portfolio."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "640b287d",
"metadata": {},
"outputs": [],
"source": [
"show([\n",
" 'fetch_ohlcv_go_finance','tick_to_ohlcv_go_finance','stream_ticks_go_finance',\n",
" 'sma_go_finance','ema_go_finance','rsi_go_finance','vwap_go_finance',\n",
" 'bollinger_bands_go_finance','sharpe_ratio_go_finance','max_drawdown_go_finance',\n",
" 'log_return_go_finance','annualized_volatility_go_finance','normalize_ohlcv_go_finance',\n",
" 'write_ohlcv_to_parquet_go_finance','load_ohlcv_from_duckdb_go_finance',\n",
" 'avellaneda_stoikov_quotes_py_finance','generate_taker_order_py_finance',\n",
" 'hawkes_intensity_py_finance','generate_gbm_prices_py_finance',\n",
" 'run_market_sim_py_pipelines','monte_carlo_market_py_pipelines'\n",
"], 'trading — YA')"
]
},
{
"cell_type": "markdown",
"id": "dacc42b2",
"metadata": {},
"source": [
"**Gap & primer batch (delegar a fn-constructor, tag `trading`):**\n",
"\n",
"| # | id propuesto | proposito |\n",
"|---|---|---|\n",
"| 1 | `binance_rest_client_py_finance` | client REST autenticado (klines, balance, order) |\n",
"| 2 | `binance_ws_stream_py_finance` | WS streams trade/depth/kline reconectable |\n",
"| 3 | `orderbook_l2_py_finance` | book L2 con snapshot+delta, BBO, walk-the-book |\n",
"| 4 | `paper_broker_py_finance` | simulador FIFO con slippage configurable |\n",
"| 5 | `position_sizer_py_finance` | Kelly fraccional + cap por riesgo |\n",
"| 6 | `backtest_vectorized_py_finance` | apply de signal sobre OHLCV → equity curve |\n",
"| 7 | `risk_metrics_py_finance` | VaR/ES/Calmar (los 3 que faltan respecto a sharpe/drawdown) |\n",
"| 8 | `signal_crossover_go_finance` | golden/death cross + zscore mean reversion (puras) |"
]
},
{
"cell_type": "markdown",
"id": "b3b3735e",
"metadata": {},
"source": [
"---\n",
"## 2) scraping_web\n",
"\n",
"**Lo que tenemos** — domain `browser` con CDP completo en Go puro + `http_*` en infra. Excelente base.\n",
"\n",
"**Falta** — parsing HTML/CSS-select sin browser, robots/sitemap, deduplicacion, rate-limit por host, persistencia incremental, captchas. Y un tag `scraping` que agrupe."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "90b5b349",
"metadata": {},
"outputs": [],
"source": [
"show([\n",
" 'chrome_launch_go_browser','cdp_connect_go_browser','cdp_navigate_go_browser',\n",
" 'cdp_evaluate_go_browser','cdp_get_html_go_browser','cdp_screenshot_go_browser',\n",
" 'cdp_click_go_browser','cdp_click_text_go_browser','cdp_find_by_text_go_browser',\n",
" 'cdp_type_text_go_browser','cdp_wait_element_go_browser','cdp_wait_load_go_browser',\n",
" 'cdp_har_record_go_browser','cdp_set_cookie_go_browser','cdp_new_tab_go_browser',\n",
" 'http_get_json_go_infra','http_download_file_go_infra','extract_urls_go_cybersecurity'\n",
"], 'scraping_web — YA')"
]
},
{
"cell_type": "markdown",
"id": "081dc473",
"metadata": {},
"source": [
"**Primer batch (tag `scraping`):**\n",
"\n",
"| # | id propuesto | proposito |\n",
"|---|---|---|\n",
"| 1 | `html_css_select_go_browser` | goquery-like, devuelve nodos por selector CSS |\n",
"| 2 | `html_to_text_go_browser` | strip tags conservando estructura semantica |\n",
"| 3 | `robots_txt_check_go_browser` | parse + match user-agent/path antes de fetch |\n",
"| 4 | `sitemap_iter_go_browser` | descubre URLs desde sitemap.xml (+ index) |\n",
"| 5 | `host_rate_limiter_go_infra` | token-bucket por hostname con backoff 429 |\n",
"| 6 | `crawl_frontier_go_browser` | cola con dedupe + politeness por dominio |\n",
"| 7 | `cdp_intercept_request_go_browser` | bloquear assets (img/font) para acelerar |\n",
"| 8 | `scrape_pagination_py_browser` | helper next-page con xpath/css o cursor JSON |\n",
"\n",
"Promover `apps/scraper_*` apps despues."
]
},
{
"cell_type": "markdown",
"id": "22b7a80c",
"metadata": {},
"source": [
"---\n",
"## 3) analisis_quantitativo\n",
"\n",
"**Lo que tenemos** — Monte Carlo de mercado, Hawkes, GBM, Avellaneda-Stoikov, sharpe/drawdown. Suficiente para microestructura.\n",
"\n",
"**Falta** — todo lo que NO es microestructura: regresion, cointegration, PCA, portfolio optimization, GARCH, risk parity, distribuciones (kurtosis/skew)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "884a7570",
"metadata": {},
"outputs": [],
"source": [
"show([\n",
" 'run_market_sim_py_pipelines','monte_carlo_market_py_pipelines',\n",
" 'hawkes_intensity_py_finance','generate_gbm_prices_py_finance',\n",
" 'avellaneda_stoikov_quotes_py_finance','generate_taker_order_py_finance',\n",
" 'sharpe_ratio_py_finance','max_drawdown_py_finance',\n",
" 'annualized_volatility_py_finance','log_return_py_finance'\n",
"], 'quant — YA')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "75317f4c",
"metadata": {},
"outputs": [],
"source": [
"fts('regression OR cointegration OR portfolio OR garch OR pca')"
]
},
{
"cell_type": "markdown",
"id": "8e045748",
"metadata": {},
"source": [
"**Primer batch (tag `quant`):**\n",
"\n",
"| # | id | proposito |\n",
"|---|---|---|\n",
"| 1 | `linear_regression_py_datascience` | OLS con stats (R2, t, p) |\n",
"| 2 | `engle_granger_test_py_finance` | cointegracion 2 series |\n",
"| 3 | `johansen_test_py_finance` | cointegracion n series |\n",
"| 4 | `garch_fit_py_finance` | GARCH(1,1) volatilidad condicional |\n",
"| 5 | `markowitz_optim_py_finance` | min-variance / max-sharpe |\n",
"| 6 | `risk_parity_py_finance` | pesos por contribucion de riesgo |\n",
"| 7 | `pca_explained_var_py_datascience` | PCA sobre returns + varianza explicada |\n",
"| 8 | `var_es_historical_py_finance` | VaR/Expected Shortfall historicos |\n",
"| 9 | `pairs_zscore_py_finance` | spread y zscore para pairs trading |"
]
},
{
"cell_type": "markdown",
"id": "93fce6ed",
"metadata": {},
"source": [
"---\n",
"## 4) monitorizacion_realtime\n",
"\n",
"**Lo que tenemos** — SSE handlers, WS hub, rate limit, logger middleware, health check. Plomeria casi completa.\n",
"\n",
"**Falta** — la capa de **semantica**: metricas (counter/gauge/histogram), alerting, anomaly detection online, ring-buffers de series, exporter Prometheus, panel de tail de logs."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8a0e7780",
"metadata": {},
"outputs": [],
"source": [
"show([\n",
" 'sse_handler_go_infra','sse_send_go_infra','sse_keepalive_go_infra',\n",
" 'ws_handler_go_infra','ws_upgrader_go_infra',\n",
" 'http_logger_middleware_go_infra','logger_middleware_go_infra',\n",
" 'rate_limit_middleware_go_infra','rate_limiter_by_key_go_infra',\n",
" 'health_check_http_go_infra'\n",
"], 'realtime — YA (transporte)')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "018870d6",
"metadata": {},
"outputs": [],
"source": [
"fts('metric OR prometheus OR alert OR anomaly')"
]
},
{
"cell_type": "markdown",
"id": "7d5eef7f",
"metadata": {},
"source": [
"**Primer batch (tag `realtime` / `metrics`):**\n",
"\n",
"| # | id | proposito |\n",
"|---|---|---|\n",
"| 1 | `metric_counter_go_infra` | atomic counter thread-safe |\n",
"| 2 | `metric_gauge_go_infra` | gauge con set/inc/dec |\n",
"| 3 | `metric_histogram_go_infra` | buckets configurables, sum/count |\n",
"| 4 | `prometheus_exporter_go_infra` | handler /metrics text format |\n",
"| 5 | `ringbuffer_series_go_core` | buffer circular para timeseries (pure) |\n",
"| 6 | `ewma_anomaly_go_datascience` | EWMA + 3-sigma deteccion outliers |\n",
"| 7 | `alert_rule_evaluator_go_infra` | expresion threshold → notif (compose con `slack_send`/email) |\n",
"| 8 | `log_tail_sse_go_infra` | broadcaster de log lines via SSE |"
]
},
{
"cell_type": "markdown",
"id": "4c1fe07a",
"metadata": {},
"source": [
"---\n",
"## 5) generacion_imagenes_ia\n",
"\n",
"**Lo que tenemos** — solo **tipos** (`image_generator`, `model_ref`, `lora_ref`, `generation_config`, `image_gen_result` × Go+Py). El contrato esta listo, **las implementaciones no existen**."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec66c272",
"metadata": {},
"outputs": [],
"source": [
"pd.read_sql_query(\n",
" \"SELECT id, lang, algebraic, description FROM types WHERE domain='ml' AND \"\n",
" \"(id LIKE '%image%' OR id LIKE '%lora%' OR id LIKE '%model_ref%' OR id LIKE '%generation%')\",\n",
" conn)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "61dd77c0",
"metadata": {},
"outputs": [],
"source": [
"fts('diffusion OR stable OR sdxl OR comfy OR flux', 20)"
]
},
{
"cell_type": "markdown",
"id": "d46ec553",
"metadata": {},
"source": [
"**Primer batch (tag `image-gen`):**\n",
"\n",
"| # | id | proposito |\n",
"|---|---|---|\n",
"| 1 | `diffusers_generate_py_ml` | impl local con `diffusers` cumpliendo `image_generator_py_ml` |\n",
"| 2 | `comfyui_generate_py_ml` | impl HTTP contra ComfyUI server local |\n",
"| 3 | `openai_image_generate_py_ml` | DALL-E / gpt-image-1 client |\n",
"| 4 | `replicate_image_generate_py_ml` | API generica replicate.com |\n",
"| 5 | `image_to_image_py_ml` | init image + strength sobre stack actual |\n",
"| 6 | `controlnet_generate_py_ml` | preprocessor + condicionamiento |\n",
"| 7 | `image_grid_py_ml` | helper PIL: grid NxM con seeds |\n",
"| 8 | `prompt_template_render_py_core` | Jinja-like prompt + LoRA tags + weights |\n",
"\n",
"Pipeline `image_gen_batch_py_pipelines` componiendo prompt → generator → save+meta."
]
},
{
"cell_type": "markdown",
"id": "0f77fa34",
"metadata": {},
"source": [
"---\n",
"## 6) generacion_texto_ia\n",
"\n",
"**Lo que tenemos** — solo **tipos** en `core`: `message`, `part`, `tool_part`, `text_part`, `context_part`, `query_plan`. No hay cliente."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1692778",
"metadata": {},
"outputs": [],
"source": [
"pd.read_sql_query(\n",
" \"SELECT id, lang, algebraic, description FROM types \"\n",
" \"WHERE id IN ('message_py_core','part_py_core','text_part_py_core','tool_part_py_core',\"\n",
" \"'context_part_py_core','query_plan_py_core','matched_context_py_core')\",\n",
" conn)"
]
},
{
"cell_type": "markdown",
"id": "687d64ba",
"metadata": {},
"source": [
"**Primer batch (tag `llm`):**\n",
"\n",
"| # | id | proposito |\n",
"|---|---|---|\n",
"| 1 | `anthropic_client_py_ml` | client Claude (messages API + streaming SSE) |\n",
"| 2 | `openai_client_py_ml` | client GPT (chat completions + responses) |\n",
"| 3 | `ollama_client_py_ml` | local LLM via Ollama HTTP |\n",
"| 4 | `llm_stream_to_sse_py_infra` | bridge stream LLM → SSE para UI |\n",
"| 5 | `tool_use_dispatcher_py_core` | ejecuta tool_part contra registry de funciones |\n",
"| 6 | `embedding_openai_py_ml` | embeddings + cosine search |\n",
"| 7 | `prompt_cache_anthropic_py_ml` | ephemeral cache_control breakpoint |\n",
"| 8 | `token_count_py_core` | tiktoken / claude tokenizer |\n",
"| 9 | `chat_session_jsonl_py_core` | persistir/cargar `message[]` JSONL |"
]
},
{
"cell_type": "markdown",
"id": "ef57c750",
"metadata": {},
"source": [
"---\n",
"## 7) generacion_audio\n",
"\n",
"**Lo que tenemos** — solo **playback** en gamedev (`audio_engine_cpp_gamedev`, `audio_play_cpp_gamedev`, miniaudio). **0 generacion**, **0 STT/TTS**, sin dominio `audio`."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "339b8fb6",
"metadata": {},
"outputs": [],
"source": [
"show(['audio_engine_cpp_gamedev','audio_play_cpp_gamedev'], 'audio — YA (solo playback)')"
]
},
{
"cell_type": "markdown",
"id": "4bc34071",
"metadata": {},
"source": [
"**Primer batch (nuevo dominio `audio`, tag `audio-gen`):**\n",
"\n",
"| # | id | proposito |\n",
"|---|---|---|\n",
"| 1 | `wav_read_py_audio` | sf.read → np.ndarray + sample_rate |\n",
"| 2 | `wav_write_py_audio` | np.ndarray → wav PCM16 |\n",
"| 3 | `resample_audio_py_audio` | librosa/scipy resample (pure salvo IO) |\n",
"| 4 | `tts_piper_py_audio` | TTS offline Piper, multi-voz |\n",
"| 5 | `tts_elevenlabs_py_audio` | client API ElevenLabs |\n",
"| 6 | `tts_openai_py_audio` | client API OpenAI tts-1 |\n",
"| 7 | `stt_whisper_local_py_audio` | faster-whisper local |\n",
"| 8 | `stt_whisper_api_py_audio` | OpenAI whisper API |\n",
"| 9 | `musicgen_generate_py_audio` | facebook/musicgen via transformers |\n",
"| 10| `audio_concat_py_audio` | concatenar wavs con crossfade ms |"
]
},
{
"cell_type": "markdown",
"id": "1a991579",
"metadata": {},
"source": [
"---\n",
"## 8) audio_realtime_voiceconversion\n",
"\n",
"**Lo que tenemos** — **nada**. Sin captura, sin streaming, sin VC.\n",
"\n",
"Es el tema con mayor coste de entrada: requiere binario nativo (cmake/CUDA), latencia <100ms, ring-buffers PortAudio/miniaudio en input."
]
},
{
"cell_type": "markdown",
"id": "3cebc01a",
"metadata": {},
"source": [
"**Primer batch (tag `audio-rt`, dominio `audio`):**\n",
"\n",
"| # | id | proposito |\n",
"|---|---|---|\n",
"| 1 | `audio_input_cpp_audio` | captura miniaudio device → ring buffer (mirror de `audio_engine`) |\n",
"| 2 | `audio_ring_buffer_cpp_core` | spsc lock-free para samples float32 |\n",
"| 3 | `vad_silero_py_audio` | Voice Activity Detection on chunks 30ms |\n",
"| 4 | `rvc_infer_py_audio` | Retrieval-based Voice Conversion local (torch) |\n",
"| 5 | `seed_vc_infer_py_audio` | Seed-VC zero-shot baseline |\n",
"| 6 | `audio_ws_stream_go_infra` | WS server que recibe PCM y devuelve PCM convertido |\n",
"| 7 | `audio_chunker_py_audio` | dividir stream en chunks 320 samples para inferencia |\n",
"| 8 | `pitch_shift_psola_py_audio` | pitch shift sin neural, fallback rapido |\n",
"\n",
"Mas un app `apps/voice_changer/` (C++ ImGui + Go service) que componga el pipeline."
]
},
{
"cell_type": "markdown",
"id": "bf80cad5",
"metadata": {},
"source": [
"---\n",
"## Resumen\n",
"\n",
"| Tema | Cobertura actual | Esfuerzo proximo |\n",
"|------|------------------|------------------|\n",
"| trading | media-alta | conectores exchange + paper broker (8 fn) |\n",
"| scraping_web | alta (CDP completo) | parser HTML + politeness + frontier (8 fn) |\n",
"| quant | baja-media | regresion/coint/portfolio/risk (9 fn) |\n",
"| realtime | alta (transporte) | metrics + alerting (8 fn) |\n",
"| image_gen | cero (solo tipos) | implementaciones diffusers/comfy/openai (8 fn) |\n",
"| text_gen | cero (solo tipos) | clientes LLM + streaming (9 fn) |\n",
"| audio_gen | cero (solo playback) | dominio nuevo `audio`, TTS/STT/music (10 fn) |\n",
"| audio_rt_vc | cero | el mas costoso, requiere C++ (8 fn + app) |\n",
"\n",
"**Total**: ~70 funciones nuevas para cubrir los 8 temas con un primer baseline funcional.\n",
"\n",
"**Prioridad sugerida** (por ratio valor / coste):\n",
"1. text_gen (clientes LLM ya bloquean muchas otras apps).\n",
"2. realtime metrics + alerting (acelera el propio fn_monitoring).\n",
"3. trading conectores + paper broker (cierra el stack que ya esta a medias).\n",
"4. scraping HTML parser + politeness (multiplicador para osint_graph y data ingest).\n",
"5. image_gen (alto valor demo, dependencias pesadas).\n",
"6. quant (puede vivir como funciones puras Py sin infra).\n",
"7. audio_gen.\n",
"8. audio_rt_vc (ultimo: nuevo dominio C++ + dep nativa)."
]
},
{
"cell_type": "markdown",
"id": "e7a722e1",
"metadata": {},
"source": [
"---\n",
"## Apendice — workaround FTS5\n",
"\n",
"`functions_fts` esta desfasada del contenido (`fts5: missing row N from content table 'main'.'functions'`).\n",
"Las celdas `fts(...)` de arriba pueden petar. Solucion: regenerar el indice con `cd $FN_REGISTRY_ROOT && ./fn index`.\n",
"\n",
"Mientras, override con LIKE para que las busquedas funcionen sin FTS:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "658421c3",
"metadata": {},
"outputs": [],
"source": [
"def fts(q, limit=15):\n",
" \"\"\"Override seguro: busca term1|term2|... en name+description+tags via LIKE.\"\"\"\n",
" terms = [t.strip().lower() for t in q.replace(' OR ', '|').split('|') if t.strip()]\n",
" if not terms: return pd.DataFrame()\n",
" where = ' OR '.join([\"lower(name||' '||description||' '||tags) LIKE ?\"] * len(terms))\n",
" params = [f'%{t}%' for t in terms] + [limit]\n",
" return pd.read_sql_query(\n",
" f\"SELECT id, lang, purity, description FROM functions WHERE {where} LIMIT ?\",\n",
" conn, params=params)\n",
"\n",
"# Verifica que ahora encuentra funciones para los 3 gaps:\n",
"for q in ['regression OR cointegration OR portfolio OR garch OR pca',\n",
" 'metric OR prometheus OR alert OR anomaly',\n",
" 'diffusion OR stable OR sdxl OR comfy OR flux']:\n",
" df = fts(q, limit=20)\n",
" print(f'--- {q} -> {len(df)} hits ---')\n",
" print(df.to_string(index=False) if len(df) else '(ninguno)')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+117
View File
@@ -0,0 +1,117 @@
{
"cells": [
{"cell_type": "markdown", "metadata": {}, "source": [
"# 01 — Panorama del registry por dominio\n",
"\n",
"**Objetivo**: ver cuantas funciones tenemos por dominio, pureza/test/pipelines, y listar las **mas interesantes** (por reutilizacion y signature) de cada dominio.\n",
"\n",
"**Fuente**: `registry.db` (FTS5 indexado por `fn index`).\n",
"\n",
"**Secciones**\n",
"1. Conteo por dominio + cuota de puras y testeadas\n",
"2. Top funciones por dominio (curado a mano tras revisar names+desc)\n",
"3. Conclusiones"
]},
{"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"import os, sqlite3\n",
"import pandas as pd\n",
"ROOT = os.environ['FN_REGISTRY_ROOT']\n",
"conn = sqlite3.connect(f'file:{ROOT}/registry.db?mode=ro', uri=True)\n",
"pd.set_option('display.max_colwidth', 110)"
]},
{"cell_type": "markdown", "metadata": {}, "source": ["## 1. Cuenta por dominio"]},
{"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"q = '''\n",
"SELECT domain,\n",
" COUNT(*) AS total,\n",
" SUM(CASE WHEN purity='pure' THEN 1 ELSE 0 END) AS pure,\n",
" SUM(CASE WHEN tested=1 THEN 1 ELSE 0 END) AS tested,\n",
" SUM(CASE WHEN kind='pipeline' THEN 1 ELSE 0 END) AS pipelines\n",
" FROM functions\n",
" GROUP BY domain\n",
" ORDER BY total DESC;\n",
"'''\n",
"df = pd.read_sql_query(q, conn)\n",
"df['pure_pct'] = (100*df['pure']/df['total']).round(1)\n",
"df['tested_pct'] = (100*df['tested']/df['total']).round(1)\n",
"df"
]},
{"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"import matplotlib.pyplot as plt\n",
"ax = df.set_index('domain')[['pure','total']].plot.bar(figsize=(11,4))\n",
"ax.set_title('Funciones por dominio (totales y puras)'); plt.tight_layout(); plt.show()"
]},
{"cell_type": "markdown", "metadata": {}, "source": [
"## 2. Top funciones interesantes por dominio\n",
"\n",
"Seleccion manual de las funciones mas reutilizables/expresivas de cada bloque (no es ranking automatico — el FTS no captura 'interesante')."
]},
{"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"def top(domain, ids):\n",
" qmarks = ','.join('?'*len(ids))\n",
" df = pd.read_sql_query(\n",
" f\"SELECT id, lang, purity, signature, description FROM functions WHERE id IN ({qmarks})\",\n",
" conn, params=ids)\n",
" print(f'=== {domain} ({len(df)}) ===')\n",
" return df"
]},
{"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"top('finance', [\n",
" 'fetch_ohlcv_go_finance','tick_to_ohlcv_go_finance','stream_ticks_go_finance',\n",
" 'sma_go_finance','ema_go_finance','rsi_go_finance','vwap_go_finance',\n",
" 'bollinger_bands_go_finance','sharpe_ratio_go_finance','max_drawdown_go_finance',\n",
" 'avellaneda_stoikov_quotes_py_finance','hawkes_intensity_py_finance','generate_gbm_prices_py_finance',\n",
" 'write_ohlcv_to_parquet_go_finance','load_ohlcv_from_duckdb_go_finance'])"
]},
{"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"top('browser (CDP)', [\n",
" 'chrome_launch_go_browser','cdp_connect_go_browser','cdp_navigate_go_browser',\n",
" 'cdp_evaluate_go_browser','cdp_get_html_go_browser','cdp_screenshot_go_browser',\n",
" 'cdp_click_go_browser','cdp_click_text_go_browser','cdp_find_by_text_go_browser',\n",
" 'cdp_type_text_go_browser','cdp_wait_element_go_browser','cdp_wait_load_go_browser',\n",
" 'cdp_har_record_go_browser','cdp_set_cookie_go_browser','cdp_new_tab_go_browser'])"
]},
{"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"top('infra (HTTP/WS/SSE)', [\n",
" 'http_get_json_go_infra','http_post_json_go_infra','http_router_go_infra','http_serve_go_infra',\n",
" 'http_download_file_go_infra','http_cors_middleware_go_infra','http_logger_middleware_go_infra',\n",
" 'rate_limit_middleware_go_infra','jwt_middleware_go_infra','sse_handler_go_infra',\n",
" 'sse_send_go_infra','sse_keepalive_go_infra','ws_handler_go_infra','ws_upgrader_go_infra',\n",
" 'health_check_http_go_infra'])"
]},
{"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"top('datascience', [r[0] for r in conn.execute(\n",
" \"SELECT id FROM functions WHERE domain='datascience' AND purity='pure' ORDER BY name LIMIT 15\")])"
]},
{"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"top('cybersecurity', [r[0] for r in conn.execute(\n",
" \"SELECT id FROM functions WHERE domain='cybersecurity' ORDER BY tested DESC, name LIMIT 12\")])"
]},
{"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"top('ml', [r[0] for r in conn.execute(\n",
" \"SELECT id FROM functions WHERE domain='ml' ORDER BY name LIMIT 15\")])"
]},
{"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"top('pipelines', [r[0] for r in conn.execute(\n",
" \"SELECT id FROM functions WHERE domain='pipelines' ORDER BY name LIMIT 15\")])"
]},
{"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"top('gamedev (audio)', ['audio_engine_cpp_gamedev','audio_play_cpp_gamedev'])"
]},
{"cell_type": "markdown", "metadata": {}, "source": [
"## 3. Conclusiones\n",
"\n",
"- **infra (496)** y **core (240)** dominan: middleware HTTP, SSE/WS, SQLite, helpers Go puros.\n",
"- **finance (28)** ya tiene un mini stack de trading + market-making: indicadores, OHLCV, simulador Avellaneda-Stoikov + Hawkes + GBM.\n",
"- **browser (17)** = CDP completo en Go puro (sin chromedp). Base solida para scraping y RPA.\n",
"- **ml (25)** son casi todos **tipos** (`image_generator`, `model_ref`, `lora_ref`, `generation_config`) — el contrato esta definido, las **funciones de ejecucion estan vacias**.\n",
"- **audio**: solo playback (miniaudio en `gamedev`). 0 generacion, 0 STT/TTS, 0 voice conversion.\n",
"- **LLM/text**: 0 clientes — solo tipos `message/part/tool_part` en core."
]}
],
"metadata": {
"kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
"language_info": {"name": "python", "version": "3.12"}
},
"nbformat": 4, "nbformat_minor": 5
}
+533
View File
@@ -0,0 +1,533 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "95651c14",
"metadata": {},
"source": [
"# 02 — Gap analysis: 8 temas\n",
"\n",
"Para cada tema: **(A) lo que YA tenemos**, **(B) lo que falta**, **(C) primer paso** (funciones concretas a delegar a `fn-constructor`).\n",
"\n",
"Temas: trading · scraping_web · analisis_quantitativo · monitorizacion_realtime · generacion_imagenes_ia · generacion_texto_ia · generacion_audio · audio_realtime_voiceconversion."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8f0bbd20",
"metadata": {},
"outputs": [],
"source": [
"import os, sqlite3, pandas as pd\n",
"ROOT = os.environ['FN_REGISTRY_ROOT']\n",
"conn = sqlite3.connect(f'file:{ROOT}/registry.db?mode=ro', uri=True)\n",
"pd.set_option('display.max_colwidth', 120)\n",
"\n",
"def show(ids, title=''):\n",
" if not ids: print(f'{title}: (vacio)'); return None\n",
" qm = ','.join('?'*len(ids))\n",
" df = pd.read_sql_query(\n",
" f\"SELECT id, lang, purity, description FROM functions WHERE id IN ({qm})\",\n",
" conn, params=ids)\n",
" if title: print(f'=== {title} ({len(df)}/{len(ids)}) ===')\n",
" return df\n",
"\n",
"def fts(q, limit=15):\n",
" return pd.read_sql_query(\n",
" '''SELECT f.id, f.lang, f.purity, f.description\n",
" FROM functions_fts JOIN functions f ON f.id = functions_fts.id\n",
" WHERE functions_fts MATCH ? ORDER BY rank LIMIT ?''',\n",
" conn, params=[q, limit])"
]
},
{
"cell_type": "markdown",
"id": "e367b10d",
"metadata": {},
"source": [
"---\n",
"## 1) trading\n",
"\n",
"**Lo que tenemos** — `finance` ya cubre indicators + OHLCV + persistencia y un simulador de mercado.\n",
"\n",
"**Falta** para un stack de trading real: conectores exchange (REST + WS) por venue concreto, libro de ordenes, ejecucion paper/real, gestion de riesgo, backtester vectorizado, sizing/portfolio."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "640b287d",
"metadata": {},
"outputs": [],
"source": [
"show([\n",
" 'fetch_ohlcv_go_finance','tick_to_ohlcv_go_finance','stream_ticks_go_finance',\n",
" 'sma_go_finance','ema_go_finance','rsi_go_finance','vwap_go_finance',\n",
" 'bollinger_bands_go_finance','sharpe_ratio_go_finance','max_drawdown_go_finance',\n",
" 'log_return_go_finance','annualized_volatility_go_finance','normalize_ohlcv_go_finance',\n",
" 'write_ohlcv_to_parquet_go_finance','load_ohlcv_from_duckdb_go_finance',\n",
" 'avellaneda_stoikov_quotes_py_finance','generate_taker_order_py_finance',\n",
" 'hawkes_intensity_py_finance','generate_gbm_prices_py_finance',\n",
" 'run_market_sim_py_pipelines','monte_carlo_market_py_pipelines'\n",
"], 'trading — YA')"
]
},
{
"cell_type": "markdown",
"id": "dacc42b2",
"metadata": {},
"source": [
"**Gap & primer batch (delegar a fn-constructor, tag `trading`):**\n",
"\n",
"| # | id propuesto | proposito |\n",
"|---|---|---|\n",
"| 1 | `binance_rest_client_py_finance` | client REST autenticado (klines, balance, order) |\n",
"| 2 | `binance_ws_stream_py_finance` | WS streams trade/depth/kline reconectable |\n",
"| 3 | `orderbook_l2_py_finance` | book L2 con snapshot+delta, BBO, walk-the-book |\n",
"| 4 | `paper_broker_py_finance` | simulador FIFO con slippage configurable |\n",
"| 5 | `position_sizer_py_finance` | Kelly fraccional + cap por riesgo |\n",
"| 6 | `backtest_vectorized_py_finance` | apply de signal sobre OHLCV → equity curve |\n",
"| 7 | `risk_metrics_py_finance` | VaR/ES/Calmar (los 3 que faltan respecto a sharpe/drawdown) |\n",
"| 8 | `signal_crossover_go_finance` | golden/death cross + zscore mean reversion (puras) |"
]
},
{
"cell_type": "markdown",
"id": "b3b3735e",
"metadata": {},
"source": [
"---\n",
"## 2) scraping_web\n",
"\n",
"**Lo que tenemos** — domain `browser` con CDP completo en Go puro + `http_*` en infra. Excelente base.\n",
"\n",
"**Falta** — parsing HTML/CSS-select sin browser, robots/sitemap, deduplicacion, rate-limit por host, persistencia incremental, captchas. Y un tag `scraping` que agrupe."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "90b5b349",
"metadata": {},
"outputs": [],
"source": [
"show([\n",
" 'chrome_launch_go_browser','cdp_connect_go_browser','cdp_navigate_go_browser',\n",
" 'cdp_evaluate_go_browser','cdp_get_html_go_browser','cdp_screenshot_go_browser',\n",
" 'cdp_click_go_browser','cdp_click_text_go_browser','cdp_find_by_text_go_browser',\n",
" 'cdp_type_text_go_browser','cdp_wait_element_go_browser','cdp_wait_load_go_browser',\n",
" 'cdp_har_record_go_browser','cdp_set_cookie_go_browser','cdp_new_tab_go_browser',\n",
" 'http_get_json_go_infra','http_download_file_go_infra','extract_urls_go_cybersecurity'\n",
"], 'scraping_web — YA')"
]
},
{
"cell_type": "markdown",
"id": "081dc473",
"metadata": {},
"source": [
"**Primer batch (tag `scraping`):**\n",
"\n",
"| # | id propuesto | proposito |\n",
"|---|---|---|\n",
"| 1 | `html_css_select_go_browser` | goquery-like, devuelve nodos por selector CSS |\n",
"| 2 | `html_to_text_go_browser` | strip tags conservando estructura semantica |\n",
"| 3 | `robots_txt_check_go_browser` | parse + match user-agent/path antes de fetch |\n",
"| 4 | `sitemap_iter_go_browser` | descubre URLs desde sitemap.xml (+ index) |\n",
"| 5 | `host_rate_limiter_go_infra` | token-bucket por hostname con backoff 429 |\n",
"| 6 | `crawl_frontier_go_browser` | cola con dedupe + politeness por dominio |\n",
"| 7 | `cdp_intercept_request_go_browser` | bloquear assets (img/font) para acelerar |\n",
"| 8 | `scrape_pagination_py_browser` | helper next-page con xpath/css o cursor JSON |\n",
"\n",
"Promover `apps/scraper_*` apps despues."
]
},
{
"cell_type": "markdown",
"id": "22b7a80c",
"metadata": {},
"source": [
"---\n",
"## 3) analisis_quantitativo\n",
"\n",
"**Lo que tenemos** — Monte Carlo de mercado, Hawkes, GBM, Avellaneda-Stoikov, sharpe/drawdown. Suficiente para microestructura.\n",
"\n",
"**Falta** — todo lo que NO es microestructura: regresion, cointegration, PCA, portfolio optimization, GARCH, risk parity, distribuciones (kurtosis/skew)."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "884a7570",
"metadata": {},
"outputs": [],
"source": [
"show([\n",
" 'run_market_sim_py_pipelines','monte_carlo_market_py_pipelines',\n",
" 'hawkes_intensity_py_finance','generate_gbm_prices_py_finance',\n",
" 'avellaneda_stoikov_quotes_py_finance','generate_taker_order_py_finance',\n",
" 'sharpe_ratio_py_finance','max_drawdown_py_finance',\n",
" 'annualized_volatility_py_finance','log_return_py_finance'\n",
"], 'quant — YA')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "75317f4c",
"metadata": {},
"outputs": [],
"source": [
"fts('regression OR cointegration OR portfolio OR garch OR pca')"
]
},
{
"cell_type": "markdown",
"id": "8e045748",
"metadata": {},
"source": [
"**Primer batch (tag `quant`):**\n",
"\n",
"| # | id | proposito |\n",
"|---|---|---|\n",
"| 1 | `linear_regression_py_datascience` | OLS con stats (R2, t, p) |\n",
"| 2 | `engle_granger_test_py_finance` | cointegracion 2 series |\n",
"| 3 | `johansen_test_py_finance` | cointegracion n series |\n",
"| 4 | `garch_fit_py_finance` | GARCH(1,1) volatilidad condicional |\n",
"| 5 | `markowitz_optim_py_finance` | min-variance / max-sharpe |\n",
"| 6 | `risk_parity_py_finance` | pesos por contribucion de riesgo |\n",
"| 7 | `pca_explained_var_py_datascience` | PCA sobre returns + varianza explicada |\n",
"| 8 | `var_es_historical_py_finance` | VaR/Expected Shortfall historicos |\n",
"| 9 | `pairs_zscore_py_finance` | spread y zscore para pairs trading |"
]
},
{
"cell_type": "markdown",
"id": "93fce6ed",
"metadata": {},
"source": [
"---\n",
"## 4) monitorizacion_realtime\n",
"\n",
"**Lo que tenemos** — SSE handlers, WS hub, rate limit, logger middleware, health check. Plomeria casi completa.\n",
"\n",
"**Falta** — la capa de **semantica**: metricas (counter/gauge/histogram), alerting, anomaly detection online, ring-buffers de series, exporter Prometheus, panel de tail de logs."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8a0e7780",
"metadata": {},
"outputs": [],
"source": [
"show([\n",
" 'sse_handler_go_infra','sse_send_go_infra','sse_keepalive_go_infra',\n",
" 'ws_handler_go_infra','ws_upgrader_go_infra',\n",
" 'http_logger_middleware_go_infra','logger_middleware_go_infra',\n",
" 'rate_limit_middleware_go_infra','rate_limiter_by_key_go_infra',\n",
" 'health_check_http_go_infra'\n",
"], 'realtime — YA (transporte)')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "018870d6",
"metadata": {},
"outputs": [],
"source": [
"fts('metric OR prometheus OR alert OR anomaly')"
]
},
{
"cell_type": "markdown",
"id": "7d5eef7f",
"metadata": {},
"source": [
"**Primer batch (tag `realtime` / `metrics`):**\n",
"\n",
"| # | id | proposito |\n",
"|---|---|---|\n",
"| 1 | `metric_counter_go_infra` | atomic counter thread-safe |\n",
"| 2 | `metric_gauge_go_infra` | gauge con set/inc/dec |\n",
"| 3 | `metric_histogram_go_infra` | buckets configurables, sum/count |\n",
"| 4 | `prometheus_exporter_go_infra` | handler /metrics text format |\n",
"| 5 | `ringbuffer_series_go_core` | buffer circular para timeseries (pure) |\n",
"| 6 | `ewma_anomaly_go_datascience` | EWMA + 3-sigma deteccion outliers |\n",
"| 7 | `alert_rule_evaluator_go_infra` | expresion threshold → notif (compose con `slack_send`/email) |\n",
"| 8 | `log_tail_sse_go_infra` | broadcaster de log lines via SSE |"
]
},
{
"cell_type": "markdown",
"id": "4c1fe07a",
"metadata": {},
"source": [
"---\n",
"## 5) generacion_imagenes_ia\n",
"\n",
"**Lo que tenemos** — solo **tipos** (`image_generator`, `model_ref`, `lora_ref`, `generation_config`, `image_gen_result` × Go+Py). El contrato esta listo, **las implementaciones no existen**."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec66c272",
"metadata": {},
"outputs": [],
"source": [
"pd.read_sql_query(\n",
" \"SELECT id, lang, algebraic, description FROM types WHERE domain='ml' AND \"\n",
" \"(id LIKE '%image%' OR id LIKE '%lora%' OR id LIKE '%model_ref%' OR id LIKE '%generation%')\",\n",
" conn)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "61dd77c0",
"metadata": {},
"outputs": [],
"source": [
"fts('diffusion OR stable OR sdxl OR comfy OR flux', 20)"
]
},
{
"cell_type": "markdown",
"id": "d46ec553",
"metadata": {},
"source": [
"**Primer batch (tag `image-gen`):**\n",
"\n",
"| # | id | proposito |\n",
"|---|---|---|\n",
"| 1 | `diffusers_generate_py_ml` | impl local con `diffusers` cumpliendo `image_generator_py_ml` |\n",
"| 2 | `comfyui_generate_py_ml` | impl HTTP contra ComfyUI server local |\n",
"| 3 | `openai_image_generate_py_ml` | DALL-E / gpt-image-1 client |\n",
"| 4 | `replicate_image_generate_py_ml` | API generica replicate.com |\n",
"| 5 | `image_to_image_py_ml` | init image + strength sobre stack actual |\n",
"| 6 | `controlnet_generate_py_ml` | preprocessor + condicionamiento |\n",
"| 7 | `image_grid_py_ml` | helper PIL: grid NxM con seeds |\n",
"| 8 | `prompt_template_render_py_core` | Jinja-like prompt + LoRA tags + weights |\n",
"\n",
"Pipeline `image_gen_batch_py_pipelines` componiendo prompt → generator → save+meta."
]
},
{
"cell_type": "markdown",
"id": "0f77fa34",
"metadata": {},
"source": [
"---\n",
"## 6) generacion_texto_ia\n",
"\n",
"**Lo que tenemos** — solo **tipos** en `core`: `message`, `part`, `tool_part`, `text_part`, `context_part`, `query_plan`. No hay cliente."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1692778",
"metadata": {},
"outputs": [],
"source": [
"pd.read_sql_query(\n",
" \"SELECT id, lang, algebraic, description FROM types \"\n",
" \"WHERE id IN ('message_py_core','part_py_core','text_part_py_core','tool_part_py_core',\"\n",
" \"'context_part_py_core','query_plan_py_core','matched_context_py_core')\",\n",
" conn)"
]
},
{
"cell_type": "markdown",
"id": "687d64ba",
"metadata": {},
"source": [
"**Primer batch (tag `llm`):**\n",
"\n",
"| # | id | proposito |\n",
"|---|---|---|\n",
"| 1 | `anthropic_client_py_ml` | client Claude (messages API + streaming SSE) |\n",
"| 2 | `openai_client_py_ml` | client GPT (chat completions + responses) |\n",
"| 3 | `ollama_client_py_ml` | local LLM via Ollama HTTP |\n",
"| 4 | `llm_stream_to_sse_py_infra` | bridge stream LLM → SSE para UI |\n",
"| 5 | `tool_use_dispatcher_py_core` | ejecuta tool_part contra registry de funciones |\n",
"| 6 | `embedding_openai_py_ml` | embeddings + cosine search |\n",
"| 7 | `prompt_cache_anthropic_py_ml` | ephemeral cache_control breakpoint |\n",
"| 8 | `token_count_py_core` | tiktoken / claude tokenizer |\n",
"| 9 | `chat_session_jsonl_py_core` | persistir/cargar `message[]` JSONL |"
]
},
{
"cell_type": "markdown",
"id": "ef57c750",
"metadata": {},
"source": [
"---\n",
"## 7) generacion_audio\n",
"\n",
"**Lo que tenemos** — solo **playback** en gamedev (`audio_engine_cpp_gamedev`, `audio_play_cpp_gamedev`, miniaudio). **0 generacion**, **0 STT/TTS**, sin dominio `audio`."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "339b8fb6",
"metadata": {},
"outputs": [],
"source": [
"show(['audio_engine_cpp_gamedev','audio_play_cpp_gamedev'], 'audio — YA (solo playback)')"
]
},
{
"cell_type": "markdown",
"id": "4bc34071",
"metadata": {},
"source": [
"**Primer batch (nuevo dominio `audio`, tag `audio-gen`):**\n",
"\n",
"| # | id | proposito |\n",
"|---|---|---|\n",
"| 1 | `wav_read_py_audio` | sf.read → np.ndarray + sample_rate |\n",
"| 2 | `wav_write_py_audio` | np.ndarray → wav PCM16 |\n",
"| 3 | `resample_audio_py_audio` | librosa/scipy resample (pure salvo IO) |\n",
"| 4 | `tts_piper_py_audio` | TTS offline Piper, multi-voz |\n",
"| 5 | `tts_elevenlabs_py_audio` | client API ElevenLabs |\n",
"| 6 | `tts_openai_py_audio` | client API OpenAI tts-1 |\n",
"| 7 | `stt_whisper_local_py_audio` | faster-whisper local |\n",
"| 8 | `stt_whisper_api_py_audio` | OpenAI whisper API |\n",
"| 9 | `musicgen_generate_py_audio` | facebook/musicgen via transformers |\n",
"| 10| `audio_concat_py_audio` | concatenar wavs con crossfade ms |"
]
},
{
"cell_type": "markdown",
"id": "1a991579",
"metadata": {},
"source": [
"---\n",
"## 8) audio_realtime_voiceconversion\n",
"\n",
"**Lo que tenemos** — **nada**. Sin captura, sin streaming, sin VC.\n",
"\n",
"Es el tema con mayor coste de entrada: requiere binario nativo (cmake/CUDA), latencia <100ms, ring-buffers PortAudio/miniaudio en input."
]
},
{
"cell_type": "markdown",
"id": "3cebc01a",
"metadata": {},
"source": [
"**Primer batch (tag `audio-rt`, dominio `audio`):**\n",
"\n",
"| # | id | proposito |\n",
"|---|---|---|\n",
"| 1 | `audio_input_cpp_audio` | captura miniaudio device → ring buffer (mirror de `audio_engine`) |\n",
"| 2 | `audio_ring_buffer_cpp_core` | spsc lock-free para samples float32 |\n",
"| 3 | `vad_silero_py_audio` | Voice Activity Detection on chunks 30ms |\n",
"| 4 | `rvc_infer_py_audio` | Retrieval-based Voice Conversion local (torch) |\n",
"| 5 | `seed_vc_infer_py_audio` | Seed-VC zero-shot baseline |\n",
"| 6 | `audio_ws_stream_go_infra` | WS server que recibe PCM y devuelve PCM convertido |\n",
"| 7 | `audio_chunker_py_audio` | dividir stream en chunks 320 samples para inferencia |\n",
"| 8 | `pitch_shift_psola_py_audio` | pitch shift sin neural, fallback rapido |\n",
"\n",
"Mas un app `apps/voice_changer/` (C++ ImGui + Go service) que componga el pipeline."
]
},
{
"cell_type": "markdown",
"id": "bf80cad5",
"metadata": {},
"source": [
"---\n",
"## Resumen\n",
"\n",
"| Tema | Cobertura actual | Esfuerzo proximo |\n",
"|------|------------------|------------------|\n",
"| trading | media-alta | conectores exchange + paper broker (8 fn) |\n",
"| scraping_web | alta (CDP completo) | parser HTML + politeness + frontier (8 fn) |\n",
"| quant | baja-media | regresion/coint/portfolio/risk (9 fn) |\n",
"| realtime | alta (transporte) | metrics + alerting (8 fn) |\n",
"| image_gen | cero (solo tipos) | implementaciones diffusers/comfy/openai (8 fn) |\n",
"| text_gen | cero (solo tipos) | clientes LLM + streaming (9 fn) |\n",
"| audio_gen | cero (solo playback) | dominio nuevo `audio`, TTS/STT/music (10 fn) |\n",
"| audio_rt_vc | cero | el mas costoso, requiere C++ (8 fn + app) |\n",
"\n",
"**Total**: ~70 funciones nuevas para cubrir los 8 temas con un primer baseline funcional.\n",
"\n",
"**Prioridad sugerida** (por ratio valor / coste):\n",
"1. text_gen (clientes LLM ya bloquean muchas otras apps).\n",
"2. realtime metrics + alerting (acelera el propio fn_monitoring).\n",
"3. trading conectores + paper broker (cierra el stack que ya esta a medias).\n",
"4. scraping HTML parser + politeness (multiplicador para osint_graph y data ingest).\n",
"5. image_gen (alto valor demo, dependencias pesadas).\n",
"6. quant (puede vivir como funciones puras Py sin infra).\n",
"7. audio_gen.\n",
"8. audio_rt_vc (ultimo: nuevo dominio C++ + dep nativa)."
]
},
{
"cell_type": "markdown",
"id": "e7a722e1",
"metadata": {},
"source": [
"---\n",
"## Apendice — workaround FTS5\n",
"\n",
"`functions_fts` esta desfasada del contenido (`fts5: missing row N from content table 'main'.'functions'`).\n",
"Las celdas `fts(...)` de arriba pueden petar. Solucion: regenerar el indice con `cd $FN_REGISTRY_ROOT && ./fn index`.\n",
"\n",
"Mientras, override con LIKE para que las busquedas funcionen sin FTS:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "658421c3",
"metadata": {},
"outputs": [],
"source": [
"def fts(q, limit=15):\n",
" \"\"\"Override seguro: busca term1|term2|... en name+description+tags via LIKE.\"\"\"\n",
" terms = [t.strip().lower() for t in q.replace(' OR ', '|').split('|') if t.strip()]\n",
" if not terms: return pd.DataFrame()\n",
" where = ' OR '.join([\"lower(name||' '||description||' '||tags) LIKE ?\"] * len(terms))\n",
" params = [f'%{t}%' for t in terms] + [limit]\n",
" return pd.read_sql_query(\n",
" f\"SELECT id, lang, purity, description FROM functions WHERE {where} LIMIT ?\",\n",
" conn, params=params)\n",
"\n",
"# Verifica que ahora encuentra funciones para los 3 gaps:\n",
"for q in ['regression OR cointegration OR portfolio OR garch OR pca',\n",
" 'metric OR prometheus OR alert OR anomaly',\n",
" 'diffusion OR stable OR sdxl OR comfy OR flux']:\n",
" df = fts(q, limit=20)\n",
" print(f'--- {q} -> {len(df)} hits ---')\n",
" print(df.to_string(index=False) if len(df) else '(ninguno)')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+16
View File
@@ -0,0 +1,16 @@
[project]
name = "domain-coverage-gaps"
version = "0.1.0"
description = "Notebooks Jupyter de cobertura por dominio y gap analysis sobre fn_registry"
readme = "README.md"
requires-python = ">=3.13"
dependencies = [
"jupyter>=1.1.1",
"jupyter-collaboration>=4.4.0",
"jupyter-mcp-server>=1.0.2",
"jupyterlab>=4.5.7",
"matplotlib>=3.10.9",
"numpy>=2.4.4",
"pandas>=3.0.3",
"tabulate>=0.10.0",
]
+50
View File
@@ -0,0 +1,50 @@
#!/bin/bash
# Jupyter Lab — modo colaborativo con autodeteccion de puerto
# Generado por write_jupyter_launcher (fn_registry)
find_free_port() {
    # Print the first candidate port (8888..8899, ascending) that neither `ss`
    # nor `lsof` reports as in use. Errors from either tool are silenced, so a
    # missing tool simply never reports a port as busy.
    for port in {8888..8899}; do
        # Skip the port if a listening TCP socket already uses it.
        if ss -tln 2>/dev/null | grep -q ":${port} "; then
            continue
        fi
        # Skip the port if lsof knows of any socket on it.
        if lsof -i:"$port" >/dev/null 2>&1; then
            continue
        fi
        echo "$port"
        return
    done
    # Every candidate looked busy: fall back to the default port.
    echo 8888
}
# Port to serve on: first CLI argument if given, otherwise auto-detected.
PORT=${1:-$(find_free_port)}
# Work from the script's own directory so .jupyter-port, .venv and .ipython are
# resolved relative to it. Abort if the directory cannot be entered; otherwise
# the port file would be written, and Jupyter rooted, wherever the caller was.
cd "$(dirname "$0")" || exit 1
# Record the chosen port (presumably read by companion tooling — confirm).
echo "$PORT" > .jupyter-port
# Best effort: activate the local virtualenv when there is one.
source .venv/bin/activate 2>/dev/null || true
# IPython startup: load the local .ipython/ (FN_REGISTRY_ROOT, helpers, sys.path)
if [ -d "$(pwd)/.ipython" ]; then
    export IPYTHONDIR="$(pwd)/.ipython"
fi
# Collaborative mode needs the jupyter-collaboration package; fail early with
# an actionable message instead of letting `jupyter lab` start without it.
if ! python -c "import jupyter_collaboration" 2>/dev/null; then
    echo "ERROR: jupyter-collaboration no esta instalado"
    echo "Instala con: uv add jupyter-collaboration"
    exit 1
fi
echo "════════════════════════════════════════════════"
echo " Jupyter Lab + Colaboracion en puerto $PORT"
echo "════════════════════════════════════════════════"
echo ""
echo " Abre: http://localhost:$PORT"
echo " Ctrl+C para detener"
echo ""
# port_retries=0: Jupyter must bind exactly $PORT or fail. With the default
# retry behaviour it would silently move to another port, leaving both
# .jupyter-port and the banner above pointing at the wrong server.
#
# NOTE(review): token and password are empty, XSRF checks are disabled and any
# origin is allowed, so anything that can reach this port can run code in the
# kernel. Only use this launcher on a trusted, local-only machine.
jupyter lab \
    --port="$PORT" \
    --ServerApp.port_retries=0 \
    --no-browser \
    --ServerApp.token='' \
    --ServerApp.password='' \
    --ServerApp.disable_check_xsrf=True \
    --ServerApp.allow_origin='*' \
    --ServerApp.root_dir="$(pwd)" \
    --collaborative
Generated
+3077
View File
File diff suppressed because it is too large Load Diff