diff --git a/.ipython/profile_default/history.sqlite b/.ipython/profile_default/history.sqlite index 5f06c08..b7b3018 100644 Binary files a/.ipython/profile_default/history.sqlite and b/.ipython/profile_default/history.sqlite differ diff --git a/.jupyter/collaboration_sessions.json b/.jupyter/collaboration_sessions.json index 968d5e5..4814025 100644 --- a/.jupyter/collaboration_sessions.json +++ b/.jupyter/collaboration_sessions.json @@ -1,7 +1,7 @@ { "c1085b1e-4f62-4837-ae69-9d08b917dc85": { "version": "2.4.0", - "created_at": "2026-05-13T22:42:24.017631+00:00", + "created_at": "2026-05-15T16:00:35.124402+00:00", "document_version": "2.0.0" } } \ No newline at end of file diff --git a/.jupyter_ystore.db b/.jupyter_ystore.db index df97fa7..4d9ac0c 100644 Binary files a/.jupyter_ystore.db and b/.jupyter_ystore.db differ diff --git a/notebooks/.ipynb_checkpoints/01_domains_overview-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/01_domains_overview-checkpoint.ipynb new file mode 100644 index 0000000..0e3fab9 --- /dev/null +++ b/notebooks/.ipynb_checkpoints/01_domains_overview-checkpoint.ipynb @@ -0,0 +1,239 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "5e4a34a1", + "metadata": {}, + "source": [ + "# 01 — Panorama del registry por dominio\n", + "\n", + "**Objetivo**: ver cuantas funciones tenemos por dominio, pureza/test/pipelines, y listar las **mas interesantes** (por reutilizacion y signature) de cada dominio.\n", + "\n", + "**Fuente**: `registry.db` (FTS5 indexado por `fn index`).\n", + "\n", + "**Secciones**\n", + "1. Conteo por dominio + cuota de puras y testeadas\n", + "2. Top funciones por dominio (curado a mano tras revisar names+desc)\n", + "3. Conclusiones" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2e006e3", + "metadata": {}, + "outputs": [], + "source": [ + "import os, sqlite3\n", + "import pandas as pd\n", + "ROOT = os.environ['FN_REGISTRY_ROOT']\n", + "conn = sqlite3.connect(f'file:{ROOT}/registry.db?mode=ro', uri=True)\n", + "pd.set_option('display.max_colwidth', 110)" + ] + }, + { + "cell_type": "markdown", + "id": "b6bc2805", + "metadata": {}, + "source": [ + "## 1. Cuenta por dominio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d4163db", + "metadata": {}, + "outputs": [], + "source": [ + "q = '''\n", + "SELECT domain,\n", + " COUNT(*) AS total,\n", + " SUM(CASE WHEN purity='pure' THEN 1 ELSE 0 END) AS pure,\n", + " SUM(CASE WHEN tested=1 THEN 1 ELSE 0 END) AS tested,\n", + " SUM(CASE WHEN kind='pipeline' THEN 1 ELSE 0 END) AS pipelines\n", + " FROM functions\n", + " GROUP BY domain\n", + " ORDER BY total DESC;\n", + "'''\n", + "df = pd.read_sql_query(q, conn)\n", + "df['pure_pct'] = (100*df['pure']/df['total']).round(1)\n", + "df['tested_pct'] = (100*df['tested']/df['total']).round(1)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "862a60a9", + "metadata": {}, + "outputs": [], + "source": [ + "import matplotlib.pyplot as plt\n", + "ax = df.set_index('domain')[['pure','total']].plot.bar(figsize=(11,4))\n", + "ax.set_title('Funciones por dominio (totales y puras)'); plt.tight_layout(); plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "422a7426", + "metadata": {}, + "source": [ + "## 2. Top funciones interesantes por dominio\n", + "\n", + "Seleccion manual de las funciones mas reutilizables/expresivas de cada bloque (no es ranking automatico — el FTS no captura 'interesante')." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30da4c90", + "metadata": {}, + "outputs": [], + "source": [ + "def top(domain, ids):\n", + " qmarks = ','.join('?'*len(ids))\n", + " df = pd.read_sql_query(\n", + " f\"SELECT id, lang, purity, signature, description FROM functions WHERE id IN ({qmarks})\",\n", + " conn, params=ids)\n", + " print(f'=== {domain} ({len(df)}) ===')\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e70c0f1", + "metadata": {}, + "outputs": [], + "source": [ + "top('finance', [\n", + " 'fetch_ohlcv_go_finance','tick_to_ohlcv_go_finance','stream_ticks_go_finance',\n", + " 'sma_go_finance','ema_go_finance','rsi_go_finance','vwap_go_finance',\n", + " 'bollinger_bands_go_finance','sharpe_ratio_go_finance','max_drawdown_go_finance',\n", + " 'avellaneda_stoikov_quotes_py_finance','hawkes_intensity_py_finance','generate_gbm_prices_py_finance',\n", + " 'write_ohlcv_to_parquet_go_finance','load_ohlcv_from_duckdb_go_finance'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5aa67236", + "metadata": {}, + "outputs": [], + "source": [ + "top('browser (CDP)', [\n", + " 'chrome_launch_go_browser','cdp_connect_go_browser','cdp_navigate_go_browser',\n", + " 'cdp_evaluate_go_browser','cdp_get_html_go_browser','cdp_screenshot_go_browser',\n", + " 'cdp_click_go_browser','cdp_click_text_go_browser','cdp_find_by_text_go_browser',\n", + " 'cdp_type_text_go_browser','cdp_wait_element_go_browser','cdp_wait_load_go_browser',\n", + " 'cdp_har_record_go_browser','cdp_set_cookie_go_browser','cdp_new_tab_go_browser'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75cced20", + "metadata": {}, + "outputs": [], + "source": [ + "top('infra (HTTP/WS/SSE)', [\n", + " 'http_get_json_go_infra','http_post_json_go_infra','http_router_go_infra','http_serve_go_infra',\n", + " 'http_download_file_go_infra','http_cors_middleware_go_infra','http_logger_middleware_go_infra',\n", + " 'rate_limit_middleware_go_infra','jwt_middleware_go_infra','sse_handler_go_infra',\n", + " 'sse_send_go_infra','sse_keepalive_go_infra','ws_handler_go_infra','ws_upgrader_go_infra',\n", + " 'health_check_http_go_infra'])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c655f8fc", + "metadata": {}, + "outputs": [], + "source": [ + "top('datascience', [r[0] for r in conn.execute(\n", + " \"SELECT id FROM functions WHERE domain='datascience' AND purity='pure' ORDER BY name LIMIT 15\")])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "364cdb4b", + "metadata": {}, + "outputs": [], + "source": [ + "top('cybersecurity', [r[0] for r in conn.execute(\n", + " \"SELECT id FROM functions WHERE domain='cybersecurity' ORDER BY tested DESC, name LIMIT 12\")])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "958aaac8", + "metadata": {}, + "outputs": [], + "source": [ + "top('ml', [r[0] for r in conn.execute(\n", + " \"SELECT id FROM functions WHERE domain='ml' ORDER BY name LIMIT 15\")])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c03903d6", + "metadata": {}, + "outputs": [], + "source": [ + "top('pipelines', [r[0] for r in conn.execute(\n", + " \"SELECT id FROM functions WHERE domain='pipelines' ORDER BY name LIMIT 15\")])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54a82572", + "metadata": {}, + "outputs": [], + "source": [ + "top('gamedev (audio)', ['audio_engine_cpp_gamedev','audio_play_cpp_gamedev'])" + ] + }, + { + "cell_type": "markdown", + "id": "55adc346", + "metadata": {}, + "source": [ + "## 3. Conclusiones\n", + "\n", + "- **infra (496)** y **core (240)** dominan: middleware HTTP, SSE/WS, SQLite, helpers Go puros.\n", + "- **finance (28)** ya tiene un mini stack de trading + market-making: indicadores, OHLCV, simulador Avellaneda-Stoikov + Hawkes + GBM.\n", + "- **browser (17)** = CDP completo en Go puro (sin chromedp). Base solida para scraping y RPA.\n", + "- **ml (25)** son casi todos **tipos** (`image_generator`, `model_ref`, `lora_ref`, `generation_config`) — el contrato esta definido, las **funciones de ejecucion estan vacias**.\n", + "- **audio**: solo playback (miniaudio en `gamedev`). 0 generacion, 0 STT/TTS, 0 voice conversion.\n", + "- **LLM/text**: 0 clientes — solo tipos `message/part/tool_part` en core." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/01_domains_overview.ipynb b/notebooks/01_domains_overview.ipynb index 21875d8..0e3fab9 100644 --- a/notebooks/01_domains_overview.ipynb +++ b/notebooks/01_domains_overview.ipynb @@ -1,6 +1,10 @@ { "cells": [ - {"cell_type": "markdown", "metadata": {}, "source": [ + { + "cell_type": "markdown", + "id": "5e4a34a1", + "metadata": {}, + "source": [ "# 01 — Panorama del registry por dominio\n", "\n", "**Objetivo**: ver cuantas funciones tenemos por dominio, pureza/test/pipelines, y listar las **mas interesantes** (por reutilizacion y signature) de cada dominio.\n", @@ -11,16 +15,37 @@ "1. Conteo por dominio + cuota de puras y testeadas\n", "2. Top funciones por dominio (curado a mano tras revisar names+desc)\n", "3. Conclusiones" - ]}, - {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f2e006e3", + "metadata": {}, + "outputs": [], + "source": [ "import os, sqlite3\n", "import pandas as pd\n", "ROOT = os.environ['FN_REGISTRY_ROOT']\n", "conn = sqlite3.connect(f'file:{ROOT}/registry.db?mode=ro', uri=True)\n", "pd.set_option('display.max_colwidth', 110)" - ]}, - {"cell_type": "markdown", "metadata": {}, "source": ["## 1. Cuenta por dominio"]}, - {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + ] + }, + { + "cell_type": "markdown", + "id": "b6bc2805", + "metadata": {}, + "source": [ + "## 1. Cuenta por dominio" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d4163db", + "metadata": {}, + "outputs": [], + "source": [ "q = '''\n", "SELECT domain,\n", " COUNT(*) AS total,\n", @@ -35,18 +60,37 @@ "df['pure_pct'] = (100*df['pure']/df['total']).round(1)\n", "df['tested_pct'] = (100*df['tested']/df['total']).round(1)\n", "df" - ]}, - {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "862a60a9", + "metadata": {}, + "outputs": [], + "source": [ "import matplotlib.pyplot as plt\n", "ax = df.set_index('domain')[['pure','total']].plot.bar(figsize=(11,4))\n", "ax.set_title('Funciones por dominio (totales y puras)'); plt.tight_layout(); plt.show()" - ]}, - {"cell_type": "markdown", "metadata": {}, "source": [ + ] + }, + { + "cell_type": "markdown", + "id": "422a7426", + "metadata": {}, + "source": [ "## 2. Top funciones interesantes por dominio\n", "\n", "Seleccion manual de las funciones mas reutilizables/expresivas de cada bloque (no es ranking automatico — el FTS no captura 'interesante')." - ]}, - {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30da4c90", + "metadata": {}, + "outputs": [], + "source": [ "def top(domain, ids):\n", " qmarks = ','.join('?'*len(ids))\n", " df = pd.read_sql_query(\n", @@ -54,51 +98,112 @@ " conn, params=ids)\n", " print(f'=== {domain} ({len(df)}) ===')\n", " return df" - ]}, - {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5e70c0f1", + "metadata": {}, + "outputs": [], + "source": [ "top('finance', [\n", " 'fetch_ohlcv_go_finance','tick_to_ohlcv_go_finance','stream_ticks_go_finance',\n", " 'sma_go_finance','ema_go_finance','rsi_go_finance','vwap_go_finance',\n", " 'bollinger_bands_go_finance','sharpe_ratio_go_finance','max_drawdown_go_finance',\n", " 'avellaneda_stoikov_quotes_py_finance','hawkes_intensity_py_finance','generate_gbm_prices_py_finance',\n", " 'write_ohlcv_to_parquet_go_finance','load_ohlcv_from_duckdb_go_finance'])" - ]}, - {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5aa67236", + "metadata": {}, + "outputs": [], + "source": [ "top('browser (CDP)', [\n", " 'chrome_launch_go_browser','cdp_connect_go_browser','cdp_navigate_go_browser',\n", " 'cdp_evaluate_go_browser','cdp_get_html_go_browser','cdp_screenshot_go_browser',\n", " 'cdp_click_go_browser','cdp_click_text_go_browser','cdp_find_by_text_go_browser',\n", " 'cdp_type_text_go_browser','cdp_wait_element_go_browser','cdp_wait_load_go_browser',\n", " 'cdp_har_record_go_browser','cdp_set_cookie_go_browser','cdp_new_tab_go_browser'])" - ]}, - {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "75cced20", + "metadata": {}, + "outputs": [], + "source": [ "top('infra (HTTP/WS/SSE)', [\n", " 'http_get_json_go_infra','http_post_json_go_infra','http_router_go_infra','http_serve_go_infra',\n", " 'http_download_file_go_infra','http_cors_middleware_go_infra','http_logger_middleware_go_infra',\n", " 'rate_limit_middleware_go_infra','jwt_middleware_go_infra','sse_handler_go_infra',\n", " 'sse_send_go_infra','sse_keepalive_go_infra','ws_handler_go_infra','ws_upgrader_go_infra',\n", " 'health_check_http_go_infra'])" - ]}, - {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c655f8fc", + "metadata": {}, + "outputs": [], + "source": [ "top('datascience', [r[0] for r in conn.execute(\n", " \"SELECT id FROM functions WHERE domain='datascience' AND purity='pure' ORDER BY name LIMIT 15\")])" - ]}, - {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "364cdb4b", + "metadata": {}, + "outputs": [], + "source": [ "top('cybersecurity', [r[0] for r in conn.execute(\n", " \"SELECT id FROM functions WHERE domain='cybersecurity' ORDER BY tested DESC, name LIMIT 12\")])" - ]}, - {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "958aaac8", + "metadata": {}, + "outputs": [], + "source": [ "top('ml', [r[0] for r in conn.execute(\n", " \"SELECT id FROM functions WHERE domain='ml' ORDER BY name LIMIT 15\")])" - ]}, - {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c03903d6", + "metadata": {}, + "outputs": [], + "source": [ "top('pipelines', [r[0] for r in conn.execute(\n", " \"SELECT id FROM functions WHERE domain='pipelines' ORDER BY name LIMIT 15\")])" - ]}, - {"cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "54a82572", + "metadata": {}, + "outputs": [], + "source": [ "top('gamedev (audio)', ['audio_engine_cpp_gamedev','audio_play_cpp_gamedev'])" - ]}, - {"cell_type": "markdown", "metadata": {}, "source": [ + ] + }, + { + "cell_type": "markdown", + "id": "55adc346", + "metadata": {}, + "source": [ "## 3. Conclusiones\n", "\n", "- **infra (496)** y **core (240)** dominan: middleware HTTP, SSE/WS, SQLite, helpers Go puros.\n", @@ -107,11 +212,28 @@ "- **ml (25)** son casi todos **tipos** (`image_generator`, `model_ref`, `lora_ref`, `generation_config`) — el contrato esta definido, las **funciones de ejecucion estan vacias**.\n", "- **audio**: solo playback (miniaudio en `gamedev`). 0 generacion, 0 STT/TTS, 0 voice conversion.\n", "- **LLM/text**: 0 clientes — solo tipos `message/part/tool_part` en core." - ]} + ] + } ], "metadata": { - "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"}, - "language_info": {"name": "python", "version": "3.12"} + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.7" + } }, - "nbformat": 4, "nbformat_minor": 5 + "nbformat": 4, + "nbformat_minor": 5 }