{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Monte Carlo: análisis de sensibilidad por parámetro\n", "\n", "Usamos las funciones del registry para correr cientos de simulaciones variando **un parámetro a la vez**.\n", "Esto nos dice:\n", "- Qué parámetros importan más\n", "- Cómo responde el mercado simulado a cada cambio\n", "- Qué rangos producen mercados realistas\n", "\n", "## Parámetros calibrados desde BTC/USDT real (notebook 06)\n", "\n", "| Parámetro | Valor calibrado | Confianza | Fuente |\n", "|---|---|---|---|\n", "| sigma | 0.000514 | Alta | Std retornos 1m |\n", "| mu | ~0 | Alta | Media retornos |\n", "| jump_intensity | 0.013 | Media | % retornos > 3σ |\n", "| jump_size_std | 0.000356 | Media | Std de los jumps |\n", "| n_takers_lambda | 12.0 | Media | aggTrades/segundo |\n", "| taker_size_alpha | 0.78 | Media | Pareto MLE cola p90+ |\n", "| hawkes_alpha | 0.17 | Baja | Fit exp sobre ACF |\n", "| hawkes_beta | 0.015 | Baja | Fit exp sobre ACF |\n", "| gamma | ? | No observable | Relación spread~vol |\n", "| n_makers | ? | No observable | Capas de liquidez L2 |\n", "| maker_spread | 0.01 | Alta | Spread real del book |" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Base params cargados\n", "Test: 270 trades, spread=-0.057967\n" ] } ], "source": [ "import sys, os\n", "sys.path.insert(0, os.path.join(os.environ.get('FN_REGISTRY_ROOT', os.path.expanduser('~/fn_registry')), 'python', 'functions'))\n", "sys.path.insert(0, os.path.join(os.environ.get('FN_REGISTRY_ROOT', os.path.expanduser('~/fn_registry')), 'python', 'functions', 'pipelines'))\n", "\n", "from run_market_sim import run_market_sim\n", "import numpy as np\n", "import polars as pl\n", "import matplotlib.pyplot as plt\n", "\n", "# Parámetros base calibrados desde datos reales\n", "BASE = dict(\n", " initial_price=100.0,\n", " n_ticks=300,\n", " sigma=0.000514,\n", " mu=0.0,\n", " jump_intensity=0.013,\n", " jump_size_std=0.000356,\n", " n_makers=5,\n", " maker_spread=0.01,\n", " gamma=0.1,\n", " maker_levels=3,\n", " maker_qty=10.0,\n", " n_takers_lambda=12.0,\n", " taker_size_alpha=0.78,\n", " taker_size_min=0.001,\n", " taker_size_max=5.0,\n", " hawkes_alpha=0.17,\n", " hawkes_beta=0.015,\n", ")\n", "\n", "print('Base params cargados')\n", "# Quick test\n", "r = run_market_sim(**BASE, seed=0)\n", "print(f'Test: {r[\"total_trades\"]} trades, spread={np.mean(r[\"spreads\"]):.6f}')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Herramientas de análisis" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def sweep_param(param_name: str, values: list, base_params: dict, n_seeds: int = 10) -> pl.DataFrame:\n", " \"\"\"Corre simulaciones variando un parámetro. N seeds por valor para tener distribución.\"\"\"\n", " records = []\n", " total = len(values) * n_seeds\n", " done = 0\n", " for val in values:\n", " for seed in range(n_seeds):\n", " params = dict(base_params)\n", " params[param_name] = val\n", " params['seed'] = seed * 1000 + hash(str(val)) % 1000\n", " sim = run_market_sim(**params)\n", " \n", " spreads = sim['spreads']\n", " npt = sim['n_trades_per_tick']\n", " tp = np.array(sim['trade_prices']) if sim['trade_prices'] else np.array([0.0])\n", " fp = np.array(sim['fundamental_prices'])\n", " \n", " # Realized vol de trades\n", " tp_pos = tp[tp > 0]\n", " if len(tp_pos) > 2:\n", " log_ret = np.diff(np.log(tp_pos))\n", " rvol = float(np.std(log_ret))\n", " else:\n", " rvol = 0.0\n", " \n", " records.append({\n", " 'param_value': float(val),\n", " 'seed': seed,\n", " 'total_trades': sim['total_trades'],\n", " 'mean_spread': float(np.mean(spreads)),\n", " 'std_spread': float(np.std(spreads)),\n", " 'mean_trades_tick': float(np.mean(npt)),\n", " 'max_trades_tick': int(np.max(npt)),\n", " 'realized_vol': rvol,\n", " 'price_return_pct': float((fp[-1] / fp[0] - 1) * 100),\n", " 'maker_total_pnl': float(sum(sim['maker_pnls'])),\n", " })\n", " done += 1\n", " \n", " print(f'{param_name}: {done} simulaciones')\n", " return pl.DataFrame(records)\n", "\n", "\n", "def plot_sweep(df: pl.DataFrame, param_name: str, metrics: list[tuple[str, str]], title: str = ''):\n", " \"\"\"Grafica métricas vs parámetro con bandas de confianza.\"\"\"\n", " n = len(metrics)\n", " fig, axes = plt.subplots(1, n, figsize=(5 * n, 4))\n", " if n == 1:\n", " axes = [axes]\n", " \n", " agg = df.group_by('param_value').agg(\n", " *[pl.col(m).mean().alias(f'{m}_mean') for m, _ in metrics],\n", " *[pl.col(m).std().alias(f'{m}_std') for m, _ in metrics],\n", " ).sort('param_value')\n", " \n", " x = agg['param_value'].to_numpy()\n", " \n", " for i, (metric, label) in enumerate(metrics):\n", " ax = axes[i]\n", " y = agg[f'{metric}_mean'].to_numpy()\n", " yerr = agg[f'{metric}_std'].to_numpy()\n", " yerr = np.nan_to_num(yerr, nan=0.0)\n", " \n", " ax.fill_between(x, y - yerr, y + yerr, alpha=0.2, color='#3498db')\n", " ax.plot(x, y, 'o-', color='#3498db', markersize=4, linewidth=1.5)\n", " ax.set_xlabel(param_name)\n", " ax.set_ylabel(label)\n", " ax.grid(True, alpha=0.3)\n", " \n", " fig.suptitle(title or f'Sensibilidad a {param_name}', fontsize=12, fontweight='bold')\n", " plt.tight_layout()\n", " plt.show()\n", "\n", "\n", "METRICS = [\n", " ('mean_spread', 'Spread medio'),\n", " ('total_trades', 'Total trades'),\n", " ('realized_vol', 'Vol realizada'),\n", " ('maker_total_pnl', 'PnL makers'),\n", "]\n", "\n", "print('sweep_param() y plot_sweep() definidas')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "## 1. SIGMA (volatilidad)\n", "\n", "**Qué es:** cuánto se mueve el precio fundamental por tick. \n", "**Calibrado:** 0.000514 (desde retornos 1m de BTC) \n", "**Confianza:** ALTA — medición directa \n", "**Hipótesis:** más σ → más oportunidades para takers → más trades, spread más ancho (makers se protegen)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sigma_vals = [0.0001, 0.0003, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05]\n", "df_sigma = sweep_param('sigma', sigma_vals, BASE, n_seeds=10)\n", "plot_sweep(df_sigma, 'sigma', METRICS, 'SIGMA — volatilidad del precio fundamental')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "## 2. GAMMA (aversión al riesgo del maker)\n", "\n", "**Qué es:** cuánto ajusta el maker sus precios por inventario acumulado. \n", "**Calibrado:** NO directamente — se infiere de spread vs volatilidad \n", "**Confianza:** BAJA \n", "**Hipótesis:** más γ → spread más ancho → menos ejecuciones → makers más seguros pero mercado menos líquido" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "gamma_vals = [0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 1.0, 2.0, 5.0]\n", "df_gamma = sweep_param('gamma', gamma_vals, BASE, n_seeds=10)\n", "plot_sweep(df_gamma, 'gamma', METRICS, 'GAMMA — aversión al riesgo del maker (Avellaneda-Stoikov)')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "## 3. N_TAKERS_LAMBDA (arrival rate de takers)\n", "\n", "**Qué es:** cuántos takers llegan por tick en promedio (base Poisson, amplificado por Hawkes). \n", "**Calibrado:** 12.0 aggTrades/segundo \n", "**Confianza:** MEDIA — medimos aggTrades, no órdenes originales \n", "**Hipótesis:** más λ → más presión sobre el book → más trades, spreads más volátiles" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "lambda_vals = [0.5, 1, 2, 5, 10, 15, 20, 30, 50]\n", "df_lambda = sweep_param('n_takers_lambda', lambda_vals, BASE, n_seeds=10)\n", "plot_sweep(df_lambda, 'n_takers_lambda', METRICS, 'LAMBDA — arrival rate de takers')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "## 4. HAWKES_ALPHA (contagio entre trades)\n", "\n", "**Qué es:** cuánto excita un trade la llegada de más trades (clustering). \n", "**Calibrado:** 0.17 (fit exponencial sobre ACF) \n", "**Confianza:** BAJA — el branching ratio salió >1, modelo simple no captura bien \n", "**Hipótesis:** más α → ráfagas más intensas → max trades/tick explota, spread se estresa" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "hawkes_a_vals = [0.0, 0.05, 0.1, 0.2, 0.3, 0.5, 0.7, 0.9]\n", "df_hawkes_a = sweep_param('hawkes_alpha', hawkes_a_vals, BASE, n_seeds=10)\n", "plot_sweep(df_hawkes_a, 'hawkes_alpha', METRICS, 'HAWKES_ALPHA — contagio entre trades')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "## 5. TAKER_SIZE_ALPHA (cola de tamaños — ballenas)\n", "\n", "**Qué es:** exponente Pareto de los tamaños de órdenes. Bajo = más ballenas. \n", "**Calibrado:** 0.78 (MLE sobre cola p90+) \n", "**Confianza:** MEDIA — medimos fills agrupados, no órdenes originales \n", "**Hipótesis:** α bajo → más órdenes grandes → más slippage, spread se abre más, más impacto en precio" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "size_a_vals = [0.3, 0.5, 0.78, 1.0, 1.5, 2.0, 3.0, 5.0]\n", "df_size_a = sweep_param('taker_size_alpha', size_a_vals, BASE, n_seeds=10)\n", "plot_sweep(df_size_a, 'taker_size_alpha', METRICS, 'TAKER_SIZE_ALPHA — cola de tamaños (bajo = más ballenas)')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "## 6. N_MAKERS (número de market makers)\n", "\n", "**Qué es:** cuántos makers compiten poniendo liquidez. \n", "**Calibrado:** NO directamente observable — se infiere de capas de liquidez en L2 \n", "**Confianza:** BAJA \n", "**Hipótesis:** más makers → más competencia → spread más tight, más liquidez, pero PnL por maker baja" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "nmakers_vals = [1, 2, 3, 5, 7, 10, 15, 20]\n", "df_nmakers = sweep_param('n_makers', nmakers_vals, BASE, n_seeds=10)\n", "plot_sweep(df_nmakers, 'n_makers', METRICS, 'N_MAKERS — número de market makers')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "## 7. MAKER_SPREAD (spread base)\n", "\n", "**Qué es:** el spread mínimo que los makers intentan capturar. \n", "**Calibrado:** $0.01 (spread real de BTC/USDT en Binance) \n", "**Confianza:** ALTA — medición directa del book \n", "**Hipótesis:** spread más ancho → menos ejecuciones → makers más rentables pero mercado menos eficiente" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "spread_vals = [0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 1.0, 2.0]\n", "df_spread = sweep_param('maker_spread', spread_vals, BASE, n_seeds=10)\n", "plot_sweep(df_spread, 'maker_spread', METRICS, 'MAKER_SPREAD — spread base deseado por makers')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "## 8. JUMP_INTENSITY (frecuencia de saltos)\n", "\n", "**Qué es:** probabilidad de un movimiento brusco en cada tick. \n", "**Calibrado:** 1.3% (retornos > 3σ) \n", "**Confianza:** MEDIA — depende del threshold elegido \n", "**Hipótesis:** más jumps → más volatilidad realizada, kurtosis sube, makers sufren más" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "jump_vals = [0.0, 0.005, 0.01, 0.02, 0.05, 0.1, 0.15, 0.2]\n", "df_jump = sweep_param('jump_intensity', jump_vals, BASE, n_seeds=10)\n", "plot_sweep(df_jump, 'jump_intensity', METRICS, 'JUMP_INTENSITY — frecuencia de saltos bruscos')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "## 9. HAWKES_BETA (decaimiento del contagio)\n", "\n", "**Qué es:** qué tan rápido se calma la excitación después de una ráfaga. \n", "**Calibrado:** 0.015 \n", "**Confianza:** BAJA \n", "**Hipótesis:** β bajo → ráfagas más largas → mercado más caótico. β alto → ráfagas cortas → más Poisson" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "hawkes_b_vals = [0.005, 0.01, 0.02, 0.05, 0.1, 0.3, 0.5, 1.0, 2.0]\n", "df_hawkes_b = sweep_param('hawkes_beta', hawkes_b_vals, BASE, n_seeds=10)\n", "plot_sweep(df_hawkes_b, 'hawkes_beta', METRICS, 'HAWKES_BETA — decaimiento del contagio (alto = se calma rápido)')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "## 10. MAKER_LEVELS (profundidad del maker)\n", "\n", "**Qué es:** cuántos niveles de precio pone cada maker a cada lado. \n", "**Calibrado:** se estima contando niveles con liquidez significativa en L2 \n", "**Confianza:** BAJA \n", "**Hipótesis:** más niveles → más profundidad → menos slippage para órdenes grandes" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "levels_vals = [1, 2, 3, 5, 7, 10, 15]\n", "df_levels = sweep_param('maker_levels', levels_vals, BASE, n_seeds=10)\n", "plot_sweep(df_levels, 'maker_levels', METRICS, 'MAKER_LEVELS — niveles de profundidad por maker')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "---\n", "## 11. Resumen: sensibilidad relativa\n", "\n", "¿Qué parámetro afecta más a cada métrica?" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Calcular coeficiente de variación de cada métrica respecto a cada parámetro\n", "all_sweeps = {\n", " 'sigma': df_sigma,\n", " 'gamma': df_gamma,\n", " 'n_takers_lambda': df_lambda,\n", " 'hawkes_alpha': df_hawkes_a,\n", " 'taker_size_alpha': df_size_a,\n", " 'n_makers': df_nmakers,\n", " 'maker_spread': df_spread,\n", " 'jump_intensity': df_jump,\n", " 'hawkes_beta': df_hawkes_b,\n", " 'maker_levels': df_levels,\n", "}\n", "\n", "sensitivity = []\n", "for pname, df in all_sweeps.items():\n", " agg = df.group_by('param_value').agg(\n", " pl.col('mean_spread').mean(),\n", " pl.col('total_trades').mean(),\n", " pl.col('realized_vol').mean(),\n", " pl.col('maker_total_pnl').mean(),\n", " )\n", " for metric in ['mean_spread', 'total_trades', 'realized_vol', 'maker_total_pnl']:\n", " vals = agg[metric].to_numpy()\n", " vals = vals[~np.isnan(vals)]\n", " if len(vals) > 1 and np.mean(np.abs(vals)) > 0:\n", " cv = np.std(vals) / np.mean(np.abs(vals))\n", " else:\n", " cv = 0.0\n", " sensitivity.append({'param': pname, 'metric': metric, 'cv': round(cv, 3)})\n", "\n", "sens_df = pl.DataFrame(sensitivity)\n", "\n", "# Heatmap\n", "params_order = list(all_sweeps.keys())\n", "metrics_order = ['mean_spread', 'total_trades', 'realized_vol', 'maker_total_pnl']\n", "metrics_labels = ['Spread', 'Trades', 'Vol realizada', 'PnL makers']\n", "\n", "matrix = np.zeros((len(params_order), len(metrics_order)))\n", "for row in sens_df.iter_rows(named=True):\n", " i = params_order.index(row['param'])\n", " j = metrics_order.index(row['metric'])\n", " matrix[i, j] = row['cv']\n", "\n", "fig, ax = plt.subplots(figsize=(10, 8))\n", "im = ax.imshow(matrix, cmap='YlOrRd', aspect='auto')\n", "ax.set_xticks(range(len(metrics_labels)))\n", "ax.set_xticklabels(metrics_labels, fontsize=10)\n", "ax.set_yticks(range(len(params_order)))\n", "ax.set_yticklabels(params_order, fontsize=10)\n", "\n", "for i in range(len(params_order)):\n", " for j in range(len(metrics_order)):\n", " ax.text(j, i, f'{matrix[i,j]:.2f}', ha='center', va='center', fontsize=9,\n", " color='white' if matrix[i,j] > 0.5 else 'black')\n", "\n", "ax.set_title('Sensibilidad: coeficiente de variación por parámetro × métrica\\n(más alto = más impacto)', fontsize=12)\n", "plt.colorbar(im, label='CV')\n", "plt.tight_layout()\n", "plt.show()\n", "\n", "# Top sensibilidades\n", "print('\\nTop 10 combinaciones param × métrica más sensibles:')\n", "top = sens_df.sort('cv', descending=True).head(10)\n", "print(top)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "name": "python", "version": "3.13.0" } }, "nbformat": 4, "nbformat_minor": 4 }