Files
estudio_mercados/notebooks/.ipynb_checkpoints/07_montecarlo-checkpoint.ipynb
T

518 lines
18 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Monte Carlo: análisis de sensibilidad por parámetro\n",
"\n",
"Usamos las funciones del registry para correr cientos de simulaciones variando **un parámetro a la vez**.\n",
"Esto nos dice:\n",
"- Qué parámetros importan más\n",
"- Cómo responde el mercado simulado a cada cambio\n",
"- Qué rangos producen mercados realistas\n",
"\n",
"## Parámetros calibrados desde BTC/USDT real (notebook 06)\n",
"\n",
"| Parámetro | Valor calibrado | Confianza | Fuente |\n",
"|---|---|---|---|\n",
"| sigma | 0.000514 | Alta | Std retornos 1m |\n",
"| mu | ~0 | Alta | Media retornos |\n",
"| jump_intensity | 0.013 | Media | % retornos > 3σ |\n",
"| jump_size_std | 0.000356 | Media | Std de los jumps |\n",
"| n_takers_lambda | 12.0 | Media | aggTrades/segundo |\n",
"| taker_size_alpha | 0.78 | Media | Pareto MLE cola p90+ |\n",
"| hawkes_alpha | 0.17 | Baja | Fit exp sobre ACF |\n",
"| hawkes_beta | 0.015 | Baja | Fit exp sobre ACF |\n",
"| gamma | ? (se asume 0.1) | No observable | Relación spread~vol |\n",
"| n_makers | ? (se asume 5) | No observable | Capas de liquidez L2 |\n",
"| maker_spread | 0.01 | Alta | Spread real del book |"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Base params cargados\n",
"Test: 270 trades, spread=-0.057967\n"
]
}
],
"source": [
"import sys, os\n",
"sys.path.insert(0, os.path.join(os.environ.get('FN_REGISTRY_ROOT', os.path.expanduser('~/fn_registry')), 'python', 'functions'))\n",
"sys.path.insert(0, os.path.join(os.environ.get('FN_REGISTRY_ROOT', os.path.expanduser('~/fn_registry')), 'python', 'functions', 'pipelines'))\n",
"\n",
"from run_market_sim import run_market_sim\n",
"import numpy as np\n",
"import polars as pl\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Baseline configuration shared by every sweep in this notebook.\n",
"# 'calibrated' marks values listed in the calibration table at the top of the\n",
"# notebook; the remaining entries are working assumptions.\n",
"BASE = {\n",
"    # Price process and run length\n",
"    'initial_price': 100.0,\n",
"    'n_ticks': 300,\n",
"    'sigma': 0.000514,          # calibrated\n",
"    'mu': 0.0,                  # calibrated (~0)\n",
"    'jump_intensity': 0.013,    # calibrated\n",
"    'jump_size_std': 0.000356,  # calibrated\n",
"    # Makers\n",
"    'n_makers': 5,              # assumption (listed as not observable)\n",
"    'maker_spread': 0.01,       # calibrated\n",
"    'gamma': 0.1,               # assumption (listed as not observable)\n",
"    'maker_levels': 3,\n",
"    'maker_qty': 10.0,\n",
"    # Takers\n",
"    'n_takers_lambda': 12.0,    # calibrated\n",
"    'taker_size_alpha': 0.78,   # calibrated\n",
"    'taker_size_min': 0.001,\n",
"    'taker_size_max': 5.0,\n",
"    'hawkes_alpha': 0.17,       # calibrated (low confidence)\n",
"    'hawkes_beta': 0.015,       # calibrated (low confidence)\n",
"}\n",
"\n",
"print('Base params cargados')\n",
"\n",
"# Smoke test: a single fixed-seed run to confirm the pipeline executes end to end.\n",
"# NOTE(review): the stored output of this cell reports a negative mean spread\n",
"# (-0.057967); confirm how run_market_sim defines `spreads` before relying on\n",
"# the spread metric in the sweeps below.\n",
"r = run_market_sim(**BASE, seed=0)\n",
"print(f'Test: {r[\"total_trades\"]} trades, spread={np.mean(r[\"spreads\"]):.6f}')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Herramientas de análisis"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def sweep_param(param_name: str, values: list, base_params: dict, n_seeds: int = 10) -> pl.DataFrame:\n",
"    \"\"\"Run the simulation repeatedly while varying a single parameter.\n",
"\n",
"    For every entry of ``values`` the simulation is executed ``n_seeds`` times with\n",
"    ``base_params`` overridden at ``param_name``. One summary row is recorded per run.\n",
"\n",
"    Args:\n",
"        param_name: Keyword argument of ``run_market_sim`` to override.\n",
"        values: Values to assign to ``param_name``; each must be convertible to float.\n",
"        base_params: Baseline keyword arguments. Copied for every run, never mutated.\n",
"        n_seeds: Number of replicate runs per value.\n",
"\n",
"    Returns:\n",
"        A polars DataFrame with one row per (value, replicate) and the columns\n",
"        ``param_value``, ``seed``, ``total_trades``, ``mean_spread``, ``std_spread``,\n",
"        ``mean_trades_tick``, ``max_trades_tick``, ``realized_vol``,\n",
"        ``price_return_pct`` and ``maker_total_pnl``.\n",
"    \"\"\"\n",
"    import zlib  # local import keeps this cell runnable on its own\n",
"\n",
"    records = []\n",
"    for val in values:\n",
"        # Stable per-value offset. The built-in hash() of a str is salted per\n",
"        # interpreter process, so it produced different seeds (and different\n",
"        # results) after every kernel restart; CRC32 is deterministic.\n",
"        val_offset = zlib.crc32(str(val).encode('utf-8')) % 1000\n",
"        for seed in range(n_seeds):\n",
"            params = dict(base_params)\n",
"            params[param_name] = val\n",
"            params['seed'] = seed * 1000 + val_offset\n",
"            sim = run_market_sim(**params)\n",
"\n",
"            spreads = sim['spreads']\n",
"            npt = sim['n_trades_per_tick']\n",
"            fp = np.array(sim['fundamental_prices'])\n",
"\n",
"            # Realized vol: std of log-returns between consecutive positive trade\n",
"            # prices. np.asarray accepts both lists and arrays (a truthiness test\n",
"            # on a multi-element array would raise); too few trades -> 0.0.\n",
"            raw_tp = sim['trade_prices']\n",
"            tp = np.asarray([] if raw_tp is None else raw_tp, dtype=float)\n",
"            tp_pos = tp[tp > 0]\n",
"            if len(tp_pos) > 2:\n",
"                rvol = float(np.std(np.diff(np.log(tp_pos))))\n",
"            else:\n",
"                rvol = 0.0\n",
"\n",
"            records.append({\n",
"                'param_value': float(val),\n",
"                'seed': seed,\n",
"                'total_trades': sim['total_trades'],\n",
"                'mean_spread': float(np.mean(spreads)),\n",
"                'std_spread': float(np.std(spreads)),\n",
"                'mean_trades_tick': float(np.mean(npt)),\n",
"                # np.max raises on an empty sequence; report 0 instead.\n",
"                'max_trades_tick': int(np.max(npt)) if len(npt) else 0,\n",
"                'realized_vol': rvol,\n",
"                'price_return_pct': float((fp[-1] / fp[0] - 1) * 100),\n",
"                'maker_total_pnl': float(sum(sim['maker_pnls'])),\n",
"            })\n",
"\n",
"    print(f'{param_name}: {len(records)} simulaciones')\n",
"    return pl.DataFrame(records)\n",
"\n",
"\n",
"def plot_sweep(df: pl.DataFrame, param_name: str, metrics: list[tuple[str, str]], title: str = ''):\n",
"    \"\"\"Plot each metric's mean against the swept parameter with a +/- 1 std band.\n",
"\n",
"    Args:\n",
"        df: Sweep results containing a ``param_value`` column plus one column per metric.\n",
"        param_name: Used as the x-axis label and in the default title.\n",
"        metrics: ``(column_name, y_axis_label)`` pairs; one panel is drawn per pair.\n",
"        title: Figure title. When empty, a default built from ``param_name`` is used.\n",
"    \"\"\"\n",
"    n_panels = len(metrics)\n",
"    # squeeze=False always returns a 2-D array of axes, so a single panel needs\n",
"    # no special handling.\n",
"    fig, axes = plt.subplots(1, n_panels, figsize=(5 * n_panels, 4), squeeze=False)\n",
"\n",
"    mean_exprs = [pl.col(name).mean().alias(f'{name}_mean') for name, _ in metrics]\n",
"    std_exprs = [pl.col(name).std().alias(f'{name}_std') for name, _ in metrics]\n",
"    summary = df.group_by('param_value').agg(*mean_exprs, *std_exprs).sort('param_value')\n",
"    x = summary['param_value'].to_numpy()\n",
"\n",
"    for ax, (metric, label) in zip(axes[0], metrics):\n",
"        center = summary[f'{metric}_mean'].to_numpy()\n",
"        # A NaN std is drawn as a zero-width band instead of breaking the fill.\n",
"        half_width = np.nan_to_num(summary[f'{metric}_std'].to_numpy(), nan=0.0)\n",
"\n",
"        ax.fill_between(x, center - half_width, center + half_width, alpha=0.2, color='#3498db')\n",
"        ax.plot(x, center, 'o-', color='#3498db', markersize=4, linewidth=1.5)\n",
"        ax.set_xlabel(param_name)\n",
"        ax.set_ylabel(label)\n",
"        ax.grid(True, alpha=0.3)\n",
"\n",
"    fig.suptitle(title or f'Sensibilidad a {param_name}', fontsize=12, fontweight='bold')\n",
"    plt.tight_layout()\n",
"    plt.show()\n",
"\n",
"\n",
"METRICS = [\n",
" ('mean_spread', 'Spread medio'),\n",
" ('total_trades', 'Total trades'),\n",
" ('realized_vol', 'Vol realizada'),\n",
" ('maker_total_pnl', 'PnL makers'),\n",
"]\n",
"\n",
"print('sweep_param() y plot_sweep() definidas')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"## 1. SIGMA (volatilidad)\n",
"\n",
"**Qué es:** cuánto se mueve el precio fundamental por tick. \n",
"**Calibrado:** 0.000514 (desde retornos 1m de BTC) \n",
"**Confianza:** ALTA — medición directa \n",
"**Hipótesis:** más σ → más oportunidades para takers → más trades, spread más ancho (makers se protegen)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Grid brackets the BASE sigma (0.000514) and extends roughly two orders of magnitude above it.\n",
"sigma_vals = [0.0001, 0.0003, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05]\n",
"df_sigma = sweep_param(param_name='sigma', values=sigma_vals, base_params=BASE, n_seeds=10)\n",
"plot_sweep(\n",
"    df=df_sigma,\n",
"    param_name='sigma',\n",
"    metrics=METRICS,\n",
"    title='SIGMA — volatilidad del precio fundamental',\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"## 2. GAMMA (aversión al riesgo del maker)\n",
"\n",
"**Qué es:** cuánto ajusta el maker sus precios por inventario acumulado. \n",
"**Calibrado:** NO directamente — se infiere de spread vs volatilidad \n",
"**Confianza:** BAJA \n",
"**Hipótesis:** más γ → spread más ancho → menos ejecuciones → makers más seguros pero mercado menos líquido"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Wide grid (0.001 to 5.0) around the BASE gamma of 0.1, which is an assumption, not a calibrated value.\n",
"gamma_vals = [0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 1.0, 2.0, 5.0]\n",
"df_gamma = sweep_param(param_name='gamma', values=gamma_vals, base_params=BASE, n_seeds=10)\n",
"plot_sweep(\n",
"    df=df_gamma,\n",
"    param_name='gamma',\n",
"    metrics=METRICS,\n",
"    title='GAMMA — aversión al riesgo del maker (Avellaneda-Stoikov)',\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"## 3. N_TAKERS_LAMBDA (arrival rate de takers)\n",
"\n",
"**Qué es:** cuántos takers llegan por tick en promedio (base Poisson, amplificado por Hawkes). \n",
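"**Calibrado:** 12.0 aggTrades/segundo — ojo: σ se calibró sobre retornos de 1 minuto, así que conviene fijar explícitamente qué duración representa un tick \n",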
"**Confianza:** MEDIA — medimos aggTrades, no órdenes originales \n",
"**Hipótesis:** más λ → más presión sobre el book → más trades, spreads más volátiles"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Grid runs from 0.5 to 50; the BASE value (12.0) sits between the 10 and 15 grid points.\n",
"lambda_vals = [0.5, 1, 2, 5, 10, 15, 20, 30, 50]\n",
"df_lambda = sweep_param(param_name='n_takers_lambda', values=lambda_vals, base_params=BASE, n_seeds=10)\n",
"plot_sweep(\n",
"    df=df_lambda,\n",
"    param_name='n_takers_lambda',\n",
"    metrics=METRICS,\n",
"    title='LAMBDA — arrival rate de takers',\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"## 4. HAWKES_ALPHA (contagio entre trades)\n",
"\n",
"**Qué es:** cuánto excita un trade la llegada de más trades (clustering). \n",
"**Calibrado:** 0.17 (fit exponencial sobre ACF) \n",
"**Confianza:** BAJA — el branching ratio salió >1, modelo simple no captura bien \n",
"**Hipótesis:** más α → ráfagas más intensas → max trades/tick explota, spread se estresa"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Grid starts at 0.0 (presumably no self-excitation — confirm against run_market_sim) and goes up to 0.9.\n",
"hawkes_a_vals = [0.0, 0.05, 0.1, 0.2, 0.3, 0.5, 0.7, 0.9]\n",
"df_hawkes_a = sweep_param(param_name='hawkes_alpha', values=hawkes_a_vals, base_params=BASE, n_seeds=10)\n",
"plot_sweep(\n",
"    df=df_hawkes_a,\n",
"    param_name='hawkes_alpha',\n",
"    metrics=METRICS,\n",
"    title='HAWKES_ALPHA — contagio entre trades',\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"## 5. TAKER_SIZE_ALPHA (cola de tamaños — ballenas)\n",
"\n",
"**Qué es:** exponente Pareto de los tamaños de órdenes. Bajo = más ballenas. \n",
"**Calibrado:** 0.78 (MLE sobre cola p90+) \n",
"**Confianza:** MEDIA — medimos fills agrupados, no órdenes originales \n",
"**Hipótesis:** α bajo → más órdenes grandes → más slippage, spread se abre más, más impacto en precio"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Grid includes the BASE value (0.78) and spans 0.3 to 5.0.\n",
"size_a_vals = [0.3, 0.5, 0.78, 1.0, 1.5, 2.0, 3.0, 5.0]\n",
"df_size_a = sweep_param(param_name='taker_size_alpha', values=size_a_vals, base_params=BASE, n_seeds=10)\n",
"plot_sweep(\n",
"    df=df_size_a,\n",
"    param_name='taker_size_alpha',\n",
"    metrics=METRICS,\n",
"    title='TAKER_SIZE_ALPHA — cola de tamaños (bajo = más ballenas)',\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"## 6. N_MAKERS (número de market makers)\n",
"\n",
"**Qué es:** cuántos makers compiten poniendo liquidez. \n",
"**Calibrado:** NO directamente observable — se infiere de capas de liquidez en L2 \n",
"**Confianza:** BAJA \n",
"**Hipótesis:** más makers → más competencia → spread más tight, más liquidez, pero PnL por maker baja"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Integer grid from 1 to 20 makers; sweep_param stores each value as a float in param_value.\n",
"nmakers_vals = [1, 2, 3, 5, 7, 10, 15, 20]\n",
"df_nmakers = sweep_param(param_name='n_makers', values=nmakers_vals, base_params=BASE, n_seeds=10)\n",
"plot_sweep(\n",
"    df=df_nmakers,\n",
"    param_name='n_makers',\n",
"    metrics=METRICS,\n",
"    title='N_MAKERS — número de market makers',\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"## 7. MAKER_SPREAD (spread base)\n",
"\n",
"**Qué es:** el spread mínimo que los makers intentan capturar. \n",
"**Calibrado:** $0.01 (spread real de BTC/USDT en Binance) \n",
"**Confianza:** ALTA — medición directa del book \n",
"**Hipótesis:** spread más ancho → menos ejecuciones → makers más rentables pero mercado menos eficiente"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Grid includes the BASE value (0.01) and spans 0.001 to 2.0.\n",
"spread_vals = [0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 1.0, 2.0]\n",
"df_spread = sweep_param(param_name='maker_spread', values=spread_vals, base_params=BASE, n_seeds=10)\n",
"plot_sweep(\n",
"    df=df_spread,\n",
"    param_name='maker_spread',\n",
"    metrics=METRICS,\n",
"    title='MAKER_SPREAD — spread base deseado por makers',\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"## 8. JUMP_INTENSITY (frecuencia de saltos)\n",
"\n",
"**Qué es:** probabilidad de un movimiento brusco en cada tick. \n",
"**Calibrado:** 1.3% (retornos > 3σ) \n",
"**Confianza:** MEDIA — depende del threshold elegido \n",
"**Hipótesis:** más jumps → más volatilidad realizada, kurtosis sube, makers sufren más"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Grid runs from 0.0 to 0.2; the BASE value (0.013) sits between the 0.01 and 0.02 grid points.\n",
"jump_vals = [0.0, 0.005, 0.01, 0.02, 0.05, 0.1, 0.15, 0.2]\n",
"df_jump = sweep_param(param_name='jump_intensity', values=jump_vals, base_params=BASE, n_seeds=10)\n",
"plot_sweep(\n",
"    df=df_jump,\n",
"    param_name='jump_intensity',\n",
"    metrics=METRICS,\n",
"    title='JUMP_INTENSITY — frecuencia de saltos bruscos',\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"## 9. HAWKES_BETA (decaimiento del contagio)\n",
"\n",
"**Qué es:** qué tan rápido se calma la excitación después de una ráfaga. \n",
"**Calibrado:** 0.015 \n",
"**Confianza:** BAJA \n",
"**Hipótesis:** β bajo → ráfagas más largas → mercado más caótico. β alto → ráfagas cortas → más Poisson"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Grid runs from 0.005 to 2.0; the BASE value (0.015) sits between the 0.01 and 0.02 grid points.\n",
"hawkes_b_vals = [0.005, 0.01, 0.02, 0.05, 0.1, 0.3, 0.5, 1.0, 2.0]\n",
"df_hawkes_b = sweep_param(param_name='hawkes_beta', values=hawkes_b_vals, base_params=BASE, n_seeds=10)\n",
"plot_sweep(\n",
"    df=df_hawkes_b,\n",
"    param_name='hawkes_beta',\n",
"    metrics=METRICS,\n",
"    title='HAWKES_BETA — decaimiento del contagio (alto = se calma rápido)',\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"## 10. MAKER_LEVELS (profundidad del maker)\n",
"\n",
"**Qué es:** cuántos niveles de precio pone cada maker a cada lado. \n",
"**Calibrado:** se estima contando niveles con liquidez significativa en L2 \n",
"**Confianza:** BAJA \n",
"**Hipótesis:** más niveles → más profundidad → menos slippage para órdenes grandes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Integer grid from 1 to 15 levels; the BASE value is 3.\n",
"levels_vals = [1, 2, 3, 5, 7, 10, 15]\n",
"df_levels = sweep_param(param_name='maker_levels', values=levels_vals, base_params=BASE, n_seeds=10)\n",
"plot_sweep(\n",
"    df=df_levels,\n",
"    param_name='maker_levels',\n",
"    metrics=METRICS,\n",
"    title='MAKER_LEVELS — niveles de profundidad por maker',\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"## 11. Resumen: sensibilidad relativa\n",
"\n",
"¿Qué parámetro afecta más a cada métrica?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Relative sensitivity of each metric to each parameter. For every sweep, average\n",
"# the metric over seeds at each parameter value, then measure how much those\n",
"# averages move across the grid: std(means) / mean(|means|).\n",
"all_sweeps = {\n",
"    'sigma': df_sigma,\n",
"    'gamma': df_gamma,\n",
"    'n_takers_lambda': df_lambda,\n",
"    'hawkes_alpha': df_hawkes_a,\n",
"    'taker_size_alpha': df_size_a,\n",
"    'n_makers': df_nmakers,\n",
"    'maker_spread': df_spread,\n",
"    'jump_intensity': df_jump,\n",
"    'hawkes_beta': df_hawkes_b,\n",
"    'maker_levels': df_levels,\n",
"}\n",
"\n",
"# Declared once so the aggregation, the matrix and the heatmap axes cannot drift apart.\n",
"params_order = list(all_sweeps.keys())\n",
"metrics_order = ['mean_spread', 'total_trades', 'realized_vol', 'maker_total_pnl']\n",
"metrics_labels = ['Spread', 'Trades', 'Vol realizada', 'PnL makers']\n",
"\n",
"sensitivity = []\n",
"matrix = np.zeros((len(params_order), len(metrics_order)))\n",
"for i, pname in enumerate(params_order):\n",
"    agg = all_sweeps[pname].group_by('param_value').agg(\n",
"        *[pl.col(m).mean() for m in metrics_order]\n",
"    )\n",
"    for j, metric in enumerate(metrics_order):\n",
"        vals = agg[metric].to_numpy()\n",
"        vals = vals[~np.isnan(vals)]\n",
"        # Dividing by mean |value| keeps the ratio defined for metrics that can\n",
"        # change sign (e.g. PnL). Fewer than two points or an all-zero series -> 0.\n",
"        scale = np.mean(np.abs(vals)) if len(vals) > 1 else 0.0\n",
"        cv = round(float(np.std(vals) / scale), 3) if scale > 0 else 0.0\n",
"        sensitivity.append({'param': pname, 'metric': metric, 'cv': cv})\n",
"        matrix[i, j] = cv\n",
"\n",
"sens_df = pl.DataFrame(sensitivity)\n",
"\n",
"# Heatmap\n",
"fig, ax = plt.subplots(figsize=(10, 8))\n",
"im = ax.imshow(matrix, cmap='YlOrRd', aspect='auto')\n",
"ax.set_xticks(range(len(metrics_labels)))\n",
"ax.set_xticklabels(metrics_labels, fontsize=10)\n",
"ax.set_yticks(range(len(params_order)))\n",
"ax.set_yticklabels(params_order, fontsize=10)\n",
"\n",
"# imshow autoscales colours to the data range, so the annotation colour switches\n",
"# at the midpoint of that range rather than at a fixed 0.5; labels stay readable\n",
"# whatever the CV magnitudes are.\n",
"text_cutoff = 0.5 * (matrix.min() + matrix.max())\n",
"for i in range(len(params_order)):\n",
"    for j in range(len(metrics_order)):\n",
"        ax.text(j, i, f'{matrix[i, j]:.2f}', ha='center', va='center', fontsize=9,\n",
"                color='white' if matrix[i, j] > text_cutoff else 'black')\n",
"\n",
"ax.set_title('Sensibilidad: coeficiente de variación por parámetro × métrica\\n(más alto = más impacto)', fontsize=12)\n",
"fig.colorbar(im, ax=ax, label='CV')\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"# Ten most sensitive (parameter, metric) combinations\n",
"print('\\nTop 10 combinaciones param × métrica más sensibles:')\n",
"top = sens_df.sort('cv', descending=True).head(10)\n",
"print(top)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.13.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}