Files
estudio_mercados/notebooks/.ipynb_checkpoints/06_analisis_datos_reales-checkpoint.ipynb
T

574 lines
21 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Análisis del dataset real: 1M aggTrades + Bitstamp L3\n",
"\n",
"Tenemos:\n",
"- **Binance**: 1M aggTrades de BTC/USDT (~26h de mercado)\n",
"- **Bitstamp**: L3 orders + trades (5 min de captura)\n",
"\n",
"## Objetivos\n",
"1. Estimar parámetros de microestructura sobre datos reales\n",
"2. Ver cómo cambian con ventanas deslizantes\n",
"3. Comparar Binance (aggTrades = órdenes agrupadas) vs Bitstamp (L3 = cada orden)\n",
"4. Calibrar nuestra simulación para que genere datos similares"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Binance aggTrades: 1,000,000 filas\n",
"Columnas: ['agg_trade_id', 'price', 'qty', 'first_trade_id', 'last_trade_id', 'timestamp', 'is_buyer_maker', 'side', 'n_fills']\n",
"Rango: 2026-04-02 14:26:02.324000 → 2026-04-03 16:32:41.139000 (1 day, 2:06:38.815000)\n",
"\n",
"Bitstamp L3 aún no disponible (grabando...)\n"
]
}
],
"source": [
"import polars as pl\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from scipy.optimize import curve_fit\n",
"from scipy.stats import probplot\n",
"from datetime import datetime, timezone\n",
"from pathlib import Path\n",
"\n",
"# Root folder of the captured datasets, relative to the notebook\n",
"DATA = Path('../data')\n",
"\n",
"\n",
"def utc_from_ms(ts_ms):\n",
"    \"\"\"Convert epoch milliseconds to a naive datetime expressed in UTC.\n",
"\n",
"    datetime.fromtimestamp() without a tz argument converts to the local zone\n",
"    of the machine running the notebook, so the printed range would differ\n",
"    between machines. The result is kept naive so that it prints and formats\n",
"    exactly like before, and lines up with the tz-naive polars datetimes built\n",
"    from the same column later in the notebook.\n",
"    \"\"\"\n",
"    return datetime.fromtimestamp(ts_ms / 1000, tz=timezone.utc).replace(tzinfo=None)\n",
"\n",
"\n",
"# Load the Binance aggTrades dump\n",
"trades = pl.read_csv(str(DATA / 'binance_btcusdt_aggtrades_1M.csv'))\n",
"print(f'Binance aggTrades: {trades.shape[0]:,} filas')\n",
"print(f'Columnas: {trades.columns}')\n",
"\n",
"# Time span covered by the dump (the timestamp column is divided by 1000, i.e. treated as milliseconds)\n",
"t_min = utc_from_ms(trades['timestamp'].min())\n",
"t_max = utc_from_ms(trades['timestamp'].max())\n",
"print(f'Rango: {t_min} → {t_max} ({t_max - t_min})')\n",
"\n",
"# Load the Bitstamp L3 order log only if the capture file is already on disk\n",
"bs_path = DATA / 'bitstamp_btcusd_l3_orders.csv'\n",
"if bs_path.exists():\n",
"    bs_orders = pl.read_csv(str(bs_path))\n",
"    print(f'\\nBitstamp L3 orders: {bs_orders.shape[0]:,} filas')\n",
"else:\n",
"    bs_orders = None\n",
"    print('\\nBitstamp L3 aún no disponible (grabando...)')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 1. Visión general del dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Tag every trade with a datetime column and with the one-minute bucket it belongs to\n",
"ts_col = pl.col('timestamp')\n",
"trades_dt = trades.with_columns(\n",
"    (ts_col * 1000).cast(pl.Datetime('us')).alias('datetime'),\n",
"    (ts_col // 60000).alias('minute'),\n",
")\n",
"\n",
"# One row per minute: trade count, close/low/high, traded volume, turnover, first timestamp\n",
"price_col = pl.col('price')\n",
"qty_col = pl.col('qty')\n",
"minute_bar_exprs = [\n",
"    pl.len().alias('n_trades'),\n",
"    price_col.last().alias('close'),\n",
"    price_col.min().alias('low'),\n",
"    price_col.max().alias('high'),\n",
"    qty_col.sum().alias('volume'),\n",
"    (qty_col * price_col).sum().alias('turnover'),\n",
"    ts_col.min().alias('ts'),\n",
"]\n",
"per_min = trades_dt.group_by('minute').agg(minute_bar_exprs).sort('minute')\n",
"\n",
"# Log return of each bar against the previous close (null for the first bar)\n",
"log_close = pl.col('close').log()\n",
"per_min = per_min.with_columns(\n",
"    (log_close - log_close.shift(1)).alias('log_return')\n",
")\n",
"\n",
"trades_per_min = per_min['n_trades']\n",
"volume_per_min = per_min['volume']\n",
"print(f'Minutos: {per_min.shape[0]}')\n",
"print(f'Trades/minuto: media={trades_per_min.mean():.0f}, mediana={trades_per_min.median():.0f}')\n",
"print(f'Volumen/minuto: media={volume_per_min.mean():.2f} BTC')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Overview figure: close price on top, traded volume and trade count per minute below\n",
"fig, axes = plt.subplots(3, 1, figsize=(16, 10), gridspec_kw={'height_ratios': [3, 1, 1]}, sharex=True)\n",
"\n",
"minutes = np.arange(per_min.shape[0])\n",
"stamp_fmt = '%Y-%m-%d %H:%M'\n",
"span_start = t_min.strftime(stamp_fmt)\n",
"span_end = t_max.strftime(stamp_fmt)\n",
"\n",
"# Price panel\n",
"axes[0].plot(minutes, per_min['close'].to_numpy(), linewidth=0.5, color='#3498db')\n",
"axes[0].set_ylabel('Precio (USDT)')\n",
"axes[0].set_title(f'BTC/USDT — 1M aggTrades ({span_start} → {span_end})')\n",
"\n",
"# The two bar panels only differ in column, colour and y label\n",
"bar_panels = [\n",
"    (axes[1], 'volume', '#e67e22', 'Volumen (BTC)'),\n",
"    (axes[2], 'n_trades', '#9b59b6', 'Trades/min'),\n",
"]\n",
"for ax, column, colour, y_label in bar_panels:\n",
"    ax.bar(minutes, per_min[column].to_numpy(), width=1.0, color=colour, alpha=0.6)\n",
"    ax.set_ylabel(y_label)\n",
"axes[2].set_xlabel('Minuto')\n",
"\n",
"for ax in axes:\n",
"    ax.grid(True, alpha=0.3)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 2. Estimación de parámetros\n",
"\n",
"### 2.1 Volatilidad (σ)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from scipy.stats import norm\n",
"\n",
"# One-minute log returns; the first bar has no previous close, so its null entry is dropped\n",
"returns = per_min.drop_nulls('log_return')['log_return'].to_numpy()\n",
"\n",
"# Per-minute volatility, scaled to longer horizons with the square root of the number of minutes\n",
"sigma_1m = np.std(returns)\n",
"sigma_1h = sigma_1m * np.sqrt(60)\n",
"sigma_1d = sigma_1m * np.sqrt(60 * 24)\n",
"sigma_ann = sigma_1d * np.sqrt(365)\n",
"\n",
"for report_line in (\n",
"    f'σ por minuto: {sigma_1m:.6f}',\n",
"    f'σ por hora: {sigma_1h:.5f}',\n",
"    f'σ diaria: {sigma_1d:.4f} ({sigma_1d*100:.2f}%)',\n",
"    f'σ anualizada: {sigma_ann:.2f} ({sigma_ann*100:.0f}%)',\n",
"):\n",
"    print(report_line)\n",
"\n",
"# Rolling sigma: standard deviation of the `window` returns that precede each position\n",
"window = 60\n",
"rolling_sigma = np.array([\n",
"    np.std(returns[stop - window:stop])\n",
"    for stop in range(window, len(returns))\n",
"])\n",
"\n",
"fig, axes = plt.subplots(2, 2, figsize=(14, 8))\n",
"(ax_hist, ax_qq), (ax_roll, ax_abs) = axes\n",
"\n",
"# Return histogram against a zero-mean normal density with the same sigma\n",
"ax_hist.hist(returns, bins=100, density=True, color='#3498db', alpha=0.6)\n",
"pdf_grid = np.linspace(returns.min(), returns.max(), 200)\n",
"ax_hist.plot(pdf_grid, norm.pdf(pdf_grid, 0, sigma_1m), 'r-', linewidth=1.5, label=f'Normal σ={sigma_1m:.5f}')\n",
"ax_hist.set_title('Distribución de retornos 1m')\n",
"ax_hist.legend(fontsize=8)\n",
"\n",
"# QQ plot against the normal distribution\n",
"probplot(returns, dist='norm', plot=ax_qq)\n",
"ax_qq.set_title('QQ-Plot vs Normal')\n",
"\n",
"# Rolling sigma, with the whole-sample estimate as a dashed reference\n",
"ax_roll.fill_between(range(len(rolling_sigma)), rolling_sigma, color='#e74c3c', alpha=0.5)\n",
"ax_roll.axhline(y=sigma_1m, color='black', linestyle='--', linewidth=0.8, label=f'σ global={sigma_1m:.5f}')\n",
"ax_roll.set_title(f'σ rolling (ventana {window}m)')\n",
"ax_roll.set_ylabel('σ por minuto')\n",
"ax_roll.legend(fontsize=8)\n",
"\n",
"# Absolute returns, to eyeball volatility clustering\n",
"ax_abs.plot(np.abs(returns), linewidth=0.3, color='#e74c3c', alpha=0.6)\n",
"ax_abs.set_title('|Retornos| — clustering de volatilidad')\n",
"ax_abs.set_ylabel('|log-return|')\n",
"\n",
"for panel in (ax_hist, ax_qq, ax_roll, ax_abs):\n",
"    panel.grid(True, alpha=0.3)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"# Standardised third and fourth central moments (non-excess kurtosis, so a normal gives 3)\n",
"centered_returns = returns - np.mean(returns)\n",
"kurtosis = float(np.mean(centered_returns**4) / sigma_1m**4)\n",
"skew = float(np.mean(centered_returns**3) / sigma_1m**3)\n",
"print(f'\\nKurtosis: {kurtosis:.1f} (Normal=3)')\n",
"print(f'Skewness: {skew:.3f} (Normal=0)')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.2 Arrival rate (λ) y Hawkes clustering"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def exp_fn(x, a, b):\n",
"    \"\"\"Exponential decay a * exp(-b * x), the curve fitted to the ACF below.\"\"\"\n",
"    return a * np.exp(-b * x)\n",
"\n",
"\n",
"# Trades per second. group_by() only emits the seconds that contain at least one trade.\n",
"trades_per_sec = trades.with_columns(\n",
"    (pl.col('timestamp') // 1000).alias('second')\n",
").group_by('second').agg(\n",
"    pl.len().alias('n_trades'),\n",
"    pl.col('qty').sum().alias('volume'),\n",
").sort('second')\n",
"\n",
"# Rebuild a dense one-second grid with explicit zeros for the idle seconds.\n",
"# Without this, the mean rate and Var/Mean ignore quiet periods and the ACF\n",
"# 'lag' counts rows instead of elapsed seconds.\n",
"active_seconds = trades_per_sec['second'].to_numpy()\n",
"arrivals = np.zeros(int(active_seconds[-1] - active_seconds[0]) + 1, dtype=np.int64)\n",
"arrivals[active_seconds - active_seconds[0]] = trades_per_sec['n_trades'].to_numpy()\n",
"\n",
"lambda_mean = np.mean(arrivals)\n",
"var_mean = np.var(arrivals) / lambda_mean\n",
"\n",
"print(f'Trades/segundo: media={lambda_mean:.1f}, mediana={np.median(arrivals):.0f}')\n",
"print(f'Var/Mean ratio: {var_mean:.1f} (=1 si Poisson, >1 = clustering)')\n",
"\n",
"# Sample autocorrelation of the per-second counts for lags 0..max_lag-1 (lag 0 is 1 by definition)\n",
"max_lag = 60\n",
"mean_a = lambda_mean\n",
"var_a = np.var(arrivals)\n",
"centered_arrivals = arrivals - mean_a\n",
"acf = np.array([1.0] + [\n",
"    np.mean(centered_arrivals[lag:] * centered_arrivals[:-lag]) / var_a\n",
"    for lag in range(1, max_lag)\n",
"])\n",
"\n",
"# Fit an exponential decay to the positive part of the ACF.\n",
"# NOTE(review): α/β of this ACF fit is used as a heuristic clustering score; it is not\n",
"# an estimate of the Hawkes kernel parameters obtained by likelihood — confirm it is\n",
"# what the simulator expects.\n",
"lags = np.arange(1, max_lag)\n",
"acf_vals = acf[1:]\n",
"positive_mask = acf_vals > 0\n",
"hawkes_a, hawkes_b, branching = 0, 1, 0  # fallback when there is nothing to fit or the fit fails\n",
"if np.sum(positive_mask) > 5:\n",
"    try:\n",
"        popt, _ = curve_fit(exp_fn, lags[positive_mask], acf_vals[positive_mask], p0=[0.3, 0.1], maxfev=5000)\n",
"    except (RuntimeError, ValueError) as fit_error:\n",
"        # curve_fit raises RuntimeError when the optimisation does not converge and\n",
"        # ValueError on invalid input; anything else (e.g. KeyboardInterrupt) must propagate.\n",
"        print(f'Ajuste exponencial fallido: {fit_error}')\n",
"    else:\n",
"        hawkes_a, hawkes_b = abs(popt[0]), abs(popt[1])\n",
"        branching = hawkes_a / hawkes_b\n",
"\n",
"print(f'\\nHawkes (ajuste exp a ACF):')\n",
"print(f' α ≈ {hawkes_a:.4f}')\n",
"print(f' β ≈ {hawkes_b:.4f}')\n",
"print(f' Branching ratio η = α/β = {branching:.3f} (< 1 = estacionario)')\n",
"\n",
"fig, axes = plt.subplots(1, 3, figsize=(16, 4))\n",
"\n",
"# ACF bars, fitted decay and the ±1.96/sqrt(n) white-noise band\n",
"ax = axes[0]\n",
"ax.bar(range(max_lag), acf, color='#e67e22', alpha=0.6)\n",
"if hawkes_a > 0:\n",
"    ax.plot(lags, exp_fn(lags, hawkes_a, hawkes_b), 'r-', linewidth=2, label=f'Exp fit: α={hawkes_a:.3f}, β={hawkes_b:.3f}')\n",
"    # Only draw a legend when there is a labelled artist; otherwise matplotlib warns\n",
"    ax.legend(fontsize=7)\n",
"ax.axhline(y=0, color='black', linewidth=0.5)\n",
"ci = 1.96 / np.sqrt(len(arrivals))\n",
"ax.axhline(y=ci, color='blue', linestyle='--', linewidth=0.8, alpha=0.5)\n",
"ax.axhline(y=-ci, color='blue', linestyle='--', linewidth=0.8, alpha=0.5)\n",
"ax.set_title('ACF trades/segundo')\n",
"ax.set_xlabel('Lag (s)')\n",
"ax.grid(True, alpha=0.3)\n",
"\n",
"# Distribution of the per-second counts\n",
"ax = axes[1]\n",
"ax.hist(arrivals, bins=50, density=True, color='#3498db', alpha=0.6)\n",
"ax.set_title(f'Trades/segundo (media={lambda_mean:.1f}, V/M={var_mean:.1f})')\n",
"ax.set_xlabel('Trades/s')\n",
"ax.grid(True, alpha=0.3)\n",
"\n",
"# Rolling mean rate over a 300-sample (5 minute) window\n",
"w = 300\n",
"rolling_lambda = np.convolve(arrivals, np.ones(w)/w, mode='valid')\n",
"ax = axes[2]\n",
"ax.plot(rolling_lambda, linewidth=0.5, color='#9b59b6')\n",
"ax.axhline(y=lambda_mean, color='black', linestyle='--', linewidth=0.8)\n",
"ax.set_title(f'λ rolling (ventana {w}s = 5min)')\n",
"ax.set_ylabel('Trades/s')\n",
"ax.grid(True, alpha=0.3)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.3 Distribución de tamaños (Pareto)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Strictly positive trade sizes (qty) and their qty * price value\n",
"qty_all = trades['qty'].to_numpy()\n",
"sizes = qty_all[qty_all > 0]\n",
"notional_all = (trades['qty'] * trades['price']).to_numpy()\n",
"costs = notional_all[notional_all > 0]\n",
"\n",
"# Pareto exponent by maximum likelihood on the upper tail (90th percentile and above)\n",
"x_min_qty = np.percentile(sizes, 90)\n",
"tail_qty = sizes[sizes >= x_min_qty]\n",
"alpha_qty = len(tail_qty) / np.sum(np.log(tail_qty / x_min_qty))\n",
"\n",
"x_min_cost = np.percentile(costs, 90)\n",
"tail_cost = costs[costs >= x_min_cost]\n",
"alpha_cost = len(tail_cost) / np.sum(np.log(tail_cost / x_min_cost))\n",
"\n",
"size_report = [\n",
"    f'Tamaños (BTC):',\n",
"    f' Mediana: {np.median(sizes):.6f} BTC',\n",
"    f' p99: {np.percentile(sizes, 99):.4f} BTC',\n",
"    f' Max: {sizes.max():.2f} BTC',\n",
"    f' Pareto α (cola p90+): {alpha_qty:.2f}',\n",
"    f'\\nTurnover (USDT):',\n",
"    f' Mediana: ${np.median(costs):,.0f}',\n",
"    f' p99: ${np.percentile(costs, 99):,.0f}',\n",
"    f' Max: ${costs.max():,.0f}',\n",
"    f' Pareto α (cola p90+): {alpha_cost:.2f}',\n",
"]\n",
"for report_line in size_report:\n",
"    print(report_line)\n",
"\n",
"fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
"\n",
"# Empirical CCDF on log-log axes with the fitted power law drawn over the tail\n",
"ccdf_panels = [\n",
"    (axes[0], sizes, alpha_qty, 'BTC', x_min_qty),\n",
"    (axes[1], costs, alpha_cost, 'USDT', x_min_cost),\n",
"]\n",
"for ax, values, exponent, unit, tail_start in ccdf_panels:\n",
"    n_values = len(values)\n",
"    descending = np.sort(values)[::-1]\n",
"    ccdf = np.arange(1, n_values + 1) / n_values\n",
"    ax.loglog(descending, ccdf, '.', markersize=0.5, alpha=0.3, color='#2ecc71')\n",
"\n",
"    # Anchor the fitted line at the empirical share of observations in the tail\n",
"    tail_share = len(values[values >= tail_start]) / n_values\n",
"    x_fit = np.logspace(np.log10(tail_start), np.log10(values.max()), 50)\n",
"    ax.loglog(x_fit, (x_fit/tail_start)**(-exponent) * tail_share,\n",
"              'r-', linewidth=2, label=f'Pareto α={exponent:.2f}')\n",
"    ax.set_title(f'CCDF tamaños ({unit})')\n",
"    ax.set_xlabel(unit)\n",
"    ax.set_ylabel('P(X > x)')\n",
"    ax.legend()\n",
"    ax.grid(True, alpha=0.3)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.4 Jumps y colas pesadas"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# A one-minute return counts as a jump when its magnitude exceeds three per-minute sigmas\n",
"threshold = 3 * sigma_1m\n",
"abs_returns = np.abs(returns)\n",
"jump_mask = abs_returns > threshold\n",
"n_jumps = np.sum(jump_mask)\n",
"jump_intensity = n_jumps / len(returns)\n",
"jump_sizes = abs_returns[jump_mask]\n",
"if len(jump_sizes) > 1:\n",
"    jump_size_std = np.std(jump_sizes)\n",
"else:\n",
"    # Fewer than two jumps: report a zero spread\n",
"    jump_size_std = 0\n",
"\n",
"print(f'Jumps detectados (>3σ): {n_jumps} de {len(returns)} ({jump_intensity*100:.1f}%)')\n",
"print(f'Jump size std: {jump_size_std:.6f}')\n",
"print(f'Kurtosis: {kurtosis:.1f} (Normal=3, >3 = colas pesadas)')\n",
"\n",
"# Return series with the detected jumps highlighted and the ±threshold band\n",
"fig, ax = plt.subplots(figsize=(16, 4))\n",
"ax.plot(returns, linewidth=0.3, color='#3498db', alpha=0.6)\n",
"jump_idx = np.where(jump_mask)[0]\n",
"ax.scatter(jump_idx, returns[jump_idx], color='red', s=10, zorder=5, label=f'Jumps ({n_jumps})')\n",
"for level in (threshold, -threshold):\n",
"    ax.axhline(y=level, color='red', linestyle='--', linewidth=0.5, alpha=0.5)\n",
"ax.set_title('Retornos 1m — jumps marcados en rojo')\n",
"ax.legend(fontsize=8)\n",
"ax.grid(True, alpha=0.3)\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2.5 Fills por aggTrade — estructura de las órdenes"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# n_fills column of every aggTrade\n",
"# NOTE(review): read here as the number of book levels swept by the taker order — confirm against the capture script\n",
"fills = trades['n_fills'].to_numpy()\n",
"\n",
"# Count and share of aggTrades in each fill-count bucket\n",
"fill_buckets = [\n",
"    (' 1 fill (no cruzó niveles)', fills == 1),\n",
"    (' 2-5 fills', (fills >= 2) & (fills <= 5)),\n",
"    (' 6-20 fills', (fills >= 6) & (fills <= 20)),\n",
"    (' >20 fills (ballenas)', fills > 20),\n",
"]\n",
"print(f'Fills por aggTrade:')\n",
"for caption, in_bucket in fill_buckets:\n",
"    print(f'{caption}: {np.sum(in_bucket):,} ({np.mean(in_bucket)*100:.1f}%)')\n",
"print(f' Max fills: {fills.max()}')\n",
"\n",
"fig, (ax_hist, ax_scatter) = plt.subplots(1, 2, figsize=(14, 4))\n",
"\n",
"# Histogram restricted to aggTrades with at most 20 fills\n",
"ax_hist.hist(fills[fills <= 20], bins=range(1, 22), color='#3498db', alpha=0.6, edgecolor='white')\n",
"ax_hist.set_title('Fills por aggTrade (≤20)')\n",
"ax_hist.set_xlabel('Número de fills')\n",
"ax_hist.set_ylabel('Frecuencia')\n",
"ax_hist.grid(True, alpha=0.3)\n",
"\n",
"# Quantity against fill count on log-log axes, on a fixed-seed sample of at most 50,000 trades\n",
"sample_size = min(50000, trades.shape[0])\n",
"sample = trades.sample(sample_size, seed=42)\n",
"ax_scatter.scatter(sample['n_fills'].to_numpy(), sample['qty'].to_numpy(), s=0.5, alpha=0.2, color='#e67e22')\n",
"ax_scatter.set_xlabel('Fills por aggTrade')\n",
"ax_scatter.set_ylabel('Qty (BTC)')\n",
"ax_scatter.set_title('Tamaño de orden vs fills (más grande = barre más niveles)')\n",
"ax_scatter.set_yscale('log')\n",
"ax_scatter.set_xscale('log')\n",
"ax_scatter.grid(True, alpha=0.3)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"## 3. Bitstamp L3: comparar con Binance"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Bitstamp L3 capture files; either may be missing or still being written\n",
"bs_orders_path = DATA / 'bitstamp_btcusd_l3_orders.csv'\n",
"bs_trades_path = DATA / 'bitstamp_btcusd_l3_trades.csv'\n",
"\n",
"if bs_orders_path.exists():\n",
"    bs_orders = pl.read_csv(str(bs_orders_path))\n",
"    print(f'Bitstamp L3 orders: {bs_orders.shape[0]:,}')\n",
"    print(bs_orders.group_by('event').agg(pl.len().alias('count')).sort('count', descending=True))\n",
"    print()\n",
"\n",
"    # Number of rows for each of the three event values counted below\n",
"    creates = bs_orders.filter(pl.col('event') == 'order_created').shape[0]\n",
"    deletes = bs_orders.filter(pl.col('event') == 'order_deleted').shape[0]\n",
"    changes = bs_orders.filter(pl.col('event') == 'order_changed').shape[0]\n",
"    print(f'Creadas: {creates:,} Borradas: {deletes:,} Cambiadas: {changes:,}')\n",
"\n",
"    if creates > 0:\n",
"        print(f'Ratio delete/create: {deletes/creates:.2f} (cercano a 1 = la mayoría se cancela sin ejecutar)')\n",
"\n",
"        # Interpretation of the ratio printed above\n",
"        print(f'\\nEsto revela algo fundamental: la mayoría de órdenes se CANCELAN, no se ejecutan.')\n",
"        print(f'Los makers constantemente ponen y quitan órdenes para ajustar sus quotes.')\n",
"    else:\n",
"        # A capture with no order_created rows (e.g. a file that is still being\n",
"        # recorded) would otherwise raise ZeroDivisionError on the ratio.\n",
"        print('Ratio delete/create: n/d (la captura no contiene órdenes creadas)')\n",
"\n",
"if bs_trades_path.exists():\n",
"    bs_trades = pl.read_csv(str(bs_trades_path))\n",
"    print(f'\\nBitstamp L3 trades: {bs_trades.shape[0]:,}')\n",
"    print(bs_trades.head(3))\n",
"\n",
"    # The trade rows carry the order ids of both sides of each trade\n",
"    n_buy_ids = bs_trades['buy_order_id'].n_unique()\n",
"    n_sell_ids = bs_trades['sell_order_id'].n_unique()\n",
"    print(f'\\nCon L3 vemos los IDs del buyer y seller de cada trade:')\n",
"    print(f' Unique buy_order_ids: {n_buy_ids:,}')\n",
"    print(f' Unique sell_order_ids: {n_sell_ids:,}')\n",
"\n",
"if not bs_orders_path.exists():\n",
"    print('Bitstamp L3 aún no disponible. Ejecutar notebook 05 primero.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"---\n",
"## 4. Resumen: parámetros calibrados desde datos reales\n",
"\n",
"Estos son los valores que usaríamos para que nuestra simulación genere datos similares a BTC/USDT."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Gather every estimate from the cells above into one printable report\n",
"banner = '=' * 65\n",
"summary_lines = [\n",
"    banner,\n",
"    ' PARÁMETROS CALIBRADOS DESDE BTC/USDT REAL',\n",
"    ' Dataset: 1M aggTrades, ~26 horas',\n",
"    banner,\n",
"    f'',\n",
"    f' # Precio fundamental',\n",
"    f' sigma = {sigma_1m:.6f} # por minuto',\n",
"    f' mu = {np.mean(returns):.8f} # drift (cercano a 0)',\n",
"    f'',\n",
"    f' # Jumps',\n",
"    f' jump_intensity = {jump_intensity:.4f} # {jump_intensity*100:.1f}% de velas tienen jump',\n",
"    f' jump_size_std = {jump_size_std:.6f}',\n",
"    f'',\n",
"    f' # Arrival rate',\n",
"    f' n_takers_lambda = {lambda_mean:.1f} # aggTrades/segundo',\n",
"    f'',\n",
"    f' # Hawkes clustering',\n",
"    f' hawkes_alpha = {hawkes_a:.4f}',\n",
"    f' hawkes_beta = {hawkes_b:.4f}',\n",
"    f' branching_ratio = {branching:.3f}',\n",
"    f'',\n",
"    f' # Distribución de tamaños',\n",
"    f' taker_size_alpha = {alpha_qty:.2f} # Pareto exponent (cola p90+)',\n",
"    f' taker_size_min = {np.percentile(sizes, 5):.6f} # BTC (p5)',\n",
"    f' taker_size_max = {np.percentile(sizes, 99.9):.4f} # BTC (p99.9)',\n",
"    f'',\n",
"    f' # Estructura de fills',\n",
"    f' median_fills_per_order = {np.median(fills):.0f}',\n",
"    f' pct_single_fill = {np.mean(fills==1)*100:.1f}%',\n",
"    f'',\n",
"    f' # Resumen estadístico',\n",
"    f' kurtosis = {kurtosis:.1f}',\n",
"    f' skewness = {skew:.3f}',\n",
"    f' var_mean_ratio = {var_mean:.1f}',\n",
"    banner,\n",
"]\n",
"# One print per entry, so the output is line-for-line the same as individual print calls\n",
"for summary_line in summary_lines:\n",
"    print(summary_line)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python",
"version": "3.13.0"
}
},
"nbformat": 4,
"nbformat_minor": 4
}