593 lines
21 KiB
Plaintext
593 lines
21 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"# Datos reales de Binance\n",
|
||
"\n",
|
||
"Usamos la API pública de Binance (gratis, sin API key) para obtener:\n",
|
||
"1. **Order book** (L2) — profundidad del libro en tiempo real\n",
|
||
"2. **Trades recientes** — los últimos fills ejecutados\n",
|
||
"3. **OHLCV** — velas históricas\n",
|
||
"\n",
|
||
"Después aplicamos las mismas técnicas de estimación del notebook 03 sobre datos reales."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Exchange: Binance\n",
|
||
"Rate limit: 50ms\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"import ccxt\n",
|
||
"import polars as pl\n",
|
||
"import numpy as np\n",
|
||
"import matplotlib.pyplot as plt\n",
|
||
"from datetime import datetime, timedelta\n",
|
||
"import time\n",
|
||
"\n",
|
"# Unauthenticated Binance client: no API key is configured here.\n",
"# enableRateLimit asks ccxt to throttle outgoing requests.\n",
"client_config = {'enableRateLimit': True}\n",
"exchange = ccxt.binance(client_config)\n",
"\n",
"print(f\"Exchange: {exchange.name}\")\n",
"print(f\"Rate limit: {exchange.rateLimit}ms\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 1. Elegir par y explorar qué hay disponible"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"SYMBOL = 'BTC/USDT'\n",
"\n",
"# Ticker snapshot; kept in `ticker` because later cells read ticker['last']\n",
"ticker = exchange.fetch_ticker(SYMBOL)\n",
"\n",
"print(\n",
"    f\"Par: {SYMBOL}\",\n",
"    f\"Último precio: {ticker['last']}\",\n",
"    f\"Bid: {ticker['bid']} Ask: {ticker['ask']}\",\n",
"    f\"Spread: {ticker['ask'] - ticker['bid']:.2f} ({(ticker['ask'] - ticker['bid']) / ticker['last'] * 100:.4f}%)\",\n",
"    f\"Volumen 24h: {ticker['baseVolume']:,.0f} BTC\",\n",
"    f\"Volumen 24h: ${ticker['quoteVolume']:,.0f} USDT\",\n",
"    sep='\\n',\n",
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 2. Order Book (L2)\n",
|
||
"\n",
|
||
"El order book de Binance te da los **niveles de precio agregados** — no ves órdenes individuales (no es L3).\n",
|
||
"Cada nivel muestra: precio y cantidad total a ese precio."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def fetch_orderbook(symbol: str, limit: int = 50) -> tuple[pl.DataFrame, int]:\n",
"    \"\"\"Fetch an L2 order book snapshot as a long-format DataFrame.\n",
"\n",
"    Args:\n",
"        symbol: Market symbol passed to ccxt, e.g. 'BTC/USDT'.\n",
"        limit: Number of price levels requested per side.\n",
"\n",
"    Returns:\n",
"        A ``(levels, timestamp_ms)`` tuple. ``levels`` has columns ``price``,\n",
"        ``qty`` and ``side`` ('bid' or 'ask'), with all bid rows first and\n",
"        then all ask rows, each side in the order ccxt returned it.\n",
"        ``timestamp_ms`` is the snapshot time in epoch milliseconds.\n",
"    \"\"\"\n",
"    ob = exchange.fetch_order_book(symbol, limit=limit)\n",
"\n",
"    sides = [\n",
"        pl.DataFrame(ob[key], schema=['price', 'qty'], orient='row')\n",
"        .with_columns(pl.lit(label).alias('side'))\n",
"        for key, label in (('bids', 'bid'), ('asks', 'ask'))\n",
"    ]\n",
"\n",
"    # ccxt reports timestamp=None when the exchange does not stamp the\n",
"    # snapshot (Binance spot does not); fall back to the local clock so\n",
"    # callers can always convert the value to a datetime.\n",
"    timestamp_ms = ob['timestamp']\n",
"    if timestamp_ms is None:\n",
"        timestamp_ms = exchange.milliseconds()\n",
"\n",
"    return pl.concat(sides), timestamp_ms\n",
"\n",
"\n",
"ob_df, ob_ts = fetch_orderbook(SYMBOL, limit=20)\n",
"print(f\"Timestamp: {datetime.fromtimestamp(ob_ts/1000)}\")\n",
"print(f\"\\nTop 5 bids:\")\n",
"print(ob_df.filter(pl.col('side') == 'bid').head(5))\n",
"print(f\"\\nTop 5 asks:\")\n",
"print(ob_df.filter(pl.col('side') == 'ask').head(5))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def plot_real_orderbook(ob_df: pl.DataFrame, symbol: str):\n",
"    \"\"\"Draw the cumulative depth chart of an order book snapshot.\n",
"\n",
"    Bid quantity is accumulated from the highest price downward and ask\n",
"    quantity from the lowest price upward. The title reports the\n",
"    top-of-book spread (absolute and relative to mid) and the mid price.\n",
"\n",
"    Args:\n",
"        ob_df: Frame with ``price``, ``qty`` and ``side`` ('bid'/'ask') columns.\n",
"        symbol: Market symbol, used only in the title.\n",
"    \"\"\"\n",
"    bid_levels = ob_df.filter(pl.col('side') == 'bid').sort('price', descending=True)\n",
"    ask_levels = ob_df.filter(pl.col('side') == 'ask').sort('price')\n",
"\n",
"    bid_px, ask_px = bid_levels['price'].to_numpy(), ask_levels['price'].to_numpy()\n",
"    bid_depth = np.cumsum(bid_levels['qty'].to_numpy())\n",
"    ask_depth = np.cumsum(ask_levels['qty'].to_numpy())\n",
"\n",
"    fig, ax = plt.subplots(figsize=(12, 5))\n",
"    depth_style = {'alpha': 0.5}\n",
"    ax.fill_between(bid_px, bid_depth, step='post', color='#2ecc71', label='Bids', **depth_style)\n",
"    ax.fill_between(ask_px, ask_depth, step='pre', color='#e74c3c', label='Asks', **depth_style)\n",
"    ax.set_xlabel('Precio (USDT)')\n",
"    ax.set_ylabel('Cantidad acumulada (BTC)')\n",
"\n",
"    # Top of book is the first element of each sorted side\n",
"    top_bid, top_ask = bid_px[0], ask_px[0]\n",
"    spread = top_ask - top_bid\n",
"    mid = (top_bid + top_ask) / 2\n",
"\n",
"    ax.axvline(x=mid, color='gray', linestyle='--', linewidth=0.8)\n",
"    ax.set_title(f'{symbol} Order Book — Spread: ${spread:.2f} ({spread/mid*100:.4f}%) — Mid: ${mid:,.2f}')\n",
"    ax.legend()\n",
"    ax.grid(True, alpha=0.3)\n",
"    plt.tight_layout()\n",
"    plt.show()\n",
"\n",
"\n",
"plot_real_orderbook(ob_df, SYMBOL)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 3. Trades recientes (fills)\n",
|
||
"\n",
|
||
"Esto es lo que ves en el tape público. Cada trade es un **fill** — no sabes si vienen de la misma orden."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def fetch_trades(symbol: str, limit: int = 1000) -> pl.DataFrame:\n",
"    \"\"\"Fetch recent public trades, one row per fill.\n",
"\n",
"    Args:\n",
"        symbol: Market symbol passed to ccxt.\n",
"        limit: Maximum number of trades requested.\n",
"\n",
"    Returns:\n",
"        Frame with columns timestamp, datetime, price, qty, side and cost,\n",
"        copied from the corresponding fields of each ccxt trade dict.\n",
"    \"\"\"\n",
"    # output column -> field of the ccxt trade dict\n",
"    field_by_column = {\n",
"        'timestamp': 'timestamp',\n",
"        'datetime': 'datetime',\n",
"        'price': 'price',\n",
"        'qty': 'amount',\n",
"        'side': 'side',  # taker side\n",
"        'cost': 'cost',  # price * qty in quote currency\n",
"    }\n",
"    fills = exchange.fetch_trades(symbol, limit=limit)\n",
"    rows = [\n",
"        {column: fill[field] for column, field in field_by_column.items()}\n",
"        for fill in fills\n",
"    ]\n",
"    return pl.DataFrame(rows)\n",
"\n",
"\n",
"trades = fetch_trades(SYMBOL, limit=1000)\n",
"print(\n",
"    f\"Trades obtenidos: {trades.shape[0]}\",\n",
"    f\"Rango: {trades['datetime'].min()} → {trades['datetime'].max()}\",\n",
"    f\"\\nÚltimos 5 trades:\",\n",
"    sep='\\n',\n",
")\n",
"print(trades.tail(5))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# Basic statistics of the trade tape, split by the reported side of each fill\n",
"buys, sells = (trades.filter(pl.col('side') == label) for label in ('buy', 'sell'))\n",
"\n",
"print(\n",
"    f\"Buy trades: {buys.shape[0]} ({buys.shape[0]/trades.shape[0]*100:.1f}%)\",\n",
"    f\"Sell trades: {sells.shape[0]} ({sells.shape[0]/trades.shape[0]*100:.1f}%)\",\n",
"    f\"\\nTamaño promedio: {trades['qty'].mean():.6f} BTC\",\n",
"    f\"Tamaño mediano: {trades['qty'].median():.6f} BTC\",\n",
"    f\"Tamaño máximo: {trades['qty'].max():.6f} BTC\",\n",
"    f\"\\nPrecio min: ${trades['price'].min():,.2f}\",\n",
"    f\"Precio max: ${trades['price'].max():,.2f}\",\n",
"    f\"Rango: ${trades['price'].max() - trades['price'].min():,.2f}\",\n",
"    sep='\\n',\n",
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 4. Velas históricas (OHLCV)\n",
|
||
"\n",
|
||
"Las velas agregan trades en intervalos. Útiles para estimar σ en distintos timeframes."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def fetch_ohlcv(symbol: str, timeframe: str = '1m', limit: int = 500) -> pl.DataFrame:\n",
"    \"\"\"Fetch OHLCV candles and append a parsed ``datetime`` column.\n",
"\n",
"    Args:\n",
"        symbol: Market symbol passed to ccxt.\n",
"        timeframe: Candle interval string passed to ccxt.\n",
"        limit: Maximum number of candles requested.\n",
"\n",
"    Returns:\n",
"        Frame with columns timestamp, open, high, low, close, volume, plus\n",
"        ``datetime`` derived from ``timestamp``.\n",
"    \"\"\"\n",
"    column_names = ['timestamp', 'open', 'high', 'low', 'close', 'volume']\n",
"    candles = pl.DataFrame(\n",
"        exchange.fetch_ohlcv(symbol, timeframe=timeframe, limit=limit),\n",
"        schema=column_names,\n",
"        orient='row',\n",
"    )\n",
"    # The epoch value is multiplied by 1000 before casting to a\n",
"    # microsecond-resolution Datetime, i.e. it is treated as milliseconds.\n",
"    epoch_us = pl.col('timestamp').cast(pl.Int64) * 1000\n",
"    return candles.with_columns(epoch_us.cast(pl.Datetime('us')).alias('datetime'))\n",
"\n",
"\n",
"# 1-minute candles, latest 500\n",
"ohlcv_1m = fetch_ohlcv(SYMBOL, '1m', 500)\n",
"print(\n",
"    f\"Velas 1m: {ohlcv_1m.shape[0]}\",\n",
"    f\"Rango: {ohlcv_1m['datetime'].min()} → {ohlcv_1m['datetime'].max()}\",\n",
"    sep='\\n',\n",
")\n",
"print(ohlcv_1m.tail(3))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def plot_candles_and_volume(ohlcv: pl.DataFrame, symbol: str, timeframe: str):\n",
"    \"\"\"Plot candles (top panel) and per-candle volume (bottom panel).\n",
"\n",
"    The shared x axis is the candle's position within ``ohlcv``, not its\n",
"    timestamp. A candle is green when close >= open and red otherwise; the\n",
"    volume bars reuse the same colours.\n",
"\n",
"    Args:\n",
"        ohlcv: Frame with ``open``, ``high``, ``low``, ``close`` and ``volume`` columns.\n",
"        symbol: Market symbol, used only in the title.\n",
"        timeframe: Candle interval label, used only in the title.\n",
"    \"\"\"\n",
"    fig, (price_ax, volume_ax) = plt.subplots(\n",
"        2, 1, figsize=(14, 7), gridspec_kw={'height_ratios': [3, 1]}, sharex=True\n",
"    )\n",
"\n",
"    opens = ohlcv['open'].to_numpy()\n",
"    closes = ohlcv['close'].to_numpy()\n",
"    highs = ohlcv['high'].to_numpy()\n",
"    lows = ohlcv['low'].to_numpy()\n",
"    volumes = ohlcv['volume'].to_numpy()\n",
"    n_candles = len(opens)\n",
"\n",
"    colors = ['#2ecc71' if c >= o else '#e74c3c' for o, c in zip(opens, closes)]\n",
"\n",
"    # Candles: a thin high-low wick with a thicker open-close body on top\n",
"    for i in range(n_candles):\n",
"        price_ax.plot([i, i], [lows[i], highs[i]], color=colors[i], linewidth=0.5)\n",
"        price_ax.plot([i, i], [opens[i], closes[i]], color=colors[i], linewidth=2)\n",
"    price_ax.set_ylabel('Precio (USDT)')\n",
"    price_ax.set_title(f'{symbol} — {timeframe} candles')\n",
"    price_ax.grid(True, alpha=0.3)\n",
"\n",
"    # Volume\n",
"    volume_ax.bar(range(n_candles), volumes, color=colors, alpha=0.6, width=0.8)\n",
"    volume_ax.set_ylabel('Volumen (BTC)')\n",
"    volume_ax.set_xlabel('Vela')\n",
"    volume_ax.grid(True, alpha=0.3)\n",
"\n",
"    plt.tight_layout()\n",
"    plt.show()\n",
"\n",
"\n",
"# Only the last 100 candles so individual candles stay readable\n",
"plot_candles_and_volume(ohlcv_1m.tail(100), SYMBOL, '1m')"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"---\n",
|
||
"\n",
|
||
"## 5. Estimación de parámetros sobre datos reales\n",
|
||
"\n",
|
||
"Aplicamos las mismas técnicas del notebook 03 pero sobre BTC/USDT real."
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 5.1 Volatilidad (σ) desde velas"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# Close-to-close log returns of the 1-minute candles\n",
"closes = ohlcv_1m['close'].to_numpy()\n",
"log_returns = np.diff(np.log(closes))\n",
"\n",
"# Per-minute volatility, then square-root-of-time scaling to longer horizons\n",
"sigma_1m = np.std(log_returns)\n",
"sigma_1h, sigma_1d = (sigma_1m * np.sqrt(minutes) for minutes in (60, 60 * 24))\n",
"sigma_annual = sigma_1d * np.sqrt(365)  # 365 days per year\n",
"\n",
"print(\n",
"    f\"σ por minuto: {sigma_1m:.6f}\",\n",
"    f\"σ por hora: {sigma_1h:.6f}\",\n",
"    f\"σ por día: {sigma_1d:.4f} ({sigma_1d*100:.2f}%)\",\n",
"    f\"σ anualizada: {sigma_annual:.4f} ({sigma_annual*100:.1f}%)\",\n",
"    sep='\\n',\n",
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 5.2 Arrival rate (λ) de trades"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# Gaps between consecutive trades (timestamps are divided by 1000 to get seconds)\n",
"timestamps = trades['timestamp'].to_numpy()\n",
"inter_arrivals_ms = np.diff(timestamps)\n",
"\n",
"# Drop zero gaps: trades in the same millisecond are probably one matching event\n",
"has_gap = inter_arrivals_ms > 0\n",
"inter_arrivals_s = (inter_arrivals_ms / 1000.0)[has_gap]\n",
"\n",
"# Arrival rate is the reciprocal of the mean gap\n",
"mean_gap_s = np.mean(inter_arrivals_s)\n",
"lambda_per_sec = 1.0 / mean_gap_s\n",
"lambda_per_min = 60 * lambda_per_sec\n",
"\n",
"print(\n",
"    f\"Tiempo medio entre trades: {np.mean(inter_arrivals_s)*1000:.1f} ms\",\n",
"    f\"Tiempo mediano entre trades: {np.median(inter_arrivals_s)*1000:.1f} ms\",\n",
"    f\"λ (trades/segundo): {lambda_per_sec:.1f}\",\n",
"    f\"λ (trades/minuto): {lambda_per_min:.0f}\",\n",
"    f\"\\nRecuerda: esto son FILLS, no órdenes originales\",\n",
"    sep='\\n',\n",
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 5.3 Clustering (Hawkes) — ¿los trades generan más trades?"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# Count trades in every one-second bin between the first and last trade.\n",
"# Seconds with no trades must appear as zeros: a group_by over observed\n",
"# seconds silently drops them, which makes the series unevenly spaced (so\n",
"# ACF lags are no longer seconds) and inflates the mean used for Var/Mean.\n",
"seconds = trades['timestamp'].to_numpy() // 1000\n",
"first_second = int(seconds.min())\n",
"arrivals = np.bincount(seconds - first_second)\n",
"trades_per_sec = pl.DataFrame({\n",
"    'second': np.arange(first_second, first_second + len(arrivals)),\n",
"    'n_trades': arrivals,\n",
"})\n",
"\n",
"# Autocorrelation of the per-second counts. Lags are capped at the series\n",
"# length so every lag has at least one overlapping pair (no empty slices).\n",
"max_lag = min(30, len(arrivals))\n",
"mean_arr = np.mean(arrivals)\n",
"var_arr = np.var(arrivals)\n",
"centered = arrivals - mean_arr\n",
"acf = np.array([\n",
"    1.0 if lag == 0 else np.mean(centered[lag:] * centered[:-lag]) / var_arr\n",
"    for lag in range(max_lag)\n",
"])\n",
"\n",
"# Var/Mean ratio (dispersion index): 1 for a Poisson process\n",
"dispersion = var_arr / mean_arr\n",
"\n",
"fig, axes = plt.subplots(1, 2, figsize=(14, 4))\n",
"\n",
"# Approximate 95% band for the ACF of an uncorrelated series of this length\n",
"ci = 1.96 / np.sqrt(len(arrivals))\n",
"axes[0].bar(range(max_lag), acf, color='#e67e22', alpha=0.6)\n",
"axes[0].axhline(y=0, color='black', linewidth=0.5)\n",
"axes[0].axhline(y=ci, color='blue', linestyle='--', linewidth=0.8, label='95% CI')\n",
"axes[0].axhline(y=-ci, color='blue', linestyle='--', linewidth=0.8)\n",
"axes[0].set_title('Autocorrelación de trades/segundo')\n",
"axes[0].set_xlabel('Lag (segundos)')\n",
"axes[0].legend(fontsize=8)\n",
"axes[0].grid(True, alpha=0.3)\n",
"\n",
"axes[1].hist(arrivals, bins=50, color='#3498db', alpha=0.6, density=True)\n",
"axes[1].set_title(f'Distribución de trades/segundo\\nMedia={mean_arr:.1f}, Var/Mean={dispersion:.1f}')\n",
"axes[1].set_xlabel('Trades por segundo')\n",
"axes[1].grid(True, alpha=0.3)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"print(f\"Var/Mean ratio: {dispersion:.2f}\")\n",
"if dispersion > 1.5:\n",
"    print(\" → Hay clustering significativo (Hawkes). Los trades generan más trades.\")\n",
"else:\n",
"    print(\" → Cercano a Poisson. Poco clustering.\")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 5.4 Distribución de tamaños — ¿hay ballenas?"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# Strictly positive trade sizes\n",
"qty = trades['qty'].to_numpy()\n",
"sizes = qty[qty > 0]\n",
"\n",
"# Pareto exponent (MLE) fitted on the tail, using the 90th percentile as x_min\n",
"x_min = np.percentile(sizes, 90)\n",
"tail = sizes[sizes >= x_min]\n",
"alpha_est = len(tail) / np.sum(np.log(tail / x_min))\n",
"\n",
"fig, (hist_ax, ccdf_ax) = plt.subplots(1, 2, figsize=(14, 4))\n",
"\n",
"# Histogram with a log-scaled density axis\n",
"hist_ax.hist(sizes, bins=100, color='#2ecc71', alpha=0.6, density=True)\n",
"hist_ax.set_title('Distribución de tamaños de trades')\n",
"hist_ax.set_xlabel('Tamaño (BTC)')\n",
"hist_ax.set_yscale('log')\n",
"hist_ax.grid(True, alpha=0.3)\n",
"\n",
"# Empirical CCDF on log-log axes: sizes in descending order against rank / n\n",
"n_sizes = len(sizes)\n",
"sizes_desc = np.sort(sizes)[::-1]\n",
"survival = np.arange(1, n_sizes + 1) / n_sizes\n",
"ccdf_ax.loglog(sizes_desc, survival, '.', markersize=1, alpha=0.4, color='#2ecc71')\n",
"\n",
"# Fitted Pareto tail, rescaled by the share of trades that fall in the tail\n",
"x_fit = np.logspace(np.log10(x_min), np.log10(sizes.max()), 50)\n",
"tail_share = len(tail) / n_sizes\n",
"pareto_ccdf = (x_fit / x_min) ** (-alpha_est) * tail_share\n",
"ccdf_ax.loglog(x_fit, pareto_ccdf, 'r-', linewidth=2, label=f'Pareto α={alpha_est:.2f}')\n",
"ccdf_ax.set_title('CCDF (complementary CDF) — cola pesada')\n",
"ccdf_ax.set_xlabel('Tamaño (BTC)')\n",
"ccdf_ax.set_ylabel('P(X > x)')\n",
"ccdf_ax.legend()\n",
"ccdf_ax.grid(True, alpha=0.3)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"print(\n",
"    f\"Tamaño mediano: {np.median(sizes):.6f} BTC (${np.median(sizes) * ticker['last']:,.2f})\",\n",
"    f\"Tamaño p99: {np.percentile(sizes, 99):.6f} BTC (${np.percentile(sizes, 99) * ticker['last']:,.2f})\",\n",
"    f\"Tamaño max: {sizes.max():.6f} BTC (${sizes.max() * ticker['last']:,.2f})\",\n",
"    f\"Pareto α (cola): {alpha_est:.2f}\",\n",
"    sep='\\n',\n",
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"### 5.5 Detección de jumps en retornos reales"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"from scipy.stats import probplot\n",
"\n",
"# Flag 1-minute returns whose absolute value exceeds three per-minute sigmas\n",
"threshold = 3 * sigma_1m\n",
"jump_mask = np.abs(log_returns) > threshold\n",
"jump_indices = np.flatnonzero(jump_mask)\n",
"n_jumps = jump_mask.sum()\n",
"jump_intensity = n_jumps / len(log_returns)\n",
"\n",
"fig, (returns_ax, qq_ax) = plt.subplots(1, 2, figsize=(14, 4))\n",
"\n",
"# Return series with the flagged observations highlighted\n",
"returns_ax.plot(log_returns, linewidth=0.5, color='#3498db', alpha=0.6)\n",
"returns_ax.scatter(jump_indices, log_returns[jump_indices], color='red', s=20, zorder=5, label=f'Jumps ({n_jumps})')\n",
"for level in (threshold, -threshold):\n",
"    returns_ax.axhline(y=level, color='red', linestyle='--', linewidth=0.8, alpha=0.5)\n",
"returns_ax.set_title('Retornos 1m con jumps detectados (> 3σ)')\n",
"returns_ax.set_ylabel('Log-return')\n",
"returns_ax.legend(fontsize=8)\n",
"returns_ax.grid(True, alpha=0.3)\n",
"\n",
"# QQ plot of the returns against a normal distribution\n",
"probplot(log_returns, dist=\"norm\", plot=qq_ax)\n",
"qq_ax.set_title('QQ-Plot: retornos vs Normal\\n(colas pesadas = desviación en extremos)')\n",
"qq_ax.grid(True, alpha=0.3)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"print(\n",
"    f\"Jumps detectados: {n_jumps} de {len(log_returns)} velas ({jump_intensity*100:.1f}%)\",\n",
"    f\"Kurtosis: {float(np.mean((log_returns - np.mean(log_returns))**4) / np.std(log_returns)**4):.1f} (Normal=3, >3 = colas pesadas)\",\n",
"    sep='\\n',\n",
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"---\n",
|
||
"\n",
|
||
"## 6. Resumen: perfil del mercado BTC/USDT"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# Top of book from the stored snapshot: highest bid and lowest ask\n",
"bid_prices = ob_df.filter(pl.col('side') == 'bid')['price']\n",
"ask_prices = ob_df.filter(pl.col('side') == 'ask')['price']\n",
"best_bid, best_ask = bid_prices.max(), ask_prices.min()\n",
"spread = best_ask - best_bid\n",
"\n",
"# One report line per argument; empty strings are blank separator lines\n",
"rule = \"=\" * 60\n",
"print(\n",
"    rule,\n",
"    f\" PERFIL DE MERCADO: {SYMBOL}\",\n",
"    f\" {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\",\n",
"    rule,\n",
"    \"\",\n",
"    f\" Precio: ${ticker['last']:,.2f}\",\n",
"    f\" Spread: ${spread:.2f} ({spread/ticker['last']*100:.4f}%)\",\n",
"    f\" Vol 24h: {ticker['baseVolume']:,.0f} BTC\",\n",
"    \"\",\n",
"    f\" σ (1 min): {sigma_1m:.6f}\",\n",
"    f\" σ (diaria): {sigma_1d:.4f} ({sigma_1d*100:.2f}%)\",\n",
"    f\" σ (anual): {sigma_annual:.2f} ({sigma_annual*100:.0f}%)\",\n",
"    \"\",\n",
"    f\" λ (fills/seg): {lambda_per_sec:.1f}\",\n",
"    f\" Clustering: Var/Mean = {dispersion:.1f} {'(Hawkes)' if dispersion > 1.5 else '(~Poisson)'}\",\n",
"    \"\",\n",
"    f\" Tamaño mediano: {np.median(sizes):.6f} BTC\",\n",
"    f\" Pareto α: {alpha_est:.2f}\",\n",
"    f\" Kurtosis: {float(np.mean((log_returns - np.mean(log_returns))**4) / np.std(log_returns)**4):.1f}\",\n",
"    f\" Jumps (>3σ): {jump_intensity*100:.1f}%\",\n",
"    \"\",\n",
"    rule,\n",
"    sep='\\n',\n",
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
||
"## 7. Guardar datos para análisis offline"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": null,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"from pathlib import Path\n",
"\n",
"# Save everything under ../data (relative to the notebook). The directory is\n",
"# created on demand so the cell does not fail when it does not exist yet.\n",
"data_dir = Path('../data')\n",
"data_dir.mkdir(parents=True, exist_ok=True)\n",
"\n",
"trades.write_csv(data_dir / 'binance_btcusdt_trades.csv')\n",
"ohlcv_1m.write_csv(data_dir / 'binance_btcusdt_ohlcv_1m.csv')\n",
"ob_df.write_csv(data_dir / 'binance_btcusdt_orderbook.csv')\n",
"\n",
"print(f\"Guardados en data/:\")\n",
"print(f\" binance_btcusdt_trades.csv ({trades.shape[0]} trades)\")\n",
"print(f\" binance_btcusdt_ohlcv_1m.csv ({ohlcv_1m.shape[0]} velas)\")\n",
"print(f\" binance_btcusdt_orderbook.csv ({ob_df.shape[0]} niveles)\")"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3 (ipykernel)",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.13.7"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 4
|
||
}
|