1247 lines
48 KiB
Plaintext
1247 lines
48 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Recolección de datos: Binance + Bitstamp L3\n",
|
|
"\n",
|
|
"**Objetivo:** Dataset de 1M+ filas guardado en `data/`\n",
|
|
"\n",
|
|
"| Fuente | Tipo | Método | Qué obtenemos |\n",
|
|
"|---|---|---|---|\n",
|
|
"| Binance | aggTrades (fills agrupados por taker) | REST paginado | 1M+ trades históricos |\n",
|
|
"| Binance | Order book L2 | REST snapshots | Profundidad del libro |\n",
|
|
"| Bitstamp | L3 live_orders | WebSocket | Cada orden individual con ID |"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 1,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Data dir: /home/lucas/fn_registry/analysis/estudio_mercados/data\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import aiohttp\n",
|
|
"import asyncio\n",
|
|
"import websockets\n",
|
|
"import json\n",
|
|
"import time\n",
|
|
"import polars as pl\n",
|
|
"import numpy as np\n",
|
|
"from datetime import datetime, timedelta\n",
|
|
"from pathlib import Path\n",
|
|
"\n",
|
|
"DATA_DIR = Path('../data')\n",
|
|
"DATA_DIR.mkdir(exist_ok=True)\n",
|
|
"\n",
|
|
"BINANCE_BASE = 'https://api.binance.com'\n",
|
|
"BITSTAMP_WS = 'wss://ws.bitstamp.net'\n",
|
|
"\n",
|
|
"print(f'Data dir: {DATA_DIR.resolve()}')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"---\n",
|
|
"## 1. Binance aggTrades — 1M+ filas\n",
|
|
"\n",
|
|
"Los `aggTrades` agrupan fills de la misma taker order:\n",
|
|
"- Cada fila = 1 taker order (o parte si cruzó muchos niveles)\n",
|
|
"- Campo `a` = aggregate trade ID\n",
|
|
"- Campo `m` = true si el maker es buyer (taker es seller)\n",
|
|
"- Paginamos con `fromId` hacia adelante en el tiempo: cada página pide los trades a partir del último ID recibido + 1"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"fetch_binance_agg_trades() definida\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"async def fetch_binance_agg_trades(\n",
"    symbol: str = 'BTCUSDT',\n",
"    target_rows: int = 1_000_000,\n",
"    batch_size: int = 1000,\n",
") -> pl.DataFrame:\n",
"    \"\"\"Download the most recent Binance aggTrades for ``symbol``.\n",
"\n",
"    The aggTrades endpoint pages forward from ``fromId``, so the newest\n",
"    aggregate trade ID is looked up first and the download starts\n",
"    ``target_rows`` IDs before it, walking forward one batch at a time.\n",
"\n",
"    Raw fields of each aggTrade:\n",
"    - a: aggregate trade id\n",
"    - p: price\n",
"    - q: quantity\n",
"    - f: first trade id\n",
"    - l: last trade id\n",
"    - T: timestamp (epoch milliseconds)\n",
"    - m: was the buyer the maker? (true = taker sold, false = taker bought)\n",
"\n",
"    Args:\n",
"        symbol: Binance symbol to download.\n",
"        target_rows: Number of rows to collect; the loop stops once at\n",
"            least this many have been downloaded.\n",
"        batch_size: Rows requested per call (the ``limit`` parameter).\n",
"\n",
"    Returns:\n",
"        DataFrame with one row per aggTrade, in the order the API returned\n",
"        them. It is empty (no columns) when nothing could be downloaded.\n",
"    \"\"\"\n",
"    url = f'{BINANCE_BASE}/api/v3/aggTrades'\n",
"    report_every = 50_000\n",
"    all_records = []\n",
"    from_id = None\n",
"    total = 0\n",
"    next_report = report_every\n",
"    start_time = time.time()\n",
"\n",
"    async with aiohttp.ClientSession() as session:\n",
"        # Without fromId the endpoint answers with the newest trades, so a\n",
"        # single row is enough to learn the latest aggregate trade ID.\n",
"        async with session.get(url, params={'symbol': symbol, 'limit': 1}) as resp:\n",
"            if resp.status == 200:\n",
"                newest = await resp.json()\n",
"                if newest:\n",
"                    # Clamp at 0 for symbols with fewer than target_rows trades.\n",
"                    from_id = max(0, newest[-1]['a'] - target_rows + 1)\n",
"            else:\n",
"                text = await resp.text()\n",
"                print(f'Error {resp.status}: {text}')\n",
"\n",
"        while from_id is not None and total < target_rows:\n",
"            params = {'symbol': symbol, 'fromId': from_id, 'limit': batch_size}\n",
"\n",
"            async with session.get(url, params=params) as resp:\n",
"                if resp.status == 429:\n",
"                    # Rate limited: wait as instructed (10 s fallback) and\n",
"                    # retry the same page instead of aborting the download.\n",
"                    retry_after = resp.headers.get('Retry-After', '')\n",
"                    await asyncio.sleep(float(retry_after) if retry_after.isdigit() else 10.0)\n",
"                    continue\n",
"                if resp.status != 200:\n",
"                    text = await resp.text()\n",
"                    print(f'Error {resp.status}: {text}')\n",
"                    break\n",
"                data = await resp.json()\n",
"\n",
"            if not data:\n",
"                break\n",
"\n",
"            for row in data:\n",
"                all_records.append({\n",
"                    'agg_trade_id': row['a'],\n",
"                    'price': float(row['p']),\n",
"                    'qty': float(row['q']),\n",
"                    'first_trade_id': row['f'],\n",
"                    'last_trade_id': row['l'],\n",
"                    'timestamp': row['T'],\n",
"                    'is_buyer_maker': row['m'],  # True = taker sold\n",
"                    'side': 'sell' if row['m'] else 'buy',  # taker side\n",
"                    'n_fills': row['l'] - row['f'] + 1,  # fills inside this aggTrade\n",
"                })\n",
"\n",
"            # The next page starts right after the last ID received.\n",
"            from_id = data[-1]['a'] + 1\n",
"            total += len(data)\n",
"\n",
"            # Threshold-based reporting still fires when a page is short.\n",
"            if total >= next_report:\n",
"                elapsed = time.time() - start_time\n",
"                rate = total / elapsed if elapsed > 0 else 0\n",
"                eta = max(target_rows - total, 0) / rate if rate > 0 else 0\n",
"                ts = datetime.fromtimestamp(data[-1]['T'] / 1000)\n",
"                print(f' {total:>8,} rows | {rate:,.0f} rows/s | ETA {eta:.0f}s | hasta {ts}')\n",
"                next_report = (total // report_every + 1) * report_every\n",
"\n",
"            # Short pause between requests to stay under the API rate limit\n",
"            # (NOTE(review): confirm the current request-weight limit).\n",
"            await asyncio.sleep(0.05)\n",
"\n",
"    elapsed = time.time() - start_time\n",
"    rate = total / elapsed if elapsed > 0 else 0\n",
"    print(f'\\nDescargados {total:,} aggTrades en {elapsed:.1f}s ({rate:,.0f} rows/s)')\n",
"\n",
"    return pl.DataFrame(all_records)\n",
"\n",
"\n",
"print('fetch_binance_agg_trades() definida')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 3,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"Descargados 1,000 aggTrades en 0.5s (1,871 rows/s)\n",
|
|
"\n",
|
|
"Shape: (1000, 9)\n",
|
|
"Columnas: ['agg_trade_id', 'price', 'qty', 'first_trade_id', 'last_trade_id', 'timestamp', 'is_buyer_maker', 'side', 'n_fills']\n",
|
|
"shape: (5, 9)\n",
|
|
"┌──────────────┬──────────┬─────────┬─────────────┬───┬─────────────┬─────────────┬──────┬─────────┐\n",
|
|
"│ agg_trade_id ┆ price ┆ qty ┆ first_trade ┆ … ┆ timestamp ┆ is_buyer_ma ┆ side ┆ n_fills │\n",
|
|
"│ --- ┆ --- ┆ --- ┆ _id ┆ ┆ --- ┆ ker ┆ --- ┆ --- │\n",
|
|
"│ i64 ┆ f64 ┆ f64 ┆ --- ┆ ┆ i64 ┆ --- ┆ str ┆ i64 │\n",
|
|
"│ ┆ ┆ ┆ i64 ┆ ┆ ┆ bool ┆ ┆ │\n",
|
|
"╞══════════════╪══════════╪═════════╪═════════════╪═══╪═════════════╪═════════════╪══════╪═════════╡\n",
|
|
"│ 3924566726 ┆ 66916.1 ┆ 0.00122 ┆ 6182451140 ┆ … ┆ 17752265945 ┆ false ┆ buy ┆ 1 │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ 74 ┆ ┆ ┆ │\n",
|
|
"│ 3924566727 ┆ 66916.09 ┆ 0.00047 ┆ 6182451141 ┆ … ┆ 17752265955 ┆ true ┆ sell ┆ 1 │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ 80 ┆ ┆ ┆ │\n",
|
|
"│ 3924566728 ┆ 66916.1 ┆ 0.00034 ┆ 6182451142 ┆ … ┆ 17752265956 ┆ false ┆ buy ┆ 1 │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ 00 ┆ ┆ ┆ │\n",
|
|
"│ 3924566729 ┆ 66916.1 ┆ 0.03403 ┆ 6182451143 ┆ … ┆ 17752265957 ┆ false ┆ buy ┆ 1 │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ 48 ┆ ┆ ┆ │\n",
|
|
"│ 3924566730 ┆ 66916.1 ┆ 0.00121 ┆ 6182451144 ┆ … ┆ 17752265966 ┆ false ┆ buy ┆ 1 │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ 48 ┆ ┆ ┆ │\n",
|
|
"└──────────────┴──────────┴─────────┴─────────────┴───┴─────────────┴─────────────┴──────┴─────────┘\n",
|
|
"\n",
|
|
"Rango temporal:\n",
|
|
" 2026-04-03 16:29:54.574000 → 2026-04-03 16:31:32.561000\n",
|
|
" Duración: 0:01:37.987000\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Download 1M+ BTC/USDT aggTrades\n",
"binance_trades = await fetch_binance_agg_trades('BTCUSDT', target_rows=1_000_000)\n",
"\n",
"print(f'\\nShape: {binance_trades.shape}')\n",
"print(f'Columnas: {binance_trades.columns}')\n",
"print(binance_trades.head(5))\n",
"print('\\nRango temporal:')\n",
"# Timestamps are epoch milliseconds; convert both extremes to datetimes.\n",
"t_min, t_max = (\n",
"    datetime.fromtimestamp(edge_ms / 1000)\n",
"    for edge_ms in (binance_trades['timestamp'].min(), binance_trades['timestamp'].max())\n",
")\n",
"print(f' {t_min} → {t_max}')\n",
"print(f' Duración: {t_max - t_min}')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Persist the Binance aggTrades to CSV\n",
"out_path = DATA_DIR / 'binance_btcusdt_aggtrades.csv'\n",
"binance_trades.write_csv(str(out_path))\n",
"size_mb = out_path.stat().st_size / (1024 * 1024)\n",
"print(f'Guardado: {out_path}')\n",
"print(f' {binance_trades.height:,} filas, {size_mb:.1f} MB')\n",
"\n",
"# Quick summary statistics\n",
"side_counts = {\n",
"    taker_side: binance_trades.filter(pl.col('side') == taker_side).height\n",
"    for taker_side in ('buy', 'sell')\n",
"}\n",
"price_col = binance_trades['price']\n",
"qty_col = binance_trades['qty']\n",
"fills_col = binance_trades['n_fills']\n",
"print('\\nEstadísticas:')\n",
"print(f' Buys (taker): {side_counts[\"buy\"]:,}')\n",
"print(f' Sells (taker): {side_counts[\"sell\"]:,}')\n",
"print(f' Precio min: ${price_col.min():,.2f}')\n",
"print(f' Precio max: ${price_col.max():,.2f}')\n",
"print(f' Qty mediana: {qty_col.median():.6f} BTC')\n",
"print(f' Qty max: {qty_col.max():.4f} BTC')\n",
"print(f' Fills/aggTrade mediana: {fills_col.median():.0f}')\n",
"print(f' Fills/aggTrade max: {fills_col.max()}')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"---\n",
|
|
"## 2. Bitstamp L3 — órdenes individuales via WebSocket\n",
|
|
"\n",
|
|
"Cada mensaje tiene:\n",
|
|
"- `id`: ID único de la orden\n",
|
|
"- `order_type`: 0 = buy, 1 = sell\n",
|
|
"- `price`, `amount`\n",
|
|
"- `datetime`, `microtimestamp`\n",
|
|
"\n",
|
|
"Los canales:\n",
|
|
"- `live_orders_btcusd`: cada orden creada, modificada o eliminada\n",
|
|
"- `live_trades_btcusd`: cada ejecución con IDs de maker y taker"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"async def record_bitstamp_l3(\n",
"    pair: str = 'btcusd',\n",
"    duration_seconds: int = 300,\n",
") -> tuple[pl.DataFrame, pl.DataFrame]:\n",
"    \"\"\"Record Bitstamp L3 order events and trades over WebSocket.\n",
"\n",
"    Subscribes to ``live_orders_{pair}`` and ``live_trades_{pair}`` on\n",
"    ``BITSTAMP_WS`` and collects messages until ``duration_seconds`` have\n",
"    elapsed or the connection is closed.\n",
"\n",
"    Args:\n",
"        pair: Bitstamp pair symbol used to build the channel names.\n",
"        duration_seconds: Length of the recording window in seconds.\n",
"\n",
"    Returns:\n",
"        ``(orders_df, trades_df)`` with every captured order event and\n",
"        trade. A frame is an empty ``pl.DataFrame()`` when nothing of that\n",
"        kind was captured. Missing ID fields are stored as null.\n",
"    \"\"\"\n",
"    order_events = ('order_created', 'order_changed', 'order_deleted')\n",
"    orders = []\n",
"    trades = []\n",
"    start = time.time()\n",
"    msg_count = 0\n",
"\n",
"    async with websockets.connect(BITSTAMP_WS) as ws:\n",
"        # Subscribe to individual orders + trades\n",
"        for channel in [f'live_orders_{pair}', f'live_trades_{pair}']:\n",
"            await ws.send(json.dumps({\n",
"                'event': 'bts:subscribe',\n",
"                'data': {'channel': channel}\n",
"            }))\n",
"\n",
"        print(f'Grabando Bitstamp L3 ({pair}) por {duration_seconds}s...')\n",
"\n",
"        while True:\n",
"            remaining = duration_seconds - (time.time() - start)\n",
"            if remaining <= 0:\n",
"                break\n",
"\n",
"            try:\n",
"                # Never wait past the deadline, so a quiet feed cannot\n",
"                # stretch the recording beyond duration_seconds.\n",
"                raw = await asyncio.wait_for(ws.recv(), timeout=min(5.0, remaining))\n",
"            except asyncio.TimeoutError:\n",
"                continue\n",
"            except websockets.ConnectionClosed as exc:\n",
"                # Stop cleanly and keep what was captured so far.\n",
"                print(f'Conexión WebSocket cerrada: {exc}')\n",
"                break\n",
"\n",
"            msg = json.loads(raw)\n",
"            msg_count += 1\n",
"\n",
"            event = msg.get('event', '')\n",
"            channel = msg.get('channel', '')\n",
"            data = msg.get('data', {})\n",
"\n",
"            if isinstance(data, str):\n",
"                try:\n",
"                    data = json.loads(data)\n",
"                except json.JSONDecodeError:\n",
"                    # Payload is not JSON: skip this message.\n",
"                    continue\n",
"\n",
"            # Orders (L3)\n",
"            if 'live_orders' in channel and event in order_events:\n",
"                orders.append({\n",
"                    'event': event,\n",
"                    # None (not '') keeps the ID column's dtype consistent.\n",
"                    'order_id': data.get('id'),\n",
"                    'side': 'buy' if data.get('order_type') == 0 else 'sell',\n",
"                    'price': float(data.get('price', 0)),\n",
"                    'amount': float(data.get('amount', 0)),\n",
"                    'datetime': data.get('datetime', ''),\n",
"                    'microtimestamp': data.get('microtimestamp', ''),\n",
"                })\n",
"\n",
"            # Trades\n",
"            elif 'live_trades' in channel and event == 'trade':\n",
"                trades.append({\n",
"                    'trade_id': data.get('id'),\n",
"                    'side': 'buy' if data.get('type') == 0 else 'sell',\n",
"                    'price': float(data.get('price', 0)),\n",
"                    'amount': float(data.get('amount', 0)),\n",
"                    'buy_order_id': data.get('buy_order_id'),\n",
"                    'sell_order_id': data.get('sell_order_id'),\n",
"                    'timestamp': data.get('timestamp', ''),\n",
"                    'microtimestamp': data.get('microtimestamp', ''),\n",
"                })\n",
"\n",
"            if msg_count % 5000 == 0:\n",
"                elapsed = time.time() - start\n",
"                print(f' {elapsed:.0f}s: {len(orders):,} orders, {len(trades):,} trades ({msg_count:,} msgs)')\n",
"\n",
"    elapsed = time.time() - start\n",
"    print(f'\\nGrabación terminada: {elapsed:.0f}s')\n",
"    print(f' Órdenes L3: {len(orders):,}')\n",
"    print(f' Trades: {len(trades):,}')\n",
"    print(f' Msgs total: {msg_count:,}')\n",
"\n",
"    orders_df = pl.DataFrame(orders) if orders else pl.DataFrame()\n",
"    trades_df = pl.DataFrame(trades) if trades else pl.DataFrame()\n",
"\n",
"    return orders_df, trades_df\n",
"\n",
"\n",
"print('record_bitstamp_l3() definida')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Record 5 minutes of Bitstamp L3 data\n",
"bs_orders, bs_trades = await record_bitstamp_l3('btcusd', duration_seconds=300)\n",
"\n",
"if bs_orders.height > 0:\n",
"    print('\\n=== Órdenes L3 ===')\n",
"    print(f'Shape: {bs_orders.shape}')\n",
"    print(bs_orders.head(5))\n",
"    print('\\nEventos:')\n",
"    # Number of rows per event type, most frequent first\n",
"    event_counts = bs_orders.group_by('event').agg(pl.len().alias('count'))\n",
"    print(event_counts.sort('count', descending=True))\n",
"\n",
"if bs_trades.height > 0:\n",
"    print('\\n=== Trades ===')\n",
"    print(f'Shape: {bs_trades.shape}')\n",
"    print(bs_trades.head(5))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Persist the Bitstamp L3 frames; an empty frame is not written\n",
"for frame, filename in (\n",
"    (bs_orders, 'bitstamp_btcusd_l3_orders.csv'),\n",
"    (bs_trades, 'bitstamp_btcusd_l3_trades.csv'),\n",
"):\n",
"    if frame.shape[0] > 0:\n",
"        path = DATA_DIR / filename\n",
"        frame.write_csv(str(path))\n",
"        print(f'Guardado: {path} ({frame.shape[0]:,} filas, {path.stat().st_size/1024/1024:.1f} MB)')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"---\n",
|
|
"## 3. Resumen del dataset"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import os\n",
|
|
"\n",
|
|
"# Summary of every CSV found in DATA_DIR: row count and size on disk.\n",
"print('=' * 70)\n",
"print(' DATASET RECOLECTADO')\n",
"print('=' * 70)\n",
"\n",
"total_rows = 0\n",
"for f in sorted(DATA_DIR.glob('*.csv')):\n",
"    size_mb = f.stat().st_size / 1024 / 1024\n",
"    # Count rows through the lazy scanner\n",
"    try:\n",
"        nrows = pl.scan_csv(str(f)).select(pl.len()).collect().item()\n",
"    except Exception as exc:\n",
"        # An unreadable file is reported as '?' and left out of the total.\n",
"        print(f' No se pudo leer {f.name}: {exc}')\n",
"        nrows = None\n",
"\n",
"    # Format the count on its own: the ',' format spec is invalid for the\n",
"    # '?' placeholder and used to raise ValueError on unreadable files.\n",
"    if nrows is None:\n",
"        rows_txt = '?'\n",
"    else:\n",
"        total_rows += nrows\n",
"        rows_txt = f'{nrows:,}'\n",
"    print(f' {f.name:<45} {rows_txt:>10} filas {size_mb:>7.1f} MB')\n",
"\n",
"print(f'\\n TOTAL: {total_rows:>10,} filas')\n",
"print('=' * 70)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"waiting for binance download\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print('waiting for binance download')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Descargando 1M aggTrades (ID 3,923,568,287 → 3,924,568,287)\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 100,000 (10%) | 3,308 rows/s | 2026-04-02 16:01:27.188000\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 200,000 (20%) | 3,248 rows/s | 2026-04-02 16:55:31.965000\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 300,000 (30%) | 3,258 rows/s | 2026-04-02 18:36:12.184000\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 400,000 (40%) | 3,253 rows/s | 2026-04-02 20:16:47.719000\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 500,000 (50%) | 3,212 rows/s | 2026-04-02 22:23:55.678000\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 600,000 (60%) | 3,187 rows/s | 2026-04-03 03:15:10.491000\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 700,000 (70%) | 3,199 rows/s | 2026-04-03 06:33:15.815000\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 800,000 (80%) | 3,209 rows/s | 2026-04-03 09:56:05.448000\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 900,000 (90%) | 3,225 rows/s | 2026-04-03 13:58:09.651000\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 1,000,000 (100%) | 3,235 rows/s | 2026-04-03 16:32:41.139000\n",
|
|
"Listo: 1,000,000 rows en 309s (3,234 rows/s)\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Shape: (1000000, 9)\n",
|
|
"Rango: 2026-04-02 14:26:02.324000 → 2026-04-03 16:32:41.139000 (1 day, 2:06:38.815000)\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"\n",
|
|
"import aiohttp, asyncio, time as _time\n",
|
|
"from datetime import datetime as _dt\n",
|
|
"\n",
|
|
"async def _fetch_bulk():\n",
"    \"\"\"Download the latest 1,000,000 BTCUSDT aggTrades from Binance.\n",
"\n",
"    Looks up the newest aggregate trade ID, then pages forward with\n",
"    ``fromId`` from one million IDs earlier until that ID is reached.\n",
"\n",
"    Returns:\n",
"        pl.DataFrame with one row per aggTrade. It is empty (no columns)\n",
"        when the initial lookup fails or returns nothing.\n",
"    \"\"\"\n",
"    url = f'{BINANCE_BASE}/api/v3/aggTrades'\n",
"    n_target = 1_000_000\n",
"    report_every = 100_000\n",
"    all_records = []\n",
"    total = 0\n",
"    next_report = report_every\n",
"    t0 = _time.time()\n",
"\n",
"    async with aiohttp.ClientSession() as session:\n",
"        # Look up the newest aggregate trade ID\n",
"        async with session.get(url, params={'symbol': 'BTCUSDT', 'limit': 1}) as resp:\n",
"            if resp.status != 200:\n",
"                # Without the latest ID there is no range to download.\n",
"                print(f'Error {resp.status}')\n",
"                return pl.DataFrame(all_records)\n",
"            data = await resp.json()\n",
"        if not data:\n",
"            return pl.DataFrame(all_records)\n",
"        latest_id = data[0]['a']\n",
"\n",
"        # Clamp at 0 in case fewer than n_target aggTrades exist.\n",
"        from_id = max(0, latest_id - n_target)\n",
"        print(f'Descargando 1M aggTrades (ID {from_id:,} → {latest_id:,})')\n",
"\n",
"        while from_id < latest_id:\n",
"            params = {'symbol': 'BTCUSDT', 'fromId': from_id, 'limit': 1000}\n",
"            async with session.get(url, params=params) as resp:\n",
"                if resp.status == 429:\n",
"                    # Rate limited: back off, then retry the same page.\n",
"                    await asyncio.sleep(10)\n",
"                    continue\n",
"                if resp.status != 200:\n",
"                    print(f'Error {resp.status}')\n",
"                    break\n",
"                data = await resp.json()\n",
"\n",
"            if not data:\n",
"                break\n",
"\n",
"            for r in data:\n",
"                all_records.append({\n",
"                    'agg_trade_id': r['a'], 'price': float(r['p']), 'qty': float(r['q']),\n",
"                    'first_trade_id': r['f'], 'last_trade_id': r['l'], 'timestamp': r['T'],\n",
"                    'is_buyer_maker': r['m'], 'side': 'sell' if r['m'] else 'buy',\n",
"                    'n_fills': r['l'] - r['f'] + 1,\n",
"                })\n",
"\n",
"            # The next page starts right after the last ID received.\n",
"            from_id = data[-1]['a'] + 1\n",
"            total += len(data)\n",
"\n",
"            # Threshold-based reporting still fires when a page is short.\n",
"            if total >= next_report:\n",
"                elapsed = _time.time() - t0\n",
"                rate = total / elapsed if elapsed > 0 else 0\n",
"                pct = total / n_target * 100\n",
"                ts = _dt.fromtimestamp(data[-1]['T'] / 1000)\n",
"                print(f' {total:>10,} ({pct:.0f}%) | {rate:,.0f} rows/s | {ts}')\n",
"                next_report = (total // report_every + 1) * report_every\n",
"\n",
"            # Short pause between requests to stay under the API rate limit.\n",
"            await asyncio.sleep(0.06)\n",
"\n",
"    elapsed = _time.time() - t0\n",
"    rate = total / elapsed if elapsed > 0 else 0\n",
"    print(f'Listo: {total:,} rows en {elapsed:.0f}s ({rate:,.0f} rows/s)')\n",
"    return pl.DataFrame(all_records)\n",
|
|
"\n",
|
|
"# Run the bulk download and report its shape and time span\n",
"binance_trades = await _fetch_bulk()\n",
"print(f'Shape: {binance_trades.shape}')\n",
"t_min, t_max = (\n",
"    _dt.fromtimestamp(edge_ms / 1000)\n",
"    for edge_ms in (binance_trades['timestamp'].min(), binance_trades['timestamp'].max())\n",
")\n",
"print(f'Rango: {t_min} → {t_max} ({t_max - t_min})')\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Guardado: binance_btcusdt_aggtrades_1M.csv\n",
|
|
" 1,000,000 filas, 72.1 MB\n",
|
|
" Buys: 506,912\n",
|
|
" Sells: 493,088\n",
|
|
" Qty mediana: 0.000460 BTC\n",
|
|
" Fills/aggTrade mediana: 1\n",
|
|
" Fills/aggTrade max: 500\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"\n",
|
|
"# Persist the Binance aggTrades and print a short summary\n",
"out_path = DATA_DIR / 'binance_btcusdt_aggtrades_1M.csv'\n",
"binance_trades.write_csv(str(out_path))\n",
"size_mb = out_path.stat().st_size / (1024 * 1024)\n",
"n_buys = binance_trades.filter(pl.col('side') == 'buy').height\n",
"n_sells = binance_trades.filter(pl.col('side') == 'sell').height\n",
"for summary_line in (\n",
"    f'Guardado: {out_path.name}',\n",
"    f' {binance_trades.height:,} filas, {size_mb:.1f} MB',\n",
"    f' Buys: {n_buys:,}',\n",
"    f' Sells: {n_sells:,}',\n",
"    f' Qty mediana: {binance_trades[\"qty\"].median():.6f} BTC',\n",
"    f' Fills/aggTrade mediana: {binance_trades[\"n_fills\"].median():.0f}',\n",
"    f' Fills/aggTrade max: {binance_trades[\"n_fills\"].max()}',\n",
"):\n",
"    print(summary_line)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Grabando Bitstamp L3 (btcusd) por 300s...\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 24s: 4,995 orders, 3 trades\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 41s: 9,993 orders, 5 trades\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 79s: 14,989 orders, 9 trades\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 125s: 19,984 orders, 14 trades\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 164s: 24,930 orders, 68 trades\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 205s: 29,927 orders, 71 trades\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 228s: 34,925 orders, 73 trades\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 246s: 39,924 orders, 74 trades\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 281s: 44,920 orders, 78 trades\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Bitstamp: 48,668 orders, 82 trades en 310s\n",
|
|
"Guardado: bitstamp_btcusd_l3_orders.csv (48,668 filas)\n",
|
|
"shape: (3, 2)\n",
|
|
"┌───────────────┬───────┐\n",
|
|
"│ event ┆ count │\n",
|
|
"│ --- ┆ --- │\n",
|
|
"│ str ┆ u32 │\n",
|
|
"╞═══════════════╪═══════╡\n",
|
|
"│ order_created ┆ 24290 │\n",
|
|
"│ order_deleted ┆ 24289 │\n",
|
|
"│ order_changed ┆ 89 │\n",
|
|
"└───────────────┴───────┘\n",
|
|
"Guardado: bitstamp_btcusd_l3_trades.csv (82 filas)\n",
|
|
"shape: (3, 8)\n",
|
|
"┌───────────┬──────┬─────────┬──────────┬───────────────┬──────────────┬────────────┬──────────────┐\n",
|
|
"│ trade_id ┆ side ┆ price ┆ amount ┆ buy_order_id ┆ sell_order_i ┆ timestamp ┆ microtimesta │\n",
|
|
"│ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ d ┆ --- ┆ mp │\n",
|
|
"│ i64 ┆ str ┆ f64 ┆ f64 ┆ i64 ┆ --- ┆ str ┆ --- │\n",
|
|
"│ ┆ ┆ ┆ ┆ ┆ i64 ┆ ┆ str │\n",
|
|
"╞═══════════╪══════╪═════════╪══════════╪═══════════════╪══════════════╪════════════╪══════════════╡\n",
|
|
"│ 555923544 ┆ buy ┆ 66879.0 ┆ 0.013272 ┆ 1992262225506 ┆ 199226205261 ┆ 1775227119 ┆ 177522711955 │\n",
|
|
"│ ┆ ┆ ┆ ┆ 305 ┆ 0055 ┆ ┆ 6000 │\n",
|
|
"│ 555923545 ┆ buy ┆ 66879.0 ┆ 0.001557 ┆ 1992262225506 ┆ 199226222522 ┆ 1775227119 ┆ 177522711955 │\n",
|
|
"│ ┆ ┆ ┆ ┆ 305 ┆ 7776 ┆ ┆ 6000 │\n",
|
|
"│ 555923566 ┆ buy ┆ 66898.0 ┆ 0.000077 ┆ 1992262268186 ┆ 199226223412 ┆ 1775227129 ┆ 177522712997 │\n",
|
|
"│ ┆ ┆ ┆ ┆ 625 ┆ 4290 ┆ ┆ 6000 │\n",
|
|
"└───────────┴──────┴─────────┴──────────┴───────────────┴──────────────┴────────────┴──────────────┘\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"\n",
|
|
"import websockets, json, asyncio\n",
|
|
"\n",
|
|
"async def _record_bitstamp(pair='btcusd', duration=300):\n",
"    \"\"\"Record Bitstamp L3 order events and trades for ``duration`` seconds.\n",
"\n",
"    Subscribes to ``live_orders_{pair}`` and ``live_trades_{pair}`` and\n",
"    collects messages until the time is up or the connection is closed.\n",
"\n",
"    Args:\n",
"        pair: Bitstamp pair symbol used to build the channel names.\n",
"        duration: Length of the recording window in seconds.\n",
"\n",
"    Returns:\n",
"        ``(orders_df, trades_df)``; a frame is an empty ``pl.DataFrame()``\n",
"        when nothing of that kind was captured. Missing ID fields are null.\n",
"    \"\"\"\n",
"    order_events = ('order_created', 'order_changed', 'order_deleted')\n",
"    orders, trades = [], []\n",
"    t0 = _time.time()\n",
"    mc = 0\n",
"    # Same endpoint as before, taken from the notebook-level constant.\n",
"    async with websockets.connect(BITSTAMP_WS) as ws:\n",
"        for ch in [f'live_orders_{pair}', f'live_trades_{pair}']:\n",
"            await ws.send(json.dumps({'event':'bts:subscribe','data':{'channel':ch}}))\n",
"        print(f'Grabando Bitstamp L3 ({pair}) por {duration}s...')\n",
"        while True:\n",
"            remaining = duration - (_time.time() - t0)\n",
"            if remaining <= 0:\n",
"                break\n",
"            try:\n",
"                # Never wait past the deadline, so a quiet feed cannot\n",
"                # stretch the recording beyond the requested duration.\n",
"                raw = await asyncio.wait_for(ws.recv(), timeout=min(5.0, remaining))\n",
"            except asyncio.TimeoutError:\n",
"                continue\n",
"            except websockets.ConnectionClosed as exc:\n",
"                # Stop cleanly and keep what was captured so far.\n",
"                print(f'Conexión WebSocket cerrada: {exc}')\n",
"                break\n",
"            msg = json.loads(raw)\n",
"            mc += 1\n",
"            ev = msg.get('event','')\n",
"            ch = msg.get('channel','')\n",
"            d = msg.get('data',{})\n",
"            if isinstance(d, str):\n",
"                try:\n",
"                    d = json.loads(d)\n",
"                except json.JSONDecodeError:\n",
"                    # Payload is not JSON: skip this message.\n",
"                    continue\n",
"            if 'live_orders' in ch and ev in order_events:\n",
"                orders.append({\n",
"                    # None (not '') keeps the ID column's dtype consistent.\n",
"                    'event':ev, 'order_id':d.get('id'),\n",
"                    'side':'buy' if d.get('order_type')==0 else 'sell',\n",
"                    'price':float(d.get('price',0)), 'amount':float(d.get('amount',0)),\n",
"                    'datetime':d.get('datetime',''), 'microtimestamp':d.get('microtimestamp',''),\n",
"                })\n",
"            elif 'live_trades' in ch and ev == 'trade':\n",
"                trades.append({\n",
"                    'trade_id':d.get('id'),\n",
"                    'side':'buy' if d.get('type')==0 else 'sell',\n",
"                    'price':float(d.get('price',0)), 'amount':float(d.get('amount',0)),\n",
"                    'buy_order_id':d.get('buy_order_id'),\n",
"                    'sell_order_id':d.get('sell_order_id'),\n",
"                    'timestamp':d.get('timestamp',''), 'microtimestamp':d.get('microtimestamp',''),\n",
"                })\n",
"            if mc % 5000 == 0:\n",
"                print(f' {_time.time()-t0:.0f}s: {len(orders):,} orders, {len(trades):,} trades')\n",
"    print(f'Bitstamp: {len(orders):,} orders, {len(trades):,} trades en {_time.time()-t0:.0f}s')\n",
"    orders_df = pl.DataFrame(orders) if orders else pl.DataFrame()\n",
"    trades_df = pl.DataFrame(trades) if trades else pl.DataFrame()\n",
"    return orders_df, trades_df\n",
|
|
"\n",
|
|
"# Record 5 minutes of Bitstamp L3 data and save whatever was captured\n",
"bs_orders, bs_trades = await _record_bitstamp('btcusd', duration=300)\n",
"if bs_orders.height > 0:\n",
"    bs_orders.write_csv(str(DATA_DIR / 'bitstamp_btcusd_l3_orders.csv'))\n",
"    print(f'Guardado: bitstamp_btcusd_l3_orders.csv ({bs_orders.height:,} filas)')\n",
"    # Number of rows per event type, most frequent first\n",
"    event_counts = bs_orders.group_by('event').agg(pl.len().alias('count'))\n",
"    print(event_counts.sort('count', descending=True))\n",
"if bs_trades.height > 0:\n",
"    bs_trades.write_csv(str(DATA_DIR / 'bitstamp_btcusd_l3_trades.csv'))\n",
"    print(f'Guardado: bitstamp_btcusd_l3_trades.csv ({bs_trades.height:,} filas)')\n",
"    print(bs_trades.head(3))\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Pares disponibles en Bitstamp: 273\n",
|
|
" ['btcusd', 'btceur', 'btcgbp', 'gbpusd', 'eurusd', 'xrpusd', 'xrpeur', 'xrpbtc', 'xrpgbp', 'ltcbtc']\n",
|
|
" ['ltcusd', 'ltceur', 'ethbtc', 'ethusd', 'etheur', 'ethgbp', 'bchusd', 'bcheur', 'xlmusd', 'xlmeur']\n",
|
|
" ['linkusd', 'linkeur', 'linkgbp', 'usdcusd', 'usdceur', 'btcusdc', 'ethusdc', 'eth2eth', 'aaveusd', 'aaveeur']\n",
|
|
" ['batusd', 'bateur', 'umausd', 'umaeur', 'daiusd', 'kncusd', 'knceur', 'mkrusd', 'mkreur', 'zrxusd']\n",
|
|
" ['zrxeur', 'algousd', 'algoeur', 'audiousd', 'audioeur', 'crvusd', 'crveur', 'snxusd', 'snxeur', 'uniusd']\n",
|
|
" ['unieur', 'xtzusd', 'xtzeur', 'compusd', 'compeur', 'grtusd', 'grteur', 'usdtusd', 'usdteur', 'usdcusdt']\n",
|
|
" ['btcusdt', 'ethusdt', 'xrpusdt', 'flrusd', 'flreur', 'manausd', 'manaeur', 'sushiusd', 'sushieur', 'chzusd']\n",
|
|
" ['chzeur', 'enjusd', 'enjeur', 'hbarusd', 'hbareur', 'axsusd', 'axseur', 'sandusd', 'sandeur', 'storjusd']\n",
|
|
" ['storjeur', 'adausd', 'adaeur', 'fetusd', 'feteur', 'sklusd', 'skleur', 'avaxusd', 'avaxeur', 'ftmusd']\n",
|
|
" ['ftmeur', 'shibusd', 'shibeur', 'galausd', 'galaeur', 'wbtcbtc', 'ctsiusd', 'ctsieur', 'imxusd', 'imxeur']\n",
|
|
" ['injusd', 'injeur', 'rndrusd', 'rndreur', 'solusd', 'soleur', 'apeusd', 'apeeur', 'eurcusdc', 'eurceur']\n",
|
|
" ['dotusd', 'doteur', 'nearusd', 'neareur', 'ldousd', 'ldoeur', 'dogeusd', 'dogeeur', 'suiusd', 'suieur']\n",
|
|
" ['eurcvusdt', 'eurcveur', 'pyusdusd', 'tracusd', 'traceur', 'wecanusd', 'wecaneur', 'lmwrusd', 'lmwreur', 'pepeusd']\n",
|
|
" ['pepeeur', 'csprusd', 'cspreur', 'gyenusd', 'vchfusd', 'vchfeur', 'veurusd', 'veureur', 'zusdusd', 'bonkusd']\n",
|
|
" ['bonkeur', 'jupusd', 'jupeur', 'pythusd', 'pytheur', 'wifusd', 'wifeur', 'ondousd', 'ondoeur', 'trufusd']\n",
|
|
" ['trufeur', 'egldusd', 'egldeur', 'icpusd', 'icpeur', 'xdcusd', 'xdceur', 'flokiusd', 'flokieur', 'strkusd']\n",
|
|
" ['strkeur', 'xchngusd', 'xchngeur', 'ctxusd', 'ctxeur', 'mogusd', 'mogeur', 'wenusd', 'weneur', 'arbusd']\n",
|
|
" ['arbeur', 'xsgdusd', 'xsgdusdt', 'zetausd', 'zetaeur', 'mewusd', 'meweur', 'rlusdusd', 'rlusdeur', 'rlusdusdt']\n",
|
|
" ['btcrlusd', 'xrprlusd', 'ethrlusd', 'woousd', 'wooeur', 'seiusd', 'seieur', 'opusd', 'opeur', 'cxtusd']\n",
|
|
" ['cxteur', 'popcatusd', 'popcateur', 'syrupusd', 'syrupeur', 'melaniausd', 'melaniaeur', 'trumpusd', 'trumpeur', 'polusd']\n",
|
|
" ['poleur', 'fartcoinusd', 'fartcoineur', 'penguusd', 'pengueur', 'pnutusd', 'pnuteur', 'etcusd', 'etceur', 'xrpusdc']\n",
|
|
" ['solusdc', 'btceurc', 'etheurc', 'eurcvusdc', 'btceurcv', 'etheurcv', 'sgdusd', 'virtualeur', 'virtualusd', 'xcnusd']\n",
|
|
" ['xcneur', 'moodengusd', 'moodengeur', 'taiusd', 'taieur', 'tonusd', 'toneur', 'xplusd', 'xpleur', 'biousd']\n",
|
|
" ['bioeur', 'enausd', 'enaeur', 'spkusd', 'spkeur', 'wlfiusd', 'wlfieur', 'hypeusd', 'hypeeur', 'asterusd']\n",
|
|
" ['astereur', 'avntusd', 'avnteur', 'bnbusd', 'bnbeur', 'monusd', 'moneur', 'pendleusd', 'pendleeur', 'zorausd']\n",
|
|
" ['zoraeur', 'aptusd', 'apteur', 'atomusd', 'atomeur', 'cakeusd', 'cakeeur', 'fightusd', 'fighteur', 'litusd']\n",
|
|
" ['liteur', 'susd', 'seur', 'taousd', 'taoeur', 'trxusd', 'trxeur', 'aerousd', 'aeroeur', 'paxgusd']\n",
|
|
" ['paxgeur', 'mntusd', 'mnteur']\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"\n",
|
|
"# Descubrir todos los pares disponibles en Bitstamp via REST\n",
|
|
"import aiohttp, asyncio\n",
|
|
"\n",
|
|
"async def get_bitstamp_pairs() -> list[str]:\n",
"    \"\"\"Return the ``url_symbol`` of every Bitstamp pair with trading enabled.\n",
"\n",
"    Queries the public ``trading-pairs-info`` REST endpoint.\n",
"\n",
"    Returns:\n",
"        Symbols whose ``trading`` field equals ``'Enabled'``, in the order\n",
"        the API lists them.\n",
"\n",
"    Raises:\n",
"        aiohttp.ClientResponseError: If the endpoint answers with an HTTP\n",
"            error status.\n",
"    \"\"\"\n",
"    async with aiohttp.ClientSession() as s:\n",
"        async with s.get('https://www.bitstamp.net/api/v2/trading-pairs-info/') as r:\n",
"            # Fail with a clear HTTP error instead of choking on an error body.\n",
"            r.raise_for_status()\n",
"            data = await r.json()\n",
"    # Keep pairs whose trading is enabled (no quote-currency filter is applied);\n",
"    # .get() skips entries that lack the field instead of raising KeyError.\n",
"    pairs = [p['url_symbol'] for p in data if p.get('trading') == 'Enabled']\n",
"    return pairs\n",
|
|
"\n",
|
|
"all_pairs = await get_bitstamp_pairs()\n",
"print(f'Pares disponibles en Bitstamp: {len(all_pairs)}')\n",
"# Show the symbols ten per line\n",
"offset = 0\n",
"while offset < len(all_pairs):\n",
"    print(f' {all_pairs[offset:offset + 10]}')\n",
"    offset += 10\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Suscrito a 30 pares (60 canales)\n",
|
|
"Grabando 900s...\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 52s: 19,886 orders, 54 trades | 386 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 105s: 39,844 orders, 96 trades | 380 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 139s: 59,825 orders, 115 trades | 432 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 165s: 79,771 orders, 169 trades | 484 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 180s: 99,733 orders, 207 trades | 554 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 203s: 119,704 orders, 236 trades | 590 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 231s: 139,667 orders, 273 trades | 605 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 249s: 159,648 orders, 292 trades | 640 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 280s: 179,615 orders, 325 trades | 642 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 318s: 199,577 orders, 363 trades | 627 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 344s: 219,531 orders, 409 trades | 638 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 362s: 239,503 orders, 437 trades | 662 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 396s: 259,473 orders, 467 trades | 656 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 426s: 279,419 orders, 521 trades | 655 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 470s: 299,377 orders, 563 trades | 637 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 496s: 319,349 orders, 591 trades | 644 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 531s: 339,318 orders, 622 trades | 639 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 564s: 359,287 orders, 653 trades | 637 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 587s: 379,268 orders, 672 trades | 646 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 615s: 399,247 orders, 693 trades | 649 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 643s: 419,226 orders, 714 trades | 652 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 690s: 439,193 orders, 747 trades | 637 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 725s: 459,152 orders, 788 trades | 633 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 768s: 479,097 orders, 843 trades | 624 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 796s: 499,001 orders, 939 trades | 627 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 823s: 518,950 orders, 990 trades | 630 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 839s: 538,919 orders, 1,021 trades | 642 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 863s: 558,883 orders, 1,057 trades | 647 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" 895s: 578,849 orders, 1,091 trades | 647 orders/s\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"Finalizado: 581,929 orders, 1,094 trades en 910s\n",
|
|
" 639 orders/s promedio\n"
|
|
]
|
|
},
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"Guardado: bitstamp_multi_l3_orders.csv (581,929 filas, 48.5 MB)\n",
|
|
"shape: (10, 2)\n",
|
|
"┌─────────┬────────┐\n",
|
|
"│ pair ┆ n │\n",
|
|
"│ --- ┆ --- │\n",
|
|
"│ str ┆ u32 │\n",
|
|
"╞═════════╪════════╡\n",
|
|
"│ btcusd ┆ 168383 │\n",
|
|
"│ ethusd ┆ 74387 │\n",
|
|
"│ solusd ┆ 50820 │\n",
|
|
"│ xrpusd ┆ 32191 │\n",
|
|
"│ avaxusd ┆ 30689 │\n",
|
|
"│ soleur ┆ 29519 │\n",
|
|
"│ btceur ┆ 28591 │\n",
|
|
"│ adausd ┆ 27297 │\n",
|
|
"│ btcusdt ┆ 22249 │\n",
|
|
"│ dogeusd ┆ 17526 │\n",
|
|
"└─────────┴────────┘\n",
|
|
"Guardado: bitstamp_multi_l3_trades.csv (1,094 filas)\n",
|
|
"shape: (10, 2)\n",
|
|
"┌─────────┬─────┐\n",
|
|
"│ pair ┆ n │\n",
|
|
"│ --- ┆ --- │\n",
|
|
"│ str ┆ u32 │\n",
|
|
"╞═════════╪═════╡\n",
|
|
"│ solusd ┆ 292 │\n",
|
|
"│ btcusd ┆ 150 │\n",
|
|
"│ adausd ┆ 141 │\n",
|
|
"│ xrpusd ┆ 123 │\n",
|
|
"│ btceur ┆ 74 │\n",
|
|
"│ avaxusd ┆ 46 │\n",
|
|
"│ ethusd ┆ 42 │\n",
|
|
"│ dogeusd ┆ 42 │\n",
|
|
"│ xrpeur ┆ 39 │\n",
|
|
"│ ltcusd ┆ 35 │\n",
|
|
"└─────────┴─────┘\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"\n",
|
|
"import websockets, json, asyncio\n",
|
|
"\n",
|
|
"# 30 pairs picked as the most liquid ones (a mix of USD, EUR, USDT and BTC quotes).\n",
"# The concatenation below yields one flat list in the original order.\n",
"PAIRS = (\n",
"    # USD-quoted\n",
"    ['btcusd', 'ethusd', 'xrpusd', 'solusd', 'dogeusd', 'adausd',\n",
"     'ltcusd', 'linkusd', 'avaxusd', 'dotusd', 'nearusd', 'suiusd',\n",
"     'shibusd', 'pepeusd', 'bonkusd', 'injusd', 'arbusd', 'opusd']\n",
"    # EUR-quoted\n",
"    + ['btceur', 'etheur', 'xrpeur', 'soleur', 'dogeeur']\n",
"    # USDT-quoted\n",
"    + ['btcusdt', 'ethusdt', 'xrpusdt']\n",
"    # BTC-quoted crosses, followed by the USDT/USD pair\n",
"    + ['ethbtc', 'xrpbtc', 'ltcbtc', 'usdtusd']\n",
")\n",
|
|
"\n",
|
|
"def _parse_bitstamp_frame(raw):\n",
"    '''Decode one raw Bitstamp WebSocket frame into a ``(kind, row)`` pair.\n",
"\n",
"    ``kind`` is 'order' for live_orders events, 'trade' for live_trades events\n",
"    and None for anything else (subscription acks, malformed JSON, payloads\n",
"    that are not JSON objects). ``row`` is the flat dict to store, or None.\n",
"    '''\n",
"    try:\n",
"        msg = json.loads(raw)\n",
"    except ValueError:\n",
"        # One malformed frame must not abort a long recording.\n",
"        return None, None\n",
"    if not isinstance(msg, dict):\n",
"        return None, None\n",
"\n",
"    ev = msg.get('event', '')\n",
"    ch = msg.get('channel', '')\n",
"    d = msg.get('data', {})\n",
"    if isinstance(d, str):\n",
"        # Tolerate a payload that arrives as a JSON-encoded string.\n",
"        try:\n",
"            d = json.loads(d)\n",
"        except ValueError:\n",
"            return None, None\n",
"    if not isinstance(d, dict):\n",
"        return None, None\n",
"\n",
"    # The pair is whatever remains after the channel prefix, e.g. live_orders_btcusd.\n",
"    pair = ch.replace('live_orders_', '').replace('live_trades_', '')\n",
"\n",
"    if 'live_orders' in ch and ev in ('order_created', 'order_changed', 'order_deleted'):\n",
"        return 'order', {\n",
"            'pair': pair, 'event': ev,\n",
"            'order_id': d.get('id', ''),\n",
"            # order_type 0 is recorded as buy, any other value as sell.\n",
"            'side': 'buy' if d.get('order_type') == 0 else 'sell',\n",
"            'price': float(d.get('price', 0)),\n",
"            'amount': float(d.get('amount', 0)),\n",
"            'datetime': d.get('datetime', ''),\n",
"            'microtimestamp': d.get('microtimestamp', ''),\n",
"        }\n",
"    if 'live_trades' in ch and ev == 'trade':\n",
"        return 'trade', {\n",
"            'pair': pair, 'trade_id': d.get('id', ''),\n",
"            # type 0 is recorded as buy, any other value as sell.\n",
"            'side': 'buy' if d.get('type') == 0 else 'sell',\n",
"            'price': float(d.get('price', 0)),\n",
"            'amount': float(d.get('amount', 0)),\n",
"            'buy_order_id': d.get('buy_order_id', ''),\n",
"            'sell_order_id': d.get('sell_order_id', ''),\n",
"            'timestamp': d.get('timestamp', ''),\n",
"            'microtimestamp': d.get('microtimestamp', ''),\n",
"        }\n",
"    return None, None\n",
"\n",
"\n",
"async def record_bitstamp_massive(pairs, duration_seconds=600, max_orders=1_000_000):\n",
"    '''Record Bitstamp L3 order events and trades for several pairs at once.\n",
"\n",
"    Args:\n",
"        pairs: sequence of Bitstamp pair symbols such as 'btcusd'; two channels\n",
"            (live_orders and live_trades) are subscribed per pair.\n",
"        duration_seconds: wall-clock budget, measured from the start of the call.\n",
"        max_orders: stop early once this many order events have been collected.\n",
"\n",
"    Returns:\n",
"        ``(orders_df, trades_df)`` as polars DataFrames; an empty DataFrame is\n",
"        returned for a kind of which nothing was received.\n",
"\n",
"    If the server closes the connection, the rows collected so far are\n",
"    returned instead of being lost to the exception.\n",
"    '''\n",
"    # Local import: this cell does not bind `_time`, so do not rely on another\n",
"    # cell having done it.\n",
"    import time as _time\n",
"\n",
"    orders = []\n",
"    trades = []\n",
"    t0 = _time.time()\n",
"    mc = 0  # number of frames received\n",
"\n",
"    async with websockets.connect('wss://ws.bitstamp.net', max_size=10_000_000) as ws:\n",
"        # Subscribe to both channels of every pair.\n",
"        for pair in pairs:\n",
"            for ch_type in ['live_orders', 'live_trades']:\n",
"                await ws.send(json.dumps({\n",
"                    'event': 'bts:subscribe',\n",
"                    'data': {'channel': f'{ch_type}_{pair}'}\n",
"                }))\n",
"        print(f'Suscrito a {len(pairs)} pares ({len(pairs)*2} canales)')\n",
"        print(f'Grabando {duration_seconds}s...')\n",
"\n",
"        while _time.time() - t0 < duration_seconds:\n",
"            try:\n",
"                raw = await asyncio.wait_for(ws.recv(), timeout=5.0)\n",
"            except asyncio.TimeoutError:\n",
"                # Quiet period: loop again so the time budget is re-checked.\n",
"                continue\n",
"            except websockets.ConnectionClosed as exc:\n",
"                # Keep what was recorded rather than raising it away.\n",
"                print(f' Conexión cerrada: {exc!r}')\n",
"                break\n",
"\n",
"            mc += 1\n",
"            kind, row = _parse_bitstamp_frame(raw)\n",
"            if kind == 'order':\n",
"                orders.append(row)\n",
"            elif kind == 'trade':\n",
"                trades.append(row)\n",
"\n",
"            if mc % 20000 == 0:\n",
"                elapsed = _time.time() - t0\n",
"                rate = len(orders) / elapsed\n",
"                print(f' {elapsed:.0f}s: {len(orders):,} orders, {len(trades):,} trades | {rate:,.0f} orders/s')\n",
"\n",
"            # Early stop once the requested number of order events is reached.\n",
"            if len(orders) >= max_orders:\n",
"                print(f' Alcanzado {max_orders:,} orders en {_time.time()-t0:.0f}s!')\n",
"                break\n",
"\n",
"        # Measure before leaving the block so the close handshake is not counted.\n",
"        elapsed = _time.time() - t0\n",
"\n",
"    avg_rate = len(orders) / elapsed if elapsed > 0 else 0.0\n",
"    print(f'\\nFinalizado: {len(orders):,} orders, {len(trades):,} trades en {elapsed:.0f}s')\n",
"    print(f' {avg_rate:,.0f} orders/s promedio')\n",
"\n",
"    orders_df = pl.DataFrame(orders) if orders else pl.DataFrame()\n",
"    trades_df = pl.DataFrame(trades) if trades else pl.DataFrame()\n",
"    return orders_df, trades_df\n",
|
|
"\n",
|
|
"# Record every pair in PAIRS for 15 minutes.\n",
"bs_orders_m, bs_trades_m = await record_bitstamp_massive(PAIRS, duration_seconds=900)\n",
"\n",
"# Persist the order events and show the ten pairs with the most rows.\n",
"if bs_orders_m.height > 0:\n",
"    path = DATA_DIR / 'bitstamp_multi_l3_orders.csv'\n",
"    bs_orders_m.write_csv(str(path))\n",
"    size_mb = path.stat().st_size / 1024 / 1024\n",
"    print(f'\\nGuardado: {path.name} ({bs_orders_m.height:,} filas, {size_mb:.1f} MB)')\n",
"    print(\n",
"        bs_orders_m\n",
"        .group_by('pair')\n",
"        .agg(pl.len().alias('n'))\n",
"        .sort('n', descending=True)\n",
"        .head(10)\n",
"    )\n",
"\n",
"# Same for the trades.\n",
"if bs_trades_m.height > 0:\n",
"    path = DATA_DIR / 'bitstamp_multi_l3_trades.csv'\n",
"    bs_trades_m.write_csv(str(path))\n",
"    print(f'Guardado: {path.name} ({bs_trades_m.height:,} filas)')\n",
"    print(\n",
"        bs_trades_m\n",
"        .group_by('pair')\n",
"        .agg(pl.len().alias('n'))\n",
"        .sort('n', descending=True)\n",
"        .head(10)\n",
"    )\n"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3 (ipykernel)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.13.7"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 4
|
|
}
|