init: estudio_mercados analysis from fn_registry
This commit is contained in:
@@ -0,0 +1,598 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Matching Engine FIFO\n",
|
||||
"\n",
|
||||
"Motor de matching de órdenes con prioridad precio-tiempo (FIFO).\n",
|
||||
"\n",
|
||||
"**Objetivo:** Implementar un order book con matching FIFO que podamos usar después para simular mercados con datos reales de exchanges.\n",
|
||||
"\n",
|
||||
"**Estructura:**\n",
|
||||
"1. Tipos de datos (Order, Trade, OrderBook)\n",
|
||||
"2. Order Book con inserción y cancelación\n",
|
||||
"3. Matching engine FIFO\n",
|
||||
"4. Tests, visualización del libro y simulación del impacto de una market order"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Tipos de datos"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from __future__ import annotations\n",
|
||||
"from dataclasses import dataclass, field\n",
|
||||
"from enum import Enum\n",
|
||||
"from typing import Optional\n",
|
||||
"from collections import defaultdict\n",
|
||||
"from sortedcontainers import SortedDict\n",
|
||||
"import time\n",
|
||||
"import uuid\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class Side(Enum):\n",
|
||||
" BUY = \"buy\"\n",
|
||||
" SELL = \"sell\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class OrderType(Enum):\n",
|
||||
" LIMIT = \"limit\"\n",
|
||||
" MARKET = \"market\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class OrderStatus(Enum):\n",
|
||||
" NEW = \"new\"\n",
|
||||
" PARTIAL = \"partial\"\n",
|
||||
" FILLED = \"filled\"\n",
|
||||
" CANCELLED = \"cancelled\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@dataclass\n",
|
||||
"class Order:\n",
|
||||
" \"\"\"Una orden en el libro.\"\"\"\n",
|
||||
" side: Side\n",
|
||||
" price: float # 0 para market orders\n",
|
||||
" qty: float # cantidad original\n",
|
||||
" remaining: float = 0 # cantidad pendiente\n",
|
||||
" order_type: OrderType = OrderType.LIMIT\n",
|
||||
" order_id: str = field(default_factory=lambda: str(uuid.uuid4()))\n",
|
||||
" timestamp: float = field(default_factory=time.time)\n",
|
||||
" status: OrderStatus = OrderStatus.NEW\n",
|
||||
"\n",
|
||||
" def __post_init__(self):\n",
|
||||
" if self.remaining == 0:\n",
|
||||
" self.remaining = self.qty\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"@dataclass\n",
|
||||
"class Trade:\n",
|
||||
" \"\"\"Un trade ejecutado por el matching engine.\"\"\"\n",
|
||||
" price: float\n",
|
||||
" qty: float\n",
|
||||
" buyer_order_id: str\n",
|
||||
" seller_order_id: str\n",
|
||||
" timestamp: float = field(default_factory=time.time)\n",
|
||||
" trade_id: str = field(default_factory=lambda: str(uuid.uuid4()))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(\"Tipos definidos: Side, OrderType, OrderStatus, Order, Trade\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Order Book\n",
|
||||
"\n",
|
||||
"Estructura del libro de órdenes:\n",
|
||||
"- **Bids** (compras): ordenados por precio descendente, FIFO dentro del mismo precio\n",
|
||||
"- **Asks** (ventas): ordenados por precio ascendente, FIFO dentro del mismo precio\n",
|
||||
"\n",
|
||||
"Usamos `SortedDict` para mantener los niveles de precio ordenados y `deque` para la cola FIFO en cada nivel."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from collections import deque\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"class OrderBook:\n",
|
||||
" \"\"\"Libro de órdenes con niveles de precio ordenados y colas FIFO por nivel.\"\"\"\n",
|
||||
"\n",
|
||||
" def __init__(self):\n",
|
||||
" # SortedDict: price -> deque[Order]\n",
|
||||
" # Bids: negamos el precio para que SortedDict ordene desc\n",
|
||||
" self._bids: SortedDict = SortedDict() # key = -price\n",
|
||||
" self._asks: SortedDict = SortedDict() # key = price\n",
|
||||
" self._orders: dict[str, Order] = {} # order_id -> Order (lookup rápido)\n",
|
||||
"\n",
|
||||
" def add(self, order: Order) -> None:\n",
|
||||
" \"\"\"Añade una orden al libro (sin matching, solo inserción).\"\"\"\n",
|
||||
" book = self._bids if order.side == Side.BUY else self._asks\n",
|
||||
" key = -order.price if order.side == Side.BUY else order.price\n",
|
||||
"\n",
|
||||
" if key not in book:\n",
|
||||
" book[key] = deque()\n",
|
||||
" book[key].append(order)\n",
|
||||
" self._orders[order.order_id] = order\n",
|
||||
"\n",
|
||||
" def cancel(self, order_id: str) -> Optional[Order]:\n",
|
||||
" \"\"\"Cancela una orden por ID. Retorna la orden cancelada o None.\"\"\"\n",
|
||||
" order = self._orders.pop(order_id, None)\n",
|
||||
" if order is None:\n",
|
||||
" return None\n",
|
||||
"\n",
|
||||
" book = self._bids if order.side == Side.BUY else self._asks\n",
|
||||
" key = -order.price if order.side == Side.BUY else order.price\n",
|
||||
"\n",
|
||||
" if key in book:\n",
|
||||
" q = book[key]\n",
|
||||
" try:\n",
|
||||
" q.remove(order)\n",
|
||||
" except ValueError:\n",
|
||||
" pass\n",
|
||||
" if not q:\n",
|
||||
" del book[key]\n",
|
||||
"\n",
|
||||
" order.status = OrderStatus.CANCELLED\n",
|
||||
" return order\n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def best_bid(self) -> Optional[float]:\n",
|
||||
" \"\"\"Mejor precio de compra.\"\"\"\n",
|
||||
" if not self._bids:\n",
|
||||
" return None\n",
|
||||
" return -self._bids.peekitem(0)[0]\n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def best_ask(self) -> Optional[float]:\n",
|
||||
" \"\"\"Mejor precio de venta.\"\"\"\n",
|
||||
" if not self._asks:\n",
|
||||
" return None\n",
|
||||
" return self._asks.peekitem(0)[0]\n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def spread(self) -> Optional[float]:\n",
|
||||
" \"\"\"Spread bid-ask.\"\"\"\n",
|
||||
" if self.best_bid is None or self.best_ask is None:\n",
|
||||
" return None\n",
|
||||
" return self.best_ask - self.best_bid\n",
|
||||
"\n",
|
||||
" @property\n",
|
||||
" def midprice(self) -> Optional[float]:\n",
|
||||
" \"\"\"Precio medio.\"\"\"\n",
|
||||
" if self.best_bid is None or self.best_ask is None:\n",
|
||||
" return None\n",
|
||||
" return (self.best_bid + self.best_ask) / 2\n",
|
||||
"\n",
|
||||
" def depth(self, side: Side, levels: int = 5) -> list[tuple[float, float]]:\n",
|
||||
" \"\"\"Profundidad del libro: [(price, total_qty), ...] para N niveles.\"\"\"\n",
|
||||
" book = self._bids if side == Side.BUY else self._asks\n",
|
||||
" result = []\n",
|
||||
" for key in book.islice(0, levels):\n",
|
||||
" price = -key if side == Side.BUY else key\n",
|
||||
" total_qty = sum(o.remaining for o in book[key])\n",
|
||||
" result.append((price, total_qty))\n",
|
||||
" return result\n",
|
||||
"\n",
|
||||
" def __repr__(self):\n",
|
||||
" bids = self.depth(Side.BUY, 3)\n",
|
||||
" asks = self.depth(Side.SELL, 3)\n",
|
||||
" return f\"OrderBook(best_bid={self.best_bid}, best_ask={self.best_ask}, spread={self.spread}, bids_top3={bids}, asks_top3={asks})\"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(\"OrderBook definido\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Matching Engine FIFO\n",
|
||||
"\n",
|
||||
"Lógica de matching:\n",
|
||||
"1. Orden de **compra** se matchea contra asks (de menor a mayor precio)\n",
|
||||
"2. Orden de **venta** se matchea contra bids (de mayor a menor precio)\n",
|
||||
"3. Dentro de cada nivel de precio: **FIFO** (primera en llegar, primera en ejecutarse)\n",
|
||||
"4. El precio del trade es siempre el de la orden **pasiva** (la que ya estaba en el libro)\n",
|
||||
"5. Si la orden agresora es **limit** y no se llena completamente, el resto se inserta en el libro como orden pasiva; el remanente de una orden **market** no se inserta (se descarta)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"class MatchingEngineFIFO:\n",
|
||||
" \"\"\"Motor de matching con prioridad precio-tiempo (FIFO).\"\"\"\n",
|
||||
"\n",
|
||||
" def __init__(self):\n",
|
||||
" self.book = OrderBook()\n",
|
||||
" self.trades: list[Trade] = []\n",
|
||||
"\n",
|
||||
" def submit(self, order: Order) -> list[Trade]:\n",
|
||||
" \"\"\"Procesa una orden: matchea lo posible y el resto va al libro.\"\"\"\n",
|
||||
" new_trades = self._match(order)\n",
|
||||
" self.trades.extend(new_trades)\n",
|
||||
"\n",
|
||||
" # Si queda cantidad y es limit, insertar en el libro\n",
|
||||
" if order.remaining > 0 and order.order_type == OrderType.LIMIT:\n",
|
||||
" order.status = OrderStatus.PARTIAL if order.remaining < order.qty else OrderStatus.NEW\n",
|
||||
" self.book.add(order)\n",
|
||||
"\n",
|
||||
" return new_trades\n",
|
||||
"\n",
|
||||
" def _match(self, aggressor: Order) -> list[Trade]:\n",
|
||||
" \"\"\"Matchea la orden agresora contra el lado opuesto del libro.\"\"\"\n",
|
||||
" trades = []\n",
|
||||
"\n",
|
||||
" # Seleccionar el lado opuesto\n",
|
||||
" if aggressor.side == Side.BUY:\n",
|
||||
" passive_book = self.book._asks # asks ordenados asc\n",
|
||||
" price_key_fn = lambda k: k # key es el precio directo\n",
|
||||
" can_match = lambda passive_price: (\n",
|
||||
" aggressor.order_type == OrderType.MARKET or\n",
|
||||
" passive_price <= aggressor.price\n",
|
||||
" )\n",
|
||||
" else:\n",
|
||||
" passive_book = self.book._bids # bids ordenados desc (key negado)\n",
|
||||
" price_key_fn = lambda k: -k # desnegar para obtener precio real\n",
|
||||
" can_match = lambda passive_price: (\n",
|
||||
" aggressor.order_type == OrderType.MARKET or\n",
|
||||
" passive_price >= aggressor.price\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
" keys_to_remove = []\n",
|
||||
"\n",
|
||||
" for key in list(passive_book.keys()):\n",
|
||||
" if aggressor.remaining <= 0:\n",
|
||||
" break\n",
|
||||
"\n",
|
||||
" passive_price = price_key_fn(key)\n",
|
||||
" if not can_match(passive_price):\n",
|
||||
" break # los siguientes niveles son peores\n",
|
||||
"\n",
|
||||
" queue = passive_book[key]\n",
|
||||
"\n",
|
||||
" while queue and aggressor.remaining > 0:\n",
|
||||
" passive = queue[0] # FIFO: primera de la cola\n",
|
||||
" fill_qty = min(aggressor.remaining, passive.remaining)\n",
|
||||
"\n",
|
||||
" # Ejecutar trade al precio pasivo\n",
|
||||
" trade = Trade(\n",
|
||||
" price=passive_price,\n",
|
||||
" qty=fill_qty,\n",
|
||||
" buyer_order_id=aggressor.order_id if aggressor.side == Side.BUY else passive.order_id,\n",
|
||||
" seller_order_id=passive.order_id if aggressor.side == Side.BUY else aggressor.order_id,\n",
|
||||
" )\n",
|
||||
" trades.append(trade)\n",
|
||||
"\n",
|
||||
" # Actualizar cantidades\n",
|
||||
" aggressor.remaining -= fill_qty\n",
|
||||
" passive.remaining -= fill_qty\n",
|
||||
"\n",
|
||||
" if passive.remaining <= 0:\n",
|
||||
" passive.status = OrderStatus.FILLED\n",
|
||||
" queue.popleft()\n",
|
||||
" self.book._orders.pop(passive.order_id, None)\n",
|
||||
" else:\n",
|
||||
" passive.status = OrderStatus.PARTIAL\n",
|
||||
"\n",
|
||||
" if not queue:\n",
|
||||
" keys_to_remove.append(key)\n",
|
||||
"\n",
|
||||
" # Limpiar niveles vacíos\n",
|
||||
" for key in keys_to_remove:\n",
|
||||
" del passive_book[key]\n",
|
||||
"\n",
|
||||
" if aggressor.remaining <= 0:\n",
|
||||
" aggressor.status = OrderStatus.FILLED\n",
|
||||
"\n",
|
||||
" return trades\n",
|
||||
"\n",
|
||||
" def cancel(self, order_id: str) -> Optional[Order]:\n",
|
||||
" \"\"\"Cancela una orden del libro.\"\"\"\n",
|
||||
" return self.book.cancel(order_id)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(\"MatchingEngineFIFO definido\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Tests básicos"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def test_basic_match():\n",
|
||||
" \"\"\"Dos órdenes opuestas al mismo precio → 1 trade.\"\"\"\n",
|
||||
" engine = MatchingEngineFIFO()\n",
|
||||
"\n",
|
||||
" # Sell limit a 100\n",
|
||||
" sell = Order(side=Side.SELL, price=100.0, qty=10.0)\n",
|
||||
" engine.submit(sell)\n",
|
||||
"\n",
|
||||
" # Buy limit a 100 → debe matchear\n",
|
||||
" buy = Order(side=Side.BUY, price=100.0, qty=10.0)\n",
|
||||
" trades = engine.submit(buy)\n",
|
||||
"\n",
|
||||
" assert len(trades) == 1, f\"Expected 1 trade, got {len(trades)}\"\n",
|
||||
" assert trades[0].price == 100.0\n",
|
||||
" assert trades[0].qty == 10.0\n",
|
||||
" assert buy.status == OrderStatus.FILLED\n",
|
||||
" assert sell.status == OrderStatus.FILLED\n",
|
||||
" print(\"✓ test_basic_match\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def test_partial_fill():\n",
|
||||
" \"\"\"Buy de 15 contra sell de 10 → fill parcial, 5 queda en libro.\"\"\"\n",
|
||||
" engine = MatchingEngineFIFO()\n",
|
||||
"\n",
|
||||
" sell = Order(side=Side.SELL, price=100.0, qty=10.0)\n",
|
||||
" engine.submit(sell)\n",
|
||||
"\n",
|
||||
" buy = Order(side=Side.BUY, price=100.0, qty=15.0)\n",
|
||||
" trades = engine.submit(buy)\n",
|
||||
"\n",
|
||||
" assert len(trades) == 1\n",
|
||||
" assert trades[0].qty == 10.0\n",
|
||||
" assert buy.remaining == 5.0\n",
|
||||
" assert buy.status == OrderStatus.PARTIAL\n",
|
||||
" assert engine.book.best_bid == 100.0\n",
|
||||
" print(\"✓ test_partial_fill\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def test_fifo_priority():\n",
|
||||
" \"\"\"Dos sells al mismo precio → la primera se llena primero (FIFO).\"\"\"\n",
|
||||
" engine = MatchingEngineFIFO()\n",
|
||||
"\n",
|
||||
" sell1 = Order(side=Side.SELL, price=100.0, qty=5.0)\n",
|
||||
" sell2 = Order(side=Side.SELL, price=100.0, qty=5.0)\n",
|
||||
" engine.submit(sell1)\n",
|
||||
" engine.submit(sell2)\n",
|
||||
"\n",
|
||||
" buy = Order(side=Side.BUY, price=100.0, qty=7.0)\n",
|
||||
" trades = engine.submit(buy)\n",
|
||||
"\n",
|
||||
" assert len(trades) == 2, f\"Expected 2 trades, got {len(trades)}\"\n",
|
||||
" assert trades[0].qty == 5.0 # sell1 completamente llena\n",
|
||||
" assert trades[1].qty == 2.0 # sell2 parcial\n",
|
||||
" assert sell1.status == OrderStatus.FILLED\n",
|
||||
" assert sell2.status == OrderStatus.PARTIAL\n",
|
||||
" assert sell2.remaining == 3.0\n",
|
||||
" print(\"✓ test_fifo_priority\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def test_price_priority():\n",
|
||||
" \"\"\"Sell a 99 antes que sell a 100 → buyer obtiene mejor precio.\"\"\"\n",
|
||||
" engine = MatchingEngineFIFO()\n",
|
||||
"\n",
|
||||
" sell_expensive = Order(side=Side.SELL, price=100.0, qty=5.0)\n",
|
||||
" sell_cheap = Order(side=Side.SELL, price=99.0, qty=5.0)\n",
|
||||
" engine.submit(sell_expensive)\n",
|
||||
" engine.submit(sell_cheap)\n",
|
||||
"\n",
|
||||
" buy = Order(side=Side.BUY, price=100.0, qty=8.0)\n",
|
||||
" trades = engine.submit(buy)\n",
|
||||
"\n",
|
||||
" assert len(trades) == 2\n",
|
||||
" assert trades[0].price == 99.0 # primero la más barata\n",
|
||||
" assert trades[0].qty == 5.0\n",
|
||||
" assert trades[1].price == 100.0 # luego la cara\n",
|
||||
" assert trades[1].qty == 3.0\n",
|
||||
" print(\"✓ test_price_priority\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def test_no_match_spread():\n",
|
||||
" \"\"\"Buy a 99, sell a 100 → no matchea, ambas en libro.\"\"\"\n",
|
||||
" engine = MatchingEngineFIFO()\n",
|
||||
"\n",
|
||||
" sell = Order(side=Side.SELL, price=100.0, qty=10.0)\n",
|
||||
" engine.submit(sell)\n",
|
||||
"\n",
|
||||
" buy = Order(side=Side.BUY, price=99.0, qty=10.0)\n",
|
||||
" trades = engine.submit(buy)\n",
|
||||
"\n",
|
||||
" assert len(trades) == 0\n",
|
||||
" assert engine.book.best_bid == 99.0\n",
|
||||
" assert engine.book.best_ask == 100.0\n",
|
||||
" assert engine.book.spread == 1.0\n",
|
||||
" print(\"✓ test_no_match_spread\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def test_market_order():\n",
|
||||
" \"\"\"Market order matchea a cualquier precio disponible.\"\"\"\n",
|
||||
" engine = MatchingEngineFIFO()\n",
|
||||
"\n",
|
||||
" sell = Order(side=Side.SELL, price=105.0, qty=10.0)\n",
|
||||
" engine.submit(sell)\n",
|
||||
"\n",
|
||||
" buy = Order(side=Side.BUY, price=0, qty=5.0, order_type=OrderType.MARKET)\n",
|
||||
" trades = engine.submit(buy)\n",
|
||||
"\n",
|
||||
" assert len(trades) == 1\n",
|
||||
" assert trades[0].price == 105.0 # al precio de la pasiva\n",
|
||||
" assert trades[0].qty == 5.0\n",
|
||||
" print(\"✓ test_market_order\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def test_cancel():\n",
|
||||
" \"\"\"Cancelar una orden la remueve del libro.\"\"\"\n",
|
||||
" engine = MatchingEngineFIFO()\n",
|
||||
"\n",
|
||||
" sell = Order(side=Side.SELL, price=100.0, qty=10.0)\n",
|
||||
" engine.submit(sell)\n",
|
||||
" assert engine.book.best_ask == 100.0\n",
|
||||
"\n",
|
||||
" cancelled = engine.cancel(sell.order_id)\n",
|
||||
" assert cancelled is not None\n",
|
||||
" assert cancelled.status == OrderStatus.CANCELLED\n",
|
||||
" assert engine.book.best_ask is None\n",
|
||||
" print(\"✓ test_cancel\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Ejecutar todos\n",
|
||||
"test_basic_match()\n",
|
||||
"test_partial_fill()\n",
|
||||
"test_fifo_priority()\n",
|
||||
"test_price_priority()\n",
|
||||
"test_no_match_spread()\n",
|
||||
"test_market_order()\n",
|
||||
"test_cancel()\n",
|
||||
"print(\"\\n=== Todos los tests pasaron ===\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 5. Visualización del Order Book"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def plot_orderbook(engine: MatchingEngineFIFO, levels: int = 10, title: str = \"Order Book\"):\n",
|
||||
" \"\"\"Visualiza la profundidad del order book.\"\"\"\n",
|
||||
" bids = engine.book.depth(Side.BUY, levels)\n",
|
||||
" asks = engine.book.depth(Side.SELL, levels)\n",
|
||||
"\n",
|
||||
" fig, ax = plt.subplots(figsize=(10, 5))\n",
|
||||
"\n",
|
||||
" if bids:\n",
|
||||
" bid_prices, bid_qtys = zip(*bids)\n",
|
||||
" bid_cum = np.cumsum(bid_qtys)\n",
|
||||
" ax.barh(range(len(bids)), bid_qtys, color='#2ecc71', alpha=0.7, label='Bids')\n",
|
||||
" for i, (p, q) in enumerate(bids):\n",
|
||||
" ax.text(q + 0.1, i, f\"{p:.2f} ({q:.1f})\", va='center', fontsize=9)\n",
|
||||
"\n",
|
||||
" if asks:\n",
|
||||
" ask_prices, ask_qtys = zip(*asks)\n",
|
||||
" y_offset = len(bids) + 1 # gap visual\n",
|
||||
" ax.barh(range(y_offset, y_offset + len(asks)), ask_qtys, color='#e74c3c', alpha=0.7, label='Asks')\n",
|
||||
" for i, (p, q) in enumerate(asks):\n",
|
||||
" ax.text(q + 0.1, y_offset + i, f\"{p:.2f} ({q:.1f})\", va='center', fontsize=9)\n",
|
||||
"\n",
|
||||
" ax.set_yticks([])\n",
|
||||
" ax.set_xlabel('Quantity')\n",
|
||||
" ax.set_title(f\"{title}\\nSpread: {engine.book.spread:.2f} | Mid: {engine.book.midprice:.2f}\" if engine.book.spread else title)\n",
|
||||
" ax.legend()\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Crear un libro con varias órdenes para visualizar\n",
|
||||
"import random\n",
|
||||
"random.seed(42)\n",
|
||||
"\n",
|
||||
"engine = MatchingEngineFIFO()\n",
|
||||
"\n",
|
||||
"# Poblar bids alrededor de 100\n",
|
||||
"for i in range(20):\n",
|
||||
" price = round(100 - random.uniform(0.1, 2.0), 2)\n",
|
||||
" qty = round(random.uniform(1, 20), 1)\n",
|
||||
" engine.submit(Order(side=Side.BUY, price=price, qty=qty))\n",
|
||||
"\n",
|
||||
"# Poblar asks alrededor de 100\n",
|
||||
"for i in range(20):\n",
|
||||
" price = round(100 + random.uniform(0.1, 2.0), 2)\n",
|
||||
" qty = round(random.uniform(1, 20), 1)\n",
|
||||
" engine.submit(Order(side=Side.SELL, price=price, qty=qty))\n",
|
||||
"\n",
|
||||
"print(engine.book)\n",
|
||||
"plot_orderbook(engine, levels=8, title=\"Order Book Sintético\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 6. Simulación: impacto de una market order\n",
|
||||
"\n",
|
||||
"Veamos cómo una market order grande barre niveles del libro y mueve el precio."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Estado antes\n",
|
||||
"print(\"=== ANTES ===\")\n",
|
||||
"print(f\"Best ask: {engine.book.best_ask}\")\n",
|
||||
"print(f\"Best bid: {engine.book.best_bid}\")\n",
|
||||
"print(f\"Spread: {engine.book.spread:.4f}\")\n",
|
||||
"print(f\"Midprice: {engine.book.midprice:.4f}\")\n",
|
||||
"print(f\"\\nAsk depth (5 niveles): {engine.book.depth(Side.SELL, 5)}\")\n",
|
||||
"\n",
|
||||
"# Market buy grande: comprar 50 unidades\n",
|
||||
"big_buy = Order(side=Side.BUY, price=0, qty=50.0, order_type=OrderType.MARKET)\n",
|
||||
"trades = engine.submit(big_buy)\n",
|
||||
"\n",
|
||||
"print(f\"\\n=== MARKET BUY 50 ===\")\n",
|
||||
"print(f\"Trades ejecutados: {len(trades)}\")\n",
|
||||
"for t in trades:\n",
|
||||
" print(f\" {t.qty:.1f} @ {t.price:.2f}\")\n",
|
||||
"\n",
|
||||
"avg_price = sum(t.price * t.qty for t in trades) / sum(t.qty for t in trades) if trades else 0\n",
|
||||
"print(f\"\\nPrecio promedio ponderado: {avg_price:.4f}\")\n",
|
||||
"print(f\"Slippage vs best ask: {avg_price - trades[0].price:.4f}\" if trades else \"\")\n",
|
||||
"\n",
|
||||
"print(f\"\\n=== DESPUÉS ===\")\n",
|
||||
"print(f\"Best ask: {engine.book.best_ask}\")\n",
|
||||
"print(f\"Best bid: {engine.book.best_bid}\")\n",
|
||||
"print(f\"Spread: {engine.book.spread}\")\n",
|
||||
"print(engine.book)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.13.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,666 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Simulación de Mercado con Agentes\n",
|
||||
"\n",
|
||||
"Simulador agent-based donde **makers** colocan bids/asks y **takers** lanzan market orders.\n",
|
||||
"El precio emerge de la interacción entre ellos.\n",
|
||||
"\n",
|
||||
"## Parámetros ajustables\n",
|
||||
"\n",
|
||||
"| Parámetro | Qué controla |\n",
|
||||
"|---|---|\n",
|
||||
"| `sigma` | Volatilidad del precio (cuánto se mueve) |\n",
|
||||
"| `mu` | Drift/tendencia (positivo = sube, negativo = baja) |\n",
|
||||
"| `n_makers` | Cuántos market makers hay poniendo liquidez |\n",
|
||||
"| `n_takers_lambda` | Ritmo de llegada de takers (órdenes/tick) |\n",
|
||||
"| `maker_spread` | Spread base que los makers quieren capturar |\n",
|
||||
"| `gamma` | Aversión al riesgo del maker (alto = ajusta más por inventario) |\n",
|
||||
"| `taker_size_alpha` | Exponente power-law para tamaño de órdenes (bajo = más ballenas) |\n",
|
||||
"| `hawkes_alpha` | Contagio entre trades (alto = más ráfagas) |\n",
|
||||
"| `hawkes_beta` | Decaimiento del contagio (alto = ráfagas más cortas) |\n",
|
||||
"| `jump_intensity` | Frecuencia de saltos bruscos de precio |\n",
|
||||
"| `jump_size_std` | Desviación estándar del tamaño de los saltos (en log-retorno, media 0) |"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "Exception",
|
||||
"evalue": "File `'01_matching_engine_fifo.ipynb'` not found.",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
|
||||
"\u001b[31mOSError\u001b[39m Traceback (most recent call last)",
|
||||
"\u001b[31mOSError\u001b[39m: File `'01_matching_engine_fifo.ipynb'` not found.",
|
||||
"\nThe above exception was the direct cause of the following exception:\n",
|
||||
"\u001b[31mException\u001b[39m Traceback (most recent call last)",
|
||||
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[1]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;66;03m# Importar todo del notebook 01\u001b[39;00m\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m get_ipython().run_line_magic(\u001b[33m'run'\u001b[39m, \u001b[33m'01_matching_engine_fifo.ipynb'\u001b[39m)\n",
|
||||
"\u001b[31mException\u001b[39m: File `'01_matching_engine_fifo.ipynb'` not found."
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Importar todo del notebook 01\n",
|
||||
"%run 01_matching_engine_fifo.ipynb"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Parámetros de simulación"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from dataclasses import dataclass\n",
|
||||
"\n",
|
||||
"@dataclass\n",
|
||||
"class SimParams:\n",
|
||||
" \"\"\"Todos los parámetros ajustables de la simulación.\"\"\"\n",
|
||||
"\n",
|
||||
" # --- Precio fundamental ---\n",
|
||||
" initial_price: float = 100.0 # precio inicial\n",
|
||||
" mu: float = 0.0 # drift (tendencia): 0 = sin tendencia\n",
|
||||
" sigma: float = 0.02 # volatilidad por tick (2%)\n",
|
||||
"\n",
|
||||
" # --- Saltos (jump-diffusion) ---\n",
|
||||
" jump_intensity: float = 0.02 # prob de salto por tick (2%)\n",
|
||||
" jump_size_std: float = 0.05 # std del tamaño del salto (5%)\n",
|
||||
"\n",
|
||||
" # --- Makers ---\n",
|
||||
" n_makers: int = 5 # número de market makers\n",
|
||||
" maker_spread: float = 0.5 # spread base (en unidades de precio)\n",
|
||||
" maker_qty: float = 10.0 # qty base por orden de maker\n",
|
||||
" gamma: float = 0.1 # aversión al riesgo (Avellaneda-Stoikov)\n",
|
||||
" maker_levels: int = 3 # niveles de profundidad que pone cada maker\n",
|
||||
"\n",
|
||||
" # --- Takers ---\n",
|
||||
" n_takers_lambda: float = 2.0 # media de takers por tick (Poisson)\n",
|
||||
" taker_size_alpha: float = 2.0 # exponente power-law para tamaño (mayor = menos ballenas)\n",
|
||||
" taker_size_min: float = 1.0 # tamaño mínimo de orden taker\n",
|
||||
" taker_size_max: float = 100.0 # tamaño máximo de orden taker\n",
|
||||
"\n",
|
||||
" # --- Hawkes (clustering de takers) ---\n",
|
||||
" hawkes_alpha: float = 0.5 # excitación por trade (0 = Poisson puro)\n",
|
||||
" hawkes_beta: float = 1.0 # decaimiento de excitación\n",
|
||||
"\n",
|
||||
" # --- Simulación ---\n",
|
||||
" n_ticks: int = 500 # duración de la simulación\n",
|
||||
" seed: int = 42 # semilla para reproducibilidad\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"params = SimParams()\n",
|
||||
"print(\"Parámetros cargados\")\n",
|
||||
"print(f\" Precio inicial: {params.initial_price}\")\n",
|
||||
"print(f\" Volatilidad: {params.sigma}\")\n",
|
||||
"print(f\" Makers: {params.n_makers} (spread={params.maker_spread}, γ={params.gamma})\")\n",
|
||||
"print(f\" Takers λ: {params.n_takers_lambda} (Hawkes α={params.hawkes_alpha})\")\n",
|
||||
"print(f\" Ticks: {params.n_ticks}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Generador de precio fundamental\n",
|
||||
"\n",
|
||||
"El \"precio verdadero\" que los agentes intentan seguir. Usa **jump-diffusion**:\n",
|
||||
"- La mayor parte del tiempo se mueve suavemente (GBM)\n",
|
||||
"- De vez en cuando da un salto brusco (jump)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def generate_fundamental_prices(p: SimParams) -> np.ndarray:\n",
|
||||
" \"\"\"Genera serie de precios fundamentales con jump-diffusion.\n",
|
||||
"\n",
|
||||
" S(t+1) = S(t) * exp((mu - sigma²/2)*dt + sigma*sqrt(dt)*Z + J*N)\n",
|
||||
" donde Z ~ N(0,1), N ~ Bernoulli(jump_intensity), J ~ N(0, jump_size_std)\n",
|
||||
" \"\"\"\n",
|
||||
" rng = np.random.default_rng(p.seed)\n",
|
||||
" prices = np.zeros(p.n_ticks)\n",
|
||||
" prices[0] = p.initial_price\n",
|
||||
"\n",
|
||||
" dt = 1.0 # cada tick es una unidad de tiempo\n",
|
||||
"\n",
|
||||
" for t in range(1, p.n_ticks):\n",
|
||||
" # GBM component\n",
|
||||
" z = rng.standard_normal()\n",
|
||||
" gbm = (p.mu - 0.5 * p.sigma**2) * dt + p.sigma * np.sqrt(dt) * z\n",
|
||||
"\n",
|
||||
" # Jump component\n",
|
||||
" jump = 0.0\n",
|
||||
" if rng.random() < p.jump_intensity:\n",
|
||||
" jump = rng.normal(0, p.jump_size_std)\n",
|
||||
"\n",
|
||||
" prices[t] = prices[t-1] * np.exp(gbm + jump)\n",
|
||||
"\n",
|
||||
" return prices\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Preview\n",
|
||||
"fund_prices = generate_fundamental_prices(params)\n",
|
||||
"plt.figure(figsize=(12, 3))\n",
|
||||
"plt.plot(fund_prices, linewidth=0.8)\n",
|
||||
"plt.title(f'Precio fundamental (σ={params.sigma}, jumps={params.jump_intensity})')\n",
|
||||
"plt.xlabel('Tick')\n",
|
||||
"plt.ylabel('Precio')\n",
|
||||
"plt.grid(True, alpha=0.3)\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Hawkes process para arrival de takers\n",
|
||||
"\n",
|
||||
"Genera cuántos takers llegan en cada tick. Con Hawkes, un trade excita más trades:\n",
|
||||
"- `hawkes_alpha = 0` → Poisson puro (sin contagio)\n",
|
||||
"- `hawkes_alpha > 0` → trades generan más trades (ráfagas)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def generate_hawkes_arrivals(p: SimParams, n_trades_per_tick: list[int]) -> list[int]:\n",
|
||||
" \"\"\"Genera número de takers por tick usando Hawkes process.\n",
|
||||
"\n",
|
||||
" λ(t) = λ_base + Σ α * exp(-β * (t - tᵢ))\n",
|
||||
" donde tᵢ son los ticks donde hubo trades.\n",
|
||||
" \"\"\"\n",
|
||||
" rng = np.random.default_rng(p.seed + 1)\n",
|
||||
" arrivals = []\n",
|
||||
" excitation = 0.0 # acumulador de excitación\n",
|
||||
"\n",
|
||||
" for t in range(p.n_ticks):\n",
|
||||
" # Intensidad actual\n",
|
||||
" lam = p.n_takers_lambda + excitation\n",
|
||||
" lam = max(0.1, lam) # piso para evitar λ negativo\n",
|
||||
"\n",
|
||||
" # Número de takers este tick\n",
|
||||
" n = rng.poisson(lam)\n",
|
||||
" arrivals.append(n)\n",
|
||||
"\n",
|
||||
" # Actualizar excitación: decae + se excita por trades\n",
|
||||
" excitation *= np.exp(-p.hawkes_beta)\n",
|
||||
" if t < len(n_trades_per_tick):\n",
|
||||
" excitation += p.hawkes_alpha * n_trades_per_tick[t]\n",
|
||||
" else:\n",
|
||||
" excitation += p.hawkes_alpha * n\n",
|
||||
"\n",
|
||||
" return arrivals\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Preview con Poisson puro\n",
|
||||
"arrivals_preview = generate_hawkes_arrivals(params, [0] * params.n_ticks)\n",
|
||||
"print(f\"Takers por tick: min={min(arrivals_preview)}, max={max(arrivals_preview)}, mean={np.mean(arrivals_preview):.1f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Agentes\n",
|
||||
"\n",
|
||||
"### Market Maker (Avellaneda-Stoikov)\n",
|
||||
"Calcula su **precio de reserva** según inventario:\n",
|
||||
"- Si compró mucho → baja sus precios para vender\n",
|
||||
"- Si vendió mucho → sube sus precios para comprar\n",
|
||||
"\n",
|
||||
"### Taker\n",
|
||||
"Lanza market orders con tamaño power-law (muchas chicas, pocas grandes)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@dataclass\n",
|
||||
"class MakerState:\n",
|
||||
" \"\"\"Estado interno de un market maker.\"\"\"\n",
|
||||
" maker_id: str\n",
|
||||
" inventory: float = 0.0 # positivo = largo, negativo = corto\n",
|
||||
" pnl: float = 0.0 # profit & loss acumulado\n",
|
||||
" active_order_ids: list = field(default_factory=list)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def maker_quotes(state: MakerState, mid: float, p: SimParams, t: int, rng) -> list[Order]:\n",
|
||||
" \"\"\"Genera las órdenes de un maker usando Avellaneda-Stoikov.\n",
|
||||
"\n",
|
||||
" Precio de reserva: r = mid - inventory * gamma * sigma²\n",
|
||||
" Spread óptimo: delta = gamma * sigma² + spread_base\n",
|
||||
" \"\"\"\n",
|
||||
" # Precio de reserva: ajustado por inventario\n",
|
||||
" # Si inventory > 0 (compré mucho), r baja → mis asks bajan para vender\n",
|
||||
" # Si inventory < 0 (vendí mucho), r sube → mis bids suben para comprar\n",
|
||||
" reservation = mid - state.inventory * p.gamma * p.sigma**2\n",
|
||||
"\n",
|
||||
" # Spread: base + ajuste por volatilidad\n",
|
||||
" half_spread = p.maker_spread / 2 + p.gamma * p.sigma**2 / 2\n",
|
||||
"\n",
|
||||
" orders = []\n",
|
||||
" for level in range(p.maker_levels):\n",
|
||||
" offset = level * half_spread * 0.5 # niveles más profundos\n",
|
||||
" qty = p.maker_qty * (1 + level * 0.5) # más qty en niveles profundos\n",
|
||||
"\n",
|
||||
" # Pequeña variación para que los makers no sean idénticos\n",
|
||||
" noise = rng.uniform(-0.05, 0.05)\n",
|
||||
"\n",
|
||||
" bid_price = round(reservation - half_spread - offset + noise, 2)\n",
|
||||
" ask_price = round(reservation + half_spread + offset + noise, 2)\n",
|
||||
"\n",
|
||||
" if bid_price > 0:\n",
|
||||
" orders.append(Order(side=Side.BUY, price=bid_price, qty=qty))\n",
|
||||
" if ask_price > 0:\n",
|
||||
" orders.append(Order(side=Side.SELL, price=ask_price, qty=qty))\n",
|
||||
"\n",
|
||||
" return orders\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def taker_order(mid: float, p: SimParams, rng) -> Order:\n",
|
||||
" \"\"\"Genera una market order de taker.\n",
|
||||
"\n",
|
||||
" Lado: 50/50 compra/venta\n",
|
||||
" Tamaño: power-law (Pareto) truncada\n",
|
||||
" \"\"\"\n",
|
||||
" side = Side.BUY if rng.random() < 0.5 else Side.SELL\n",
|
||||
"\n",
|
||||
" # Power-law: P(size > x) ~ x^(-alpha)\n",
|
||||
" # Pareto genera valores >= 1, escalamos al rango deseado\n",
|
||||
" raw_size = (rng.pareto(p.taker_size_alpha) + 1) * p.taker_size_min\n",
|
||||
" size = min(raw_size, p.taker_size_max)\n",
|
||||
" size = round(size, 1)\n",
|
||||
"\n",
|
||||
" return Order(side=side, price=0, qty=size, order_type=OrderType.MARKET)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(\"Agentes definidos: MakerState, maker_quotes (Avellaneda-Stoikov), taker_order (power-law)\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 5. Loop de simulación\n",
|
||||
"\n",
|
||||
"En cada tick:\n",
|
||||
"1. El precio fundamental se mueve (GBM + jumps)\n",
|
||||
"2. Cada maker cancela sus órdenes anteriores y coloca nuevas\n",
|
||||
"3. Llegan N takers (Hawkes) y lanzan market orders\n",
|
||||
"4. El engine matchea todo\n",
|
||||
"5. Se actualizan inventarios y PnL de los makers"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"@dataclass\n",
|
||||
"class SimResult:\n",
|
||||
" \"\"\"Resultados de la simulación para análisis.\"\"\"\n",
|
||||
" fundamental_prices: np.ndarray # precio \"verdadero\"\n",
|
||||
" trade_prices: list[float] # precio de cada trade\n",
|
||||
" trade_times: list[int] # tick de cada trade\n",
|
||||
" trade_sizes: list[float] # tamaño de cada trade\n",
|
||||
" spreads: list[float] # spread en cada tick\n",
|
||||
" midprices: list[float] # midprice del book en cada tick\n",
|
||||
" taker_arrivals: list[int] # takers por tick\n",
|
||||
" maker_states: list[MakerState] # estado final de los makers\n",
|
||||
" n_trades_per_tick: list[int] # trades por tick\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def run_simulation(p: SimParams) -> SimResult:\n",
|
||||
" \"\"\"Ejecuta la simulación completa.\"\"\"\n",
|
||||
" rng = np.random.default_rng(p.seed)\n",
|
||||
"\n",
|
||||
" # Generar precios fundamentales\n",
|
||||
" fund_prices = generate_fundamental_prices(p)\n",
|
||||
"\n",
|
||||
" # Inicializar engine y makers\n",
|
||||
" engine = MatchingEngineFIFO()\n",
|
||||
" makers = [MakerState(maker_id=f\"maker_{i}\") for i in range(p.n_makers)]\n",
|
||||
"\n",
|
||||
" # Resultados\n",
|
||||
" trade_prices, trade_times, trade_sizes = [], [], []\n",
|
||||
" spreads, midprices = [], []\n",
|
||||
" n_trades_per_tick = []\n",
|
||||
"\n",
|
||||
" # Hawkes state\n",
|
||||
" hawkes_excitation = 0.0\n",
|
||||
"\n",
|
||||
" for t in range(p.n_ticks):\n",
|
||||
" mid = fund_prices[t]\n",
|
||||
"\n",
|
||||
" # --- MAKERS: cancelar y recolocar ---\n",
|
||||
" for maker in makers:\n",
|
||||
" # Cancelar órdenes anteriores\n",
|
||||
" for oid in maker.active_order_ids:\n",
|
||||
" engine.cancel(oid)\n",
|
||||
" maker.active_order_ids = []\n",
|
||||
"\n",
|
||||
" # Colocar nuevas\n",
|
||||
" quotes = maker_quotes(maker, mid, p, t, rng)\n",
|
||||
" for q in quotes:\n",
|
||||
" engine.submit(q)\n",
|
||||
" maker.active_order_ids.append(q.order_id)\n",
|
||||
"\n",
|
||||
" # --- TAKERS: generar con Hawkes ---\n",
|
||||
" lam = p.n_takers_lambda + hawkes_excitation\n",
|
||||
" lam = max(0.1, lam)\n",
|
||||
" n_takers = rng.poisson(lam)\n",
|
||||
"\n",
|
||||
" tick_trades = 0\n",
|
||||
" for _ in range(n_takers):\n",
|
||||
" order = taker_order(mid, p, rng)\n",
|
||||
" trades = engine.submit(order)\n",
|
||||
" tick_trades += len(trades)\n",
|
||||
"\n",
|
||||
" for tr in trades:\n",
|
||||
" trade_prices.append(tr.price)\n",
|
||||
" trade_times.append(t)\n",
|
||||
" trade_sizes.append(tr.qty)\n",
|
||||
"\n",
|
||||
" # Actualizar inventario de makers\n",
|
||||
" for maker in makers:\n",
|
||||
" if tr.buyer_order_id in maker.active_order_ids:\n",
|
||||
" maker.inventory += tr.qty\n",
|
||||
" maker.pnl -= tr.price * tr.qty\n",
|
||||
" elif tr.seller_order_id in maker.active_order_ids:\n",
|
||||
" maker.inventory -= tr.qty\n",
|
||||
" maker.pnl += tr.price * tr.qty\n",
|
||||
"\n",
|
||||
" # Hawkes: actualizar excitación\n",
|
||||
" hawkes_excitation *= np.exp(-p.hawkes_beta)\n",
|
||||
" hawkes_excitation += p.hawkes_alpha * tick_trades\n",
|
||||
"\n",
|
||||
" n_trades_per_tick.append(tick_trades)\n",
|
||||
"\n",
|
||||
" # Registrar estado del book\n",
|
||||
" sp = engine.book.spread\n",
|
||||
" spreads.append(sp if sp is not None else 0.0)\n",
|
||||
" mp = engine.book.midprice\n",
|
||||
" midprices.append(mp if mp is not None else mid)\n",
|
||||
"\n",
|
||||
" # PnL final: mark-to-market\n",
|
||||
" final_price = fund_prices[-1]\n",
|
||||
" for maker in makers:\n",
|
||||
" maker.pnl += maker.inventory * final_price\n",
|
||||
"\n",
|
||||
" return SimResult(\n",
|
||||
" fundamental_prices=fund_prices,\n",
|
||||
" trade_prices=trade_prices,\n",
|
||||
" trade_times=trade_times,\n",
|
||||
" trade_sizes=trade_sizes,\n",
|
||||
" spreads=spreads,\n",
|
||||
" midprices=midprices,\n",
|
||||
" taker_arrivals=n_trades_per_tick,\n",
|
||||
" maker_states=makers,\n",
|
||||
" n_trades_per_tick=n_trades_per_tick,\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print(\"run_simulation() definida\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 6. Ejecutar simulación base"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"result = run_simulation(params)\n",
|
||||
"\n",
|
||||
"print(f\"Total trades: {len(result.trade_prices)}\")\n",
|
||||
"print(f\"Spread promedio: {np.mean(result.spreads):.4f}\")\n",
|
||||
"print(f\"Trades/tick promedio: {np.mean(result.n_trades_per_tick):.1f}\")\n",
|
||||
"print(f\"\\nEstado final de makers:\")\n",
|
||||
"for m in result.maker_states:\n",
|
||||
" print(f\" {m.maker_id}: inventario={m.inventory:.1f}, PnL={m.pnl:.2f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 7. Dashboard de resultados"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def plot_simulation(result: SimResult, params: SimParams):\n",
|
||||
" \"\"\"Dashboard de la simulación.\"\"\"\n",
|
||||
" fig, axes = plt.subplots(4, 1, figsize=(14, 12), gridspec_kw={'height_ratios': [3, 1, 1, 1]})\n",
|
||||
"\n",
|
||||
" # --- Panel 1: Precio ---\n",
|
||||
" ax = axes[0]\n",
|
||||
" ax.plot(result.fundamental_prices, color='gray', linewidth=0.8, alpha=0.5, label='Fundamental')\n",
|
||||
" ax.plot(result.midprices, color='#3498db', linewidth=0.8, label='Midprice (book)')\n",
|
||||
" if result.trade_prices:\n",
|
||||
" ax.scatter(result.trade_times, result.trade_prices, s=1, alpha=0.3, color='orange', label='Trades')\n",
|
||||
" ax.set_ylabel('Precio')\n",
|
||||
" ax.set_title(f'Simulación: {params.n_makers} makers, λ_takers={params.n_takers_lambda}, '\n",
|
||||
" f'σ={params.sigma}, γ={params.gamma}, Hawkes α={params.hawkes_alpha}')\n",
|
||||
" ax.legend(loc='upper left', fontsize=8)\n",
|
||||
" ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
" # --- Panel 2: Spread ---\n",
|
||||
" ax = axes[1]\n",
|
||||
" ax.fill_between(range(len(result.spreads)), result.spreads, color='#9b59b6', alpha=0.5)\n",
|
||||
" ax.set_ylabel('Spread')\n",
|
||||
" ax.set_ylim(0, np.percentile(result.spreads, 99) * 1.5 if result.spreads else 1)\n",
|
||||
" ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
" # --- Panel 3: Trades por tick ---\n",
|
||||
" ax = axes[2]\n",
|
||||
" ax.bar(range(len(result.n_trades_per_tick)), result.n_trades_per_tick,\n",
|
||||
" color='#e67e22', alpha=0.6, width=1.0)\n",
|
||||
" ax.set_ylabel('Trades/tick')\n",
|
||||
" ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
" # --- Panel 4: Volumen por trade ---\n",
|
||||
" ax = axes[3]\n",
|
||||
" if result.trade_sizes:\n",
|
||||
" ax.scatter(result.trade_times, result.trade_sizes, s=2, alpha=0.4, color='#2ecc71')\n",
|
||||
" ax.set_ylabel('Tamaño orden')\n",
|
||||
" ax.set_xlabel('Tick')\n",
|
||||
" ax.set_yscale('log')\n",
|
||||
" ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.show()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"plot_simulation(result, params)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 8. Experimentos: comparar escenarios\n",
|
||||
"\n",
|
||||
"Ajusta los parámetros y observa cómo cambia el mercado."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# --- Experimento 1: Mercado tranquilo vs volátil ---\n",
|
||||
"\n",
|
||||
"calm = SimParams(sigma=0.005, jump_intensity=0.0, n_ticks=300, seed=42)\n",
|
||||
"volatile = SimParams(sigma=0.05, jump_intensity=0.1, jump_size_std=0.08, n_ticks=300, seed=42)\n",
|
||||
"\n",
|
||||
"r_calm = run_simulation(calm)\n",
|
||||
"r_volatile = run_simulation(volatile)\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(1, 2, figsize=(14, 4))\n",
|
||||
"\n",
|
||||
"axes[0].plot(r_calm.midprices, color='#3498db')\n",
|
||||
"axes[0].set_title(f'Tranquilo (σ={calm.sigma}, jumps=0)')\n",
|
||||
"axes[0].set_ylabel('Precio')\n",
|
||||
"axes[0].grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"axes[1].plot(r_volatile.midprices, color='#e74c3c')\n",
|
||||
"axes[1].set_title(f'Volátil (σ={volatile.sigma}, jumps={volatile.jump_intensity})')\n",
|
||||
"axes[1].grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"print(f\"Spread promedio → Tranquilo: {np.mean(r_calm.spreads):.4f}, Volátil: {np.mean(r_volatile.spreads):.4f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# --- Experimento 2: Pocos makers vs muchos makers ---\n",
|
||||
"\n",
|
||||
"few_makers = SimParams(n_makers=1, n_ticks=300, seed=42)\n",
|
||||
"many_makers = SimParams(n_makers=10, n_ticks=300, seed=42)\n",
|
||||
"\n",
|
||||
"r_few = run_simulation(few_makers)\n",
|
||||
"r_many = run_simulation(many_makers)\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(1, 2, figsize=(14, 4))\n",
|
||||
"\n",
|
||||
"axes[0].fill_between(range(len(r_few.spreads)), r_few.spreads, color='#e74c3c', alpha=0.5)\n",
|
||||
"axes[0].set_title(f'1 maker → spread promedio: {np.mean(r_few.spreads):.4f}')\n",
|
||||
"axes[0].set_ylabel('Spread')\n",
|
||||
"axes[0].grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"axes[1].fill_between(range(len(r_many.spreads)), r_many.spreads, color='#2ecc71', alpha=0.5)\n",
|
||||
"axes[1].set_title(f'10 makers → spread promedio: {np.mean(r_many.spreads):.4f}')\n",
|
||||
"axes[1].grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# --- Experimento 3: Sin Hawkes vs con Hawkes fuerte ---\n",
|
||||
"\n",
|
||||
"no_hawkes = SimParams(hawkes_alpha=0.0, n_ticks=300, seed=42)\n",
|
||||
"strong_hawkes = SimParams(hawkes_alpha=1.5, hawkes_beta=0.5, n_ticks=300, seed=42)\n",
|
||||
"\n",
|
||||
"r_no_h = run_simulation(no_hawkes)\n",
|
||||
"r_strong_h = run_simulation(strong_hawkes)\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(2, 2, figsize=(14, 6))\n",
|
||||
"\n",
|
||||
"axes[0][0].bar(range(len(r_no_h.n_trades_per_tick)), r_no_h.n_trades_per_tick,\n",
|
||||
" color='#3498db', alpha=0.6, width=1.0)\n",
|
||||
"axes[0][0].set_title('Poisson puro (hawkes_alpha=0)')\n",
|
||||
"axes[0][0].set_ylabel('Trades/tick')\n",
|
||||
"\n",
|
||||
"axes[0][1].bar(range(len(r_strong_h.n_trades_per_tick)), r_strong_h.n_trades_per_tick,\n",
|
||||
" color='#e74c3c', alpha=0.6, width=1.0)\n",
|
||||
"axes[0][1].set_title('Hawkes fuerte (alpha=1.5, beta=0.5)')\n",
|
||||
"\n",
|
||||
"axes[1][0].plot(r_no_h.midprices, color='#3498db', linewidth=0.8)\n",
|
||||
"axes[1][0].set_ylabel('Midprice')\n",
|
||||
"\n",
|
||||
"axes[1][1].plot(r_strong_h.midprices, color='#e74c3c', linewidth=0.8)\n",
|
||||
"\n",
|
||||
"for ax in axes.flat:\n",
|
||||
" ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"print(f\"Max trades/tick → Poisson: {max(r_no_h.n_trades_per_tick)}, Hawkes: {max(r_strong_h.n_trades_per_tick)}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# --- Experimento 4: Gamma bajo vs alto (aversión al riesgo del maker) ---\n",
|
||||
"\n",
|
||||
"low_gamma = SimParams(gamma=0.01, n_ticks=300, seed=42)\n",
|
||||
"high_gamma = SimParams(gamma=1.0, n_ticks=300, seed=42)\n",
|
||||
"\n",
|
||||
"r_low_g = run_simulation(low_gamma)\n",
|
||||
"r_high_g = run_simulation(high_gamma)\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(1, 2, figsize=(14, 4))\n",
|
||||
"\n",
|
||||
"axes[0].fill_between(range(len(r_low_g.spreads)), r_low_g.spreads, color='#2ecc71', alpha=0.5)\n",
|
||||
"axes[0].set_title(f'γ={low_gamma.gamma} (maker agresivo)\\nspread prom: {np.mean(r_low_g.spreads):.4f}')\n",
|
||||
"axes[0].set_ylabel('Spread')\n",
|
||||
"axes[0].grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"axes[1].fill_between(range(len(r_high_g.spreads)), r_high_g.spreads, color='#e74c3c', alpha=0.5)\n",
|
||||
"axes[1].set_title(f'γ={high_gamma.gamma} (maker conservador)\\nspread prom: {np.mean(r_high_g.spreads):.4f}')\n",
|
||||
"axes[1].grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"print(f\"\\nPnL makers γ={low_gamma.gamma}: {[f'{m.pnl:.0f}' for m in r_low_g.maker_states]}\")\n",
|
||||
"print(f\"PnL makers γ={high_gamma.gamma}: {[f'{m.pnl:.0f}' for m in r_high_g.maker_states]}\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.13.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
@@ -0,0 +1,592 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Datos reales de Binance\n",
|
||||
"\n",
|
||||
"Usamos la API pública de Binance (gratis, sin API key) para obtener:\n",
|
||||
"1. **Order book** (L2) — profundidad del libro en tiempo real\n",
|
||||
"2. **Trades recientes** — los últimos fills ejecutados\n",
|
||||
"3. **OHLCV** — velas históricas\n",
|
||||
"\n",
|
||||
"Después aplicamos las mismas técnicas de estimación del notebook 03 sobre datos reales."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Exchange: Binance\n",
|
||||
"Rate limit: 50ms\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import ccxt\n",
|
||||
"import polars as pl\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"import time\n",
|
||||
"\n",
|
||||
"exchange = ccxt.binance({'enableRateLimit': True})\n",
|
||||
"print(f\"Exchange: {exchange.name}\")\n",
|
||||
"print(f\"Rate limit: {exchange.rateLimit}ms\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Elegir par y explorar qué hay disponible"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"SYMBOL = 'BTC/USDT'\n",
|
||||
"\n",
|
||||
"ticker = exchange.fetch_ticker(SYMBOL)\n",
|
||||
"print(f\"Par: {SYMBOL}\")\n",
|
||||
"print(f\"Último precio: {ticker['last']}\")\n",
|
||||
"print(f\"Bid: {ticker['bid']} Ask: {ticker['ask']}\")\n",
|
||||
"print(f\"Spread: {ticker['ask'] - ticker['bid']:.2f} ({(ticker['ask'] - ticker['bid']) / ticker['last'] * 100:.4f}%)\")\n",
|
||||
"print(f\"Volumen 24h: {ticker['baseVolume']:,.0f} BTC\")\n",
|
||||
"print(f\"Volumen 24h: ${ticker['quoteVolume']:,.0f} USDT\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Order Book (L2)\n",
|
||||
"\n",
|
||||
"El order book de Binance te da los **niveles de precio agregados** — no ves órdenes individuales (no es L3).\n",
|
||||
"Cada nivel muestra: precio y cantidad total a ese precio."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def fetch_orderbook(symbol: str, limit: int = 50) -> pl.DataFrame:\n",
|
||||
" \"\"\"Obtiene el order book y lo devuelve como DataFrame.\"\"\"\n",
|
||||
" ob = exchange.fetch_order_book(symbol, limit=limit)\n",
|
||||
"\n",
|
||||
" bids = pl.DataFrame(ob['bids'], schema=['price', 'qty'], orient='row')\n",
|
||||
" bids = bids.with_columns(pl.lit('bid').alias('side'))\n",
|
||||
"\n",
|
||||
" asks = pl.DataFrame(ob['asks'], schema=['price', 'qty'], orient='row')\n",
|
||||
" asks = asks.with_columns(pl.lit('ask').alias('side'))\n",
|
||||
"\n",
|
||||
" df = pl.concat([bids, asks])\n",
|
||||
" return df, ob['timestamp']\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"ob_df, ob_ts = fetch_orderbook(SYMBOL, limit=20)\n",
|
||||
"print(f\"Timestamp: {datetime.fromtimestamp(ob_ts/1000)}\")\n",
|
||||
"print(f\"\\nTop 5 bids:\")\n",
|
||||
"print(ob_df.filter(pl.col('side') == 'bid').head(5))\n",
|
||||
"print(f\"\\nTop 5 asks:\")\n",
|
||||
"print(ob_df.filter(pl.col('side') == 'ask').head(5))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def plot_real_orderbook(ob_df: pl.DataFrame, symbol: str):\n",
|
||||
" \"\"\"Visualiza el order book real.\"\"\"\n",
|
||||
" bids = ob_df.filter(pl.col('side') == 'bid').sort('price', descending=True)\n",
|
||||
" asks = ob_df.filter(pl.col('side') == 'ask').sort('price')\n",
|
||||
"\n",
|
||||
" bid_prices = bids['price'].to_numpy()\n",
|
||||
" bid_cum = np.cumsum(bids['qty'].to_numpy())\n",
|
||||
" ask_prices = asks['price'].to_numpy()\n",
|
||||
" ask_cum = np.cumsum(asks['qty'].to_numpy())\n",
|
||||
"\n",
|
||||
" fig, ax = plt.subplots(figsize=(12, 5))\n",
|
||||
" ax.fill_between(bid_prices, bid_cum, step='post', color='#2ecc71', alpha=0.5, label='Bids')\n",
|
||||
" ax.fill_between(ask_prices, ask_cum, step='pre', color='#e74c3c', alpha=0.5, label='Asks')\n",
|
||||
" ax.set_xlabel('Precio (USDT)')\n",
|
||||
" ax.set_ylabel('Cantidad acumulada (BTC)')\n",
|
||||
"\n",
|
||||
" best_bid = bid_prices[0]\n",
|
||||
" best_ask = ask_prices[0]\n",
|
||||
" spread = best_ask - best_bid\n",
|
||||
" mid = (best_bid + best_ask) / 2\n",
|
||||
"\n",
|
||||
" ax.axvline(x=mid, color='gray', linestyle='--', linewidth=0.8)\n",
|
||||
" ax.set_title(f'{symbol} Order Book — Spread: ${spread:.2f} ({spread/mid*100:.4f}%) — Mid: ${mid:,.2f}')\n",
|
||||
" ax.legend()\n",
|
||||
" ax.grid(True, alpha=0.3)\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.show()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"plot_real_orderbook(ob_df, SYMBOL)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Trades recientes (fills)\n",
|
||||
"\n",
|
||||
"Esto es lo que ves en el tape público. Cada trade es un **fill** — no sabes si vienen de la misma orden."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def fetch_trades(symbol: str, limit: int = 1000) -> pl.DataFrame:\n",
|
||||
" \"\"\"Obtiene trades recientes.\"\"\"\n",
|
||||
" raw = exchange.fetch_trades(symbol, limit=limit)\n",
|
||||
" records = [{\n",
|
||||
" 'timestamp': t['timestamp'],\n",
|
||||
" 'datetime': t['datetime'],\n",
|
||||
" 'price': t['price'],\n",
|
||||
" 'qty': t['amount'],\n",
|
||||
" 'side': t['side'], # taker side\n",
|
||||
" 'cost': t['cost'], # price * qty en quote currency\n",
|
||||
" } for t in raw]\n",
|
||||
" return pl.DataFrame(records)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"trades = fetch_trades(SYMBOL, limit=1000)\n",
|
||||
"print(f\"Trades obtenidos: {trades.shape[0]}\")\n",
|
||||
"print(f\"Rango: {trades['datetime'].min()} → {trades['datetime'].max()}\")\n",
|
||||
"print(f\"\\nÚltimos 5 trades:\")\n",
|
||||
"print(trades.tail(5))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Estadísticas básicas de los trades\n",
|
||||
"buys = trades.filter(pl.col('side') == 'buy')\n",
|
||||
"sells = trades.filter(pl.col('side') == 'sell')\n",
|
||||
"\n",
|
||||
"print(f\"Buy trades: {buys.shape[0]} ({buys.shape[0]/trades.shape[0]*100:.1f}%)\")\n",
|
||||
"print(f\"Sell trades: {sells.shape[0]} ({sells.shape[0]/trades.shape[0]*100:.1f}%)\")\n",
|
||||
"print(f\"\\nTamaño promedio: {trades['qty'].mean():.6f} BTC\")\n",
|
||||
"print(f\"Tamaño mediano: {trades['qty'].median():.6f} BTC\")\n",
|
||||
"print(f\"Tamaño máximo: {trades['qty'].max():.6f} BTC\")\n",
|
||||
"print(f\"\\nPrecio min: ${trades['price'].min():,.2f}\")\n",
|
||||
"print(f\"Precio max: ${trades['price'].max():,.2f}\")\n",
|
||||
"print(f\"Rango: ${trades['price'].max() - trades['price'].min():,.2f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Velas históricas (OHLCV)\n",
|
||||
"\n",
|
||||
"Las velas agregan trades en intervalos. Útiles para estimar σ en distintos timeframes."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def fetch_ohlcv(symbol: str, timeframe: str = '1m', limit: int = 500) -> pl.DataFrame:\n",
|
||||
" \"\"\"Obtiene velas OHLCV.\"\"\"\n",
|
||||
" raw = exchange.fetch_ohlcv(symbol, timeframe=timeframe, limit=limit)\n",
|
||||
" df = pl.DataFrame(raw, schema=['timestamp', 'open', 'high', 'low', 'close', 'volume'], orient='row')\n",
|
||||
" df = df.with_columns(\n",
|
||||
" (pl.col('timestamp').cast(pl.Int64) * 1000).cast(pl.Datetime('us')).alias('datetime')\n",
|
||||
" )\n",
|
||||
" return df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# 1-minute candles, últimas 500\n",
|
||||
"ohlcv_1m = fetch_ohlcv(SYMBOL, '1m', 500)\n",
|
||||
"print(f\"Velas 1m: {ohlcv_1m.shape[0]}\")\n",
|
||||
"print(f\"Rango: {ohlcv_1m['datetime'].min()} → {ohlcv_1m['datetime'].max()}\")\n",
|
||||
"print(ohlcv_1m.tail(3))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def plot_candles_and_volume(ohlcv: pl.DataFrame, symbol: str, timeframe: str):\n",
|
||||
" \"\"\"Gráfico de velas con volumen.\"\"\"\n",
|
||||
" fig, axes = plt.subplots(2, 1, figsize=(14, 7), gridspec_kw={'height_ratios': [3, 1]}, sharex=True)\n",
|
||||
"\n",
|
||||
" dt = ohlcv['datetime'].to_numpy()\n",
|
||||
" opens = ohlcv['open'].to_numpy()\n",
|
||||
" closes = ohlcv['close'].to_numpy()\n",
|
||||
" highs = ohlcv['high'].to_numpy()\n",
|
||||
" lows = ohlcv['low'].to_numpy()\n",
|
||||
" volumes = ohlcv['volume'].to_numpy()\n",
|
||||
"\n",
|
||||
" colors = ['#2ecc71' if c >= o else '#e74c3c' for o, c in zip(opens, closes)]\n",
|
||||
"\n",
|
||||
" # Velas\n",
|
||||
" ax = axes[0]\n",
|
||||
" for i in range(len(dt)):\n",
|
||||
" ax.plot([i, i], [lows[i], highs[i]], color=colors[i], linewidth=0.5)\n",
|
||||
" ax.plot([i, i], [opens[i], closes[i]], color=colors[i], linewidth=2)\n",
|
||||
" ax.set_ylabel('Precio (USDT)')\n",
|
||||
" ax.set_title(f'{symbol} — {timeframe} candles')\n",
|
||||
" ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
" # Volumen\n",
|
||||
" ax = axes[1]\n",
|
||||
" ax.bar(range(len(dt)), volumes, color=colors, alpha=0.6, width=0.8)\n",
|
||||
" ax.set_ylabel('Volumen (BTC)')\n",
|
||||
" ax.set_xlabel('Vela')\n",
|
||||
" ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.show()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Últimas 100 velas para que se vea claro\n",
|
||||
"plot_candles_and_volume(ohlcv_1m.tail(100), SYMBOL, '1m')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"\n",
|
||||
"## 5. Estimación de parámetros sobre datos reales\n",
|
||||
"\n",
|
||||
"Aplicamos las mismas técnicas del notebook 03 pero sobre BTC/USDT real."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 5.1 Volatilidad (σ) desde velas"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Retornos logarítmicos close-to-close\n",
|
||||
"closes = ohlcv_1m['close'].to_numpy()\n",
|
||||
"log_returns = np.diff(np.log(closes))\n",
|
||||
"\n",
|
||||
"sigma_1m = np.std(log_returns)\n",
|
||||
"sigma_1h = sigma_1m * np.sqrt(60) # escalar a 1 hora\n",
|
||||
"sigma_1d = sigma_1m * np.sqrt(60 * 24) # escalar a 1 día\n",
|
||||
"sigma_annual = sigma_1d * np.sqrt(365) # anualizada\n",
|
||||
"\n",
|
||||
"print(f\"σ por minuto: {sigma_1m:.6f}\")\n",
|
||||
"print(f\"σ por hora: {sigma_1h:.6f}\")\n",
|
||||
"print(f\"σ por día: {sigma_1d:.4f} ({sigma_1d*100:.2f}%)\")\n",
|
||||
"print(f\"σ anualizada: {sigma_annual:.4f} ({sigma_annual*100:.1f}%)\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 5.2 Arrival rate (λ) de trades"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Inter-arrival times entre trades consecutivos\n",
|
||||
"timestamps = trades['timestamp'].to_numpy()\n",
|
||||
"inter_arrivals_ms = np.diff(timestamps)\n",
|
||||
"inter_arrivals_s = inter_arrivals_ms / 1000.0\n",
|
||||
"\n",
|
||||
"# Filtrar zeros (trades en el mismo milisegundo = probablemente mismo matching event)\n",
|
||||
"inter_arrivals_s = inter_arrivals_s[inter_arrivals_s > 0]\n",
|
||||
"\n",
|
||||
"lambda_per_sec = 1.0 / np.mean(inter_arrivals_s)\n",
|
||||
"lambda_per_min = lambda_per_sec * 60\n",
|
||||
"\n",
|
||||
"print(f\"Tiempo medio entre trades: {np.mean(inter_arrivals_s)*1000:.1f} ms\")\n",
|
||||
"print(f\"Tiempo mediano entre trades: {np.median(inter_arrivals_s)*1000:.1f} ms\")\n",
|
||||
"print(f\"λ (trades/segundo): {lambda_per_sec:.1f}\")\n",
|
||||
"print(f\"λ (trades/minuto): {lambda_per_min:.0f}\")\n",
|
||||
"print(f\"\\nRecuerda: esto son FILLS, no órdenes originales\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 5.3 Clustering (Hawkes) — ¿los trades generan más trades?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Agrupar trades por segundo y calcular autocorrelación\n",
|
||||
"trades_per_sec = trades.with_columns(\n",
|
||||
" (pl.col('timestamp') // 1000).alias('second')\n",
|
||||
").group_by('second').agg(pl.len().alias('n_trades')).sort('second')\n",
|
||||
"\n",
|
||||
"arrivals = trades_per_sec['n_trades'].to_numpy()\n",
|
||||
"\n",
|
||||
"# Autocorrelación\n",
|
||||
"max_lag = 30\n",
|
||||
"mean_arr = np.mean(arrivals)\n",
|
||||
"var_arr = np.var(arrivals)\n",
|
||||
"acf = np.array([\n",
|
||||
" np.mean((arrivals[lag:] - mean_arr) * (arrivals[:-lag] - mean_arr)) / var_arr\n",
|
||||
" if lag > 0 else 1.0\n",
|
||||
" for lag in range(max_lag)\n",
|
||||
"])\n",
|
||||
"\n",
|
||||
"# Var/Mean ratio (dispersion index)\n",
|
||||
"dispersion = var_arr / mean_arr\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(1, 2, figsize=(14, 4))\n",
|
||||
"\n",
|
||||
"axes[0].bar(range(max_lag), acf, color='#e67e22', alpha=0.6)\n",
|
||||
"axes[0].axhline(y=0, color='black', linewidth=0.5)\n",
|
||||
"axes[0].axhline(y=1.96/np.sqrt(len(arrivals)), color='blue', linestyle='--', linewidth=0.8, label='95% CI')\n",
|
||||
"axes[0].axhline(y=-1.96/np.sqrt(len(arrivals)), color='blue', linestyle='--', linewidth=0.8)\n",
|
||||
"axes[0].set_title('Autocorrelación de trades/segundo')\n",
|
||||
"axes[0].set_xlabel('Lag (segundos)')\n",
|
||||
"axes[0].legend(fontsize=8)\n",
|
||||
"axes[0].grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"axes[1].hist(arrivals, bins=50, color='#3498db', alpha=0.6, density=True)\n",
|
||||
"axes[1].set_title(f'Distribución de trades/segundo\\nMedia={mean_arr:.1f}, Var/Mean={dispersion:.1f}')\n",
|
||||
"axes[1].set_xlabel('Trades por segundo')\n",
|
||||
"axes[1].grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"print(f\"Var/Mean ratio: {dispersion:.2f}\")\n",
|
||||
"if dispersion > 1.5:\n",
|
||||
" print(\" → Hay clustering significativo (Hawkes). Los trades generan más trades.\")\n",
|
||||
"else:\n",
|
||||
" print(\" → Cercano a Poisson. Poco clustering.\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 5.4 Distribución de tamaños — ¿hay ballenas?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sizes = trades['qty'].to_numpy()\n",
|
||||
"sizes = sizes[sizes > 0]\n",
|
||||
"\n",
|
||||
"# Estimar exponente Pareto (MLE)\n",
|
||||
"x_min = np.percentile(sizes, 90) # usar percentil 90 como x_min (zona de cola)\n",
|
||||
"tail = sizes[sizes >= x_min]\n",
|
||||
"alpha_est = len(tail) / np.sum(np.log(tail / x_min))\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(1, 2, figsize=(14, 4))\n",
|
||||
"\n",
|
||||
"# Histograma\n",
|
||||
"axes[0].hist(sizes, bins=100, color='#2ecc71', alpha=0.6, density=True)\n",
|
||||
"axes[0].set_title('Distribución de tamaños de trades')\n",
|
||||
"axes[0].set_xlabel('Tamaño (BTC)')\n",
|
||||
"axes[0].set_yscale('log')\n",
|
||||
"axes[0].grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"# CCDF log-log (survival function)\n",
|
||||
"sizes_sorted = np.sort(sizes)[::-1]\n",
|
||||
"ranks = np.arange(1, len(sizes_sorted) + 1) / len(sizes_sorted)\n",
|
||||
"axes[1].loglog(sizes_sorted, ranks, '.', markersize=1, alpha=0.4, color='#2ecc71')\n",
|
||||
"# Fit Pareto\n",
|
||||
"x_fit = np.logspace(np.log10(x_min), np.log10(sizes.max()), 50)\n",
|
||||
"axes[1].loglog(x_fit, (x_fit / x_min) ** (-alpha_est) * (len(tail)/len(sizes)),\n",
|
||||
" 'r-', linewidth=2, label=f'Pareto α={alpha_est:.2f}')\n",
|
||||
"axes[1].set_title('CCDF (complementary CDF) — cola pesada')\n",
|
||||
"axes[1].set_xlabel('Tamaño (BTC)')\n",
|
||||
"axes[1].set_ylabel('P(X > x)')\n",
|
||||
"axes[1].legend()\n",
|
||||
"axes[1].grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"print(f\"Tamaño mediano: {np.median(sizes):.6f} BTC (${np.median(sizes) * ticker['last']:,.2f})\")\n",
|
||||
"print(f\"Tamaño p99: {np.percentile(sizes, 99):.6f} BTC (${np.percentile(sizes, 99) * ticker['last']:,.2f})\")\n",
|
||||
"print(f\"Tamaño max: {sizes.max():.6f} BTC (${sizes.max() * ticker['last']:,.2f})\")\n",
|
||||
"print(f\"Pareto α (cola): {alpha_est:.2f}\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 5.5 Detección de jumps en retornos reales"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Retornos de 1 minuto\n",
|
||||
"threshold = 3 * sigma_1m\n",
|
||||
"jump_mask = np.abs(log_returns) > threshold\n",
|
||||
"n_jumps = np.sum(jump_mask)\n",
|
||||
"jump_intensity = n_jumps / len(log_returns)\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(1, 2, figsize=(14, 4))\n",
|
||||
"\n",
|
||||
"# Retornos con jumps marcados\n",
|
||||
"ax = axes[0]\n",
|
||||
"ax.plot(log_returns, linewidth=0.5, color='#3498db', alpha=0.6)\n",
|
||||
"jump_indices = np.where(jump_mask)[0]\n",
|
||||
"ax.scatter(jump_indices, log_returns[jump_indices], color='red', s=20, zorder=5, label=f'Jumps ({n_jumps})')\n",
|
||||
"ax.axhline(y=threshold, color='red', linestyle='--', linewidth=0.8, alpha=0.5)\n",
|
||||
"ax.axhline(y=-threshold, color='red', linestyle='--', linewidth=0.8, alpha=0.5)\n",
|
||||
"ax.set_title('Retornos 1m con jumps detectados (> 3σ)')\n",
|
||||
"ax.set_ylabel('Log-return')\n",
|
||||
"ax.legend(fontsize=8)\n",
|
||||
"ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"# QQ plot\n",
|
||||
"from scipy.stats import probplot\n",
|
||||
"probplot(log_returns, dist=\"norm\", plot=axes[1])\n",
|
||||
"axes[1].set_title('QQ-Plot: retornos vs Normal\\n(colas pesadas = desviación en extremos)')\n",
|
||||
"axes[1].grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"print(f\"Jumps detectados: {n_jumps} de {len(log_returns)} velas ({jump_intensity*100:.1f}%)\")\n",
|
||||
"print(f\"Kurtosis: {float(np.mean((log_returns - np.mean(log_returns))**4) / np.std(log_returns)**4):.1f} (Normal=3, >3 = colas pesadas)\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"\n",
|
||||
"## 6. Resumen: perfil del mercado BTC/USDT"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"best_bid = ob_df.filter(pl.col('side') == 'bid')['price'].max()\n",
|
||||
"best_ask = ob_df.filter(pl.col('side') == 'ask')['price'].min()\n",
|
||||
"spread = best_ask - best_bid\n",
|
||||
"\n",
|
||||
"print(\"=\" * 60)\n",
|
||||
"print(f\" PERFIL DE MERCADO: {SYMBOL}\")\n",
|
||||
"print(f\" {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\")\n",
|
||||
"print(\"=\" * 60)\n",
|
||||
"print(f\"\")\n",
|
||||
"print(f\" Precio: ${ticker['last']:,.2f}\")\n",
|
||||
"print(f\" Spread: ${spread:.2f} ({spread/ticker['last']*100:.4f}%)\")\n",
|
||||
"print(f\" Vol 24h: {ticker['baseVolume']:,.0f} BTC\")\n",
|
||||
"print(f\"\")\n",
|
||||
"print(f\" σ (1 min): {sigma_1m:.6f}\")\n",
|
||||
"print(f\" σ (diaria): {sigma_1d:.4f} ({sigma_1d*100:.2f}%)\")\n",
|
||||
"print(f\" σ (anual): {sigma_annual:.2f} ({sigma_annual*100:.0f}%)\")\n",
|
||||
"print(f\"\")\n",
|
||||
"print(f\" λ (fills/seg): {lambda_per_sec:.1f}\")\n",
|
||||
"print(f\" Clustering: Var/Mean = {dispersion:.1f} {'(Hawkes)' if dispersion > 1.5 else '(~Poisson)'}\")\n",
|
||||
"print(f\"\")\n",
|
||||
"print(f\" Tamaño mediano: {np.median(sizes):.6f} BTC\")\n",
|
||||
"print(f\" Pareto α: {alpha_est:.2f}\")\n",
|
||||
"print(f\" Kurtosis: {float(np.mean((log_returns - np.mean(log_returns))**4) / np.std(log_returns)**4):.1f}\")\n",
|
||||
"print(f\" Jumps (>3σ): {jump_intensity*100:.1f}%\")\n",
|
||||
"print(f\"\")\n",
|
||||
"print(\"=\" * 60)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 7. Guardar datos para análisis offline"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Guardar todo en data/\n",
|
||||
"trades.write_csv('../data/binance_btcusdt_trades.csv')\n",
|
||||
"ohlcv_1m.write_csv('../data/binance_btcusdt_ohlcv_1m.csv')\n",
|
||||
"ob_df.write_csv('../data/binance_btcusdt_orderbook.csv')\n",
|
||||
"\n",
|
||||
"print(f\"Guardados en data/:\")\n",
|
||||
"print(f\" binance_btcusdt_trades.csv ({trades.shape[0]} trades)\")\n",
|
||||
"print(f\" binance_btcusdt_ohlcv_1m.csv ({ohlcv_1m.shape[0]} velas)\")\n",
|
||||
"print(f\" binance_btcusdt_orderbook.csv ({ob_df.shape[0]} niveles)\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,387 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Recolección de datos: Binance + Bitstamp L3\n",
|
||||
"\n",
|
||||
"**Objetivo:** Dataset de 1M+ filas guardado en `data/`\n",
|
||||
"\n",
|
||||
"| Fuente | Tipo | Método | Qué obtenemos |\n",
|
||||
"|---|---|---|---|\n",
|
||||
"| Binance | aggTrades (fills agrupados por taker) | REST paginado | 1M+ trades históricos |\n",
|
||||
"| Binance | Order book L2 | REST snapshots | Profundidad del libro |\n",
|
||||
"| Bitstamp | L3 live_orders | WebSocket | Cada orden individual con ID |"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Data dir: /home/lucas/fn_registry/analysis/estudio_mercados/data\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import aiohttp\n",
|
||||
"import asyncio\n",
|
||||
"import websockets\n",
|
||||
"import json\n",
|
||||
"import time\n",
|
||||
"import polars as pl\n",
|
||||
"import numpy as np\n",
|
||||
"from datetime import datetime, timedelta\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"DATA_DIR = Path('../data')\n",
|
||||
"DATA_DIR.mkdir(exist_ok=True)\n",
|
||||
"\n",
|
||||
"BINANCE_BASE = 'https://api.binance.com'\n",
|
||||
"BITSTAMP_WS = 'wss://ws.bitstamp.net'\n",
|
||||
"\n",
|
||||
"print(f'Data dir: {DATA_DIR.resolve()}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## 1. Binance aggTrades — 1M+ filas\n",
|
||||
"\n",
|
||||
"Los `aggTrades` agrupan fills de la misma taker order:\n",
|
||||
"- Cada fila = 1 taker order (o parte si cruzó muchos niveles)\n",
|
||||
"- Campo `a` = aggregate trade ID\n",
|
||||
"- Campo `m` = true si el maker es buyer (taker es seller)\n",
|
||||
"- Paginamos con `fromId` para ir hacia atrás en el tiempo"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"async def fetch_binance_agg_trades(\n",
|
||||
" symbol: str = 'BTCUSDT',\n",
|
||||
" target_rows: int = 1_000_000,\n",
|
||||
" batch_size: int = 1000,\n",
|
||||
") -> pl.DataFrame:\n",
|
||||
" \"\"\"Descarga aggTrades de Binance paginando hacia atrás.\n",
|
||||
"\n",
|
||||
" Cada aggTrade agrupa fills de la misma taker order:\n",
|
||||
" - a: aggregate trade id\n",
|
||||
" - p: price\n",
|
||||
" - q: quantity\n",
|
||||
" - f: first trade id\n",
|
||||
" - l: last trade id\n",
|
||||
" - T: timestamp\n",
|
||||
" - m: was the buyer the maker? (true = taker sold, false = taker bought)\n",
|
||||
" \"\"\"\n",
|
||||
" all_records = []\n",
|
||||
" from_id = None\n",
|
||||
" total = 0\n",
|
||||
" start_time = time.time()\n",
|
||||
"\n",
|
||||
" async with aiohttp.ClientSession() as session:\n",
|
||||
" while total < target_rows:\n",
|
||||
" params = {'symbol': symbol, 'limit': batch_size}\n",
|
||||
" if from_id is not None:\n",
|
||||
" params['fromId'] = from_id\n",
|
||||
"\n",
|
||||
" async with session.get(f'{BINANCE_BASE}/api/v3/aggTrades', params=params) as resp:\n",
|
||||
" if resp.status != 200:\n",
|
||||
" text = await resp.text()\n",
|
||||
" print(f'Error {resp.status}: {text}')\n",
|
||||
" break\n",
|
||||
" data = await resp.json()\n",
|
||||
"\n",
|
||||
" if not data:\n",
|
||||
" break\n",
|
||||
"\n",
|
||||
" for row in data:\n",
|
||||
" all_records.append({\n",
|
||||
" 'agg_trade_id': row['a'],\n",
|
||||
" 'price': float(row['p']),\n",
|
||||
" 'qty': float(row['q']),\n",
|
||||
" 'first_trade_id': row['f'],\n",
|
||||
" 'last_trade_id': row['l'],\n",
|
||||
" 'timestamp': row['T'],\n",
|
||||
" 'is_buyer_maker': row['m'], # True = taker vendió\n",
|
||||
" 'side': 'sell' if row['m'] else 'buy', # taker side\n",
|
||||
" 'n_fills': row['l'] - row['f'] + 1, # fills en esta agg\n",
|
||||
" })\n",
|
||||
"\n",
|
||||
" # Avanzar: siguiente página desde el último ID + 1\n",
|
||||
" from_id = data[-1]['a'] + 1\n",
|
||||
" total += len(data)\n",
|
||||
"\n",
|
||||
" if total % 50_000 == 0:\n",
|
||||
" elapsed = time.time() - start_time\n",
|
||||
" rate = total / elapsed\n",
|
||||
" eta = (target_rows - total) / rate if rate > 0 else 0\n",
|
||||
" ts = datetime.fromtimestamp(data[-1]['T'] / 1000)\n",
|
||||
" print(f' {total:>8,} rows | {rate:,.0f} rows/s | ETA {eta:.0f}s | hasta {ts}')\n",
|
||||
"\n",
|
||||
" # Rate limit: Binance permite 1200 req/min en aggTrades\n",
|
||||
" await asyncio.sleep(0.05)\n",
|
||||
"\n",
|
||||
" elapsed = time.time() - start_time\n",
|
||||
" print(f'\\nDescargados {total:,} aggTrades en {elapsed:.1f}s ({total/elapsed:,.0f} rows/s)')\n",
|
||||
"\n",
|
||||
" return pl.DataFrame(all_records)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print('fetch_binance_agg_trades() definida')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Descargar 1M+ aggTrades de BTC/USDT\n",
|
||||
"binance_trades = await fetch_binance_agg_trades('BTCUSDT', target_rows=1_000_000)\n",
|
||||
"\n",
|
||||
"print(f'\\nShape: {binance_trades.shape}')\n",
|
||||
"print(f'Columnas: {binance_trades.columns}')\n",
|
||||
"print(binance_trades.head(5))\n",
|
||||
"print(f'\\nRango temporal:')\n",
|
||||
"t_min = datetime.fromtimestamp(binance_trades['timestamp'].min() / 1000)\n",
|
||||
"t_max = datetime.fromtimestamp(binance_trades['timestamp'].max() / 1000)\n",
|
||||
"print(f' {t_min} → {t_max}')\n",
|
||||
"print(f' Duración: {t_max - t_min}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Guardar Binance aggTrades\n",
|
||||
"out_path = DATA_DIR / 'binance_btcusdt_aggtrades.csv'\n",
|
||||
"binance_trades.write_csv(str(out_path))\n",
|
||||
"size_mb = out_path.stat().st_size / 1024 / 1024\n",
|
||||
"print(f'Guardado: {out_path}')\n",
|
||||
"print(f' {binance_trades.shape[0]:,} filas, {size_mb:.1f} MB')\n",
|
||||
"\n",
|
||||
"# Estadísticas rápidas\n",
|
||||
"print(f'\\nEstadísticas:')\n",
|
||||
"print(f' Buys (taker): {binance_trades.filter(pl.col(\"side\") == \"buy\").shape[0]:,}')\n",
|
||||
"print(f' Sells (taker): {binance_trades.filter(pl.col(\"side\") == \"sell\").shape[0]:,}')\n",
|
||||
"print(f' Precio min: ${binance_trades[\"price\"].min():,.2f}')\n",
|
||||
"print(f' Precio max: ${binance_trades[\"price\"].max():,.2f}')\n",
|
||||
"print(f' Qty mediana: {binance_trades[\"qty\"].median():.6f} BTC')\n",
|
||||
"print(f' Qty max: {binance_trades[\"qty\"].max():.4f} BTC')\n",
|
||||
"print(f' Fills/aggTrade mediana: {binance_trades[\"n_fills\"].median():.0f}')\n",
|
||||
"print(f' Fills/aggTrade max: {binance_trades[\"n_fills\"].max()}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## 2. Bitstamp L3 — órdenes individuales via WebSocket\n",
|
||||
"\n",
|
||||
"Cada mensaje tiene:\n",
|
||||
"- `id`: ID único de la orden\n",
|
||||
"- `order_type`: 0 = buy, 1 = sell\n",
|
||||
"- `price`, `amount`\n",
|
||||
"- `datetime`, `microtimestamp`\n",
|
||||
"\n",
|
||||
"Los canales:\n",
|
||||
"- `live_orders_btcusd`: cada orden creada, modificada o eliminada (`order_created`, `order_changed`, `order_deleted`)\n",
|
||||
"- `live_trades_btcusd`: cada ejecución con IDs de maker y taker"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"async def record_bitstamp_l3(\n",
|
||||
" pair: str = 'btcusd',\n",
|
||||
" duration_seconds: int = 300,\n",
|
||||
") -> tuple[pl.DataFrame, pl.DataFrame]:\n",
|
||||
" \"\"\"Graba datos L3 de Bitstamp via WebSocket.\n",
|
||||
"\n",
|
||||
" Retorna (orders_df, trades_df) con todas las órdenes y trades capturados.\n",
|
||||
" \"\"\"\n",
|
||||
" orders = []\n",
|
||||
" trades = []\n",
|
||||
" start = time.time()\n",
|
||||
" msg_count = 0\n",
|
||||
"\n",
|
||||
" async with websockets.connect(BITSTAMP_WS) as ws:\n",
|
||||
" # Suscribirse a órdenes individuales + trades\n",
|
||||
" for channel in [f'live_orders_{pair}', f'live_trades_{pair}']:\n",
|
||||
" await ws.send(json.dumps({\n",
|
||||
" 'event': 'bts:subscribe',\n",
|
||||
" 'data': {'channel': channel}\n",
|
||||
" }))\n",
|
||||
"\n",
|
||||
" print(f'Grabando Bitstamp L3 ({pair}) por {duration_seconds}s...')\n",
|
||||
"\n",
|
||||
" while time.time() - start < duration_seconds:\n",
|
||||
" try:\n",
|
||||
" raw = await asyncio.wait_for(ws.recv(), timeout=5.0)\n",
|
||||
" msg = json.loads(raw)\n",
|
||||
" msg_count += 1\n",
|
||||
"\n",
|
||||
" event = msg.get('event', '')\n",
|
||||
" channel = msg.get('channel', '')\n",
|
||||
" data = msg.get('data', {})\n",
|
||||
"\n",
|
||||
" if isinstance(data, str):\n",
|
||||
" try:\n",
|
||||
" data = json.loads(data)\n",
|
||||
" except:\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" # Órdenes (L3)\n",
|
||||
" if 'live_orders' in channel and event in ('order_created', 'order_changed', 'order_deleted'):\n",
|
||||
" orders.append({\n",
|
||||
" 'event': event,\n",
|
||||
" 'order_id': data.get('id', ''),\n",
|
||||
" 'side': 'buy' if data.get('order_type') == 0 else 'sell',\n",
|
||||
" 'price': float(data.get('price', 0)),\n",
|
||||
" 'amount': float(data.get('amount', 0)),\n",
|
||||
" 'datetime': data.get('datetime', ''),\n",
|
||||
" 'microtimestamp': data.get('microtimestamp', ''),\n",
|
||||
" })\n",
|
||||
"\n",
|
||||
" # Trades\n",
|
||||
" elif 'live_trades' in channel and event == 'trade':\n",
|
||||
" trades.append({\n",
|
||||
" 'trade_id': data.get('id', ''),\n",
|
||||
" 'side': 'buy' if data.get('type') == 0 else 'sell',\n",
|
||||
" 'price': float(data.get('price', 0)),\n",
|
||||
" 'amount': float(data.get('amount', 0)),\n",
|
||||
" 'buy_order_id': data.get('buy_order_id', ''),\n",
|
||||
" 'sell_order_id': data.get('sell_order_id', ''),\n",
|
||||
" 'timestamp': data.get('timestamp', ''),\n",
|
||||
" 'microtimestamp': data.get('microtimestamp', ''),\n",
|
||||
" })\n",
|
||||
"\n",
|
||||
" if msg_count % 5000 == 0:\n",
|
||||
" elapsed = time.time() - start\n",
|
||||
" print(f' {elapsed:.0f}s: {len(orders):,} orders, {len(trades):,} trades ({msg_count:,} msgs)')\n",
|
||||
"\n",
|
||||
" except asyncio.TimeoutError:\n",
|
||||
" continue\n",
|
||||
"\n",
|
||||
" elapsed = time.time() - start\n",
|
||||
" print(f'\\nGrabación terminada: {elapsed:.0f}s')\n",
|
||||
" print(f' Órdenes L3: {len(orders):,}')\n",
|
||||
" print(f' Trades: {len(trades):,}')\n",
|
||||
" print(f' Msgs total: {msg_count:,}')\n",
|
||||
"\n",
|
||||
" orders_df = pl.DataFrame(orders) if orders else pl.DataFrame()\n",
|
||||
" trades_df = pl.DataFrame(trades) if trades else pl.DataFrame()\n",
|
||||
"\n",
|
||||
" return orders_df, trades_df\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"print('record_bitstamp_l3() definida')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Grabar 5 minutos de L3 de Bitstamp\n",
|
||||
"bs_orders, bs_trades = await record_bitstamp_l3('btcusd', duration_seconds=300)\n",
|
||||
"\n",
|
||||
"if bs_orders.shape[0] > 0:\n",
|
||||
" print(f'\\n=== Órdenes L3 ===')\n",
|
||||
" print(f'Shape: {bs_orders.shape}')\n",
|
||||
" print(bs_orders.head(5))\n",
|
||||
" print(f'\\nEventos:')\n",
|
||||
" print(bs_orders.group_by('event').agg(pl.len().alias('count')).sort('count', descending=True))\n",
|
||||
"\n",
|
||||
"if bs_trades.shape[0] > 0:\n",
|
||||
" print(f'\\n=== Trades ===')\n",
|
||||
" print(f'Shape: {bs_trades.shape}')\n",
|
||||
" print(bs_trades.head(5))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Guardar Bitstamp L3\n",
|
||||
"if bs_orders.shape[0] > 0:\n",
|
||||
" path = DATA_DIR / 'bitstamp_btcusd_l3_orders.csv'\n",
|
||||
" bs_orders.write_csv(str(path))\n",
|
||||
" print(f'Guardado: {path} ({bs_orders.shape[0]:,} filas, {path.stat().st_size/1024/1024:.1f} MB)')\n",
|
||||
"\n",
|
||||
"if bs_trades.shape[0] > 0:\n",
|
||||
" path = DATA_DIR / 'bitstamp_btcusd_l3_trades.csv'\n",
|
||||
" bs_trades.write_csv(str(path))\n",
|
||||
" print(f'Guardado: {path} ({bs_trades.shape[0]:,} filas, {path.stat().st_size/1024/1024:.1f} MB)')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## 3. Resumen del dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"print('=' * 70)\n",
|
||||
"print(' DATASET RECOLECTADO')\n",
|
||||
"print('=' * 70)\n",
|
||||
"\n",
|
||||
"total_rows = 0\n",
|
||||
"for f in sorted(DATA_DIR.glob('*.csv')):\n",
|
||||
" size_mb = f.stat().st_size / 1024 / 1024\n",
|
||||
" # Contar filas rápido\n",
|
||||
" try:\n",
|
||||
" nrows = pl.scan_csv(str(f)).select(pl.len()).collect().item()\n",
|
||||
" except:\n",
|
||||
" nrows = '?'\n",
|
||||
" total_rows += nrows if isinstance(nrows, int) else 0\n",
|
||||
" print(f' {f.name:<45} {nrows:>10,} filas {size_mb:>7.1f} MB')\n",
|
||||
"\n",
|
||||
"print(f'\\n TOTAL: {total_rows:>10,} filas')\n",
|
||||
"print('=' * 70)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.13.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,573 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Análisis del dataset real: 1M aggTrades + Bitstamp L3\n",
|
||||
"\n",
|
||||
"Tenemos:\n",
|
||||
"- **Binance**: 1M aggTrades de BTC/USDT (~26h de mercado)\n",
|
||||
"- **Bitstamp**: L3 orders + trades (5 min de captura)\n",
|
||||
"\n",
|
||||
"## Objetivos\n",
|
||||
"1. Estimar parámetros de microestructura sobre datos reales\n",
|
||||
"2. Ver cómo cambian con ventanas deslizantes\n",
|
||||
"3. Comparar Binance (aggTrades = órdenes agrupadas) vs Bitstamp (L3 = cada orden)\n",
|
||||
"4. Calibrar nuestra simulación para que genere datos similares"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Binance aggTrades: 1,000,000 filas\n",
|
||||
"Columnas: ['agg_trade_id', 'price', 'qty', 'first_trade_id', 'last_trade_id', 'timestamp', 'is_buyer_maker', 'side', 'n_fills']\n",
|
||||
"Rango: 2026-04-02 14:26:02.324000 → 2026-04-03 16:32:41.139000 (1 day, 2:06:38.815000)\n",
|
||||
"\n",
|
||||
"Bitstamp L3 aún no disponible (grabando...)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import polars as pl\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from scipy.optimize import curve_fit\n",
|
||||
"from scipy.stats import probplot\n",
|
||||
"from datetime import datetime\n",
|
||||
"from pathlib import Path\n",
|
||||
"\n",
|
||||
"DATA = Path('../data')\n",
|
||||
"\n",
|
||||
"# Cargar Binance aggTrades\n",
|
||||
"trades = pl.read_csv(str(DATA / 'binance_btcusdt_aggtrades_1M.csv'))\n",
|
||||
"print(f'Binance aggTrades: {trades.shape[0]:,} filas')\n",
|
||||
"print(f'Columnas: {trades.columns}')\n",
|
||||
"\n",
|
||||
"t_min = datetime.fromtimestamp(trades['timestamp'].min() / 1000)\n",
|
||||
"t_max = datetime.fromtimestamp(trades['timestamp'].max() / 1000)\n",
|
||||
"print(f'Rango: {t_min} → {t_max} ({t_max - t_min})')\n",
|
||||
"\n",
|
||||
"# Intentar cargar Bitstamp si existe\n",
|
||||
"bs_path = DATA / 'bitstamp_btcusd_l3_orders.csv'\n",
|
||||
"if bs_path.exists():\n",
|
||||
" bs_orders = pl.read_csv(str(bs_path))\n",
|
||||
" print(f'\\nBitstamp L3 orders: {bs_orders.shape[0]:,} filas')\n",
|
||||
"else:\n",
|
||||
" bs_orders = None\n",
|
||||
" print('\\nBitstamp L3 aún no disponible (grabando...)')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Visión general del dataset"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Añadir columna datetime y agrupar por minuto\n",
|
||||
"trades_dt = trades.with_columns(\n",
|
||||
" (pl.col('timestamp') * 1000).cast(pl.Datetime('us')).alias('datetime'),\n",
|
||||
" (pl.col('timestamp') // 60000).alias('minute'),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Por minuto\n",
|
||||
"per_min = trades_dt.group_by('minute').agg(\n",
|
||||
" pl.len().alias('n_trades'),\n",
|
||||
" pl.col('price').last().alias('close'),\n",
|
||||
" pl.col('price').min().alias('low'),\n",
|
||||
" pl.col('price').max().alias('high'),\n",
|
||||
" pl.col('qty').sum().alias('volume'),\n",
|
||||
" (pl.col('qty') * pl.col('price')).sum().alias('turnover'),\n",
|
||||
" pl.col('timestamp').min().alias('ts'),\n",
|
||||
").sort('minute')\n",
|
||||
"\n",
|
||||
"# Log returns\n",
|
||||
"per_min = per_min.with_columns(\n",
|
||||
" (pl.col('close').log() - pl.col('close').shift(1).log()).alias('log_return')\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print(f'Minutos: {per_min.shape[0]}')\n",
|
||||
"print(f'Trades/minuto: media={per_min[\"n_trades\"].mean():.0f}, mediana={per_min[\"n_trades\"].median():.0f}')\n",
|
||||
"print(f'Volumen/minuto: media={per_min[\"volume\"].mean():.2f} BTC')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Overview: precio, volumen, trades/min\n",
|
||||
"fig, axes = plt.subplots(3, 1, figsize=(16, 10), gridspec_kw={'height_ratios': [3, 1, 1]}, sharex=True)\n",
|
||||
"\n",
|
||||
"minutes = np.arange(per_min.shape[0])\n",
|
||||
"\n",
|
||||
"ax = axes[0]\n",
|
||||
"ax.plot(minutes, per_min['close'].to_numpy(), linewidth=0.5, color='#3498db')\n",
|
||||
"ax.set_ylabel('Precio (USDT)')\n",
|
||||
"ax.set_title(f'BTC/USDT — 1M aggTrades ({t_min.strftime(\"%Y-%m-%d %H:%M\")} → {t_max.strftime(\"%Y-%m-%d %H:%M\")})')\n",
|
||||
"ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"ax = axes[1]\n",
|
||||
"ax.bar(minutes, per_min['volume'].to_numpy(), width=1.0, color='#e67e22', alpha=0.6)\n",
|
||||
"ax.set_ylabel('Volumen (BTC)')\n",
|
||||
"ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"ax = axes[2]\n",
|
||||
"ax.bar(minutes, per_min['n_trades'].to_numpy(), width=1.0, color='#9b59b6', alpha=0.6)\n",
|
||||
"ax.set_ylabel('Trades/min')\n",
|
||||
"ax.set_xlabel('Minuto')\n",
|
||||
"ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Estimación de parámetros\n",
|
||||
"\n",
|
||||
"### 2.1 Volatilidad (σ)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"returns = per_min.drop_nulls('log_return')['log_return'].to_numpy()\n",
|
||||
"\n",
|
||||
"sigma_1m = np.std(returns)\n",
|
||||
"sigma_1h = sigma_1m * np.sqrt(60)\n",
|
||||
"sigma_1d = sigma_1m * np.sqrt(60 * 24)\n",
|
||||
"sigma_ann = sigma_1d * np.sqrt(365)\n",
|
||||
"\n",
|
||||
"print(f'σ por minuto: {sigma_1m:.6f}')\n",
|
||||
"print(f'σ por hora: {sigma_1h:.5f}')\n",
|
||||
"print(f'σ diaria: {sigma_1d:.4f} ({sigma_1d*100:.2f}%)')\n",
|
||||
"print(f'σ anualizada: {sigma_ann:.2f} ({sigma_ann*100:.0f}%)')\n",
|
||||
"\n",
|
||||
"# Rolling sigma (ventana de 60 minutos)\n",
|
||||
"window = 60\n",
|
||||
"rolling_sigma = np.array([np.std(returns[max(0,i-window):i]) for i in range(window, len(returns))])\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(2, 2, figsize=(14, 8))\n",
|
||||
"\n",
|
||||
"# Histograma de retornos\n",
|
||||
"ax = axes[0][0]\n",
|
||||
"ax.hist(returns, bins=100, density=True, color='#3498db', alpha=0.6)\n",
|
||||
"x = np.linspace(returns.min(), returns.max(), 200)\n",
|
||||
"from scipy.stats import norm\n",
|
||||
"ax.plot(x, norm.pdf(x, 0, sigma_1m), 'r-', linewidth=1.5, label=f'Normal σ={sigma_1m:.5f}')\n",
|
||||
"ax.set_title('Distribución de retornos 1m')\n",
|
||||
"ax.legend(fontsize=8)\n",
|
||||
"ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"# QQ plot\n",
|
||||
"probplot(returns, dist='norm', plot=axes[0][1])\n",
|
||||
"axes[0][1].set_title('QQ-Plot vs Normal')\n",
|
||||
"axes[0][1].grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"# Rolling sigma\n",
|
||||
"ax = axes[1][0]\n",
|
||||
"ax.fill_between(range(len(rolling_sigma)), rolling_sigma, color='#e74c3c', alpha=0.5)\n",
|
||||
"ax.axhline(y=sigma_1m, color='black', linestyle='--', linewidth=0.8, label=f'σ global={sigma_1m:.5f}')\n",
|
||||
"ax.set_title(f'σ rolling (ventana {window}m)')\n",
|
||||
"ax.set_ylabel('σ por minuto')\n",
|
||||
"ax.legend(fontsize=8)\n",
|
||||
"ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"# Retornos absolutos (clustering de volatilidad)\n",
|
||||
"ax = axes[1][1]\n",
|
||||
"ax.plot(np.abs(returns), linewidth=0.3, color='#e74c3c', alpha=0.6)\n",
|
||||
"ax.set_title('|Retornos| — clustering de volatilidad')\n",
|
||||
"ax.set_ylabel('|log-return|')\n",
|
||||
"ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"kurtosis = float(np.mean((returns - np.mean(returns))**4) / sigma_1m**4)\n",
|
||||
"skew = float(np.mean((returns - np.mean(returns))**3) / sigma_1m**3)\n",
|
||||
"print(f'\\nKurtosis: {kurtosis:.1f} (Normal=3)')\n",
|
||||
"print(f'Skewness: {skew:.3f} (Normal=0)')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2.2 Arrival rate (λ) y Hawkes clustering"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Trades por segundo\n",
|
||||
"trades_per_sec = trades.with_columns(\n",
|
||||
" (pl.col('timestamp') // 1000).alias('second')\n",
|
||||
").group_by('second').agg(\n",
|
||||
" pl.len().alias('n_trades'),\n",
|
||||
" pl.col('qty').sum().alias('volume'),\n",
|
||||
").sort('second')\n",
|
||||
"\n",
|
||||
"arrivals = trades_per_sec['n_trades'].to_numpy()\n",
|
||||
"\n",
|
||||
"lambda_mean = np.mean(arrivals)\n",
|
||||
"var_mean = np.var(arrivals) / np.mean(arrivals)\n",
|
||||
"\n",
|
||||
"print(f'Trades/segundo: media={lambda_mean:.1f}, mediana={np.median(arrivals):.0f}')\n",
|
||||
"print(f'Var/Mean ratio: {var_mean:.1f} (=1 si Poisson, >1 = clustering)')\n",
|
||||
"\n",
|
||||
"# Autocorrelación\n",
|
||||
"max_lag = 60\n",
|
||||
"mean_a = np.mean(arrivals)\n",
|
||||
"var_a = np.var(arrivals)\n",
|
||||
"acf = np.array([\n",
|
||||
" np.mean((arrivals[lag:] - mean_a) * (arrivals[:-lag] - mean_a)) / var_a\n",
|
||||
" if lag > 0 else 1.0\n",
|
||||
" for lag in range(max_lag)\n",
|
||||
"])\n",
|
||||
"\n",
|
||||
"# Ajustar exponencial para estimar Hawkes\n",
|
||||
"lags = np.arange(1, max_lag)\n",
|
||||
"acf_vals = acf[1:]\n",
|
||||
"positive_mask = acf_vals > 0\n",
|
||||
"if np.sum(positive_mask) > 5:\n",
|
||||
" try:\n",
|
||||
" exp_fn = lambda x, a, b: a * np.exp(-b * x)\n",
|
||||
" popt, _ = curve_fit(exp_fn, lags[positive_mask], acf_vals[positive_mask], p0=[0.3, 0.1], maxfev=5000)\n",
|
||||
" hawkes_a, hawkes_b = abs(popt[0]), abs(popt[1])\n",
|
||||
" branching = hawkes_a / hawkes_b\n",
|
||||
" except:\n",
|
||||
" hawkes_a, hawkes_b, branching = 0, 1, 0\n",
|
||||
"else:\n",
|
||||
" hawkes_a, hawkes_b, branching = 0, 1, 0\n",
|
||||
"\n",
|
||||
"print(f'\\nHawkes (ajuste exp a ACF):')\n",
|
||||
"print(f' α ≈ {hawkes_a:.4f}')\n",
|
||||
"print(f' β ≈ {hawkes_b:.4f}')\n",
|
||||
"print(f' Branching ratio η = α/β = {branching:.3f} (< 1 = estacionario)')\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(1, 3, figsize=(16, 4))\n",
|
||||
"\n",
|
||||
"# ACF\n",
|
||||
"ax = axes[0]\n",
|
||||
"ax.bar(range(max_lag), acf, color='#e67e22', alpha=0.6)\n",
|
||||
"if hawkes_a > 0:\n",
|
||||
" ax.plot(lags, exp_fn(lags, hawkes_a, hawkes_b), 'r-', linewidth=2, label=f'Exp fit: α={hawkes_a:.3f}, β={hawkes_b:.3f}')\n",
|
||||
"ax.axhline(y=0, color='black', linewidth=0.5)\n",
|
||||
"ci = 1.96 / np.sqrt(len(arrivals))\n",
|
||||
"ax.axhline(y=ci, color='blue', linestyle='--', linewidth=0.8, alpha=0.5)\n",
|
||||
"ax.axhline(y=-ci, color='blue', linestyle='--', linewidth=0.8, alpha=0.5)\n",
|
||||
"ax.set_title('ACF trades/segundo')\n",
|
||||
"ax.set_xlabel('Lag (s)')\n",
|
||||
"ax.legend(fontsize=7)\n",
|
||||
"ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"# Distribución de arrivals\n",
|
||||
"ax = axes[1]\n",
|
||||
"ax.hist(arrivals, bins=50, density=True, color='#3498db', alpha=0.6)\n",
|
||||
"ax.set_title(f'Trades/segundo (media={lambda_mean:.1f}, V/M={var_mean:.1f})')\n",
|
||||
"ax.set_xlabel('Trades/s')\n",
|
||||
"ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"# Rolling lambda\n",
|
||||
"w = 300 # ventana 5 min\n",
|
||||
"rolling_lambda = np.convolve(arrivals, np.ones(w)/w, mode='valid')\n",
|
||||
"ax = axes[2]\n",
|
||||
"ax.plot(rolling_lambda, linewidth=0.5, color='#9b59b6')\n",
|
||||
"ax.axhline(y=lambda_mean, color='black', linestyle='--', linewidth=0.8)\n",
|
||||
"ax.set_title(f'λ rolling (ventana {w}s = 5min)')\n",
|
||||
"ax.set_ylabel('Trades/s')\n",
|
||||
"ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2.3 Distribución de tamaños (Pareto)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sizes = trades['qty'].to_numpy()\n",
|
||||
"sizes = sizes[sizes > 0]\n",
|
||||
"costs = (trades['qty'] * trades['price']).to_numpy()\n",
|
||||
"costs = costs[costs > 0]\n",
|
||||
"\n",
|
||||
"# Pareto MLE sobre la cola (p90+)\n",
|
||||
"x_min_qty = np.percentile(sizes, 90)\n",
|
||||
"tail_qty = sizes[sizes >= x_min_qty]\n",
|
||||
"alpha_qty = len(tail_qty) / np.sum(np.log(tail_qty / x_min_qty))\n",
|
||||
"\n",
|
||||
"x_min_cost = np.percentile(costs, 90)\n",
|
||||
"tail_cost = costs[costs >= x_min_cost]\n",
|
||||
"alpha_cost = len(tail_cost) / np.sum(np.log(tail_cost / x_min_cost))\n",
|
||||
"\n",
|
||||
"print(f'Tamaños (BTC):')\n",
|
||||
"print(f' Mediana: {np.median(sizes):.6f} BTC')\n",
|
||||
"print(f' p99: {np.percentile(sizes, 99):.4f} BTC')\n",
|
||||
"print(f' Max: {sizes.max():.2f} BTC')\n",
|
||||
"print(f' Pareto α (cola p90+): {alpha_qty:.2f}')\n",
|
||||
"\n",
|
||||
"print(f'\\nTurnover (USDT):')\n",
|
||||
"print(f' Mediana: ${np.median(costs):,.0f}')\n",
|
||||
"print(f' p99: ${np.percentile(costs, 99):,.0f}')\n",
|
||||
"print(f' Max: ${costs.max():,.0f}')\n",
|
||||
"print(f' Pareto α (cola p90+): {alpha_cost:.2f}')\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
|
||||
"\n",
|
||||
"# CCDF log-log de tamaños\n",
|
||||
"for ax, data, alpha, label, xmin in [\n",
|
||||
" (axes[0], sizes, alpha_qty, 'BTC', x_min_qty),\n",
|
||||
" (axes[1], costs, alpha_cost, 'USDT', x_min_cost),\n",
|
||||
"]:\n",
|
||||
" sorted_d = np.sort(data)[::-1]\n",
|
||||
" ranks = np.arange(1, len(sorted_d) + 1) / len(sorted_d)\n",
|
||||
" ax.loglog(sorted_d, ranks, '.', markersize=0.5, alpha=0.3, color='#2ecc71')\n",
|
||||
" x_fit = np.logspace(np.log10(xmin), np.log10(data.max()), 50)\n",
|
||||
" ax.loglog(x_fit, (x_fit/xmin)**(-alpha) * (len(data[data>=xmin])/len(data)),\n",
|
||||
" 'r-', linewidth=2, label=f'Pareto α={alpha:.2f}')\n",
|
||||
" ax.set_title(f'CCDF tamaños ({label})')\n",
|
||||
" ax.set_xlabel(label)\n",
|
||||
" ax.set_ylabel('P(X > x)')\n",
|
||||
" ax.legend()\n",
|
||||
" ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2.4 Jumps y colas pesadas"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Detectar jumps (retornos > 3σ)\n",
|
||||
"threshold = 3 * sigma_1m\n",
|
||||
"jump_mask = np.abs(returns) > threshold\n",
|
||||
"n_jumps = np.sum(jump_mask)\n",
|
||||
"jump_intensity = n_jumps / len(returns)\n",
|
||||
"jump_sizes = np.abs(returns[jump_mask])\n",
|
||||
"jump_size_std = np.std(jump_sizes) if len(jump_sizes) > 1 else 0\n",
|
||||
"\n",
|
||||
"print(f'Jumps detectados (>3σ): {n_jumps} de {len(returns)} ({jump_intensity*100:.1f}%)')\n",
|
||||
"print(f'Jump size std: {jump_size_std:.6f}')\n",
|
||||
"print(f'Kurtosis: {kurtosis:.1f} (Normal=3, >3 = colas pesadas)')\n",
|
||||
"\n",
|
||||
"# Retornos con jumps marcados\n",
|
||||
"fig, ax = plt.subplots(figsize=(16, 4))\n",
|
||||
"ax.plot(returns, linewidth=0.3, color='#3498db', alpha=0.6)\n",
|
||||
"idx = np.where(jump_mask)[0]\n",
|
||||
"ax.scatter(idx, returns[idx], color='red', s=10, zorder=5, label=f'Jumps ({n_jumps})')\n",
|
||||
"ax.axhline(y=threshold, color='red', linestyle='--', linewidth=0.5, alpha=0.5)\n",
|
||||
"ax.axhline(y=-threshold, color='red', linestyle='--', linewidth=0.5, alpha=0.5)\n",
|
||||
"ax.set_title('Retornos 1m — jumps marcados en rojo')\n",
|
||||
"ax.legend(fontsize=8)\n",
|
||||
"ax.grid(True, alpha=0.3)\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### 2.5 Fills por aggTrade — estructura de las órdenes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# n_fills nos dice cuántos niveles del book barrió cada taker order\n",
|
||||
"fills = trades['n_fills'].to_numpy()\n",
|
||||
"\n",
|
||||
"print(f'Fills por aggTrade:')\n",
|
||||
"print(f' 1 fill (no cruzó niveles): {np.sum(fills == 1):,} ({np.mean(fills == 1)*100:.1f}%)')\n",
|
||||
"print(f' 2-5 fills: {np.sum((fills >= 2) & (fills <= 5)):,} ({np.mean((fills >= 2) & (fills <= 5))*100:.1f}%)')\n",
|
||||
"print(f' 6-20 fills: {np.sum((fills >= 6) & (fills <= 20)):,} ({np.mean((fills >= 6) & (fills <= 20))*100:.1f}%)')\n",
|
||||
"print(f' >20 fills (ballenas): {np.sum(fills > 20):,} ({np.mean(fills > 20)*100:.1f}%)')\n",
|
||||
"print(f' Max fills: {fills.max()}')\n",
|
||||
"\n",
|
||||
"fig, axes = plt.subplots(1, 2, figsize=(14, 4))\n",
|
||||
"\n",
|
||||
"ax = axes[0]\n",
|
||||
"ax.hist(fills[fills <= 20], bins=range(1, 22), color='#3498db', alpha=0.6, edgecolor='white')\n",
|
||||
"ax.set_title('Fills por aggTrade (≤20)')\n",
|
||||
"ax.set_xlabel('Número de fills')\n",
|
||||
"ax.set_ylabel('Frecuencia')\n",
|
||||
"ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"# Qty vs n_fills — las ballenas barren más niveles\n",
|
||||
"ax = axes[1]\n",
|
||||
"sample = trades.sample(min(50000, trades.shape[0]), seed=42)\n",
|
||||
"ax.scatter(sample['n_fills'].to_numpy(), sample['qty'].to_numpy(), s=0.5, alpha=0.2, color='#e67e22')\n",
|
||||
"ax.set_xlabel('Fills por aggTrade')\n",
|
||||
"ax.set_ylabel('Qty (BTC)')\n",
|
||||
"ax.set_title('Tamaño de orden vs fills (más grande = barre más niveles)')\n",
|
||||
"ax.set_yscale('log')\n",
|
||||
"ax.set_xscale('log')\n",
|
||||
"ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## 3. Bitstamp L3: comparar con Binance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Cargar Bitstamp si ya existe\n",
|
||||
"bs_orders_path = DATA / 'bitstamp_btcusd_l3_orders.csv'\n",
|
||||
"bs_trades_path = DATA / 'bitstamp_btcusd_l3_trades.csv'\n",
|
||||
"\n",
|
||||
"if bs_orders_path.exists():\n",
|
||||
" bs_orders = pl.read_csv(str(bs_orders_path))\n",
|
||||
" print(f'Bitstamp L3 orders: {bs_orders.shape[0]:,}')\n",
|
||||
" print(bs_orders.group_by('event').agg(pl.len().alias('count')).sort('count', descending=True))\n",
|
||||
" print()\n",
|
||||
" \n",
|
||||
" # Ratio create/delete — vida media de las órdenes\n",
|
||||
" creates = bs_orders.filter(pl.col('event') == 'order_created').shape[0]\n",
|
||||
" deletes = bs_orders.filter(pl.col('event') == 'order_deleted').shape[0]\n",
|
||||
" changes = bs_orders.filter(pl.col('event') == 'order_changed').shape[0]\n",
|
||||
" print(f'Creadas: {creates:,} Borradas: {deletes:,} Cambiadas: {changes:,}')\n",
|
||||
" print(f'Ratio delete/create: {deletes/creates:.2f} (cercano a 1 = la mayoría se cancela sin ejecutar)')\n",
|
||||
" \n",
|
||||
" # Cuántas se cancelan vs se ejecutan\n",
|
||||
" print(f'\\nEsto revela algo fundamental: la mayoría de órdenes se CANCELAN, no se ejecutan.')\n",
|
||||
" print(f'Los makers constantemente ponen y quitan órdenes para ajustar sus quotes.')\n",
|
||||
"\n",
|
||||
"if bs_trades_path.exists():\n",
|
||||
" bs_trades = pl.read_csv(str(bs_trades_path))\n",
|
||||
" print(f'\\nBitstamp L3 trades: {bs_trades.shape[0]:,}')\n",
|
||||
" print(bs_trades.head(3))\n",
|
||||
" \n",
|
||||
" # En L3 podemos ver maker y taker order IDs\n",
|
||||
" print(f'\\nCon L3 vemos los IDs del buyer y seller de cada trade:')\n",
|
||||
" print(f' Unique buy_order_ids: {bs_trades[\"buy_order_id\"].n_unique():,}')\n",
|
||||
" print(f' Unique sell_order_ids: {bs_trades[\"sell_order_id\"].n_unique():,}')\n",
|
||||
"\n",
|
||||
"if not bs_orders_path.exists():\n",
|
||||
" print('Bitstamp L3 aún no disponible. Ejecutar notebook 05 primero.')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## 4. Resumen: parámetros calibrados desde datos reales\n",
|
||||
"\n",
|
||||
"Estos son los valores que usaríamos para que nuestra simulación genere datos similares a BTC/USDT."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Recopilar todo\n",
|
||||
"print('=' * 65)\n",
|
||||
"print(' PARÁMETROS CALIBRADOS DESDE BTC/USDT REAL')\n",
|
||||
"print(' Dataset: 1M aggTrades, ~26 horas')\n",
|
||||
"print('=' * 65)\n",
|
||||
"print(f'')\n",
|
||||
"print(f' # Precio fundamental')\n",
|
||||
"print(f' sigma = {sigma_1m:.6f} # por minuto')\n",
|
||||
"print(f' mu = {np.mean(returns):.8f} # drift (cercano a 0)')\n",
|
||||
"print(f'')\n",
|
||||
"print(f' # Jumps')\n",
|
||||
"print(f' jump_intensity = {jump_intensity:.4f} # {jump_intensity*100:.1f}% de velas tienen jump')\n",
|
||||
"print(f' jump_size_std = {jump_size_std:.6f}')\n",
|
||||
"print(f'')\n",
|
||||
"print(f' # Arrival rate')\n",
|
||||
"print(f' n_takers_lambda = {lambda_mean:.1f} # aggTrades/segundo')\n",
|
||||
"print(f'')\n",
|
||||
"print(f' # Hawkes clustering')\n",
|
||||
"print(f' hawkes_alpha = {hawkes_a:.4f}')\n",
|
||||
"print(f' hawkes_beta = {hawkes_b:.4f}')\n",
|
||||
"print(f' branching_ratio = {branching:.3f}')\n",
|
||||
"print(f'')\n",
|
||||
"print(f' # Distribución de tamaños')\n",
|
||||
"print(f' taker_size_alpha = {alpha_qty:.2f} # Pareto exponent (cola p90+)')\n",
|
||||
"print(f' taker_size_min = {np.percentile(sizes, 5):.6f} # BTC (p5)')\n",
|
||||
"print(f' taker_size_max = {np.percentile(sizes, 99.9):.4f} # BTC (p99.9)')\n",
|
||||
"print(f'')\n",
|
||||
"print(f' # Estructura de fills')\n",
|
||||
"print(f' median_fills_per_order = {np.median(fills):.0f}')\n",
|
||||
"print(f' pct_single_fill = {np.mean(fills==1)*100:.1f}%')\n",
|
||||
"print(f'')\n",
|
||||
"print(f' # Resumen estadístico')\n",
|
||||
"print(f' kurtosis = {kurtosis:.1f}')\n",
|
||||
"print(f' skewness = {skew:.3f}')\n",
|
||||
"print(f' var_mean_ratio = {var_mean:.1f}')\n",
|
||||
"print('=' * 65)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.13.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,517 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Monte Carlo: análisis de sensibilidad por parámetro\n",
|
||||
"\n",
|
||||
"Usamos las funciones del registry para correr cientos de simulaciones variando **un parámetro a la vez**.\n",
|
||||
"Esto nos dice:\n",
|
||||
"- Qué parámetros importan más\n",
|
||||
"- Cómo responde el mercado simulado a cada cambio\n",
|
||||
"- Qué rangos producen mercados realistas\n",
|
||||
"\n",
|
||||
"## Parámetros calibrados desde BTC/USDT real (notebook 06)\n",
|
||||
"\n",
|
||||
"| Parámetro | Valor calibrado | Confianza | Fuente |\n",
|
||||
"|---|---|---|---|\n",
|
||||
"| sigma | 0.000514 | Alta | Std retornos 1m |\n",
|
||||
"| mu | ~0 | Alta | Media retornos |\n",
|
||||
"| jump_intensity | 0.013 | Media | % retornos > 3σ |\n",
|
||||
"| jump_size_std | 0.000356 | Media | Std de los jumps |\n",
|
||||
"| n_takers_lambda | 12.0 | Media | aggTrades/segundo |\n",
|
||||
"| taker_size_alpha | 0.78 | Media | Pareto MLE cola p90+ |\n",
|
||||
"| hawkes_alpha | 0.17 | Baja | Fit exp sobre ACF |\n",
|
||||
"| hawkes_beta | 0.015 | Baja | Fit exp sobre ACF |\n",
|
||||
"| gamma | ? | No observable | Relación spread~vol |\n",
|
||||
"| n_makers | ? | No observable | Capas de liquidez L2 |\n",
|
||||
"| maker_spread | 0.01 | Alta | Spread real del book |"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Base params cargados\n",
|
||||
"Test: 270 trades, spread=-0.057967\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import sys, os\n",
|
||||
"sys.path.insert(0, os.path.join(os.environ.get('FN_REGISTRY_ROOT', os.path.expanduser('~/fn_registry')), 'python', 'functions'))\n",
|
||||
"sys.path.insert(0, os.path.join(os.environ.get('FN_REGISTRY_ROOT', os.path.expanduser('~/fn_registry')), 'python', 'functions', 'pipelines'))\n",
|
||||
"\n",
|
||||
"from run_market_sim import run_market_sim\n",
|
||||
"import numpy as np\n",
|
||||
"import polars as pl\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"\n",
|
||||
"# Parámetros base calibrados desde datos reales\n",
|
||||
"BASE = dict(\n",
|
||||
" initial_price=100.0,\n",
|
||||
" n_ticks=300,\n",
|
||||
" sigma=0.000514,\n",
|
||||
" mu=0.0,\n",
|
||||
" jump_intensity=0.013,\n",
|
||||
" jump_size_std=0.000356,\n",
|
||||
" n_makers=5,\n",
|
||||
" maker_spread=0.01,\n",
|
||||
" gamma=0.1,\n",
|
||||
" maker_levels=3,\n",
|
||||
" maker_qty=10.0,\n",
|
||||
" n_takers_lambda=12.0,\n",
|
||||
" taker_size_alpha=0.78,\n",
|
||||
" taker_size_min=0.001,\n",
|
||||
" taker_size_max=5.0,\n",
|
||||
" hawkes_alpha=0.17,\n",
|
||||
" hawkes_beta=0.015,\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print('Base params cargados')\n",
|
||||
"# Quick test\n",
|
||||
"r = run_market_sim(**BASE, seed=0)\n",
|
||||
"print(f'Test: {r[\"total_trades\"]} trades, spread={np.mean(r[\"spreads\"]):.6f}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Herramientas de análisis"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def sweep_param(param_name: str, values: list, base_params: dict, n_seeds: int = 10) -> pl.DataFrame:\n",
|
||||
" \"\"\"Corre simulaciones variando un parámetro. N seeds por valor para tener distribución.\"\"\"\n",
|
||||
" records = []\n",
|
||||
" total = len(values) * n_seeds\n",
|
||||
" done = 0\n",
|
||||
" for val in values:\n",
|
||||
" for seed in range(n_seeds):\n",
|
||||
" params = dict(base_params)\n",
|
||||
" params[param_name] = val\n",
|
||||
" params['seed'] = seed * 1000 + hash(str(val)) % 1000\n",
|
||||
" sim = run_market_sim(**params)\n",
|
||||
" \n",
|
||||
" spreads = sim['spreads']\n",
|
||||
" npt = sim['n_trades_per_tick']\n",
|
||||
" tp = np.array(sim['trade_prices']) if sim['trade_prices'] else np.array([0.0])\n",
|
||||
" fp = np.array(sim['fundamental_prices'])\n",
|
||||
" \n",
|
||||
" # Realized vol de trades\n",
|
||||
" tp_pos = tp[tp > 0]\n",
|
||||
" if len(tp_pos) > 2:\n",
|
||||
" log_ret = np.diff(np.log(tp_pos))\n",
|
||||
" rvol = float(np.std(log_ret))\n",
|
||||
" else:\n",
|
||||
" rvol = 0.0\n",
|
||||
" \n",
|
||||
" records.append({\n",
|
||||
" 'param_value': float(val),\n",
|
||||
" 'seed': seed,\n",
|
||||
" 'total_trades': sim['total_trades'],\n",
|
||||
" 'mean_spread': float(np.mean(spreads)),\n",
|
||||
" 'std_spread': float(np.std(spreads)),\n",
|
||||
" 'mean_trades_tick': float(np.mean(npt)),\n",
|
||||
" 'max_trades_tick': int(np.max(npt)),\n",
|
||||
" 'realized_vol': rvol,\n",
|
||||
" 'price_return_pct': float((fp[-1] / fp[0] - 1) * 100),\n",
|
||||
" 'maker_total_pnl': float(sum(sim['maker_pnls'])),\n",
|
||||
" })\n",
|
||||
" done += 1\n",
|
||||
" \n",
|
||||
" print(f'{param_name}: {done} simulaciones')\n",
|
||||
" return pl.DataFrame(records)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def plot_sweep(df: pl.DataFrame, param_name: str, metrics: list[tuple[str, str]], title: str = ''):\n",
|
||||
" \"\"\"Grafica métricas vs parámetro con bandas de confianza.\"\"\"\n",
|
||||
" n = len(metrics)\n",
|
||||
" fig, axes = plt.subplots(1, n, figsize=(5 * n, 4))\n",
|
||||
" if n == 1:\n",
|
||||
" axes = [axes]\n",
|
||||
" \n",
|
||||
" agg = df.group_by('param_value').agg(\n",
|
||||
" *[pl.col(m).mean().alias(f'{m}_mean') for m, _ in metrics],\n",
|
||||
" *[pl.col(m).std().alias(f'{m}_std') for m, _ in metrics],\n",
|
||||
" ).sort('param_value')\n",
|
||||
" \n",
|
||||
" x = agg['param_value'].to_numpy()\n",
|
||||
" \n",
|
||||
" for i, (metric, label) in enumerate(metrics):\n",
|
||||
" ax = axes[i]\n",
|
||||
" y = agg[f'{metric}_mean'].to_numpy()\n",
|
||||
" yerr = agg[f'{metric}_std'].to_numpy()\n",
|
||||
" yerr = np.nan_to_num(yerr, nan=0.0)\n",
|
||||
" \n",
|
||||
" ax.fill_between(x, y - yerr, y + yerr, alpha=0.2, color='#3498db')\n",
|
||||
" ax.plot(x, y, 'o-', color='#3498db', markersize=4, linewidth=1.5)\n",
|
||||
" ax.set_xlabel(param_name)\n",
|
||||
" ax.set_ylabel(label)\n",
|
||||
" ax.grid(True, alpha=0.3)\n",
|
||||
" \n",
|
||||
" fig.suptitle(title or f'Sensibilidad a {param_name}', fontsize=12, fontweight='bold')\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.show()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"METRICS = [\n",
|
||||
" ('mean_spread', 'Spread medio'),\n",
|
||||
" ('total_trades', 'Total trades'),\n",
|
||||
" ('realized_vol', 'Vol realizada'),\n",
|
||||
" ('maker_total_pnl', 'PnL makers'),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"print('sweep_param() y plot_sweep() definidas')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## 1. SIGMA (volatilidad)\n",
|
||||
"\n",
|
||||
"**Qué es:** cuánto se mueve el precio fundamental por tick. \n",
|
||||
"**Calibrado:** 0.000514 (desde retornos 1m de BTC) \n",
|
||||
"**Confianza:** ALTA — medición directa \n",
|
||||
"**Hipótesis:** más σ → más oportunidades para takers → más trades, spread más ancho (makers se protegen)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"sigma_vals = [0.0001, 0.0003, 0.0005, 0.001, 0.002, 0.005, 0.01, 0.02, 0.05]\n",
|
||||
"df_sigma = sweep_param('sigma', sigma_vals, BASE, n_seeds=10)\n",
|
||||
"plot_sweep(df_sigma, 'sigma', METRICS, 'SIGMA — volatilidad del precio fundamental')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## 2. GAMMA (aversión al riesgo del maker)\n",
|
||||
"\n",
|
||||
"**Qué es:** cuánto ajusta el maker sus precios por inventario acumulado. \n",
|
||||
"**Calibrado:** NO directamente — se infiere de spread vs volatilidad \n",
|
||||
"**Confianza:** BAJA \n",
|
||||
"**Hipótesis:** más γ → spread más ancho → menos ejecuciones → makers más seguros pero mercado menos líquido"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"gamma_vals = [0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 1.0, 2.0, 5.0]\n",
|
||||
"df_gamma = sweep_param('gamma', gamma_vals, BASE, n_seeds=10)\n",
|
||||
"plot_sweep(df_gamma, 'gamma', METRICS, 'GAMMA — aversión al riesgo del maker (Avellaneda-Stoikov)')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## 3. N_TAKERS_LAMBDA (arrival rate de takers)\n",
|
||||
"\n",
|
||||
"**Qué es:** cuántos takers llegan por tick en promedio (base Poisson, amplificado por Hawkes). \n",
|
||||
"**Calibrado:** 12.0 aggTrades/segundo \n",
|
||||
"**Confianza:** MEDIA — medimos aggTrades, no órdenes originales \n",
|
||||
"**Hipótesis:** más λ → más presión sobre el book → más trades, spreads más volátiles"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"lambda_vals = [0.5, 1, 2, 5, 10, 15, 20, 30, 50]\n",
|
||||
"df_lambda = sweep_param('n_takers_lambda', lambda_vals, BASE, n_seeds=10)\n",
|
||||
"plot_sweep(df_lambda, 'n_takers_lambda', METRICS, 'LAMBDA — arrival rate de takers')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## 4. HAWKES_ALPHA (contagio entre trades)\n",
|
||||
"\n",
|
||||
"**Qué es:** cuánto excita un trade la llegada de más trades (clustering). \n",
|
||||
"**Calibrado:** 0.17 (fit exponencial sobre ACF) \n",
|
||||
"**Confianza:** BAJA — el branching ratio salió >1, modelo simple no captura bien \n",
|
||||
"**Hipótesis:** más α → ráfagas más intensas → max trades/tick explota, spread se estresa"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"hawkes_a_vals = [0.0, 0.05, 0.1, 0.2, 0.3, 0.5, 0.7, 0.9]\n",
|
||||
"df_hawkes_a = sweep_param('hawkes_alpha', hawkes_a_vals, BASE, n_seeds=10)\n",
|
||||
"plot_sweep(df_hawkes_a, 'hawkes_alpha', METRICS, 'HAWKES_ALPHA — contagio entre trades')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## 5. TAKER_SIZE_ALPHA (cola de tamaños — ballenas)\n",
|
||||
"\n",
|
||||
"**Qué es:** exponente Pareto de los tamaños de órdenes. Bajo = más ballenas. \n",
|
||||
"**Calibrado:** 0.78 (MLE sobre cola p90+) \n",
|
||||
"**Confianza:** MEDIA — medimos fills agrupados, no órdenes originales \n",
|
||||
"**Hipótesis:** α bajo → más órdenes grandes → más slippage, spread se abre más, más impacto en precio"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"size_a_vals = [0.3, 0.5, 0.78, 1.0, 1.5, 2.0, 3.0, 5.0]\n",
|
||||
"df_size_a = sweep_param('taker_size_alpha', size_a_vals, BASE, n_seeds=10)\n",
|
||||
"plot_sweep(df_size_a, 'taker_size_alpha', METRICS, 'TAKER_SIZE_ALPHA — cola de tamaños (bajo = más ballenas)')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## 6. N_MAKERS (número de market makers)\n",
|
||||
"\n",
|
||||
"**Qué es:** cuántos makers compiten poniendo liquidez. \n",
|
||||
"**Calibrado:** NO directamente observable — se infiere de capas de liquidez en L2 \n",
|
||||
"**Confianza:** BAJA \n",
|
||||
"**Hipótesis:** más makers → más competencia → spread más tight, más liquidez, pero PnL por maker baja"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"nmakers_vals = [1, 2, 3, 5, 7, 10, 15, 20]\n",
|
||||
"df_nmakers = sweep_param('n_makers', nmakers_vals, BASE, n_seeds=10)\n",
|
||||
"plot_sweep(df_nmakers, 'n_makers', METRICS, 'N_MAKERS — número de market makers')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## 7. MAKER_SPREAD (spread base)\n",
|
||||
"\n",
|
||||
"**Qué es:** el spread mínimo que los makers intentan capturar. \n",
|
||||
"**Calibrado:** $0.01 (spread real de BTC/USDT en Binance) \n",
|
||||
"**Confianza:** ALTA — medición directa del book \n",
|
||||
"**Hipótesis:** spread más ancho → menos ejecuciones → makers más rentables pero mercado menos eficiente"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"spread_vals = [0.001, 0.005, 0.01, 0.05, 0.1, 0.3, 0.5, 1.0, 2.0]\n",
|
||||
"df_spread = sweep_param('maker_spread', spread_vals, BASE, n_seeds=10)\n",
|
||||
"plot_sweep(df_spread, 'maker_spread', METRICS, 'MAKER_SPREAD — spread base deseado por makers')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## 8. JUMP_INTENSITY (frecuencia de saltos)\n",
|
||||
"\n",
|
||||
"**Qué es:** probabilidad de un movimiento brusco en cada tick. \n",
|
||||
"**Calibrado:** 1.3% (retornos > 3σ) \n",
|
||||
"**Confianza:** MEDIA — depende del threshold elegido \n",
|
||||
"**Hipótesis:** más jumps → más volatilidad realizada, kurtosis sube, makers sufren más"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"jump_vals = [0.0, 0.005, 0.01, 0.02, 0.05, 0.1, 0.15, 0.2]\n",
|
||||
"df_jump = sweep_param('jump_intensity', jump_vals, BASE, n_seeds=10)\n",
|
||||
"plot_sweep(df_jump, 'jump_intensity', METRICS, 'JUMP_INTENSITY — frecuencia de saltos bruscos')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## 9. HAWKES_BETA (decaimiento del contagio)\n",
|
||||
"\n",
|
||||
"**Qué es:** qué tan rápido se calma la excitación después de una ráfaga. \n",
|
||||
"**Calibrado:** 0.015 \n",
|
||||
"**Confianza:** BAJA \n",
|
||||
"**Hipótesis:** β bajo → ráfagas más largas → mercado más caótico. β alto → ráfagas cortas → más Poisson"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"hawkes_b_vals = [0.005, 0.01, 0.02, 0.05, 0.1, 0.3, 0.5, 1.0, 2.0]\n",
|
||||
"df_hawkes_b = sweep_param('hawkes_beta', hawkes_b_vals, BASE, n_seeds=10)\n",
|
||||
"plot_sweep(df_hawkes_b, 'hawkes_beta', METRICS, 'HAWKES_BETA — decaimiento del contagio (alto = se calma rápido)')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## 10. MAKER_LEVELS (profundidad del maker)\n",
|
||||
"\n",
|
||||
"**Qué es:** cuántos niveles de precio pone cada maker a cada lado. \n",
|
||||
"**Calibrado:** se estima contando niveles con liquidez significativa en L2 \n",
|
||||
"**Confianza:** BAJA \n",
|
||||
"**Hipótesis:** más niveles → más profundidad → menos slippage para órdenes grandes"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"levels_vals = [1, 2, 3, 5, 7, 10, 15]\n",
|
||||
"df_levels = sweep_param('maker_levels', levels_vals, BASE, n_seeds=10)\n",
|
||||
"plot_sweep(df_levels, 'maker_levels', METRICS, 'MAKER_LEVELS — niveles de profundidad por maker')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## 11. Resumen: sensibilidad relativa\n",
|
||||
"\n",
|
||||
"¿Qué parámetro afecta más a cada métrica?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Calcular coeficiente de variación de cada métrica respecto a cada parámetro\n",
|
||||
"all_sweeps = {\n",
|
||||
" 'sigma': df_sigma,\n",
|
||||
" 'gamma': df_gamma,\n",
|
||||
" 'n_takers_lambda': df_lambda,\n",
|
||||
" 'hawkes_alpha': df_hawkes_a,\n",
|
||||
" 'taker_size_alpha': df_size_a,\n",
|
||||
" 'n_makers': df_nmakers,\n",
|
||||
" 'maker_spread': df_spread,\n",
|
||||
" 'jump_intensity': df_jump,\n",
|
||||
" 'hawkes_beta': df_hawkes_b,\n",
|
||||
" 'maker_levels': df_levels,\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"sensitivity = []\n",
|
||||
"for pname, df in all_sweeps.items():\n",
|
||||
" agg = df.group_by('param_value').agg(\n",
|
||||
" pl.col('mean_spread').mean(),\n",
|
||||
" pl.col('total_trades').mean(),\n",
|
||||
" pl.col('realized_vol').mean(),\n",
|
||||
" pl.col('maker_total_pnl').mean(),\n",
|
||||
" )\n",
|
||||
" for metric in ['mean_spread', 'total_trades', 'realized_vol', 'maker_total_pnl']:\n",
|
||||
" vals = agg[metric].to_numpy()\n",
|
||||
" vals = vals[~np.isnan(vals)]\n",
|
||||
" if len(vals) > 1 and np.mean(np.abs(vals)) > 0:\n",
|
||||
" cv = np.std(vals) / np.mean(np.abs(vals))\n",
|
||||
" else:\n",
|
||||
" cv = 0.0\n",
|
||||
" sensitivity.append({'param': pname, 'metric': metric, 'cv': round(cv, 3)})\n",
|
||||
"\n",
|
||||
"sens_df = pl.DataFrame(sensitivity)\n",
|
||||
"\n",
|
||||
"# Heatmap\n",
|
||||
"params_order = list(all_sweeps.keys())\n",
|
||||
"metrics_order = ['mean_spread', 'total_trades', 'realized_vol', 'maker_total_pnl']\n",
|
||||
"metrics_labels = ['Spread', 'Trades', 'Vol realizada', 'PnL makers']\n",
|
||||
"\n",
|
||||
"matrix = np.zeros((len(params_order), len(metrics_order)))\n",
|
||||
"for row in sens_df.iter_rows(named=True):\n",
|
||||
" i = params_order.index(row['param'])\n",
|
||||
" j = metrics_order.index(row['metric'])\n",
|
||||
" matrix[i, j] = row['cv']\n",
|
||||
"\n",
|
||||
"fig, ax = plt.subplots(figsize=(10, 8))\n",
|
||||
"im = ax.imshow(matrix, cmap='YlOrRd', aspect='auto')\n",
|
||||
"ax.set_xticks(range(len(metrics_labels)))\n",
|
||||
"ax.set_xticklabels(metrics_labels, fontsize=10)\n",
|
||||
"ax.set_yticks(range(len(params_order)))\n",
|
||||
"ax.set_yticklabels(params_order, fontsize=10)\n",
|
||||
"\n",
|
||||
"for i in range(len(params_order)):\n",
|
||||
" for j in range(len(metrics_order)):\n",
|
||||
" ax.text(j, i, f'{matrix[i,j]:.2f}', ha='center', va='center', fontsize=9,\n",
|
||||
" color='white' if matrix[i,j] > 0.5 else 'black')\n",
|
||||
"\n",
|
||||
"ax.set_title('Sensibilidad: coeficiente de variación por parámetro × métrica\\n(más alto = más impacto)', fontsize=12)\n",
|
||||
"plt.colorbar(im, label='CV')\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"# Top sensibilidades\n",
|
||||
"print('\\nTop 10 combinaciones param × métrica más sensibles:')\n",
|
||||
"top = sens_df.sort('cv', descending=True).head(10)\n",
|
||||
"print(top)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.13.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,504 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Estimación de precios futuros con Monte Carlo\n",
|
||||
"\n",
|
||||
"Usamos los parámetros calibrados de BTC/USDT real para generar **miles de caminos de precio posibles** y estimar:\n",
|
||||
"- Distribución del precio a distintos horizontes\n",
|
||||
"- Intervalos de confianza (fan chart)\n",
|
||||
"- Probabilidad de subir/bajar X%\n",
|
||||
"- Value at Risk (VaR) y Expected Shortfall\n",
|
||||
"\n",
|
||||
"**Importante:** Esto NO es una predicción. Es un modelo probabilístico que dice \"dado cómo se ha comportado el mercado, estos son los escenarios posibles\". La distribución real tiene colas más pesadas que nuestro modelo."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Listo\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import sys, os\n",
|
||||
"sys.path.insert(0, os.path.join(os.environ.get('FN_REGISTRY_ROOT', os.path.expanduser('~/fn_registry')), 'python', 'functions'))\n",
|
||||
"sys.path.insert(0, os.path.join(os.environ.get('FN_REGISTRY_ROOT', os.path.expanduser('~/fn_registry')), 'python', 'functions', 'pipelines'))\n",
|
||||
"\n",
|
||||
"from finance.finance import generate_gbm_prices\n",
|
||||
"from run_market_sim import run_market_sim\n",
|
||||
"import numpy as np\n",
|
||||
"import polars as pl\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from matplotlib.colors import LinearSegmentedColormap\n",
|
||||
"\n",
|
||||
"print('Listo')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 1. Parámetros calibrados y escenarios"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Precio actual de BTC\n",
|
||||
"CURRENT_PRICE = 66760.0\n",
|
||||
"\n",
|
||||
"# Parámetros calibrados de notebook 06 (datos reales 1M trades)\n",
|
||||
"CALIBRATED = dict(\n",
|
||||
" sigma=0.000514, # por minuto\n",
|
||||
" mu=0.0, # sin drift (conservador)\n",
|
||||
" jump_intensity=0.013, # 1.3% de velas con jump\n",
|
||||
" jump_size_std=0.000356, # tamaño de los jumps\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Horizontes de simulación\n",
|
||||
"HORIZONS = {\n",
|
||||
" '1 hora': 60,\n",
|
||||
" '4 horas': 240,\n",
|
||||
" '1 día': 1440,\n",
|
||||
" '1 semana': 10080,\n",
|
||||
"}\n",
|
||||
"\n",
|
||||
"N_SIMS = 5000 # simulaciones por escenario\n",
|
||||
"\n",
|
||||
"print(f'Precio actual: ${CURRENT_PRICE:,.0f}')\n",
|
||||
"print(f'σ minuto: {CALIBRATED[\"sigma\"]:.6f}')\n",
|
||||
"print(f'σ diaria: {CALIBRATED[\"sigma\"] * np.sqrt(1440):.4f} ({CALIBRATED[\"sigma\"] * np.sqrt(1440) * 100:.2f}%)')\n",
|
||||
"print(f'σ anual: {CALIBRATED[\"sigma\"] * np.sqrt(1440 * 365):.2f} ({CALIBRATED[\"sigma\"] * np.sqrt(1440 * 365) * 100:.0f}%)')\n",
|
||||
"print(f'Simulaciones: {N_SIMS:,}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 2. Generar caminos de precio Monte Carlo\n",
|
||||
"\n",
|
||||
"Para cada simulación generamos un camino completo de precios usando GBM + jumps.\n",
|
||||
"El horizonte más largo (1 semana = 10,080 minutos) incluye a todos los demás."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"max_ticks = max(HORIZONS.values())\n",
|
||||
"\n",
|
||||
"# Generar todos los caminos (matrix: N_SIMS x max_ticks)\n",
|
||||
"all_paths = np.zeros((N_SIMS, max_ticks))\n",
|
||||
"\n",
|
||||
"for i in range(N_SIMS):\n",
|
||||
" path = generate_gbm_prices(\n",
|
||||
" initial_price=CURRENT_PRICE,\n",
|
||||
" n_ticks=max_ticks,\n",
|
||||
" seed=i,\n",
|
||||
" **CALIBRATED,\n",
|
||||
" )\n",
|
||||
" all_paths[i] = path\n",
|
||||
"\n",
|
||||
"print(f'Generados {N_SIMS:,} caminos de {max_ticks:,} ticks ({max_ticks/1440:.0f} días)')\n",
|
||||
"print(f'Shape: {all_paths.shape}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 3. Fan chart — todos los caminos posibles\n",
|
||||
"\n",
|
||||
"El fan chart muestra la distribución del precio en cada momento.\n",
|
||||
"Las bandas representan percentiles: cuanto más oscuro, más probable."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def plot_fan_chart(paths, horizon_ticks, horizon_name, n_sample_paths=50):\n",
|
||||
" \"\"\"Fan chart con bandas de percentiles.\"\"\"\n",
|
||||
" data = paths[:, :horizon_ticks]\n",
|
||||
" ticks = np.arange(horizon_ticks)\n",
|
||||
" \n",
|
||||
" # Percentiles\n",
|
||||
" bands = [\n",
|
||||
" (1, 99, '#3498db', 0.08),\n",
|
||||
" (5, 95, '#3498db', 0.12),\n",
|
||||
" (10, 90, '#3498db', 0.18),\n",
|
||||
" (25, 75, '#3498db', 0.25),\n",
|
||||
" (40, 60, '#3498db', 0.35),\n",
|
||||
" ]\n",
|
||||
" \n",
|
||||
" fig, ax = plt.subplots(figsize=(16, 7))\n",
|
||||
" \n",
|
||||
" for plo, phi, color, alpha in bands:\n",
|
||||
" lo = np.percentile(data, plo, axis=0)\n",
|
||||
" hi = np.percentile(data, phi, axis=0)\n",
|
||||
" ax.fill_between(ticks, lo, hi, color=color, alpha=alpha, label=f'p{plo}-p{phi}')\n",
|
||||
" \n",
|
||||
" # Mediana\n",
|
||||
" median = np.median(data, axis=0)\n",
|
||||
" ax.plot(ticks, median, color='#2c3e50', linewidth=1.5, label='Mediana')\n",
|
||||
" \n",
|
||||
" # Sample paths\n",
|
||||
" rng = np.random.default_rng(0)\n",
|
||||
" idx = rng.choice(N_SIMS, n_sample_paths, replace=False)\n",
|
||||
" for j in idx:\n",
|
||||
" ax.plot(ticks, data[j], linewidth=0.15, alpha=0.3, color='#7f8c8d')\n",
|
||||
" \n",
|
||||
" ax.axhline(y=CURRENT_PRICE, color='red', linestyle='--', linewidth=0.8, alpha=0.5, label=f'Precio actual ${CURRENT_PRICE:,.0f}')\n",
|
||||
" \n",
|
||||
" # Formatear eje x\n",
|
||||
" if horizon_ticks <= 240:\n",
|
||||
" ax.set_xlabel('Minutos')\n",
|
||||
" elif horizon_ticks <= 1440:\n",
|
||||
" xticks = np.arange(0, horizon_ticks + 1, 60)\n",
|
||||
" ax.set_xticks(xticks)\n",
|
||||
" ax.set_xticklabels([f'{int(x/60)}h' for x in xticks])\n",
|
||||
" ax.set_xlabel('Horas')\n",
|
||||
" else:\n",
|
||||
" xticks = np.arange(0, horizon_ticks + 1, 1440)\n",
|
||||
" ax.set_xticks(xticks)\n",
|
||||
" ax.set_xticklabels([f'{int(x/1440)}d' for x in xticks])\n",
|
||||
" ax.set_xlabel('Días')\n",
|
||||
" \n",
|
||||
" ax.set_ylabel('Precio (USDT)')\n",
|
||||
" ax.set_title(f'BTC/USDT — Monte Carlo {N_SIMS:,} simulaciones — Horizonte {horizon_name}', fontsize=13)\n",
|
||||
" ax.legend(loc='upper left', fontsize=8)\n",
|
||||
" ax.grid(True, alpha=0.3)\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.show()\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Fan charts para cada horizonte\n",
|
||||
"for name, ticks in HORIZONS.items():\n",
|
||||
" plot_fan_chart(all_paths, ticks, name)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 4. Distribución del precio final por horizonte"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fig, axes = plt.subplots(2, 2, figsize=(14, 10))\n",
|
||||
"\n",
|
||||
"for ax, (name, ticks) in zip(axes.flat, HORIZONS.items()):\n",
|
||||
" final_prices = all_paths[:, ticks - 1]\n",
|
||||
" returns_pct = (final_prices / CURRENT_PRICE - 1) * 100\n",
|
||||
" \n",
|
||||
" ax.hist(returns_pct, bins=80, density=True, color='#3498db', alpha=0.6, edgecolor='white')\n",
|
||||
" \n",
|
||||
" # Percentiles\n",
|
||||
" p5 = np.percentile(returns_pct, 5)\n",
|
||||
" p50 = np.percentile(returns_pct, 50)\n",
|
||||
" p95 = np.percentile(returns_pct, 95)\n",
|
||||
" \n",
|
||||
" ax.axvline(x=p5, color='red', linewidth=1.5, linestyle='--', label=f'p5: {p5:+.2f}%')\n",
|
||||
" ax.axvline(x=p50, color='#2c3e50', linewidth=1.5, label=f'Mediana: {p50:+.2f}%')\n",
|
||||
" ax.axvline(x=p95, color='green', linewidth=1.5, linestyle='--', label=f'p95: {p95:+.2f}%')\n",
|
||||
" ax.axvline(x=0, color='gray', linewidth=0.8, alpha=0.5)\n",
|
||||
" \n",
|
||||
" ax.set_title(f'{name}', fontsize=12, fontweight='bold')\n",
|
||||
" ax.set_xlabel('Retorno (%)')\n",
|
||||
" ax.legend(fontsize=8)\n",
|
||||
" ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"fig.suptitle(f'Distribución de retornos por horizonte — {N_SIMS:,} simulaciones', fontsize=14)\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 5. Tabla de estimaciones por horizonte"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print(f'Precio actual: ${CURRENT_PRICE:,.0f}')\n",
|
||||
"print(f'Modelo: GBM + Jump-diffusion (σ={CALIBRATED[\"sigma\"]}, jumps={CALIBRATED[\"jump_intensity\"]})')\n",
|
||||
"print(f'Simulaciones: {N_SIMS:,}')\n",
|
||||
"print()\n",
|
||||
"print(f'{\"Horizonte\":<12} {\"P5\":>10} {\"P25\":>10} {\"Mediana\":>10} {\"P75\":>10} {\"P95\":>10} {\"σ rango\":>10}')\n",
|
||||
"print('-' * 75)\n",
|
||||
"\n",
|
||||
"for name, ticks in HORIZONS.items():\n",
|
||||
" fp = all_paths[:, ticks - 1]\n",
|
||||
" p5, p25, p50, p75, p95 = np.percentile(fp, [5, 25, 50, 75, 95])\n",
|
||||
" sigma_range = (p95 - p5) / CURRENT_PRICE * 100\n",
|
||||
" print(f'{name:<12} ${p5:>9,.0f} ${p25:>9,.0f} ${p50:>9,.0f} ${p75:>9,.0f} ${p95:>9,.0f} ±{sigma_range/2:.1f}%')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 6. Probabilidades de escenarios"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"scenarios = [-10, -5, -2, -1, 0, 1, 2, 5, 10] # % de cambio\n",
|
||||
"\n",
|
||||
"print(f'{\"Horizonte\":<12}', end='')\n",
|
||||
"for s in scenarios:\n",
|
||||
" label = f'{s:+d}%' if s != 0 else ' =0%'\n",
|
||||
" print(f'{label:>8}', end='')\n",
|
||||
"print()\n",
|
||||
"print('-' * (12 + 8 * len(scenarios)))\n",
|
||||
"\n",
|
||||
"for name, ticks in HORIZONS.items():\n",
|
||||
" fp = all_paths[:, ticks - 1]\n",
|
||||
" returns = (fp / CURRENT_PRICE - 1) * 100\n",
|
||||
" \n",
|
||||
" print(f'{name:<12}', end='')\n",
|
||||
" for s in scenarios:\n",
|
||||
" if s < 0:\n",
|
||||
" prob = np.mean(returns <= s) * 100\n",
|
||||
" elif s > 0:\n",
|
||||
" prob = np.mean(returns >= s) * 100\n",
|
||||
" else:\n",
|
||||
" prob = np.mean(returns >= 0) * 100\n",
|
||||
" print(f'{prob:>7.1f}%', end='')\n",
|
||||
" print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 7. Value at Risk (VaR) y Expected Shortfall (CVaR)\n",
|
||||
"\n",
|
||||
"- **VaR(95%):** pérdida máxima que no se supera el 95% del tiempo\n",
|
||||
"- **CVaR(95%):** pérdida promedio en el peor 5% de los casos (más conservador)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"confidence_levels = [0.90, 0.95, 0.99]\n",
|
||||
"\n",
|
||||
"print(f'{\"Horizonte\":<12}', end='')\n",
|
||||
"for cl in confidence_levels:\n",
|
||||
" print(f'{\"VaR \" + str(int(cl*100)) + \"%\":>10} {\"CVaR \" + str(int(cl*100)) + \"%\":>10}', end='')\n",
|
||||
"print()\n",
|
||||
"print('-' * (12 + 20 * len(confidence_levels)))\n",
|
||||
"\n",
|
||||
"for name, ticks in HORIZONS.items():\n",
|
||||
" fp = all_paths[:, ticks - 1]\n",
|
||||
" pnl = fp - CURRENT_PRICE # P&L en dólares\n",
|
||||
" pnl_pct = (fp / CURRENT_PRICE - 1) * 100\n",
|
||||
" \n",
|
||||
" print(f'{name:<12}', end='')\n",
|
||||
" for cl in confidence_levels:\n",
|
||||
" var_pct = np.percentile(pnl_pct, (1 - cl) * 100)\n",
|
||||
" # CVaR = promedio de las pérdidas peores que VaR\n",
|
||||
" cvar_pct = np.mean(pnl_pct[pnl_pct <= var_pct])\n",
|
||||
" print(f'{var_pct:>+9.2f}% {cvar_pct:>+9.2f}%', end='')\n",
|
||||
" print()\n",
|
||||
"\n",
|
||||
"print()\n",
|
||||
"print('En dólares (por 1 BTC):')\n",
|
||||
"print(f'{\"Horizonte\":<12}', end='')\n",
|
||||
"for cl in confidence_levels:\n",
|
||||
" print(f'{\"VaR \" + str(int(cl*100)) + \"%\":>12} {\"CVaR \" + str(int(cl*100)) + \"%\":>12}', end='')\n",
|
||||
"print()\n",
|
||||
"print('-' * (12 + 24 * len(confidence_levels)))\n",
|
||||
"\n",
|
||||
"for name, ticks in HORIZONS.items():\n",
|
||||
" fp = all_paths[:, ticks - 1]\n",
|
||||
" pnl = fp - CURRENT_PRICE\n",
|
||||
" \n",
|
||||
" print(f'{name:<12}', end='')\n",
|
||||
" for cl in confidence_levels:\n",
|
||||
" var_usd = np.percentile(pnl, (1 - cl) * 100)\n",
|
||||
" cvar_usd = np.mean(pnl[pnl <= var_usd])\n",
|
||||
" print(f' ${var_usd:>+9,.0f} ${cvar_usd:>+9,.0f}', end='')\n",
|
||||
" print()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 8. Impacto del matching engine: simulación completa vs GBM puro\n",
|
||||
"\n",
|
||||
"¿Cambian las estimaciones cuando incluimos el matching engine (makers + takers) en vez de solo GBM?"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"N_ENGINE_SIMS = 200 # menos porque el engine es más lento\n",
|
||||
"HORIZON_ENGINE = 300 # ticks\n",
|
||||
"\n",
|
||||
"# Con matching engine\n",
|
||||
"engine_finals = []\n",
|
||||
"for i in range(N_ENGINE_SIMS):\n",
|
||||
" sim = run_market_sim(\n",
|
||||
" initial_price=CURRENT_PRICE,\n",
|
||||
" n_ticks=HORIZON_ENGINE,\n",
|
||||
" sigma=CALIBRATED['sigma'],\n",
|
||||
" mu=CALIBRATED['mu'],\n",
|
||||
" jump_intensity=CALIBRATED['jump_intensity'],\n",
|
||||
" jump_size_std=CALIBRATED['jump_size_std'],\n",
|
||||
" n_makers=5,\n",
|
||||
" maker_spread=0.01,\n",
|
||||
" gamma=0.1,\n",
|
||||
" n_takers_lambda=12.0,\n",
|
||||
" taker_size_alpha=0.78,\n",
|
||||
" hawkes_alpha=0.17,\n",
|
||||
" hawkes_beta=0.015,\n",
|
||||
" seed=i,\n",
|
||||
" )\n",
|
||||
" # Último midprice como precio final\n",
|
||||
" engine_finals.append(sim['midprices'][-1] if sim['midprices'] else CURRENT_PRICE)\n",
|
||||
"\n",
|
||||
"engine_finals = np.array(engine_finals)\n",
|
||||
"\n",
|
||||
"# GBM puro (mismos parámetros, mismo horizonte)\n",
|
||||
"gbm_finals = all_paths[:N_ENGINE_SIMS, HORIZON_ENGINE - 1]\n",
|
||||
"\n",
|
||||
"print(f'Simulaciones: {N_ENGINE_SIMS}')\n",
|
||||
"print(f'Horizonte: {HORIZON_ENGINE} minutos ({HORIZON_ENGINE/60:.0f}h)')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"fig, axes = plt.subplots(1, 2, figsize=(14, 5))\n",
|
||||
"\n",
|
||||
"# Distribuciones comparadas\n",
|
||||
"ax = axes[0]\n",
|
||||
"gbm_ret = (gbm_finals / CURRENT_PRICE - 1) * 100\n",
|
||||
"eng_ret = (engine_finals / CURRENT_PRICE - 1) * 100\n",
|
||||
"\n",
|
||||
"ax.hist(gbm_ret, bins=40, density=True, alpha=0.5, color='#3498db', label=f'GBM puro (σ={np.std(gbm_ret):.3f}%)')\n",
|
||||
"ax.hist(eng_ret, bins=40, density=True, alpha=0.5, color='#e74c3c', label=f'Con engine (σ={np.std(eng_ret):.3f}%)')\n",
|
||||
"ax.set_xlabel('Retorno (%)')\n",
|
||||
"ax.set_title(f'Distribución a {HORIZON_ENGINE/60:.0f}h: GBM vs Engine')\n",
|
||||
"ax.legend(fontsize=9)\n",
|
||||
"ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"# QQ plot\n",
|
||||
"ax = axes[1]\n",
|
||||
"gbm_sorted = np.sort(gbm_ret)\n",
|
||||
"eng_sorted = np.sort(eng_ret)\n",
|
||||
"min_len = min(len(gbm_sorted), len(eng_sorted))\n",
|
||||
"ax.scatter(gbm_sorted[:min_len], eng_sorted[:min_len], s=5, alpha=0.5, color='#9b59b6')\n",
|
||||
"lims = [min(gbm_sorted.min(), eng_sorted.min()), max(gbm_sorted.max(), eng_sorted.max())]\n",
|
||||
"ax.plot(lims, lims, 'k--', linewidth=0.8)\n",
|
||||
"ax.set_xlabel('GBM puro (%)')\n",
|
||||
"ax.set_ylabel('Con engine (%)')\n",
|
||||
"ax.set_title('QQ-Plot: GBM vs Engine\\n(en la diagonal = idénticos)')\n",
|
||||
"ax.grid(True, alpha=0.3)\n",
|
||||
"\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"print(f'GBM puro: media={np.mean(gbm_ret):+.3f}%, std={np.std(gbm_ret):.3f}%, kurtosis={float(np.mean((gbm_ret-np.mean(gbm_ret))**4)/np.std(gbm_ret)**4):.1f}')\n",
|
||||
"print(f'Con engine: media={np.mean(eng_ret):+.3f}%, std={np.std(eng_ret):.3f}%, kurtosis={float(np.mean((eng_ret-np.mean(eng_ret))**4)/np.std(eng_ret)**4):.1f}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## 9. Resumen"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"print('=' * 70)\n",
|
||||
"print(f' ESTIMACIÓN MONTE CARLO — BTC/USDT')\n",
|
||||
"print(f' Precio actual: ${CURRENT_PRICE:,.0f}')\n",
|
||||
"print(f' Modelo: GBM + Jump-diffusion calibrado con 1M trades reales')\n",
|
||||
"print(f' Simulaciones: {N_SIMS:,}')\n",
|
||||
"print('=' * 70)\n",
|
||||
"print()\n",
|
||||
"\n",
|
||||
"for name, ticks in HORIZONS.items():\n",
|
||||
" fp = all_paths[:, ticks - 1]\n",
|
||||
" ret = (fp / CURRENT_PRICE - 1) * 100\n",
|
||||
" p5, p50, p95 = np.percentile(fp, [5, 50, 95])\n",
|
||||
" prob_up = np.mean(fp > CURRENT_PRICE) * 100\n",
|
||||
" var95 = np.percentile(ret, 5)\n",
|
||||
" \n",
|
||||
" print(f' {name:}')\n",
|
||||
" print(f' Rango p5-p95: ${p5:,.0f} — ${p95:,.0f}')\n",
|
||||
" print(f' Mediana: ${p50:,.0f} ({(p50/CURRENT_PRICE - 1)*100:+.2f}%)')\n",
|
||||
" print(f' P(sube): {prob_up:.1f}%')\n",
|
||||
" print(f' VaR 95%: {var95:+.2f}% (${var95/100 * CURRENT_PRICE:+,.0f})')\n",
|
||||
" print()\n",
|
||||
"\n",
|
||||
"print(' NOTA: Estas estimaciones asumen que la volatilidad y la estructura')\n",
|
||||
"print(' del mercado se mantienen constantes. En la realidad cambian.')\n",
|
||||
"print(' Esto es un modelo probabilístico, NO una predicción.')\n",
|
||||
"print('=' * 70)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.13.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
@@ -0,0 +1,512 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Alpha Research: señales de microestructura\n",
|
||||
"\n",
|
||||
"Exploramos señales que podrían predecir movimientos de precio a corto plazo.\n",
|
||||
"\n",
|
||||
"Para cada señal:\n",
|
||||
"1. La calculamos sobre los datos reales\n",
|
||||
"2. Medimos su correlación con retornos futuros a distintos horizontes\n",
|
||||
"3. Visualizamos si tiene poder predictivo\n",
|
||||
"\n",
|
||||
"**Datos:** 1M aggTrades BTC/USDT (~26h)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Trades: 1,000,000\n",
|
||||
"Columnas: ['agg_trade_id', 'price', 'qty', 'first_trade_id', 'last_trade_id', 'timestamp', 'is_buyer_maker', 'side', 'n_fills']\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import polars as pl\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from pathlib import Path\n",
|
||||
"from scipy.stats import spearmanr\n",
|
||||
"\n",
|
||||
"DATA = Path('../data')\n",
|
||||
"trades = pl.read_csv(str(DATA / 'binance_btcusdt_aggtrades_1M.csv'))\n",
|
||||
"print(f'Trades: {trades.shape[0]:,}')\n",
|
||||
"print(f'Columnas: {trades.columns}')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Preparación: agrupar en barras de tiempo\n",
|
||||
"\n",
|
||||
"Las señales se calculan sobre ventanas de tiempo, no sobre trades individuales.\n",
|
||||
"Creamos barras de 1 segundo con todas las métricas que necesitamos."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Barras de 1 segundo\n",
|
||||
"bars = trades.with_columns(\n",
|
||||
" (pl.col('timestamp') // 1000).alias('second'),\n",
|
||||
" (pl.col('price') * pl.col('qty')).alias('turnover'),\n",
|
||||
" pl.when(pl.col('side') == 'buy').then(pl.col('qty')).otherwise(0.0).alias('buy_qty'),\n",
|
||||
" pl.when(pl.col('side') == 'sell').then(pl.col('qty')).otherwise(0.0).alias('sell_qty'),\n",
|
||||
" pl.when(pl.col('side') == 'buy').then(1).otherwise(0).alias('is_buy'),\n",
|
||||
").group_by('second').agg(\n",
|
||||
" pl.col('price').last().alias('close'),\n",
|
||||
" pl.col('price').first().alias('open'),\n",
|
||||
" pl.col('price').max().alias('high'),\n",
|
||||
" pl.col('price').min().alias('low'),\n",
|
||||
" pl.col('qty').sum().alias('volume'),\n",
|
||||
" pl.col('turnover').sum().alias('turnover'),\n",
|
||||
" pl.len().alias('n_trades'),\n",
|
||||
" pl.col('buy_qty').sum().alias('buy_volume'),\n",
|
||||
" pl.col('sell_qty').sum().alias('sell_volume'),\n",
|
||||
" pl.col('is_buy').sum().alias('n_buys'),\n",
|
||||
" (pl.len() - pl.col('is_buy').sum()).alias('n_sells'),\n",
|
||||
" pl.col('n_fills').max().alias('max_fills'), # biggest order this second\n",
|
||||
" pl.col('qty').max().alias('max_qty'),\n",
|
||||
").sort('second')\n",
|
||||
"\n",
|
||||
"# VWAP por segundo\n",
|
||||
"bars = bars.with_columns(\n",
|
||||
" (pl.col('turnover') / pl.col('volume')).alias('vwap'),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Log returns futuros a distintos horizontes (para evaluar señales)\n",
|
||||
"for horizon in [1, 5, 10, 30, 60]:\n",
|
||||
" bars = bars.with_columns(\n",
|
||||
" (pl.col('close').shift(-horizon).log() - pl.col('close').log()).alias(f'fwd_ret_{horizon}s')\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"print(f'Barras de 1s: {bars.shape[0]:,}')\n",
|
||||
"print(f'Columnas: {bars.columns}')\n",
|
||||
"print(bars.head(3))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def evaluate_signal(bars: pl.DataFrame, signal_col: str, name: str, horizons=[1, 5, 10, 30, 60]):\n",
|
||||
" \"\"\"Evalúa una señal: correlación con retornos futuros + gráficos.\"\"\"\n",
|
||||
" fig, axes = plt.subplots(1, len(horizons) + 1, figsize=(4 * (len(horizons) + 1), 4))\n",
|
||||
" \n",
|
||||
" # Panel 1: la señal en el tiempo\n",
|
||||
" ax = axes[0]\n",
|
||||
" sig = bars[signal_col].to_numpy()\n",
|
||||
" ax.plot(sig[:2000], linewidth=0.3, color='#3498db', alpha=0.7)\n",
|
||||
" ax.set_title(f'{name}\\n(primeros 2000s)', fontsize=9)\n",
|
||||
" ax.set_xlabel('Segundo')\n",
|
||||
" ax.grid(True, alpha=0.3)\n",
|
||||
" \n",
|
||||
" # Paneles 2+: scatter señal vs retorno futuro por horizonte\n",
|
||||
" corrs = []\n",
|
||||
" for i, h in enumerate(horizons):\n",
|
||||
" ax = axes[i + 1]\n",
|
||||
" ret_col = f'fwd_ret_{h}s'\n",
|
||||
" \n",
|
||||
" clean = bars.select([signal_col, ret_col]).drop_nulls()\n",
|
||||
" if clean.shape[0] < 100:\n",
|
||||
" corrs.append((h, 0, 1))\n",
|
||||
" continue\n",
|
||||
" \n",
|
||||
" x = clean[signal_col].to_numpy()\n",
|
||||
" y = clean[ret_col].to_numpy()\n",
|
||||
" \n",
|
||||
" # Spearman (rank correlation, más robusto a outliers)\n",
|
||||
" rho, pval = spearmanr(x, y)\n",
|
||||
" corrs.append((h, rho, pval))\n",
|
||||
" \n",
|
||||
" # Binned scatter: dividir señal en 20 bins, plotear media de retorno\n",
|
||||
" n_bins = 20\n",
|
||||
" try:\n",
|
||||
" bins = np.percentile(x[~np.isnan(x)], np.linspace(0, 100, n_bins + 1))\n",
|
||||
" bins = np.unique(bins)\n",
|
||||
" if len(bins) < 3:\n",
|
||||
" raise ValueError\n",
|
||||
" bin_idx = np.digitize(x, bins) - 1\n",
|
||||
" bin_idx = np.clip(bin_idx, 0, len(bins) - 2)\n",
|
||||
" bin_means_x = [np.mean(x[bin_idx == b]) for b in range(len(bins) - 1) if np.sum(bin_idx == b) > 0]\n",
|
||||
" bin_means_y = [np.mean(y[bin_idx == b]) * 10000 for b in range(len(bins) - 1) if np.sum(bin_idx == b) > 0] # in bps\n",
|
||||
" ax.bar(range(len(bin_means_y)), bin_means_y, color='#2ecc71' if rho > 0 else '#e74c3c', alpha=0.6)\n",
|
||||
" except:\n",
|
||||
" pass\n",
|
||||
" \n",
|
||||
" color = 'green' if abs(rho) > 0.02 and pval < 0.01 else 'gray'\n",
|
||||
" ax.set_title(f'{h}s: ρ={rho:.4f}\\np={pval:.2e}', fontsize=9, color=color)\n",
|
||||
" ax.set_xlabel(f'Bin de {name}')\n",
|
||||
" if i == 0:\n",
|
||||
" ax.set_ylabel('Ret futuro (bps)')\n",
|
||||
" ax.axhline(y=0, color='black', linewidth=0.5)\n",
|
||||
" ax.grid(True, alpha=0.3)\n",
|
||||
" \n",
|
||||
" fig.suptitle(f'Señal: {name}', fontsize=12, fontweight='bold')\n",
|
||||
" plt.tight_layout()\n",
|
||||
" plt.show()\n",
|
||||
" \n",
|
||||
" # Resumen\n",
|
||||
" for h, rho, pval in corrs:\n",
|
||||
" sig_marker = '***' if pval < 0.001 else '**' if pval < 0.01 else '*' if pval < 0.05 else ''\n",
|
||||
" print(f' {h:>3}s: ρ={rho:+.4f} (p={pval:.2e}) {sig_marker}')\n",
|
||||
" \n",
|
||||
" return corrs\n",
|
||||
"\n",
|
||||
"print('evaluate_signal() definida')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## Señal 1: Order Flow Imbalance (OFI)\n",
|
||||
"\n",
|
||||
"**Qué mide:** La diferencia entre volumen de compras y ventas en los últimos N segundos. \n",
|
||||
"**Intuición:** Si llegan más market buys que sells, hay presión compradora → el precio debería subir. \n",
|
||||
"**Fórmula:** `OFI = (buy_volume - sell_volume) / (buy_volume + sell_volume)` \n",
|
||||
"Normalizado entre -1 (todo sells) y +1 (todo buys)."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# OFI en ventanas de 5, 10, 30 segundos\n",
|
||||
"for w in [5, 10, 30]:\n",
|
||||
" buy_sum = bars['buy_volume'].rolling_sum(window_size=w)\n",
|
||||
" sell_sum = bars['sell_volume'].rolling_sum(window_size=w)\n",
|
||||
" total = buy_sum + sell_sum\n",
|
||||
" ofi = (buy_sum - sell_sum) / total\n",
|
||||
" bars = bars.with_columns(ofi.alias(f'ofi_{w}s'))\n",
|
||||
"\n",
|
||||
"print('OFI 5s:')\n",
|
||||
"corrs_ofi5 = evaluate_signal(bars, 'ofi_5s', 'OFI 5s')\n",
|
||||
"print('\\nOFI 10s:')\n",
|
||||
"corrs_ofi10 = evaluate_signal(bars, 'ofi_10s', 'OFI 10s')\n",
|
||||
"print('\\nOFI 30s:')\n",
|
||||
"corrs_ofi30 = evaluate_signal(bars, 'ofi_30s', 'OFI 30s')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## Señal 2: Trade Count Imbalance\n",
|
||||
"\n",
|
||||
"**Qué mide:** Diferencia entre número de buys y sells (no volumen, sino conteo). \n",
|
||||
"**Intuición:** Muchos trades pequeños de compra pueden ser más informativos que un solo trade grande. \n",
|
||||
"**Fórmula:** `TCI = (n_buys - n_sells) / (n_buys + n_sells)`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for w in [5, 10, 30]:\n",
|
||||
" nb = bars['n_buys'].rolling_sum(window_size=w)\n",
|
||||
" ns = bars['n_sells'].rolling_sum(window_size=w)\n",
|
||||
" bars = bars.with_columns(\n",
|
||||
" ((nb - ns) / (nb + ns)).alias(f'tci_{w}s')\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"print('Trade Count Imbalance 10s:')\n",
|
||||
"corrs_tci = evaluate_signal(bars, 'tci_10s', 'TCI 10s')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## Señal 3: Trade Intensity (aceleración de actividad)\n",
|
||||
"\n",
|
||||
"**Qué mide:** ¿Están llegando trades más rápido que lo normal? \n",
|
||||
"**Intuición:** Aceleraciones predicen movimientos — los informados tradean antes del movimiento. \n",
|
||||
"**Fórmula:** `intensity = trades_last_5s / trades_last_60s_avg`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"short_window = bars['n_trades'].rolling_sum(window_size=5)\n",
|
||||
"long_window = bars['n_trades'].rolling_mean(window_size=60)\n",
|
||||
"bars = bars.with_columns(\n",
|
||||
" (short_window / 5 / long_window).alias('trade_intensity')\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print('Trade Intensity (5s / 60s avg):')\n",
|
||||
"corrs_intensity = evaluate_signal(bars, 'trade_intensity', 'Trade Intensity')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## Señal 4: Volume-Weighted Imbalance\n",
|
||||
"\n",
|
||||
"**Qué mide:** OFI pero ponderando más los trades grandes (ballenas). \n",
|
||||
"**Intuición:** Un trade de 1 BTC tiene más información que 100 trades de 0.001 BTC. \n",
|
||||
"**Fórmula:** Separar trades grandes (>p90) y calcular su OFI"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Señal basada en los trades más grandes de cada segundo\n",
|
||||
"# max_qty ya captura el trade más grande, pero necesitamos su lado\n",
|
||||
"# Usamos n_fills como proxy: más fills = orden más grande que barrió más niveles\n",
|
||||
"\n",
|
||||
"# Proxy: volumen de los trades con >5 fills (ballenas)\n",
|
||||
"whale_trades = trades.filter(pl.col('n_fills') > 5).with_columns(\n",
|
||||
" (pl.col('timestamp') // 1000).alias('second'),\n",
|
||||
" pl.when(pl.col('side') == 'buy').then(pl.col('qty')).otherwise(-pl.col('qty')).alias('signed_qty'),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"whale_flow = whale_trades.group_by('second').agg(\n",
|
||||
" pl.col('signed_qty').sum().alias('whale_flow'),\n",
|
||||
" pl.len().alias('whale_count'),\n",
|
||||
").sort('second')\n",
|
||||
"\n",
|
||||
"# Unir con bars\n",
|
||||
"bars = bars.join(whale_flow, on='second', how='left').with_columns(\n",
|
||||
" pl.col('whale_flow').fill_null(0.0),\n",
|
||||
" pl.col('whale_count').fill_null(0),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"# Whale flow rolling\n",
|
||||
"bars = bars.with_columns(\n",
|
||||
" pl.col('whale_flow').rolling_sum(window_size=10).alias('whale_flow_10s'),\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print('Whale Flow 10s (trades con >5 fills):')\n",
|
||||
"print(f'Trades clasificados como ballena: {whale_trades.shape[0]:,} ({whale_trades.shape[0]/trades.shape[0]*100:.1f}%)')\n",
|
||||
"corrs_whale = evaluate_signal(bars, 'whale_flow_10s', 'Whale Flow 10s')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## Señal 5: VWAP Deviation\n",
|
||||
"\n",
|
||||
"**Qué mide:** ¿El precio actual está por encima o debajo del VWAP reciente? \n",
|
||||
"**Intuición:** El precio tiende a revertir al VWAP (mean reversion). \n",
|
||||
"**Fórmula:** `deviation = (close - vwap_rolling) / close`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for w in [30, 60, 300]:\n",
|
||||
" rolling_turnover = bars['turnover'].rolling_sum(window_size=w)\n",
|
||||
" rolling_volume = bars['volume'].rolling_sum(window_size=w)\n",
|
||||
" rolling_vwap = rolling_turnover / rolling_volume\n",
|
||||
" deviation = (bars['close'] - rolling_vwap) / bars['close']\n",
|
||||
" bars = bars.with_columns(deviation.alias(f'vwap_dev_{w}s'))\n",
|
||||
"\n",
|
||||
"print('VWAP Deviation 30s:')\n",
|
||||
"corrs_vwap30 = evaluate_signal(bars, 'vwap_dev_30s', 'VWAP Dev 30s')\n",
|
||||
"print('\\nVWAP Deviation 60s:')\n",
|
||||
"corrs_vwap60 = evaluate_signal(bars, 'vwap_dev_60s', 'VWAP Dev 60s')\n",
|
||||
"print('\\nVWAP Deviation 300s (5min):')\n",
|
||||
"corrs_vwap300 = evaluate_signal(bars, 'vwap_dev_300s', 'VWAP Dev 5min')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## Señal 6: Volatility Breakout\n",
|
||||
"\n",
|
||||
"**Qué mide:** ¿La volatilidad actual es anormalmente alta? \n",
|
||||
"**Intuición:** Picos de volatilidad preceden movimientos direccionales (momentum post-breakout). \n",
|
||||
"**Fórmula:** `breakout = vol_5s / vol_60s`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Volatilidad realizada como rango (high - low) / close\n",
|
||||
"bars = bars.with_columns(\n",
|
||||
" ((pl.col('high') - pl.col('low')) / pl.col('close')).alias('range_pct')\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"short_vol = bars['range_pct'].rolling_mean(window_size=5)\n",
|
||||
"long_vol = bars['range_pct'].rolling_mean(window_size=60)\n",
|
||||
"bars = bars.with_columns(\n",
|
||||
" (short_vol / long_vol).alias('vol_breakout')\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"print('Volatility Breakout (5s / 60s):')\n",
|
||||
"corrs_volbreak = evaluate_signal(bars, 'vol_breakout', 'Vol Breakout')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## Señal 7: Retorno reciente (momentum/reversal)\n",
|
||||
"\n",
|
||||
"**Qué mide:** ¿El precio acaba de subir o bajar? \n",
|
||||
"**Intuición:** A muy corto plazo puede haber momentum (inercia) o reversal (rebote). \n",
|
||||
"**Fórmula:** `ret_Ns = log(close) - log(close_N_ago)`"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"for w in [1, 5, 10, 30, 60]:\n",
|
||||
" bars = bars.with_columns(\n",
|
||||
" (pl.col('close').log() - pl.col('close').shift(w).log()).alias(f'past_ret_{w}s')\n",
|
||||
" )\n",
|
||||
"\n",
|
||||
"print('Past Return 1s (ultra corto):')\n",
|
||||
"corrs_ret1 = evaluate_signal(bars, 'past_ret_1s', 'Past Ret 1s')\n",
|
||||
"print('\\nPast Return 5s:')\n",
|
||||
"corrs_ret5 = evaluate_signal(bars, 'past_ret_5s', 'Past Ret 5s')\n",
|
||||
"print('\\nPast Return 30s:')\n",
|
||||
"corrs_ret30 = evaluate_signal(bars, 'past_ret_30s', 'Past Ret 30s')\n",
|
||||
"print('\\nPast Return 60s:')\n",
|
||||
"corrs_ret60 = evaluate_signal(bars, 'past_ret_60s', 'Past Ret 60s')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"---\n",
|
||||
"## Resumen: ranking de señales"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# Recopilar todas las correlaciones\n",
|
||||
"all_signals = [\n",
|
||||
" ('OFI 5s', corrs_ofi5),\n",
|
||||
" ('OFI 10s', corrs_ofi10),\n",
|
||||
" ('OFI 30s', corrs_ofi30),\n",
|
||||
" ('TCI 10s', corrs_tci),\n",
|
||||
" ('Trade Intensity', corrs_intensity),\n",
|
||||
" ('Whale Flow 10s', corrs_whale),\n",
|
||||
" ('VWAP Dev 30s', corrs_vwap30),\n",
|
||||
" ('VWAP Dev 60s', corrs_vwap60),\n",
|
||||
" ('VWAP Dev 5min', corrs_vwap300),\n",
|
||||
" ('Vol Breakout', corrs_volbreak),\n",
|
||||
" ('Past Ret 1s', corrs_ret1),\n",
|
||||
" ('Past Ret 5s', corrs_ret5),\n",
|
||||
" ('Past Ret 30s', corrs_ret30),\n",
|
||||
" ('Past Ret 60s', corrs_ret60),\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"records = []\n",
|
||||
"for name, corrs in all_signals:\n",
|
||||
" for h, rho, pval in corrs:\n",
|
||||
" records.append({'signal': name, 'horizon_s': h, 'spearman_rho': round(rho, 5), 'p_value': pval})\n",
|
||||
"\n",
|
||||
"results = pl.DataFrame(records)\n",
|
||||
"\n",
|
||||
"# Heatmap de correlaciones\n",
|
||||
"signal_names = [s[0] for s in all_signals]\n",
|
||||
"horizons = [1, 5, 10, 30, 60]\n",
|
||||
"\n",
|
||||
"matrix = np.zeros((len(signal_names), len(horizons)))\n",
|
||||
"for row in results.iter_rows(named=True):\n",
|
||||
" i = signal_names.index(row['signal'])\n",
|
||||
" j = horizons.index(row['horizon_s'])\n",
|
||||
" matrix[i, j] = row['spearman_rho']\n",
|
||||
"\n",
|
||||
"fig, ax = plt.subplots(figsize=(10, 10))\n",
|
||||
"vmax = max(0.01, np.max(np.abs(matrix)))\n",
|
||||
"im = ax.imshow(matrix, cmap='RdBu_r', aspect='auto', vmin=-vmax, vmax=vmax)\n",
|
||||
"ax.set_xticks(range(len(horizons)))\n",
|
||||
"ax.set_xticklabels([f'{h}s' for h in horizons], fontsize=10)\n",
|
||||
"ax.set_yticks(range(len(signal_names)))\n",
|
||||
"ax.set_yticklabels(signal_names, fontsize=10)\n",
|
||||
"\n",
|
||||
"for i in range(len(signal_names)):\n",
|
||||
" for j in range(len(horizons)):\n",
|
||||
" val = matrix[i, j]\n",
|
||||
" # Marcar significativos\n",
|
||||
" r = results.filter((pl.col('signal') == signal_names[i]) & (pl.col('horizon_s') == horizons[j]))\n",
|
||||
" if r.shape[0] > 0:\n",
|
||||
" pv = r['p_value'][0]\n",
|
||||
" star = '***' if pv < 0.001 else '**' if pv < 0.01 else '*' if pv < 0.05 else ''\n",
|
||||
" else:\n",
|
||||
" star = ''\n",
|
||||
" color = 'white' if abs(val) > vmax * 0.6 else 'black'\n",
|
||||
" ax.text(j, i, f'{val:.4f}\\n{star}', ha='center', va='center', fontsize=8, color=color)\n",
|
||||
"\n",
|
||||
"ax.set_title('Spearman ρ: señal vs retorno futuro\\n(rojo = predice subida, azul = predice bajada, *** = p<0.001)', fontsize=12)\n",
|
||||
"ax.set_xlabel('Horizonte futuro')\n",
|
||||
"plt.colorbar(im, label='ρ')\n",
|
||||
"plt.tight_layout()\n",
|
||||
"plt.show()\n",
|
||||
"\n",
|
||||
"# Top señales\n",
|
||||
"print('\\nTop 15 señales por |ρ| (significativas p<0.01):')\n",
|
||||
"top = results.filter(pl.col('p_value') < 0.01).with_columns(\n",
|
||||
" pl.col('spearman_rho').abs().alias('abs_rho')\n",
|
||||
").sort('abs_rho', descending=True).head(15)\n",
|
||||
"print(top.select(['signal', 'horizon_s', 'spearman_rho', 'p_value']))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"name": "python",
|
||||
"version": "3.13.0"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
Reference in New Issue
Block a user