cc01845acf
Un solo archivo (llm.py) que habla directo con la API de Anthropic Messages usando el token OAuth que Claude Code guarda en ~/.claude/.credentials.json. Sin servidor, sin arranque: chat interactivo con memoria, one-shot, pipe, y bucle de tools propias (run_tool_loop). Empaqueta standalone la lógica del grupo claude-direct del registry para poder distribuirla (basta llm.py + README.md, el receptor solo necesita httpx + Claude Code logueado).
345 lines
13 KiB
Python
Executable File
345 lines
13 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""llm — a tiny, self-contained CLI to chat with Claude from the terminal.
|
|
|
|
It talks straight to the Anthropic Messages API using the OAuth token that Claude
|
|
Code already stores on your machine (~/.claude/.credentials.json), so there is
|
|
nothing to configure: install httpx, run it, type.
|
|
|
|
Usage:
|
|
python3 llm.py # interactive chat (with memory)
|
|
python3 llm.py "your question" # one-shot answer
|
|
echo "summarize this" | python3 llm.py
|
|
python3 llm.py --model claude-opus-4-8 "explain pointers"
|
|
|
|
Interactive commands:
|
|
/model <id> switch model (e.g. claude-opus-4-8, claude-haiku-4-5-20251001)
|
|
/system <text> set a system prompt
|
|
/reset start a fresh conversation
|
|
/exit quit
|
|
|
|
Requirements:
|
|
- Python 3.9+
|
|
- httpx (pip install httpx)
|
|
- Claude Code installed and logged in (so ~/.claude/.credentials.json exists)
|
|
"""
|
|
import json
|
|
import os
|
|
import sys
|
|
import time
|
|
from pathlib import Path
|
|
from typing import Callable, Iterator
|
|
|
|
# Endpoint every Messages request is POSTed to.
API_URL: str = "https://api.anthropic.com/v1/messages"
# Value sent in the `anthropic-version` request header.
ANTHROPIC_VERSION: str = "2023-06-01"
# Model used when the caller does not choose one.
DEFAULT_MODEL: str = "claude-haiku-4-5-20251001"
# Default credentials file; `~` is expanded at the point where it is read.
CREDENTIALS_PATH: str = "~/.claude/.credentials.json"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Token: read (and best-effort refresh) the Claude Code OAuth access token
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def load_oauth_token(credentials_path: str = "", refresh_if_expired: bool = True) -> str:
    """Read the OAuth access token out of the local credentials JSON file.

    Args:
        credentials_path: File to read; an empty string falls back to
            ``CREDENTIALS_PATH``. ``~`` is expanded.
        refresh_if_expired: When true, and the file carries both an
            ``expiresAt`` timestamp (milliseconds) that has passed and a
            ``refreshToken``, a refresh is attempted via ``_try_refresh``.

    Returns:
        The refreshed token if the refresh produced one, otherwise the
        ``accessToken`` stored in the file (a warning goes to stderr when an
        attempted refresh yields nothing).

    Raises:
        FileNotFoundError: The credentials file does not exist.
        KeyError: The file lacks ``claudeAiOauth`` or ``accessToken``.
    """
    creds_file = Path(credentials_path or CREDENTIALS_PATH).expanduser()
    if not creds_file.exists():
        raise FileNotFoundError(
            f"No se encontro {creds_file}. Instala Claude Code e inicia sesion "
            "(`claude`) para generar las credenciales."
        )

    section = json.loads(creds_file.read_text(encoding="utf-8"))["claudeAiOauth"]
    stored_token = section["accessToken"]
    expiry_ms = section.get("expiresAt", 0)
    renewal_token = section.get("refreshToken", "")

    # A missing/zero expiry is treated as "unknown", never as "expired".
    current_ms = int(time.time() * 1000)
    expired = bool(expiry_ms) and expiry_ms <= current_ms
    if not (expired and refresh_if_expired and renewal_token):
        return stored_token

    refreshed = _try_refresh(renewal_token)
    if refreshed:
        return refreshed
    print("warning: token may be expired; refresh failed.", file=sys.stderr)
    return stored_token
|
|
|
|
|
|
def _try_refresh(refresh_token: str) -> str:
    """Attempt a single OAuth refresh and return the new access token.

    This is deliberately best-effort: any failure (httpx missing, network
    error, non-200 reply, unparsable body) results in ``""``. Exceptions are
    reported on stderr rather than raised.

    Args:
        refresh_token: The refresh token to exchange.

    Returns:
        The ``access_token`` field of a 200 response, or ``""``.
    """
    form = {"grant_type": "refresh_token", "refresh_token": refresh_token}
    try:
        # Imported lazily so a missing httpx is handled like any other failure.
        import httpx

        reply = httpx.post(
            "https://auth.anthropic.com/oauth/token",
            data=form,
            headers={"content-type": "application/x-www-form-urlencoded"},
            timeout=10.0,
        )
        if reply.status_code != 200:
            return ""
        return reply.json().get("access_token", "")
    except Exception as exc:  # noqa: BLE001
        print(f"warning: refresh failed: {exc}", file=sys.stderr)
        return ""
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# SSE parsing (pure)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _parse_sse_chunk(chunk: str) -> Iterator[dict]:
    """Parse Server-Sent-Events text and yield normalised event dicts.

    Lines are grouped into events separated by blank lines. For each event
    the ``event:`` field selects the type and the ``data:`` field carries a
    JSON payload, which is decoded and handed to ``_normalise``.

    Differences from a naive line-by-line reader:
      * Several ``data:`` lines within one event are joined with ``"\\n"``
        (as the SSE format prescribes) instead of the last one winning, so
        a JSON payload spread over multiple lines still decodes.
      * An event left open at the end of ``chunk`` (no trailing blank line)
        is dispatched as well.
      * Payloads that are empty, equal to ``[DONE]``, not valid JSON, or
        valid JSON that is not an object are skipped silently.

    Args:
        chunk: Raw SSE text containing zero or more events.

    Yields:
        Whatever ``_normalise`` yields for each decodable event.
    """
    event = ""
    data_lines = []
    # The trailing "" acts as a sentinel blank line so the final event is
    # dispatched by the same branch as every other one.
    for line in [*chunk.splitlines(), ""]:
        if line.startswith("event:"):
            event = line[len("event:"):].strip()
        elif line.startswith("data:"):
            data_lines.append(line[len("data:"):].strip())
        elif line == "":
            payload = "\n".join(data_lines)
            event_type = event
            # Reset before yielding so state never leaks into the next event.
            event, data_lines = "", []
            if not payload or payload == "[DONE]":
                continue
            try:
                decoded = json.loads(payload)
            except json.JSONDecodeError:
                continue
            # _normalise relies on dict methods; ignore scalars and arrays.
            if isinstance(decoded, dict):
                yield from _normalise(event_type, decoded)
|
|
|
|
|
|
def _normalise(event_type: str, data: dict) -> Iterator[dict]:
    """Translate one decoded stream event into this module's compact events.

    Mapping (anything else yields nothing):
      * ``content_block_start`` with a ``tool_use`` block ->
        ``{"type": "tool_use_start", "id", "name", "index"}``
      * ``content_block_delta`` / ``text_delta`` -> ``{"type": "text", "text"}``
      * ``content_block_delta`` / ``input_json_delta`` ->
        ``{"type": "tool_input_delta", "index", "partial_json"}``
      * ``message_delta`` with a non-empty ``stop_reason`` ->
        ``{"type": "done", "stop_reason"}``

    Args:
        event_type: Value of the SSE ``event:`` field.
        data: Decoded JSON payload of the event.
    """
    if event_type == "content_block_start":
        started = data.get("content_block", {})
        if started.get("type") != "tool_use":
            return
        yield {
            "type": "tool_use_start",
            "id": started.get("id", ""),
            "name": started.get("name", ""),
            "index": data.get("index", 0),
        }
        return

    if event_type == "content_block_delta":
        change = data.get("delta", {})
        kind = change.get("type")
        if kind == "text_delta":
            yield {"type": "text", "text": change.get("text", "")}
        elif kind == "input_json_delta":
            yield {
                "type": "tool_input_delta",
                "index": data.get("index", 0),
                "partial_json": change.get("partial_json", ""),
            }
        return

    if event_type == "message_delta":
        reason = data.get("delta", {}).get("stop_reason", "")
        if reason:
            yield {"type": "done", "stop_reason": reason}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Streaming call
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def stream_messages(messages: list, model: str = DEFAULT_MODEL, system: str = "",
                    tools: list = None, max_tokens: int = 4096, token: str = "") -> Iterator[dict]:
    """Stream one Messages API call and yield normalised events.

    The function never raises for operational failures; instead it yields a
    single ``{"type": "error", "message": ...}`` event and stops. That covers
    a token that cannot be loaded, httpx not being installed, a non-200 HTTP
    status (first 400 characters of the body are included) and any exception
    raised while sending or reading the stream.

    Args:
        messages: Conversation so far, sent as the ``messages`` field.
        model: Model id to request.
        system: Optional system prompt; omitted from the request when empty.
        tools: Optional tool definitions; omitted when empty or ``None``.
        max_tokens: Value for the ``max_tokens`` field.
        token: Bearer token to use. When empty, ``load_oauth_token()`` is
            called.

    Yields:
        The events produced by ``_parse_sse_chunk`` for the response stream,
        or one error event as described above.
    """
    if not token:
        try:
            token = load_oauth_token()
        except Exception as exc:  # noqa: BLE001
            yield {"type": "error", "message": f"failed to load token: {exc}"}
            return

    body = {"model": model, "max_tokens": max_tokens, "messages": messages, "stream": True}
    if system:
        body["system"] = system
    if tools:
        body["tools"] = tools
    headers = {
        "authorization": f"Bearer {token}",
        "anthropic-version": ANTHROPIC_VERSION,
        "content-type": "application/json",
    }

    # Imported lazily so a missing dependency becomes a readable error event.
    try:
        import httpx
    except ImportError:
        yield {"type": "error", "message": "falta httpx. Instala con: pip install httpx"}
        return

    try:
        with httpx.stream("POST", API_URL, json=body, headers=headers, timeout=120.0) as resp:
            if resp.status_code != 200:
                err = resp.read().decode("utf-8", errors="replace")
                yield {"type": "error", "message": f"HTTP {resp.status_code}: {err[:400]}"}
                return
            buf = ""
            for chunk in resp.iter_text():
                # SSE permits CRLF line endings. Normalising the joined buffer
                # (not the chunk) also handles a "\r\n" pair that was split
                # across two network chunks, so events are still emitted as
                # they arrive instead of only when the stream closes.
                buf = (buf + chunk).replace("\r\n", "\n")
                while "\n\n" in buf:
                    block, _, buf = buf.partition("\n\n")
                    yield from _parse_sse_chunk(block + "\n\n")
            # Flush an event the server did not terminate with a blank line.
            if buf.strip():
                yield from _parse_sse_chunk(buf + "\n\n")
    except Exception as exc:  # noqa: BLE001
        yield {"type": "error", "message": str(exc)}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Agentic tool loop (define your own tools)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _decode_tool_input(parts: list) -> dict:
    """Join the streamed JSON fragments of one tool call and decode them.

    Returns ``{}`` when nothing was streamed and ``{"_raw": <text>}`` when the
    joined text is not valid JSON.
    """
    raw = "".join(parts)
    if not raw:
        return {}
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return {"_raw": raw}


def _run_tool(dispatch: dict, name: str, tool_use_id: str, arguments) -> dict:
    """Call one tool from ``dispatch`` and wrap the outcome as a ``tool_result`` block.

    Unknown tools and exceptions raised by the tool (or while JSON-encoding a
    non-string return value) become results flagged with ``is_error``.
    """
    if name not in dispatch:
        text, failed = f"Error: tool '{name}' not found", True
    else:
        try:
            value = dispatch[name](arguments)
            text, failed = (value if isinstance(value, str) else json.dumps(value)), False
        except Exception as exc:  # noqa: BLE001
            text, failed = f"Error executing {name}: {exc}", True
    result = {"type": "tool_result", "tool_use_id": tool_use_id, "content": text}
    if failed:
        result["is_error"] = True
    return result


def run_tool_loop(messages: list, tools: list, dispatch: dict, model: str = DEFAULT_MODEL,
                  system: str = "", max_tokens: int = 4096, max_iters: int = 8,
                  on_text: Callable[[str], None] = None) -> dict:
    """Drive a tool-use conversation until the model stops asking for tools.

    Each iteration streams one assistant turn, appends it to ``messages``,
    and, if the turn stopped with ``tool_use``, runs every requested tool and
    appends the results as a user turn before looping again.

    ``messages`` is modified in place (the same list is also returned). A turn
    that produced neither text nor tool calls is not appended, because an
    assistant message with empty content makes follow-up requests invalid;
    this matches ``_ask_once``, which also skips empty replies.

    Args:
        messages: Conversation history; extended in place.
        tools: Tool definitions forwarded to ``stream_messages``.
        dispatch: Maps a tool name to a callable taking the decoded input.
            Non-string return values are JSON-encoded.
        model: Model id to request.
        system: Optional system prompt.
        max_tokens: Per-turn ``max_tokens``.
        max_iters: Upper bound on assistant turns.
        on_text: Optional callback invoked with each streamed text fragment.

    Returns:
        A dict with ``messages``, ``final_text`` (text of the last assistant
        turn), ``stop_reason`` (the last turn's stop reason, ``"max_iters"``
        when the bound was hit, or ``"error"``) and ``iterations``. On a
        stream error the dict also carries ``error`` and ``final_text`` holds
        the partial text of the failed turn, which is not added to
        ``messages``.
    """
    iterations = 0
    final_text = ""
    stop_reason = "max_iters"

    for _ in range(max_iters):
        iterations += 1
        text_parts = []
        tool_uses = []
        index_map = {}  # content-block index -> entry in tool_uses
        cur_stop = "end_turn"

        for ev in stream_messages(messages, model=model, system=system, tools=tools,
                                  max_tokens=max_tokens):
            t = ev.get("type")
            if t == "text":
                text_parts.append(ev["text"])
                if on_text:
                    on_text(ev["text"])
            elif t == "tool_use_start":
                entry = {"id": ev["id"], "name": ev["name"], "index": ev["index"], "parts": []}
                tool_uses.append(entry)
                index_map[ev["index"]] = entry
            elif t == "tool_input_delta":
                if ev["index"] in index_map:
                    index_map[ev["index"]]["parts"].append(ev["partial_json"])
            elif t == "done":
                cur_stop = ev.get("stop_reason", "end_turn")
            elif t == "error":
                return {"messages": messages, "final_text": "".join(text_parts),
                        "stop_reason": "error", "iterations": iterations,
                        "error": ev.get("message", "")}

        final_text = "".join(text_parts)
        stop_reason = cur_stop

        content = []
        if final_text:
            content.append({"type": "text", "text": final_text})
        content.extend(
            {"type": "tool_use", "id": tu["id"], "name": tu["name"],
             "input": _decode_tool_input(tu["parts"])}
            for tu in tool_uses
        )
        if content:
            messages.append({"role": "assistant", "content": content})

        if stop_reason != "tool_use" or not tool_uses:
            break

        # The input is decoded afresh for the tool so that a tool mutating its
        # argument cannot alter the tool_use block already stored in messages.
        results = [
            _run_tool(dispatch, tu["name"], tu["id"], _decode_tool_input(tu["parts"]))
            for tu in tool_uses
        ]
        messages.append({"role": "user", "content": results})
    else:
        # Loop ran out without a break: the model still wanted tools.
        stop_reason = "max_iters"

    return {"messages": messages, "final_text": final_text,
            "stop_reason": stop_reason, "iterations": iterations}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CLI
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def _ask_once(messages: list, model: str, system: str) -> str:
    """Stream a single assistant reply to stdout and record it in ``messages``.

    Text fragments are written and flushed as they arrive. On an error event
    the message is written to stderr and ``""`` is returned immediately,
    leaving ``messages`` untouched. Otherwise a newline is written and, if any
    text was received, it is appended to ``messages`` as an assistant turn.

    Returns:
        The full reply text, or ``""`` on error / empty reply.
    """
    fragments = []
    for event in stream_messages(messages, model=model, system=system):
        kind = event["type"]
        if kind == "error":
            sys.stderr.write(f"\n[error] {event.get('message', '')!s}\n")
            return ""
        if kind != "text":
            continue
        piece = event["text"]
        fragments.append(piece)
        sys.stdout.write(piece)
        sys.stdout.flush()

    sys.stdout.write("\n")
    reply = "".join(fragments)
    if not reply:
        return reply
    messages.append({"role": "assistant", "content": reply})
    return reply
|
|
|
|
|
|
def _repl(model: str, system: str):
    """Run the interactive prompt loop until the user quits or input ends.

    Recognised commands: ``/exit`` / ``/quit`` / ``/q`` leave the loop,
    ``/reset`` empties the history, ``/model [id]`` shows or switches the
    model, ``/system [text]`` sets or clears the system prompt. Any other
    non-empty line is sent as a user message via ``_ask_once``. EOF or Ctrl-C
    at the prompt ends the loop.
    """
    print(f"llm · {model} · escribe tu mensaje (/model, /system, /reset, /exit)")
    history = []
    quit_commands = {"/exit", "/quit", "/q"}

    while True:
        try:
            entry = input("\n> ").strip()
        except (EOFError, KeyboardInterrupt):
            print()
            return

        if not entry:
            continue
        if entry in quit_commands:
            return

        if entry == "/reset":
            history.clear()
            print("— conversacion nueva —")
            continue

        if entry.startswith("/model"):
            # Everything after the first whitespace run is the argument.
            _, *tail = entry.split(maxsplit=1)
            if tail:
                model = tail[0].strip()
                print(f"modelo: {model}")
            else:
                print(f"modelo actual: {model}")
            continue

        if entry.startswith("/system"):
            _, *tail = entry.split(maxsplit=1)
            system = tail[0].strip() if tail else ""
            print(f"system: {system or '(ninguno)'}")
            continue

        history.append({"role": "user", "content": entry})
        _ask_once(history, model, system)
|
|
|
|
|
|
def main(argv):
    """Command-line entry point; returns the process exit status (always 0).

    ``--model``/``-m`` and ``--system``/``-s`` consume the following argument
    when there is one; ``--help``/``-h`` prints the module docstring. All
    remaining arguments are joined with spaces to form the prompt. With no
    prompt and a non-terminal stdin, stdin is read as the prompt. A non-empty
    prompt is answered once; otherwise the interactive loop starts.
    """
    model = DEFAULT_MODEL
    system = ""
    words = []

    position = 0
    count = len(argv)
    while position < count:
        arg = argv[position]
        # A flag in last position has no value and is treated as prompt text.
        has_value = position + 1 < count
        if has_value and arg in ("--model", "-m"):
            model = argv[position + 1]
            position += 2
            continue
        if has_value and arg in ("--system", "-s"):
            system = argv[position + 1]
            position += 2
            continue
        if arg in ("--help", "-h"):
            print(__doc__)
            return 0
        words.append(arg)
        position += 1

    prompt = " ".join(words).strip()
    if not prompt and not sys.stdin.isatty():
        prompt = sys.stdin.read().strip()

    if not prompt:
        _repl(model, system)  # interactive
        return 0

    # one-shot
    _ask_once([{"role": "user", "content": prompt}], model, system)
    return 0
|
|
|
|
|
|
# Script entry point: forward the CLI arguments (without the program name)
# to main() and use its return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    raise SystemExit(exit_status)
|