#!/usr/bin/env python3
"""llm — a tiny, self-contained CLI to chat with Claude from the terminal.

It talks straight to the Anthropic Messages API using the OAuth token that
Claude Code already stores on your machine (~/.claude/.credentials.json), so
there is nothing to configure: install httpx, run it, type.

Usage:
    python3 llm.py                          # interactive chat (with memory)
    python3 llm.py "your question"          # one-shot answer
    echo "summarize this" | python3 llm.py
    python3 llm.py --model claude-opus-4-8 "explain pointers"

Interactive commands:
    /model     switch model (e.g. claude-opus-4-8, claude-haiku-4-5-20251001)
    /system    set a system prompt
    /reset     start a fresh conversation
    /exit      quit

Requirements:
    - Python 3.9+
    - httpx (pip install httpx)
    - Claude Code installed and logged in (so ~/.claude/.credentials.json exists)
"""

import json
import os
import sys
import time
from pathlib import Path
from typing import Callable, Iterator, Optional

API_URL = "https://api.anthropic.com/v1/messages"
ANTHROPIC_VERSION = "2023-06-01"
DEFAULT_MODEL = "claude-haiku-4-5-20251001"
CREDENTIALS_PATH = "~/.claude/.credentials.json"


# ---------------------------------------------------------------------------
# Token: read (and best-effort refresh) the Claude Code OAuth access token
# ---------------------------------------------------------------------------

def load_oauth_token(credentials_path: str = "", refresh_if_expired: bool = True) -> str:
    """Return the Claude Code OAuth access token from the local credentials file.

    Args:
        credentials_path: Path to the credentials JSON; when empty,
            ``CREDENTIALS_PATH`` is used. ``~`` is expanded.
        refresh_if_expired: When true and the stored ``expiresAt`` is in the
            past and a refresh token is present, try one refresh first.

    Returns:
        The refreshed token if the refresh succeeded, otherwise the stored
        ``accessToken`` (a warning is printed to stderr if the refresh failed).

    Raises:
        FileNotFoundError: The credentials file does not exist.
        KeyError: The file lacks ``claudeAiOauth`` or ``accessToken``.
        json.JSONDecodeError: The file is not valid JSON.
    """
    path = Path(credentials_path or CREDENTIALS_PATH).expanduser()
    if not path.exists():
        raise FileNotFoundError(
            f"No se encontro {path}. Instala Claude Code e inicia sesion "
            "(`claude`) para generar las credenciales."
        )
    data = json.loads(path.read_text(encoding="utf-8"))
    oauth = data["claudeAiOauth"]
    access_token = oauth["accessToken"]
    expires_at_ms = oauth.get("expiresAt", 0)
    refresh_token = oauth.get("refreshToken", "")

    now_ms = int(time.time() * 1000)
    if expires_at_ms and expires_at_ms <= now_ms and refresh_if_expired and refresh_token:
        new_token = _try_refresh(refresh_token)
        if new_token:
            return new_token
        print("warning: token may be expired; refresh failed.", file=sys.stderr)
    return access_token


def _try_refresh(refresh_token: str) -> str:
    """Best-effort OAuth refresh. Returns '' on failure (claude normally refreshes).

    The new token is only returned to the caller; nothing is written back to
    the credentials file. Any exception (including a missing httpx) is
    reported on stderr and turned into ''.
    """
    try:
        import httpx

        resp = httpx.post(
            "https://auth.anthropic.com/oauth/token",
            data={"grant_type": "refresh_token", "refresh_token": refresh_token},
            headers={"content-type": "application/x-www-form-urlencoded"},
            timeout=10.0,
        )
        if resp.status_code == 200:
            return resp.json().get("access_token", "")
    except Exception as exc:  # noqa: BLE001
        print(f"warning: refresh failed: {exc}", file=sys.stderr)
    return ""


# ---------------------------------------------------------------------------
# SSE parsing (pure)
# ---------------------------------------------------------------------------

def _decode_sse_event(event: str, data: str) -> Iterator[dict]:
    """Decode one SSE event's JSON payload and yield its normalised events."""
    if not data or data == "[DONE]":
        return
    try:
        payload = json.loads(data)
    except json.JSONDecodeError:
        # Deliberate best-effort: a malformed payload is skipped, not fatal.
        return
    # _normalise expects a JSON object; anything else carries no event.
    if isinstance(payload, dict):
        yield from _normalise(event, payload)


def _parse_sse_chunk(chunk: str) -> Iterator[dict]:
    """Parse a piece of SSE text and yield normalised events.

    Only CR, LF and CRLF terminate a line. ``str.splitlines()`` is avoided on
    purpose: it also breaks on U+2028/U+2029/U+0085, which may appear
    unescaped inside JSON strings and would cut a payload in half. Several
    ``data:`` lines belonging to one event are joined with a newline. An
    event is dispatched at each blank line, and once more at end of input for
    a trailing event that has no blank-line terminator.
    """
    event = ""
    data_lines = []
    for line in chunk.replace("\r\n", "\n").replace("\r", "\n").split("\n"):
        if line.startswith("event:"):
            event = line[len("event:"):].strip()
        elif line.startswith("data:"):
            data_lines.append(line[len("data:"):].strip())
        elif line == "":
            yield from _decode_sse_event(event, "\n".join(data_lines))
            event = ""
            data_lines = []
    yield from _decode_sse_event(event, "\n".join(data_lines))


def _normalise(event_type: str, data: dict) -> Iterator[dict]:
    """Translate one API stream event into this module's small event dicts.

    Yields:
        ``tool_use_start`` for a ``content_block_start`` holding a tool_use
        block, ``text`` / ``tool_input_delta`` for ``content_block_delta``,
        ``done`` for a ``message_delta`` carrying a stop reason, and ``error``
        for an in-stream ``error`` event. Every other event yields nothing.
    """
    if event_type == "content_block_start":
        block = data.get("content_block", {})
        if block.get("type") == "tool_use":
            yield {"type": "tool_use_start", "id": block.get("id", ""),
                   "name": block.get("name", ""), "index": data.get("index", 0)}
    elif event_type == "content_block_delta":
        delta = data.get("delta", {})
        if delta.get("type") == "text_delta":
            yield {"type": "text", "text": delta.get("text", "")}
        elif delta.get("type") == "input_json_delta":
            yield {"type": "tool_input_delta", "index": data.get("index", 0),
                   "partial_json": delta.get("partial_json", "")}
    elif event_type == "message_delta":
        stop = data.get("delta", {}).get("stop_reason", "")
        if stop:
            yield {"type": "done", "stop_reason": stop}
    elif event_type == "error":
        # Surface mid-stream failures instead of silently truncating output.
        detail = data.get("error") or {}
        if isinstance(detail, dict):
            pieces = (detail.get("type", ""), detail.get("message", ""))
            message = ": ".join(str(p) for p in pieces if p) or "unknown stream error"
        else:
            message = str(detail)
        yield {"type": "error", "message": message}


# ---------------------------------------------------------------------------
# Streaming call
# ---------------------------------------------------------------------------

def stream_messages(messages: list, model: str = DEFAULT_MODEL, system: str = "",
                    tools: Optional[list] = None, max_tokens: int = 4096,
                    token: str = "") -> Iterator[dict]:
    """Stream an Anthropic Messages API call.

    Failures are reported as events rather than raised: a missing token, a
    missing httpx, a non-200 response and any exception during the request
    each yield a single ``error`` event.

    Args:
        messages: Conversation in Messages API format.
        model: Model identifier sent in the request body.
        system: Optional system prompt; omitted from the body when empty.
        tools: Optional tool definitions; omitted from the body when falsy.
        max_tokens: ``max_tokens`` value sent in the request body.
        token: Bearer token; when empty it is read via ``load_oauth_token``.

    Yields:
        ``text``, ``tool_use_start``, ``tool_input_delta``, ``done`` and
        ``error`` event dicts.
    """
    if not token:
        try:
            token = load_oauth_token()
        except Exception as exc:  # noqa: BLE001
            yield {"type": "error", "message": f"failed to load token: {exc}"}
            return

    body = {"model": model, "max_tokens": max_tokens, "messages": messages, "stream": True}
    if system:
        body["system"] = system
    if tools:
        body["tools"] = tools
    headers = {"authorization": f"Bearer {token}",
               "anthropic-version": ANTHROPIC_VERSION,
               "content-type": "application/json"}

    # Imported lazily so the module itself loads without httpx installed.
    try:
        import httpx
    except ImportError:
        yield {"type": "error", "message": "falta httpx. Instala con: pip install httpx"}
        return

    try:
        with httpx.stream("POST", API_URL, json=body, headers=headers, timeout=120.0) as resp:
            if resp.status_code != 200:
                err = resp.read().decode("utf-8", errors="replace")
                yield {"type": "error", "message": f"HTTP {resp.status_code}: {err[:400]}"}
                return
            buf = ""
            for chunk in resp.iter_text():
                # Normalise CRLF so events framed with "\r\n\r\n" are also
                # split as they arrive; a CR left at the end of one chunk is
                # paired with its LF once the next chunk is appended.
                buf = (buf + chunk).replace("\r\n", "\n")
                while "\n\n" in buf:
                    block, buf = buf.split("\n\n", 1)
                    yield from _parse_sse_chunk(block + "\n\n")
            if buf.strip():
                yield from _parse_sse_chunk(buf + "\n\n")
    except Exception as exc:  # noqa: BLE001
        yield {"type": "error", "message": str(exc)}


# ---------------------------------------------------------------------------
# Agentic tool loop (define your own tools)
# ---------------------------------------------------------------------------

def _decode_tool_input(raw: str) -> dict:
    """Decode accumulated tool-input JSON; undecodable text is kept under '_raw'."""
    if not raw:
        return {}
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        return {"_raw": raw}


def _run_tool(dispatch: dict, name: str, tool_use_id: str, tool_input: dict) -> dict:
    """Call one tool and wrap its outcome as a ``tool_result`` content block."""
    if name not in dispatch:
        rc, is_err = f"Error: tool '{name}' not found", True
    else:
        try:
            rv = dispatch[name](tool_input)
            rc, is_err = (rv if isinstance(rv, str) else json.dumps(rv)), False
        except Exception as exc:  # noqa: BLE001
            rc, is_err = f"Error executing {name}: {exc}", True
    result = {"type": "tool_result", "tool_use_id": tool_use_id, "content": rc}
    if is_err:
        result["is_error"] = True
    return result


def run_tool_loop(messages: list, tools: list, dispatch: dict,
                  model: str = DEFAULT_MODEL, system: str = "",
                  max_tokens: int = 4096, max_iters: int = 8,
                  on_text: Optional[Callable[[str], None]] = None) -> dict:
    """Run a tool-use loop. `tools` are Anthropic tool defs; `dispatch` maps name->callable.

    Each iteration streams one assistant turn, appends it to ``messages`` (the
    list is mutated in place), and, if the model stopped with ``tool_use``,
    runs the requested tools and appends their results as a user message.

    Args:
        messages: Conversation so far; extended in place.
        tools: Tool definitions forwarded to the API.
        dispatch: Maps a tool name to a callable taking the decoded input.
        model, system, max_tokens: Forwarded to ``stream_messages``.
        max_iters: Maximum number of assistant turns.
        on_text: Optional callback invoked with each streamed text fragment.

    Returns:
        A dict with ``messages``, ``final_text`` (text of the last turn),
        ``stop_reason`` (the model's stop reason, ``"max_iters"`` when the
        budget ran out, or ``"error"``), ``iterations``, and ``error`` when
        the stream failed.
    """
    iterations = 0
    final_text = ""
    stop_reason = "max_iters"

    for _ in range(max_iters):
        iterations += 1
        text_parts = []
        tool_uses = []
        by_index = {}
        cur_stop = "end_turn"

        for ev in stream_messages(messages, model=model, system=system,
                                  tools=tools, max_tokens=max_tokens):
            kind = ev.get("type")
            if kind == "text":
                text_parts.append(ev["text"])
                if on_text:
                    on_text(ev["text"])
            elif kind == "tool_use_start":
                entry = {"id": ev["id"], "name": ev["name"],
                         "index": ev["index"], "parts": []}
                tool_uses.append(entry)
                by_index[ev["index"]] = entry
            elif kind == "tool_input_delta":
                if ev["index"] in by_index:
                    by_index[ev["index"]]["parts"].append(ev["partial_json"])
            elif kind == "done":
                cur_stop = ev.get("stop_reason", "end_turn")
            elif kind == "error":
                return {"messages": messages, "final_text": "".join(text_parts),
                        "stop_reason": "error", "iterations": iterations,
                        "error": ev.get("message", "")}

        final_text = "".join(text_parts)
        stop_reason = cur_stop

        content = []
        if final_text:
            content.append({"type": "text", "text": final_text})
        for tu in tool_uses:
            # Decode once; the same value is echoed back and passed to the tool.
            tu["input"] = _decode_tool_input("".join(tu["parts"]))
            content.append({"type": "tool_use", "id": tu["id"],
                            "name": tu["name"], "input": tu["input"]})
        # Skip an assistant message with empty content when the turn produced nothing.
        if content:
            messages.append({"role": "assistant", "content": content})

        if stop_reason != "tool_use" or not tool_uses:
            break

        results = [_run_tool(dispatch, tu["name"], tu["id"], tu["input"])
                   for tu in tool_uses]
        messages.append({"role": "user", "content": results})
    else:
        # The loop ran out of iterations while the model still wanted tools.
        stop_reason = "max_iters"

    return {"messages": messages, "final_text": final_text,
            "stop_reason": stop_reason, "iterations": iterations}


# ---------------------------------------------------------------------------
# CLI
# ---------------------------------------------------------------------------

def _ask_once(messages: list, model: str, system: str) -> str:
    """Stream one assistant turn to stdout, append it to messages, return the text.

    Returns '' when the stream reported an error (printed to stderr) or when
    the model produced no text; in both cases nothing is appended.
    """
    parts = []
    for ev in stream_messages(messages, model=model, system=system):
        if ev["type"] == "text":
            parts.append(ev["text"])
            sys.stdout.write(ev["text"])
            sys.stdout.flush()
        elif ev["type"] == "error":
            sys.stderr.write("\n[error] " + str(ev.get("message", "")) + "\n")
            return ""
    sys.stdout.write("\n")
    text = "".join(parts)
    if text:
        messages.append({"role": "assistant", "content": text})
    return text


def _repl(model: str, system: str) -> None:
    """Run the interactive chat loop until /exit, EOF or Ctrl-C at the prompt."""
    print(f"llm · {model} · escribe tu mensaje (/model, /system, /reset, /exit)")
    messages = []
    while True:
        try:
            line = input("\n> ").strip()
        except (EOFError, KeyboardInterrupt):
            print()
            return
        if not line:
            continue
        if line in ("/exit", "/quit", "/q"):
            return
        if line == "/reset":
            messages = []
            print("— conversacion nueva —")
            continue

        # Match the command word exactly so input such as "/systematic ..."
        # is sent as a chat message instead of clobbering the system prompt.
        parts = line.split(maxsplit=1)
        command = parts[0]
        argument = parts[1].strip() if len(parts) == 2 else ""
        if command == "/model":
            if argument:
                model = argument
                print(f"modelo: {model}")
            else:
                print(f"modelo actual: {model}")
            continue
        if command == "/system":
            system = argument
            print(f"system: {system or '(ninguno)'}")
            continue

        messages.append({"role": "user", "content": line})
        if not _ask_once(messages, model, system):
            # No reply was recorded: remove the unanswered user turn so it is
            # not silently resent together with the next prompt.
            messages.pop()


def main(argv):
    """Parse ``argv`` and run one-shot or interactive mode.

    ``--model/-m`` and ``--system/-s`` take a value; ``--help/-h`` prints the
    module docstring. Remaining arguments are joined into the prompt; when
    there are none and stdin is not a terminal, the prompt is read from stdin.

    Returns:
        Process exit status: 1 when a one-shot request produced no text
        (error or empty answer), otherwise 0.
    """
    model = DEFAULT_MODEL
    system = ""
    rest = []
    i = 0
    while i < len(argv):
        a = argv[i]
        if a in ("--model", "-m") and i + 1 < len(argv):
            model = argv[i + 1]
            i += 2
        elif a in ("--system", "-s") and i + 1 < len(argv):
            system = argv[i + 1]
            i += 2
        elif a in ("--help", "-h"):
            print(__doc__)
            return 0
        else:
            rest.append(a)
            i += 1

    prompt = " ".join(rest).strip()
    if not prompt and not sys.stdin.isatty():
        prompt = sys.stdin.read().strip()

    if prompt:  # one-shot
        messages = [{"role": "user", "content": prompt}]
        return 0 if _ask_once(messages, model, system) else 1

    _repl(model, system)  # interactive
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))