"""Query ClickHouse via the HTTP interface (port 8123) and return rows as dicts."""

import json
import urllib.error
import urllib.parse
import urllib.request

# Maximum number of characters of an error response quoted in ValueError messages.
_ERROR_PREVIEW_CHARS = 500


def _error_preview(stream) -> str:
    """Return at most the first 500 characters of an error response body."""
    # A UTF-8 character occupies at most 4 bytes, so reading 4x the character
    # budget guarantees the kept prefix never ends in a half-read character.
    head = stream.read(_ERROR_PREVIEW_CHARS * 4)
    return head.decode("utf-8", errors="replace")[:_ERROR_PREVIEW_CHARS]


def _parse_json_each_row(text: str) -> list[dict]:
    """Decode a JSONEachRow payload (one JSON document per line) into a list."""
    # Split on "\n" only. str.splitlines() also breaks on characters such as
    # U+0085 or U+2028, which are legal unescaped inside a JSON string, so it
    # can cut a row in half and make json.loads() fail on valid data.
    stripped = (line.strip() for line in text.split("\n"))
    return [json.loads(line) for line in stripped if line]


def clickhouse_query(
    base_url: str,
    sql: str,
    *,
    user: str = "default",
    password: str = "",
    database: str = "analytics",
    timeout: float = 30.0,
) -> list[dict]:
    """Execute a SQL statement against ClickHouse via HTTP and return results.

    The statement is sent as the body of a POST request; credentials travel in
    the X-ClickHouse-User / X-ClickHouse-Key headers, and the database and
    default output format (JSONEachRow) are sent as query parameters.

    Args:
        base_url: ClickHouse HTTP base URL, e.g. "http://127.0.0.1:18123".
            Trailing slashes are tolerated and stripped.
        sql: Full SQL statement. For statements with no result set (CREATE,
            INSERT, etc.) the response body is empty and [] is returned.
        user: ClickHouse username (default "default").
        password: ClickHouse password (default empty string).
        database: Target database sent as query param (default "analytics").
        timeout: Socket timeout in seconds (default 30.0).

    Returns:
        List of dicts, one per non-empty response line. Empty list for
        statements that produce no result set. Numbers may come back as
        strings for some ClickHouse types (e.g. Int64 may be returned as a
        JSON string in JSONEachRow - cast explicitly if needed:
        int(row["c"])).

    Raises:
        ValueError: On a non-200 HTTP response, with the status code and the
            first 500 characters of the response body.
        json.JSONDecodeError: (a ValueError subclass) if a non-empty response
            line is not valid JSON.
        urllib.error.URLError: On network-level errors (connection refused,
            DNS failure, timeout).
    """
    params = urllib.parse.urlencode(
        {"database": database, "default_format": "JSONEachRow"}
    )
    # Avoid a "//?..." request path when the caller passes a trailing slash.
    root = base_url.rstrip("/")
    req = urllib.request.Request(
        f"{root}/?{params}",
        data=sql.encode("utf-8"),
        method="POST",
        headers={
            "Content-Type": "text/plain",
            "X-ClickHouse-User": user,
            "X-ClickHouse-Key": password,
        },
    )

    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            # urlopen raises HTTPError for statuses outside 2xx, so this
            # branch only catches 2xx responses other than 200.
            if resp.status != 200:
                raise ValueError(
                    f"ClickHouse query failed: HTTP {resp.status} — {_error_preview(resp)}"
                )
            # errors="replace" keeps the call best-effort when a value holds
            # bytes that are not valid UTF-8.
            raw = resp.read().decode("utf-8", errors="replace")
    except urllib.error.HTTPError as exc:
        try:
            preview = _error_preview(exc)
        finally:
            # The HTTPError wraps the open response; it stays referenced as
            # __cause__ below, so close it explicitly to release the socket.
            exc.close()
        raise ValueError(
            f"ClickHouse query failed: HTTP {exc.code} — {preview}"
        ) from exc

    return _parse_json_each_row(raw)