package infra

import org.json.JSONArray
import org.json.JSONObject
import java.io.ConnectException
import java.net.HttpURLConnection
import java.net.URL

/** Connect and read timeout, in milliseconds, applied to every Ollama request. */
private const val OLLAMA_TIMEOUT_MS = 60_000

/** Divisor used to convert the nanosecond `total_duration` field into milliseconds. */
private const val NANOS_PER_MILLI = 1_000_000L

/**
 * Result of a non-streaming Ollama chat completion.
 *
 * @property content Text of the generated message (`message.content` in the response).
 * @property model Model tag reported by the server, or the requested tag when the field is absent.
 * @property totalDurationMs `total_duration` from the response converted to milliseconds (0 when absent).
 * @property evalCount `eval_count` from the response (0 when absent).
 */
data class OllamaChatResponse(
    val content: String,
    val model: String,
    val totalDurationMs: Long,
    val evalCount: Int
)

/**
 * Sends a chat completion request to a local Ollama instance.
 *
 * POSTs to `$baseUrl/api/chat` with the given messages (streaming disabled) and
 * returns the generated content together with timing and token-count metrics.
 *
 * @param messages List of role/content maps, e.g. `[{"role":"user","content":"hi"}]`.
 * @param model Ollama model tag to use.
 * @param baseUrl Base URL of the Ollama server; trailing slashes are ignored.
 * @param temperature Sampling temperature (0.0 – 1.0).
 * @param maxTokens Maximum number of tokens to generate (sent as `num_predict`).
 * @return OllamaChatResponse with content, model name, duration and eval count.
 * @throws RuntimeException if the connection is refused or the server answers with a
 *   status other than 200. Only [ConnectException] is wrapped; other I/O failures
 *   propagate unchanged.
 */
fun ollamaChat(
    messages: List<Map<String, String>>,
    model: String = "llama3.1:8b",
    baseUrl: String = "http://localhost:11434",
    temperature: Double = 0.7,
    maxTokens: Int = 1024
): OllamaChatResponse {
    val body = buildChatRequestBody(messages, model, temperature, maxTokens)

    // Trim trailing slashes so a baseUrl such as "http://host:11434/" does not produce "//api/chat".
    val url = URL("${baseUrl.trimEnd('/')}/api/chat")

    // openConnection() only creates the connection object; the socket is opened lazily
    // when the request body is written below, so that is where a refused connection surfaces.
    val conn = url.openConnection() as HttpURLConnection

    try {
        conn.requestMethod = "POST"
        conn.setRequestProperty("Content-Type", "application/json; charset=utf-8")
        conn.setRequestProperty("Accept", "application/json")
        conn.connectTimeout = OLLAMA_TIMEOUT_MS
        conn.readTimeout = OLLAMA_TIMEOUT_MS
        conn.doOutput = true

        try {
            conn.outputStream.use { it.write(body) }
        } catch (e: ConnectException) {
            throw RuntimeException("Ollama no está corriendo en $baseUrl", e)
        }

        val statusCode = conn.responseCode
        if (statusCode != HttpURLConnection.HTTP_OK) {
            // errorStream may be null when the server sent no error body.
            val errorBody = conn.errorStream?.bufferedReader()?.use { it.readText() } ?: ""
            throw RuntimeException("Ollama devolvió HTTP $statusCode: $errorBody")
        }

        val responseText = conn.inputStream.bufferedReader().use { it.readText() }
        return parseChatResponse(responseText, model)
    } finally {
        conn.disconnect()
    }
}

/** Serializes the chat request (model, messages, options, `stream=false`) to UTF-8 JSON bytes. */
private fun buildChatRequestBody(
    messages: List<Map<String, String>>,
    model: String,
    temperature: Double,
    maxTokens: Int
): ByteArray {
    val messagesArray = JSONArray()
    messages.forEach { messagesArray.put(JSONObject(it)) }

    val options = JSONObject().apply {
        put("temperature", temperature)
        put("num_predict", maxTokens)
    }

    return JSONObject().apply {
        put("model", model)
        put("messages", messagesArray)
        put("stream", false)
        put("options", options)
    }.toString().toByteArray(Charsets.UTF_8)
}

/** Parses a successful `/api/chat` response body; [requestedModel] is used when `model` is absent. */
private fun parseChatResponse(responseText: String, requestedModel: String): OllamaChatResponse {
    val json = JSONObject(responseText)
    return OllamaChatResponse(
        content = json.getJSONObject("message").getString("content"),
        model = json.optString("model", requestedModel),
        totalDurationMs = json.optLong("total_duration", 0L) / NANOS_PER_MILLI,
        evalCount = json.optInt("eval_count", 0)
    )
}