b1016ec845
Añade estructura inicial kotlin/functions/, actualiza registry.db con todos los cambios indexados, y ajusta .gitignore. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
102 lines
3.2 KiB
Kotlin
102 lines
3.2 KiB
Kotlin
package infra
|
||
|
||
import org.json.JSONArray
|
||
import org.json.JSONObject
|
||
import java.io.ConnectException
|
||
import java.net.HttpURLConnection
|
||
import java.net.URL
|
||
|
||
/**
 * Parsed result of a single Ollama chat completion call.
 *
 * @property content Text of the generated assistant message.
 * @property model Model name associated with the response.
 * @property totalDurationMs Total duration of the request, in milliseconds.
 * @property evalCount Evaluation (token) count reported for the response.
 */
data class OllamaChatResponse(
    val content: String,
    val model: String,
    val totalDurationMs: Long,
    val evalCount: Int,
)
|
||
|
||
/**
 * Sends a non-streaming chat completion request to an Ollama server.
 *
 * POSTs a JSON body (`model`, `messages`, `stream = false`, and an `options` object
 * carrying `temperature` and `num_predict`) to `$baseUrl/api/chat`, then parses the
 * JSON reply.
 *
 * @param messages List of role/content maps, e.g. `[{"role":"user","content":"hi"}]`.
 * @param model Ollama model tag to use.
 * @param baseUrl Base URL of the Ollama server; trailing slashes are ignored.
 * @param temperature Sampling temperature, sent as `options.temperature`.
 * @param maxTokens Maximum number of tokens to generate, sent as `options.num_predict`.
 * @return [OllamaChatResponse] built from the reply: `message.content`, `model`
 *   (falling back to the requested [model] when absent), `total_duration` converted
 *   from nanoseconds to milliseconds, and `eval_count` (both 0 when absent).
 * @throws RuntimeException if the connection is refused or the server answers with a
 *   status other than 200. Other I/O failures (for example timeouts) and JSON parsing
 *   errors propagate unchanged.
 */
fun ollamaChat(
    messages: List<Map<String, String>>,
    model: String = "llama3.1:8b",
    baseUrl: String = "http://localhost:11434",
    temperature: Double = 0.7,
    maxTokens: Int = 1024
): OllamaChatResponse {
    val messagesArray = JSONArray().apply {
        for (msg in messages) {
            put(JSONObject(msg as Map<*, *>))
        }
    }

    val options = JSONObject().apply {
        put("temperature", temperature)
        put("num_predict", maxTokens)
    }

    val body = JSONObject().apply {
        put("model", model)
        put("messages", messagesArray)
        put("stream", false)
        put("options", options)
    }.toString().toByteArray(Charsets.UTF_8)

    // Trim trailing slashes so "http://host:11434/" does not yield "//api/chat".
    val url = URL("${baseUrl.trimEnd('/')}/api/chat")

    // openConnection() only creates the connection object; the socket is opened lazily
    // when the request body is written below, which is where a refused connection shows up.
    val conn = url.openConnection() as HttpURLConnection

    try {
        conn.requestMethod = "POST"
        conn.setRequestProperty("Content-Type", "application/json; charset=utf-8")
        conn.setRequestProperty("Accept", "application/json")
        conn.connectTimeout = 60_000
        conn.readTimeout = 60_000
        conn.doOutput = true

        try {
            conn.outputStream.use { it.write(body) }
        } catch (e: ConnectException) {
            throw RuntimeException("Ollama no está corriendo en $baseUrl", e)
        }

        val statusCode = conn.responseCode
        if (statusCode != HttpURLConnection.HTTP_OK) {
            // errorStream is null when the server sent no error body.
            val errorBody = conn.errorStream?.bufferedReader()?.use { it.readText() } ?: ""
            throw RuntimeException("Ollama devolvió HTTP $statusCode: $errorBody")
        }

        val responseText = conn.inputStream.bufferedReader().use { it.readText() }
        val json = JSONObject(responseText)

        val content = json.getJSONObject("message").getString("content")
        val returnedModel = json.optString("model", model)
        val totalDurationNs = json.optLong("total_duration", 0L)
        val evalCount = json.optInt("eval_count", 0)

        return OllamaChatResponse(
            content = content,
            model = returnedModel,
            totalDurationMs = totalDurationNs / 1_000_000L,
            evalCount = evalCount
        )
    } finally {
        conn.disconnect()
    }
}
|