179 lines
5.2 KiB
Python
179 lines
5.2 KiB
Python
import requests
|
|
import json
|
|
|
|
BASE_URL = "http://localhost:11434/api"
|
|
|
|
def generate_completion(model: str, prompt: str, stream: bool = False, **options):
    """
    Request a completion for ``prompt`` from the ``/generate`` endpoint.

    Args:
        model (str): Name of the model to query.
        prompt (str): Prompt text sent to the model.
        stream (bool): Forwarded as the ``stream`` field of the request.
        **options: Extra keyword arguments, sent together under the
            ``options`` key of the payload.

    Returns:
        dict or str: The decoded JSON body when ``stream`` is falsy,
        otherwise the raw response text.

    Raises:
        Exception: If the server answers with a non-OK status code.
    """
    request_body = dict(model=model, prompt=prompt, stream=stream, options=options)
    reply = requests.post(f"{BASE_URL}/generate", json=request_body)

    # Guard clause: bail out first on any failed HTTP status.
    if not reply.ok:
        raise Exception(f"Error: {reply.status_code}, {reply.text}")

    if stream:
        return reply.text
    return reply.json()
|
|
|
|
|
|
############################################################################################################
|
|
|
|
def generate_chat(model: str, prompt: str, system_prompt: str = None, conversation_history: list = None, tool_prompt: str = None, stream: bool = False, **options):
    """
    Send a chat request built from an optional system prompt, prior history,
    the current user prompt and an optional tool message.

    Args:
        model (str): The model name to use.
        prompt (str): The current user input; sent as a ``user`` message.
        system_prompt (str, optional): Sent first as a ``system`` message
            when truthy.
        conversation_history (list, optional): Earlier messages, inserted
            between the system message and the current user message.
        tool_prompt (str, optional): Sent last as a ``tool`` message when
            truthy.
        stream (bool): Forwarded as the ``stream`` field of the request.
        **options: Extra keyword arguments merged directly into the top
            level of the payload (they may override existing payload keys).

    Returns:
        dict: ``{"user": <payload that was sent>, "assistant": <result>}``
        where the result is the decoded JSON body when ``stream`` is falsy
        and the raw response text otherwise.

    Raises:
        Exception: If the server answers with a non-OK status code.
    """
    # Assemble the conversation in the order it is sent:
    # system -> history -> current user turn -> tool.
    system_part = [{"role": "system", "content": system_prompt}] if system_prompt else []
    history_part = conversation_history or []
    tool_part = [{"role": "tool", "content": tool_prompt}] if tool_prompt else []
    chat_messages = [
        *system_part,
        *history_part,
        {"role": "user", "content": prompt},
        *tool_part,
    ]

    # Extra options are flattened into the top level of the payload.
    request_body = {
        "model": model,
        "messages": chat_messages,
        "stream": stream,
        **options,
    }

    reply = requests.post(f"{BASE_URL}/chat", json=request_body)
    if not reply.ok:
        raise Exception(f"Error: {reply.status_code}, {reply.text}")

    if stream:
        answer = reply.text
    else:
        answer = reply.json()

    return {"user": request_body, "assistant": answer}
|
|
|
|
|
|
############################################################################################################
|
|
|
|
|
|
def list_local_models():
    """
    Fetch the ``/tags`` endpoint and return its ``models`` entry.

    Returns:
        The value stored under the ``"models"`` key of the JSON response.

    Raises:
        Exception: If the server answers with a non-OK status code.
    """
    reply = requests.get(f"{BASE_URL}/tags")
    if not reply.ok:
        raise Exception(f"Error: {reply.status_code}, {reply.text}")

    body = reply.json()
    return body["models"]
|
|
|
|
def show_model_info(model: str):
    """
    Query the ``/show`` endpoint for details about one model.

    Args:
        model (str): Name of the model to describe.

    Returns:
        The decoded JSON body of the response.

    Raises:
        Exception: If the server answers with a non-OK status code.
    """
    reply = requests.post(f"{BASE_URL}/show", json={"model": model})
    if not reply.ok:
        raise Exception(f"Error: {reply.status_code}, {reply.text}")
    return reply.json()
|
|
|
|
def pull_model(model: str):
    """
    Pull a model from the Ollama library.

    The request explicitly sets ``"stream": False``. Per the Ollama API
    documentation, the pull endpoint otherwise streams a sequence of JSON
    status objects, which ``response.json()`` cannot decode as a single
    document. With streaming disabled the server replies with one JSON
    object once the pull has finished.

    Args:
        model (str): Name of the model to pull.

    Returns:
        dict: The decoded JSON body of the response.

    Raises:
        Exception: If the server answers with a non-OK status code.
    """
    url = f"{BASE_URL}/pull"
    # Ask for a single response object so the body is valid JSON as a whole.
    # NOTE: the call blocks until the pull completes; no progress is reported.
    payload = {"model": model, "stream": False}
    response = requests.post(url, json=payload)
    if response.ok:
        return response.json()
    raise Exception(f"Error: {response.status_code}, {response.text}")
|
|
|
|
def delete_model(model: str):
    """
    Delete a local model.

    Per the Ollama API documentation, a successful delete is signalled by
    the HTTP status alone and carries no response body, so decoding the
    body unconditionally would raise a JSON decode error on success. An
    empty body is therefore reported as an empty dict.

    Args:
        model (str): Name of the model to delete.

    Returns:
        dict: The decoded JSON body if the server sent one, otherwise ``{}``.

    Raises:
        Exception: If the server answers with a non-OK status code.
    """
    url = f"{BASE_URL}/delete"
    payload = {"model": model}
    response = requests.delete(url, json=payload)
    if not response.ok:
        raise Exception(f"Error: {response.status_code}, {response.text}")

    # Only parse JSON when there is actually a body to parse.
    if not response.content:
        return {}
    return response.json()
|
|
|
|
def generate_embeddings(model: str, inputs: list):
    """
    Post ``inputs`` to the ``/embed`` endpoint and return the embeddings.

    Args:
        model (str): Name of the embedding model to use.
        inputs (list): Value sent as the ``input`` field of the request.

    Returns:
        The value stored under the ``"embeddings"`` key of the JSON response.

    Raises:
        Exception: If the server answers with a non-OK status code.
    """
    request_body = dict(model=model, input=inputs)
    reply = requests.post(f"{BASE_URL}/embed", json=request_body)
    if not reply.ok:
        raise Exception(f"Error: {reply.status_code}, {reply.text}")

    body = reply.json()
    return body["embeddings"]
|
|
|
|
def list_running_models():
    """
    Fetch the ``/ps`` endpoint and return its ``models`` entry.

    Returns:
        The value stored under the ``"models"`` key of the JSON response.

    Raises:
        Exception: If the server answers with a non-OK status code.
    """
    reply = requests.get(f"{BASE_URL}/ps")
    if not reply.ok:
        raise Exception(f"Error: {reply.status_code}, {reply.text}")

    body = reply.json()
    return body["models"]
|
|
|
|
if __name__ == "__main__":
    # Small manual demo: one chat turn with a system prompt and empty history.
    system_prompt = "Eres un asistente virtual que te puede ayudar a responder preguntas de cualquier tipo."
    prompt = "Hola que tal como andas?"

    try:
        history = []
        response = generate_chat(
            conversation_history=history,
            system_prompt=system_prompt,
            prompt=prompt,
            model="llama3.1",
        )

        # Echo the last message that was sent, then the assistant's reply.
        sent_message = response['user']['messages'][-1]
        print("Usuario: " + sent_message['content'])

        assistant_message = response['assistant']['message']
        print("Asistente: " + assistant_message['content'])
    except Exception as e:
        # Top-level boundary of the demo: report any failure and exit.
        print(f"Error: {e}")
|