Local context management system for AI:
- Immutable log (blockchain-style)
- Versioned, improvable algorithms
- Model-agnostic (Anthropic, OpenAI, Ollama)
- Metrics and A/B testing system

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
"""
|
|
Adaptador para Ollama (modelos locales)
|
|
"""
|
|
|
|
import os
|
|
import requests
|
|
from typing import List, Dict, Any, Optional
|
|
|
|
from .base import BaseProvider, ProviderResponse
|
|
from ..models import SelectedContext, ContextSource
|
|
|
|
|
|
class OllamaProvider(BaseProvider):
|
|
"""Proveedor para modelos locales via Ollama"""
|
|
|
|
provider_name = "ollama"
|
|
|
|
def __init__(
|
|
self,
|
|
model: str = "llama3",
|
|
host: str = None,
|
|
port: int = None,
|
|
**kwargs
|
|
):
|
|
super().__init__(model=model, **kwargs)
|
|
|
|
self.host = host or os.getenv("OLLAMA_HOST", "localhost")
|
|
self.port = port or int(os.getenv("OLLAMA_PORT", "11434"))
|
|
self.model = model
|
|
self.base_url = f"http://{self.host}:{self.port}"
|
|
|
|
def format_context(self, context: SelectedContext) -> List[Dict[str, str]]:
|
|
"""
|
|
Formatea el contexto para Ollama.
|
|
|
|
Returns:
|
|
Lista de mensajes en formato Ollama
|
|
"""
|
|
messages = []
|
|
system_parts = []
|
|
|
|
for item in context.items:
|
|
if item.source in [ContextSource.MEMORY, ContextSource.KNOWLEDGE,
|
|
ContextSource.AMBIENT, ContextSource.DATASET]:
|
|
system_parts.append(item.content)
|
|
elif item.source == ContextSource.HISTORY:
|
|
role = item.metadata.get("role", "user")
|
|
messages.append({
|
|
"role": role,
|
|
"content": item.content
|
|
})
|
|
|
|
if system_parts:
|
|
messages.insert(0, {
|
|
"role": "system",
|
|
"content": "\n\n".join(system_parts)
|
|
})
|
|
|
|
return messages
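
    # For illustration: a context with one MEMORY item and two HISTORY turns would
    # come back roughly as the list below (a sketch; the item contents are made up):
    #   [
    #       {"role": "system", "content": "User prefers concise answers"},
    #       {"role": "user", "content": "Hi"},
    #       {"role": "assistant", "content": "Hello! How can I help?"},
    #   ]
    # i.e. non-conversational sources are merged into one leading system message,
    # while HISTORY items keep their original roles.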

    def send_message(
        self,
        message: str,
        context: Optional[SelectedContext] = None,
        system_prompt: Optional[str] = None,
        temperature: float = 0.7,
        **kwargs
    ) -> ProviderResponse:
        """Send a message to Ollama."""

        # Format the selected context into chat messages
        messages = self.format_context(context) if context else []

        # Prepend the system prompt, merging with an existing system message if present
        if system_prompt:
            if messages and messages[0]["role"] == "system":
                messages[0]["content"] = f"{system_prompt}\n\n{messages[0]['content']}"
            else:
                messages.insert(0, {"role": "system", "content": system_prompt})

        # Append the user's message
        messages.append({"role": "user", "content": message})

        # Call the Ollama chat API
        url = f"{self.base_url}/api/chat"
        payload = {
            "model": self.model,
            "messages": messages,
            "stream": False,
            "options": {
                "temperature": temperature
            }
        }

        response, latency_ms = self._measure_latency(
            requests.post,
            url,
            json=payload,
            timeout=120
        )

        response.raise_for_status()
        data = response.json()

        # Ollama does not always return token counts, so fall back to estimates
        tokens_input = data.get("prompt_eval_count", self.estimate_tokens(str(messages)))
        tokens_output = data.get("eval_count", self.estimate_tokens(data["message"]["content"]))

        return ProviderResponse(
            content=data["message"]["content"],
            model=data.get("model", self.model),
            tokens_input=tokens_input,
            tokens_output=tokens_output,
            latency_ms=latency_ms,
            finish_reason=data.get("done_reason", "stop"),
            raw_response={
                "total_duration": data.get("total_duration"),
                "load_duration": data.get("load_duration"),
                "prompt_eval_duration": data.get("prompt_eval_duration"),
                "eval_duration": data.get("eval_duration")
            }
        )
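
    # The *_duration values Ollama reports (passed through in raw_response above)
    # are in nanoseconds, so a caller could estimate generation throughput along
    # these lines (illustrative sketch, assuming ProviderResponse exposes the
    # constructor arguments as attributes):
    #   resp = provider.send_message("...")
    #   eval_ns = resp.raw_response.get("eval_duration") or 0
    #   tokens_per_s = resp.tokens_output / (eval_ns / 1e9) if eval_ns else None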

    def list_models(self) -> List[str]:
        """List the models available on the local Ollama server."""
        response = requests.get(f"{self.base_url}/api/tags")
        response.raise_for_status()
        data = response.json()
        return [m["name"] for m in data.get("models", [])]

    def pull_model(self, model_name: str):
        """Pull (download) a model into Ollama, yielding progress lines."""
        response = requests.post(
            f"{self.base_url}/api/pull",
            json={"name": model_name},
            stream=True
        )
        response.raise_for_status()
        for line in response.iter_lines():
            if line:
                yield line.decode("utf-8")
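
# Example usage (an illustrative sketch, not part of the provider itself; the
# import path is hypothetical and the response attributes are assumed to mirror
# the ProviderResponse constructor arguments):
#
#   from providers.ollama import OllamaProvider
#
#   provider = OllamaProvider(model="llama3", host="localhost", port=11434)
#   print(provider.list_models())
#
#   resp = provider.send_message(
#       "Summarize the last meeting",
#       system_prompt="Answer in one paragraph.",
#       temperature=0.2,
#   )
#   print(resp.content, resp.latency_ms)
#
#   # Pulling a model streams progress lines from Ollama's /api/pull endpoint:
#   for progress in provider.pull_model("llama3"):
#       print(progress)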