context-manager/src/providers/ollama.py
ARCHITECT 6ab93d3485 Initial commit: Context Manager v1.0.0
Local context management system for AI:
- Immutable log (blockchain-style)
- Versioned, improvable algorithms
- Model-agnostic (Anthropic, OpenAI, Ollama)
- Metrics and A/B testing system

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-29 18:55:27 +00:00


"""
Adaptador para Ollama (modelos locales)
"""
import os
import requests
from typing import List, Dict, Any, Optional
from .base import BaseProvider, ProviderResponse
from ..models import SelectedContext, ContextSource
class OllamaProvider(BaseProvider):
"""Proveedor para modelos locales via Ollama"""
provider_name = "ollama"

    def __init__(
        self,
        model: str = "llama3",
        host: Optional[str] = None,
        port: Optional[int] = None,
        **kwargs
    ):
        super().__init__(model=model, **kwargs)
        self.host = host or os.getenv("OLLAMA_HOST", "localhost")
        self.port = port or int(os.getenv("OLLAMA_PORT", "11434"))
        self.model = model
        self.base_url = f"http://{self.host}:{self.port}"
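
    # Configuration sketch: host and port fall back to the OLLAMA_HOST /
    # OLLAMA_PORT environment variables, so a hypothetical remote server
    # could be targeted without passing arguments explicitly:
    #
    #   os.environ["OLLAMA_HOST"] = "192.168.1.50"   # hypothetical address
    #   provider = OllamaProvider(model="llama3")
    #   # provider.base_url -> "http://192.168.1.50:11434"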

    def format_context(self, context: SelectedContext) -> List[Dict[str, str]]:
        """
        Format the selected context as Ollama chat messages.

        Returns:
            List of messages in Ollama chat format.
        """
        messages = []
        system_parts = []
        for item in context.items:
            if item.source in [ContextSource.MEMORY, ContextSource.KNOWLEDGE,
                               ContextSource.AMBIENT, ContextSource.DATASET]:
                system_parts.append(item.content)
            elif item.source == ContextSource.HISTORY:
                role = item.metadata.get("role", "user")
                messages.append({
                    "role": role,
                    "content": item.content
                })
        if system_parts:
            messages.insert(0, {
                "role": "system",
                "content": "\n\n".join(system_parts)
            })
        return messages
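
    # Illustrative result of format_context (hypothetical items): MEMORY,
    # KNOWLEDGE, AMBIENT and DATASET items are merged into a single leading
    # system message, while HISTORY items become user/assistant turns:
    #
    #   [{"role": "system", "content": "fact A\n\nfact B"},
    #    {"role": "user", "content": "previous question"},
    #    {"role": "assistant", "content": "previous answer"}]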

    def send_message(
        self,
        message: str,
        context: Optional[SelectedContext] = None,
        system_prompt: Optional[str] = None,
        temperature: float = 0.7,
        **kwargs
    ) -> ProviderResponse:
        """Send a message to Ollama."""
        # Format the selected context
        messages = self.format_context(context) if context else []
        # Add the system prompt
        if system_prompt:
            if messages and messages[0]["role"] == "system":
                messages[0]["content"] = f"{system_prompt}\n\n{messages[0]['content']}"
            else:
                messages.insert(0, {"role": "system", "content": system_prompt})
        # Append the user message
        messages.append({"role": "user", "content": message})
        # Call the API
        url = f"{self.base_url}/api/chat"
        payload = {
            "model": self.model,
            "messages": messages,
            "stream": False,
            "options": {
                "temperature": temperature
            }
        }
        response, latency_ms = self._measure_latency(
            requests.post,
            url,
            json=payload,
            timeout=120
        )
        response.raise_for_status()
        data = response.json()
        # Ollama does not always return token counts
        tokens_input = data.get("prompt_eval_count", self.estimate_tokens(str(messages)))
        tokens_output = data.get("eval_count", self.estimate_tokens(data["message"]["content"]))
        return ProviderResponse(
            content=data["message"]["content"],
            model=data.get("model", self.model),
            tokens_input=tokens_input,
            tokens_output=tokens_output,
            latency_ms=latency_ms,
            finish_reason=data.get("done_reason", "stop"),
            raw_response={
                "total_duration": data.get("total_duration"),
                "load_duration": data.get("load_duration"),
                "prompt_eval_duration": data.get("prompt_eval_duration"),
                "eval_duration": data.get("eval_duration")
            }
        )
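
    # Minimal send_message sketch (assumes a local Ollama server with the
    # "llama3" model already pulled; ProviderResponse fields as defined above):
    #
    #   provider = OllamaProvider(model="llama3")
    #   reply = provider.send_message("Summarize the project", temperature=0.2)
    #   print(reply.content, reply.tokens_output, reply.latency_ms)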

    def list_models(self) -> List[str]:
        """List the models available on the Ollama server."""
        response = requests.get(f"{self.base_url}/api/tags")
        response.raise_for_status()
        data = response.json()
        return [m["name"] for m in data.get("models", [])]

    def pull_model(self, model_name: str):
        """Pull (download) a model on the Ollama server, yielding progress lines."""
        response = requests.post(
            f"{self.base_url}/api/pull",
            json={"name": model_name},
            stream=True
        )
        response.raise_for_status()
        for line in response.iter_lines():
            if line:
                yield line.decode("utf-8")
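
    # Sketch combining list_models and pull_model (assumes a local Ollama
    # daemon; pull_model is a generator that streams the raw JSON progress
    # lines returned by the /api/pull endpoint, model name is hypothetical):
    #
    #   provider = OllamaProvider()
    #   if "llama3:latest" not in provider.list_models():
    #       for progress in provider.pull_model("llama3"):
    #           print(progress)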