""" Adaptador para Ollama (modelos locales) """ import os import requests from typing import List, Dict, Any, Optional from .base import BaseProvider, ProviderResponse from ..models import SelectedContext, ContextSource class OllamaProvider(BaseProvider): """Proveedor para modelos locales via Ollama""" provider_name = "ollama" def __init__( self, model: str = "llama3", host: str = None, port: int = None, **kwargs ): super().__init__(model=model, **kwargs) self.host = host or os.getenv("OLLAMA_HOST", "localhost") self.port = port or int(os.getenv("OLLAMA_PORT", "11434")) self.model = model self.base_url = f"http://{self.host}:{self.port}" def format_context(self, context: SelectedContext) -> List[Dict[str, str]]: """ Formatea el contexto para Ollama. Returns: Lista de mensajes en formato Ollama """ messages = [] system_parts = [] for item in context.items: if item.source in [ContextSource.MEMORY, ContextSource.KNOWLEDGE, ContextSource.AMBIENT, ContextSource.DATASET]: system_parts.append(item.content) elif item.source == ContextSource.HISTORY: role = item.metadata.get("role", "user") messages.append({ "role": role, "content": item.content }) if system_parts: messages.insert(0, { "role": "system", "content": "\n\n".join(system_parts) }) return messages def send_message( self, message: str, context: SelectedContext = None, system_prompt: str = None, temperature: float = 0.7, **kwargs ) -> ProviderResponse: """Envía mensaje a Ollama""" # Formatear contexto messages = self.format_context(context) if context else [] # Añadir system prompt if system_prompt: if messages and messages[0]["role"] == "system": messages[0]["content"] = f"{system_prompt}\n\n{messages[0]['content']}" else: messages.insert(0, {"role": "system", "content": system_prompt}) # Añadir mensaje del usuario messages.append({"role": "user", "content": message}) # Llamar a la API url = f"{self.base_url}/api/chat" payload = { "model": self.model, "messages": messages, "stream": False, "options": { "temperature": temperature } } response, latency_ms = self._measure_latency( requests.post, url, json=payload, timeout=120 ) response.raise_for_status() data = response.json() # Ollama no siempre retorna conteos de tokens tokens_input = data.get("prompt_eval_count", self.estimate_tokens(str(messages))) tokens_output = data.get("eval_count", self.estimate_tokens(data["message"]["content"])) return ProviderResponse( content=data["message"]["content"], model=data.get("model", self.model), tokens_input=tokens_input, tokens_output=tokens_output, latency_ms=latency_ms, finish_reason=data.get("done_reason", "stop"), raw_response={ "total_duration": data.get("total_duration"), "load_duration": data.get("load_duration"), "prompt_eval_duration": data.get("prompt_eval_duration"), "eval_duration": data.get("eval_duration") } ) def list_models(self) -> List[str]: """Lista los modelos disponibles en Ollama""" response = requests.get(f"{self.base_url}/api/tags") response.raise_for_status() data = response.json() return [m["name"] for m in data.get("models", [])] def pull_model(self, model_name: str): """Descarga un modelo en Ollama""" response = requests.post( f"{self.base_url}/api/pull", json={"name": model_name}, stream=True ) response.raise_for_status() for line in response.iter_lines(): if line: yield line.decode("utf-8")