From acffd2a2a7a2601686451eeb56306ac679cdfd90 Mon Sep 17 00:00:00 2001 From: ARCHITECT Date: Wed, 31 Dec 2025 20:07:26 +0000 Subject: [PATCH] Remove context-manager (moved to dedicated repo) --- context-manager/schemas/00_base.sql | 39 -- context-manager/schemas/01_immutable_log.sql | 276 -------- .../schemas/02_context_manager.sql | 243 ------- .../schemas/03_algorithm_engine.sql | 399 ----------- context-manager/src/__init__.py | 25 - context-manager/src/context_selector.py | 508 -------------- context-manager/src/database.py | 621 ------------------ context-manager/src/models.py | 309 --------- context-manager/src/providers/__init__.py | 18 - context-manager/src/providers/anthropic.py | 110 ---- context-manager/src/providers/base.py | 85 --- context-manager/src/providers/openai.py | 120 ---- 12 files changed, 2753 deletions(-) delete mode 100644 context-manager/schemas/00_base.sql delete mode 100644 context-manager/schemas/01_immutable_log.sql delete mode 100644 context-manager/schemas/02_context_manager.sql delete mode 100644 context-manager/schemas/03_algorithm_engine.sql delete mode 100644 context-manager/src/__init__.py delete mode 100644 context-manager/src/context_selector.py delete mode 100644 context-manager/src/database.py delete mode 100644 context-manager/src/models.py delete mode 100644 context-manager/src/providers/__init__.py delete mode 100644 context-manager/src/providers/anthropic.py delete mode 100644 context-manager/src/providers/base.py delete mode 100644 context-manager/src/providers/openai.py diff --git a/context-manager/schemas/00_base.sql b/context-manager/schemas/00_base.sql deleted file mode 100644 index b179ec8..0000000 --- a/context-manager/schemas/00_base.sql +++ /dev/null @@ -1,39 +0,0 @@ --- ============================================ --- CONTEXT MANAGER - BASE TYPES --- Sistema local de gestión de contexto para IA --- ============================================ - --- Extension para UUIDs -CREATE EXTENSION IF NOT EXISTS "pgcrypto"; - --- ============================================ --- TIPOS ENUMERADOS --- ============================================ - -CREATE TYPE mensaje_role AS ENUM ('user', 'assistant', 'system', 'tool'); -CREATE TYPE context_source AS ENUM ('memory', 'knowledge', 'history', 'ambient', 'dataset'); -CREATE TYPE algorithm_status AS ENUM ('draft', 'testing', 'active', 'deprecated'); -CREATE TYPE metric_type AS ENUM ('relevance', 'token_efficiency', 'response_quality', 'latency'); - --- ============================================ --- FUNCIÓN: Timestamp de actualización --- ============================================ - -CREATE OR REPLACE FUNCTION update_updated_at_column() -RETURNS TRIGGER AS $$ -BEGIN - NEW.updated_at = CURRENT_TIMESTAMP; - RETURN NEW; -END; -$$ LANGUAGE plpgsql; - --- ============================================ --- FUNCIÓN: Hash SHA-256 --- ============================================ - -CREATE OR REPLACE FUNCTION sha256_hash(content TEXT) -RETURNS VARCHAR(64) AS $$ -BEGIN - RETURN encode(digest(content, 'sha256'), 'hex'); -END; -$$ LANGUAGE plpgsql IMMUTABLE; diff --git a/context-manager/schemas/01_immutable_log.sql b/context-manager/schemas/01_immutable_log.sql deleted file mode 100644 index a13fb09..0000000 --- a/context-manager/schemas/01_immutable_log.sql +++ /dev/null @@ -1,276 +0,0 @@ --- ============================================ --- LOG INMUTABLE - TABLA DE REFERENCIA --- NO EDITABLE - Solo INSERT permitido --- ============================================ --- Esta tabla es la fuente de verdad del sistema. 
--- Nunca se modifica ni se borra. Solo se inserta. - --- ============================================ --- TABLA: immutable_log --- Registro permanente de todas las interacciones --- ============================================ - -CREATE TABLE IF NOT EXISTS immutable_log ( - -- Identificación - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - hash VARCHAR(64) NOT NULL UNIQUE, -- SHA-256 del contenido - hash_anterior VARCHAR(64), -- Encadenamiento (blockchain-style) - - -- Sesión - session_id UUID NOT NULL, - sequence_num BIGINT NOT NULL, -- Número secuencial en la sesión - - -- Mensaje - role mensaje_role NOT NULL, - content TEXT NOT NULL, - - -- Modelo IA (agnóstico) - model_provider VARCHAR(50), -- anthropic, openai, ollama, local, etc. - model_name VARCHAR(100), -- claude-3-opus, gpt-4, llama-3, etc. - model_params JSONB DEFAULT '{}', -- temperature, max_tokens, etc. - - -- Contexto enviado (snapshot) - context_snapshot JSONB, -- Copia del contexto usado - context_algorithm_id UUID, -- Qué algoritmo seleccionó el contexto - context_tokens_used INT, - - -- Respuesta (solo para role=assistant) - tokens_input INT, - tokens_output INT, - latency_ms INT, - - -- Metadata inmutable - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL, - source_ip VARCHAR(45), - user_agent TEXT, - - -- Integridad - CONSTRAINT chain_integrity CHECK ( - (sequence_num = 1 AND hash_anterior IS NULL) OR - (sequence_num > 1 AND hash_anterior IS NOT NULL) - ) -); - --- Índices para consulta (no para modificación) -CREATE INDEX IF NOT EXISTS idx_log_session ON immutable_log(session_id, sequence_num); -CREATE INDEX IF NOT EXISTS idx_log_created ON immutable_log(created_at DESC); -CREATE INDEX IF NOT EXISTS idx_log_model ON immutable_log(model_provider, model_name); -CREATE INDEX IF NOT EXISTS idx_log_hash ON immutable_log(hash); -CREATE INDEX IF NOT EXISTS idx_log_chain ON immutable_log(hash_anterior); - --- ============================================ --- PROTECCIÓN: Trigger que impide UPDATE y DELETE --- ============================================ - -CREATE OR REPLACE FUNCTION prevent_log_modification() -RETURNS TRIGGER AS $$ -BEGIN - RAISE EXCEPTION 'immutable_log no permite modificaciones. 
Solo INSERT está permitido.'; - RETURN NULL; -END; -$$ LANGUAGE plpgsql; - -DROP TRIGGER IF EXISTS protect_immutable_log_update ON immutable_log; -CREATE TRIGGER protect_immutable_log_update - BEFORE UPDATE ON immutable_log - FOR EACH ROW EXECUTE FUNCTION prevent_log_modification(); - -DROP TRIGGER IF EXISTS protect_immutable_log_delete ON immutable_log; -CREATE TRIGGER protect_immutable_log_delete - BEFORE DELETE ON immutable_log - FOR EACH ROW EXECUTE FUNCTION prevent_log_modification(); - --- ============================================ --- FUNCIÓN: Insertar en log con hash automático --- ============================================ - -CREATE OR REPLACE FUNCTION insert_log_entry( - p_session_id UUID, - p_role mensaje_role, - p_content TEXT, - p_model_provider VARCHAR DEFAULT NULL, - p_model_name VARCHAR DEFAULT NULL, - p_model_params JSONB DEFAULT '{}', - p_context_snapshot JSONB DEFAULT NULL, - p_context_algorithm_id UUID DEFAULT NULL, - p_context_tokens_used INT DEFAULT NULL, - p_tokens_input INT DEFAULT NULL, - p_tokens_output INT DEFAULT NULL, - p_latency_ms INT DEFAULT NULL, - p_source_ip VARCHAR DEFAULT NULL, - p_user_agent TEXT DEFAULT NULL -) -RETURNS UUID AS $$ -DECLARE - v_sequence_num BIGINT; - v_hash_anterior VARCHAR(64); - v_content_hash VARCHAR(64); - v_new_id UUID; -BEGIN - -- Obtener último hash y secuencia de la sesión - SELECT sequence_num, hash - INTO v_sequence_num, v_hash_anterior - FROM immutable_log - WHERE session_id = p_session_id - ORDER BY sequence_num DESC - LIMIT 1; - - IF v_sequence_num IS NULL THEN - v_sequence_num := 1; - v_hash_anterior := NULL; - ELSE - v_sequence_num := v_sequence_num + 1; - END IF; - - -- Calcular hash del contenido (incluye hash anterior para encadenamiento) - v_content_hash := sha256_hash( - COALESCE(v_hash_anterior, '') || - p_session_id::TEXT || - v_sequence_num::TEXT || - p_role::TEXT || - p_content - ); - - -- Insertar - INSERT INTO immutable_log ( - session_id, sequence_num, hash, hash_anterior, - role, content, - model_provider, model_name, model_params, - context_snapshot, context_algorithm_id, context_tokens_used, - tokens_input, tokens_output, latency_ms, - source_ip, user_agent - ) VALUES ( - p_session_id, v_sequence_num, v_content_hash, v_hash_anterior, - p_role, p_content, - p_model_provider, p_model_name, p_model_params, - p_context_snapshot, p_context_algorithm_id, p_context_tokens_used, - p_tokens_input, p_tokens_output, p_latency_ms, - p_source_ip, p_user_agent - ) RETURNING id INTO v_new_id; - - RETURN v_new_id; -END; -$$ LANGUAGE plpgsql; - --- ============================================ --- TABLA: sessions --- Registro de sesiones (también inmutable) --- ============================================ - -CREATE TABLE IF NOT EXISTS sessions ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - hash VARCHAR(64) NOT NULL UNIQUE, - - -- Identificación - user_id VARCHAR(100), - instance_id VARCHAR(100), - - -- Configuración inicial - initial_model_provider VARCHAR(50), - initial_model_name VARCHAR(100), - initial_context_algorithm_id UUID, - - -- Metadata - metadata JSONB DEFAULT '{}', - - -- Timestamps inmutables - started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL, - ended_at TIMESTAMP, - - -- Estadísticas finales (se actualizan solo al cerrar) - total_messages INT DEFAULT 0, - total_tokens_input INT DEFAULT 0, - total_tokens_output INT DEFAULT 0, - total_latency_ms INT DEFAULT 0 -); - -CREATE INDEX IF NOT EXISTS idx_sessions_user ON sessions(user_id); -CREATE INDEX IF NOT EXISTS idx_sessions_instance ON 
sessions(instance_id); -CREATE INDEX IF NOT EXISTS idx_sessions_started ON sessions(started_at DESC); - --- ============================================ --- FUNCIÓN: Crear nueva sesión --- ============================================ - -CREATE OR REPLACE FUNCTION create_session( - p_user_id VARCHAR DEFAULT NULL, - p_instance_id VARCHAR DEFAULT NULL, - p_model_provider VARCHAR DEFAULT NULL, - p_model_name VARCHAR DEFAULT NULL, - p_algorithm_id UUID DEFAULT NULL, - p_metadata JSONB DEFAULT '{}' -) -RETURNS UUID AS $$ -DECLARE - v_session_id UUID; - v_hash VARCHAR(64); -BEGIN - v_session_id := gen_random_uuid(); - v_hash := sha256_hash(v_session_id::TEXT || CURRENT_TIMESTAMP::TEXT); - - INSERT INTO sessions ( - id, hash, user_id, instance_id, - initial_model_provider, initial_model_name, - initial_context_algorithm_id, metadata - ) VALUES ( - v_session_id, v_hash, p_user_id, p_instance_id, - p_model_provider, p_model_name, - p_algorithm_id, p_metadata - ); - - RETURN v_session_id; -END; -$$ LANGUAGE plpgsql; - --- ============================================ --- FUNCIÓN: Verificar integridad de la cadena --- ============================================ - -CREATE OR REPLACE FUNCTION verify_chain_integrity(p_session_id UUID) -RETURNS TABLE ( - is_valid BOOLEAN, - broken_at_sequence BIGINT, - expected_hash VARCHAR(64), - actual_hash VARCHAR(64) -) AS $$ -DECLARE - rec RECORD; - prev_hash VARCHAR(64) := NULL; - computed_hash VARCHAR(64); -BEGIN - FOR rec IN - SELECT * FROM immutable_log - WHERE session_id = p_session_id - ORDER BY sequence_num - LOOP - -- Verificar encadenamiento - IF rec.sequence_num = 1 AND rec.hash_anterior IS NOT NULL THEN - RETURN QUERY SELECT FALSE, rec.sequence_num, NULL::VARCHAR(64), rec.hash_anterior; - RETURN; - END IF; - - IF rec.sequence_num > 1 AND rec.hash_anterior != prev_hash THEN - RETURN QUERY SELECT FALSE, rec.sequence_num, prev_hash, rec.hash_anterior; - RETURN; - END IF; - - -- Verificar hash del contenido - computed_hash := sha256_hash( - COALESCE(prev_hash, '') || - rec.session_id::TEXT || - rec.sequence_num::TEXT || - rec.role::TEXT || - rec.content - ); - - IF computed_hash != rec.hash THEN - RETURN QUERY SELECT FALSE, rec.sequence_num, computed_hash, rec.hash; - RETURN; - END IF; - - prev_hash := rec.hash; - END LOOP; - - RETURN QUERY SELECT TRUE, NULL::BIGINT, NULL::VARCHAR(64), NULL::VARCHAR(64); -END; -$$ LANGUAGE plpgsql; diff --git a/context-manager/schemas/02_context_manager.sql b/context-manager/schemas/02_context_manager.sql deleted file mode 100644 index 3501bc1..0000000 --- a/context-manager/schemas/02_context_manager.sql +++ /dev/null @@ -1,243 +0,0 @@ --- ============================================ --- GESTOR DE CONTEXTO - TABLAS EDITABLES --- Estas tablas SÍ se pueden modificar --- ============================================ - --- ============================================ --- TABLA: context_blocks --- Bloques de contexto reutilizables --- ============================================ - -CREATE TABLE IF NOT EXISTS context_blocks ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- Identificación - code VARCHAR(100) UNIQUE NOT NULL, - name VARCHAR(255) NOT NULL, - description TEXT, - - -- Contenido - content TEXT NOT NULL, - content_hash VARCHAR(64), -- Para detectar cambios - - -- Clasificación - category VARCHAR(50) NOT NULL, -- system, persona, knowledge, rules, examples - priority INT DEFAULT 50, -- 0-100, mayor = más importante - tokens_estimated INT, - - -- Alcance - scope VARCHAR(50) DEFAULT 'global', -- global, project, 
session - project_id UUID, - - -- Condiciones de activación - activation_rules JSONB DEFAULT '{}', - /* - Ejemplo activation_rules: - { - "always": false, - "keywords": ["database", "sql"], - "model_providers": ["anthropic"], - "min_session_messages": 0, - "time_of_day": null - } - */ - - -- Estado - active BOOLEAN DEFAULT true, - version INT DEFAULT 1, - - -- Timestamps - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); - -CREATE INDEX IF NOT EXISTS idx_ctx_blocks_code ON context_blocks(code); -CREATE INDEX IF NOT EXISTS idx_ctx_blocks_category ON context_blocks(category); -CREATE INDEX IF NOT EXISTS idx_ctx_blocks_priority ON context_blocks(priority DESC); -CREATE INDEX IF NOT EXISTS idx_ctx_blocks_active ON context_blocks(active); -CREATE INDEX IF NOT EXISTS idx_ctx_blocks_scope ON context_blocks(scope); - -DROP TRIGGER IF EXISTS update_ctx_blocks_updated_at ON context_blocks; -CREATE TRIGGER update_ctx_blocks_updated_at - BEFORE UPDATE ON context_blocks - FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); - --- Trigger para calcular hash y tokens al insertar/actualizar -CREATE OR REPLACE FUNCTION update_block_metadata() -RETURNS TRIGGER AS $$ -BEGIN - NEW.content_hash := sha256_hash(NEW.content); - -- Estimación simple: ~4 caracteres por token - NEW.tokens_estimated := CEIL(LENGTH(NEW.content) / 4.0); - RETURN NEW; -END; -$$ LANGUAGE plpgsql; - -DROP TRIGGER IF EXISTS calc_block_metadata ON context_blocks; -CREATE TRIGGER calc_block_metadata - BEFORE INSERT OR UPDATE OF content ON context_blocks - FOR EACH ROW EXECUTE FUNCTION update_block_metadata(); - --- ============================================ --- TABLA: knowledge_base --- Base de conocimiento (RAG simple) --- ============================================ - -CREATE TABLE IF NOT EXISTS knowledge_base ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- Identificación - title VARCHAR(255) NOT NULL, - category VARCHAR(100) NOT NULL, - tags TEXT[] DEFAULT '{}', - - -- Contenido - content TEXT NOT NULL, - content_hash VARCHAR(64), - tokens_estimated INT, - - -- Embeddings (para búsqueda semántica futura) - embedding_model VARCHAR(100), - embedding VECTOR(1536), -- Requiere pgvector si se usa - - -- Fuente - source_type VARCHAR(50), -- file, url, manual, extracted - source_ref TEXT, - - -- Relevancia - priority INT DEFAULT 50, - access_count INT DEFAULT 0, - last_accessed_at TIMESTAMP, - - -- Estado - active BOOLEAN DEFAULT true, - - -- Timestamps - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); - -CREATE INDEX IF NOT EXISTS idx_kb_category ON knowledge_base(category); -CREATE INDEX IF NOT EXISTS idx_kb_tags ON knowledge_base USING GIN(tags); -CREATE INDEX IF NOT EXISTS idx_kb_priority ON knowledge_base(priority DESC); -CREATE INDEX IF NOT EXISTS idx_kb_active ON knowledge_base(active); - -DROP TRIGGER IF EXISTS update_kb_updated_at ON knowledge_base; -CREATE TRIGGER update_kb_updated_at - BEFORE UPDATE ON knowledge_base - FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); - -DROP TRIGGER IF EXISTS calc_kb_metadata ON knowledge_base; -CREATE TRIGGER calc_kb_metadata - BEFORE INSERT OR UPDATE OF content ON knowledge_base - FOR EACH ROW EXECUTE FUNCTION update_block_metadata(); - --- ============================================ --- TABLA: memory --- Memoria a largo plazo extraída de conversaciones --- ============================================ - -CREATE TABLE IF NOT EXISTS memory ( - id UUID PRIMARY KEY DEFAULT 
gen_random_uuid(), - - -- Clasificación - type VARCHAR(50) NOT NULL, -- fact, preference, decision, learning, procedure - category VARCHAR(100), - - -- Contenido - content TEXT NOT NULL, - summary VARCHAR(500), - content_hash VARCHAR(64), - - -- Origen - extracted_from_session UUID REFERENCES sessions(id), - extracted_from_log UUID, -- No FK para no bloquear - - -- Relevancia - importance INT DEFAULT 50, -- 0-100 - confidence DECIMAL(3,2) DEFAULT 1.0, -- 0.00-1.00 - uses INT DEFAULT 0, - last_used_at TIMESTAMP, - - -- Expiración - expires_at TIMESTAMP, - - -- Estado - active BOOLEAN DEFAULT true, - verified BOOLEAN DEFAULT false, -- Confirmado por usuario - - -- Timestamps - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); - -CREATE INDEX IF NOT EXISTS idx_memory_type ON memory(type); -CREATE INDEX IF NOT EXISTS idx_memory_importance ON memory(importance DESC); -CREATE INDEX IF NOT EXISTS idx_memory_active ON memory(active); -CREATE INDEX IF NOT EXISTS idx_memory_expires ON memory(expires_at); - -DROP TRIGGER IF EXISTS update_memory_updated_at ON memory; -CREATE TRIGGER update_memory_updated_at - BEFORE UPDATE ON memory - FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); - --- ============================================ --- TABLA: ambient_context --- Contexto ambiental (estado actual del sistema) --- ============================================ - -CREATE TABLE IF NOT EXISTS ambient_context ( - id SERIAL PRIMARY KEY, - - -- Snapshot - captured_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - expires_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + INTERVAL '1 hour', - - -- Datos ambientales - environment JSONB DEFAULT '{}', - /* - { - "timezone": "Europe/Madrid", - "locale": "es-ES", - "working_directory": "/home/user/project", - "git_branch": "main", - "active_project": "my-app" - } - */ - - -- Estado del sistema - system_state JSONB DEFAULT '{}', - /* - { - "servers": {"architect": "online"}, - "services": {"gitea": "running"}, - "pending_tasks": [], - "alerts": [] - } - */ - - -- Archivos/recursos activos - active_resources JSONB DEFAULT '[]' - /* - [ - {"type": "file", "path": "/path/to/file.py", "modified": true}, - {"type": "url", "href": "https://docs.example.com"} - ] - */ -); - -CREATE INDEX IF NOT EXISTS idx_ambient_captured ON ambient_context(captured_at DESC); -CREATE INDEX IF NOT EXISTS idx_ambient_expires ON ambient_context(expires_at); - --- Limpiar contextos expirados -CREATE OR REPLACE FUNCTION cleanup_expired_ambient() -RETURNS INTEGER AS $$ -DECLARE - deleted_count INTEGER; -BEGIN - DELETE FROM ambient_context - WHERE expires_at < CURRENT_TIMESTAMP; - GET DIAGNOSTICS deleted_count = ROW_COUNT; - RETURN deleted_count; -END; -$$ LANGUAGE plpgsql; diff --git a/context-manager/schemas/03_algorithm_engine.sql b/context-manager/schemas/03_algorithm_engine.sql deleted file mode 100644 index 0b7fbe8..0000000 --- a/context-manager/schemas/03_algorithm_engine.sql +++ /dev/null @@ -1,399 +0,0 @@ --- ============================================ --- MOTOR DE ALGORITMOS - Sistema evolutivo --- Permite versionar y mejorar el algoritmo de contexto --- ============================================ - --- ============================================ --- TABLA: context_algorithms --- Definición de algoritmos de selección de contexto --- ============================================ - -CREATE TABLE IF NOT EXISTS context_algorithms ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- Identificación - code VARCHAR(100) UNIQUE NOT NULL, - name 
VARCHAR(255) NOT NULL, - description TEXT, - version VARCHAR(20) NOT NULL DEFAULT '1.0.0', - - -- Estado - status algorithm_status DEFAULT 'draft', - - -- Configuración del algoritmo - config JSONB NOT NULL DEFAULT '{ - "max_tokens": 4000, - "sources": { - "system_prompts": true, - "context_blocks": true, - "memory": true, - "knowledge": true, - "history": true, - "ambient": true - }, - "weights": { - "priority": 0.4, - "relevance": 0.3, - "recency": 0.2, - "frequency": 0.1 - }, - "history_config": { - "max_messages": 20, - "summarize_after": 10, - "include_system": false - }, - "memory_config": { - "max_items": 15, - "min_importance": 30 - }, - "knowledge_config": { - "max_items": 5, - "require_keyword_match": true - } - }'::jsonb, - - -- Código del algoritmo (Python embebido) - selector_code TEXT, - /* - Ejemplo: - def select_context(session, message, config): - context = [] - # ... lógica de selección - return context - */ - - -- Estadísticas - times_used INT DEFAULT 0, - avg_tokens_used DECIMAL(10,2), - avg_relevance_score DECIMAL(3,2), - avg_response_quality DECIMAL(3,2), - - -- Linaje - parent_algorithm_id UUID REFERENCES context_algorithms(id), - fork_reason TEXT, - - -- Timestamps - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - activated_at TIMESTAMP, - deprecated_at TIMESTAMP -); - -CREATE INDEX IF NOT EXISTS idx_algo_code ON context_algorithms(code); -CREATE INDEX IF NOT EXISTS idx_algo_status ON context_algorithms(status); -CREATE INDEX IF NOT EXISTS idx_algo_version ON context_algorithms(version); -CREATE INDEX IF NOT EXISTS idx_algo_parent ON context_algorithms(parent_algorithm_id); - -DROP TRIGGER IF EXISTS update_algo_updated_at ON context_algorithms; -CREATE TRIGGER update_algo_updated_at - BEFORE UPDATE ON context_algorithms - FOR EACH ROW EXECUTE FUNCTION update_updated_at_column(); - --- ============================================ --- TABLA: algorithm_metrics --- Métricas de rendimiento por algoritmo --- ============================================ - -CREATE TABLE IF NOT EXISTS algorithm_metrics ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- Referencias - algorithm_id UUID NOT NULL REFERENCES context_algorithms(id), - session_id UUID REFERENCES sessions(id), - log_entry_id UUID, -- Referencia al log inmutable - - -- Métricas de contexto - tokens_budget INT, - tokens_used INT, - token_efficiency DECIMAL(5,4), -- tokens_used / tokens_budget - - -- Composición del contexto - context_composition JSONB, - /* - { - "system_prompts": {"count": 1, "tokens": 500}, - "context_blocks": {"count": 3, "tokens": 800}, - "memory": {"count": 5, "tokens": 300}, - "knowledge": {"count": 2, "tokens": 400}, - "history": {"count": 10, "tokens": 1500}, - "ambient": {"count": 1, "tokens": 100} - } - */ - - -- Métricas de respuesta - latency_ms INT, - model_tokens_input INT, - model_tokens_output INT, - - -- Evaluación (puede ser automática o manual) - relevance_score DECIMAL(3,2), -- 0.00-1.00: ¿El contexto fue relevante? - response_quality DECIMAL(3,2), -- 0.00-1.00: ¿La respuesta fue buena? 
- user_satisfaction DECIMAL(3,2), -- 0.00-1.00: Feedback del usuario - - -- Evaluación automática - auto_evaluated BOOLEAN DEFAULT false, - evaluation_method VARCHAR(50), -- llm_judge, heuristic, user_feedback - - -- Timestamp - recorded_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP -); - -CREATE INDEX IF NOT EXISTS idx_metrics_algorithm ON algorithm_metrics(algorithm_id); -CREATE INDEX IF NOT EXISTS idx_metrics_session ON algorithm_metrics(session_id); -CREATE INDEX IF NOT EXISTS idx_metrics_recorded ON algorithm_metrics(recorded_at DESC); -CREATE INDEX IF NOT EXISTS idx_metrics_quality ON algorithm_metrics(response_quality DESC); - --- ============================================ --- TABLA: algorithm_experiments --- A/B testing de algoritmos --- ============================================ - -CREATE TABLE IF NOT EXISTS algorithm_experiments ( - id UUID PRIMARY KEY DEFAULT gen_random_uuid(), - - -- Identificación - name VARCHAR(255) NOT NULL, - description TEXT, - - -- Algoritmos en competencia - control_algorithm_id UUID NOT NULL REFERENCES context_algorithms(id), - treatment_algorithm_id UUID NOT NULL REFERENCES context_algorithms(id), - - -- Configuración - traffic_split DECIMAL(3,2) DEFAULT 0.50, -- % para treatment - min_samples INT DEFAULT 100, - max_samples INT DEFAULT 1000, - - -- Estado - status VARCHAR(50) DEFAULT 'pending', -- pending, running, completed, cancelled - - -- Resultados - control_samples INT DEFAULT 0, - treatment_samples INT DEFAULT 0, - control_avg_quality DECIMAL(3,2), - treatment_avg_quality DECIMAL(3,2), - winner_algorithm_id UUID REFERENCES context_algorithms(id), - statistical_significance DECIMAL(5,4), - - -- Timestamps - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - started_at TIMESTAMP, - completed_at TIMESTAMP -); - -CREATE INDEX IF NOT EXISTS idx_exp_status ON algorithm_experiments(status); -CREATE INDEX IF NOT EXISTS idx_exp_control ON algorithm_experiments(control_algorithm_id); -CREATE INDEX IF NOT EXISTS idx_exp_treatment ON algorithm_experiments(treatment_algorithm_id); - --- ============================================ --- VISTA: Resumen de rendimiento por algoritmo --- ============================================ - -CREATE OR REPLACE VIEW algorithm_performance AS -SELECT - a.id, - a.code, - a.name, - a.version, - a.status, - a.times_used, - COUNT(m.id) as total_metrics, - AVG(m.token_efficiency) as avg_token_efficiency, - AVG(m.relevance_score) as avg_relevance, - AVG(m.response_quality) as avg_quality, - AVG(m.user_satisfaction) as avg_satisfaction, - AVG(m.latency_ms) as avg_latency, - PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY m.response_quality) as median_quality, - STDDEV(m.response_quality) as quality_stddev -FROM context_algorithms a -LEFT JOIN algorithm_metrics m ON a.id = m.algorithm_id -GROUP BY a.id, a.code, a.name, a.version, a.status, a.times_used; - --- ============================================ --- FUNCIÓN: Activar algoritmo (desactiva el anterior) --- ============================================ - -CREATE OR REPLACE FUNCTION activate_algorithm(p_algorithm_id UUID) -RETURNS BOOLEAN AS $$ -BEGIN - -- Deprecar algoritmo activo actual - UPDATE context_algorithms - SET status = 'deprecated', deprecated_at = CURRENT_TIMESTAMP - WHERE status = 'active'; - - -- Activar nuevo algoritmo - UPDATE context_algorithms - SET status = 'active', activated_at = CURRENT_TIMESTAMP - WHERE id = p_algorithm_id; - - RETURN FOUND; -END; -$$ LANGUAGE plpgsql; - --- ============================================ --- FUNCIÓN: Clonar algoritmo para 
experimentación --- ============================================ - -CREATE OR REPLACE FUNCTION fork_algorithm( - p_source_id UUID, - p_new_code VARCHAR, - p_new_name VARCHAR, - p_reason TEXT DEFAULT NULL -) -RETURNS UUID AS $$ -DECLARE - v_new_id UUID; - v_source RECORD; -BEGIN - SELECT * INTO v_source FROM context_algorithms WHERE id = p_source_id; - - IF NOT FOUND THEN - RAISE EXCEPTION 'Algoritmo fuente no encontrado: %', p_source_id; - END IF; - - INSERT INTO context_algorithms ( - code, name, description, version, - status, config, selector_code, - parent_algorithm_id, fork_reason - ) VALUES ( - p_new_code, - p_new_name, - v_source.description, - '1.0.0', - 'draft', - v_source.config, - v_source.selector_code, - p_source_id, - p_reason - ) RETURNING id INTO v_new_id; - - RETURN v_new_id; -END; -$$ LANGUAGE plpgsql; - --- ============================================ --- FUNCIÓN: Obtener algoritmo activo --- ============================================ - -CREATE OR REPLACE FUNCTION get_active_algorithm() -RETURNS UUID AS $$ - SELECT id FROM context_algorithms - WHERE status = 'active' - ORDER BY activated_at DESC - LIMIT 1; -$$ LANGUAGE SQL STABLE; - --- ============================================ --- FUNCIÓN: Registrar métrica de uso --- ============================================ - -CREATE OR REPLACE FUNCTION record_algorithm_metric( - p_algorithm_id UUID, - p_session_id UUID, - p_log_entry_id UUID, - p_tokens_budget INT, - p_tokens_used INT, - p_context_composition JSONB, - p_latency_ms INT DEFAULT NULL, - p_model_tokens_input INT DEFAULT NULL, - p_model_tokens_output INT DEFAULT NULL -) -RETURNS UUID AS $$ -DECLARE - v_metric_id UUID; - v_efficiency DECIMAL(5,4); -BEGIN - v_efficiency := CASE - WHEN p_tokens_budget > 0 THEN p_tokens_used::DECIMAL / p_tokens_budget - ELSE 0 - END; - - INSERT INTO algorithm_metrics ( - algorithm_id, session_id, log_entry_id, - tokens_budget, tokens_used, token_efficiency, - context_composition, latency_ms, - model_tokens_input, model_tokens_output - ) VALUES ( - p_algorithm_id, p_session_id, p_log_entry_id, - p_tokens_budget, p_tokens_used, v_efficiency, - p_context_composition, p_latency_ms, - p_model_tokens_input, p_model_tokens_output - ) RETURNING id INTO v_metric_id; - - -- Actualizar contador del algoritmo - UPDATE context_algorithms - SET times_used = times_used + 1 - WHERE id = p_algorithm_id; - - RETURN v_metric_id; -END; -$$ LANGUAGE plpgsql; - --- ============================================ --- FUNCIÓN: Actualizar evaluación de métrica --- ============================================ - -CREATE OR REPLACE FUNCTION update_metric_evaluation( - p_metric_id UUID, - p_relevance DECIMAL DEFAULT NULL, - p_quality DECIMAL DEFAULT NULL, - p_satisfaction DECIMAL DEFAULT NULL, - p_method VARCHAR DEFAULT 'manual' -) -RETURNS BOOLEAN AS $$ -BEGIN - UPDATE algorithm_metrics - SET - relevance_score = COALESCE(p_relevance, relevance_score), - response_quality = COALESCE(p_quality, response_quality), - user_satisfaction = COALESCE(p_satisfaction, user_satisfaction), - auto_evaluated = (p_method != 'user_feedback'), - evaluation_method = p_method - WHERE id = p_metric_id; - - RETURN FOUND; -END; -$$ LANGUAGE plpgsql; - --- ============================================ --- DATOS INICIALES: Algoritmo por defecto --- ============================================ - -INSERT INTO context_algorithms (code, name, description, version, status, config) VALUES -( - 'ALG_DEFAULT_V1', - 'Algoritmo por defecto v1', - 'Selección de contexto basada en prioridad y tokens 
disponibles', - '1.0.0', - 'active', - '{ - "max_tokens": 4000, - "sources": { - "system_prompts": true, - "context_blocks": true, - "memory": true, - "knowledge": true, - "history": true, - "ambient": true - }, - "weights": { - "priority": 0.4, - "relevance": 0.3, - "recency": 0.2, - "frequency": 0.1 - }, - "history_config": { - "max_messages": 20, - "summarize_after": 10, - "include_system": false - }, - "memory_config": { - "max_items": 15, - "min_importance": 30 - }, - "knowledge_config": { - "max_items": 5, - "require_keyword_match": true - } - }'::jsonb -) ON CONFLICT (code) DO NOTHING; diff --git a/context-manager/src/__init__.py b/context-manager/src/__init__.py deleted file mode 100644 index 7636214..0000000 --- a/context-manager/src/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -Context Manager - Sistema local de gestión de contexto para IA - -Características: -- Log inmutable (tabla de referencia no editable) -- Gestor de contexto mejorable -- Agnóstico al modelo de IA -- Sistema de métricas para mejora continua -""" - -__version__ = "1.0.0" -__author__ = "TZZR System" - -from .database import Database -from .context_selector import ContextSelector -from .models import Session, Message, ContextBlock, Algorithm - -__all__ = [ - "Database", - "ContextSelector", - "Session", - "Message", - "ContextBlock", - "Algorithm", -] diff --git a/context-manager/src/context_selector.py b/context-manager/src/context_selector.py deleted file mode 100644 index 94ba00e..0000000 --- a/context-manager/src/context_selector.py +++ /dev/null @@ -1,508 +0,0 @@ -""" -Selector de Contexto - Motor principal - -Selecciona el contexto óptimo para enviar al modelo de IA -basándose en el algoritmo activo y las fuentes disponibles. -""" - -import re -import uuid -from datetime import datetime -from typing import Optional, List, Dict, Any, Callable - -from .models import ( - Session, Message, MessageRole, ContextBlock, Memory, - Knowledge, Algorithm, AmbientContext, ContextItem, - SelectedContext, ContextSource -) -from .database import Database - - -class ContextSelector: - """ - Motor de selección de contexto. - - Características: - - Agnóstico al modelo de IA - - Basado en algoritmo configurable - - Métricas de rendimiento - - Soporte para múltiples fuentes - """ - - def __init__(self, db: Database): - self.db = db - self._custom_selectors: Dict[str, Callable] = {} - - def register_selector(self, name: str, selector_fn: Callable): - """Registra un selector de contexto personalizado""" - self._custom_selectors[name] = selector_fn - - def select_context( - self, - session: Session, - user_message: str, - algorithm: Algorithm = None, - max_tokens: int = None - ) -> SelectedContext: - """ - Selecciona el contexto óptimo para la sesión actual. 
- - Args: - session: Sesión activa - user_message: Mensaje del usuario - algorithm: Algoritmo a usar (o el activo por defecto) - max_tokens: Límite de tokens (sobreescribe config del algoritmo) - - Returns: - SelectedContext con los items seleccionados - """ - # Obtener algoritmo - if algorithm is None: - algorithm = self.db.get_active_algorithm() - - if algorithm is None: - # Algoritmo por defecto si no hay ninguno - algorithm = Algorithm( - code="DEFAULT", - name="Default", - config={ - "max_tokens": 4000, - "sources": { - "system_prompts": True, - "context_blocks": True, - "memory": True, - "knowledge": True, - "history": True, - "ambient": True - }, - "weights": {"priority": 0.4, "relevance": 0.3, "recency": 0.2, "frequency": 0.1}, - "history_config": {"max_messages": 20, "summarize_after": 10, "include_system": False}, - "memory_config": {"max_items": 15, "min_importance": 30}, - "knowledge_config": {"max_items": 5, "require_keyword_match": True} - } - ) - - config = algorithm.config - token_budget = max_tokens or config.get("max_tokens", 4000) - sources = config.get("sources", {}) - - # Verificar si hay selector personalizado - if algorithm.selector_code and algorithm.code in self._custom_selectors: - return self._custom_selectors[algorithm.code]( - session, user_message, config, self.db - ) - - # Selección estándar - context = SelectedContext(algorithm_id=algorithm.id) - composition = {} - - # 1. System prompts y bloques de contexto - if sources.get("context_blocks", True): - blocks = self._select_context_blocks(user_message, config) - for block in blocks: - if context.total_tokens + block.tokens_estimated <= token_budget: - context.items.append(ContextItem( - source=ContextSource.DATASET, - content=block.content, - tokens=block.tokens_estimated, - priority=block.priority, - metadata={"block_code": block.code, "category": block.category} - )) - context.total_tokens += block.tokens_estimated - - composition["context_blocks"] = { - "count": len([i for i in context.items if i.source == ContextSource.DATASET]), - "tokens": sum(i.tokens for i in context.items if i.source == ContextSource.DATASET) - } - - # 2. Memoria a largo plazo - if sources.get("memory", True): - memory_config = config.get("memory_config", {}) - memories = self._select_memories( - user_message, - min_importance=memory_config.get("min_importance", 30), - max_items=memory_config.get("max_items", 15) - ) - - memory_tokens = 0 - memory_count = 0 - for mem in memories: - tokens = len(mem.content) // 4 - if context.total_tokens + tokens <= token_budget: - context.items.append(ContextItem( - source=ContextSource.MEMORY, - content=f"[Memoria - {mem.type}]: {mem.content}", - tokens=tokens, - priority=mem.importance, - metadata={"memory_type": mem.type, "importance": mem.importance} - )) - context.total_tokens += tokens - memory_tokens += tokens - memory_count += 1 - - composition["memory"] = {"count": memory_count, "tokens": memory_tokens} - - # 3. 
Base de conocimiento - if sources.get("knowledge", True): - knowledge_config = config.get("knowledge_config", {}) - keywords = self._extract_keywords(user_message) - - if keywords or not knowledge_config.get("require_keyword_match", True): - knowledge_items = self.db.search_knowledge( - keywords=keywords if knowledge_config.get("require_keyword_match", True) else None, - limit=knowledge_config.get("max_items", 5) - ) - - knowledge_tokens = 0 - knowledge_count = 0 - for item in knowledge_items: - if context.total_tokens + item.tokens_estimated <= token_budget: - context.items.append(ContextItem( - source=ContextSource.KNOWLEDGE, - content=f"[Conocimiento - {item.title}]: {item.content}", - tokens=item.tokens_estimated, - priority=item.priority, - metadata={"title": item.title, "category": item.category} - )) - context.total_tokens += item.tokens_estimated - knowledge_tokens += item.tokens_estimated - knowledge_count += 1 - - composition["knowledge"] = {"count": knowledge_count, "tokens": knowledge_tokens} - - # 4. Contexto ambiental - if sources.get("ambient", True): - ambient = self.db.get_latest_ambient_context() - if ambient: - ambient_content = self._format_ambient_context(ambient) - tokens = len(ambient_content) // 4 - if context.total_tokens + tokens <= token_budget: - context.items.append(ContextItem( - source=ContextSource.AMBIENT, - content=ambient_content, - tokens=tokens, - priority=30, - metadata={"captured_at": ambient.captured_at.isoformat()} - )) - context.total_tokens += tokens - composition["ambient"] = {"count": 1, "tokens": tokens} - - # 5. Historial de conversación (al final para llenar espacio restante) - if sources.get("history", True): - history_config = config.get("history_config", {}) - history = self.db.get_session_history( - session.id, - limit=history_config.get("max_messages", 20), - include_system=history_config.get("include_system", False) - ) - - history_tokens = 0 - history_count = 0 - for msg in history: - tokens = len(msg.content) // 4 - if context.total_tokens + tokens <= token_budget: - context.items.append(ContextItem( - source=ContextSource.HISTORY, - content=msg.content, - tokens=tokens, - priority=10, - metadata={"role": msg.role.value, "sequence": msg.sequence_num} - )) - context.total_tokens += tokens - history_tokens += tokens - history_count += 1 - - composition["history"] = {"count": history_count, "tokens": history_tokens} - - context.composition = composition - return context - - def _select_context_blocks( - self, - user_message: str, - config: Dict[str, Any] - ) -> List[ContextBlock]: - """Selecciona bloques de contexto relevantes""" - blocks = self.db.get_active_context_blocks() - relevant_blocks = [] - - keywords = self._extract_keywords(user_message) - - for block in blocks: - rules = block.activation_rules - - # Siempre incluir - if rules.get("always", False): - relevant_blocks.append(block) - continue - - # Verificar keywords - block_keywords = rules.get("keywords", []) - if block_keywords: - if any(kw.lower() in user_message.lower() for kw in block_keywords): - relevant_blocks.append(block) - continue - - # Verificar categoría system (siempre incluir) - if block.category == "system": - relevant_blocks.append(block) - - # Ordenar por prioridad - relevant_blocks.sort(key=lambda b: b.priority, reverse=True) - return relevant_blocks - - def _select_memories( - self, - user_message: str, - min_importance: int = 30, - max_items: int = 15 - ) -> List[Memory]: - """Selecciona memorias relevantes""" - memories = self.db.get_memories( - 
min_importance=min_importance, - limit=max_items * 2 # Obtener más para filtrar - ) - - # Filtrar por relevancia al mensaje - keywords = self._extract_keywords(user_message) - if keywords: - scored_memories = [] - for mem in memories: - score = sum(1 for kw in keywords if kw.lower() in mem.content.lower()) - scored_memories.append((mem, score + mem.importance / 100)) - - scored_memories.sort(key=lambda x: x[1], reverse=True) - return [m[0] for m in scored_memories[:max_items]] - - return memories[:max_items] - - def _extract_keywords(self, text: str) -> List[str]: - """Extrae keywords de un texto""" - # Palabras comunes a ignorar - stopwords = { - "el", "la", "los", "las", "un", "una", "unos", "unas", - "de", "del", "al", "a", "en", "con", "por", "para", - "que", "qué", "como", "cómo", "donde", "dónde", "cuando", "cuándo", - "es", "son", "está", "están", "ser", "estar", "tener", "hacer", - "y", "o", "pero", "si", "no", "me", "te", "se", "nos", - "the", "a", "an", "is", "are", "was", "were", "be", "been", - "have", "has", "had", "do", "does", "did", "will", "would", - "can", "could", "should", "may", "might", "must", - "and", "or", "but", "if", "then", "else", "when", "where", - "what", "which", "who", "whom", "this", "that", "these", "those", - "i", "you", "he", "she", "it", "we", "they", "my", "your", "his", "her" - } - - # Extraer palabras - words = re.findall(r'\b\w+\b', text.lower()) - - # Filtrar - keywords = [ - w for w in words - if len(w) > 2 and w not in stopwords - ] - - return list(set(keywords)) - - def _format_ambient_context(self, ambient: AmbientContext) -> str: - """Formatea el contexto ambiental como texto""" - lines = ["[Contexto del sistema]"] - - env = ambient.environment - if env: - if env.get("timezone"): - lines.append(f"- Zona horaria: {env['timezone']}") - if env.get("working_directory"): - lines.append(f"- Directorio: {env['working_directory']}") - if env.get("git_branch"): - lines.append(f"- Git branch: {env['git_branch']}") - if env.get("active_project"): - lines.append(f"- Proyecto activo: {env['active_project']}") - - state = ambient.system_state - if state: - if state.get("servers"): - servers = state["servers"] - online = [k for k, v in servers.items() if v == "online"] - if online: - lines.append(f"- Servidores online: {', '.join(online)}") - - if state.get("alerts"): - alerts = state["alerts"] - if alerts: - lines.append(f"- Alertas activas: {len(alerts)}") - - return "\n".join(lines) - - -class ContextManager: - """ - Gestor completo de contexto. - - Combina el selector con el logging y métricas. 
- """ - - def __init__( - self, - db: Database = None, - host: str = None, - port: int = None, - database: str = None, - user: str = None, - password: str = None - ): - if db: - self.db = db - else: - self.db = Database( - host=host, - port=port, - database=database, - user=user, - password=password - ) - - self.selector = ContextSelector(self.db) - self._current_session: Optional[Session] = None - - def start_session( - self, - user_id: str = None, - instance_id: str = None, - model_provider: str = None, - model_name: str = None, - metadata: Dict[str, Any] = None - ) -> Session: - """Inicia una nueva sesión""" - algorithm = self.db.get_active_algorithm() - self._current_session = self.db.create_session( - user_id=user_id, - instance_id=instance_id, - model_provider=model_provider, - model_name=model_name, - algorithm_id=algorithm.id if algorithm else None, - metadata=metadata - ) - return self._current_session - - def get_context_for_message( - self, - message: str, - max_tokens: int = None, - session: Session = None - ) -> SelectedContext: - """Obtiene el contexto para un mensaje""" - session = session or self._current_session - if not session: - raise ValueError("No hay sesión activa. Llama a start_session() primero.") - - return self.selector.select_context( - session=session, - user_message=message, - max_tokens=max_tokens - ) - - def log_user_message( - self, - content: str, - context: SelectedContext = None, - session: Session = None - ) -> uuid.UUID: - """Registra un mensaje del usuario en el log inmutable""" - session = session or self._current_session - if not session: - raise ValueError("No hay sesión activa.") - - return self.db.insert_log_entry( - session_id=session.id, - role=MessageRole.USER, - content=content, - model_provider=session.model_provider, - model_name=session.model_name, - context_snapshot=context.to_dict() if context else None, - context_algorithm_id=context.algorithm_id if context else None, - context_tokens_used=context.total_tokens if context else None - ) - - def log_assistant_message( - self, - content: str, - tokens_input: int = None, - tokens_output: int = None, - latency_ms: int = None, - model_provider: str = None, - model_name: str = None, - model_params: Dict[str, Any] = None, - session: Session = None - ) -> uuid.UUID: - """Registra una respuesta del asistente en el log inmutable""" - session = session or self._current_session - if not session: - raise ValueError("No hay sesión activa.") - - return self.db.insert_log_entry( - session_id=session.id, - role=MessageRole.ASSISTANT, - content=content, - model_provider=model_provider or session.model_provider, - model_name=model_name or session.model_name, - model_params=model_params, - tokens_input=tokens_input, - tokens_output=tokens_output, - latency_ms=latency_ms - ) - - def record_metric( - self, - context: SelectedContext, - log_entry_id: uuid.UUID, - tokens_budget: int, - latency_ms: int = None, - model_tokens_input: int = None, - model_tokens_output: int = None, - session: Session = None - ) -> uuid.UUID: - """Registra una métrica de uso del algoritmo""" - session = session or self._current_session - if not session or not context.algorithm_id: - return None - - return self.db.record_metric( - algorithm_id=context.algorithm_id, - session_id=session.id, - log_entry_id=log_entry_id, - tokens_budget=tokens_budget, - tokens_used=context.total_tokens, - context_composition=context.composition, - latency_ms=latency_ms, - model_tokens_input=model_tokens_input, - model_tokens_output=model_tokens_output - ) 
- - def rate_response( - self, - metric_id: uuid.UUID, - relevance: float = None, - quality: float = None, - satisfaction: float = None - ): - """Evalúa una respuesta (feedback manual)""" - self.db.update_metric_evaluation( - metric_id=metric_id, - relevance=relevance, - quality=quality, - satisfaction=satisfaction, - method="user_feedback" - ) - - def verify_session_integrity(self, session: Session = None) -> Dict[str, Any]: - """Verifica la integridad de la sesión""" - session = session or self._current_session - if not session: - raise ValueError("No hay sesión activa.") - - return self.db.verify_chain_integrity(session.id) - - def close(self): - """Cierra las conexiones""" - self.db.close() diff --git a/context-manager/src/database.py b/context-manager/src/database.py deleted file mode 100644 index ae6e8db..0000000 --- a/context-manager/src/database.py +++ /dev/null @@ -1,621 +0,0 @@ -""" -Conexión a base de datos PostgreSQL -""" - -import os -import uuid -import json -from datetime import datetime -from typing import Optional, List, Dict, Any -from contextlib import contextmanager - -try: - import psycopg2 - from psycopg2.extras import RealDictCursor, Json - from psycopg2 import pool - HAS_PSYCOPG2 = True -except ImportError: - HAS_PSYCOPG2 = False - -from .models import ( - Session, Message, MessageRole, ContextBlock, Memory, - Knowledge, Algorithm, AlgorithmStatus, AlgorithmMetric, - AmbientContext -) - - -class Database: - """Gestión de conexión a PostgreSQL""" - - def __init__( - self, - host: str = None, - port: int = None, - database: str = None, - user: str = None, - password: str = None, - min_connections: int = 1, - max_connections: int = 10 - ): - if not HAS_PSYCOPG2: - raise ImportError("psycopg2 no está instalado. Ejecuta: pip install psycopg2-binary") - - self.host = host or os.getenv("PGHOST", "localhost") - self.port = port or int(os.getenv("PGPORT", "5432")) - self.database = database or os.getenv("PGDATABASE", "context_manager") - self.user = user or os.getenv("PGUSER", "postgres") - self.password = password or os.getenv("PGPASSWORD", "") - - self._pool = pool.ThreadedConnectionPool( - min_connections, - max_connections, - host=self.host, - port=self.port, - database=self.database, - user=self.user, - password=self.password - ) - - @contextmanager - def get_connection(self): - """Obtiene una conexión del pool""" - conn = self._pool.getconn() - try: - yield conn - conn.commit() - except Exception: - conn.rollback() - raise - finally: - self._pool.putconn(conn) - - @contextmanager - def get_cursor(self, dict_cursor: bool = True): - """Obtiene un cursor""" - with self.get_connection() as conn: - cursor_factory = RealDictCursor if dict_cursor else None - with conn.cursor(cursor_factory=cursor_factory) as cur: - yield cur - - def close(self): - """Cierra el pool de conexiones""" - self._pool.closeall() - - # ========================================== - # SESIONES - # ========================================== - - def create_session( - self, - user_id: str = None, - instance_id: str = None, - model_provider: str = None, - model_name: str = None, - algorithm_id: uuid.UUID = None, - metadata: Dict[str, Any] = None - ) -> Session: - """Crea una nueva sesión""" - with self.get_cursor() as cur: - cur.execute( - """ - SELECT create_session(%s, %s, %s, %s, %s, %s) as id - """, - (user_id, instance_id, model_provider, model_name, - str(algorithm_id) if algorithm_id else None, - Json(metadata or {})) - ) - session_id = cur.fetchone()["id"] - - return Session( - id=session_id, - 
user_id=user_id, - instance_id=instance_id, - model_provider=model_provider, - model_name=model_name, - algorithm_id=algorithm_id, - metadata=metadata or {} - ) - - def get_session(self, session_id: uuid.UUID) -> Optional[Session]: - """Obtiene una sesión por ID""" - with self.get_cursor() as cur: - cur.execute( - "SELECT * FROM sessions WHERE id = %s", - (str(session_id),) - ) - row = cur.fetchone() - if row: - return Session( - id=row["id"], - user_id=row["user_id"], - instance_id=row["instance_id"], - model_provider=row["initial_model_provider"], - model_name=row["initial_model_name"], - algorithm_id=row["initial_context_algorithm_id"], - metadata=row["metadata"] or {}, - started_at=row["started_at"], - ended_at=row["ended_at"], - total_messages=row["total_messages"], - total_tokens_input=row["total_tokens_input"], - total_tokens_output=row["total_tokens_output"] - ) - return None - - # ========================================== - # LOG INMUTABLE - # ========================================== - - def insert_log_entry( - self, - session_id: uuid.UUID, - role: MessageRole, - content: str, - model_provider: str = None, - model_name: str = None, - model_params: Dict[str, Any] = None, - context_snapshot: Dict[str, Any] = None, - context_algorithm_id: uuid.UUID = None, - context_tokens_used: int = None, - tokens_input: int = None, - tokens_output: int = None, - latency_ms: int = None, - source_ip: str = None, - user_agent: str = None - ) -> uuid.UUID: - """Inserta una entrada en el log inmutable""" - with self.get_cursor() as cur: - cur.execute( - """ - SELECT insert_log_entry( - %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s - ) as id - """, - ( - str(session_id), - role.value, - content, - model_provider, - model_name, - Json(model_params or {}), - Json(context_snapshot) if context_snapshot else None, - str(context_algorithm_id) if context_algorithm_id else None, - context_tokens_used, - tokens_input, - tokens_output, - latency_ms, - source_ip, - user_agent - ) - ) - return cur.fetchone()["id"] - - def get_session_history( - self, - session_id: uuid.UUID, - limit: int = None, - include_system: bool = False - ) -> List[Message]: - """Obtiene el historial de una sesión""" - with self.get_cursor() as cur: - query = """ - SELECT * FROM immutable_log - WHERE session_id = %s - """ - params = [str(session_id)] - - if not include_system: - query += " AND role != 'system'" - - query += " ORDER BY sequence_num DESC" - - if limit: - query += " LIMIT %s" - params.append(limit) - - cur.execute(query, params) - rows = cur.fetchall() - - messages = [] - for row in reversed(rows): - messages.append(Message( - id=row["id"], - session_id=row["session_id"], - sequence_num=row["sequence_num"], - role=MessageRole(row["role"]), - content=row["content"], - hash=row["hash"], - hash_anterior=row["hash_anterior"], - model_provider=row["model_provider"], - model_name=row["model_name"], - model_params=row["model_params"] or {}, - context_snapshot=row["context_snapshot"], - context_algorithm_id=row["context_algorithm_id"], - context_tokens_used=row["context_tokens_used"], - tokens_input=row["tokens_input"], - tokens_output=row["tokens_output"], - latency_ms=row["latency_ms"], - created_at=row["created_at"] - )) - return messages - - def verify_chain_integrity(self, session_id: uuid.UUID) -> Dict[str, Any]: - """Verifica la integridad de la cadena de hashes""" - with self.get_cursor() as cur: - cur.execute( - "SELECT * FROM verify_chain_integrity(%s)", - (str(session_id),) - ) - row = cur.fetchone() - return { - 
"is_valid": row["is_valid"], - "broken_at_sequence": row["broken_at_sequence"], - "expected_hash": row["expected_hash"], - "actual_hash": row["actual_hash"] - } - - # ========================================== - # BLOQUES DE CONTEXTO - # ========================================== - - def create_context_block(self, block: ContextBlock) -> uuid.UUID: - """Crea un bloque de contexto""" - with self.get_cursor() as cur: - cur.execute( - """ - INSERT INTO context_blocks - (code, name, description, content, category, priority, scope, project_id, activation_rules, active) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) - RETURNING id - """, - ( - block.code, block.name, block.description, block.content, - block.category, block.priority, block.scope, - str(block.project_id) if block.project_id else None, - Json(block.activation_rules), block.active - ) - ) - return cur.fetchone()["id"] - - def get_active_context_blocks( - self, - category: str = None, - scope: str = None, - project_id: uuid.UUID = None - ) -> List[ContextBlock]: - """Obtiene bloques de contexto activos""" - with self.get_cursor() as cur: - query = "SELECT * FROM context_blocks WHERE active = true" - params = [] - - if category: - query += " AND category = %s" - params.append(category) - if scope: - query += " AND scope = %s" - params.append(scope) - if project_id: - query += " AND (project_id = %s OR project_id IS NULL)" - params.append(str(project_id)) - - query += " ORDER BY priority DESC" - - cur.execute(query, params) - return [ - ContextBlock( - id=row["id"], - code=row["code"], - name=row["name"], - description=row["description"], - content=row["content"], - content_hash=row["content_hash"], - category=row["category"], - priority=row["priority"], - tokens_estimated=row["tokens_estimated"], - scope=row["scope"], - project_id=row["project_id"], - activation_rules=row["activation_rules"] or {}, - active=row["active"], - version=row["version"] - ) - for row in cur.fetchall() - ] - - # ========================================== - # MEMORIA - # ========================================== - - def get_memories( - self, - type: str = None, - min_importance: int = 0, - limit: int = 20 - ) -> List[Memory]: - """Obtiene memorias activas""" - with self.get_cursor() as cur: - query = """ - SELECT * FROM memory - WHERE active = true - AND importance >= %s - AND (expires_at IS NULL OR expires_at > NOW()) - """ - params = [min_importance] - - if type: - query += " AND type = %s" - params.append(type) - - query += " ORDER BY importance DESC, last_used_at DESC NULLS LAST LIMIT %s" - params.append(limit) - - cur.execute(query, params) - return [ - Memory( - id=row["id"], - type=row["type"], - category=row["category"], - content=row["content"], - summary=row["summary"], - importance=row["importance"], - confidence=float(row["confidence"]) if row["confidence"] else 1.0, - uses=row["uses"], - last_used_at=row["last_used_at"], - verified=row["verified"] - ) - for row in cur.fetchall() - ] - - def save_memory(self, memory: Memory) -> uuid.UUID: - """Guarda una memoria""" - with self.get_cursor() as cur: - cur.execute( - """ - INSERT INTO memory - (type, category, content, summary, extracted_from_session, importance, confidence, expires_at) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s) - RETURNING id - """, - ( - memory.type, memory.category, memory.content, memory.summary, - str(memory.extracted_from_session) if memory.extracted_from_session else None, - memory.importance, memory.confidence, memory.expires_at - ) - ) - return cur.fetchone()["id"] - - # 
========================================== - # CONOCIMIENTO - # ========================================== - - def search_knowledge( - self, - keywords: List[str] = None, - category: str = None, - tags: List[str] = None, - limit: int = 5 - ) -> List[Knowledge]: - """Busca en la base de conocimiento""" - with self.get_cursor() as cur: - query = "SELECT * FROM knowledge_base WHERE active = true" - params = [] - - if category: - query += " AND category = %s" - params.append(category) - - if tags: - query += " AND tags && %s" - params.append(tags) - - if keywords: - # Búsqueda simple por contenido - keyword_conditions = [] - for kw in keywords: - keyword_conditions.append("content ILIKE %s") - params.append(f"%{kw}%") - query += f" AND ({' OR '.join(keyword_conditions)})" - - query += " ORDER BY priority DESC, access_count DESC LIMIT %s" - params.append(limit) - - cur.execute(query, params) - return [ - Knowledge( - id=row["id"], - title=row["title"], - category=row["category"], - tags=row["tags"] or [], - content=row["content"], - tokens_estimated=row["tokens_estimated"], - priority=row["priority"], - access_count=row["access_count"] - ) - for row in cur.fetchall() - ] - - # ========================================== - # CONTEXTO AMBIENTAL - # ========================================== - - def get_latest_ambient_context(self) -> Optional[AmbientContext]: - """Obtiene el contexto ambiental más reciente""" - with self.get_cursor() as cur: - cur.execute( - """ - SELECT * FROM ambient_context - WHERE expires_at > NOW() - ORDER BY captured_at DESC - LIMIT 1 - """ - ) - row = cur.fetchone() - if row: - return AmbientContext( - id=row["id"], - captured_at=row["captured_at"], - expires_at=row["expires_at"], - environment=row["environment"] or {}, - system_state=row["system_state"] or {}, - active_resources=row["active_resources"] or [] - ) - return None - - def save_ambient_context(self, context: AmbientContext) -> int: - """Guarda un snapshot de contexto ambiental""" - with self.get_cursor() as cur: - cur.execute( - """ - INSERT INTO ambient_context - (environment, system_state, active_resources, expires_at) - VALUES (%s, %s, %s, %s) - RETURNING id - """, - ( - Json(context.environment), - Json(context.system_state), - Json(context.active_resources), - context.expires_at - ) - ) - return cur.fetchone()["id"] - - # ========================================== - # ALGORITMOS - # ========================================== - - def get_active_algorithm(self) -> Optional[Algorithm]: - """Obtiene el algoritmo activo""" - with self.get_cursor() as cur: - cur.execute( - """ - SELECT * FROM context_algorithms - WHERE status = 'active' - ORDER BY activated_at DESC - LIMIT 1 - """ - ) - row = cur.fetchone() - if row: - return Algorithm( - id=row["id"], - code=row["code"], - name=row["name"], - description=row["description"], - version=row["version"], - status=AlgorithmStatus(row["status"]), - config=row["config"] or {}, - selector_code=row["selector_code"], - times_used=row["times_used"], - avg_tokens_used=float(row["avg_tokens_used"]) if row["avg_tokens_used"] else None, - avg_relevance_score=float(row["avg_relevance_score"]) if row["avg_relevance_score"] else None, - avg_response_quality=float(row["avg_response_quality"]) if row["avg_response_quality"] else None, - parent_algorithm_id=row["parent_algorithm_id"], - activated_at=row["activated_at"] - ) - return None - - def get_algorithm(self, algorithm_id: uuid.UUID) -> Optional[Algorithm]: - """Obtiene un algoritmo por ID""" - with self.get_cursor() as cur: - 
cur.execute( - "SELECT * FROM context_algorithms WHERE id = %s", - (str(algorithm_id),) - ) - row = cur.fetchone() - if row: - return Algorithm( - id=row["id"], - code=row["code"], - name=row["name"], - description=row["description"], - version=row["version"], - status=AlgorithmStatus(row["status"]), - config=row["config"] or {}, - selector_code=row["selector_code"], - times_used=row["times_used"] - ) - return None - - def fork_algorithm( - self, - source_id: uuid.UUID, - new_code: str, - new_name: str, - reason: str = None - ) -> uuid.UUID: - """Clona un algoritmo para experimentación""" - with self.get_cursor() as cur: - cur.execute( - "SELECT fork_algorithm(%s, %s, %s, %s) as id", - (str(source_id), new_code, new_name, reason) - ) - return cur.fetchone()["id"] - - def activate_algorithm(self, algorithm_id: uuid.UUID) -> bool: - """Activa un algoritmo""" - with self.get_cursor() as cur: - cur.execute( - "SELECT activate_algorithm(%s) as success", - (str(algorithm_id),) - ) - return cur.fetchone()["success"] - - # ========================================== - # MÉTRICAS - # ========================================== - - def record_metric( - self, - algorithm_id: uuid.UUID, - session_id: uuid.UUID, - log_entry_id: uuid.UUID, - tokens_budget: int, - tokens_used: int, - context_composition: Dict[str, Any], - latency_ms: int = None, - model_tokens_input: int = None, - model_tokens_output: int = None - ) -> uuid.UUID: - """Registra una métrica de uso""" - with self.get_cursor() as cur: - cur.execute( - """ - SELECT record_algorithm_metric( - %s, %s, %s, %s, %s, %s, %s, %s, %s - ) as id - """, - ( - str(algorithm_id), str(session_id), str(log_entry_id), - tokens_budget, tokens_used, Json(context_composition), - latency_ms, model_tokens_input, model_tokens_output - ) - ) - return cur.fetchone()["id"] - - def update_metric_evaluation( - self, - metric_id: uuid.UUID, - relevance: float = None, - quality: float = None, - satisfaction: float = None, - method: str = "manual" - ) -> bool: - """Actualiza la evaluación de una métrica""" - with self.get_cursor() as cur: - cur.execute( - "SELECT update_metric_evaluation(%s, %s, %s, %s, %s) as success", - (str(metric_id), relevance, quality, satisfaction, method) - ) - return cur.fetchone()["success"] - - def get_algorithm_performance(self, algorithm_id: uuid.UUID = None) -> List[Dict[str, Any]]: - """Obtiene estadísticas de rendimiento de algoritmos""" - with self.get_cursor() as cur: - query = "SELECT * FROM algorithm_performance" - params = [] - - if algorithm_id: - query += " WHERE id = %s" - params.append(str(algorithm_id)) - - cur.execute(query, params) - return [dict(row) for row in cur.fetchall()] diff --git a/context-manager/src/models.py b/context-manager/src/models.py deleted file mode 100644 index 7ed6d3e..0000000 --- a/context-manager/src/models.py +++ /dev/null @@ -1,309 +0,0 @@ -""" -Modelos de datos para Context Manager -""" - -from dataclasses import dataclass, field -from datetime import datetime -from typing import Optional, List, Dict, Any -from enum import Enum -import uuid -import hashlib -import json - - -class MessageRole(Enum): - USER = "user" - ASSISTANT = "assistant" - SYSTEM = "system" - TOOL = "tool" - - -class ContextSource(Enum): - MEMORY = "memory" - KNOWLEDGE = "knowledge" - HISTORY = "history" - AMBIENT = "ambient" - DATASET = "dataset" - - -class AlgorithmStatus(Enum): - DRAFT = "draft" - TESTING = "testing" - ACTIVE = "active" - DEPRECATED = "deprecated" - - -@dataclass -class Session: - """Sesión de conversación""" - id: 
uuid.UUID = field(default_factory=uuid.uuid4) - user_id: Optional[str] = None - instance_id: Optional[str] = None - model_provider: Optional[str] = None - model_name: Optional[str] = None - algorithm_id: Optional[uuid.UUID] = None - metadata: Dict[str, Any] = field(default_factory=dict) - started_at: datetime = field(default_factory=datetime.now) - ended_at: Optional[datetime] = None - total_messages: int = 0 - total_tokens_input: int = 0 - total_tokens_output: int = 0 - - @property - def hash(self) -> str: - content = f"{self.id}{self.started_at.isoformat()}" - return hashlib.sha256(content.encode()).hexdigest() - - -@dataclass -class Message: - """Mensaje en el log inmutable""" - id: uuid.UUID = field(default_factory=uuid.uuid4) - session_id: uuid.UUID = None - sequence_num: int = 0 - role: MessageRole = MessageRole.USER - content: str = "" - hash: str = "" - hash_anterior: Optional[str] = None - - # Modelo - model_provider: Optional[str] = None - model_name: Optional[str] = None - model_params: Dict[str, Any] = field(default_factory=dict) - - # Contexto - context_snapshot: Optional[Dict[str, Any]] = None - context_algorithm_id: Optional[uuid.UUID] = None - context_tokens_used: Optional[int] = None - - # Respuesta - tokens_input: Optional[int] = None - tokens_output: Optional[int] = None - latency_ms: Optional[int] = None - - # Metadata - created_at: datetime = field(default_factory=datetime.now) - source_ip: Optional[str] = None - user_agent: Optional[str] = None - - def compute_hash(self) -> str: - """Calcula el hash del mensaje (blockchain-style)""" - content = ( - (self.hash_anterior or "") + - str(self.session_id) + - str(self.sequence_num) + - self.role.value + - self.content - ) - return hashlib.sha256(content.encode()).hexdigest() - - -@dataclass -class ContextBlock: - """Bloque de contexto reutilizable""" - id: uuid.UUID = field(default_factory=uuid.uuid4) - code: str = "" - name: str = "" - description: Optional[str] = None - content: str = "" - content_hash: Optional[str] = None - category: str = "general" - priority: int = 50 - tokens_estimated: int = 0 - scope: str = "global" - project_id: Optional[uuid.UUID] = None - activation_rules: Dict[str, Any] = field(default_factory=dict) - active: bool = True - version: int = 1 - created_at: datetime = field(default_factory=datetime.now) - updated_at: datetime = field(default_factory=datetime.now) - - def __post_init__(self): - self.content_hash = hashlib.sha256(self.content.encode()).hexdigest() - self.tokens_estimated = len(self.content) // 4 - - -@dataclass -class Memory: - """Memoria a largo plazo""" - id: uuid.UUID = field(default_factory=uuid.uuid4) - type: str = "fact" - category: Optional[str] = None - content: str = "" - summary: Optional[str] = None - content_hash: Optional[str] = None - extracted_from_session: Optional[uuid.UUID] = None - importance: int = 50 - confidence: float = 1.0 - uses: int = 0 - last_used_at: Optional[datetime] = None - expires_at: Optional[datetime] = None - active: bool = True - verified: bool = False - created_at: datetime = field(default_factory=datetime.now) - - -@dataclass -class Knowledge: - """Base de conocimiento""" - id: uuid.UUID = field(default_factory=uuid.uuid4) - title: str = "" - category: str = "" - tags: List[str] = field(default_factory=list) - content: str = "" - content_hash: Optional[str] = None - tokens_estimated: int = 0 - source_type: Optional[str] = None - source_ref: Optional[str] = None - priority: int = 50 - access_count: int = 0 - active: bool = True - created_at: 
datetime = field(default_factory=datetime.now) - - -@dataclass -class Algorithm: - """Algoritmo de selección de contexto""" - id: uuid.UUID = field(default_factory=uuid.uuid4) - code: str = "" - name: str = "" - description: Optional[str] = None - version: str = "1.0.0" - status: AlgorithmStatus = AlgorithmStatus.DRAFT - config: Dict[str, Any] = field(default_factory=lambda: { - "max_tokens": 4000, - "sources": { - "system_prompts": True, - "context_blocks": True, - "memory": True, - "knowledge": True, - "history": True, - "ambient": True - }, - "weights": { - "priority": 0.4, - "relevance": 0.3, - "recency": 0.2, - "frequency": 0.1 - }, - "history_config": { - "max_messages": 20, - "summarize_after": 10, - "include_system": False - }, - "memory_config": { - "max_items": 15, - "min_importance": 30 - }, - "knowledge_config": { - "max_items": 5, - "require_keyword_match": True - } - }) - selector_code: Optional[str] = None - times_used: int = 0 - avg_tokens_used: Optional[float] = None - avg_relevance_score: Optional[float] = None - avg_response_quality: Optional[float] = None - parent_algorithm_id: Optional[uuid.UUID] = None - fork_reason: Optional[str] = None - created_at: datetime = field(default_factory=datetime.now) - activated_at: Optional[datetime] = None - deprecated_at: Optional[datetime] = None - - -@dataclass -class AlgorithmMetric: - """Métrica de rendimiento de algoritmo""" - id: uuid.UUID = field(default_factory=uuid.uuid4) - algorithm_id: uuid.UUID = None - session_id: Optional[uuid.UUID] = None - log_entry_id: Optional[uuid.UUID] = None - tokens_budget: int = 0 - tokens_used: int = 0 - token_efficiency: float = 0.0 - context_composition: Dict[str, Any] = field(default_factory=dict) - latency_ms: Optional[int] = None - model_tokens_input: Optional[int] = None - model_tokens_output: Optional[int] = None - relevance_score: Optional[float] = None - response_quality: Optional[float] = None - user_satisfaction: Optional[float] = None - auto_evaluated: bool = False - evaluation_method: Optional[str] = None - recorded_at: datetime = field(default_factory=datetime.now) - - -@dataclass -class AmbientContext: - """Contexto ambiental del sistema""" - id: int = 0 - captured_at: datetime = field(default_factory=datetime.now) - expires_at: Optional[datetime] = None - environment: Dict[str, Any] = field(default_factory=dict) - system_state: Dict[str, Any] = field(default_factory=dict) - active_resources: List[Dict[str, Any]] = field(default_factory=list) - - -@dataclass -class ContextItem: - """Item individual de contexto seleccionado""" - source: ContextSource - content: str - tokens: int - priority: int - metadata: Dict[str, Any] = field(default_factory=dict) - - -@dataclass -class SelectedContext: - """Contexto seleccionado para enviar al modelo""" - items: List[ContextItem] = field(default_factory=list) - total_tokens: int = 0 - algorithm_id: Optional[uuid.UUID] = None - composition: Dict[str, Any] = field(default_factory=dict) - - def to_messages(self) -> List[Dict[str, str]]: - """Convierte el contexto a formato de mensajes para la API""" - messages = [] - - # System context primero - system_content = [] - for item in self.items: - if item.source in [ContextSource.MEMORY, ContextSource.KNOWLEDGE, ContextSource.AMBIENT]: - system_content.append(item.content) - - if system_content: - messages.append({ - "role": "system", - "content": "\n\n".join(system_content) - }) - - # History messages - for item in self.items: - if item.source == ContextSource.HISTORY: - role = 
item.metadata.get("role", "user") - messages.append({ - "role": role, - "content": item.content - }) - - return messages - - def to_dict(self) -> Dict[str, Any]: - """Serializa el contexto para snapshot""" - return { - "items": [ - { - "source": item.source.value, - "content": item.content[:500] + "..." if len(item.content) > 500 else item.content, - "tokens": item.tokens, - "priority": item.priority, - "metadata": item.metadata - } - for item in self.items - ], - "total_tokens": self.total_tokens, - "algorithm_id": str(self.algorithm_id) if self.algorithm_id else None, - "composition": self.composition - } diff --git a/context-manager/src/providers/__init__.py b/context-manager/src/providers/__init__.py deleted file mode 100644 index 18df91f..0000000 --- a/context-manager/src/providers/__init__.py +++ /dev/null @@ -1,18 +0,0 @@ -""" -Adaptadores para proveedores de IA - -Permite usar el Context Manager con cualquier modelo de IA. -""" - -from .base import BaseProvider, ProviderResponse -from .anthropic import AnthropicProvider -from .openai import OpenAIProvider -from .ollama import OllamaProvider - -__all__ = [ - "BaseProvider", - "ProviderResponse", - "AnthropicProvider", - "OpenAIProvider", - "OllamaProvider", -] diff --git a/context-manager/src/providers/anthropic.py b/context-manager/src/providers/anthropic.py deleted file mode 100644 index 18d1e15..0000000 --- a/context-manager/src/providers/anthropic.py +++ /dev/null @@ -1,110 +0,0 @@ -""" -Adaptador para Anthropic (Claude) -""" - -import os -from typing import List, Dict, Any, Optional - -from .base import BaseProvider, ProviderResponse -from ..models import SelectedContext, ContextSource - -try: - import anthropic - HAS_ANTHROPIC = True -except ImportError: - HAS_ANTHROPIC = False - - -class AnthropicProvider(BaseProvider): - """Proveedor para modelos de Anthropic (Claude)""" - - provider_name = "anthropic" - - def __init__( - self, - api_key: str = None, - model: str = "claude-sonnet-4-20250514", - max_tokens: int = 4096, - **kwargs - ): - super().__init__(api_key=api_key, model=model, **kwargs) - - if not HAS_ANTHROPIC: - raise ImportError("anthropic no está instalado. Ejecuta: pip install anthropic") - - self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY") - self.model = model - self.max_tokens = max_tokens - self.client = anthropic.Anthropic(api_key=self.api_key) - - def format_context(self, context: SelectedContext) -> tuple: - """ - Formatea el contexto para la API de Anthropic. 
-
-        Returns:
-            Tuple of (system_prompt, messages)
-        """
-        system_parts = []
-        messages = []
-
-        for item in context.items:
-            if item.source in [ContextSource.MEMORY, ContextSource.KNOWLEDGE,
-                               ContextSource.AMBIENT, ContextSource.DATASET]:
-                system_parts.append(item.content)
-            elif item.source == ContextSource.HISTORY:
-                role = item.metadata.get("role", "user")
-                messages.append({
-                    "role": role,
-                    "content": item.content
-                })
-
-        system_prompt = "\n\n".join(system_parts) if system_parts else None
-        return system_prompt, messages
-
-    def send_message(
-        self,
-        message: str,
-        context: SelectedContext = None,
-        system_prompt: str = None,
-        temperature: float = 1.0,
-        **kwargs
-    ) -> ProviderResponse:
-        """Sends a message to Claude"""
-
-        # Format the context
-        context_system, context_messages = self.format_context(context) if context else (None, [])
-
-        # Merge the system prompts
-        final_system = ""
-        if system_prompt:
-            final_system = system_prompt
-        if context_system:
-            final_system = f"{final_system}\n\n{context_system}" if final_system else context_system
-
-        # Build the message list
-        messages = context_messages.copy()
-        messages.append({"role": "user", "content": message})
-
-        # Call the API
-        response, latency_ms = self._measure_latency(
-            self.client.messages.create,
-            model=self.model,
-            max_tokens=kwargs.get("max_tokens", self.max_tokens),
-            system=final_system if final_system else anthropic.NOT_GIVEN,
-            messages=messages,
-            temperature=temperature
-        )
-
-        return ProviderResponse(
-            content=response.content[0].text,
-            model=response.model,
-            tokens_input=response.usage.input_tokens,
-            tokens_output=response.usage.output_tokens,
-            latency_ms=latency_ms,
-            finish_reason=response.stop_reason,
-            raw_response={
-                "id": response.id,
-                "type": response.type,
-                "role": response.role
-            }
-        )
diff --git a/context-manager/src/providers/base.py b/context-manager/src/providers/base.py
deleted file mode 100644
index bd68a59..0000000
--- a/context-manager/src/providers/base.py
+++ /dev/null
@@ -1,85 +0,0 @@
-"""
-Base class for AI providers
-"""
-
-from abc import ABC, abstractmethod
-from dataclasses import dataclass, field
-from typing import List, Dict, Any, Optional
-import time
-
-from ..models import SelectedContext
-
-
-@dataclass
-class ProviderResponse:
-    """Response from an AI provider"""
-    content: str
-    model: str
-    tokens_input: int = 0
-    tokens_output: int = 0
-    latency_ms: int = 0
-    finish_reason: str = "stop"
-    raw_response: Dict[str, Any] = field(default_factory=dict)
-
-
-class BaseProvider(ABC):
-    """
-    Base class for AI provider adapters.
-
-    Each provider must implement:
-    - send_message(): send a message and receive the response
-    - format_context(): format the context into the provider's message format
-    """
-
-    provider_name: str = "base"
-
-    def __init__(self, api_key: str = None, model: str = None, **kwargs):
-        self.api_key = api_key
-        self.model = model
-        self.extra_config = kwargs
-
-    @abstractmethod
-    def send_message(
-        self,
-        message: str,
-        context: SelectedContext = None,
-        system_prompt: str = None,
-        **kwargs
-    ) -> ProviderResponse:
-        """
-        Sends a message to the model and returns the response.
-
-        Args:
-            message: The user message
-            context: Selected context
-            system_prompt: Additional system prompt
-            **kwargs: Extra model parameters
-
-        Returns:
-            ProviderResponse with the reply
-        """
-        pass
-
-    @abstractmethod
-    def format_context(self, context: SelectedContext) -> List[Dict[str, str]]:
-        """
-        Formats the context into the provider's message format.
-
-        Args:
-            context: Selected context
-
-        Returns:
-            List of messages in the provider's format
-        """
-        pass
-
-    def estimate_tokens(self, text: str) -> int:
-        """Simple token estimate (4 characters per token)"""
-        return len(text) // 4
-
-    def _measure_latency(self, func, *args, **kwargs):
-        """Measures the latency of a function call"""
-        start = time.time()
-        result = func(*args, **kwargs)
-        latency_ms = int((time.time() - start) * 1000)
-        return result, latency_ms
diff --git a/context-manager/src/providers/openai.py b/context-manager/src/providers/openai.py
deleted file mode 100644
index f84f250..0000000
--- a/context-manager/src/providers/openai.py
+++ /dev/null
@@ -1,120 +0,0 @@
-"""
-Adapter for OpenAI (GPT)
-"""
-
-import os
-from typing import List, Dict, Any, Optional
-
-from .base import BaseProvider, ProviderResponse
-from ..models import SelectedContext, ContextSource
-
-try:
-    import openai
-    HAS_OPENAI = True
-except ImportError:
-    HAS_OPENAI = False
-
-
-class OpenAIProvider(BaseProvider):
-    """Provider for OpenAI models (GPT)"""
-
-    provider_name = "openai"
-
-    def __init__(
-        self,
-        api_key: str = None,
-        model: str = "gpt-4",
-        max_tokens: int = 4096,
-        base_url: str = None,
-        **kwargs
-    ):
-        super().__init__(api_key=api_key, model=model, **kwargs)
-
-        if not HAS_OPENAI:
-            raise ImportError("openai is not installed. Run: pip install openai")
-
-        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
-        self.model = model
-        self.max_tokens = max_tokens
-        self.client = openai.OpenAI(
-            api_key=self.api_key,
-            base_url=base_url
-        )
-
-    def format_context(self, context: SelectedContext) -> List[Dict[str, str]]:
-        """
-        Formats the context for the OpenAI API.
-
-        Returns:
-            List of messages in OpenAI format
-        """
-        messages = []
-        system_parts = []
-
-        for item in context.items:
-            if item.source in [ContextSource.MEMORY, ContextSource.KNOWLEDGE,
-                               ContextSource.AMBIENT, ContextSource.DATASET]:
-                system_parts.append(item.content)
-            elif item.source == ContextSource.HISTORY:
-                role = item.metadata.get("role", "user")
-                messages.append({
-                    "role": role,
-                    "content": item.content
-                })
-
-        # Insert the system message at the start
-        if system_parts:
-            messages.insert(0, {
-                "role": "system",
-                "content": "\n\n".join(system_parts)
-            })
-
-        return messages
-
-    def send_message(
-        self,
-        message: str,
-        context: SelectedContext = None,
-        system_prompt: str = None,
-        temperature: float = 1.0,
-        **kwargs
-    ) -> ProviderResponse:
-        """Sends a message to GPT"""
-
-        # Format the context
-        messages = self.format_context(context) if context else []
-
-        # Prepend the additional system prompt
-        if system_prompt:
-            if messages and messages[0]["role"] == "system":
-                messages[0]["content"] = f"{system_prompt}\n\n{messages[0]['content']}"
-            else:
-                messages.insert(0, {"role": "system", "content": system_prompt})
-
-        # Append the user message
-        messages.append({"role": "user", "content": message})
-
-        # Call the API
-        response, latency_ms = self._measure_latency(
-            self.client.chat.completions.create,
-            model=self.model,
-            messages=messages,
-            max_tokens=kwargs.get("max_tokens", self.max_tokens),
-            temperature=temperature
-        )
-
-        choice = response.choices[0]
-
-        return ProviderResponse(
-            content=choice.message.content,
-            model=response.model,
-            tokens_input=response.usage.prompt_tokens,
-            tokens_output=response.usage.completion_tokens,
-            latency_ms=latency_ms,
-            finish_reason=choice.finish_reason,
-            raw_response={
-                "id": response.id,
-                "created": response.created,
-                "system_fingerprint": response.system_fingerprint
-            }
-        )
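
For reviewers tracking the move, the sketch below shows how the removed provider adapters were typically wired together, based only on the interfaces deleted above (SelectedContext, ContextItem, ContextSource, OpenAIProvider). This is a minimal illustration, not part of the patch: the import path context_manager is a hypothetical placeholder (the package name in the dedicated repo may differ), and running it assumes the openai package is installed and OPENAI_API_KEY is set.

# Illustrative sketch only -- assumes the relocated package keeps the same
# public interfaces and is importable as `context_manager` (hypothetical name).
from context_manager.models import SelectedContext, ContextItem, ContextSource
from context_manager.providers.openai import OpenAIProvider

# Assemble a minimal selected context: one long-term memory item (folded into
# the system message by format_context) and one prior history turn.
context = SelectedContext(
    items=[
        ContextItem(
            source=ContextSource.MEMORY,
            content="The user prefers short, direct answers.",
            tokens=9,
            priority=80,
        ),
        ContextItem(
            source=ContextSource.HISTORY,
            content="What does the context manager keep track of?",
            tokens=10,
            priority=50,
            metadata={"role": "user"},
        ),
    ],
    total_tokens=19,
)

# Requires `pip install openai` and OPENAI_API_KEY in the environment.
provider = OpenAIProvider(model="gpt-4")
response = provider.send_message(
    "Summarize my stored preferences.",
    context=context,
    system_prompt="You are a concise assistant.",
    temperature=0.2,
)
print(response.content, response.tokens_input, response.tokens_output, response.latency_ms)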