Add pending apps and frontend components

- apps/captain-mobile: Mobile API service
- apps/flow-ui: Flow UI application
- apps/mindlink: Mindlink application
- apps/storage: Storage API and workers
- apps/tzzr-cli: TZZR CLI tool
- deck-frontend/backups: Historical TypeScript versions
- hst-frontend: Standalone HST frontend

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
ARCHITECT
2026-01-16 18:26:59 +00:00
parent 17506aaee2
commit 9b244138b5
177 changed files with 15063 additions and 0 deletions

View File

@@ -0,0 +1,20 @@
[Unit]
Description=Captain Claude Mobile API
After=network.target postgresql.service
[Service]
Type=simple
User=architect
WorkingDirectory=/home/architect/captain-claude/apps/captain-mobile
Environment="PATH=/home/architect/.local/bin:/usr/local/bin:/usr/bin:/bin"
Environment="JWT_SECRET=captain-claude-mobile-prod-secret-2025"
Environment="API_USER=captain"
Environment="API_PASSWORD=tzzr2025"
Environment="DATABASE_URL=postgresql://captain:captain@localhost/captain_mobile"
Environment="CLAUDE_CMD=/home/architect/.claude/local/claude"
ExecStart=/home/architect/captain-claude/apps/captain-mobile/venv/bin/uvicorn captain_api:app --host 127.0.0.1 --port 3030
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target

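A minimal smoke test for the unit above, sketched under the assumption that the service is up on 127.0.0.1:3030 as set in ExecStart and that the requests package is available on the client (it is not one of the app's own pins):

# Hypothetical health check against the captain-mobile service.
import requests

resp = requests.get("http://127.0.0.1:3030/health", timeout=5)
resp.raise_for_status()
print(resp.json())  # expect {"status": "ok", "service": "captain-api", "version": "1.0.0"}
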
View File

@@ -0,0 +1,482 @@
#!/usr/bin/env python3
"""
Captain Claude Mobile API
FastAPI backend for Captain Claude mobile app
Provides REST + WebSocket endpoints for chat and terminal access
"""
import asyncio
import json
import os
import pty
import select
import subprocess
import uuid
from datetime import datetime, timedelta
from typing import Optional
import asyncpg
import jwt
from fastapi import FastAPI, WebSocket, WebSocketDisconnect, HTTPException, Depends, UploadFile, File
from fastapi.middleware.cors import CORSMiddleware
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
from pydantic import BaseModel
from contextlib import asynccontextmanager
# Configuration
JWT_SECRET = os.getenv("JWT_SECRET", "captain-claude-secret-key-change-in-production")
JWT_ALGORITHM = "HS256"
JWT_EXPIRATION_DAYS = 7
API_USER = os.getenv("API_USER", "captain")
API_PASSWORD = os.getenv("API_PASSWORD", "tzzr2025")
DATABASE_URL = os.getenv("DATABASE_URL", "postgresql://captain:captain@localhost/captain_mobile")
CLAUDE_CMD = os.getenv("CLAUDE_CMD", "/home/architect/.claude/local/claude")
# Database pool
db_pool: Optional[asyncpg.Pool] = None
@asynccontextmanager
async def lifespan(app: FastAPI):
global db_pool
try:
db_pool = await asyncpg.create_pool(DATABASE_URL, min_size=2, max_size=10)
await init_database()
print("Database connected")
except Exception as e:
print(f"Database connection failed: {e}")
db_pool = None
yield
if db_pool:
await db_pool.close()
app = FastAPI(
title="Captain Claude Mobile API",
version="1.0.0",
lifespan=lifespan
)
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
security = HTTPBearer()
# Models
class LoginRequest(BaseModel):
username: str
password: str
class LoginResponse(BaseModel):
token: str
expires_at: str
class Message(BaseModel):
role: str
content: str
timestamp: str
class Conversation(BaseModel):
id: str
title: str
created_at: str
message_count: int
class ScreenSession(BaseModel):
name: str
pid: str
attached: bool
# Database initialization
async def init_database():
if not db_pool:
return
async with db_pool.acquire() as conn:
await conn.execute("""
CREATE TABLE IF NOT EXISTS conversations (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id VARCHAR(255) NOT NULL,
title VARCHAR(500),
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
)
""")
await conn.execute("""
CREATE TABLE IF NOT EXISTS messages (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
conversation_id UUID REFERENCES conversations(id) ON DELETE CASCADE,
role VARCHAR(50) NOT NULL,
content TEXT NOT NULL,
created_at TIMESTAMP DEFAULT NOW()
)
""")
await conn.execute("""
CREATE INDEX IF NOT EXISTS idx_messages_conversation ON messages(conversation_id)
""")
await conn.execute("""
CREATE INDEX IF NOT EXISTS idx_conversations_user ON conversations(user_id)
""")
# Auth helpers
def create_token(username: str) -> tuple[str, datetime]:
expires = datetime.utcnow() + timedelta(days=JWT_EXPIRATION_DAYS)
token = jwt.encode(
{"sub": username, "exp": expires},
JWT_SECRET,
algorithm=JWT_ALGORITHM
)
return token, expires
def verify_token(credentials: HTTPAuthorizationCredentials = Depends(security)) -> str:
try:
payload = jwt.decode(credentials.credentials, JWT_SECRET, algorithms=[JWT_ALGORITHM])
return payload["sub"]
except jwt.ExpiredSignatureError:
raise HTTPException(status_code=401, detail="Token expired")
except jwt.InvalidTokenError:
raise HTTPException(status_code=401, detail="Invalid token")
# REST Endpoints
@app.get("/health")
async def health():
return {"status": "ok", "service": "captain-api", "version": "1.0.0"}
@app.post("/auth/login", response_model=LoginResponse)
async def login(request: LoginRequest):
if request.username == API_USER and request.password == API_PASSWORD:
token, expires = create_token(request.username)
return LoginResponse(token=token, expires_at=expires.isoformat())
raise HTTPException(status_code=401, detail="Invalid credentials")
@app.get("/sessions")
async def list_sessions(user: str = Depends(verify_token)) -> list[ScreenSession]:
"""List active screen sessions"""
try:
result = subprocess.run(
["screen", "-ls"],
capture_output=True,
text=True
)
sessions = []
for line in result.stdout.split("\n"):
if "\t" in line and ("Attached" in line or "Detached" in line):
parts = line.strip().split("\t")
if len(parts) >= 2:
session_info = parts[0]
pid_name = session_info.split(".")
if len(pid_name) >= 2:
sessions.append(ScreenSession(
pid=pid_name[0],
name=".".join(pid_name[1:]),
attached="Attached" in line
))
return sessions
except Exception as e:
raise HTTPException(status_code=500, detail=str(e))
@app.get("/history")
async def get_history(user: str = Depends(verify_token), limit: int = 20) -> list[Conversation]:
"""Get conversation history"""
if not db_pool:
return []
async with db_pool.acquire() as conn:
rows = await conn.fetch("""
SELECT c.id, c.title, c.created_at,
COUNT(m.id) as message_count
FROM conversations c
LEFT JOIN messages m ON m.conversation_id = c.id
WHERE c.user_id = $1
GROUP BY c.id
ORDER BY c.updated_at DESC
LIMIT $2
""", user, limit)
return [
Conversation(
id=str(row["id"]),
title=row["title"] or "Untitled",
created_at=row["created_at"].isoformat(),
message_count=row["message_count"]
)
for row in rows
]
@app.get("/history/{conversation_id}")
async def get_conversation(conversation_id: str, user: str = Depends(verify_token)) -> list[Message]:
"""Get messages for a conversation"""
if not db_pool:
return []
async with db_pool.acquire() as conn:
rows = await conn.fetch("""
SELECT m.role, m.content, m.created_at
FROM messages m
JOIN conversations c ON c.id = m.conversation_id
WHERE c.id = $1 AND c.user_id = $2
ORDER BY m.created_at ASC
""", uuid.UUID(conversation_id), user)
return [
Message(
role=row["role"],
content=row["content"],
timestamp=row["created_at"].isoformat()
)
for row in rows
]
@app.post("/upload")
async def upload_file(file: UploadFile = File(...), user: str = Depends(verify_token)):
"""Upload a file for context"""
upload_dir = "/tmp/captain-uploads"
os.makedirs(upload_dir, exist_ok=True)
file_id = str(uuid.uuid4())
file_path = os.path.join(upload_dir, f"{file_id}_{file.filename}")
with open(file_path, "wb") as f:
content = await file.read()
f.write(content)
return {"file_id": file_id, "filename": file.filename, "path": file_path}
# WebSocket Chat with Captain Claude
@app.websocket("/ws/chat")
async def websocket_chat(websocket: WebSocket):
await websocket.accept()
# First message should be auth token
try:
auth_data = await asyncio.wait_for(websocket.receive_json(), timeout=10)
token = auth_data.get("token")
if not token:
await websocket.close(code=4001, reason="No token provided")
return
try:
payload = jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM])
user = payload["sub"]
except:
await websocket.close(code=4001, reason="Invalid token")
return
await websocket.send_json({"type": "connected", "user": user})
# Create conversation
conversation_id = None
if db_pool:
async with db_pool.acquire() as conn:
row = await conn.fetchrow(
"INSERT INTO conversations (user_id, title) VALUES ($1, $2) RETURNING id",
user, "New conversation"
)
conversation_id = row["id"]
# Main chat loop
while True:
data = await websocket.receive_json()
if data.get("type") == "message":
user_message = data.get("content", "")
context_files = data.get("files", [])
# Save user message
if db_pool and conversation_id:
async with db_pool.acquire() as conn:
await conn.execute(
"INSERT INTO messages (conversation_id, role, content) VALUES ($1, $2, $3)",
conversation_id, "user", user_message
)
# Build claude command
cmd = [CLAUDE_CMD, "-p", user_message, "--output-format", "stream-json"]
# Add file context if provided
for file_path in context_files:
if os.path.exists(file_path):
cmd.extend(["--file", file_path])
# Stream response from claude
try:
process = await asyncio.create_subprocess_exec(
*cmd,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
cwd="/home/architect/captain-claude"
)
full_response = ""
async for line in process.stdout:
line = line.decode().strip()
if not line:
continue
try:
event = json.loads(line)
event_type = event.get("type")
if event_type == "assistant":
# Start of response
await websocket.send_json({
"type": "start",
"conversation_id": str(conversation_id) if conversation_id else None
})
elif event_type == "content_block_delta":
delta = event.get("delta", {})
if delta.get("type") == "text_delta":
text = delta.get("text", "")
full_response += text
await websocket.send_json({
"type": "delta",
"content": text
})
elif event_type == "result":
# End of response
await websocket.send_json({
"type": "done",
"content": full_response
})
except json.JSONDecodeError:
continue
await process.wait()
# Save assistant message
if db_pool and conversation_id and full_response:
async with db_pool.acquire() as conn:
await conn.execute(
"INSERT INTO messages (conversation_id, role, content) VALUES ($1, $2, $3)",
conversation_id, "assistant", full_response
)
# Update title from first response
title = full_response[:100].split("\n")[0]
await conn.execute(
"UPDATE conversations SET title = $1, updated_at = NOW() WHERE id = $2",
title, conversation_id
)
except Exception as e:
await websocket.send_json({
"type": "error",
"message": str(e)
})
elif data.get("type") == "ping":
await websocket.send_json({"type": "pong"})
except WebSocketDisconnect:
pass
except asyncio.TimeoutError:
await websocket.close(code=4002, reason="Auth timeout")
except Exception as e:
print(f"Chat WebSocket error: {e}")
try:
await websocket.close(code=4000, reason=str(e))
except:
pass
# WebSocket Terminal for screen sessions
@app.websocket("/ws/terminal/{session_name}")
async def websocket_terminal(websocket: WebSocket, session_name: str):
await websocket.accept()
# Auth
try:
auth_data = await asyncio.wait_for(websocket.receive_json(), timeout=10)
token = auth_data.get("token")
if not token:
await websocket.close(code=4001, reason="No token provided")
return
try:
jwt.decode(token, JWT_SECRET, algorithms=[JWT_ALGORITHM])
except:
await websocket.close(code=4001, reason="Invalid token")
return
await websocket.send_json({"type": "connected", "session": session_name})
# Create PTY and attach to screen session
master_fd, slave_fd = pty.openpty()
process = subprocess.Popen(
["screen", "-x", session_name],
stdin=slave_fd,
stdout=slave_fd,
stderr=slave_fd,
preexec_fn=os.setsid
)
os.close(slave_fd)
# Set non-blocking
import fcntl
flags = fcntl.fcntl(master_fd, fcntl.F_GETFL)
fcntl.fcntl(master_fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)
async def read_pty():
"""Read from PTY and send to websocket"""
while True:
try:
await asyncio.sleep(0.01)
r, _, _ = select.select([master_fd], [], [], 0)
if r:
data = os.read(master_fd, 4096)
if data:
await websocket.send_json({
"type": "output",
"data": data.decode("utf-8", errors="replace")
})
except Exception as e:
break
async def write_pty():
"""Read from websocket and write to PTY"""
while True:
try:
data = await websocket.receive_json()
if data.get("type") == "input":
os.write(master_fd, data.get("data", "").encode())
elif data.get("type") == "resize":
import struct
import fcntl
import termios
winsize = struct.pack("HHHH",
data.get("rows", 24),
data.get("cols", 80),
0, 0
)
fcntl.ioctl(master_fd, termios.TIOCSWINSZ, winsize)
except WebSocketDisconnect:
break
except Exception as e:
break
# Run both tasks
read_task = asyncio.create_task(read_pty())
write_task = asyncio.create_task(write_pty())
try:
await asyncio.gather(read_task, write_task)
finally:
read_task.cancel()
write_task.cancel()
os.close(master_fd)
process.terminate()
except asyncio.TimeoutError:
await websocket.close(code=4002, reason="Auth timeout")
except Exception as e:
print(f"Terminal WebSocket error: {e}")
try:
await websocket.close(code=4000, reason=str(e))
except:
pass
if __name__ == "__main__":
import uvicorn
uvicorn.run(app, host="0.0.0.0", port=3030)

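A minimal client sketch for the API above, assuming it is reachable on 127.0.0.1:3030 and that requests and the websockets package are available on the client side: log in over REST, then stream one chat turn over /ws/chat, where the first WebSocket frame must carry the token.

# Sketch of a chat client for captain_api.py (assumed local deployment).
import asyncio
import json

import requests
import websockets

API = "http://127.0.0.1:3030"

async def main():
    # REST login returns a JWT used to authenticate the WebSocket.
    token = requests.post(
        f"{API}/auth/login",
        json={"username": "captain", "password": "tzzr2025"},
    ).json()["token"]

    async with websockets.connect("ws://127.0.0.1:3030/ws/chat") as ws:
        await ws.send(json.dumps({"token": token}))  # first frame is the auth message
        print(await ws.recv())                       # {"type": "connected", ...}
        await ws.send(json.dumps({"type": "message", "content": "hello"}))
        while True:
            event = json.loads(await ws.recv())
            if event["type"] == "delta":
                print(event["content"], end="", flush=True)
            elif event["type"] in ("done", "error"):
                break

asyncio.run(main())
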
View File

@@ -0,0 +1,6 @@
fastapi==0.115.0
uvicorn[standard]==0.30.6
asyncpg==0.29.0
pyjwt==2.9.0
python-multipart==0.0.9
websockets==13.0

View File

@@ -0,0 +1,42 @@
-- Captain Claude Mobile - PostgreSQL Schema
-- Database: captain_mobile
-- Create user and database if not exists (run as postgres superuser)
-- CREATE USER captain WITH PASSWORD 'captain';
-- CREATE DATABASE captain_mobile OWNER captain;
-- GRANT ALL PRIVILEGES ON DATABASE captain_mobile TO captain;
-- Tables for conversation history
CREATE TABLE IF NOT EXISTS conversations (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
user_id VARCHAR(255) NOT NULL,
title VARCHAR(500),
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
);
CREATE TABLE IF NOT EXISTS messages (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
conversation_id UUID REFERENCES conversations(id) ON DELETE CASCADE,
role VARCHAR(50) NOT NULL, -- 'user' or 'assistant'
content TEXT NOT NULL,
created_at TIMESTAMP DEFAULT NOW()
);
-- Indexes
CREATE INDEX IF NOT EXISTS idx_messages_conversation ON messages(conversation_id);
CREATE INDEX IF NOT EXISTS idx_conversations_user ON conversations(user_id);
CREATE INDEX IF NOT EXISTS idx_conversations_updated ON conversations(updated_at DESC);
-- File attachments tracking (optional)
CREATE TABLE IF NOT EXISTS attachments (
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
message_id UUID REFERENCES messages(id) ON DELETE CASCADE,
filename VARCHAR(500) NOT NULL,
file_path VARCHAR(1000) NOT NULL,
mime_type VARCHAR(100),
size_bytes BIGINT,
created_at TIMESTAMP DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_attachments_message ON attachments(message_id);

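A minimal sketch of how the two main tables relate, using asyncpg with the same connection string as the service unit above (it assumes the captain role and captain_mobile database from the commented CREATE statements exist):

# Insert one conversation and one message, then count messages for it.
import asyncio
import asyncpg

async def main():
    conn = await asyncpg.connect("postgresql://captain:captain@localhost/captain_mobile")
    conv_id = await conn.fetchval(
        "INSERT INTO conversations (user_id, title) VALUES ($1, $2) RETURNING id",
        "captain", "Schema smoke test",
    )
    await conn.execute(
        "INSERT INTO messages (conversation_id, role, content) VALUES ($1, 'user', $2)",
        conv_id, "hello",
    )
    print(await conn.fetchval(
        "SELECT COUNT(*) FROM messages WHERE conversation_id = $1", conv_id))
    await conn.close()

asyncio.run(main())
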
1
apps/flow-ui Submodule

Submodule apps/flow-ui added at f0c09b10ad

1
apps/mindlink Submodule

Submodule apps/mindlink added at 40c0944cf7

109
apps/storage/migrate_atc.py Normal file
View File

@@ -0,0 +1,109 @@
#!/usr/bin/env python3
"""
Migration: import existing files from secretaria_clara.atc into storage
"""
import asyncio
import asyncpg
import os
import json
DB_URL = os.environ.get("DATABASE_URL", "postgresql://tzzr:tzzr@localhost:5432/tzzr")
async def migrate():
pool = await asyncpg.create_pool(DB_URL, min_size=2, max_size=10)
async with pool.acquire() as conn:
# Fetch atc rows that have a hash and a url_file
atc_files = await conn.fetch("""
SELECT
mrf,
private_mrf,
alias,
name_es,
ref,
ext,
jsonb_standard,
hashtags
FROM secretaria_clara.atc
WHERE jsonb_standard IS NOT NULL
AND jsonb_standard->'L2_document'->>'url_file' IS NOT NULL
""")
print(f"Encontrados {len(atc_files)} archivos en atc")
migrated = 0
skipped = 0
errors = 0
for file in atc_files:
try:
mrf = file["mrf"]
jsonb = file["jsonb_standard"] or {}
# Extract the data
l2 = jsonb.get("L2_document", {})
url_file = l2.get("url_file")
size_bytes = l2.get("size_bytes", 0)
mime_type = l2.get("mime_type", "application/octet-stream")
if not url_file:
skipped += 1
continue
# Check whether it already exists in storage
existing = await conn.fetchrow("""
SELECT content_hash FROM storage.physical_blobs
WHERE content_hash = $1
""", mrf)
if existing:
skipped += 1
continue
# Insert into physical_blobs
await conn.execute("""
INSERT INTO storage.physical_blobs
(content_hash, file_size, mime_type, storage_provider, storage_path, verification_status)
VALUES ($1, $2, $3, 'R2_PRIMARY', $4, 'VERIFIED')
""", mrf, size_bytes, mime_type, url_file)
# Create the user_asset with the same public_key as the mrf
# Use a dummy UUID for user_id since we have no users yet
await conn.execute("""
INSERT INTO storage.user_assets
(public_key, blob_hash, user_id, original_filename)
VALUES ($1, $2, '00000000-0000-0000-0000-000000000000'::uuid, $3)
ON CONFLICT (public_key) DO NOTHING
""", mrf, mrf, file["name_es"] or file["alias"] or mrf[:20])
migrated += 1
if migrated % 100 == 0:
print(f" Migrados: {migrated}")
except Exception as e:
errors += 1
print(f"Error migrando {file['mrf']}: {e}")
print(f"\nMigración completada:")
print(f" - Migrados: {migrated}")
print(f" - Saltados (ya existían o sin datos): {skipped}")
print(f" - Errores: {errors}")
# Actualizar ref_count
await conn.execute("""
UPDATE storage.physical_blobs pb
SET ref_count = (
SELECT COUNT(*) FROM storage.user_assets ua
WHERE ua.blob_hash = pb.content_hash
)
""")
print(" - ref_count actualizado")
await pool.close()
if __name__ == "__main__":
asyncio.run(migrate())

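A quick post-migration check, sketched with the same DATABASE_URL default as the script above, that ref_count in storage.physical_blobs agrees with the number of user_assets pointing at each blob:

# Count blobs whose ref_count is stale after the migration.
import asyncio
import asyncpg

async def main():
    conn = await asyncpg.connect("postgresql://tzzr:tzzr@localhost:5432/tzzr")
    stale = await conn.fetchval("""
        SELECT COUNT(*) FROM storage.physical_blobs pb
        WHERE pb.ref_count <> (
            SELECT COUNT(*) FROM storage.user_assets ua
            WHERE ua.blob_hash = pb.content_hash
        )
    """)
    print(f"blobs with stale ref_count: {stale}")
    await conn.close()

asyncio.run(main())
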
View File

@@ -0,0 +1,9 @@
fastapi>=0.104.0
uvicorn>=0.24.0
asyncpg>=0.29.0
boto3>=1.34.0
Pillow>=10.0.0
PyMuPDF>=1.23.0
argon2-cffi>=23.1.0
python-multipart>=0.0.6
pydantic>=2.5.0

View File

@@ -0,0 +1,20 @@
[Unit]
Description=Storage API Server
After=network.target postgresql.service
[Service]
Type=simple
User=root
WorkingDirectory=/opt/storage
ExecStart=/opt/storage/venv/bin/python storage_api.py
Restart=always
RestartSec=5
Environment=R2_ENDPOINT=https://7dedae6030f5554d99d37e98a5232996.r2.cloudflarestorage.com
Environment=R2_BUCKET=deck
Environment=DATABASE_URL=postgresql://tzzr:tzzr@localhost:5432/tzzr
Environment=AWS_ACCESS_KEY_ID=
Environment=AWS_SECRET_ACCESS_KEY=
[Install]
WantedBy=multi-user.target

445
apps/storage/storage_api.py Normal file
View File

@@ -0,0 +1,445 @@
#!/usr/bin/env python3
"""
Storage API - endpoints for file upload/download
Spec: Hybrid Storage System v4.0
"""
import os
import hashlib
import json
import asyncio
from datetime import datetime, timedelta
from typing import Optional
import asyncpg
import boto3
from fastapi import FastAPI, HTTPException, Request, Header, Query, BackgroundTasks
from fastapi.responses import RedirectResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import uvicorn
from collections import defaultdict
import time
import argon2
# Configuration
R2_ENDPOINT = os.environ.get("R2_ENDPOINT", "https://7dedae6030f5554d99d37e98a5232996.r2.cloudflarestorage.com")
R2_BUCKET = os.environ.get("R2_BUCKET", "deck")
DB_URL = os.environ.get("DATABASE_URL", "postgresql://tzzr:tzzr@localhost:5432/tzzr")
PRESIGNED_UPLOAD_EXPIRY = 3 * 60 * 60 # 3 hours
PRESIGNED_DOWNLOAD_EXPIRY = 45 * 60 # 45 minutes
# Rate limiting
RATE_LIMIT_IP = 100 # requests/min per IP
RATE_LIMIT_KEY = 50 # downloads/hour per public_key
RATE_LIMIT_TRANSFER = 10 * 1024 * 1024 * 1024 # 10 GB/hour
app = FastAPI(title="Storage API", version="4.0")
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_methods=["*"],
allow_headers=["*"],
)
# Global state
db_pool = None
s3_client = None
rate_limits = {
"ip": defaultdict(list), # IP -> [timestamps]
"key": defaultdict(list), # public_key -> [timestamps]
"transfer": defaultdict(int) # IP -> bytes
}
ph = argon2.PasswordHasher()
# =========================================================================
# STARTUP / SHUTDOWN
# =========================================================================
@app.on_event("startup")
async def startup():
global db_pool, s3_client
db_pool = await asyncpg.create_pool(DB_URL, min_size=2, max_size=20)
s3_client = boto3.client(
"s3",
endpoint_url=R2_ENDPOINT,
aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
)
@app.on_event("shutdown")
async def shutdown():
if db_pool:
await db_pool.close()
# =========================================================================
# RATE LIMITING
# =========================================================================
def check_rate_limit_ip(ip: str) -> bool:
"""100 req/min por IP"""
now = time.time()
minute_ago = now - 60
# Drop stale timestamps
rate_limits["ip"][ip] = [t for t in rate_limits["ip"][ip] if t > minute_ago]
if len(rate_limits["ip"][ip]) >= RATE_LIMIT_IP:
return False
rate_limits["ip"][ip].append(now)
return True
def check_rate_limit_key(public_key: str) -> bool:
"""50 descargas/hora por public_key"""
now = time.time()
hour_ago = now - 3600
rate_limits["key"][public_key] = [t for t in rate_limits["key"][public_key] if t > hour_ago]
if len(rate_limits["key"][public_key]) >= RATE_LIMIT_KEY:
return False
rate_limits["key"][public_key].append(now)
return True
# =========================================================================
# MODELS
# =========================================================================
class UploadInitRequest(BaseModel):
hash: str
size: int
mime_type: str
filename: str
user_id: str
password: Optional[str] = None
class UploadInitResponse(BaseModel):
status: str
presigned_url: Optional[str] = None
deduplicated: bool = False
public_key: Optional[str] = None
# =========================================================================
# UPLOAD ENDPOINTS
# =========================================================================
@app.post("/upload/init", response_model=UploadInitResponse)
async def upload_init(req: UploadInitRequest, request: Request, background_tasks: BackgroundTasks):
"""
Start an upload. Returns a presigned URL or confirms deduplication.
"""
client_ip = request.client.host
if not check_rate_limit_ip(client_ip):
raise HTTPException(429, "Rate limit exceeded")
async with db_pool.acquire() as conn:
# Check whether the blob already exists
blob = await conn.fetchrow("""
SELECT content_hash, verification_status
FROM storage.physical_blobs
WHERE content_hash = $1
""", req.hash)
if blob:
if blob["verification_status"] == "VERIFIED":
# Deduplication: create the asset without uploading
public_key = hashlib.sha256(
f"{req.hash}{req.user_id}{datetime.now().isoformat()}".encode()
).hexdigest()
password_hash = None
if req.password:
password_hash = ph.hash(req.password)
await conn.execute("""
INSERT INTO storage.user_assets
(public_key, blob_hash, user_id, original_filename, access_password)
VALUES ($1, $2, $3, $4, $5)
""", public_key, req.hash, req.user_id, req.filename, password_hash)
return UploadInitResponse(
status="created",
deduplicated=True,
public_key=public_key
)
# Blob exists but is PENDING - the client must upload anyway
else:
# Create a PENDING record
storage_path = f"{req.hash}.bin"
await conn.execute("""
INSERT INTO storage.physical_blobs
(content_hash, file_size, mime_type, storage_provider, storage_path)
VALUES ($1, $2, $3, 'R2_PRIMARY', $4)
""", req.hash, req.size, req.mime_type, storage_path)
# Generate a presigned URL for the upload
storage_path = f"{req.hash}.bin"
presigned_url = s3_client.generate_presigned_url(
"put_object",
Params={
"Bucket": R2_BUCKET,
"Key": storage_path,
"ContentType": req.mime_type
},
ExpiresIn=PRESIGNED_UPLOAD_EXPIRY
)
return UploadInitResponse(
status="upload_required",
presigned_url=presigned_url,
deduplicated=False
)
@app.post("/upload/complete/{content_hash}")
async def upload_complete(
content_hash: str,
user_id: str = Query(...),
filename: str = Query(...),
password: Optional[str] = Query(None),
background_tasks: BackgroundTasks = None
):
"""
Confirm the upload finished. Queues verification.
"""
async with db_pool.acquire() as conn:
blob = await conn.fetchrow("""
SELECT content_hash, storage_path
FROM storage.physical_blobs
WHERE content_hash = $1
""", content_hash)
if not blob:
raise HTTPException(404, "Blob not found")
# Queue verification in the background
# In production this would go to a queue (Redis, RabbitMQ, etc.)
background_tasks.add_task(
verify_and_finalize,
content_hash,
blob["storage_path"],
user_id,
filename,
password
)
return {"status": "processing", "content_hash": content_hash}
async def verify_and_finalize(
content_hash: str,
storage_path: str,
user_id: str,
filename: str,
password: Optional[str]
):
"""Background task para verificar y finalizar upload"""
from storage_worker import StorageWorker
worker = StorageWorker()
await worker.init()
try:
result = await worker.process_upload(
content_hash,
storage_path,
user_id,
filename,
ph.hash(password) if password else None
)
# In production: notify the client via webhook/websocket
print(f"Upload finalized: {result}")
finally:
await worker.close()
# =========================================================================
# DOWNLOAD ENDPOINTS
# =========================================================================
@app.get("/file/{public_key}")
async def download_file(
public_key: str,
request: Request,
password: Optional[str] = Query(None)
):
"""
File download. Returns a redirect to a signed URL.
"""
client_ip = request.client.host
# Rate limiting
if not check_rate_limit_ip(client_ip):
raise HTTPException(429, "Rate limit exceeded - IP")
if not check_rate_limit_key(public_key):
raise HTTPException(429, "Rate limit exceeded - downloads")
async with db_pool.acquire() as conn:
# Look up the asset
asset = await conn.fetchrow("""
SELECT a.id, a.blob_hash, a.original_filename, a.access_password, a.downloads_count,
b.storage_provider, b.storage_path, b.verification_status, b.mime_type
FROM storage.user_assets a
JOIN storage.physical_blobs b ON a.blob_hash = b.content_hash
WHERE a.public_key = $1
""", public_key)
if not asset:
raise HTTPException(404, "Asset not found")
# Verify the password if one is required
if asset["access_password"]:
if not password:
raise HTTPException(401, "Password required")
try:
ph.verify(asset["access_password"], password)
except:
raise HTTPException(401, "Invalid password")
# Check the blob's status
status = asset["verification_status"]
if status == "PENDING":
raise HTTPException(202, "File is being processed")
if status in ("CORRUPT", "LOST"):
raise HTTPException(410, "File is no longer available")
# Increment the download counter
await conn.execute("""
UPDATE storage.user_assets
SET downloads_count = downloads_count + 1
WHERE id = $1
""", asset["id"])
# Generate a signed URL depending on the provider
provider = asset["storage_provider"]
if provider in ("R2_PRIMARY", "R2_CACHE"):
presigned_url = s3_client.generate_presigned_url(
"get_object",
Params={
"Bucket": R2_BUCKET,
"Key": asset["storage_path"],
"ResponseContentDisposition": f'attachment; filename="{asset["original_filename"]}"',
"ResponseContentType": asset["mime_type"]
},
ExpiresIn=PRESIGNED_DOWNLOAD_EXPIRY
)
return RedirectResponse(presigned_url, status_code=302)
elif provider == "SHAREPOINT":
# TODO: implement SharePoint access via the Graph API
raise HTTPException(503, "SharePoint access not implemented")
else:
raise HTTPException(503, "Unknown storage provider")
@app.get("/file/{public_key}/info")
async def file_info(public_key: str, request: Request):
"""
File information without downloading it.
"""
client_ip = request.client.host
if not check_rate_limit_ip(client_ip):
raise HTTPException(429, "Rate limit exceeded")
async with db_pool.acquire() as conn:
asset = await conn.fetchrow("""
SELECT a.public_key, a.original_filename, a.downloads_count, a.created_at,
b.file_size, b.mime_type, b.verification_status,
(a.access_password IS NOT NULL) as password_protected
FROM storage.user_assets a
JOIN storage.physical_blobs b ON a.blob_hash = b.content_hash
WHERE a.public_key = $1
""", public_key)
if not asset:
raise HTTPException(404, "Asset not found")
return {
"public_key": asset["public_key"],
"filename": asset["original_filename"],
"size": asset["file_size"],
"mime_type": asset["mime_type"],
"status": asset["verification_status"],
"downloads": asset["downloads_count"],
"password_protected": asset["password_protected"],
"created_at": asset["created_at"].isoformat()
}
@app.get("/file/{public_key}/thumb")
async def file_thumbnail(public_key: str, request: Request):
"""
Redirect to the file's thumbnail.
"""
client_ip = request.client.host
if not check_rate_limit_ip(client_ip):
raise HTTPException(429, "Rate limit exceeded")
async with db_pool.acquire() as conn:
asset = await conn.fetchrow("""
SELECT a.blob_hash, b.verification_status
FROM storage.user_assets a
JOIN storage.physical_blobs b ON a.blob_hash = b.content_hash
WHERE a.public_key = $1
""", public_key)
if not asset:
raise HTTPException(404, "Asset not found")
if asset["verification_status"] != "VERIFIED":
raise HTTPException(202, "Thumbnail not ready")
# Thumbnail URL
thumb_key = f"{asset['blob_hash']}.thumb"
try:
# Verify that it exists
s3_client.head_object(Bucket=R2_BUCKET, Key=thumb_key)
except:
raise HTTPException(404, "Thumbnail not available")
presigned_url = s3_client.generate_presigned_url(
"get_object",
Params={"Bucket": R2_BUCKET, "Key": thumb_key},
ExpiresIn=PRESIGNED_DOWNLOAD_EXPIRY
)
return RedirectResponse(presigned_url, status_code=302)
# =========================================================================
# HEALTH
# =========================================================================
@app.get("/health")
async def health():
return {"status": "ok", "timestamp": datetime.now().isoformat()}
# =========================================================================
# MAIN
# =========================================================================
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8080)

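The client side of the upload protocol above, as a sketch: hash the bytes, call /upload/init, PUT to the presigned URL unless the blob was deduplicated, then confirm with /upload/complete. It assumes the API is listening on localhost:8080 and that requests is installed; the filename is hypothetical.

# Hash, init, upload, complete.
import hashlib
import requests

API = "http://localhost:8080"
USER_ID = "00000000-0000-0000-0000-000000000000"  # dummy user, as in migrate_atc.py
path = "report.pdf"  # hypothetical local file

data = open(path, "rb").read()
content_hash = hashlib.sha256(data).hexdigest()

init = requests.post(f"{API}/upload/init", json={
    "hash": content_hash,
    "size": len(data),
    "mime_type": "application/pdf",
    "filename": path,
    "user_id": USER_ID,
}).json()

if init["status"] == "upload_required":
    requests.put(init["presigned_url"], data=data,
                 headers={"Content-Type": "application/pdf"}).raise_for_status()
    done = requests.post(f"{API}/upload/complete/{content_hash}",
                         params={"user_id": USER_ID, "filename": path}).json()
    print(done)   # {"status": "processing", ...}; verification runs in the background
else:
    print(init)   # deduplicated: a public_key is returned immediately
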
View File

@@ -0,0 +1,480 @@
#!/usr/bin/env python3
"""
Storage Worker - file verification and processing
Spec: Hybrid Storage System v4.0
"""
import os
import hashlib
import json
import asyncio
import asyncpg
from datetime import datetime
from typing import Optional, Dict, Any
import boto3
from PIL import Image
import fitz # PyMuPDF
import io
import tempfile
# Configuration
R2_ENDPOINT = os.environ.get("R2_ENDPOINT", "https://7dedae6030f5554d99d37e98a5232996.r2.cloudflarestorage.com")
R2_BUCKET = os.environ.get("R2_BUCKET", "deck")
DB_URL = os.environ.get("DATABASE_URL", "postgresql://tzzr:tzzr@localhost:5432/tzzr")
THUMB_WIDTH = 300
MAX_RETRIES = 9
RETRY_BACKOFF_BASE = 2
def get_s3_client():
return boto3.client(
"s3",
endpoint_url=R2_ENDPOINT,
aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
)
async def get_db_pool():
return await asyncpg.create_pool(DB_URL, min_size=2, max_size=10)
def calculate_sha256(data: bytes) -> str:
"""Calcula SHA-256 de bytes"""
return hashlib.sha256(data).hexdigest()
def generate_public_key(content_hash: str, user_id: str) -> str:
"""Genera public_key única para un asset"""
data = f"{content_hash}{user_id}{datetime.now().isoformat()}"
return hashlib.sha256(data.encode()).hexdigest()
class StorageWorker:
def __init__(self):
self.s3 = get_s3_client()
self.pool = None
async def init(self):
self.pool = await get_db_pool()
async def close(self):
if self.pool:
await self.pool.close()
# =========================================================================
# HASH VERIFICATION
# =========================================================================
async def verify_blob(self, declared_hash: str, storage_path: str) -> Dict[str, Any]:
"""
Verify that the declared hash matches the actual content.
We NEVER trust the client's hash.
"""
try:
# Download the file
obj = self.s3.get_object(Bucket=R2_BUCKET, Key=storage_path)
content = obj["Body"].read()
# Compute the real hash
calculated_hash = calculate_sha256(content)
if calculated_hash != declared_hash:
# HASH MISMATCH - corrupt file or spoofing
await self._mark_corrupt(declared_hash, storage_path)
return {
"status": "CORRUPT",
"declared": declared_hash,
"calculated": calculated_hash,
"action": "deleted"
}
# Hash matches - mark as verified
await self._mark_verified(declared_hash)
return {
"status": "VERIFIED",
"hash": declared_hash,
"size": len(content)
}
except Exception as e:
return {"status": "ERROR", "error": str(e)}
async def _mark_corrupt(self, content_hash: str, storage_path: str):
"""Marca blob como corrupto y elimina archivo"""
async with self.pool.acquire() as conn:
await conn.execute("""
UPDATE storage.physical_blobs
SET verification_status = 'CORRUPT', updated_at = NOW()
WHERE content_hash = $1
""", content_hash)
# Delete the file from the bucket
try:
self.s3.delete_object(Bucket=R2_BUCKET, Key=storage_path)
except:
pass
async def _mark_verified(self, content_hash: str):
"""Marca blob como verificado"""
async with self.pool.acquire() as conn:
await conn.execute("""
UPDATE storage.physical_blobs
SET verification_status = 'VERIFIED',
last_verified_at = NOW(),
updated_at = NOW()
WHERE content_hash = $1
""", content_hash)
# =========================================================================
# DERIVATIVE GENERATION
# =========================================================================
async def generate_derivatives(self, content_hash: str) -> Dict[str, Any]:
"""Genera thumbnail y metadatos para un blob verificado"""
async with self.pool.acquire() as conn:
blob = await conn.fetchrow("""
SELECT content_hash, mime_type, storage_path, file_size
FROM storage.physical_blobs
WHERE content_hash = $1 AND verification_status = 'VERIFIED'
""", content_hash)
if not blob:
return {"status": "ERROR", "error": "Blob not found or not verified"}
mime_type = blob["mime_type"]
storage_path = blob["storage_path"]
# Download the file
obj = self.s3.get_object(Bucket=R2_BUCKET, Key=storage_path)
content = obj["Body"].read()
metadata = {
"content_hash": content_hash,
"mime_type": mime_type,
"file_size": blob["file_size"],
"processed_at": datetime.now().isoformat()
}
thumb_generated = False
# Generate a thumbnail depending on the type
if mime_type.startswith("image/"):
thumb_data, extra_meta = self._process_image(content)
metadata.update(extra_meta)
if thumb_data:
await self._save_thumb(content_hash, thumb_data)
thumb_generated = True
elif mime_type == "application/pdf":
thumb_data, extra_meta = self._process_pdf(content)
metadata.update(extra_meta)
if thumb_data:
await self._save_thumb(content_hash, thumb_data)
thumb_generated = True
# Save the metadata
await self._save_metadata(content_hash, metadata)
return {
"status": "OK",
"thumb_generated": thumb_generated,
"metadata": metadata
}
def _process_image(self, content: bytes) -> tuple:
"""Procesa imagen: genera thumb y extrae metadatos"""
try:
img = Image.open(io.BytesIO(content))
# Metadata
meta = {
"width": img.width,
"height": img.height,
"format": img.format,
"mode": img.mode
}
# EXIF if available
if hasattr(img, '_getexif') and img._getexif():
meta["has_exif"] = True
# Generate the thumbnail
ratio = THUMB_WIDTH / img.width
new_height = int(img.height * ratio)
thumb = img.copy()
thumb.thumbnail((THUMB_WIDTH, new_height), Image.Resampling.LANCZOS)
# Convert to bytes
thumb_buffer = io.BytesIO()
thumb.save(thumb_buffer, format="JPEG", quality=85)
thumb_data = thumb_buffer.getvalue()
return thumb_data, meta
except Exception as e:
return None, {"error": str(e)}
def _process_pdf(self, content: bytes) -> tuple:
"""Procesa PDF: genera thumb de primera página y extrae metadatos"""
try:
doc = fitz.open(stream=content, filetype="pdf")
meta = {
"pages": len(doc),
"format": "PDF"
}
# Document metadata
pdf_meta = doc.metadata
if pdf_meta:
if pdf_meta.get("author"):
meta["author"] = pdf_meta["author"]
if pdf_meta.get("title"):
meta["title"] = pdf_meta["title"]
# Render the first page as a thumbnail
if len(doc) > 0:
page = doc[0]
# Scale so the width equals THUMB_WIDTH
zoom = THUMB_WIDTH / page.rect.width
mat = fitz.Matrix(zoom, zoom)
pix = page.get_pixmap(matrix=mat)
thumb_data = pix.tobytes("jpeg")
else:
thumb_data = None
doc.close()
return thumb_data, meta
except Exception as e:
return None, {"error": str(e)}
async def _save_thumb(self, content_hash: str, thumb_data: bytes):
"""Guarda thumbnail en el bucket"""
key = f"{content_hash}.thumb"
self.s3.put_object(
Bucket=R2_BUCKET,
Key=key,
Body=thumb_data,
ContentType="image/jpeg"
)
async def _save_metadata(self, content_hash: str, metadata: dict):
"""Guarda metadatos JSON en el bucket"""
key = f"{content_hash}.json"
self.s3.put_object(
Bucket=R2_BUCKET,
Key=key,
Body=json.dumps(metadata, indent=2),
ContentType="application/json"
)
# =========================================================================
# FULL PROCESSING
# =========================================================================
async def process_upload(
self,
declared_hash: str,
storage_path: str,
user_id: str,
original_filename: str,
access_password: Optional[str] = None
) -> Dict[str, Any]:
"""
Full post-upload processing:
1. Verify the hash
2. Generate derivatives
3. Create the user_asset
"""
# 1. Verify the hash
verify_result = await self.verify_blob(declared_hash, storage_path)
if verify_result["status"] != "VERIFIED":
return verify_result
# 2. Generate derivatives (with retries)
for attempt in range(MAX_RETRIES):
try:
deriv_result = await self.generate_derivatives(declared_hash)
if deriv_result["status"] == "OK":
break
except Exception as e:
if attempt == MAX_RETRIES - 1:
# Last attempt failed, but the blob is already verified
deriv_result = {"status": "PARTIAL", "error": str(e)}
else:
await asyncio.sleep(RETRY_BACKOFF_BASE ** attempt)
# 3. Create the user_asset
public_key = generate_public_key(declared_hash, user_id)
async with self.pool.acquire() as conn:
await conn.execute("""
INSERT INTO storage.user_assets
(public_key, blob_hash, user_id, original_filename, access_password)
VALUES ($1, $2, $3, $4, $5)
""", public_key, declared_hash, user_id, original_filename, access_password)
return {
"status": "CREATED",
"public_key": public_key,
"content_hash": declared_hash,
"derivatives": deriv_result
}
# =========================================================================
# BLOB REGISTRATION (no upload - for existing files)
# =========================================================================
async def register_blob(
self,
content_hash: str,
file_size: int,
mime_type: str,
storage_provider: str,
storage_path: str
) -> Dict[str, Any]:
"""Registra un blob existente en el sistema"""
async with self.pool.acquire() as conn:
# Check whether it already exists
existing = await conn.fetchrow("""
SELECT content_hash, verification_status
FROM storage.physical_blobs
WHERE content_hash = $1
""", content_hash)
if existing:
return {
"status": "EXISTS",
"content_hash": content_hash,
"verification_status": existing["verification_status"]
}
# Insert the new blob
await conn.execute("""
INSERT INTO storage.physical_blobs
(content_hash, file_size, mime_type, storage_provider, storage_path)
VALUES ($1, $2, $3, $4::storage.storage_provider_enum, $5)
""", content_hash, file_size, mime_type, storage_provider, storage_path)
return {
"status": "REGISTERED",
"content_hash": content_hash,
"verification_status": "PENDING"
}
# =========================================================================
# MAINTENANCE
# =========================================================================
async def garbage_collect(self, dry_run: bool = True) -> Dict[str, Any]:
"""
Delete orphan blobs (ref_count = 0, not updated in 30 days)
"""
async with self.pool.acquire() as conn:
orphans = await conn.fetch("""
SELECT content_hash, storage_path
FROM storage.physical_blobs
WHERE ref_count = 0
AND updated_at < NOW() - INTERVAL '30 days'
""")
deleted = []
for blob in orphans:
if not dry_run:
# Delete the derivatives
for ext in [".thumb", ".json"]:
try:
self.s3.delete_object(Bucket=R2_BUCKET, Key=f"{blob['content_hash']}{ext}")
except:
pass
# Delete the blob
try:
self.s3.delete_object(Bucket=R2_BUCKET, Key=blob["storage_path"])
except:
pass
# Delete the record
async with self.pool.acquire() as conn:
await conn.execute("""
DELETE FROM storage.physical_blobs WHERE content_hash = $1
""", blob["content_hash"])
deleted.append(blob["content_hash"])
return {
"status": "OK",
"dry_run": dry_run,
"orphans_found": len(orphans),
"deleted": deleted if not dry_run else []
}
async def integrity_check(self, sample_percent: float = 0.01) -> Dict[str, Any]:
"""
Verify the integrity of a random sample of blobs
"""
async with self.pool.acquire() as conn:
blobs = await conn.fetch("""
SELECT content_hash, storage_path
FROM storage.physical_blobs
WHERE verification_status = 'VERIFIED'
ORDER BY RANDOM()
LIMIT (SELECT CEIL(COUNT(*) * $1) FROM storage.physical_blobs WHERE verification_status = 'VERIFIED')
""", sample_percent)
results = {"checked": 0, "ok": 0, "corrupt": []}
for blob in blobs:
results["checked"] += 1
verify = await self.verify_blob(blob["content_hash"], blob["storage_path"])
if verify["status"] == "VERIFIED":
results["ok"] += 1
else:
results["corrupt"].append(blob["content_hash"])
return results
# CLI for testing
async def main():
import sys
worker = StorageWorker()
await worker.init()
if len(sys.argv) < 2:
print("Usage: storage_worker.py <command> [args]")
print("Commands: gc, integrity, register")
return
cmd = sys.argv[1]
if cmd == "gc":
dry_run = "--execute" not in sys.argv
result = await worker.garbage_collect(dry_run=dry_run)
print(json.dumps(result, indent=2))
elif cmd == "integrity":
result = await worker.integrity_check()
print(json.dumps(result, indent=2))
elif cmd == "register":
if len(sys.argv) < 6:
print("Usage: storage_worker.py register <hash> <size> <mime> <path>")
return
result = await worker.register_blob(
sys.argv[2], int(sys.argv[3]), sys.argv[4], "R2_PRIMARY", sys.argv[5]
)
print(json.dumps(result, indent=2))
await worker.close()
if __name__ == "__main__":
asyncio.run(main())

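A sketch of running the worker's maintenance routines programmatically rather than through the CLI at the bottom of the file, under the same environment variables (DATABASE_URL plus the R2 credentials):

# Dry-run garbage collection and a 1% integrity sample.
import asyncio
import json
from storage_worker import StorageWorker

async def main():
    worker = StorageWorker()
    await worker.init()
    try:
        gc = await worker.garbage_collect(dry_run=True)          # reports orphans, deletes nothing
        check = await worker.integrity_check(sample_percent=0.01)
        print(json.dumps({"gc": gc, "integrity": check}, indent=2))
    finally:
        await worker.close()

asyncio.run(main())
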
View File

@@ -0,0 +1,130 @@
#!/usr/bin/env python3
"""
Sync metadata from JSON files in the R2 bucket into storage.physical_blobs
"""
import os
import json
import boto3
import asyncio
import asyncpg
R2_ENDPOINT = "https://7dedae6030f5554d99d37e98a5232996.r2.cloudflarestorage.com"
R2_BUCKET = "deck"
def get_s3_client():
return boto3.client(
"s3",
endpoint_url=R2_ENDPOINT,
aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
)
async def sync():
s3 = get_s3_client()
pool = await asyncpg.create_pool(
"postgresql:///tzzr?host=/var/run/postgresql",
min_size=2, max_size=10
)
async with pool.acquire() as conn:
blobs = await conn.fetch("""
SELECT content_hash, storage_path
FROM storage.physical_blobs
WHERE file_size = 0
""")
print(f"Sincronizando metadata para {len(blobs)} blobs...")
updated = 0
errors = 0
for blob in blobs:
hash = blob["content_hash"]
json_key = f"{hash}.json"
try:
obj = s3.get_object(Bucket=R2_BUCKET, Key=json_key)
meta = json.loads(obj["Body"].read())
# Extract the data
l2 = meta.get("jsonb_standard", {}).get("L2_document", {})
size_bytes = l2.get("size_bytes", 0)
mime_type = l2.get("mime_type")
ext = meta.get("ext", "pdf")
url_atc = meta.get("url_atc", [])
storage_path = url_atc[0] if url_atc else f"{hash}.{ext}"
if not mime_type:
if ext == "pdf":
mime_type = "application/pdf"
elif ext in ("jpg", "jpeg"):
mime_type = "image/jpeg"
elif ext == "png":
mime_type = "image/png"
else:
mime_type = "application/octet-stream"
# Get the real file size if it is not in the JSON
if size_bytes == 0:
try:
file_obj = s3.head_object(Bucket=R2_BUCKET, Key=storage_path)
size_bytes = file_obj.get("ContentLength", 0)
except:
pass
# Update the record
await conn.execute("""
UPDATE storage.physical_blobs
SET file_size = $2,
mime_type = $3,
storage_path = $4
WHERE content_hash = $1
""", hash, size_bytes, mime_type, storage_path)
updated += 1
if updated % 100 == 0:
print(f" Actualizados: {updated}")
except s3.exceptions.NoSuchKey:
# The JSON does not exist; try to get the size from the file directly
try:
# Try different extensions
for ext in ["pdf", "png", "jpg"]:
try:
file_key = f"{hash}.{ext}"
file_obj = s3.head_object(Bucket=R2_BUCKET, Key=file_key)
size_bytes = file_obj.get("ContentLength", 0)
content_type = file_obj.get("ContentType", "application/octet-stream")
await conn.execute("""
UPDATE storage.physical_blobs
SET file_size = $2,
mime_type = $3,
storage_path = $4
WHERE content_hash = $1
""", hash, size_bytes, content_type, file_key)
updated += 1
break
except:
continue
except Exception as e:
errors += 1
except Exception as e:
errors += 1
print(f"Error en {hash}: {e}")
print(f"\nSincronización completada:")
print(f" - Actualizados: {updated}")
print(f" - Errores: {errors}")
await pool.close()
if __name__ == "__main__":
asyncio.run(sync())

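A small follow-up check, assuming the same unix-socket DSN as the script above, showing how many blobs still have no metadata after a sync run:

# Blobs the sync could not resolve keep file_size = 0.
import asyncio
import asyncpg

async def main():
    conn = await asyncpg.connect("postgresql:///tzzr?host=/var/run/postgresql")
    remaining = await conn.fetchval(
        "SELECT COUNT(*) FROM storage.physical_blobs WHERE file_size = 0")
    print(f"blobs still without metadata: {remaining}")
    await conn.close()

asyncio.run(main())
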
1
apps/tzzr-cli Submodule

Submodule apps/tzzr-cli added at 0327df5277