Add pending apps and frontend components

- apps/captain-mobile: Mobile API service
- apps/flow-ui: Flow UI application
- apps/mindlink: Mindlink application
- apps/storage: Storage API and workers
- apps/tzzr-cli: TZZR CLI tool
- deck-frontend/backups: Historical TypeScript versions
- hst-frontend: Standalone HST frontend

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
ARCHITECT committed 2026-01-16 18:26:59 +00:00
commit 9b244138b5 (parent 17506aaee2)
177 changed files with 15063 additions and 0 deletions

apps/storage/migrate_atc.py

@@ -0,0 +1,109 @@
#!/usr/bin/env python3
"""
Migration: import existing files from secretaria_clara.atc into storage
"""
import asyncio
import asyncpg
import os
import json
DB_URL = os.environ.get("DATABASE_URL", "postgresql://tzzr:tzzr@localhost:5432/tzzr")
async def migrate():
pool = await asyncpg.create_pool(DB_URL, min_size=2, max_size=10)
async with pool.acquire() as conn:
# Fetch atc rows that have a hash and a url_file
atc_files = await conn.fetch("""
SELECT
mrf,
private_mrf,
alias,
name_es,
ref,
ext,
jsonb_standard,
hashtags
FROM secretaria_clara.atc
WHERE jsonb_standard IS NOT NULL
AND jsonb_standard->'L2_document'->>'url_file' IS NOT NULL
""")
print(f"Encontrados {len(atc_files)} archivos en atc")
migrated = 0
skipped = 0
errors = 0
for file in atc_files:
try:
mrf = file["mrf"]
jsonb = file["jsonb_standard"] or {}
# asyncpg returns jsonb columns as text by default; parse if we got a string
if isinstance(jsonb, str):
    jsonb = json.loads(jsonb)
# Extract the data
l2 = jsonb.get("L2_document", {})
url_file = l2.get("url_file")
size_bytes = l2.get("size_bytes", 0)
mime_type = l2.get("mime_type", "application/octet-stream")
if not url_file:
skipped += 1
continue
# Check whether it already exists in storage
existing = await conn.fetchrow("""
SELECT content_hash FROM storage.physical_blobs
WHERE content_hash = $1
""", mrf)
if existing:
skipped += 1
continue
# Insert into physical_blobs
await conn.execute("""
INSERT INTO storage.physical_blobs
(content_hash, file_size, mime_type, storage_provider, storage_path, verification_status)
VALUES ($1, $2, $3, 'R2_PRIMARY', $4, 'VERIFIED')
""", mrf, size_bytes, mime_type, url_file)
# Create a user_asset whose public_key equals the mrf
# Use a dummy UUID for user_id since there are no users yet
await conn.execute("""
INSERT INTO storage.user_assets
(public_key, blob_hash, user_id, original_filename)
VALUES ($1, $2, '00000000-0000-0000-0000-000000000000'::uuid, $3)
ON CONFLICT (public_key) DO NOTHING
""", mrf, mrf, file["name_es"] or file["alias"] or mrf[:20])
migrated += 1
if migrated % 100 == 0:
print(f" Migrados: {migrated}")
except Exception as e:
errors += 1
print(f"Error migrando {file['mrf']}: {e}")
print(f"\nMigración completada:")
print(f" - Migrados: {migrated}")
print(f" - Saltados (ya existían o sin datos): {skipped}")
print(f" - Errores: {errors}")
# Update ref_count
await conn.execute("""
UPDATE storage.physical_blobs pb
SET ref_count = (
SELECT COUNT(*) FROM storage.user_assets ua
WHERE ua.blob_hash = pb.content_hash
)
""")
print(" - ref_count actualizado")
await pool.close()
if __name__ == "__main__":
asyncio.run(migrate())


@@ -0,0 +1,9 @@
fastapi>=0.104.0
uvicorn>=0.24.0
asyncpg>=0.29.0
boto3>=1.34.0
Pillow>=10.0.0
PyMuPDF>=1.23.0
argon2-cffi>=23.1.0
python-multipart>=0.0.6
pydantic>=2.5.0


@@ -0,0 +1,20 @@
[Unit]
Description=Storage API Server
After=network.target postgresql.service
[Service]
Type=simple
User=root
WorkingDirectory=/opt/storage
ExecStart=/opt/storage/venv/bin/python storage_api.py
Restart=always
RestartSec=5
Environment=R2_ENDPOINT=https://7dedae6030f5554d99d37e98a5232996.r2.cloudflarestorage.com
Environment=R2_BUCKET=deck
Environment=DATABASE_URL=postgresql://tzzr:tzzr@localhost:5432/tzzr
Environment=AWS_ACCESS_KEY_ID=
Environment=AWS_SECRET_ACCESS_KEY=
[Install]
WantedBy=multi-user.target

apps/storage/storage_api.py

@@ -0,0 +1,445 @@
#!/usr/bin/env python3
"""
Storage API - endpoints for file upload/download
Spec: Hybrid Storage System v4.0
"""
import os
import hashlib
import json
import asyncio
from datetime import datetime, timedelta
from typing import Optional
import asyncpg
import boto3
from fastapi import FastAPI, HTTPException, Request, Header, Query, BackgroundTasks
from fastapi.responses import RedirectResponse, JSONResponse
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import uvicorn
from collections import defaultdict
import time
import argon2
# Configuration
R2_ENDPOINT = os.environ.get("R2_ENDPOINT", "https://7dedae6030f5554d99d37e98a5232996.r2.cloudflarestorage.com")
R2_BUCKET = os.environ.get("R2_BUCKET", "deck")
DB_URL = os.environ.get("DATABASE_URL", "postgresql://tzzr:tzzr@localhost:5432/tzzr")
PRESIGNED_UPLOAD_EXPIRY = 3 * 60 * 60 # 3 hours
PRESIGNED_DOWNLOAD_EXPIRY = 45 * 60 # 45 minutes
# Rate limiting
RATE_LIMIT_IP = 100 # requests/min per IP
RATE_LIMIT_KEY = 50 # downloads/hour per public_key
RATE_LIMIT_TRANSFER = 10 * 1024 * 1024 * 1024 # 10 GB/hour
app = FastAPI(title="Storage API", version="4.0")
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_methods=["*"],
allow_headers=["*"],
)
# Global state
db_pool = None
s3_client = None
rate_limits = {
"ip": defaultdict(list), # IP -> [timestamps]
"key": defaultdict(list), # public_key -> [timestamps]
"transfer": defaultdict(int) # IP -> bytes
}
ph = argon2.PasswordHasher()
# =========================================================================
# STARTUP / SHUTDOWN
# =========================================================================
@app.on_event("startup")
async def startup():
global db_pool, s3_client
db_pool = await asyncpg.create_pool(DB_URL, min_size=2, max_size=20)
s3_client = boto3.client(
"s3",
endpoint_url=R2_ENDPOINT,
aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
)
@app.on_event("shutdown")
async def shutdown():
if db_pool:
await db_pool.close()
# =========================================================================
# RATE LIMITING
# =========================================================================
def check_rate_limit_ip(ip: str) -> bool:
"""100 req/min por IP"""
now = time.time()
minute_ago = now - 60
# Drop stale timestamps
rate_limits["ip"][ip] = [t for t in rate_limits["ip"][ip] if t > minute_ago]
if len(rate_limits["ip"][ip]) >= RATE_LIMIT_IP:
return False
rate_limits["ip"][ip].append(now)
return True
def check_rate_limit_key(public_key: str) -> bool:
"""50 descargas/hora por public_key"""
now = time.time()
hour_ago = now - 3600
rate_limits["key"][public_key] = [t for t in rate_limits["key"][public_key] if t > hour_ago]
if len(rate_limits["key"][public_key]) >= RATE_LIMIT_KEY:
return False
rate_limits["key"][public_key].append(now)
return True
# =========================================================================
# MODELS
# =========================================================================
class UploadInitRequest(BaseModel):
hash: str
size: int
mime_type: str
filename: str
user_id: str
password: Optional[str] = None
class UploadInitResponse(BaseModel):
status: str
presigned_url: Optional[str] = None
deduplicated: bool = False
public_key: Optional[str] = None
# =========================================================================
# UPLOAD ENDPOINTS
# =========================================================================
@app.post("/upload/init", response_model=UploadInitResponse)
async def upload_init(req: UploadInitRequest, request: Request, background_tasks: BackgroundTasks):
"""
Start an upload. Returns a presigned URL or confirms deduplication.
"""
client_ip = request.client.host
if not check_rate_limit_ip(client_ip):
raise HTTPException(429, "Rate limit exceeded")
async with db_pool.acquire() as conn:
# Check whether the blob already exists
blob = await conn.fetchrow("""
SELECT content_hash, verification_status
FROM storage.physical_blobs
WHERE content_hash = $1
""", req.hash)
if blob:
if blob["verification_status"] == "VERIFIED":
# Deduplication: create the asset without uploading
public_key = hashlib.sha256(
f"{req.hash}{req.user_id}{datetime.now().isoformat()}".encode()
).hexdigest()
password_hash = None
if req.password:
password_hash = ph.hash(req.password)
await conn.execute("""
INSERT INTO storage.user_assets
(public_key, blob_hash, user_id, original_filename, access_password)
VALUES ($1, $2, $3, $4, $5)
""", public_key, req.hash, req.user_id, req.filename, password_hash)
return UploadInitResponse(
status="created",
deduplicated=True,
public_key=public_key
)
# Blob exists but is PENDING - the client must upload anyway
else:
# Create a PENDING record
storage_path = f"{req.hash}.bin"
await conn.execute("""
INSERT INTO storage.physical_blobs
(content_hash, file_size, mime_type, storage_provider, storage_path)
VALUES ($1, $2, $3, 'R2_PRIMARY', $4)
""", req.hash, req.size, req.mime_type, storage_path)
# Generate a presigned URL for the upload
storage_path = f"{req.hash}.bin"
presigned_url = s3_client.generate_presigned_url(
"put_object",
Params={
"Bucket": R2_BUCKET,
"Key": storage_path,
"ContentType": req.mime_type
},
ExpiresIn=PRESIGNED_UPLOAD_EXPIRY
)
return UploadInitResponse(
status="upload_required",
presigned_url=presigned_url,
deduplicated=False
)
@app.post("/upload/complete/{content_hash}")
async def upload_complete(
content_hash: str,
user_id: str = Query(...),
filename: str = Query(...),
password: Optional[str] = Query(None),
background_tasks: BackgroundTasks = None
):
"""
Confirm that the upload finished. Queues verification.
"""
async with db_pool.acquire() as conn:
blob = await conn.fetchrow("""
SELECT content_hash, storage_path
FROM storage.physical_blobs
WHERE content_hash = $1
""", content_hash)
if not blob:
raise HTTPException(404, "Blob not found")
# Queue verification in the background
# In production this would go to a queue (Redis, RabbitMQ, etc.)
background_tasks.add_task(
verify_and_finalize,
content_hash,
blob["storage_path"],
user_id,
filename,
password
)
return {"status": "processing", "content_hash": content_hash}
async def verify_and_finalize(
content_hash: str,
storage_path: str,
user_id: str,
filename: str,
password: Optional[str]
):
"""Background task para verificar y finalizar upload"""
from storage_worker import StorageWorker
worker = StorageWorker()
await worker.init()
try:
result = await worker.process_upload(
content_hash,
storage_path,
user_id,
filename,
ph.hash(password) if password else None
)
# In production: notify the client via webhook/websocket
print(f"Upload finalized: {result}")
finally:
await worker.close()
# =========================================================================
# DOWNLOAD ENDPOINTS
# =========================================================================
@app.get("/file/{public_key}")
async def download_file(
public_key: str,
request: Request,
password: Optional[str] = Query(None)
):
"""
File download. Returns a redirect to a signed URL.
"""
client_ip = request.client.host
# Rate limiting
if not check_rate_limit_ip(client_ip):
raise HTTPException(429, "Rate limit exceeded - IP")
if not check_rate_limit_key(public_key):
raise HTTPException(429, "Rate limit exceeded - downloads")
async with db_pool.acquire() as conn:
# Look up the asset
asset = await conn.fetchrow("""
SELECT a.id, a.blob_hash, a.original_filename, a.access_password, a.downloads_count,
b.storage_provider, b.storage_path, b.verification_status, b.mime_type
FROM storage.user_assets a
JOIN storage.physical_blobs b ON a.blob_hash = b.content_hash
WHERE a.public_key = $1
""", public_key)
if not asset:
raise HTTPException(404, "Asset not found")
# Check the password if one is required
if asset["access_password"]:
if not password:
raise HTTPException(401, "Password required")
try:
ph.verify(asset["access_password"], password)
except:
raise HTTPException(401, "Invalid password")
# Check the blob's status
status = asset["verification_status"]
if status == "PENDING":
raise HTTPException(202, "File is being processed")
if status in ("CORRUPT", "LOST"):
raise HTTPException(410, "File is no longer available")
# Increment the download counter
await conn.execute("""
UPDATE storage.user_assets
SET downloads_count = downloads_count + 1
WHERE id = $1
""", asset["id"])
# Generate a signed URL for the storage provider
provider = asset["storage_provider"]
if provider in ("R2_PRIMARY", "R2_CACHE"):
presigned_url = s3_client.generate_presigned_url(
"get_object",
Params={
"Bucket": R2_BUCKET,
"Key": asset["storage_path"],
"ResponseContentDisposition": f'attachment; filename="{asset["original_filename"]}"',
"ResponseContentType": asset["mime_type"]
},
ExpiresIn=PRESIGNED_DOWNLOAD_EXPIRY
)
return RedirectResponse(presigned_url, status_code=302)
elif provider == "SHAREPOINT":
# TODO: implement SharePoint access via the Graph API
raise HTTPException(503, "SharePoint access not implemented")
else:
raise HTTPException(503, "Unknown storage provider")
@app.get("/file/{public_key}/info")
async def file_info(public_key: str, request: Request):
"""
File information without downloading it.
"""
client_ip = request.client.host
if not check_rate_limit_ip(client_ip):
raise HTTPException(429, "Rate limit exceeded")
async with db_pool.acquire() as conn:
asset = await conn.fetchrow("""
SELECT a.public_key, a.original_filename, a.downloads_count, a.created_at,
b.file_size, b.mime_type, b.verification_status,
(a.access_password IS NOT NULL) as password_protected
FROM storage.user_assets a
JOIN storage.physical_blobs b ON a.blob_hash = b.content_hash
WHERE a.public_key = $1
""", public_key)
if not asset:
raise HTTPException(404, "Asset not found")
return {
"public_key": asset["public_key"],
"filename": asset["original_filename"],
"size": asset["file_size"],
"mime_type": asset["mime_type"],
"status": asset["verification_status"],
"downloads": asset["downloads_count"],
"password_protected": asset["password_protected"],
"created_at": asset["created_at"].isoformat()
}
@app.get("/file/{public_key}/thumb")
async def file_thumbnail(public_key: str, request: Request):
"""
Redirect to the file's thumbnail.
"""
client_ip = request.client.host
if not check_rate_limit_ip(client_ip):
raise HTTPException(429, "Rate limit exceeded")
async with db_pool.acquire() as conn:
asset = await conn.fetchrow("""
SELECT a.blob_hash, b.verification_status
FROM storage.user_assets a
JOIN storage.physical_blobs b ON a.blob_hash = b.content_hash
WHERE a.public_key = $1
""", public_key)
if not asset:
raise HTTPException(404, "Asset not found")
if asset["verification_status"] != "VERIFIED":
raise HTTPException(202, "Thumbnail not ready")
# URL of the thumbnail
thumb_key = f"{asset['blob_hash']}.thumb"
try:
# Check that it exists
s3_client.head_object(Bucket=R2_BUCKET, Key=thumb_key)
except:
raise HTTPException(404, "Thumbnail not available")
presigned_url = s3_client.generate_presigned_url(
"get_object",
Params={"Bucket": R2_BUCKET, "Key": thumb_key},
ExpiresIn=PRESIGNED_DOWNLOAD_EXPIRY
)
return RedirectResponse(presigned_url, status_code=302)
# =========================================================================
# HEALTH
# =========================================================================
@app.get("/health")
async def health():
return {"status": "ok", "timestamp": datetime.now().isoformat()}
# =========================================================================
# MAIN
# =========================================================================
if __name__ == "__main__":
uvicorn.run(app, host="0.0.0.0", port=8080)
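
For orientation, here is a minimal client-side sketch of the upload flow these endpoints expect: hash the file, call /upload/init, PUT directly to the presigned URL unless the response reports deduplication, then confirm via /upload/complete. The base URL and the use of the requests library are illustration-only assumptions; this sketch is not part of the commit.

# Hypothetical client sketch for the upload flow above -- not part of this commit.
# Assumes the API is reachable at http://localhost:8080 and that `requests` is installed.
import hashlib
import mimetypes
import os
import requests

API_BASE = "http://localhost:8080"  # assumed deployment URL

def upload_file(path: str, user_id: str) -> dict:
    data = open(path, "rb").read()
    content_hash = hashlib.sha256(data).hexdigest()
    mime = mimetypes.guess_type(path)[0] or "application/octet-stream"
    # 1. Declare the upload; the server answers with a presigned URL or a dedup hit.
    init = requests.post(f"{API_BASE}/upload/init", json={
        "hash": content_hash,
        "size": len(data),
        "mime_type": mime,
        "filename": os.path.basename(path),
        "user_id": user_id,
    }).json()
    if init.get("deduplicated"):
        # Blob already verified server-side; the asset was created without a transfer.
        return init
    # 2. Upload the bytes straight to R2 via the presigned URL (Content-Type must match).
    requests.put(init["presigned_url"], data=data, headers={"Content-Type": mime})
    # 3. Confirm; the server queues hash verification and derivative generation.
    return requests.post(
        f"{API_BASE}/upload/complete/{content_hash}",
        params={"user_id": user_id, "filename": os.path.basename(path)},
    ).json()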


@@ -0,0 +1,480 @@
#!/usr/bin/env python3
"""
Storage Worker - file verification and processing
Spec: Hybrid Storage System v4.0
"""
import os
import hashlib
import json
import asyncio
import asyncpg
from datetime import datetime
from typing import Optional, Dict, Any
import boto3
from PIL import Image
import fitz # PyMuPDF
import io
import tempfile
# Configuration
R2_ENDPOINT = os.environ.get("R2_ENDPOINT", "https://7dedae6030f5554d99d37e98a5232996.r2.cloudflarestorage.com")
R2_BUCKET = os.environ.get("R2_BUCKET", "deck")
DB_URL = os.environ.get("DATABASE_URL", "postgresql://tzzr:tzzr@localhost:5432/tzzr")
THUMB_WIDTH = 300
MAX_RETRIES = 9
RETRY_BACKOFF_BASE = 2
def get_s3_client():
return boto3.client(
"s3",
endpoint_url=R2_ENDPOINT,
aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
)
async def get_db_pool():
return await asyncpg.create_pool(DB_URL, min_size=2, max_size=10)
def calculate_sha256(data: bytes) -> str:
"""Calcula SHA-256 de bytes"""
return hashlib.sha256(data).hexdigest()
def generate_public_key(content_hash: str, user_id: str) -> str:
"""Genera public_key única para un asset"""
data = f"{content_hash}{user_id}{datetime.now().isoformat()}"
return hashlib.sha256(data.encode()).hexdigest()
class StorageWorker:
def __init__(self):
self.s3 = get_s3_client()
self.pool = None
async def init(self):
self.pool = await get_db_pool()
async def close(self):
if self.pool:
await self.pool.close()
# =========================================================================
# HASH VERIFICATION
# =========================================================================
async def verify_blob(self, declared_hash: str, storage_path: str) -> Dict[str, Any]:
"""
Verify that the declared hash matches the actual content.
NEVER trust the client-supplied hash.
"""
try:
# Download the file
obj = self.s3.get_object(Bucket=R2_BUCKET, Key=storage_path)
content = obj["Body"].read()
# Compute the real hash
calculated_hash = calculate_sha256(content)
if calculated_hash != declared_hash:
# HASH MISMATCH - corrupt file or spoofing
await self._mark_corrupt(declared_hash, storage_path)
return {
"status": "CORRUPT",
"declared": declared_hash,
"calculated": calculated_hash,
"action": "deleted"
}
# Hash matches - mark as verified
await self._mark_verified(declared_hash)
return {
"status": "VERIFIED",
"hash": declared_hash,
"size": len(content)
}
except Exception as e:
return {"status": "ERROR", "error": str(e)}
async def _mark_corrupt(self, content_hash: str, storage_path: str):
"""Marca blob como corrupto y elimina archivo"""
async with self.pool.acquire() as conn:
await conn.execute("""
UPDATE storage.physical_blobs
SET verification_status = 'CORRUPT', updated_at = NOW()
WHERE content_hash = $1
""", content_hash)
# Delete the file from the bucket
try:
self.s3.delete_object(Bucket=R2_BUCKET, Key=storage_path)
except:
pass
async def _mark_verified(self, content_hash: str):
"""Marca blob como verificado"""
async with self.pool.acquire() as conn:
await conn.execute("""
UPDATE storage.physical_blobs
SET verification_status = 'VERIFIED',
last_verified_at = NOW(),
updated_at = NOW()
WHERE content_hash = $1
""", content_hash)
# =========================================================================
# DERIVATIVE GENERATION
# =========================================================================
async def generate_derivatives(self, content_hash: str) -> Dict[str, Any]:
"""Genera thumbnail y metadatos para un blob verificado"""
async with self.pool.acquire() as conn:
blob = await conn.fetchrow("""
SELECT content_hash, mime_type, storage_path, file_size
FROM storage.physical_blobs
WHERE content_hash = $1 AND verification_status = 'VERIFIED'
""", content_hash)
if not blob:
return {"status": "ERROR", "error": "Blob not found or not verified"}
mime_type = blob["mime_type"]
storage_path = blob["storage_path"]
# Download the file
obj = self.s3.get_object(Bucket=R2_BUCKET, Key=storage_path)
content = obj["Body"].read()
metadata = {
"content_hash": content_hash,
"mime_type": mime_type,
"file_size": blob["file_size"],
"processed_at": datetime.now().isoformat()
}
thumb_generated = False
# Generate a thumbnail based on the file type
if mime_type.startswith("image/"):
thumb_data, extra_meta = self._process_image(content)
metadata.update(extra_meta)
if thumb_data:
await self._save_thumb(content_hash, thumb_data)
thumb_generated = True
elif mime_type == "application/pdf":
thumb_data, extra_meta = self._process_pdf(content)
metadata.update(extra_meta)
if thumb_data:
await self._save_thumb(content_hash, thumb_data)
thumb_generated = True
# Save the metadata
await self._save_metadata(content_hash, metadata)
return {
"status": "OK",
"thumb_generated": thumb_generated,
"metadata": metadata
}
def _process_image(self, content: bytes) -> tuple:
"""Procesa imagen: genera thumb y extrae metadatos"""
try:
img = Image.open(io.BytesIO(content))
# Metadata
meta = {
"width": img.width,
"height": img.height,
"format": img.format,
"mode": img.mode
}
# EXIF, if available
if hasattr(img, '_getexif') and img._getexif():
meta["has_exif"] = True
# Generate the thumbnail
ratio = THUMB_WIDTH / img.width
new_height = int(img.height * ratio)
thumb = img.copy()
thumb.thumbnail((THUMB_WIDTH, new_height), Image.Resampling.LANCZOS)
# Convert to bytes
thumb_buffer = io.BytesIO()
thumb.save(thumb_buffer, format="JPEG", quality=85)
thumb_data = thumb_buffer.getvalue()
return thumb_data, meta
except Exception as e:
return None, {"error": str(e)}
def _process_pdf(self, content: bytes) -> tuple:
"""Procesa PDF: genera thumb de primera página y extrae metadatos"""
try:
doc = fitz.open(stream=content, filetype="pdf")
meta = {
"pages": len(doc),
"format": "PDF"
}
# Document metadata
pdf_meta = doc.metadata
if pdf_meta:
if pdf_meta.get("author"):
meta["author"] = pdf_meta["author"]
if pdf_meta.get("title"):
meta["title"] = pdf_meta["title"]
# Render the first page as a thumbnail
if len(doc) > 0:
page = doc[0]
# Scale so the width equals THUMB_WIDTH
zoom = THUMB_WIDTH / page.rect.width
mat = fitz.Matrix(zoom, zoom)
pix = page.get_pixmap(matrix=mat)
thumb_data = pix.tobytes("jpeg")
else:
thumb_data = None
doc.close()
return thumb_data, meta
except Exception as e:
return None, {"error": str(e)}
async def _save_thumb(self, content_hash: str, thumb_data: bytes):
"""Guarda thumbnail en el bucket"""
key = f"{content_hash}.thumb"
self.s3.put_object(
Bucket=R2_BUCKET,
Key=key,
Body=thumb_data,
ContentType="image/jpeg"
)
async def _save_metadata(self, content_hash: str, metadata: dict):
"""Guarda metadatos JSON en el bucket"""
key = f"{content_hash}.json"
self.s3.put_object(
Bucket=R2_BUCKET,
Key=key,
Body=json.dumps(metadata, indent=2),
ContentType="application/json"
)
# =========================================================================
# FULL PROCESSING
# =========================================================================
async def process_upload(
self,
declared_hash: str,
storage_path: str,
user_id: str,
original_filename: str,
access_password: Optional[str] = None
) -> Dict[str, Any]:
"""
Full post-upload process:
1. Verify the hash
2. Generate derivatives
3. Create the user_asset
"""
# 1. Verify the hash
verify_result = await self.verify_blob(declared_hash, storage_path)
if verify_result["status"] != "VERIFIED":
return verify_result
# 2. Generate derivatives (with retries)
for attempt in range(MAX_RETRIES):
try:
deriv_result = await self.generate_derivatives(declared_hash)
if deriv_result["status"] == "OK":
break
except Exception as e:
if attempt == MAX_RETRIES - 1:
# Last attempt failed, but the blob is already verified
deriv_result = {"status": "PARTIAL", "error": str(e)}
else:
await asyncio.sleep(RETRY_BACKOFF_BASE ** attempt)
# 3. Create the user_asset
public_key = generate_public_key(declared_hash, user_id)
async with self.pool.acquire() as conn:
await conn.execute("""
INSERT INTO storage.user_assets
(public_key, blob_hash, user_id, original_filename, access_password)
VALUES ($1, $2, $3, $4, $5)
""", public_key, declared_hash, user_id, original_filename, access_password)
return {
"status": "CREATED",
"public_key": public_key,
"content_hash": declared_hash,
"derivatives": deriv_result
}
# =========================================================================
# BLOB REGISTRATION (no upload - for existing files)
# =========================================================================
async def register_blob(
self,
content_hash: str,
file_size: int,
mime_type: str,
storage_provider: str,
storage_path: str
) -> Dict[str, Any]:
"""Registra un blob existente en el sistema"""
async with self.pool.acquire() as conn:
# Check whether it already exists
existing = await conn.fetchrow("""
SELECT content_hash, verification_status
FROM storage.physical_blobs
WHERE content_hash = $1
""", content_hash)
if existing:
return {
"status": "EXISTS",
"content_hash": content_hash,
"verification_status": existing["verification_status"]
}
# Insert the new blob
await conn.execute("""
INSERT INTO storage.physical_blobs
(content_hash, file_size, mime_type, storage_provider, storage_path)
VALUES ($1, $2, $3, $4::storage.storage_provider_enum, $5)
""", content_hash, file_size, mime_type, storage_provider, storage_path)
return {
"status": "REGISTERED",
"content_hash": content_hash,
"verification_status": "PENDING"
}
# =========================================================================
# MAINTENANCE
# =========================================================================
async def garbage_collect(self, dry_run: bool = True) -> Dict[str, Any]:
"""
Delete orphaned blobs (ref_count = 0, not updated in 30 days)
"""
async with self.pool.acquire() as conn:
orphans = await conn.fetch("""
SELECT content_hash, storage_path
FROM storage.physical_blobs
WHERE ref_count = 0
AND updated_at < NOW() - INTERVAL '30 days'
""")
deleted = []
for blob in orphans:
if not dry_run:
# Delete derivatives
for ext in [".thumb", ".json"]:
try:
self.s3.delete_object(Bucket=R2_BUCKET, Key=f"{blob['content_hash']}{ext}")
except:
pass
# Delete the blob
try:
self.s3.delete_object(Bucket=R2_BUCKET, Key=blob["storage_path"])
except:
pass
# Delete the database record
async with self.pool.acquire() as conn:
await conn.execute("""
DELETE FROM storage.physical_blobs WHERE content_hash = $1
""", blob["content_hash"])
deleted.append(blob["content_hash"])
return {
"status": "OK",
"dry_run": dry_run,
"orphans_found": len(orphans),
"deleted": deleted if not dry_run else []
}
async def integrity_check(self, sample_percent: float = 0.01) -> Dict[str, Any]:
"""
Verify the integrity of a random sample of blobs
"""
async with self.pool.acquire() as conn:
blobs = await conn.fetch("""
SELECT content_hash, storage_path
FROM storage.physical_blobs
WHERE verification_status = 'VERIFIED'
ORDER BY RANDOM()
LIMIT (SELECT CEIL(COUNT(*) * $1) FROM storage.physical_blobs WHERE verification_status = 'VERIFIED')
""", sample_percent)
results = {"checked": 0, "ok": 0, "corrupt": []}
for blob in blobs:
results["checked"] += 1
verify = await self.verify_blob(blob["content_hash"], blob["storage_path"])
if verify["status"] == "VERIFIED":
results["ok"] += 1
else:
results["corrupt"].append(blob["content_hash"])
return results
# CLI for testing
async def main():
import sys
worker = StorageWorker()
await worker.init()
if len(sys.argv) < 2:
print("Usage: storage_worker.py <command> [args]")
print("Commands: gc, integrity, register")
return
cmd = sys.argv[1]
if cmd == "gc":
dry_run = "--execute" not in sys.argv
result = await worker.garbage_collect(dry_run=dry_run)
print(json.dumps(result, indent=2))
elif cmd == "integrity":
result = await worker.integrity_check()
print(json.dumps(result, indent=2))
elif cmd == "register":
if len(sys.argv) < 6:
print("Usage: storage_worker.py register <hash> <size> <mime> <path>")
return
result = await worker.register_blob(
sys.argv[2], int(sys.argv[3]), sys.argv[4], "R2_PRIMARY", sys.argv[5]
)
print(json.dumps(result, indent=2))
await worker.close()
if __name__ == "__main__":
asyncio.run(main())
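
As a usage note, the worker's maintenance entry points could be wired into a periodic job roughly as sketched below; the nightly schedule and the dry-run-first policy are assumptions, not something this commit ships.

# Hypothetical periodic maintenance runner built on the worker above -- not part of this commit.
import asyncio
import json

from storage_worker import StorageWorker

async def nightly_maintenance():
    worker = StorageWorker()
    await worker.init()
    try:
        # Report orphaned blobs first; pass dry_run=False only once the report looks sane.
        gc_report = await worker.garbage_collect(dry_run=True)
        print(json.dumps(gc_report, indent=2))
        # Re-hash a 1% random sample of VERIFIED blobs against their stored content hashes.
        integrity = await worker.integrity_check(sample_percent=0.01)
        print(json.dumps(integrity, indent=2))
    finally:
        await worker.close()

if __name__ == "__main__":
    asyncio.run(nightly_maintenance())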


@@ -0,0 +1,130 @@
#!/usr/bin/env python3
"""
Sync metadata from the JSON files in the R2 bucket into storage.physical_blobs
"""
import os
import json
import boto3
import asyncio
import asyncpg
R2_ENDPOINT = "https://7dedae6030f5554d99d37e98a5232996.r2.cloudflarestorage.com"
R2_BUCKET = "deck"
def get_s3_client():
return boto3.client(
"s3",
endpoint_url=R2_ENDPOINT,
aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
)
async def sync():
s3 = get_s3_client()
pool = await asyncpg.create_pool(
"postgresql:///tzzr?host=/var/run/postgresql",
min_size=2, max_size=10
)
async with pool.acquire() as conn:
blobs = await conn.fetch("""
SELECT content_hash, storage_path
FROM storage.physical_blobs
WHERE file_size = 0
""")
print(f"Sincronizando metadata para {len(blobs)} blobs...")
updated = 0
errors = 0
for blob in blobs:
hash = blob["content_hash"]
json_key = f"{hash}.json"
try:
obj = s3.get_object(Bucket=R2_BUCKET, Key=json_key)
meta = json.loads(obj["Body"].read())
# Extract the data
l2 = meta.get("jsonb_standard", {}).get("L2_document", {})
size_bytes = l2.get("size_bytes", 0)
mime_type = l2.get("mime_type")
ext = meta.get("ext", "pdf")
url_atc = meta.get("url_atc", [])
storage_path = url_atc[0] if url_atc else f"{hash}.{ext}"
if not mime_type:
if ext == "pdf":
mime_type = "application/pdf"
elif ext in ("jpg", "jpeg"):
mime_type = "image/jpeg"
elif ext == "png":
mime_type = "image/png"
else:
mime_type = "application/octet-stream"
# Get the real file size if it is not in the JSON
if size_bytes == 0:
try:
file_obj = s3.head_object(Bucket=R2_BUCKET, Key=storage_path)
size_bytes = file_obj.get("ContentLength", 0)
except:
pass
# Update the record
await conn.execute("""
UPDATE storage.physical_blobs
SET file_size = $2,
mime_type = $3,
storage_path = $4
WHERE content_hash = $1
""", hash, size_bytes, mime_type, storage_path)
updated += 1
if updated % 100 == 0:
print(f" Actualizados: {updated}")
except s3.exceptions.NoSuchKey:
# The JSON does not exist; try to get the size from the file directly
try:
# Try different extensions
for ext in ["pdf", "png", "jpg"]:
try:
file_key = f"{hash}.{ext}"
file_obj = s3.head_object(Bucket=R2_BUCKET, Key=file_key)
size_bytes = file_obj.get("ContentLength", 0)
content_type = file_obj.get("ContentType", "application/octet-stream")
await conn.execute("""
UPDATE storage.physical_blobs
SET file_size = $2,
mime_type = $3,
storage_path = $4
WHERE content_hash = $1
""", hash, size_bytes, content_type, file_key)
updated += 1
break
except:
continue
except Exception as e:
errors += 1
except Exception as e:
errors += 1
print(f"Error en {hash}: {e}")
print(f"\nSincronización completada:")
print(f" - Actualizados: {updated}")
print(f" - Errores: {errors}")
await pool.close()
if __name__ == "__main__":
asyncio.run(sync())