#!/usr/bin/env python3
"""
Sync metadata from the JSON sidecar files in the R2 bucket into
storage.physical_blobs.

Expects AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY in the environment,
plus a local PostgreSQL socket at /var/run/postgresql serving the "tzzr"
database.
"""

import asyncio
import json
import os

import asyncpg
import boto3

R2_ENDPOINT = "https://7dedae6030f5554d99d37e98a5232996.r2.cloudflarestorage.com"
R2_BUCKET = "deck"


def get_s3_client():
    # Cloudflare R2 is S3-compatible, so a plain boto3 S3 client pointed at
    # the R2 endpoint is all that is needed.
    return boto3.client(
        "s3",
        endpoint_url=R2_ENDPOINT,
        aws_access_key_id=os.environ.get("AWS_ACCESS_KEY_ID"),
        aws_secret_access_key=os.environ.get("AWS_SECRET_ACCESS_KEY"),
    )


async def sync():
    s3 = get_s3_client()
    pool = await asyncpg.create_pool(
        "postgresql:///tzzr?host=/var/run/postgresql",
        min_size=2, max_size=10
    )

    async with pool.acquire() as conn:
        # Only blobs whose size was never recorded still need a sync.
        blobs = await conn.fetch("""
            SELECT content_hash, storage_path
            FROM storage.physical_blobs
            WHERE file_size = 0
        """)
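
        # Columns of storage.physical_blobs this script touches (the full
        # table definition is not shown here; the types are assumptions):
        #   content_hash  text, lookup key for the blob
        #   storage_path  text, object key in the R2 bucket
        #   file_size     bigint, 0 when never synced
        #   mime_type     text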
print(f"Sincronizando metadata para {len(blobs)} blobs...")
|
|
|
|

        updated = 0
        errors = 0

        for blob in blobs:
            content_hash = blob["content_hash"]
            json_key = f"{content_hash}.json"

            try:
                # Each blob's metadata lives next to the file as <hash>.json.
                obj = s3.get_object(Bucket=R2_BUCKET, Key=json_key)
                meta = json.loads(obj["Body"].read())
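
                # Sketch of the metadata layout this script assumes; the
                # field names come from the accesses below, the values are
                # hypothetical:
                #
                #   {
                #     "ext": "pdf",
                #     "url_atc": ["<hash>.pdf"],
                #     "jsonb_standard": {
                #       "L2_document": {
                #         "size_bytes": 12345,
                #         "mime_type": "application/pdf"
                #       }
                #     }
                #   }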

                # Extract the fields used to fill the blob row.
                l2 = meta.get("jsonb_standard", {}).get("L2_document", {})
                size_bytes = l2.get("size_bytes", 0)
                mime_type = l2.get("mime_type")
                ext = meta.get("ext", "pdf")
                url_atc = meta.get("url_atc", [])
                storage_path = url_atc[0] if url_atc else f"{content_hash}.{ext}"

                # Fall back to an extension-based MIME type when the metadata
                # does not provide one.
                if not mime_type:
                    if ext == "pdf":
                        mime_type = "application/pdf"
                    elif ext in ("jpg", "jpeg"):
                        mime_type = "image/jpeg"
                    elif ext == "png":
                        mime_type = "image/png"
                    else:
                        mime_type = "application/octet-stream"

                # If the JSON did not record a size, ask R2 for the object's
                # real size.
                if size_bytes == 0:
                    try:
                        file_obj = s3.head_object(Bucket=R2_BUCKET, Key=storage_path)
                        size_bytes = file_obj.get("ContentLength", 0)
                    except Exception:
                        pass

                # Write the recovered metadata back to the blob row.
                await conn.execute("""
                    UPDATE storage.physical_blobs
                    SET file_size = $2,
                        mime_type = $3,
                        storage_path = $4
                    WHERE content_hash = $1
                """, content_hash, size_bytes, mime_type, storage_path)

                updated += 1

                if updated % 100 == 0:
                    print(f"  Updated: {updated}")

            except s3.exceptions.NoSuchKey:
                # No metadata JSON exists; try to recover size and MIME type
                # from the stored file itself by probing common extensions.
                for ext in ("pdf", "png", "jpg"):
                    try:
                        file_key = f"{content_hash}.{ext}"
                        file_obj = s3.head_object(Bucket=R2_BUCKET, Key=file_key)
                        size_bytes = file_obj.get("ContentLength", 0)
                        content_type = file_obj.get("ContentType", "application/octet-stream")

                        await conn.execute("""
                            UPDATE storage.physical_blobs
                            SET file_size = $2,
                                mime_type = $3,
                                storage_path = $4
                            WHERE content_hash = $1
                        """, content_hash, size_bytes, content_type, file_key)

                        updated += 1
                        break
                    except Exception:
                        # head_object fails for keys that do not exist; move
                        # on to the next candidate extension.
                        continue
                else:
                    # No candidate extension matched this blob.
                    errors += 1

            except Exception as e:
                errors += 1
                print(f"Error on {content_hash}: {e}")

print(f"\nSincronización completada:")
|
|
print(f" - Actualizados: {updated}")
|
|
print(f" - Errores: {errors}")
|
|
|
|
await pool.close()
|
|
|
|
|
|
if __name__ == "__main__":
    asyncio.run(sync())
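
# Typical invocation, as a sketch (the filename here is hypothetical):
#   AWS_ACCESS_KEY_ID=... AWS_SECRET_ACCESS_KEY=... python3 sync_r2_metadata.py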