# sumaq/backend/app/routers/transcription.py

import asyncio
import os
import tempfile

import google.generativeai as genai
from dotenv import load_dotenv
from fastapi import APIRouter, Depends, UploadFile, File, HTTPException

from app.security import get_current_active_user

# Load environment variables (python-dotenv) before the configuration below
# reads GOOGLE_API_KEY from the process environment.
load_dotenv()

# Router for the transcription endpoints. The router-level dependency means
# get_current_active_user is resolved for every route registered on it.
router = APIRouter(
    prefix="/transcription",
    tags=["Transcription"],
    dependencies=[Depends(get_current_active_user)]
)

# Initialize Google Gemini once at import time. When GOOGLE_API_KEY is unset
# the SDK is left unconfigured; the endpoint in this module checks the same
# variable and answers with a mock transcription in that case.
api_key = os.getenv("GOOGLE_API_KEY")
if api_key:
    genai.configure(api_key=api_key)


@router.post("/")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with Google Gemini.

    The upload is spooled to a temporary file on disk, sent to Gemini through
    ``genai.upload_file`` and transcribed with a single ``generate_content``
    call. When ``GOOGLE_API_KEY`` is not set, a mock transcription is
    returned instead so the endpoint stays usable in development.

    Args:
        file: The uploaded audio. Its filename extension (default ``.wav``)
            and content type (default ``audio/wav``) are forwarded to Gemini.

    Returns:
        A dict of the form ``{"text": <transcription>}``.

    Raises:
        HTTPException: With status 500 and the underlying error message as
            ``detail`` when storing, uploading or transcribing fails.
    """
    if not os.getenv("GOOGLE_API_KEY"):
        # Mock transcription for development if no key is present
        return {
            "text": (
                "[MOCK GEMINI TRANSCRIPTION] Se ha recibido un archivo de audio "
                f"de tipo {file.content_type}. Configure GOOGLE_API_KEY para "
                "transcripción real con Gemini."
            )
        }

    tmp_path = None
    try:
        # UploadFile.filename may be None; fall back to "" so splitext does
        # not raise and the default ".wav" suffix is used.
        suffix = os.path.splitext(file.filename or "")[1] or ".wav"
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            # Record the path before writing so the file is still removed in
            # `finally` if the read or write fails.
            tmp_path = tmp.name
            tmp.write(await file.read())

        # The Gemini SDK calls below are synchronous network requests; run
        # them in a worker thread so the event loop is not blocked.
        audio_file = await asyncio.to_thread(
            genai.upload_file,
            path=tmp_path,
            mime_type=file.content_type or "audio/wav",
        )

        model = genai.GenerativeModel("gemini-2.5-flash-lite")
        response = await asyncio.to_thread(
            model.generate_content,
            [
                "Por favor, transcribe exactamente lo que se dice en este audio. Solo devuelve el texto transcrito.",
                audio_file,
            ],
        )

        # NOTE: the file uploaded to Gemini is not deleted explicitly here;
        # call genai.delete_file(audio_file.name) if immediate removal on the
        # service side is required.
        return {"text": response.text}

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        # Always remove the local temp file, on success and on failure alike.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: a failed unlink must not mask the
                # response or the original error.
                pass