# sumaq/backend/app/services/transcription.py

import asyncio
import os
import tempfile

import google.generativeai as genai
from fastapi import HTTPException, UploadFile

from app.core.config import settings

class TranscriptionService:
    """Audio transcription helper backed by the Gemini generative API."""

    @staticmethod
    def _generate_transcript(audio_path: str, mime_type: str) -> str:
        """Upload the audio file at *audio_path* and return Gemini's transcript.

        Every SDK call here is synchronous, so this helper is meant to be run
        in a worker thread when invoked from async code.
        """
        genai.configure(api_key=settings.GOOGLE_API_KEY)
        audio_file = genai.upload_file(path=audio_path, mime_type=mime_type)

        # Use Gemini 2.0 Flash Lite
        model = genai.GenerativeModel("gemini-2.0-flash-lite")
        response = model.generate_content([
            "Por favor, transcribe exactamente lo que se dice en este audio. Solo devuelve el texto transcrito.",
            audio_file,
        ])
        return response.text

    @staticmethod
    async def transcribe_audio(file: UploadFile) -> str:
        """Transcribe an uploaded audio file with Gemini.

        Args:
            file: The uploaded audio. Its filename extension (default
                ``.wav``) is used as the temp-file suffix and its content
                type (default ``audio/wav``) as the MIME type sent to Gemini.

        Returns:
            The transcribed text, or a mock placeholder message when
            ``settings.GOOGLE_API_KEY`` is not configured.

        Raises:
            HTTPException: With status 500 and the underlying error message
                as detail if reading, uploading or transcribing fails.
        """
        if not settings.GOOGLE_API_KEY:
            return f"[MOCK GEMINI TRANSCRIPTION] Se ha recibido un archivo de audio de tipo {file.content_type}. Configure GOOGLE_API_KEY para transcripción real con Gemini."

        tmp_path = None
        try:
            # The upload may arrive without a filename; fall back to ".wav"
            # instead of letting splitext(None) raise a TypeError.
            suffix = os.path.splitext(file.filename or "")[1] or ".wav"

            # Create a temporary file to store the upload. delete=False keeps
            # it on disk after the handle closes so the SDK can read it by path.
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                # Record the path first so cleanup still happens if the
                # read or write below fails.
                tmp_path = tmp.name
                tmp.write(await file.read())

            # Run the blocking SDK calls in a worker thread so the event loop
            # stays responsive during the upload and generation round-trips.
            return await asyncio.to_thread(
                TranscriptionService._generate_transcript,
                tmp_path,
                file.content_type or "audio/wav",
            )

        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e)) from e

        finally:
            # Always remove the temp file, including on failure paths.
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError:
                    # Best-effort cleanup: a failed delete must not mask the
                    # transcription result or the original error.
                    pass