"""Audio transcription endpoint backed by Google Gemini.

Exposes ``POST /transcription/``, which accepts an uploaded audio file and
returns its transcription. Every route on this router depends on
``get_current_active_user``. When ``GOOGLE_API_KEY`` is not set, the
endpoint returns a mock response so the rest of the app can be developed
without credentials.
"""

import os
import tempfile

import google.generativeai as genai
from dotenv import load_dotenv
from fastapi import APIRouter, Depends, File, HTTPException, UploadFile

from security import get_current_active_user

load_dotenv()

router = APIRouter(
    prefix="/transcription",
    tags=["Transcription"],
    dependencies=[Depends(get_current_active_user)],
)

# Initialize Google Gemini (only when a key is configured).
api_key = os.getenv("GOOGLE_API_KEY")
if api_key:
    genai.configure(api_key=api_key)

# Gemini model used for audio-to-text.
_TRANSCRIPTION_MODEL = "gemini-2.5-flash-lite"

# Suffix / MIME type used when the upload does not provide them.
_DEFAULT_SUFFIX = ".wav"
_DEFAULT_MIME_TYPE = "audio/wav"

_TRANSCRIPTION_PROMPT = (
    "Por favor, transcribe exactamente lo que se dice en este audio. "
    "Solo devuelve el texto transcrito."
)


@router.post("/")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file with Gemini.

    Args:
        file: The uploaded audio file. Its extension and content type are
            forwarded to Gemini; ``.wav`` / ``audio/wav`` are used when the
            upload does not carry them.

    Returns:
        A dict ``{"text": <transcription>}``. If ``GOOGLE_API_KEY`` is not
        set, ``text`` is a mock message instead of a real transcription.

    Raises:
        HTTPException: 500 with the underlying error message if saving the
            upload, sending it to Gemini, or generating the transcription
            fails.
    """
    if not os.getenv("GOOGLE_API_KEY"):
        # Mock transcription for development if no key is present.
        return {
            "text": (
                "[MOCK GEMINI TRANSCRIPTION] Se ha recibido un archivo de "
                f"audio de tipo {file.content_type}. Configure "
                "GOOGLE_API_KEY para transcripción real con Gemini."
            )
        }

    tmp_path = None
    try:
        # `filename` may be missing on the upload; fall back to the default
        # suffix instead of failing inside os.path.splitext(None).
        suffix = os.path.splitext(file.filename or "")[1] or _DEFAULT_SUFFIX

        # Persist the upload to disk because genai.upload_file takes a path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            # Record the path first so the `finally` block below can remove
            # the file even if reading or writing the upload fails.
            tmp_path = tmp.name
            tmp.write(await file.read())

        # NOTE(review): the Gemini SDK calls below are synchronous and block
        # the event loop while they run; consider offloading them to a
        # thread pool if this endpoint sees concurrent traffic.
        audio_file = genai.upload_file(
            path=tmp_path,
            mime_type=file.content_type or _DEFAULT_MIME_TYPE,
        )

        model = genai.GenerativeModel(_TRANSCRIPTION_MODEL)
        response = model.generate_content([_TRANSCRIPTION_PROMPT, audio_file])

        # The file uploaded to Gemini is not deleted here; it could be
        # removed explicitly with genai.delete_file(audio_file.name).
        return {"text": response.text}
    except Exception as exc:
        # Top-level boundary: surface any failure as a 500, keeping the
        # original exception chained for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
    finally:
        # Always remove the local temp file, on success and on failure.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup; never mask the real result or error.
                pass