55 lines
1.9 KiB
Python
55 lines
1.9 KiB
Python
import os
import tempfile

import google.generativeai as genai
from dotenv import load_dotenv
from fastapi import APIRouter, Depends, UploadFile, File, HTTPException
from fastapi.concurrency import run_in_threadpool

from app.security import get_current_active_user
|
|
|
|
# Load settings from a .env file (if one is present) before any code below
# reads the process environment.
load_dotenv()

# All routes registered on this router live under /transcription and declare
# get_current_active_user as a router-level dependency.
router = APIRouter(
    prefix="/transcription",
    tags=["Transcription"],
    dependencies=[Depends(get_current_active_user)],
)
|
|
|
|
# Configure the Google Gemini SDK once at import time. When GOOGLE_API_KEY is
# unset or empty the SDK is left unconfigured.
api_key = os.environ.get("GOOGLE_API_KEY")
if api_key:
    genai.configure(api_key=api_key)
|
|
|
|
|
|
@router.post("/")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file to text using Google Gemini.

    The upload is spooled to a temporary file on disk, sent to Gemini through
    ``genai.upload_file`` and transcribed with a single ``generate_content``
    call. The temporary file is removed whether or not the call succeeds.

    Args:
        file: The uploaded audio file. Its filename extension (default
            ``.wav``) and content type (default ``audio/wav``) are forwarded
            to Gemini.

    Returns:
        A dict ``{"text": <transcription>}``. When ``GOOGLE_API_KEY`` is not
        set, a mock transcription message is returned instead so the endpoint
        stays usable in development.

    Raises:
        HTTPException: With status 500 and the underlying error message as
            detail when reading, uploading or transcribing fails.
    """
    if not os.getenv("GOOGLE_API_KEY"):
        # Mock transcription for development if no key is present.
        return {"text": f"[MOCK GEMINI TRANSCRIPTION] Se ha recibido un archivo de audio de tipo {file.content_type}. Configure GOOGLE_API_KEY para transcripción real con Gemini."}

    tmp_path = None
    try:
        # UploadFile.filename may be None; fall back to an empty name so the
        # default ".wav" suffix applies instead of splitext raising TypeError.
        suffix = os.path.splitext(file.filename or "")[1] or ".wav"

        # delete=False because the Gemini SDK reads the file by path after
        # this handle has been closed; removal happens in the finally below.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = tmp.name
            tmp.write(await file.read())

        # The SDK calls below are blocking; run them in the threadpool so the
        # event loop keeps serving other requests in the meantime.
        audio_file = await run_in_threadpool(
            genai.upload_file,
            path=tmp_path,
            mime_type=file.content_type or "audio/wav",
        )

        model = genai.GenerativeModel("gemini-2.5-flash-lite")
        response = await run_in_threadpool(
            model.generate_content,
            [
                "Por favor, transcribe exactamente lo que se dice en este audio. Solo devuelve el texto transcrito.",
                audio_file,
            ],
        )

        # NOTE(review): the file uploaded to Gemini is not deleted here.
        # Confirm the service-side retention is acceptable, or call
        # genai.delete_file(audio_file.name) once the transcription is done.
        return {"text": response.text}
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError:
                # Best-effort cleanup: failing to remove the temporary file
                # must not mask the transcription result or the real error.
                pass
|