import os

import gradio as gr
import torch
from transformers import pipeline

MODEL_ID = "openai/whisper-small"


def load_asr():
    # Prefer GPU if available, else CPU. For transformers pipelines:
    # device: int index for CUDA, or -1 for CPU.
    device = 0 if torch.cuda.is_available() else -1
    print(f"🎤 Loading transcription pipeline on {'GPU' if device == 0 else 'CPU'}...")
    return pipeline(
        task="automatic-speech-recognition",
        model=MODEL_ID,
        device=device,
    )


asr = load_asr()


def transcribe_audio(audio_file_path):
    if not audio_file_path:
        return "Please upload an audio file."
    if not os.path.exists(audio_file_path):
        return f"Error: file not found at {audio_file_path}"
    print(f"→ Transcribing: {audio_file_path}")
    try:
        # chunk_length_s works with Whisper in transformers
        result = asr(audio_file_path, chunk_length_s=30, return_timestamps=True)
        # result is a dict with "text" and possibly "chunks"
        return result.get("text", "").strip() or "(No text recognized)"
    except Exception as e:
        return f"Error during transcription: {e}"


iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath", label="Upload audio (MP3/WAV)"),
    outputs=gr.Textbox(label="Transcription"),
    title="Audio Transcription Pipeline",
    description="Upload an audio file and get a Whisper-small transcription.",
)

if __name__ == "__main__":
    # Bind to all interfaces for Docker/Spaces
    iface.launch(server_name="0.0.0.0", server_port=7860)
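
# For reference, a sketch of what the pipeline returns with
# return_timestamps=True. The "text"/"chunks" keys come from the
# transformers ASR pipeline; the sample values below are invented
# placeholders, and actual chunk boundaries depend on the audio:
#
# {
#     "text": "Hello world, this is a test.",
#     "chunks": [
#         {"timestamp": (0.0, 3.5), "text": "Hello world,"},
#         {"timestamp": (3.5, 6.1), "text": "this is a test."},
#     ],
# }
#
# transcribe_audio only returns result["text"]; the per-chunk
# timestamps in "chunks" are available if the UI ever needs them.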