import os

import gradio as gr
import torch
from transformers import pipeline

MODEL_ID = "openai/whisper-small"


def load_asr():
    # Prefer GPU if available, else CPU. For transformers pipelines:
    # device: int index for CUDA, or -1 for CPU.
    device = 0 if torch.cuda.is_available() else -1
    print(f"🎤 Loading transcription pipeline on {'GPU' if device == 0 else 'CPU'}...")
    return pipeline(
        task="automatic-speech-recognition",
        model=MODEL_ID,
        device=device,
    )


asr = load_asr()


def transcribe_audio(audio_file_path):
    if not audio_file_path:
        return "Please upload an audio file."
    if not os.path.exists(audio_file_path):
        return f"Error: file not found at {audio_file_path}"
    print(f"→ Transcribing: {audio_file_path}")
    try:
        # chunk_length_s works with Whisper in transformers
        result = asr(audio_file_path, chunk_length_s=30, return_timestamps=True)
        # result is a dict with "text" and possibly "chunks"
        return result.get("text", "").strip() or "(No text recognized)"
    except Exception as e:
        return f"Error during transcription: {e}"


iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath", label="Upload audio (MP3/WAV)"),
    outputs=gr.Textbox(label="Transcription"),
    title="Audio Transcription Pipeline",
    description="Upload an audio file and get a Whisper-small transcription.",
)

if __name__ == "__main__":
    # Bind to all interfaces for Docker/Spaces
    iface.launch(server_name="0.0.0.0", server_port=7860)
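
# For reference, a sketch of what the pipeline returns with
# return_timestamps=True. The "text"/"chunks" keys come from the
# transformers ASR pipeline; the sample values below are invented
# placeholders, and actual chunk boundaries depend on the audio:
#
# {
#     "text": "Hello world, this is a test.",
#     "chunks": [
#         {"timestamp": (0.0, 3.5), "text": "Hello world,"},
#         {"timestamp": (3.5, 6.1), "text": "this is a test."},
#     ],
# }
#
# transcribe_audio only returns result["text"]; the per-chunk
# timestamps in "chunks" are available if the UI ever needs them.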