# filename: elegant_arabic_transcriber.py import streamlit as st import nemo.collections.asr as nemo_asr import soundfile as sf import tempfile import os from pydub import AudioSegment import moviepy.editor as mp import time # Custom CSS for gloomy elegant styling st.markdown(""" """, unsafe_allow_html=True) # Support common audio + video file extensions. Streamlit's file_uploader uses these SUPPORTED_TYPES = ['wav', 'mp3', 'ogg', 'flac', 'm4a', 'aac', 'wma', # video types 'mp4', 'mov', 'mkv', 'avi', 'webm'] VIDEO_TYPES = {'mp4', 'mov', 'mkv', 'avi', 'webm'} # Load NeMo model once @st.cache_resource def load_model(): try: model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained( model_name="nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0" ) return model except Exception as e: # Re-raise so the UI can present a friendly error when called raise RuntimeError(f"Failed to load NeMo model: {e}") model = load_model() # Helper: Convert any audio to 16kHz mono WAV def convert_audio(uploaded_file, target_sample_rate=16000): """ Convert an uploaded audio or video file to a 16kHz mono WAV file and return the temporary file path. Supports video files by extracting the audio track first. uploaded_file can be a Streamlit UploadedFile-like object or a path-like object. """ # Determine filename/extension filename = getattr(uploaded_file, "name", None) if filename is None: # fallback name filename = "uploaded" ext = filename.split('.')[-1].lower() # Save the raw upload to a temporary file first (moviepy / pydub operate on paths) with tempfile.NamedTemporaryFile(delete=False, suffix=f".{ext}") as tmp_in: try: # uploaded_file may be a BytesIO-like with .read() data = uploaded_file.read() except Exception: # If it's already a path string, just copy with open(uploaded_file, 'rb') as fsrc: data = fsrc.read() tmp_in.write(data) tmp_in_path = tmp_in.name # If it's a video type, extract audio using moviepy try: if ext in VIDEO_TYPES: with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_out: try: clip = mp.VideoFileClip(tmp_in_path) # moviepy will write a WAV; we can ensure sample rate later with pydub clip.audio.write_audiofile(tmp_out.name, fps=target_sample_rate, logger=None) clip.close() except Exception: # fallback: try to open as audio via pydub audio = AudioSegment.from_file(tmp_in_path) audio = audio.set_frame_rate(target_sample_rate).set_channels(1) audio.export(tmp_out.name, format="wav") finally: # cleanup input video file try: os.remove(tmp_in_path) except Exception: pass return tmp_out.name else: # It's an audio file - use pydub to convert to wav 16k mono with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_out: audio = AudioSegment.from_file(tmp_in_path) audio = audio.set_frame_rate(target_sample_rate).set_channels(1) audio.export(tmp_out.name, format="wav") try: os.remove(tmp_in_path) except Exception: pass return tmp_out.name except Exception as e: # Attempt to clean up and re-raise as RuntimeError with context try: os.remove(tmp_in_path) except Exception: pass raise RuntimeError(f"Failed to convert uploaded file to WAV: {e}") # App UI st.markdown("""
Convert speech to text with precision
Powered by NeMo ASR and Streamlit | Professional Arabic Transcription Service
©NightPrince | 2025 Arabic Transcriber Pro | All rights reserved