# filename: elegant_arabic_transcriber.py
"""Streamlit app that transcribes Arabic speech from uploaded audio or video.

The upload is converted to a 16 kHz mono WAV file, previewed in the browser,
and passed to a pretrained NeMo FastConformer hybrid model for transcription.
"""
import contextlib
import os
import tempfile
import time

import moviepy.editor as mp
import nemo.collections.asr as nemo_asr
import soundfile as sf
import streamlit as st
from pydub import AudioSegment

# Custom CSS for gloomy elegant styling
st.markdown(""" """, unsafe_allow_html=True)

# Support common audio + video file extensions. Streamlit's file_uploader uses these
SUPPORTED_TYPES = ['wav', 'mp3', 'ogg', 'flac', 'm4a', 'aac', 'wma',
                   # video types
                   'mp4', 'mov', 'mkv', 'avi', 'webm']
VIDEO_TYPES = {'mp4', 'mov', 'mkv', 'avi', 'webm'}


# Load NeMo model once
@st.cache_resource
def load_model():
    """Download/load the pretrained Arabic ASR model.

    Decorated with ``st.cache_resource`` so the model object is shared across
    script reruns instead of being reloaded each time.

    Returns:
        The loaded ``EncDecHybridRNNTCTCBPEModel`` instance.

    Raises:
        RuntimeError: If the model cannot be loaded; the original error is
            chained as ``__cause__``.
    """
    try:
        return nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
            model_name="nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0"
        )
    except Exception as e:
        # Re-raise so the UI can present a friendly error when called
        raise RuntimeError(f"Failed to load NeMo model: {e}") from e


try:
    model = load_model()
except RuntimeError as e:
    # Nothing below can work without a model: show the message and halt this
    # script run instead of surfacing a raw traceback.
    st.error(str(e))
    st.stop()


def _remove_quietly(path):
    """Delete *path* if given, ignoring OS-level failures (best-effort cleanup)."""
    if path:
        with contextlib.suppress(OSError):
            os.remove(path)


def _new_temp_path(suffix):
    """Create an empty temporary file and return its path with the handle closed."""
    fd, path = tempfile.mkstemp(suffix=suffix)
    os.close(fd)
    return path


def _guess_extension(uploaded_file):
    """Return the lowercase extension (no dot) of an upload, or '' if unknown."""
    name = getattr(uploaded_file, "name", None)
    if name is None and isinstance(uploaded_file, (str, os.PathLike)):
        # Plain paths carry no ``.name``; use the path itself so that video
        # files passed by path are still recognised.
        name = os.fspath(uploaded_file)
    if not name:
        return ""
    return os.path.splitext(str(name))[1].lstrip(".").lower()


def _read_upload(uploaded_file):
    """Return the raw bytes of a file-like object or of the file at a path."""
    if hasattr(uploaded_file, "read"):
        if hasattr(uploaded_file, "seek"):
            # Rewind in case the stream was already consumed by an earlier
            # read; non-seekable streams are simply read from where they are.
            with contextlib.suppress(OSError, ValueError):
                uploaded_file.seek(0)
        return uploaded_file.read()
    with open(uploaded_file, "rb") as src:
        return src.read()


def _extract_video_audio(video_path, wav_path, sample_rate):
    """Write the audio track of *video_path* to *wav_path* using moviepy."""
    clip = mp.VideoFileClip(video_path)
    try:
        if clip.audio is None:
            raise ValueError("video has no audio track")
        clip.audio.write_audiofile(wav_path, fps=sample_rate, logger=None)
    finally:
        # Always release the reader, even when extraction fails.
        clip.close()


# Helper: Convert any audio to 16kHz mono WAV
def convert_audio(uploaded_file, target_sample_rate=16000):
    """Convert an uploaded audio or video file to a mono WAV file.

    Video files have their audio track extracted with moviepy first; if that
    fails, the container is handed directly to pydub as a fallback. In every
    case the result is resampled and downmixed by pydub, so the output is
    always mono at ``target_sample_rate``.

    Args:
        uploaded_file: A Streamlit ``UploadedFile``-like object (anything with
            ``.read()``), or a path-like object / path string.
        target_sample_rate: Sample rate of the produced WAV in Hz.

    Returns:
        Path of a temporary WAV file. The caller owns it and must delete it.

    Raises:
        RuntimeError: If the input cannot be read or converted. Temporary
            files created along the way are removed before raising.
    """
    ext = _guess_extension(uploaded_file)
    tmp_in_path = _new_temp_path(f".{ext}" if ext else "")
    extracted_path = None
    tmp_out_path = None
    try:
        # Save the raw upload to a temporary file first
        # (moviepy / pydub operate on paths)
        with open(tmp_in_path, "wb") as tmp_in:
            tmp_in.write(_read_upload(uploaded_file))

        source_path = tmp_in_path
        if ext in VIDEO_TYPES:
            extracted_path = _new_temp_path(".wav")
            try:
                _extract_video_audio(tmp_in_path, extracted_path, target_sample_rate)
                source_path = extracted_path
            except Exception:
                # Deliberate best effort: fall back to letting pydub decode
                # the original container itself.
                source_path = tmp_in_path

        audio = AudioSegment.from_file(source_path)
        audio = audio.set_frame_rate(target_sample_rate).set_channels(1)
        tmp_out_path = _new_temp_path(".wav")
        exported = audio.export(tmp_out_path, format="wav")
        # pydub hands back the output handle; close it rather than leaking it.
        if hasattr(exported, "close"):
            exported.close()
        return tmp_out_path
    except Exception as e:
        _remove_quietly(tmp_out_path)
        raise RuntimeError(f"Failed to convert uploaded file to WAV: {e}") from e
    finally:
        # Intermediate files are never needed once conversion has finished.
        _remove_quietly(tmp_in_path)
        _remove_quietly(extracted_path)


def _extract_text(result):
    """Return the transcript text of the first item in a ``transcribe`` result.

    NOTE(review): the return shape of ``transcribe`` appears to differ between
    NeMo releases (hypothesis objects with ``.text``, plain strings, or a
    ``(best, all)`` tuple) - confirm against the pinned NeMo version.

    Raises:
        RuntimeError: If the result contains no items.
    """
    if isinstance(result, tuple):
        result = result[0]
    if not result:
        raise RuntimeError("The model returned no transcription.")
    first = result[0]
    return first if isinstance(first, str) else first.text


# App UI
st.markdown("""

Arabic Transcriber

Convert speech to text with precision

""", unsafe_allow_html=True)

# Main content - single wide column layout
st.markdown("""
🔊 Supports many audio formats and common video types (MP4, MOV, MKV). Upload audio or video and the app will extract audio automatically.
⚡ Fast processing with advanced AI
""", unsafe_allow_html=True)

uploaded_file = st.file_uploader("Drag and drop audio file here", type=SUPPORTED_TYPES)

if uploaded_file is not None:
    # Basic size check (Streamlit UploadedFile has .size in bytes)
    size_bytes = getattr(uploaded_file, "size", None)
    file_size_mb = size_bytes / (1024 * 1024) if size_bytes is not None else None
    if file_size_mb is not None and file_size_mb > 500:
        st.warning("Large file detected (>500MB). Processing may take a long time or fail. Consider uploading a smaller file.")

    # Identify the current upload so a stored transcript is only shown for the
    # file it was produced from.
    file_key = (getattr(uploaded_file, "name", None), size_bytes)
    if st.session_state.get("transcript_source") != file_key:
        st.session_state.pop("transcript", None)
        st.session_state.pop("transcript_source", None)

    processed_wav = None
    try:
        # Convert to 16kHz mono wav
        with st.spinner("Preparing audio for transcription..."):
            processed_wav = convert_audio(uploaded_file)

        # Show audio info
        data, sample_rate = sf.read(processed_wav)
        channels = 1 if data.ndim == 1 else data.shape[1]
        duration = len(data) / sample_rate

        # Show audio player and info
        st.audio(processed_wav, format="audio/wav")
        st.markdown("### Audio Details")
        st.markdown("""
Duration
{:.1f}s
Sample Rate
{} Hz
Channels
{}
""".format(duration, sample_rate, channels), unsafe_allow_html=True)

        # Transcription
        if st.button("Transcribe Audio", type="primary"):
            # Create a progress container
            progress_container = st.empty()
            progress_container.markdown("""
Processing audio...
""", unsafe_allow_html=True)
            time.sleep(0.8)
            progress_container.markdown("""
Transcribing content...
""", unsafe_allow_html=True)

            # Actual transcription
            try:
                with st.spinner(""):
                    result = model.transcribe([processed_wav])
                transcript = _extract_text(result)
            except Exception as e:
                st.error(f"Transcription failed: {e}")
                progress_container.empty()
                # The temp WAV is removed by the enclosing ``finally``.
                raise

            # Update progress to complete
            progress_container.markdown("""
Transcription complete
""", unsafe_allow_html=True)
            time.sleep(0.5)
            progress_container.empty()

            # Persist the result: clicking the download button reruns the
            # script, and st.button() is False on that rerun.
            st.session_state["transcript"] = transcript
            st.session_state["transcript_source"] = file_key

        transcript = st.session_state.get("transcript")
        if transcript is not None:
            st.markdown("### Transcription Results")
            st.markdown(f"""
{transcript}
""", unsafe_allow_html=True)

            # Download button
            st.download_button("Download Transcript", transcript, file_name="arabic_transcript.txt")
    finally:
        # Cleanup: a new temp WAV is produced on every rerun, so it must be
        # removed whether or not transcription was requested or succeeded.
        _remove_quietly(processed_wav)

# Minimal footer
st.markdown("---")
st.markdown("""

Powered by NeMo ASR and Streamlit | Professional Arabic Transcription Service

©NightPrince | 2025 Arabic Transcriber Pro | All rights reserved

""", unsafe_allow_html=True)