# filename: pro_arabic_transcriber.py
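"""Streamlit app for Arabic speech-to-text.

Accepts any audio or video upload, converts it to 16 kHz mono WAV with FFmpeg,
and transcribes it with NVIDIA NeMo's Arabic FastConformer hybrid RNNT/CTC model.
"""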
import streamlit as st
import nemo.collections.asr as nemo_asr
import soundfile as sf
import tempfile
import os
import time
import magic # for file type detection
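# python-magic wraps the libmagic C library, which must be installed separately
# (e.g. libmagic1 on Debian/Ubuntu, or the python-magic-bin package on Windows)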
import ffmpeg
import subprocess
from pathlib import Path
# Custom CSS for a dark, elegant theme
st.markdown("""
<style>
:root {
--primary: #3a506b;
--secondary: #5bc0be;
--accent: #e55934;
--background: #1c2541;
--card: #0b132b;
--text: #e0e0e0;
--text-secondary: #b8b8b8;
}
.stApp {
background-color: var(--background);
color: var(--text);
}
.main .block-container {
max-width: 1200px;
padding: 2rem 3rem;
}
.card {
background-color: var(--card);
border-radius: 8px;
padding: 1.5rem;
margin-bottom: 1.5rem;
border-left: 3px solid var(--secondary);
}
.header {
background: linear-gradient(135deg, #0b132b, #1c2541);
color: white;
padding: 2rem 3rem;
margin: -2rem -3rem 2rem -3rem;
border-bottom: 1px solid rgba(91, 192, 190, 0.2);
}
.stButton>button {
background: var(--primary);
color: white;
border-radius: 6px;
padding: 0.7rem 1.5rem;
font-weight: 500;
transition: all 0.2s ease;
border: 1px solid rgba(91, 192, 190, 0.3);
}
.stButton>button:hover {
background: #2c3e5a;
color: white;
}
.stDownloadButton>button {
background: var(--secondary);
color: #0b132b;
}
.stDownloadButton>button:hover {
background: #4aa8a6;
color: #0b132b;
}
.transcript-container {
background-color: rgba(11, 19, 43, 0.7);
border-radius: 8px;
padding: 1.5rem;
margin-top: 1rem;
border: 1px solid rgba(91, 192, 190, 0.1);
}
.transcript-box {
background-color: transparent;
font-size: 1.1rem;
line-height: 1.8;
min-height: 150px;
direction: rtl;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
color: var(--text);
white-space: pre-wrap;
}
.stats {
display: flex;
gap: 1rem;
margin-top: 1rem;
}
.stat-box {
background-color: rgba(58, 80, 107, 0.5);
padding: 0.8rem 1rem;
border-radius: 6px;
flex: 1;
min-width: 100px;
text-align: center;
border: 1px solid rgba(91, 192, 190, 0.1);
}
.stat-value {
font-size: 1.2rem;
font-weight: bold;
color: var(--secondary);
}
.progress-container {
height: 6px;
background-color: rgba(58, 80, 107, 0.5);
border-radius: 3px;
margin: 1.5rem 0;
overflow: hidden;
}
.progress-bar {
height: 100%;
background: linear-gradient(90deg, var(--secondary), #4aa8a6);
border-radius: 3px;
transition: width 0.4s ease;
}
h1, h2, h3 {
color: var(--text) !important;
}
.file-uploader {
border: 2px dashed var(--secondary);
border-radius: 8px;
padding: 2rem;
text-align: center;
background-color: rgba(91, 192, 190, 0.05);
margin-bottom: 1.5rem;
}
.feature-icon {
color: var(--secondary);
margin-right: 0.5rem;
}
.stSpinner > div {
border-color: var(--secondary) transparent transparent transparent !important;
}
</style>
""", unsafe_allow_html=True)
# Check if ffmpeg is available
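# (ffmpeg-python only builds and runs ffmpeg command lines; the ffmpeg binary itself must be on PATH)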
def check_ffmpeg():
try:
subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
return True
except (subprocess.SubprocessError, FileNotFoundError):
return False
if not check_ffmpeg():
st.error("FFmpeg is not installed or not found in PATH. Please install FFmpeg to use this application.")
st.markdown("""
### How to install FFmpeg:
**Windows (using Chocolatey):**
```
choco install ffmpeg
```
**Windows (manual):**
1. Download from [ffmpeg.org](https://ffmpeg.org/download.html)
2. Extract and add the bin folder to your system PATH
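**Linux (Debian/Ubuntu):**
```
sudo apt-get install ffmpeg
```
**macOS (Homebrew):**
```
brew install ffmpeg
```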
**After installing**, restart this application.
""")
st.stop()
# Accept any file - we'll detect type server-side
AUDIO_MIMETYPES = {
'audio/wav', 'audio/x-wav', 'audio/mpeg', 'audio/ogg', 'audio/flac',
'audio/x-m4a', 'audio/aac', 'audio/x-ms-wma'
}
VIDEO_MIMETYPES = {
'video/mp4', 'video/quicktime', 'video/x-matroska', 'video/x-msvideo',
'video/webm', 'video/x-ms-wmv'
}
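# These sets only pick the temp-file suffix and decide whether the audio stream is
# extracted from a video container; FFmpeg handles the actual decoding either way.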
# Load NeMo model once
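# st.cache_resource keeps the checkpoint in memory across Streamlit reruns;
# the first call downloads it and can take a while.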
@st.cache_resource
def load_model():
try:
model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
model_name="nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0"
)
return model
except Exception as e:
# Re-raise so the UI can present a friendly error when called
raise RuntimeError(f"Failed to load NeMo model: {e}")
try:
    model = load_model()
except RuntimeError as e:
    st.error(str(e))
    st.stop()
def detect_file_type(file_data):
"""Detect the MIME type of a file using python-magic"""
mime = magic.from_buffer(file_data, mime=True)
return mime
def convert_audio(uploaded_file, target_sample_rate=16000):
"""
Convert any audio or video file to a 16kHz mono WAV using FFmpeg.
Returns the path to the converted temporary WAV file.
Args:
uploaded_file: A Streamlit UploadedFile or path-like object
target_sample_rate: Output sample rate (default 16000 Hz)
Returns:
str: Path to the converted temporary WAV file
"""
try:
# Read the file data
if hasattr(uploaded_file, 'read'):
file_data = uploaded_file.read()
uploaded_file.seek(0) # Reset position for later use
else:
with open(uploaded_file, 'rb') as f:
file_data = f.read()
# Detect file type
mime_type = detect_file_type(file_data)
# Save to temporary input file
suffix = '.tmp'
if mime_type in AUDIO_MIMETYPES:
suffix = '.audio' + suffix
elif mime_type in VIDEO_MIMETYPES:
suffix = '.video' + suffix
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_in:
            # file_data already holds the full contents for both upload objects and paths
            tmp_in.write(file_data)
            tmp_in_path = tmp_in.name
        # Create output WAV file (NamedTemporaryFile avoids the race condition of the
        # deprecated tempfile.mktemp; ffmpeg overwrites the empty placeholder)
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_out:
            output_path = tmp_out.name
try:
# Build the ffmpeg conversion pipeline
stream = ffmpeg.input(tmp_in_path)
# Extract audio from video if needed
if mime_type in VIDEO_MIMETYPES:
stream = stream.audio
# Convert to 16kHz mono WAV
stream = ffmpeg.output(
stream,
output_path,
acodec='pcm_s16le', # 16-bit PCM
ac=1, # mono
ar=target_sample_rate,# sample rate
loglevel='error' # reduce ffmpeg output
)
            # Run the conversion; capture stderr so a useful message survives a failure
            ffmpeg.run(stream, overwrite_output=True, capture_stderr=True)
            return output_path
        except ffmpeg.Error as e:
            stderr = e.stderr.decode() if e.stderr else str(e)
            raise RuntimeError(f"FFmpeg error during conversion: {stderr}")
finally:
# Clean up input temp file
try:
os.remove(tmp_in_path)
except Exception:
pass
except Exception as e:
raise RuntimeError(f"Failed to convert file to WAV: {str(e)}")
# App UI
st.markdown("""
<div class="header">
<h1 style="margin-bottom: 0.5rem;">Arabic Transcriber Pro</h1>
<p style="color: var(--text-secondary); margin-top: 0;">Convert speech to text with the highest accuracy</p>
</div>
""", unsafe_allow_html=True)
# Main content - single wide column layout
st.markdown("""
<div class="card">
<div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
<span class="feature-icon">🔊</span>
<span>Supports many audio formats and common video types (MP4, MOV, MKV). Upload audio or video and the app will extract audio automatically.</span>
</div>
<div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
<span class="feature-icon">⚡</span>
<span>Fast transcription with NVIDIA's Arabic FastConformer ASR model</span>
</div>
</div>
""", unsafe_allow_html=True)
uploaded_file = st.file_uploader("Drag and drop any audio or video file here", type=None,
help="Supports any audio or video format that FFmpeg can handle")
if uploaded_file is not None:
# Basic size check (Streamlit UploadedFile has .size in bytes)
try:
file_size_mb = uploaded_file.size / (1024 * 1024)
except Exception:
file_size_mb = None
if file_size_mb is not None and file_size_mb > 500:
st.warning("Large file detected (>500MB). Processing may take a long time or fail. Consider uploading a smaller file.")
    # Convert to 16 kHz mono WAV
    with st.spinner("Preparing audio for transcription..."):
        try:
            processed_wav = convert_audio(uploaded_file)
        except RuntimeError as e:
            st.error(str(e))
            st.stop()
# Show audio info
data, sample_rate = sf.read(processed_wav)
channels = 1 if len(data.shape) == 1 else data.shape[1]
duration = len(data) / sample_rate
# Show audio player and info
st.audio(processed_wav, format="audio/wav")
st.markdown("### Audio Details")
st.markdown("""
<div class="stats">
<div class="stat-box">
<div>Duration</div>
<div class="stat-value">{:.1f}s</div>
</div>
<div class="stat-box">
<div>Sample Rate</div>
<div class="stat-value">{} Hz</div>
</div>
<div class="stat-box">
<div>Channels</div>
<div class="stat-value">{}</div>
</div>
</div>
""".format(duration, sample_rate, channels), unsafe_allow_html=True)
# Transcription
if st.button("Transcribe Audio", type="primary"):
# Create a progress container
progress_container = st.empty()
progress_container.markdown("""
<div class="progress-container">
<div class="progress-bar" style="width: 30%;"></div>
</div>
<div style="text-align: center; margin-top: 5px; color: var(--secondary);">Processing audio...</div>
""", unsafe_allow_html=True)
time.sleep(0.8)
progress_container.markdown("""
<div class="progress-container">
<div class="progress-bar" style="width: 70%;"></div>
</div>
<div style="text-align: center; margin-top: 5px; color: var(--secondary);">Transcribing content...</div>
""", unsafe_allow_html=True)
        # Actual transcription
        try:
            with st.spinner(""):
                result = model.transcribe([processed_wav])
            # Newer NeMo builds return Hypothesis objects; older ones return plain strings
            first = result[0]
            transcript = first.text if hasattr(first, "text") else first
except Exception as e:
st.error(f"Transcription failed: {e}")
# Cleanup
try:
os.remove(processed_wav)
except Exception:
pass
progress_container.empty()
raise
# Update progress to complete
progress_container.markdown("""
<div class="progress-container">
<div class="progress-bar" style="width: 100%;"></div>
</div>
<div style="text-align: center; margin-top: 5px; color: var(--secondary);">Transcription complete</div>
""", unsafe_allow_html=True)
time.sleep(0.5)
progress_container.empty()
st.markdown("### Transcription Results")
st.markdown(f"""
<div class="transcript-container">
<div class="transcript-box">{transcript}</div>
</div>
""", unsafe_allow_html=True)
# Download button
st.download_button("Download Transcript", transcript,
file_name="arabic_transcript.txt")
        # Cleanup (best effort, as elsewhere)
        try:
            os.remove(processed_wav)
        except Exception:
            pass
# Minimal footer
st.markdown("---")
st.markdown("""
<div style="text-align: center; color: var(--text-secondary); padding: 20px; font-size: 0.9rem;">
<p>Powered by NeMo ASR and Streamlit | Professional Arabic Transcription Service</p>
<p>© 2025 YahyaAlnwsany | Arabic Transcriber Pro | All rights reserved</p>
</div>
""", unsafe_allow_html=True)