|
|
|
|
|
|
|
|
import streamlit as st |
|
|
import nemo.collections.asr as nemo_asr |
|
|
import soundfile as sf |
|
|
import tempfile |
|
|
import os |
|
|
import time |
|
|
import magic |
|
|
import ffmpeg |
|
|
import subprocess |
|
|
from pathlib import Path |
|
|
|
|
|
|
|
|
# Global theme for the page: CSS custom properties (colour palette) followed by
# rules for Streamlit's own widgets (.stApp, .stButton, .stDownloadButton,
# .stSpinner) and for the custom HTML classes this script emits via
# st.markdown (header, card, stats, progress bar, transcript box).
_APP_STYLESHEET = """
<style>
:root {
    --primary: #3a506b;
    --secondary: #5bc0be;
    --accent: #e55934;
    --background: #1c2541;
    --card: #0b132b;
    --text: #e0e0e0;
    --text-secondary: #b8b8b8;
}

.stApp {
    background-color: var(--background);
    color: var(--text);
}

.main .block-container {
    max-width: 1200px;
    padding: 2rem 3rem;
}

.card {
    background-color: var(--card);
    border-radius: 8px;
    padding: 1.5rem;
    margin-bottom: 1.5rem;
    border-left: 3px solid var(--secondary);
}

.header {
    background: linear-gradient(135deg, #0b132b, #1c2541);
    color: white;
    padding: 2rem 3rem;
    margin: -2rem -3rem 2rem -3rem;
    border-bottom: 1px solid rgba(91, 192, 190, 0.2);
}

.stButton>button {
    background: var(--primary);
    color: white;
    border: none;
    border-radius: 6px;
    padding: 0.7rem 1.5rem;
    font-weight: 500;
    transition: all 0.2s ease;
    border: 1px solid rgba(91, 192, 190, 0.3);
}

.stButton>button:hover {
    background: #2c3e5a;
    color: white;
}

.stDownloadButton>button {
    background: var(--secondary);
    color: #0b132b;
}

.stDownloadButton>button:hover {
    background: #4aa8a6;
    color: #0b132b;
}

.transcript-container {
    background-color: rgba(11, 19, 43, 0.7);
    border-radius: 8px;
    padding: 1.5rem;
    margin-top: 1rem;
    border: 1px solid rgba(91, 192, 190, 0.1);
}

.transcript-box {
    background-color: transparent;
    font-size: 1.1rem;
    line-height: 1.8;
    min-height: 150px;
    direction: rtl;
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    color: var(--text);
    white-space: pre-wrap;
}

.stats {
    display: flex;
    gap: 1rem;
    margin-top: 1rem;
}

.stat-box {
    background-color: rgba(58, 80, 107, 0.5);
    padding: 0.8rem 1rem;
    border-radius: 6px;
    flex: 1;
    min-width: 100px;
    text-align: center;
    border: 1px solid rgba(91, 192, 190, 0.1);
}

.stat-value {
    font-size: 1.2rem;
    font-weight: bold;
    color: var(--secondary);
}

.progress-container {
    height: 6px;
    background-color: rgba(58, 80, 107, 0.5);
    border-radius: 3px;
    margin: 1.5rem 0;
    overflow: hidden;
}

.progress-bar {
    height: 100%;
    background: linear-gradient(90deg, var(--secondary), #4aa8a6);
    border-radius: 3px;
    transition: width 0.4s ease;
}

h1, h2, h3 {
    color: var(--text) !important;
}

.file-uploader {
    border: 2px dashed var(--secondary);
    border-radius: 8px;
    padding: 2rem;
    text-align: center;
    background-color: rgba(91, 192, 190, 0.05);
    margin-bottom: 1.5rem;
}

.feature-icon {
    color: var(--secondary);
    margin-right: 0.5rem;
}

.stSpinner > div {
    border-color: var(--secondary) transparent transparent transparent !important;
}
</style>
"""

# unsafe_allow_html is required so the <style> element is emitted as raw HTML.
st.markdown(_APP_STYLESHEET, unsafe_allow_html=True)
|
|
|
|
|
|
|
|
def check_ffmpeg():
    """Report whether the ``ffmpeg`` executable can be launched from PATH.

    Runs ``ffmpeg -version`` with its output captured and treats any failure
    to start or complete the command as "not available".

    Returns:
        bool: True if the command ran and exited with status 0, else False.
    """
    try:
        subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
    except (subprocess.SubprocessError, OSError):
        # OSError covers FileNotFoundError (binary missing) as well as cases
        # such as PermissionError (a non-executable "ffmpeg" on PATH), which
        # would otherwise escape and crash the app instead of showing the
        # installation help.
        return False
    return True
|
|
|
|
|
# Markdown shown to the user when FFmpeg cannot be found.
_FFMPEG_INSTALL_HELP = """
### How to install FFmpeg:

**Windows (using Chocolatey):**
```
choco install ffmpeg
```

**Windows (manual):**
1. Download from [ffmpeg.org](https://ffmpeg.org/download.html)
2. Extract and add the bin folder to your system PATH

**After installing**, restart this application.
"""

# Every later step shells out to FFmpeg, so halt the script run right here
# (after explaining how to install it) when the executable is unavailable.
ffmpeg_available = check_ffmpeg()
if not ffmpeg_available:
    st.error("FFmpeg is not installed or not found in PATH. Please install FFmpeg to use this application.")
    st.markdown(_FFMPEG_INSTALL_HELP)
    st.stop()
|
|
|
|
|
|
|
|
# MIME types classified as audio input when choosing the temp-file suffix.
AUDIO_MIMETYPES = {
    "audio/wav",
    "audio/x-wav",
    "audio/mpeg",
    "audio/ogg",
    "audio/flac",
    "audio/x-m4a",
    "audio/aac",
    "audio/x-ms-wma",
}

# MIME types classified as video input; for these only the audio stream is
# passed on to the WAV conversion.
VIDEO_MIMETYPES = {
    "video/mp4",
    "video/quicktime",
    "video/x-matroska",
    "video/x-msvideo",
    "video/webm",
    "video/x-ms-wmv",
}
|
|
|
|
|
|
|
|
@st.cache_resource
def load_model():
    """Load the pretrained NeMo hybrid RNNT/CTC model used for transcription.

    The function is wrapped in ``st.cache_resource`` (Streamlit's cache for
    shared resources) so the model object is not rebuilt on each script run.

    Returns:
        The model returned by
        ``EncDecHybridRNNTCTCBPEModel.from_pretrained`` for
        ``nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0``.

    Raises:
        RuntimeError: If loading fails for any reason; the original exception
            is attached as ``__cause__`` so the full traceback is preserved.
    """
    try:
        return nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
            model_name="nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0"
        )
    except Exception as e:
        raise RuntimeError(f"Failed to load NeMo model: {e}") from e


# Module-level handle used by the transcription step further down.
model = load_model()
|
|
|
|
|
def detect_file_type(file_data):
    """Return the MIME type python-magic reports for the given bytes.

    Args:
        file_data: Raw file content to inspect.

    Returns:
        The value of ``magic.from_buffer(file_data, mime=True)``.
    """
    return magic.from_buffer(file_data, mime=True)
|
|
|
|
|
def _remove_quietly(path):
    """Best-effort delete of *path*; does nothing when *path* is unset."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        # Cleanup must never mask the error (or result) being propagated.
        pass


def convert_audio(uploaded_file, target_sample_rate=16000):
    """
    Convert an audio or video file to a mono 16-bit PCM WAV using FFmpeg.

    The input is copied to a temporary file, FFmpeg writes the converted
    audio to a second temporary file, and the temporary input is removed
    before returning.

    Args:
        uploaded_file: A file-like object exposing ``read``/``seek`` (such as
            a Streamlit UploadedFile) or a path to a file on disk.
        target_sample_rate: Output sample rate in Hz (default 16000).

    Returns:
        str: Path to the converted temporary WAV file. The caller owns this
        file and is responsible for deleting it.

    Raises:
        RuntimeError: If the input cannot be read or the FFmpeg conversion
            fails. The underlying exception is chained as ``__cause__``.
    """
    tmp_in_path = None
    output_path = None
    try:
        # Read the payload once; it is used both for MIME sniffing and as the
        # content of the temporary input file.
        if hasattr(uploaded_file, 'read'):
            uploaded_file.seek(0)
            file_data = uploaded_file.read()
            uploaded_file.seek(0)  # leave the stream rewound for the caller
        else:
            with open(uploaded_file, 'rb') as f:
                file_data = f.read()

        mime_type = detect_file_type(file_data)
        is_video = mime_type in VIDEO_MIMETYPES

        suffix = '.tmp'
        if mime_type in AUDIO_MIMETYPES:
            suffix = '.audio' + suffix
        elif is_video:
            suffix = '.video' + suffix

        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_in:
            # Record the path before writing so the file is cleaned up even
            # if the write itself fails.
            tmp_in_path = tmp_in.name
            tmp_in.write(file_data)

        # Reserve the output path by actually creating the file (unlike the
        # deprecated, race-prone tempfile.mktemp). FFmpeg overwrites it below.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_out:
            output_path = tmp_out.name

        stream = ffmpeg.input(tmp_in_path)
        if is_video:
            # Select only the audio stream of a video container.
            stream = stream.audio

        stream = ffmpeg.output(
            stream,
            output_path,
            acodec='pcm_s16le',
            ac=1,
            ar=target_sample_rate,
            loglevel='error',
        )

        try:
            # capture_stderr=True is what populates ffmpeg.Error.stderr;
            # without it the attribute is None and cannot be decoded.
            ffmpeg.run(stream, overwrite_output=True, capture_stderr=True)
        except ffmpeg.Error as e:
            detail = e.stderr.decode(errors='replace') if e.stderr else str(e)
            raise RuntimeError(f"FFmpeg error during conversion: {detail}") from e

        return output_path

    except Exception as e:
        # Do not leave a partial or empty output file behind on failure.
        _remove_quietly(output_path)
        raise RuntimeError(f"Failed to convert file to WAV: {str(e)}") from e

    finally:
        _remove_quietly(tmp_in_path)
|
|
|
|
|
|
|
|
# Page banner: application title and tagline, styled by the .header CSS class.
_HEADER_HTML = """
<div class="header">
    <h1 style="margin-bottom: 0.5rem;">Arabic Transcriber Pro</h1>
    <p style="color: var(--text-secondary); margin-top: 0;">Convert speech to text with the highest accuracy</p>
</div>
"""

# Feature summary card shown above the uploader.
_FEATURES_HTML = """
<div class="card">
    <div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
        <span class="feature-icon">🔊</span>
        <span>Supports many audio formats and common video types (MP4, MOV, MKV). Upload audio or video and the app will extract audio automatically.</span>
    </div>
    <div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
        <span class="feature-icon">⚡</span>
        <span>Fast processing with advanced AI</span>
    </div>
</div>
"""

for _html_fragment in (_HEADER_HTML, _FEATURES_HTML):
    st.markdown(_html_fragment, unsafe_allow_html=True)
|
|
|
|
|
def _first_transcript(result):
    """Extract the text of the first transcription from ``model.transcribe`` output.

    NOTE(review): the return shape is assumed to vary by NeMo release - a list
    of objects with a ``.text`` attribute, a list of plain strings, or a tuple
    whose first element is such a list. Confirm against the installed version.
    """
    first = result[0]
    if isinstance(first, (list, tuple)):
        first = first[0]
    return first if isinstance(first, str) else first.text


def _show_progress(placeholder, percent, label):
    """Render the custom HTML progress bar at *percent* with a caption."""
    placeholder.markdown(
        '<div class="progress-container">'
        f'<div class="progress-bar" style="width: {percent}%;"></div>'
        '</div>'
        '<div style="text-align: center; margin-top: 5px; color: var(--secondary);">'
        f'{label}</div>',
        unsafe_allow_html=True,
    )


uploaded_file = st.file_uploader(
    "Drag and drop any audio or video file here",
    type=None,
    help="Supports any audio or video format that FFmpeg can handle",
)

if uploaded_file is not None:
    # Warn (but do not block) on very large uploads; skip the check when the
    # size is unavailable.
    size_bytes = getattr(uploaded_file, "size", None)
    if size_bytes is not None and size_bytes / (1024 * 1024) > 500:
        st.warning("Large file detected (>500MB). Processing may take a long time or fail. Consider uploading a smaller file.")

    with st.spinner("Preparing audio for transcription..."):
        processed_wav = convert_audio(uploaded_file)

    # Everything that uses the converted WAV lives inside this try so the
    # temporary file is removed on every exit path: success, error, or a run
    # in which the button was not pressed.
    try:
        # Read only the header metadata rather than decoding all samples.
        audio_info = sf.info(processed_wav)

        st.audio(processed_wav, format="audio/wav")

        st.markdown("### Audio Details")
        st.markdown(
            '<div class="stats">'
            '<div class="stat-box"><div>Duration</div>'
            f'<div class="stat-value">{audio_info.duration:.1f}s</div></div>'
            '<div class="stat-box"><div>Sample Rate</div>'
            f'<div class="stat-value">{audio_info.samplerate} Hz</div></div>'
            '<div class="stat-box"><div>Channels</div>'
            f'<div class="stat-value">{audio_info.channels}</div></div>'
            '</div>',
            unsafe_allow_html=True,
        )

        if st.button("Transcribe Audio", type="primary"):
            progress_container = st.empty()
            _show_progress(progress_container, 30, "Processing audio...")

            # Brief pause so the first progress step is visible to the user.
            time.sleep(0.8)
            _show_progress(progress_container, 70, "Transcribing content...")

            try:
                with st.spinner(""):
                    result = model.transcribe([processed_wav])
                    transcript = _first_transcript(result)
            except Exception as e:
                st.error(f"Transcription failed: {e}")
                progress_container.empty()
                raise

            _show_progress(progress_container, 100, "Transcription complete")
            time.sleep(0.5)
            progress_container.empty()

            st.markdown("### Transcription Results")
            # Built without surrounding whitespace: .transcript-box uses
            # white-space: pre-wrap, so stray whitespace would be rendered.
            st.markdown(
                '<div class="transcript-container">'
                f'<div class="transcript-box">{transcript}</div>'
                '</div>',
                unsafe_allow_html=True,
            )

            st.download_button("Download Transcript", transcript,
                               file_name="arabic_transcript.txt")
    finally:
        try:
            os.remove(processed_wav)
        except OSError:
            # Best-effort cleanup; never mask the original error.
            pass
|
|
|
|
|
|
|
|
# Page footer: a horizontal rule followed by the credits / copyright block.
_FOOTER_HTML = """
<div style="text-align: center; color: var(--text-secondary); padding: 20px; font-size: 0.9rem;">
    <p>Powered by NeMo ASR and Streamlit | Professional Arabic Transcription Service</p>
    <p>©YahyaAlnwsany | 2025 Arabic Transcriber Pro | All rights reserved</p>
</div>
"""

st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)