# filename: pro_arabic_transcriber.py
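"""Streamlit app for Arabic speech-to-text.

Accepts any audio or video upload, converts it to 16 kHz mono WAV with FFmpeg,
and transcribes it with NVIDIA NeMo's Arabic FastConformer hybrid RNNT/CTC model.
"""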
import streamlit as st
import nemo.collections.asr as nemo_asr
import soundfile as sf
import tempfile
import os
import time
import magic # for file type detection
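# python-magic wraps the libmagic C library, which must be installed separately
# (e.g. libmagic1 on Debian/Ubuntu, or the python-magic-bin package on Windows)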
import ffmpeg
import subprocess
from pathlib import Path
# Custom CSS for a dark, elegant theme
st.markdown("""
<style>
:root {
--primary: #3a506b;
--secondary: #5bc0be;
--accent: #e55934;
--background: #1c2541;
--card: #0b132b;
--text: #e0e0e0;
--text-secondary: #b8b8b8;
}
.stApp {
background-color: var(--background);
color: var(--text);
}
.main .block-container {
max-width: 1200px;
padding: 2rem 3rem;
}
.card {
background-color: var(--card);
border-radius: 8px;
padding: 1.5rem;
margin-bottom: 1.5rem;
border-left: 3px solid var(--secondary);
}
.header {
background: linear-gradient(135deg, #0b132b, #1c2541);
color: white;
padding: 2rem 3rem;
margin: -2rem -3rem 2rem -3rem;
border-bottom: 1px solid rgba(91, 192, 190, 0.2);
}
.stButton>button {
background: var(--primary);
color: white;
border-radius: 6px;
padding: 0.7rem 1.5rem;
font-weight: 500;
transition: all 0.2s ease;
border: 1px solid rgba(91, 192, 190, 0.3);
}
.stButton>button:hover {
background: #2c3e5a;
color: white;
}
.stDownloadButton>button {
background: var(--secondary);
color: #0b132b;
}
.stDownloadButton>button:hover {
background: #4aa8a6;
color: #0b132b;
}
.transcript-container {
background-color: rgba(11, 19, 43, 0.7);
border-radius: 8px;
padding: 1.5rem;
margin-top: 1rem;
border: 1px solid rgba(91, 192, 190, 0.1);
}
.transcript-box {
background-color: transparent;
font-size: 1.1rem;
line-height: 1.8;
min-height: 150px;
direction: rtl;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
color: var(--text);
white-space: pre-wrap;
}
.stats {
display: flex;
gap: 1rem;
margin-top: 1rem;
}
.stat-box {
background-color: rgba(58, 80, 107, 0.5);
padding: 0.8rem 1rem;
border-radius: 6px;
flex: 1;
min-width: 100px;
text-align: center;
border: 1px solid rgba(91, 192, 190, 0.1);
}
.stat-value {
font-size: 1.2rem;
font-weight: bold;
color: var(--secondary);
}
.progress-container {
height: 6px;
background-color: rgba(58, 80, 107, 0.5);
border-radius: 3px;
margin: 1.5rem 0;
overflow: hidden;
}
.progress-bar {
height: 100%;
background: linear-gradient(90deg, var(--secondary), #4aa8a6);
border-radius: 3px;
transition: width 0.4s ease;
}
h1, h2, h3 {
color: var(--text) !important;
}
.file-uploader {
border: 2px dashed var(--secondary);
border-radius: 8px;
padding: 2rem;
text-align: center;
background-color: rgba(91, 192, 190, 0.05);
margin-bottom: 1.5rem;
}
.feature-icon {
color: var(--secondary);
margin-right: 0.5rem;
}
.stSpinner > div {
border-color: var(--secondary) transparent transparent transparent !important;
}
</style>
""", unsafe_allow_html=True)
# Check if ffmpeg is available
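# (ffmpeg-python only builds and runs ffmpeg command lines; the ffmpeg binary itself must be on PATH)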
def check_ffmpeg():
try:
subprocess.run(['ffmpeg', '-version'], capture_output=True, check=True)
return True
except (subprocess.SubprocessError, FileNotFoundError):
return False
if not check_ffmpeg():
st.error("FFmpeg is not installed or not found in PATH. Please install FFmpeg to use this application.")
st.markdown("""
### How to install FFmpeg:
**Windows (using Chocolatey):**
```
choco install ffmpeg
```
**Windows (manual):**
1. Download from [ffmpeg.org](https://ffmpeg.org/download.html)
2. Extract and add the bin folder to your system PATH
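**Linux (Debian/Ubuntu):**
```
sudo apt-get install ffmpeg
```
**macOS (Homebrew):**
```
brew install ffmpeg
```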
**After installing**, restart this application.
""")
st.stop()
# Accept any file - we'll detect type server-side
AUDIO_MIMETYPES = {
'audio/wav', 'audio/x-wav', 'audio/mpeg', 'audio/ogg', 'audio/flac',
'audio/x-m4a', 'audio/aac', 'audio/x-ms-wma'
}
VIDEO_MIMETYPES = {
'video/mp4', 'video/quicktime', 'video/x-matroska', 'video/x-msvideo',
'video/webm', 'video/x-ms-wmv'
}
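# These sets only pick the temp-file suffix and decide whether the audio stream is
# extracted from a video container; FFmpeg handles the actual decoding either way.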
# Load NeMo model once
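# st.cache_resource keeps the checkpoint in memory across Streamlit reruns;
# the first call downloads it and can take a while.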
@st.cache_resource
def load_model():
try:
model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
model_name="nvidia/stt_ar_fastconformer_hybrid_large_pcd_v1.0"
)
return model
except Exception as e:
# Re-raise so the UI can present a friendly error when called
raise RuntimeError(f"Failed to load NeMo model: {e}")
try:
    model = load_model()
except RuntimeError as e:
    st.error(str(e))
    st.stop()
def detect_file_type(file_data):
"""Detect the MIME type of a file using python-magic"""
mime = magic.from_buffer(file_data, mime=True)
return mime
def convert_audio(uploaded_file, target_sample_rate=16000):
"""
Convert any audio or video file to a 16kHz mono WAV using FFmpeg.
Returns the path to the converted temporary WAV file.
Args:
uploaded_file: A Streamlit UploadedFile or path-like object
target_sample_rate: Output sample rate (default 16000 Hz)
Returns:
str: Path to the converted temporary WAV file
"""
try:
# Read the file data
if hasattr(uploaded_file, 'read'):
file_data = uploaded_file.read()
uploaded_file.seek(0) # Reset position for later use
else:
with open(uploaded_file, 'rb') as f:
file_data = f.read()
# Detect file type
mime_type = detect_file_type(file_data)
# Save to temporary input file
suffix = '.tmp'
if mime_type in AUDIO_MIMETYPES:
suffix = '.audio' + suffix
elif mime_type in VIDEO_MIMETYPES:
suffix = '.video' + suffix
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_in:
            # file_data already holds the full contents for both upload objects and paths
            tmp_in.write(file_data)
            tmp_in_path = tmp_in.name
        # Create output WAV file (NamedTemporaryFile avoids the race condition of the
        # deprecated tempfile.mktemp; ffmpeg overwrites the empty placeholder)
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_out:
            output_path = tmp_out.name
try:
# Build the ffmpeg conversion pipeline
stream = ffmpeg.input(tmp_in_path)
# Extract audio from video if needed
if mime_type in VIDEO_MIMETYPES:
stream = stream.audio
# Convert to 16kHz mono WAV
stream = ffmpeg.output(
stream,
output_path,
acodec='pcm_s16le', # 16-bit PCM
ac=1, # mono
ar=target_sample_rate,# sample rate
loglevel='error' # reduce ffmpeg output
)
            # Run the conversion; capture stderr so a useful message survives a failure
            ffmpeg.run(stream, overwrite_output=True, capture_stderr=True)
            return output_path
        except ffmpeg.Error as e:
            stderr = e.stderr.decode() if e.stderr else str(e)
            raise RuntimeError(f"FFmpeg error during conversion: {stderr}")
finally:
# Clean up input temp file
try:
os.remove(tmp_in_path)
except Exception:
pass
except Exception as e:
raise RuntimeError(f"Failed to convert file to WAV: {str(e)}")
# App UI
st.markdown("""
<div class="header">
<h1 style="margin-bottom: 0.5rem;">Arabic Transcriber Pro</h1>
<p style="color: var(--text-secondary); margin-top: 0;">Convert speech to text with the highest accuracy</p>
</div>
""", unsafe_allow_html=True)
# Main content - single wide column layout
st.markdown("""
<div class="card">
<div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
<span class="feature-icon">🔊</span>
<span>Supports many audio formats and common video types (MP4, MOV, MKV). Upload audio or video and the app will extract audio automatically.</span>
</div>
<div style="display: flex; gap: 1rem; margin-bottom: 1rem;">
<span class="feature-icon">⚡</span>
<span>Fast transcription with NVIDIA's Arabic FastConformer ASR model</span>
</div>
</div>
""", unsafe_allow_html=True)
uploaded_file = st.file_uploader("Drag and drop any audio or video file here", type=None,
help="Supports any audio or video format that FFmpeg can handle")
if uploaded_file is not None:
# Basic size check (Streamlit UploadedFile has .size in bytes)
try:
file_size_mb = uploaded_file.size / (1024 * 1024)
except Exception:
file_size_mb = None
if file_size_mb is not None and file_size_mb > 500:
st.warning("Large file detected (>500MB). Processing may take a long time or fail. Consider uploading a smaller file.")
    # Convert to 16 kHz mono WAV
    with st.spinner("Preparing audio for transcription..."):
        try:
            processed_wav = convert_audio(uploaded_file)
        except RuntimeError as e:
            st.error(str(e))
            st.stop()
# Show audio info
data, sample_rate = sf.read(processed_wav)
channels = 1 if len(data.shape) == 1 else data.shape[1]
duration = len(data) / sample_rate
# Show audio player and info
st.audio(processed_wav, format="audio/wav")
st.markdown("### Audio Details")
st.markdown("""
<div class="stats">
<div class="stat-box">
<div>Duration</div>
<div class="stat-value">{:.1f}s</div>
</div>
<div class="stat-box">
<div>Sample Rate</div>
<div class="stat-value">{} Hz</div>
</div>
<div class="stat-box">
<div>Channels</div>
<div class="stat-value">{}</div>
</div>
</div>
""".format(duration, sample_rate, channels), unsafe_allow_html=True)
# Transcription
if st.button("Transcribe Audio", type="primary"):
# Create a progress container
progress_container = st.empty()
progress_container.markdown("""
<div class="progress-container">
<div class="progress-bar" style="width: 30%;"></div>
</div>
<div style="text-align: center; margin-top: 5px; color: var(--secondary);">Processing audio...</div>
""", unsafe_allow_html=True)
time.sleep(0.8)
progress_container.markdown("""
<div class="progress-container">
<div class="progress-bar" style="width: 70%;"></div>
</div>
<div style="text-align: center; margin-top: 5px; color: var(--secondary);">Transcribing content...</div>
""", unsafe_allow_html=True)
        # Actual transcription
        try:
            with st.spinner(""):
                result = model.transcribe([processed_wav])
            # Newer NeMo builds return Hypothesis objects; older ones return plain strings
            first = result[0]
            transcript = first.text if hasattr(first, "text") else first
except Exception as e:
st.error(f"Transcription failed: {e}")
# Cleanup
try:
os.remove(processed_wav)
except Exception:
pass
progress_container.empty()
raise
# Update progress to complete
progress_container.markdown("""
<div class="progress-container">
<div class="progress-bar" style="width: 100%;"></div>
</div>
<div style="text-align: center; margin-top: 5px; color: var(--secondary);">Transcription complete</div>
""", unsafe_allow_html=True)
time.sleep(0.5)
progress_container.empty()
st.markdown("### Transcription Results")
st.markdown(f"""
<div class="transcript-container">
<div class="transcript-box">{transcript}</div>
</div>
""", unsafe_allow_html=True)
# Download button
st.download_button("Download Transcript", transcript,
file_name="arabic_transcript.txt")
        # Cleanup (best effort, as elsewhere)
        try:
            os.remove(processed_wav)
        except Exception:
            pass
# Minimal footer
st.markdown("---")
st.markdown("""
<div style="text-align: center; color: var(--text-secondary); padding: 20px; font-size: 0.9rem;">
<p>Powered by NeMo ASR and Streamlit | Professional Arabic Transcription Service</p>
<p>© 2025 YahyaAlnwsany | Arabic Transcriber Pro | All rights reserved</p>
</div>
""", unsafe_allow_html=True)