|
|
import gradio as gr |
|
|
import os |
|
|
import requests |
|
|
import tarfile |
|
|
import uuid |
|
|
import soundfile as sf |
|
|
import numpy as np |
|
|
import sherpa_onnx |
|
|
|
|
|
|
|
|
# Registry of available TTS voices.
# Each row is: [model_id, download_url, display_name, model_card_url]
#   - display_name (index 2) is shown in the UI dropdown and used as the lookup key
#     and as the on-disk directory name under "models/".
#   - download_url (index 1) is fetched by download_and_extract_model on first use.
#   - model_card_url (index 3) is reported back to the user in status messages.
MODELS = [
    ['mms fa', 'https://huggingface.co/willwade/mms-tts-multilingual-models-onnx/resolve/main/fas', "🌠 راد", 'https://huggingface.co/facebook/mms-tts-fas'],
    ['coqui-vits-female1-karim23657', 'https://huggingface.co/karim23657/persian-tts-vits/tree/main/persian-tts-female1-vits-coqui', "🌺 نگار", 'https://huggingface.co/Kamtera/persian-tts-female1-vits'],
    ['coqui-vits-male1-karim23657', 'https://huggingface.co/karim23657/persian-tts-vits/tree/main/persian-tts-male1-vits-coqui', "🌟 آرش", 'https://huggingface.co/Kamtera/persian-tts-male1-vits'],
]
|
|
|
|
|
def download_and_extract_model(url, destination):
    """Download model files for one voice into `destination`.

    Two source layouts are supported:
      * a ``.tar.bz2`` archive URL — downloaded in chunks and extracted in place;
      * a Hugging Face repo URL (or direct ``.onnx`` URL) — ``model.onnx`` and
        ``tokens.txt`` are fetched individually.

    Raises:
        RuntimeError: if any download or the archive extraction fails.
    """
    os.makedirs(destination, exist_ok=True)
    try:
        if url.endswith(".tar.bz2"):
            tar_path = os.path.join(destination, "model.tar.bz2")
            print(f"Downloading {url} ...")
            # timeout: don't hang forever on a stalled connection;
            # raise_for_status: fail fast instead of saving an HTML error page.
            r = requests.get(url, stream=True, timeout=60)
            r.raise_for_status()
            with open(tar_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
            print("Download complete, extracting...")
            with tarfile.open(tar_path, "r:bz2") as tar:
                # NOTE(review): the archive comes from a configured model source,
                # but extractall() trusts member paths; consider filter="data"
                # (Python 3.12+) to block path traversal from a malicious archive.
                tar.extractall(path=destination)
            os.remove(tar_path)
        elif url.endswith(".onnx") or "huggingface.co" in url:
            # Non-archive layout: fetch the two required files side by side.
            targets = [
                (f"{url}/model.onnx", os.path.join(destination, "model.onnx")),
                (f"{url}/tokens.txt", os.path.join(destination, "tokens.txt")),
            ]
            for file_url, path in targets:
                resp = requests.get(file_url, timeout=60)
                resp.raise_for_status()  # avoid writing a 404 page as a model file
                with open(path, "wb") as f:
                    f.write(resp.content)
    except Exception as e:
        raise RuntimeError(f"خطا در دانلود یا استخراج مدل: {str(e)}")
|
|
|
|
|
def find_model_files(model_dir):
    """Recursively locate model artifacts under `model_dir`.

    Returns a dict with the keys 'model' (any ``*.onnx`` file), 'tokens'
    (``tokens.txt``) and 'lexicon' (``lexicon.txt``) mapped to absolute-ish
    paths, or an empty dict when no ONNX model was found at all.
    """
    # Exact-name files map straight to their dict keys.
    named_targets = {'tokens.txt': 'tokens', 'lexicon.txt': 'lexicon'}
    found = {}
    for root, _, filenames in os.walk(model_dir):
        for name in filenames:
            full_path = os.path.join(root, name)
            if name.endswith('.onnx'):
                found['model'] = full_path
            elif name in named_targets:
                found[named_targets[name]] = full_path
    # The ONNX model is the one indispensable artifact.
    return found if 'model' in found else {}
|
|
|
|
|
def check_model_files(model_name):
    """Ensure the files for the named voice exist locally, downloading on first use.

    `model_name` is the display name (index 2 of a MODELS row). Returns the
    dict produced by find_model_files, guaranteed to contain existing 'model'
    and 'tokens' paths.

    Raises:
        FileNotFoundError: if a required file is still missing after download.
    """
    model_dir = os.path.join("models", model_name)
    if not os.path.exists(model_dir):
        # Lazy fetch: look the voice up in the registry and pull its files.
        entry = next((m for m in MODELS if m[2] == model_name), None)
        if entry is not None:
            download_and_extract_model(entry[1], model_dir)
    files = find_model_files(model_dir)
    for r in ('model', 'tokens'):
        if r not in files or not os.path.exists(files[r]):
            raise FileNotFoundError(f"فایل ضروری مدل '{r}' یافت نشد.")
    return files
|
|
|
|
|
def generate_audio_safe(text, model_name):
    """Synthesize speech for `text` with the voice `model_name`.

    Returns:
        tuple: (samples as float32 numpy array normalized to [-1, 1], sample rate).

    Raises:
        RuntimeError: wrapping any failure (missing files, TTS errors, empty audio).
    """
    try:
        files = check_model_files(model_name)
        # MMS models ship without a lexicon, so force it empty for them;
        # other VITS models may provide one (optional).
        is_mms = 'mms' in model_name.lower()
        lexicon = '' if is_mms else files.get('lexicon', '')
        # Single construction path instead of two near-identical branches.
        vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
            files['model'], lexicon, files['tokens'], '', '', 0.667, 0.8, 1.0
        )
        model_config = sherpa_onnx.OfflineTtsModelConfig()
        model_config.vits = vits_config
        config = sherpa_onnx.OfflineTtsConfig(model=model_config, max_num_sentences=2)
        tts = sherpa_onnx.OfflineTts(config)
        audio_data = tts.generate(text)
        audio_array = np.array(audio_data.samples, dtype=np.float32)
        if not np.any(audio_array):
            raise ValueError("صدای تولید شده خالی است")
        # Peak-normalize so the WAV export never clips.
        audio_array = audio_array / np.abs(audio_array).max()
        return audio_array, audio_data.sample_rate
    except Exception as e:
        # Chain the cause so the original traceback is preserved for debugging.
        raise RuntimeError(f"خطا در تولید صدا: {str(e)}") from e
|
|
|
|
|
def tts_interface_multiple(selected_model, texts):
    """Synthesize one WAV per non-empty line of `texts` and report per-line status.

    Bug fix: the wired output component is a single gr.Audio(type="filepath"),
    which cannot render a *list* of paths (and `[]` is likewise invalid), so
    this now returns the first successfully generated file path (or None)
    instead of the full list. Every line is still synthesized to its own file
    and reported in the status text.

    Returns:
        tuple: (filepath or None for the audio player, status string).
    """
    lines = [line.strip() for line in texts.split("\n") if line.strip()]
    if not lines:
        return None, "لطفا متنی وارد کنید"

    first_audio = None
    status_messages = []

    for idx, text in enumerate(lines, start=1):
        try:
            audio_data, sr = generate_audio_safe(text, selected_model)
            # uuid in the name avoids collisions between concurrent requests.
            filename = f"tts_output_{idx}_{uuid.uuid4()}.wav"
            sf.write(filename, audio_data, sr, subtype="PCM_16")
            if first_audio is None:
                first_audio = filename
            model_url = next((m[3] for m in MODELS if m[2] == selected_model), "")
            status_messages.append(f"{idx}. مدل: {selected_model}, منبع: {model_url}, متن: {text}")
        except Exception as e:
            # Keep going: one bad line must not abort the remaining lines.
            status_messages.append(f"{idx}. خطا: {str(e)}")

    return first_audio, "\n".join(status_messages)
|
|
|
|
|
def create_gradio_interface():
    """Build the Gradio Blocks UI.

    Left column: multi-line text input, voice dropdown, generate button.
    Right column: audio player and a status textbox. Returns the Blocks app.
    """
    voice_names = [entry[2] for entry in MODELS]
    with gr.Blocks(theme=gr.themes.Soft(), title="تبدیل متن به گفتار فارسی") as demo:
        gr.Markdown("## تبدیل متن به گفتار فارسی\nهر خط یک متن جداگانه محسوب میشود.")
        with gr.Row():
            with gr.Column():
                text_input = gr.TextArea(
                    lines=5,
                    label="متن فارسی",
                    placeholder="هر خط یک متن...",
                )
                voice_dropdown = gr.Dropdown(
                    choices=voice_names,
                    value=voice_names[0],
                    label="صدا",
                )
                generate_button = gr.Button("تبدیل به گفتار")
            with gr.Column():
                audio_output = gr.Audio(type="filepath", label="خروجی صوتی", interactive=False)
                status_output = gr.Textbox(label="وضعیت", interactive=False)

        # Wire the button: (voice, raw text) in -> (audio, status) out.
        generate_button.click(
            fn=tts_interface_multiple,
            inputs=[voice_dropdown, text_input],
            outputs=[audio_output, status_output],
        )
    return demo
|
|
|
|
|
if __name__ == "__main__":
    # Make sure the model cache directory exists before any lazy download runs.
    os.makedirs("models", exist_ok=True)
    app = create_gradio_interface()
    # Bind to all interfaces on the conventional Gradio port.
    app.launch(server_name="0.0.0.0", server_port=7860)