File size: 6,741 Bytes
369294c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0b87765
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369294c
 
 
 
 
 
 
 
 
 
 
 
 
 
0b87765
 
369294c
 
 
 
 
 
 
0b87765
 
 
 
 
369294c
0b87765
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369294c
 
 
 
 
 
 
 
 
 
 
 
0b87765
369294c
 
 
 
 
 
 
0b87765
369294c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
import gradio as gr
import os
import requests
import tarfile
import uuid
import soundfile as sf
import numpy as np
import sherpa_onnx

# Persian TTS voices offered by the app.
# Each entry is [model id, download base URL, display name, source page]:
#   [0] short identifier of the model,
#   [1] base URL that model.onnx / tokens.txt are downloaded from,
#   [2] label shown in the voice dropdown (also the key entries are looked up by),
#   [3] upstream model page, reported in the status message.
# Download URLs use the Hub's "/resolve/" form, which serves raw file
# contents; "/tree/" URLs return the HTML repository browser page instead.
MODELS = [
    ['mms fa', 'https://huggingface.co/willwade/mms-tts-multilingual-models-onnx/resolve/main/fas', "🌠 راد", 'https://huggingface.co/facebook/mms-tts-fas'],
    ['coqui-vits-female1-karim23657', 'https://huggingface.co/karim23657/persian-tts-vits/resolve/main/persian-tts-female1-vits-coqui', "🌺 نگار", 'https://huggingface.co/Kamtera/persian-tts-female1-vits'],
    ['coqui-vits-male1-karim23657', 'https://huggingface.co/karim23657/persian-tts-vits/resolve/main/persian-tts-male1-vits-coqui', "🌟 آرش", 'https://huggingface.co/Kamtera/persian-tts-male1-vits'],
]

def _hf_raw_url(url):
    """Rewrite a Hugging Face browser URL (/tree/, /blob/) to its raw /resolve/ form."""
    head, marker, tail = url.partition("huggingface.co/")
    if not marker:
        return url
    segments = tail.split("/")
    # tail looks like "<owner>/<repo>/<view>/<revision>/..."; only <view> changes
    if len(segments) > 2 and segments[2] in ("tree", "blob"):
        segments[2] = "resolve"
    return head + marker + "/".join(segments)


def _fetch_to_file(url, path, chunk_size=8192, timeout=60):
    """Stream *url* into *path*, failing on HTTP errors and leaving no partial file."""
    partial_path = path + ".part"
    try:
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Without this an HTTP error page would be saved as the model file
            response.raise_for_status()
            with open(partial_path, "wb") as out:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:
                        out.write(chunk)
        os.replace(partial_path, path)
    finally:
        # Only still present when the download did not complete
        if os.path.exists(partial_path):
            os.remove(partial_path)


def _extract_tar_bz2(tar_path, destination):
    """Unpack a .tar.bz2 archive into *destination*, refusing entries that escape it."""
    with tarfile.open(tar_path, "r:bz2") as tar:
        if hasattr(tarfile, "data_filter"):
            # Interpreter supports extraction filters: let tarfile reject unsafe members
            tar.extractall(path=destination, filter="data")
            return
        # Older interpreters: at least reject member names that resolve outside the target
        root = os.path.realpath(destination)
        for member in tar.getmembers():
            target = os.path.realpath(os.path.join(root, member.name))
            if os.path.commonpath([root, target]) != root:
                raise ValueError(f"unsafe path in archive: {member.name}")
        tar.extractall(path=destination)


def download_and_extract_model(url, destination):
    """Download a model from *url* into the directory *destination*.

    Three kinds of URL are handled:

    * ``*.tar.bz2`` -- the archive is downloaded, unpacked into
      *destination* and then deleted.
    * a direct ``*.onnx`` URL -- the file is saved as ``model.onnx`` and
      ``tokens.txt`` is fetched from the same remote directory.
    * any other ``huggingface.co`` URL -- treated as a remote directory that
      holds ``model.onnx`` and ``tokens.txt``.

    Hugging Face ``/tree/`` and ``/blob/`` URLs are rewritten to ``/resolve/``
    so that raw files, not HTML pages, are downloaded.

    Args:
        url: Location of the archive, model file or model directory.
        destination: Local directory to populate; created if missing.

    Raises:
        RuntimeError: If the URL kind is not supported or any download or
            extraction step fails. A directory that this call created is
            removed again, so a later attempt starts from a clean state.
    """
    import shutil  # local import: only needed for the failure clean-up

    created_here = not os.path.isdir(destination)
    os.makedirs(destination, exist_ok=True)
    try:
        if url.endswith(".tar.bz2"):
            tar_path = os.path.join(destination, "model.tar.bz2")
            print(f"Downloading {url} ...")
            _fetch_to_file(url, tar_path)
            print("Download complete, extracting...")
            try:
                _extract_tar_bz2(tar_path, destination)
            finally:
                # The archive is not needed once extraction has been attempted
                os.remove(tar_path)
        elif url.endswith(".onnx") or "huggingface.co" in url:
            raw_url = _hf_raw_url(url.rstrip("/"))
            if raw_url.endswith(".onnx"):
                # Direct file URL: tokens.txt is expected next to it
                model_url = raw_url
                tokens_url = raw_url.rsplit("/", 1)[0] + "/tokens.txt"
            else:
                model_url = f"{raw_url}/model.onnx"
                tokens_url = f"{raw_url}/tokens.txt"
            _fetch_to_file(model_url, os.path.join(destination, "model.onnx"))
            _fetch_to_file(tokens_url, os.path.join(destination, "tokens.txt"))
        else:
            raise ValueError(f"unsupported model URL: {url}")
    except Exception as e:
        if created_here:
            shutil.rmtree(destination, ignore_errors=True)
        raise RuntimeError(f"خطا در دانلود یا استخراج مدل: {str(e)}") from e

def find_model_files(model_dir):
    """Locate the model files stored anywhere under *model_dir*.

    The tree is walked recursively. Any ``*.onnx`` file is recorded under
    ``'model'``, ``tokens.txt`` under ``'tokens'`` and ``lexicon.txt`` under
    ``'lexicon'``; when several files match the same key, the one visited
    last wins.

    Returns:
        A dict mapping those keys to file paths, or an empty dict when no
        ``.onnx`` file was found.
    """
    exact_names = {'tokens.txt': 'tokens', 'lexicon.txt': 'lexicon'}
    found = {}
    for dirpath, _dirnames, filenames in os.walk(model_dir):
        for name in filenames:
            # The .onnx suffix takes precedence over the exact-name lookups
            key = 'model' if name.endswith('.onnx') else exact_names.get(name)
            if key is not None:
                found[key] = os.path.join(dirpath, name)
    if 'model' not in found:
        return {}
    return found

def check_model_files(model_name):
    """Return the file paths of the model for *model_name*, downloading it if needed.

    The model is expected under ``models/<model_name>``. When the required
    files are not all present there and *model_name* matches the display name
    of a ``MODELS`` entry, the model is downloaded from that entry's URL and
    the directory is scanned again.

    Args:
        model_name: Display name of the voice (third field of a MODELS entry).

    Returns:
        The dict produced by ``find_model_files`` with at least the keys
        ``'model'`` and ``'tokens'``.

    Raises:
        FileNotFoundError: If a required file is still missing afterwards.
        RuntimeError: Propagated from ``download_and_extract_model``.
    """
    model_dir = os.path.join("models", model_name)
    required = ('model', 'tokens')

    def missing_keys(found):
        return [key for key in required
                if key not in found or not os.path.exists(found[key])]

    files = find_model_files(model_dir)
    if missing_keys(files):
        # Triggered by a missing directory and also by one that an earlier
        # failed download left empty or incomplete.
        download_url = next((m[1] for m in MODELS if m[2] == model_name), None)
        if download_url is not None:
            download_and_extract_model(download_url, model_dir)
            files = find_model_files(model_dir)

    absent = missing_keys(files)
    if absent:
        raise FileNotFoundError(f"فایل ضروری مدل '{absent[0]}' یافت نشد.")
    return files

def generate_audio_safe(text, model_name):
    """Synthesize *text* with the voice *model_name* and return normalized audio.

    Args:
        text: Text to synthesize.
        model_name: Display name of the voice (third field of a MODELS entry).

    Returns:
        A tuple ``(samples, sample_rate)``: a float32 NumPy array scaled so
        that its largest absolute value is 1.0, and the sample rate reported
        by the TTS engine.

    Raises:
        RuntimeError: If the model files cannot be obtained, synthesis fails,
            or the generated audio is empty or all zeros. The original
            exception is attached as the cause.
    """
    try:
        files = check_model_files(model_name)
        # model_name is the display label, which does not contain "mms" for any
        # listed voice; the model family has to be read from the model id.
        model_id = next((m[0] for m in MODELS if m[2] == model_name), model_name)
        is_mms = 'mms' in model_id.lower()
        # MMS models are configured without a lexicon even if one is on disk
        lexicon = '' if is_mms else files.get('lexicon', '')
        # NOTE(review): the trailing positional arguments are presumably
        # data_dir, dict_dir, noise_scale, noise_scale_w and length_scale --
        # confirm against the installed sherpa_onnx version.
        vits_config = sherpa_onnx.OfflineTtsVitsModelConfig(
            files['model'], lexicon, files['tokens'], '', '', 0.667, 0.8, 1.0
        )
        model_config = sherpa_onnx.OfflineTtsModelConfig()
        model_config.vits = vits_config
        config = sherpa_onnx.OfflineTtsConfig(model=model_config, max_num_sentences=2)
        tts = sherpa_onnx.OfflineTts(config)
        audio_data = tts.generate(text)
        samples = np.array(audio_data.samples, dtype=np.float32)
        if not np.any(samples):
            # Empty or all-zero output cannot be normalized and is treated as a failure
            raise ValueError("صدای تولید شده خالی است")
        samples = samples / np.abs(samples).max()
        return samples, audio_data.sample_rate
    except Exception as e:
        raise RuntimeError(f"خطا در تولید صدا: {str(e)}") from e

def tts_interface_multiple(selected_model, texts):
    """Synthesize every non-empty line of *texts* into its own WAV file.

    Args:
        selected_model: Display name of the voice to use.
        texts: Input text; each non-blank line is synthesized separately.
            ``None`` is treated like an empty string.

    Returns:
        A tuple ``(audio_files, status)``. ``audio_files`` holds one entry per
        non-blank line, in order: the path of the 16-bit PCM WAV file written
        to the current working directory, or ``None`` when that line failed.
        ``status`` is a newline-joined report with one message per line. For
        input without any text the result is ``([], <prompt to enter text>)``.
    """
    stripped = (line.strip() for line in (texts or "").split("\n"))
    lines = [line for line in stripped if line]
    if not lines:
        return [], "لطفا متنی وارد کنید"

    # The source page depends only on the selected voice, so look it up once
    model_url = next((m[3] for m in MODELS if m[2] == selected_model), "")
    audio_files = []
    status_messages = []

    for idx, text in enumerate(lines, start=1):
        try:
            audio_data, sr = generate_audio_safe(text, selected_model)
            filename = f"tts_output_{idx}_{uuid.uuid4()}.wav"
            sf.write(filename, audio_data, sr, subtype="PCM_16")
        except Exception as e:
            # One failing line must not abort the remaining lines
            audio_files.append(None)
            status_messages.append(f"{idx}. خطا: {str(e)}")
        else:
            audio_files.append(filename)
            status_messages.append(f"{idx}. مدل: {selected_model}, منبع: {model_url}, متن: {text}")

    return audio_files, "\n".join(status_messages)

def create_gradio_interface():
    """Build the Gradio UI for the Persian text-to-speech app.

    The page has a multi-line text input, a voice dropdown filled from the
    display names in ``MODELS`` and a button. Clicking the button runs
    ``tts_interface_multiple`` and shows the first generated clip in an audio
    player, all generated clips in a file list, and the status report.

    Returns:
        The assembled ``gr.Blocks`` app; it is not launched here.
    """
    voices = [m[2] for m in MODELS]

    def run_tts(selected_model, texts):
        # tts_interface_multiple returns a list that may contain None for
        # failed lines, while gr.Audio accepts a single file only; adapt the
        # result to what each output component can display.
        audio_files, status = tts_interface_multiple(selected_model, texts)
        produced = [path for path in audio_files if path]
        first_clip = produced[0] if produced else None
        return first_clip, (produced or None), status

    with gr.Blocks(title="تبدیل متن به گفتار فارسی", theme=gr.themes.Soft()) as demo:
        gr.Markdown("## تبدیل متن به گفتار فارسی\nهر خط یک متن جداگانه محسوب می‌شود.")
        with gr.Row():
            with gr.Column():
                text_input = gr.TextArea(label="متن فارسی", placeholder="هر خط یک متن...", lines=5)
                voice_dropdown = gr.Dropdown(label="صدا", choices=voices, value=voices[0])
                generate_button = gr.Button("تبدیل به گفتار")
            with gr.Column():
                audio_output = gr.Audio(label="خروجی صوتی", interactive=False, type="filepath")
                files_output = gr.File(label="همه فایل‌های صوتی", file_count="multiple", interactive=False)
                status_output = gr.Textbox(label="وضعیت", interactive=False)

        generate_button.click(
            fn=run_tts,
            inputs=[voice_dropdown, text_input],
            outputs=[audio_output, files_output, status_output]
        )
    return demo

# Script entry point: runs only when the file is executed directly.
if __name__ == "__main__":
    # Create the model cache directory that check_model_files stores models under
    os.makedirs("models", exist_ok=True)
    demo = create_gradio_interface()
    # Serve on port 7860; "0.0.0.0" is the conventional "all interfaces" address
    demo.launch(server_name="0.0.0.0", server_port=7860)