Spaces:
Build error
Build error
积极的屁孩
committed on
Commit
·
ced52e3
1
Parent(s):
b3c35e4
debug
Browse files
README.md
CHANGED
|
@@ -11,3 +11,36 @@ license: apache-2.0
|
|
| 11 |
---
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 14 |
+
|
| 15 |
+
# Vevo语音转换模型演示
|
| 16 |
+
|
| 17 |
+
这是基于[amphion/Vevo](https://huggingface.co/amphion/Vevo)模型的Gradio演示应用。Vevo是一个强大的语音转换模型,支持多种语音处理功能。
|
| 18 |
+
|
| 19 |
+
## 功能
|
| 20 |
+
|
| 21 |
+
- **语音转换 (VevoVoice)**: 保留内容音频的语音内容,将其风格和音色转换为参考音频的风格和音色
|
| 22 |
+
- **风格转换 (VevoStyle)**: 将内容音频的风格转换为参考音频的风格,保留原始音色
|
| 23 |
+
- **音色转换 (VevoTimbre)**: 将内容音频的音色转换为参考音频的音色,保留内容和风格
|
| 24 |
+
- **文本转语音 (VevoTTS)**: 将输入文本转换为语音,使用参考音频的风格和音色
|
| 25 |
+
|
| 26 |
+
## 安装
|
| 27 |
+
|
| 28 |
+
```bash
|
| 29 |
+
pip install -r requirements.txt
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
## 运行
|
| 33 |
+
|
| 34 |
+
```bash
|
| 35 |
+
python app.py
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
## 模型来源
|
| 39 |
+
|
| 40 |
+
- 模型:[amphion/Vevo](https://huggingface.co/amphion/Vevo)
|
| 41 |
+
- 原始仓库:[open-mmlab/Amphion](https://github.com/open-mmlab/Amphion)
|
| 42 |
+
|
| 43 |
+
## 注意事项
|
| 44 |
+
|
| 45 |
+
- 首次运行时,会自动下载模型文件,可能需要一些时间
|
| 46 |
+
- 音频输入应为WAV格式,采样率为24kHz
|
app.py
CHANGED
|
@@ -334,24 +334,15 @@ class VevoGradioApp:
|
|
| 334 |
"""语音转换功能"""
|
| 335 |
pipeline = self.init_voice_conversion_pipeline()
|
| 336 |
|
| 337 |
-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as
|
| 338 |
-
tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as reference_file, \
|
| 339 |
-
tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
| 340 |
-
|
| 341 |
-
content_path = content_file.name
|
| 342 |
-
reference_path = reference_file.name
|
| 343 |
output_path = output_file.name
|
| 344 |
|
| 345 |
-
# 保存上传的音频文件
|
| 346 |
-
content_audio.save(content_path)
|
| 347 |
-
reference_audio.save(reference_path)
|
| 348 |
-
|
| 349 |
# 执行语音转换
|
| 350 |
gen_audio = pipeline.inference_ar_and_fm(
|
| 351 |
-
src_wav_path=
|
| 352 |
src_text=None,
|
| 353 |
-
style_ref_wav_path=
|
| 354 |
-
timbre_ref_wav_path=
|
| 355 |
)
|
| 356 |
save_audio(gen_audio, output_path=output_path)
|
| 357 |
|
|
@@ -361,24 +352,15 @@ class VevoGradioApp:
|
|
| 361 |
"""风格转换功能"""
|
| 362 |
pipeline = self.init_voice_conversion_pipeline()
|
| 363 |
|
| 364 |
-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as
|
| 365 |
-
tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as style_file, \
|
| 366 |
-
tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
| 367 |
-
|
| 368 |
-
content_path = content_file.name
|
| 369 |
-
style_path = style_file.name
|
| 370 |
output_path = output_file.name
|
| 371 |
|
| 372 |
-
# 保存上传的音频文件
|
| 373 |
-
content_audio.save(content_path)
|
| 374 |
-
style_audio.save(style_path)
|
| 375 |
-
|
| 376 |
# 执行风格转换
|
| 377 |
gen_audio = pipeline.inference_ar_and_fm(
|
| 378 |
-
src_wav_path=
|
| 379 |
src_text=None,
|
| 380 |
-
style_ref_wav_path=
|
| 381 |
-
timbre_ref_wav_path=
|
| 382 |
)
|
| 383 |
save_audio(gen_audio, output_path=output_path)
|
| 384 |
|
|
@@ -388,22 +370,13 @@ class VevoGradioApp:
|
|
| 388 |
"""音色转换功能"""
|
| 389 |
pipeline = self.init_timbre_pipeline()
|
| 390 |
|
| 391 |
-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as
|
| 392 |
-
tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as reference_file, \
|
| 393 |
-
tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
| 394 |
-
|
| 395 |
-
content_path = content_file.name
|
| 396 |
-
reference_path = reference_file.name
|
| 397 |
output_path = output_file.name
|
| 398 |
|
| 399 |
-
# 保存上传的音频文件
|
| 400 |
-
content_audio.save(content_path)
|
| 401 |
-
reference_audio.save(reference_path)
|
| 402 |
-
|
| 403 |
# 执行音色转换
|
| 404 |
gen_audio = pipeline.inference_fm(
|
| 405 |
-
src_wav_path=
|
| 406 |
-
timbre_ref_wav_path=
|
| 407 |
flow_matching_steps=32,
|
| 408 |
)
|
| 409 |
save_audio(gen_audio, output_path=output_path)
|
|
@@ -414,21 +387,15 @@ class VevoGradioApp:
|
|
| 414 |
"""文本转语音功能"""
|
| 415 |
pipeline = self.init_tts_pipeline()
|
| 416 |
|
| 417 |
-
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as
|
| 418 |
-
tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
| 419 |
-
|
| 420 |
-
ref_path = ref_file.name
|
| 421 |
output_path = output_file.name
|
| 422 |
|
| 423 |
-
# 保存上传的音频文件
|
| 424 |
-
ref_audio.save(ref_path)
|
| 425 |
-
|
| 426 |
# 执行文本转语音
|
| 427 |
gen_audio = pipeline.inference_ar_and_fm(
|
| 428 |
src_wav_path=None,
|
| 429 |
src_text=text,
|
| 430 |
-
style_ref_wav_path=
|
| 431 |
-
timbre_ref_wav_path=
|
| 432 |
style_ref_wav_text=ref_text if ref_text else None,
|
| 433 |
src_text_language=src_language,
|
| 434 |
style_ref_wav_text_language=ref_language,
|
|
|
|
| 334 |
"""语音转换功能"""
|
| 335 |
pipeline = self.init_voice_conversion_pipeline()
|
| 336 |
|
| 337 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 338 |
output_path = output_file.name
|
| 339 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 340 |
# 执行语音转换
|
| 341 |
gen_audio = pipeline.inference_ar_and_fm(
|
| 342 |
+
src_wav_path=content_audio, # 直接使用路径
|
| 343 |
src_text=None,
|
| 344 |
+
style_ref_wav_path=reference_audio, # 直接使用路径
|
| 345 |
+
timbre_ref_wav_path=reference_audio,
|
| 346 |
)
|
| 347 |
save_audio(gen_audio, output_path=output_path)
|
| 348 |
|
|
|
|
| 352 |
"""风格转换功能"""
|
| 353 |
pipeline = self.init_voice_conversion_pipeline()
|
| 354 |
|
| 355 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 356 |
output_path = output_file.name
|
| 357 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 358 |
# 执行风格转换
|
| 359 |
gen_audio = pipeline.inference_ar_and_fm(
|
| 360 |
+
src_wav_path=content_audio, # 直接使用路径
|
| 361 |
src_text=None,
|
| 362 |
+
style_ref_wav_path=style_audio, # 直接使用路径
|
| 363 |
+
timbre_ref_wav_path=content_audio,
|
| 364 |
)
|
| 365 |
save_audio(gen_audio, output_path=output_path)
|
| 366 |
|
|
|
|
| 370 |
"""音色转换功能"""
|
| 371 |
pipeline = self.init_timbre_pipeline()
|
| 372 |
|
| 373 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 374 |
output_path = output_file.name
|
| 375 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
# 执行音色转换
|
| 377 |
gen_audio = pipeline.inference_fm(
|
| 378 |
+
src_wav_path=content_audio, # 直接使用路径
|
| 379 |
+
timbre_ref_wav_path=reference_audio, # 直接使用路径
|
| 380 |
flow_matching_steps=32,
|
| 381 |
)
|
| 382 |
save_audio(gen_audio, output_path=output_path)
|
|
|
|
| 387 |
"""文本转语音功能"""
|
| 388 |
pipeline = self.init_tts_pipeline()
|
| 389 |
|
| 390 |
+
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
|
|
|
|
|
|
|
|
|
|
| 391 |
output_path = output_file.name
|
| 392 |
|
|
|
|
|
|
|
|
|
|
| 393 |
# 执行文本转语音
|
| 394 |
gen_audio = pipeline.inference_ar_and_fm(
|
| 395 |
src_wav_path=None,
|
| 396 |
src_text=text,
|
| 397 |
+
style_ref_wav_path=ref_audio, # 直接使用路径
|
| 398 |
+
timbre_ref_wav_path=ref_audio,
|
| 399 |
style_ref_wav_text=ref_text if ref_text else None,
|
| 400 |
src_text_language=src_language,
|
| 401 |
style_ref_wav_text_language=ref_language,
|