Spaces:
Sleeping
Sleeping
Rasmus Lellep
committed on
Commit
·
b137cc2
1
Parent(s):
1e4fe3c
working new gradio version, added more example clips
Browse files
- README.md +1 -1
- app.py +25 -29
- examples/female.wav +3 -0
- examples/male.wav +3 -0
- requirements.txt +3 -3
README.md
CHANGED
|
@@ -4,7 +4,7 @@ emoji: 🦀
|
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
python_version: 3.11
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
|
|
|
| 4 |
colorFrom: blue
|
| 5 |
colorTo: purple
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: 5.41.0
|
| 8 |
python_version: 3.11
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
app.py
CHANGED
|
@@ -416,9 +416,6 @@ def predict(
|
|
| 416 |
None,
|
| 417 |
)
|
| 418 |
return (
|
| 419 |
-
gr.make_waveform(
|
| 420 |
-
audio="output.wav",
|
| 421 |
-
),
|
| 422 |
"output.wav",
|
| 423 |
metrics_text,
|
| 424 |
speaker_wav,
|
|
@@ -471,7 +468,7 @@ examples = [
|
|
| 471 |
[
|
| 472 |
"Once when I was six years old I saw a magnificent picture",
|
| 473 |
"en",
|
| 474 |
-
"examples/
|
| 475 |
None,
|
| 476 |
False,
|
| 477 |
False,
|
|
@@ -481,7 +478,7 @@ examples = [
|
|
| 481 |
[
|
| 482 |
"Lorsque j'avais six ans j'ai vu, une fois, une magnifique image",
|
| 483 |
"fr",
|
| 484 |
-
"examples/
|
| 485 |
None,
|
| 486 |
False,
|
| 487 |
False,
|
|
@@ -491,7 +488,7 @@ examples = [
|
|
| 491 |
[
|
| 492 |
"Als ich sechs war, sah ich einmal ein wunderbares Bild",
|
| 493 |
"de",
|
| 494 |
-
"examples/
|
| 495 |
None,
|
| 496 |
False,
|
| 497 |
False,
|
|
@@ -501,7 +498,7 @@ examples = [
|
|
| 501 |
[
|
| 502 |
"Cuando tenía seis años, vi una vez una imagen magnífica",
|
| 503 |
"es",
|
| 504 |
-
"examples/
|
| 505 |
None,
|
| 506 |
False,
|
| 507 |
False,
|
|
@@ -511,7 +508,7 @@ examples = [
|
|
| 511 |
[
|
| 512 |
"Kunagi, kui olin kuueaastane, nägin ma ühte imelist pilti",
|
| 513 |
"et",
|
| 514 |
-
"examples/
|
| 515 |
None,
|
| 516 |
False,
|
| 517 |
False,
|
|
@@ -521,7 +518,7 @@ examples = [
|
|
| 521 |
[
|
| 522 |
"Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica",
|
| 523 |
"pt",
|
| 524 |
-
"examples/
|
| 525 |
None,
|
| 526 |
False,
|
| 527 |
False,
|
|
@@ -531,7 +528,7 @@ examples = [
|
|
| 531 |
[
|
| 532 |
"Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
|
| 533 |
"pl",
|
| 534 |
-
"examples/
|
| 535 |
None,
|
| 536 |
False,
|
| 537 |
False,
|
|
@@ -541,7 +538,7 @@ examples = [
|
|
| 541 |
[
|
| 542 |
"Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno",
|
| 543 |
"it",
|
| 544 |
-
"examples/
|
| 545 |
None,
|
| 546 |
False,
|
| 547 |
False,
|
|
@@ -551,7 +548,7 @@ examples = [
|
|
| 551 |
[
|
| 552 |
"Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm",
|
| 553 |
"tr",
|
| 554 |
-
"examples/
|
| 555 |
None,
|
| 556 |
False,
|
| 557 |
False,
|
|
@@ -561,7 +558,7 @@ examples = [
|
|
| 561 |
[
|
| 562 |
"Когда мне было шесть лет, я увидел однажды удивительную картинку",
|
| 563 |
"ru",
|
| 564 |
-
"examples/
|
| 565 |
None,
|
| 566 |
False,
|
| 567 |
False,
|
|
@@ -571,7 +568,7 @@ examples = [
|
|
| 571 |
[
|
| 572 |
"Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
|
| 573 |
"nl",
|
| 574 |
-
"examples/
|
| 575 |
None,
|
| 576 |
False,
|
| 577 |
False,
|
|
@@ -581,7 +578,7 @@ examples = [
|
|
| 581 |
[
|
| 582 |
"Když mi bylo šest let, viděl jsem jednou nádherný obrázek",
|
| 583 |
"cs",
|
| 584 |
-
"examples/
|
| 585 |
None,
|
| 586 |
False,
|
| 587 |
False,
|
|
@@ -591,7 +588,7 @@ examples = [
|
|
| 591 |
[
|
| 592 |
"当我还只有六岁的时候, 看到了一副精彩的插画",
|
| 593 |
"zh-cn",
|
| 594 |
-
"examples/
|
| 595 |
None,
|
| 596 |
False,
|
| 597 |
False,
|
|
@@ -601,7 +598,7 @@ examples = [
|
|
| 601 |
[
|
| 602 |
"かつて 六歳のとき、素晴らしい絵を見ました",
|
| 603 |
"ja",
|
| 604 |
-
"examples/
|
| 605 |
None,
|
| 606 |
False,
|
| 607 |
True,
|
|
@@ -611,17 +608,17 @@ examples = [
|
|
| 611 |
[
|
| 612 |
"한번은 내가 여섯 살이었을 때 멋진 그림을 보았습니다.",
|
| 613 |
"ko",
|
| 614 |
-
"examples/
|
| 615 |
None,
|
| 616 |
False,
|
| 617 |
True,
|
| 618 |
False,
|
| 619 |
True,
|
| 620 |
],
|
| 621 |
-
|
| 622 |
"Egyszer hat éves koromban láttam egy csodálatos képet",
|
| 623 |
"hu",
|
| 624 |
-
"examples/
|
| 625 |
None,
|
| 626 |
False,
|
| 627 |
True,
|
|
@@ -655,7 +652,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
| 655 |
input_text_gr = gr.Textbox(
|
| 656 |
label="Text Prompt",
|
| 657 |
info="One or two sentences at a time is better. Up to 200 text characters.",
|
| 658 |
-
value="Tere, olen sinu
|
| 659 |
)
|
| 660 |
language_gr = gr.Dropdown(
|
| 661 |
label="Language",
|
|
@@ -680,18 +677,18 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
| 680 |
"hu",
|
| 681 |
"hi"
|
| 682 |
],
|
| 683 |
-
|
| 684 |
value="et",
|
| 685 |
)
|
| 686 |
ref_gr = gr.Audio(
|
| 687 |
label="Reference Audio",
|
| 688 |
-
info="Click on the ✎ button to upload your own target speaker audio",
|
| 689 |
type="filepath",
|
| 690 |
-
value="examples/
|
| 691 |
)
|
| 692 |
mic_gr = gr.Audio(
|
| 693 |
-
|
| 694 |
-
info="Use your microphone to record audio",
|
| 695 |
type="filepath",
|
| 696 |
label="Use Microphone for Reference",
|
| 697 |
)
|
|
@@ -720,7 +717,6 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
| 720 |
|
| 721 |
|
| 722 |
with gr.Column():
|
| 723 |
-
video_gr = gr.Video(label="Waveform Visual")
|
| 724 |
audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
|
| 725 |
out_text_gr = gr.Text(label="Metrics")
|
| 726 |
ref_audio_gr = gr.Audio(label="Reference Audio Used")
|
|
@@ -729,11 +725,11 @@ with gr.Blocks(analytics_enabled=False) as demo:
|
|
| 729 |
gr.Examples(examples,
|
| 730 |
label="Examples",
|
| 731 |
inputs=[input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr],
|
| 732 |
-
outputs=[
|
| 733 |
fn=predict,
|
| 734 |
cache_examples=False,)
|
| 735 |
|
| 736 |
-
tts_button.click(predict, [input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr], outputs=[
|
| 737 |
|
| 738 |
if __name__ == "__main__":
|
| 739 |
demo.queue()
|
|
|
|
| 416 |
None,
|
| 417 |
)
|
| 418 |
return (
|
|
|
|
|
|
|
|
|
|
| 419 |
"output.wav",
|
| 420 |
metrics_text,
|
| 421 |
speaker_wav,
|
|
|
|
| 468 |
[
|
| 469 |
"Once when I was six years old I saw a magnificent picture",
|
| 470 |
"en",
|
| 471 |
+
"examples/female.wav",
|
| 472 |
None,
|
| 473 |
False,
|
| 474 |
False,
|
|
|
|
| 478 |
[
|
| 479 |
"Lorsque j'avais six ans j'ai vu, une fois, une magnifique image",
|
| 480 |
"fr",
|
| 481 |
+
"examples/female.wav",
|
| 482 |
None,
|
| 483 |
False,
|
| 484 |
False,
|
|
|
|
| 488 |
[
|
| 489 |
"Als ich sechs war, sah ich einmal ein wunderbares Bild",
|
| 490 |
"de",
|
| 491 |
+
"examples/female.wav",
|
| 492 |
None,
|
| 493 |
False,
|
| 494 |
False,
|
|
|
|
| 498 |
[
|
| 499 |
"Cuando tenía seis años, vi una vez una imagen magnífica",
|
| 500 |
"es",
|
| 501 |
+
"examples/female.wav",
|
| 502 |
None,
|
| 503 |
False,
|
| 504 |
False,
|
|
|
|
| 508 |
[
|
| 509 |
"Kunagi, kui olin kuueaastane, nägin ma ühte imelist pilti",
|
| 510 |
"et",
|
| 511 |
+
"examples/female.wav",
|
| 512 |
None,
|
| 513 |
False,
|
| 514 |
False,
|
|
|
|
| 518 |
[
|
| 519 |
"Quando eu tinha seis anos eu vi, uma vez, uma imagem magnífica",
|
| 520 |
"pt",
|
| 521 |
+
"examples/female.wav",
|
| 522 |
None,
|
| 523 |
False,
|
| 524 |
False,
|
|
|
|
| 528 |
[
|
| 529 |
"Kiedy miałem sześć lat, zobaczyłem pewnego razu wspaniały obrazek",
|
| 530 |
"pl",
|
| 531 |
+
"examples/female.wav",
|
| 532 |
None,
|
| 533 |
False,
|
| 534 |
False,
|
|
|
|
| 538 |
[
|
| 539 |
"Un tempo lontano, quando avevo sei anni, vidi un magnifico disegno",
|
| 540 |
"it",
|
| 541 |
+
"examples/female.wav",
|
| 542 |
None,
|
| 543 |
False,
|
| 544 |
False,
|
|
|
|
| 548 |
[
|
| 549 |
"Bir zamanlar, altı yaşındayken, muhteşem bir resim gördüm",
|
| 550 |
"tr",
|
| 551 |
+
"examples/male.wav",
|
| 552 |
None,
|
| 553 |
False,
|
| 554 |
False,
|
|
|
|
| 558 |
[
|
| 559 |
"Когда мне было шесть лет, я увидел однажды удивительную картинку",
|
| 560 |
"ru",
|
| 561 |
+
"examples/female.wav",
|
| 562 |
None,
|
| 563 |
False,
|
| 564 |
False,
|
|
|
|
| 568 |
[
|
| 569 |
"Toen ik een jaar of zes was, zag ik op een keer een prachtige plaat",
|
| 570 |
"nl",
|
| 571 |
+
"examples/male.wav",
|
| 572 |
None,
|
| 573 |
False,
|
| 574 |
False,
|
|
|
|
| 578 |
[
|
| 579 |
"Když mi bylo šest let, viděl jsem jednou nádherný obrázek",
|
| 580 |
"cs",
|
| 581 |
+
"examples/female.wav",
|
| 582 |
None,
|
| 583 |
False,
|
| 584 |
False,
|
|
|
|
| 588 |
[
|
| 589 |
"当我还只有六岁的时候, 看到了一副精彩的插画",
|
| 590 |
"zh-cn",
|
| 591 |
+
"examples/male.wav",
|
| 592 |
None,
|
| 593 |
False,
|
| 594 |
False,
|
|
|
|
| 598 |
[
|
| 599 |
"かつて 六歳のとき、素晴らしい絵を見ました",
|
| 600 |
"ja",
|
| 601 |
+
"examples/female.wav",
|
| 602 |
None,
|
| 603 |
False,
|
| 604 |
True,
|
|
|
|
| 608 |
[
|
| 609 |
"한번은 내가 여섯 살이었을 때 멋진 그림을 보았습니다.",
|
| 610 |
"ko",
|
| 611 |
+
"examples/male.wav",
|
| 612 |
None,
|
| 613 |
False,
|
| 614 |
True,
|
| 615 |
False,
|
| 616 |
True,
|
| 617 |
],
|
| 618 |
+
[
|
| 619 |
"Egyszer hat éves koromban láttam egy csodálatos képet",
|
| 620 |
"hu",
|
| 621 |
+
"examples/male.wav",
|
| 622 |
None,
|
| 623 |
False,
|
| 624 |
True,
|
|
|
|
| 652 |
input_text_gr = gr.Textbox(
|
| 653 |
label="Text Prompt",
|
| 654 |
info="One or two sentences at a time is better. Up to 200 text characters.",
|
| 655 |
+
value="Tere, olen sinu hääle kloon. Ürita mulle lindistada võimalikult hea kvaliteediga klipp, et oskaksin su kõnet paremini jäljendada.",
|
| 656 |
)
|
| 657 |
language_gr = gr.Dropdown(
|
| 658 |
label="Language",
|
|
|
|
| 677 |
"hu",
|
| 678 |
"hi"
|
| 679 |
],
|
| 680 |
+
multiselect=False,
|
| 681 |
value="et",
|
| 682 |
)
|
| 683 |
ref_gr = gr.Audio(
|
| 684 |
label="Reference Audio",
|
| 685 |
+
#info="Click on the ✎ button to upload your own target speaker audio",
|
| 686 |
type="filepath",
|
| 687 |
+
value="examples/female.wav",
|
| 688 |
)
|
| 689 |
mic_gr = gr.Audio(
|
| 690 |
+
sources="microphone",
|
| 691 |
+
#info="Use your microphone to record audio",
|
| 692 |
type="filepath",
|
| 693 |
label="Use Microphone for Reference",
|
| 694 |
)
|
|
|
|
| 717 |
|
| 718 |
|
| 719 |
with gr.Column():
|
|
|
|
| 720 |
audio_gr = gr.Audio(label="Synthesised Audio", autoplay=True)
|
| 721 |
out_text_gr = gr.Text(label="Metrics")
|
| 722 |
ref_audio_gr = gr.Audio(label="Reference Audio Used")
|
|
|
|
| 725 |
gr.Examples(examples,
|
| 726 |
label="Examples",
|
| 727 |
inputs=[input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr],
|
| 728 |
+
outputs=[audio_gr, out_text_gr, ref_audio_gr],
|
| 729 |
fn=predict,
|
| 730 |
cache_examples=False,)
|
| 731 |
|
| 732 |
+
tts_button.click(predict, [input_text_gr, language_gr, ref_gr, mic_gr, use_mic_gr, clean_ref_gr, auto_det_lang_gr, tos_gr], outputs=[audio_gr, out_text_gr, ref_audio_gr])
|
| 733 |
|
| 734 |
if __name__ == "__main__":
|
| 735 |
demo.queue()
|
examples/female.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89a4fa9a16b6463f852cf9424f72c3d3c87aa83010e89db534c53fcd1ae12c02
|
| 3 |
+
size 1002030
|
examples/male.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:937c74afad004937e00d1687c68e02210e0c5d93ac072a7c8aeb9ab573517bb1
|
| 3 |
+
size 762126
|
requirements.txt
CHANGED
|
@@ -65,9 +65,9 @@ spacy[ja]>=3,<3.8
|
|
| 65 |
tokenizers==0.20.1
|
| 66 |
#deps for gradio
|
| 67 |
huggingface_hub
|
| 68 |
-
gradio==
|
| 69 |
-
pydantic==
|
| 70 |
-
python-multipart==0.0.
|
| 71 |
typing-extensions>=4.8.0
|
| 72 |
langid
|
| 73 |
deepspeed==0.14.5
|
|
|
|
| 65 |
tokenizers==0.20.1
|
| 66 |
#deps for gradio
|
| 67 |
huggingface_hub
|
| 68 |
+
gradio==5.41.0
|
| 69 |
+
pydantic==2.11.7
|
| 70 |
+
python-multipart==0.0.20
|
| 71 |
typing-extensions>=4.8.0
|
| 72 |
langid
|
| 73 |
deepspeed==0.14.5
|