Spaces:
Sleeping
Sleeping
Merge branch 'refactor' of github.com:Masao-Someki/SingingSDS-dev into feature/add_per
Browse files
- evaluation/svs_eval.py +3 -3
- interface.py +7 -5
evaluation/svs_eval.py
CHANGED
|
@@ -102,7 +102,7 @@ def pypinyin_g2p_phone_without_prosody(text):
|
|
| 102 |
return phones
|
| 103 |
|
| 104 |
|
| 105 |
-
def eval_per(audio_path, model=None):
|
| 106 |
audio_array, sr = librosa.load(audio_path, sr=16000)
|
| 107 |
asr_result = asr_pipeline(
|
| 108 |
audio_array,
|
|
@@ -133,12 +133,12 @@ def load_evaluators(config):
|
|
| 133 |
return loaded
|
| 134 |
|
| 135 |
|
| 136 |
-
def run_evaluation(audio_path, evaluators):
|
| 137 |
results = {}
|
| 138 |
if "singmos" in evaluators:
|
| 139 |
results.update(eval_singmos(audio_path, evaluators["singmos"]))
|
| 140 |
if "per" in evaluators:
|
| 141 |
-
results.update(eval_per(audio_path, evaluators["per"]))
|
| 142 |
if "melody" in evaluators:
|
| 143 |
results.update(eval_melody_metrics(audio_path, evaluators["melody"]))
|
| 144 |
if "aesthetic" in evaluators:
|
|
|
|
| 102 |
return phones
|
| 103 |
|
| 104 |
|
| 105 |
+
def eval_per(audio_path, reference_text, model=None):
|
| 106 |
audio_array, sr = librosa.load(audio_path, sr=16000)
|
| 107 |
asr_result = asr_pipeline(
|
| 108 |
audio_array,
|
|
|
|
| 133 |
return loaded
|
| 134 |
|
| 135 |
|
| 136 |
+
def run_evaluation(audio_path, evaluators, **kwargs):
|
| 137 |
results = {}
|
| 138 |
if "singmos" in evaluators:
|
| 139 |
results.update(eval_singmos(audio_path, evaluators["singmos"]))
|
| 140 |
if "per" in evaluators:
|
| 141 |
+
results.update(eval_per(audio_path, kwargs["llm_text"], evaluators["per"]))
|
| 142 |
if "melody" in evaluators:
|
| 143 |
results.update(eval_melody_metrics(audio_path, evaluators["melody"]))
|
| 144 |
if "aesthetic" in evaluators:
|
interface.py
CHANGED
|
@@ -24,6 +24,7 @@ class GradioInterface:
|
|
| 24 |
self.character_info[self.current_character].default_voice
|
| 25 |
]
|
| 26 |
self.pipeline = SingingDialoguePipeline(self.default_config)
|
|
|
|
| 27 |
|
| 28 |
def load_config(self, path: str):
|
| 29 |
with open(path, "r") as f:
|
|
@@ -211,21 +212,22 @@ class GradioInterface:
|
|
| 211 |
if not audio_path:
|
| 212 |
return gr.update(value=""), gr.update(value="")
|
| 213 |
tmp_file = f"audio_{int(time.time())}_{uuid.uuid4().hex[:8]}.wav"
|
| 214 |
-
results = self.pipeline.run(
|
| 215 |
audio_path,
|
| 216 |
self.svs_model_map[self.current_svs_model]["lang"],
|
| 217 |
self.character_info[self.current_character].prompt,
|
| 218 |
self.current_voice,
|
| 219 |
output_audio_path=tmp_file,
|
| 220 |
)
|
| 221 |
-
formatted_logs = f"ASR: {results['asr_text']}\nLLM: {results['llm_text']}"
|
| 222 |
return gr.update(value=formatted_logs), gr.update(
|
| 223 |
-
value=results["output_audio_path"]
|
| 224 |
)
|
| 225 |
|
| 226 |
def update_metrics(self, audio_path):
|
| 227 |
-
if not audio_path:
|
| 228 |
return gr.update(value="")
|
| 229 |
-
results = self.pipeline.evaluate(audio_path)
|
|
|
|
| 230 |
formatted_metrics = "\n".join([f"{k}: {v}" for k, v in results.items()])
|
| 231 |
return gr.update(value=formatted_metrics)
|
|
|
|
| 24 |
self.character_info[self.current_character].default_voice
|
| 25 |
]
|
| 26 |
self.pipeline = SingingDialoguePipeline(self.default_config)
|
| 27 |
+
self.results = None
|
| 28 |
|
| 29 |
def load_config(self, path: str):
|
| 30 |
with open(path, "r") as f:
|
|
|
|
| 212 |
if not audio_path:
|
| 213 |
return gr.update(value=""), gr.update(value="")
|
| 214 |
tmp_file = f"audio_{int(time.time())}_{uuid.uuid4().hex[:8]}.wav"
|
| 215 |
+
self.results = self.pipeline.run(
|
| 216 |
audio_path,
|
| 217 |
self.svs_model_map[self.current_svs_model]["lang"],
|
| 218 |
self.character_info[self.current_character].prompt,
|
| 219 |
self.current_voice,
|
| 220 |
output_audio_path=tmp_file,
|
| 221 |
)
|
| 222 |
+
formatted_logs = f"ASR: {self.results['asr_text']}\nLLM: {self.results['llm_text']}"
|
| 223 |
return gr.update(value=formatted_logs), gr.update(
|
| 224 |
+
value=self.results["output_audio_path"]
|
| 225 |
)
|
| 226 |
|
| 227 |
def update_metrics(self, audio_path):
|
| 228 |
+
if not audio_path or not self.results:
|
| 229 |
return gr.update(value="")
|
| 230 |
+
results = self.pipeline.evaluate(audio_path, **self.results)
|
| 231 |
+
results.update(self.results.get("metrics", {}))
|
| 232 |
formatted_metrics = "\n".join([f"{k}: {v}" for k, v in results.items()])
|
| 233 |
return gr.update(value=formatted_metrics)
|