ms180 committed on
Commit
87a2973
·
2 Parent(s): 0ad68fa 87f144d

Merge branch 'refactor' of github.com:Masao-Someki/SingingSDS-dev into feature/add_per

Browse files
Files changed (2) hide show
  1. evaluation/svs_eval.py +3 -3
  2. interface.py +7 -5
evaluation/svs_eval.py CHANGED
@@ -102,7 +102,7 @@ def pypinyin_g2p_phone_without_prosody(text):
102
  return phones
103
 
104
 
105
- def eval_per(audio_path, model=None):
106
  audio_array, sr = librosa.load(audio_path, sr=16000)
107
  asr_result = asr_pipeline(
108
  audio_array,
@@ -133,12 +133,12 @@ def load_evaluators(config):
133
  return loaded
134
 
135
 
136
- def run_evaluation(audio_path, evaluators):
137
  results = {}
138
  if "singmos" in evaluators:
139
  results.update(eval_singmos(audio_path, evaluators["singmos"]))
140
  if "per" in evaluators:
141
- results.update(eval_per(audio_path, evaluators["per"]))
142
  if "melody" in evaluators:
143
  results.update(eval_melody_metrics(audio_path, evaluators["melody"]))
144
  if "aesthetic" in evaluators:
 
102
  return phones
103
 
104
 
105
+ def eval_per(audio_path, reference_text, model=None):
106
  audio_array, sr = librosa.load(audio_path, sr=16000)
107
  asr_result = asr_pipeline(
108
  audio_array,
 
133
  return loaded
134
 
135
 
136
+ def run_evaluation(audio_path, evaluators, **kwargs):
137
  results = {}
138
  if "singmos" in evaluators:
139
  results.update(eval_singmos(audio_path, evaluators["singmos"]))
140
  if "per" in evaluators:
141
+ results.update(eval_per(audio_path, kwargs["llm_text"], evaluators["per"]))
142
  if "melody" in evaluators:
143
  results.update(eval_melody_metrics(audio_path, evaluators["melody"]))
144
  if "aesthetic" in evaluators:
interface.py CHANGED
@@ -24,6 +24,7 @@ class GradioInterface:
24
  self.character_info[self.current_character].default_voice
25
  ]
26
  self.pipeline = SingingDialoguePipeline(self.default_config)
 
27
 
28
  def load_config(self, path: str):
29
  with open(path, "r") as f:
@@ -211,21 +212,22 @@ class GradioInterface:
211
  if not audio_path:
212
  return gr.update(value=""), gr.update(value="")
213
  tmp_file = f"audio_{int(time.time())}_{uuid.uuid4().hex[:8]}.wav"
214
- results = self.pipeline.run(
215
  audio_path,
216
  self.svs_model_map[self.current_svs_model]["lang"],
217
  self.character_info[self.current_character].prompt,
218
  self.current_voice,
219
  output_audio_path=tmp_file,
220
  )
221
- formatted_logs = f"ASR: {results['asr_text']}\nLLM: {results['llm_text']}"
222
  return gr.update(value=formatted_logs), gr.update(
223
- value=results["output_audio_path"]
224
  )
225
 
226
  def update_metrics(self, audio_path):
227
- if not audio_path:
228
  return gr.update(value="")
229
- results = self.pipeline.evaluate(audio_path)
 
230
  formatted_metrics = "\n".join([f"{k}: {v}" for k, v in results.items()])
231
  return gr.update(value=formatted_metrics)
 
24
  self.character_info[self.current_character].default_voice
25
  ]
26
  self.pipeline = SingingDialoguePipeline(self.default_config)
27
+ self.results = None
28
 
29
  def load_config(self, path: str):
30
  with open(path, "r") as f:
 
212
  if not audio_path:
213
  return gr.update(value=""), gr.update(value="")
214
  tmp_file = f"audio_{int(time.time())}_{uuid.uuid4().hex[:8]}.wav"
215
+ self.results = self.pipeline.run(
216
  audio_path,
217
  self.svs_model_map[self.current_svs_model]["lang"],
218
  self.character_info[self.current_character].prompt,
219
  self.current_voice,
220
  output_audio_path=tmp_file,
221
  )
222
+ formatted_logs = f"ASR: {self.results['asr_text']}\nLLM: {self.results['llm_text']}"
223
  return gr.update(value=formatted_logs), gr.update(
224
+ value=self.results["output_audio_path"]
225
  )
226
 
227
  def update_metrics(self, audio_path):
228
+ if not audio_path or not self.results:
229
  return gr.update(value="")
230
+ results = self.pipeline.evaluate(audio_path, **self.results)
231
+ results.update(self.results.get("metrics", {}))
232
  formatted_metrics = "\n".join([f"{k}: {v}" for k, v in results.items()])
233
  return gr.update(value=formatted_metrics)