daihui.zhang committed on
Commit
bdb9da4
·
1 Parent(s): d3badad

fix bug of lost segments

Browse files
Files changed (2) hide show
  1. config.py +1 -1
  2. transcribe/pipelines/pipe_vad.py +3 -3
config.py CHANGED
@@ -21,7 +21,7 @@ console_handler.setFormatter(console_formatter)
21
  logging.getLogger().addHandler(console_handler)
22
 
23
  # 文字输出长度阈值
24
- TEXT_THREHOLD = 16
25
 
26
  BASE_DIR = pathlib.Path(__file__).parent
27
  MODEL_DIR = BASE_DIR / "moyoyo_asr_models"
 
21
  logging.getLogger().addHandler(console_handler)
22
 
23
  # 文字输出长度阈值
24
+ TEXT_THREHOLD = 6
25
 
26
  BASE_DIR = pathlib.Path(__file__).parent
27
  MODEL_DIR = BASE_DIR / "moyoyo_asr_models"
transcribe/pipelines/pipe_vad.py CHANGED
@@ -62,15 +62,15 @@ class VadPipe(BasePipe):
62
 
63
  if speech_data: # 表示有音频的变化点出现
64
  rel_start_frame, rel_end_frame = speech_data
65
- if rel_start_frame and not rel_end_frame:
66
  self._status = "START" # 语音开始
67
  target_audio = source_audio[rel_start_frame:]
68
  logging.debug("🫸 Speech start frame: {}".format(rel_start_frame))
69
- elif not rel_start_frame and rel_end_frame:
70
  self._status = "END" # 音频结束
71
  target_audio = source_audio[:rel_end_frame]
72
  logging.debug(" 🫷Speech ended, capturing audio up to frame: {}".format(rel_end_frame))
73
- elif rel_start_frame and rel_end_frame:
74
  self._status = 'END'
75
  target_audio = source_audio[rel_start_frame:rel_end_frame]
76
  logging.debug(" 🔄 Speech segment captured from frame {} to frame {}".format(rel_start_frame, rel_end_frame))
 
62
 
63
  if speech_data: # 表示有音频的变化点出现
64
  rel_start_frame, rel_end_frame = speech_data
65
+ if rel_start_frame is not None and rel_end_frame is None:
66
  self._status = "START" # 语音开始
67
  target_audio = source_audio[rel_start_frame:]
68
  logging.debug("🫸 Speech start frame: {}".format(rel_start_frame))
69
+ elif rel_start_frame is None and rel_end_frame is not None:
70
  self._status = "END" # 音频结束
71
  target_audio = source_audio[:rel_end_frame]
72
  logging.debug(" 🫷Speech ended, capturing audio up to frame: {}".format(rel_end_frame))
73
+ elif rel_start_frame is not None and rel_end_frame is not None:
74
  self._status = 'END'
75
  target_audio = source_audio[rel_start_frame:rel_end_frame]
76
  logging.debug(" 🔄 Speech segment captured from frame {} to frame {}".format(rel_start_frame, rel_end_frame))