daihui.zhang committed on
Commit
d3badad
·
1 Parent(s): ca5d527

update text threshold

Browse files
Files changed (1) hide show
  1. transcribe/whisper_llm_serve.py +22 -8
transcribe/whisper_llm_serve.py CHANGED
@@ -55,6 +55,7 @@ class WhisperTranscriptionService:
55
  self.frames_np = None
56
  # 完整音频队列
57
  self.segments_queue = collections.deque()
 
58
 
59
  self._transcrible_analysis = None
60
  # 启动处理线程
@@ -136,14 +137,14 @@ class WhisperTranscriptionService:
136
  self.frames_np = frame_np.copy()
137
  else:
138
  self.frames_np = np.append(self.frames_np, frame_np)
139
- if speech_status == "END" and len(self.frames_np) > 0:
140
  self.segments_queue.appendleft(self.frames_np.copy())
141
  self.frames_np = np.array([], dtype=np.float32)
142
  except queue.Empty:
143
  pass
144
 
145
- def _process_transcription_results_2(self, segments: List[TranscriptToken],partial):
146
- seg_text = self.text_separator.join(seg.text for seg in segments)
147
  item = TransResult(
148
  seg_id=self.row_number,
149
  context=seg_text,
@@ -181,6 +182,7 @@ class WhisperTranscriptionService:
181
  silence_audio = np.zeros(self.sample_rate, dtype=np.float32)
182
  silence_audio[-len(audio_buffer):] = audio_buffer
183
  audio_buffer = silence_audio
 
184
 
185
  logger.debug(f"audio buffer size: {len(audio_buffer) / self.sample_rate:.2f}s")
186
  # try:
@@ -188,14 +190,26 @@ class WhisperTranscriptionService:
188
  segments = meta_item.segments
189
  logger.debug(f"Segments: {segments}")
190
  if len(segments):
191
- result = self._process_transcription_results_2(segments, partial)
 
 
 
 
 
 
 
 
 
 
 
 
192
  self._send_result_to_client(result)
193
  time.sleep(0.1)
194
 
195
- if partial == False:
196
- frame_epoch = 1
197
- else:
198
- frame_epoch += 1
199
  # 处理转录结果并发送到客户端
200
  # for result in self._process_transcription_results(segments, audio_buffer):
201
  # self._send_result_to_client(result)
 
55
  self.frames_np = None
56
  # 完整音频队列
57
  self.segments_queue = collections.deque()
58
+ self._temp_string = ""
59
 
60
  self._transcrible_analysis = None
61
  # 启动处理线程
 
137
  self.frames_np = frame_np.copy()
138
  else:
139
  self.frames_np = np.append(self.frames_np, frame_np)
140
+ if speech_status == "END" and len(frame_np) > 0:
141
  self.segments_queue.appendleft(self.frames_np.copy())
142
  self.frames_np = np.array([], dtype=np.float32)
143
  except queue.Empty:
144
  pass
145
 
146
+ def _process_transcription_results_2(self, seg_text:str,partial):
147
+
148
  item = TransResult(
149
  seg_id=self.row_number,
150
  context=seg_text,
 
182
  silence_audio = np.zeros(self.sample_rate, dtype=np.float32)
183
  silence_audio[-len(audio_buffer):] = audio_buffer
184
  audio_buffer = silence_audio
185
+
186
 
187
  logger.debug(f"audio buffer size: {len(audio_buffer) / self.sample_rate:.2f}s")
188
  # try:
 
190
  segments = meta_item.segments
191
  logger.debug(f"Segments: {segments}")
192
  if len(segments):
193
+ seg_text = self.text_separator.join(seg.text for seg in segments)
194
+ if self._temp_string:
195
+ seg_text = self._temp_string + seg_text
196
+
197
+ if partial == False:
198
+ if len(seg_text) < config.TEXT_THREHOLD:
199
+ partial = True
200
+ self._temp_string = seg_text
201
+ else:
202
+ self._temp_string = ""
203
+
204
+
205
+ result = self._process_transcription_results_2(seg_text, partial)
206
  self._send_result_to_client(result)
207
  time.sleep(0.1)
208
 
209
+ if partial == False:
210
+ frame_epoch = 1
211
+ else:
212
+ frame_epoch += 1
213
  # 处理转录结果并发送到客户端
214
  # for result in self._process_transcription_results(segments, audio_buffer):
215
  # self._send_result_to_client(result)