daihui.zhang committed on
Commit
ebd6110
·
1 Parent(s): e046f39

fix utf-8 error

Browse files
transcribe/pipelines/base.py CHANGED
@@ -59,4 +59,5 @@ class BasePipe(Process):
59
  if item is None: # Check for termination signal
60
  break
61
  out_item = self.process(item)
62
- self.output_queue.put(out_item)
 
 
59
  if item is None: # Check for termination signal
60
  break
61
  out_item = self.process(item)
62
+ if out_item:
63
+ self.output_queue.put(out_item)
transcribe/pipelines/pipe_whisper.py CHANGED
@@ -22,7 +22,7 @@ class WhisperPipe(BasePipe):
22
  source_language = in_data.source_language
23
  segments = self.whisper.transcribe(audio_data, source_language)
24
  texts = "".join([s.text for s in segments])
25
- in_data.segments = [Segment(t0=s.t0, t1=s.t1, text=s.text) for s in segments]
26
  in_data.transcribe_content = texts
27
  in_data.audio = b""
28
  return in_data
 
22
  source_language = in_data.source_language
23
  segments = self.whisper.transcribe(audio_data, source_language)
24
  texts = "".join([s.text for s in segments])
25
+ in_data.segments = [Segment(t0=s.t0, t1=s.t1, text=s.text) for s in segments if s.text != "�"]
26
  in_data.transcribe_content = texts
27
  in_data.audio = b""
28
  return in_data
transcribe/transcription.py CHANGED
@@ -176,8 +176,8 @@ class TranscriptionServer:
176
  frame_data = websocket.recv()
177
  if frame_data == b"END_OF_AUDIO":
178
  return False
179
- # return np.frombuffer(frame_data, dtype=np.int16).astype(np.float32) / 32768.0
180
- return np.frombuffer(frame_data, dtype=np.float32)
181
 
182
 
183
  def handle_new_connection(self, websocket):
 
176
  frame_data = websocket.recv()
177
  if frame_data == b"END_OF_AUDIO":
178
  return False
179
+ return np.frombuffer(frame_data, dtype=np.int16).astype(np.float32) / 32768.0
180
+ # return np.frombuffer(frame_data, dtype=np.float32)
181
 
182
 
183
  def handle_new_connection(self, websocket):
transcribe/whisper.py CHANGED
@@ -2,6 +2,9 @@ from pywhispercpp.model import Model
2
  import soundfile
3
  import config
4
  import numpy as np
 
 
 
5
 
6
 
7
  class WhisperCPP:
@@ -14,6 +17,7 @@ class WhisperCPP:
14
  print_realtime=False,
15
  print_progress=False,
16
  print_timestamps=False,
 
17
  )
18
  if warmup:
19
  self.warmup()
@@ -35,12 +39,15 @@ class WhisperCPP:
35
  def transcribe(self, audio_buffer:bytes, language):
36
  max_len, prompt = self.config_language(language)
37
  audio_buffer = np.frombuffer(audio_buffer, dtype=np.float32)
38
- output = self.model.transcribe(
39
- audio_buffer,
40
- initial_prompt=prompt,
41
- language=language,
42
- token_timestamps=True,
43
- max_len=max_len
44
- )
45
- return output
46
-
 
 
 
 
2
  import soundfile
3
  import config
4
  import numpy as np
5
+ from logging import getLogger
6
+
7
+ logger = getLogger(__name__)
8
 
9
 
10
  class WhisperCPP:
 
17
  print_realtime=False,
18
  print_progress=False,
19
  print_timestamps=False,
20
+ translate=False
21
  )
22
  if warmup:
23
  self.warmup()
 
39
  def transcribe(self, audio_buffer:bytes, language):
40
  max_len, prompt = self.config_language(language)
41
  audio_buffer = np.frombuffer(audio_buffer, dtype=np.float32)
42
+ try:
43
+ output = self.model.transcribe(
44
+ audio_buffer,
45
+ initial_prompt=prompt,
46
+ language=language,
47
+ token_timestamps=True,
48
+ max_len=max_len
49
+ )
50
+ return output
51
+ except Exception as e:
52
+ logger.error(e)
53
+ return None