daihui.zhang
committed on
Commit
·
72f7126
1
Parent(s):
19824ff
adapter to run client
Browse files- transcribe/client.py +3 -3
- transcribe/transcription.py +7 -4
- transcribe/whisper_llm_serve.py +5 -5
transcribe/client.py
CHANGED
|
@@ -133,9 +133,9 @@ class Client:
|
|
| 133 |
"""
|
| 134 |
message = json.loads(message)
|
| 135 |
|
| 136 |
-
if self.uid != message.get("uid"):
|
| 137 |
-
|
| 138 |
-
|
| 139 |
|
| 140 |
if "status" in message.keys():
|
| 141 |
self.handle_status_messages(message)
|
|
|
|
| 133 |
"""
|
| 134 |
message = json.loads(message)
|
| 135 |
|
| 136 |
+
# if self.uid != message.get("uid"):
|
| 137 |
+
# print("[ERROR]: invalid client uid")
|
| 138 |
+
# return
|
| 139 |
|
| 140 |
if "status" in message.keys():
|
| 141 |
self.handle_status_messages(message)
|
transcribe/transcription.py
CHANGED
|
@@ -174,7 +174,8 @@ class TranscriptionServer:
|
|
| 174 |
frame_data = websocket.recv()
|
| 175 |
if frame_data == b"END_OF_AUDIO":
|
| 176 |
return False
|
| 177 |
-
return np.frombuffer(frame_data, dtype=np.int16).astype(np.float32) / 32768.0
|
|
|
|
| 178 |
|
| 179 |
|
| 180 |
def handle_new_connection(self, websocket):
|
|
@@ -184,9 +185,11 @@ class TranscriptionServer:
|
|
| 184 |
|
| 185 |
try:
|
| 186 |
logging.info("New client connected")
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
|
|
|
|
|
|
| 190 |
if self.client_manager is None:
|
| 191 |
max_clients = options.get('max_clients', 4)
|
| 192 |
max_connection_time = options.get('max_connection_time', 600)
|
|
|
|
| 174 |
frame_data = websocket.recv()
|
| 175 |
if frame_data == b"END_OF_AUDIO":
|
| 176 |
return False
|
| 177 |
+
# return np.frombuffer(frame_data, dtype=np.int16).astype(np.float32) / 32768.0
|
| 178 |
+
return np.frombuffer(frame_data, dtype=np.float32)
|
| 179 |
|
| 180 |
|
| 181 |
def handle_new_connection(self, websocket):
|
|
|
|
| 185 |
|
| 186 |
try:
|
| 187 |
logging.info("New client connected")
|
| 188 |
+
options = websocket.recv()
|
| 189 |
+
try:
|
| 190 |
+
options = json.loads(options)
|
| 191 |
+
except Exception as e:
|
| 192 |
+
options = {"language": from_lang, "uid": str(uuid1())}
|
| 193 |
if self.client_manager is None:
|
| 194 |
max_clients = options.get('max_clients', 4)
|
| 195 |
max_connection_time = options.get('max_connection_time', 600)
|
transcribe/whisper_llm_serve.py
CHANGED
|
@@ -117,7 +117,6 @@ class PywhisperInference:
|
|
| 117 |
llm_model = None
|
| 118 |
vad_model = None
|
| 119 |
|
| 120 |
-
|
| 121 |
@classmethod
|
| 122 |
def initializer(cls, event:mp.Event, warmup=True):
|
| 123 |
models_dir = config.MODEL_DIR.as_posix()
|
|
@@ -265,9 +264,10 @@ class PyWhiperCppServe(ServeClientBase):
|
|
| 265 |
def translate_text(self, text):
|
| 266 |
"""
|
| 267 |
translate the text to dst lang"""
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
|
|
|
| 271 |
|
| 272 |
def _segments_split(self, segments, audio_buffer: np.ndarray):
|
| 273 |
"""根据左边第一个标点符号来将序列拆分成 观察段 和 剩余部分"""
|
|
@@ -292,7 +292,7 @@ class PyWhiperCppServe(ServeClientBase):
|
|
| 292 |
rest_buffer_duration = (len(audio_buffer) - seg_index) / self.sample_rate
|
| 293 |
# is_end = any(i in seg.text for i in config.SENTENCE_END_MARKERS)
|
| 294 |
right_watch_sequences = segments[min(idx+1, len(segments)):]
|
| 295 |
-
if rest_buffer_duration >= 1:
|
| 296 |
left_watch_idx = seg_index
|
| 297 |
break
|
| 298 |
return left_watch_idx, left_watch_sequences, right_watch_sequences, is_end
|
|
|
|
| 117 |
llm_model = None
|
| 118 |
vad_model = None
|
| 119 |
|
|
|
|
| 120 |
@classmethod
|
| 121 |
def initializer(cls, event:mp.Event, warmup=True):
|
| 122 |
models_dir = config.MODEL_DIR.as_posix()
|
|
|
|
| 264 |
def translate_text(self, text):
|
| 265 |
"""
|
| 266 |
translate the text to dst lang"""
|
| 267 |
+
return "sample english"
|
| 268 |
+
# translate_fut = self._pool.submit(
|
| 269 |
+
# PywhisperInference.translate, text, self.language, self.dst_lang)
|
| 270 |
+
# return translate_fut.result()
|
| 271 |
|
| 272 |
def _segments_split(self, segments, audio_buffer: np.ndarray):
|
| 273 |
"""根据左边第一个标点符号来将序列拆分成 观察段 和 剩余部分"""
|
|
|
|
| 292 |
rest_buffer_duration = (len(audio_buffer) - seg_index) / self.sample_rate
|
| 293 |
# is_end = any(i in seg.text for i in config.SENTENCE_END_MARKERS)
|
| 294 |
right_watch_sequences = segments[min(idx+1, len(segments)):]
|
| 295 |
+
if rest_buffer_duration >= 1.5:
|
| 296 |
left_watch_idx = seg_index
|
| 297 |
break
|
| 298 |
return left_watch_idx, left_watch_sequences, right_watch_sequences, is_end
|