File size: 2,899 Bytes
ca5d527 485d8e3 c6b44fd 485d8e3 3a0633a 6f13b8c 269051f 485d8e3 31ad35a ca5d527 3a0633a 485d8e3 31ad35a 2aed46a c0447ed ca5d527 269051f 31ad35a 3a0633a c0447ed 269051f c0447ed 485d8e3 6f13b8c 269051f 3a0633a 485d8e3 2aed46a 3a0633a 2aed46a 485d8e3 3a0633a 6f13b8c 31ad35a 3a0633a 31ad35a 485d8e3 3a0633a 485d8e3 3a0633a c0447ed 485d8e3 3a0633a 485d8e3 c0447ed 485d8e3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
from transcribe.pipelines import WhisperPipe, MetaItem, WhisperChinese, Translate7BPipe, FunASRPipe, VadPipe
class TranslatePipes:
    """Start the speech/translation pipelines and offer request helpers.

    Construction starts one daemon worker each for Whisper transcription,
    FunASR transcription, 7B translation and voice-activity detection.
    Every public helper builds a ``MetaItem``, puts it on the chosen
    pipeline's ``input_queue`` and returns whatever that pipeline's
    ``output_queue`` yields next.

    Two pipelines are intentionally not launched (the small translation
    model and the dedicated Chinese Whisper model). Their slots are kept as
    ``None`` so that the methods which used to reference them fall back to a
    running pipeline instead of raising ``AttributeError``.
    """

    def __init__(self) -> None:
        # Every launched pipeline, in start order; used by wait_ready().
        self._process = []

        # Whisper transcription.
        self._whisper_pipe_en = self._launch_process(WhisperPipe())
        # Dedicated Chinese Whisper pipeline is disabled; see
        # get_whisper_model() for the fallback.
        self._whisper_pipe_zh = None
        self._funasr_pipe = self._launch_process(FunASRPipe())

        # LLM translation.
        # The small translation pipeline is disabled; translate() falls
        # back to the 7B pipeline while this stays None.
        self._translate_pipe = None
        self._translate_7b_pipe = self._launch_process(Translate7BPipe())

        # Voice-activity detection.
        self._vad_pipe = self._launch_process(VadPipe())

    # NOTE: a reset() hook that delegated to the VAD pipeline used to live
    # here and is currently disabled.

    def _launch_process(self, process_obj):
        """Start *process_obj* as a daemon, track it, and return it."""
        process_obj.daemon = True
        process_obj.start()
        self._process.append(process_obj)
        return process_obj

    @staticmethod
    def _request(pipe, item):
        """Put *item* on ``pipe.input_queue`` and return the next output item."""
        pipe.input_queue.put(item)
        return pipe.output_queue.get()

    def wait_ready(self):
        """Call ``wait()`` on every launched pipeline, in launch order."""
        for pipe in self._process:
            pipe.wait()

    def translate(self, text, src_lang, dst_lang) -> MetaItem:
        """Translate *text* from *src_lang* to *dst_lang*.

        Uses the small translation pipeline when one is configured and
        otherwise the 7B pipeline, which is the only translation pipeline
        started by ``__init__``.
        """
        item = MetaItem(
            transcribe_content=text,
            source_language=src_lang,
            destination_language=dst_lang)
        pipe = self._translate_pipe
        if pipe is None:
            pipe = self._translate_7b_pipe
        return self._request(pipe, item)

    def translate_large(self, text, src_lang, dst_lang) -> MetaItem:
        """Translate *text* from *src_lang* to *dst_lang* with the 7B pipeline."""
        item = MetaItem(
            transcribe_content=text,
            source_language=src_lang,
            destination_language=dst_lang)
        return self._request(self._translate_7b_pipe, item)

    def get_whisper_model(self, lang: str = 'en'):
        """Return the Whisper pipeline for *lang*.

        ``'zh'`` selects the dedicated Chinese pipeline when it is
        configured; every other case returns the default Whisper pipeline.
        NOTE(review): falling back to the default pipeline for ``'zh'``
        assumes it is an acceptable stand-in -- confirm against WhisperPipe.
        """
        if lang == 'zh' and self._whisper_pipe_zh is not None:
            return self._whisper_pipe_zh
        return self._whisper_pipe_en

    def get_transcription_model(self, lang: str = 'en'):
        """Return the FunASR pipeline for ``'zh'``, else the Whisper pipeline."""
        if lang == 'zh':
            return self._funasr_pipe
        return self._whisper_pipe_en

    def transcrible(self, audio_buffer: bytes, src_lang: str) -> MetaItem:
        """Transcribe *audio_buffer* with the pipeline chosen for *src_lang*."""
        transcription_model = self.get_transcription_model(src_lang)
        item = MetaItem(audio=audio_buffer, source_language=src_lang)
        return self._request(transcription_model, item)

    # Correctly spelled alias; the original misspelled name stays for
    # existing callers.
    transcribe = transcrible

    def voice_detect(self, audio_buffer: bytes) -> MetaItem:
        """Run *audio_buffer* through the VAD pipeline and return its result."""
        item = MetaItem(source_audio=audio_buffer)
        return self._request(self._vad_pipe, item)
if __name__ == "__main__":
    # Manual smoke test: start every pipeline and run voice-activity
    # detection on the bundled sample recording.
    import soundfile

    pipes = TranslatePipes()

    # Other checks that can be swapped in by hand:
    #   pipes.translate(<Chinese sentence>, src_lang="zh", dst_lang="en")
    #   pipes.transcrible(samples, 'en')
    samples, _sample_rate = soundfile.read("assets/jfk.flac")
    detection = pipes.voice_detect(samples)
    print(detection)
|