File size: 2,899 Bytes
ca5d527
485d8e3
c6b44fd
485d8e3
 
3a0633a
6f13b8c
 
269051f
485d8e3
31ad35a
ca5d527
3a0633a
 
485d8e3
31ad35a
2aed46a
c0447ed
ca5d527
269051f
31ad35a
 
3a0633a
c0447ed
 
 
269051f
c0447ed
485d8e3
6f13b8c
269051f
 
3a0633a
485d8e3
 
2aed46a
3a0633a
2aed46a
 
 
 
 
 
485d8e3
3a0633a
6f13b8c
31ad35a
 
3a0633a
 
31ad35a
 
 
485d8e3
3a0633a
 
 
 
 
 
 
485d8e3
3a0633a
 
 
 
c0447ed
 
 
 
485d8e3
 
 
3a0633a
485d8e3
 
 
c0447ed
 
485d8e3
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from transcribe.pipelines import WhisperPipe, MetaItem, WhisperChinese, Translate7BPipe, FunASRPipe, VadPipe


class TranslatePipes:
    """Launch the speech pipeline workers and expose blocking request helpers.

    Each worker is a process-like object that provides ``start()``,
    ``wait()``, ``input_queue`` and ``output_queue``. Every request helper
    puts one ``MetaItem`` on a worker's input queue and returns the next
    item that appears on that worker's output queue.

    NOTE(review): nothing here pairs a result with the request that produced
    it, so concurrent callers sharing one worker could receive each other's
    results -- confirm that callers serialize access.
    """

    def __init__(self) -> None:
        # Every launched worker, in launch order (consumed by wait_ready).
        self._process = []

        # Speech-to-text workers.
        self._whisper_pipe_en = self._launch_process(WhisperPipe())
        # The dedicated Chinese Whisper worker (WhisperChinese) is currently
        # disabled. The attribute is still defined so get_whisper_model()
        # can detect that instead of raising AttributeError.
        self._whisper_pipe_zh = None
        self._funasr_pipe = self._launch_process(FunASRPipe())

        # LLM translation workers. The lightweight translation worker is
        # currently disabled; translate() falls back to the 7B worker.
        self._translate_pipe = None
        self._translate_7b_pipe = self._launch_process(Translate7BPipe())

        # Voice-activity-detection worker.
        self._vad_pipe = self._launch_process(VadPipe())

    def _launch_process(self, process_obj):
        """Start ``process_obj`` as a daemon, remember it, and return it."""
        # The daemon flag has to be set before start() is called.
        process_obj.daemon = True
        process_obj.start()
        self._process.append(process_obj)
        return process_obj

    @staticmethod
    def _round_trip(pipe, item):
        """Send ``item`` to ``pipe`` and block until its next output item."""
        pipe.input_queue.put(item)
        return pipe.output_queue.get()

    def wait_ready(self):
        """Call ``wait()`` on every launched worker, in launch order.

        NOTE(review): presumably ``wait()`` blocks until the worker has
        finished loading its model -- confirm against the pipeline classes.
        """
        for p in self._process:
            p.wait()

    def translate(self, text, src_lang, dst_lang) -> "MetaItem":
        """Translate ``text`` from ``src_lang`` to ``dst_lang``.

        Uses the lightweight translation worker when one is running and
        otherwise falls back to the 7B worker, so the call no longer fails
        with ``AttributeError`` while the lightweight worker is disabled.

        Returns:
            The ``MetaItem`` taken from the worker's output queue.
        """
        item = MetaItem(
            transcribe_content=text,
            source_language=src_lang,
            destination_language=dst_lang)
        pipe = self._translate_pipe
        if pipe is None:
            pipe = self._translate_7b_pipe
        return self._round_trip(pipe, item)

    def translate_large(self, text, src_lang, dst_lang) -> "MetaItem":
        """Translate ``text`` from ``src_lang`` to ``dst_lang`` with the 7B worker.

        Returns:
            The ``MetaItem`` taken from the worker's output queue.
        """
        item = MetaItem(
            transcribe_content=text,
            source_language=src_lang,
            destination_language=dst_lang)
        return self._round_trip(self._translate_7b_pipe, item)

    def get_whisper_model(self, lang: str = 'en'):
        """Return the Whisper worker for ``lang``.

        ``'zh'`` selects the dedicated Chinese worker when it is running.
        Any other language, or ``'zh'`` while that worker is disabled,
        gets the default Whisper worker.

        NOTE(review): the fallback assumes the default Whisper worker can
        handle Chinese input -- confirm, or use get_transcription_model().
        """
        if lang == 'zh' and self._whisper_pipe_zh is not None:
            return self._whisper_pipe_zh
        return self._whisper_pipe_en

    def get_transcription_model(self, lang: str = 'en'):
        """Return the transcription worker for ``lang``.

        ``'zh'`` is served by the FunASR worker; every other language is
        served by the default Whisper worker.
        """
        if lang == 'zh':
            return self._funasr_pipe
        return self._whisper_pipe_en

    def transcrible(self, audio_buffer: bytes, src_lang: str) -> "MetaItem":
        """Transcribe ``audio_buffer`` using the worker chosen for ``src_lang``.

        Returns:
            The ``MetaItem`` taken from the worker's output queue.
        """
        transcription_model = self.get_transcription_model(src_lang)
        item = MetaItem(audio=audio_buffer, source_language=src_lang)
        return self._round_trip(transcription_model, item)

    def voice_detect(self, audio_buffer: bytes) -> "MetaItem":
        """Run voice-activity detection on ``audio_buffer``.

        Returns:
            The ``MetaItem`` taken from the VAD worker's output queue.
        """
        item = MetaItem(source_audio=audio_buffer)
        return self._round_trip(self._vad_pipe, item)


if __name__ == "__main__":
    # Manual smoke test: launch every worker, then run voice-activity
    # detection on the bundled sample recording and print the result.
    import soundfile

    pipes = TranslatePipes()
    # soundfile.read returns (data, samplerate); only the samples are used.
    samples, _sample_rate = soundfile.read("assets/jfk.flac")
    # Other entry points that can be exercised the same way:
    #   pipes.translate(<text>, src_lang="zh", dst_lang="en")
    #   pipes.transcrible(samples, 'en')
    print(pipes.voice_detect(samples))