daihui.zhang committed
Commit 485d8e3 · 1 Parent(s): ded2334

change to pipelines

transcribe/pipelines/__init__.py ADDED
@@ -0,0 +1,4 @@
+
+from .pipe_translate import TranslatePipe
+from .pipe_whisper import WhisperPipe
+from .base import MetaItem
transcribe/pipelines/base.py ADDED
@@ -0,0 +1,50 @@
+
+from dataclasses import dataclass, field
+from multiprocessing import Process
+
+@dataclass
+class Segment:
+    t0: int
+    t1: int
+    text: str
+
+@dataclass
+class MetaItem:
+    segments: list[Segment] = field(default_factory=list)
+    audio: bytes = b''
+    transcribe_content: str = ''
+    translate_content: str = ''
+    source_language: str = 'zh'
+    destination_language: str = 'en'
+
+
+class BasePipe(Process):
+    def __init__(self, in_queue, out_queue) -> None:
+        super().__init__()  # Initialize the Process class
+        self._in_queue = in_queue
+        self._out_queue = out_queue
+
+    @property
+    def output_queue(self):
+        return self._out_queue
+
+    @property
+    def input_queue(self):
+        return self._in_queue
+
+    def process(self, in_data: MetaItem) -> MetaItem:
+        raise NotImplementedError("Subclasses should implement this method.")
+
+
+    @classmethod
+    def init(cls):
+        raise NotImplementedError
+
+    def run(self):
+        self.init()
+        while True:
+            item = self._in_queue.get()
+            if item is None:  # Check for termination signal
+                break
+            out_item = self.process(item)
+            self._out_queue.put(out_item)
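
The init/run split is the design point worth noting: run() executes in the spawned child process, so init() gives each subclass a hook to load heavy models after process start instead of pickling them across it. A minimal sketch of a subclass and the None poison-pill shutdown (UpperPipe is illustrative, not part of the commit):

    import multiprocessing as mp
    from transcribe.pipelines.base import BasePipe, MetaItem

    class UpperPipe(BasePipe):
        @classmethod
        def init(cls):
            pass  # nothing heavy to load in this toy example

        def process(self, in_data: MetaItem) -> MetaItem:
            in_data.transcribe_content = in_data.transcribe_content.upper()
            return in_data

    if __name__ == "__main__":
        in_q, out_q = mp.Queue(), mp.Queue()
        pipe = UpperPipe(in_q, out_q)
        pipe.start()
        in_q.put(MetaItem(transcribe_content="hello"))
        print(out_q.get().transcribe_content)  # HELLO
        in_q.put(None)  # poison pill: breaks the run() loop
        pipe.join()
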
transcribe/pipelines/pipe_translate.py ADDED
@@ -0,0 +1,22 @@
+
+from .base import MetaItem, BasePipe, Segment
+from llama_cpp import Llama
+from ..translator import QwenTranslator
+from config import LLM_MODEL_PATH, LLM_SYS_PROMPT
+
+class TranslatePipe(BasePipe):
+    translator = None  # populated by init() inside the worker process
+
+    @classmethod
+    def init(cls):
+        if cls.translator is None:
+            cls.translator = QwenTranslator(LLM_MODEL_PATH, LLM_SYS_PROMPT)
+
+
+    def process(self, in_data: MetaItem) -> MetaItem:
+        context = in_data.transcribe_content
+        result = self.translator.translate(
+            context, src_lang=in_data.source_language, dst_lang=in_data.destination_language)
+        in_data.translate_content = result
+        return in_data
+
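
Keeping the LLM behind a class attribute that init() fills inside the worker means the parent process never loads llama_cpp weights itself. A smoke test for this pipe in isolation might look like the following (a sketch; it assumes config.LLM_MODEL_PATH points at a downloaded GGUF model):

    import multiprocessing as mp
    from transcribe.pipelines import TranslatePipe, MetaItem

    if __name__ == "__main__":
        in_q, out_q = mp.Queue(), mp.Queue()
        pipe = TranslatePipe(in_q, out_q)
        pipe.daemon = True
        pipe.start()  # init() runs in the child, so the model is loaded there

        in_q.put(MetaItem(transcribe_content="你好",
                          source_language="zh",
                          destination_language="en"))
        print(out_q.get().translate_content)
        in_q.put(None)  # terminate the worker loop
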
transcribe/pipelines/pipe_vad.py ADDED
@@ -0,0 +1,3 @@
+
+
+from .base import MetaItem, BasePipe
transcribe/pipelines/pipe_whisper.py ADDED
@@ -0,0 +1,28 @@
+
+
+from .base import MetaItem, BasePipe, Segment
+from ..whisper import WhisperCPP
+
+
+class WhisperPipe(BasePipe):
+    whisper = None  # populated by init() inside the worker process
+
+    def __init__(self, in_queue, out_queue) -> None:
+        super().__init__(in_queue, out_queue)
+
+
+    @classmethod
+    def init(cls):
+        if cls.whisper is None:
+            cls.whisper = WhisperCPP()
+
+
+    def process(self, in_data: MetaItem) -> MetaItem:
+        audio_data = in_data.audio
+        source_language = in_data.source_language
+        segments = self.whisper.transcribe(audio_data, source_language)
+        texts = "".join([s.text for s in segments])
+        in_data.segments = [Segment(t0=s.t0, t1=s.t1, text=s.text) for s in segments]
+        in_data.transcribe_content = texts
+        in_data.audio = b""  # drop the raw audio so it doesn't travel downstream
+        return in_data
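
The implied contract here: MetaItem.audio carries raw float32 PCM bytes (matching the np.frombuffer call in WhisperCPP.transcribe below), and process() clears the buffer so large payloads don't travel further down the pipeline. Preparing such an item might look like this (a sketch; the file path is illustrative):

    import soundfile
    from transcribe.pipelines import MetaItem

    audio, _ = soundfile.read("assets/jfk.flac", dtype="float32")  # mono float32 samples
    item = MetaItem(audio=audio.tobytes(), source_language="en")
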
transcribe/translatepipes.py ADDED
@@ -0,0 +1,56 @@
+from transcribe.pipelines import WhisperPipe, TranslatePipe, MetaItem
+import multiprocessing as mp
+import config
+
+class TranslatePipes:
+    def __init__(self) -> None:
+
+        self.whisper_input_q = mp.Queue()
+        self.translate_input_q = mp.Queue()
+        self.result_queue = mp.Queue()
+
+        # whisper transcription
+        self._whisper_pipe = WhisperPipe(
+            in_queue=self.whisper_input_q,
+            out_queue=self.translate_input_q
+        )
+
+        # LLM translation
+        self._translate_pipe = TranslatePipe(
+            in_queue=self.translate_input_q,
+            out_queue=self.result_queue,
+        )
+
+        self._whisper_pipe.daemon = True
+        self._whisper_pipe.start()
+
+        self._translate_pipe.daemon = True
+        self._translate_pipe.start()
+
+
+    def translate(self, text, src_lang, dst_lang) -> MetaItem:
+        item = MetaItem(
+            transcribe_content=text,
+            source_language=src_lang,
+            destination_language=dst_lang)
+        self._translate_pipe.input_queue.put(item)
+        return self._translate_pipe.output_queue.get()
+
+
+    def transcribe(self, audio_buffer: bytes, src_lang: str) -> MetaItem:
+        # NOTE: the whisper pipe's output queue doubles as the translator's
+        # input queue, so the running TranslatePipe can consume the result
+        # before we do; reading it back here is racy.
+        item = MetaItem(audio=audio_buffer, source_language=src_lang)
+        self._whisper_pipe.input_queue.put(item)
+        return self._whisper_pipe.output_queue.get()
+
+
+if __name__ == "__main__":
+    import soundfile
+    tp = TranslatePipes()
+    # result = tp.translate("你好,今天天气怎么样?", src_lang="zh", dst_lang="en")
+    mel, _ = soundfile.read("assets/jfk.flac", dtype='float32')
+    result = tp.transcribe(mel.tobytes(), 'en')  # transcribe() expects raw float32 bytes
+    print(result)
+
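
Because WhisperPipe's out_queue is TranslatePipe's in_queue, feeding whisper_input_q and reading result_queue yields transcription plus translation in one pass. A sketch of that end-to-end path (subject to the caveat above: the transcribe()/translate() helpers read from these same queues, so don't mix the two styles concurrently):

    import soundfile
    from transcribe.pipelines import MetaItem
    from transcribe.translatepipes import TranslatePipes

    tp = TranslatePipes()
    audio, _ = soundfile.read("assets/jfk.flac", dtype="float32")
    tp.whisper_input_q.put(MetaItem(audio=audio.tobytes(),
                                    source_language="en",
                                    destination_language="zh"))
    item = tp.result_queue.get()  # has passed through both workers
    print(item.transcribe_content, "->", item.translate_content)
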
transcribe/whisper.py ADDED
@@ -0,0 +1,47 @@
+from pywhispercpp.model import Model
+import soundfile
+import config
+import numpy as np
+
+
+class WhisperCPP:
+
+    def __init__(self, warmup=True) -> None:
+        models_dir = config.MODEL_DIR.as_posix()
+        self.model = Model(
+            model=config.WHISPER_MODEL,
+            models_dir=models_dir,
+            print_realtime=False,
+            print_progress=False,
+            print_timestamps=False,
+        )
+        if warmup:
+            self.warmup()
+
+
+    def warmup(self, warmup_steps=1):
+        mel, _ = soundfile.read("assets/jfk.flac")
+        for _ in range(warmup_steps):
+            self.model.transcribe(mel, print_progress=False)
+
+    @staticmethod
+    def config_language(language):
+        if language == "zh":
+            return config.MAX_LENTH_ZH, config.WHISPER_PROMPT_ZH
+        elif language == "en":
+            return config.MAX_LENGTH_EN, config.WHISPER_PROMPT_EN
+        raise ValueError(f"Unsupported language: {language}")
+
+    def transcribe(self, audio_buffer: bytes, language):
+        max_len, prompt = self.config_language(language)
+        # reinterpret the raw bytes as mono float32 PCM samples
+        audio_buffer = np.frombuffer(audio_buffer, dtype=np.float32)
+        print("audio buffer got:", len(audio_buffer))
+        output = self.model.transcribe(
+            audio_buffer,
+            initial_prompt=prompt,
+            language=language,
+            token_timestamps=True,
+            max_len=max_len
+        )
+        return output
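
Using the wrapper directly, outside the pipeline, might look like this (a sketch; it assumes config.MODEL_DIR and config.WHISPER_MODEL point at a downloaded ggml model and that assets/jfk.flac exists):

    import soundfile
    from transcribe.whisper import WhisperCPP

    w = WhisperCPP(warmup=False)  # skip the warmup pass for a quicker start
    audio, _ = soundfile.read("assets/jfk.flac", dtype="float32")
    for seg in w.transcribe(audio.tobytes(), "en"):  # raw float32 PCM bytes in
        print(seg.t0, seg.t1, seg.text)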