from .base import MetaItem, BasePipe
from ..helpers.vadprocessor import VadV2
import numpy as np
from silero_vad import get_speech_timestamps
from typing import List
import logging

# import noisereduce as nr


class VadPipe(BasePipe):
    """Pipeline stage that passes incoming audio through a shared ``VadV2``.

    The ``VadV2`` instance is stored on the class (``vac``) and is created
    by :meth:`init`; ``process`` expects it to exist already.
    """

    # Shared VadV2 instance; stays None until ``init()`` has been called.
    vac = None
    sample_rate = 16000
    window_size_samples = 512
    chunk_size = 512
    # NOTE: these three must be plain floats. A trailing comma after the
    # value (``prob_threshold = 0.5,``) would silently turn each one into a
    # 1-tuple, and ``silence_s * 1000`` would then repeat the tuple 1000
    # times instead of multiplying the number.
    prob_threshold = 0.5
    silence_s = 0.5
    cache_s = 0.25

    @classmethod
    def init(cls):
        """Create the class-level ``VadV2`` instance if it does not exist yet.

        Subsequent calls are no-ops once ``cls.vac`` is set.
        """
        if cls.vac is None:
            cls.vac = VadV2(
                cls.prob_threshold,
                cls.sample_rate,
                # Presumably converts seconds to milliseconds for VadV2 --
                # confirm against the VadV2 signature.
                cls.silence_s * 1000,
                cls.cache_s * 1000,
                max_speech_duration_s=15,
            )

    def process(self, in_data: MetaItem) -> MetaItem:
        """Run the VAD over ``in_data.source_audio`` and store the result.

        Args:
            in_data: Item whose ``source_audio`` is a bytes-like buffer.

        Returns:
            The same ``in_data`` object, with ``audio`` set to the
            ``'audio'`` entry of the VAD result when that result is truthy,
            or to ``b""`` otherwise.
        """
        # NOTE(review): no dtype is given, so NumPy decodes the buffer with
        # its default dtype (float64). Confirm this matches how the upstream
        # audio bytes are encoded.
        audio_buffer = np.frombuffer(in_data.source_audio)
        # ``vac`` is None until ``init()`` has run; calling it before then
        # raises TypeError.
        vad_audio = self.vac(audio_buffer)
        if vad_audio:
            in_data.audio = vad_audio['audio']
        else:
            in_data.audio = b""
        return in_data

    # def reduce_noise(self, data):
    #     return nr.reduce_noise(y=data, sr=self.sample_rate)