Luigi committed on
Commit
b71a9e5
·
1 Parent(s): 2dc5e25

Apply gain normalization to each audio segment we input to Whisper

Browse files
Files changed (1) hide show
  1. app.py +3 -1
app.py CHANGED
@@ -5,7 +5,7 @@ import tempfile
5
  import torch
6
  import gradio as gr
7
  from faster_whisper import BatchedInferencePipeline, WhisperModel
8
- from pydub import AudioSegment
9
  from pyannote.audio import Pipeline as DiarizationPipeline
10
  import opencc
11
 
@@ -164,6 +164,7 @@ def _transcribe_fwhisper_cpu_stream(model_id, language, audio_path, whisper_mult
164
  end_ms = int(turn.end * 1000)
165
  segment = AudioSegment.from_file(audio_path)[start_ms:end_ms]
166
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
 
167
  segment.export(tmp.name, format="wav")
168
  segments, _ = pipe.transcribe(
169
  tmp.name,
@@ -205,6 +206,7 @@ def _transcribe_fwhisper_gpu_stream(model_id, language, audio_path, whisper_mult
205
  end_ms = int(turn.end * 1000)
206
  segment = AudioSegment.from_file(audio_path)[start_ms:end_ms]
207
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
 
208
  segment.export(tmp.name, format="wav")
209
  segments, _ = pipe.transcribe(
210
  tmp.name,
 
5
  import torch
6
  import gradio as gr
7
  from faster_whisper import BatchedInferencePipeline, WhisperModel
8
+ from pydub import AudioSegment, effects
9
  from pyannote.audio import Pipeline as DiarizationPipeline
10
  import opencc
11
 
 
164
  end_ms = int(turn.end * 1000)
165
  segment = AudioSegment.from_file(audio_path)[start_ms:end_ms]
166
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
167
+ segment = effects.normalize(segment)
168
  segment.export(tmp.name, format="wav")
169
  segments, _ = pipe.transcribe(
170
  tmp.name,
 
206
  end_ms = int(turn.end * 1000)
207
  segment = AudioSegment.from_file(audio_path)[start_ms:end_ms]
208
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
209
+ segment = effects.normalize(segment)
210
  segment.export(tmp.name, format="wav")
211
  segments, _ = pipe.transcribe(
212
  tmp.name,