Spaces:
Runtime error
Runtime error
Apply gain normalization to each audio segment before passing it to Whisper
Browse files
app.py
CHANGED
|
@@ -5,7 +5,7 @@ import tempfile
|
|
| 5 |
import torch
|
| 6 |
import gradio as gr
|
| 7 |
from faster_whisper import BatchedInferencePipeline, WhisperModel
|
| 8 |
-
from pydub import AudioSegment
|
| 9 |
from pyannote.audio import Pipeline as DiarizationPipeline
|
| 10 |
import opencc
|
| 11 |
|
|
@@ -164,6 +164,7 @@ def _transcribe_fwhisper_cpu_stream(model_id, language, audio_path, whisper_mult
|
|
| 164 |
end_ms = int(turn.end * 1000)
|
| 165 |
segment = AudioSegment.from_file(audio_path)[start_ms:end_ms]
|
| 166 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
|
|
|
|
| 167 |
segment.export(tmp.name, format="wav")
|
| 168 |
segments, _ = pipe.transcribe(
|
| 169 |
tmp.name,
|
|
@@ -205,6 +206,7 @@ def _transcribe_fwhisper_gpu_stream(model_id, language, audio_path, whisper_mult
|
|
| 205 |
end_ms = int(turn.end * 1000)
|
| 206 |
segment = AudioSegment.from_file(audio_path)[start_ms:end_ms]
|
| 207 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
|
|
|
|
| 208 |
segment.export(tmp.name, format="wav")
|
| 209 |
segments, _ = pipe.transcribe(
|
| 210 |
tmp.name,
|
|
|
|
| 5 |
import torch
|
| 6 |
import gradio as gr
|
| 7 |
from faster_whisper import BatchedInferencePipeline, WhisperModel
|
| 8 |
+
from pydub import AudioSegment, effects
|
| 9 |
from pyannote.audio import Pipeline as DiarizationPipeline
|
| 10 |
import opencc
|
| 11 |
|
|
|
|
| 164 |
end_ms = int(turn.end * 1000)
|
| 165 |
segment = AudioSegment.from_file(audio_path)[start_ms:end_ms]
|
| 166 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
|
| 167 |
+
segment = effects.normalize(segment)
|
| 168 |
segment.export(tmp.name, format="wav")
|
| 169 |
segments, _ = pipe.transcribe(
|
| 170 |
tmp.name,
|
|
|
|
| 206 |
end_ms = int(turn.end * 1000)
|
| 207 |
segment = AudioSegment.from_file(audio_path)[start_ms:end_ms]
|
| 208 |
with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
|
| 209 |
+
segment = effects.normalize(segment)
|
| 210 |
segment.export(tmp.name, format="wav")
|
| 211 |
segments, _ = pipe.transcribe(
|
| 212 |
tmp.name,
|