Update src/app/conversation_core.py
Browse files- src/app/conversation_core.py +21 -25
src/app/conversation_core.py
CHANGED
|
@@ -348,37 +348,33 @@ class ConversationManager:
|
|
| 348 |
|
| 349 |
|
| 350 |
###########################################################
|
| 351 |
-
# AUDIO TRANSCRIPTION
|
| 352 |
###########################################################
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
import numpy as np
|
| 360 |
|
| 361 |
-
#
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
_whisper_model = WhisperModel(
|
| 365 |
-
"small",
|
| 366 |
-
device="cpu",
|
| 367 |
-
compute_type="int8"
|
| 368 |
-
)
|
| 369 |
|
| 370 |
-
#
|
| 371 |
-
|
| 372 |
|
| 373 |
-
|
| 374 |
-
segments, info = _whisper_model.transcribe(
|
| 375 |
-
audio,
|
| 376 |
-
beam_size=5,
|
| 377 |
-
language=spoken_lang
|
| 378 |
-
)
|
| 379 |
|
| 380 |
-
|
| 381 |
-
return
|
|
|
|
| 382 |
|
| 383 |
|
| 384 |
|
|
|
|
| 348 |
|
| 349 |
|
| 350 |
###########################################################
|
| 351 |
+
# AUDIO TRANSCRIPTION — Transformers Whisper
|
| 352 |
###########################################################
|
| 353 |
+
|
| 354 |
+
from transformers import pipeline
|
| 355 |
+
|
| 356 |
+
# Build the speech-recognition pipeline a single time, when this module is
# imported, so that every transcription call reuses the same loaded model.
whisper_pipe = pipeline("automatic-speech-recognition", model="openai/whisper-small", device="cpu")
|
| 362 |
+
|
| 363 |
+
def transcribe(self, audio_segment, spoken_lang=None):
    """Transcribe an audio clip with the shared Whisper pipeline.

    Args:
        audio_segment: Clip to transcribe. It must offer the pydub
            ``AudioSegment`` methods ``set_frame_rate``, ``set_channels``
            and ``get_array_of_samples``.
        spoken_lang: Optional language hint supplied by the caller. It is
            echoed back in the result but is not forwarded to the model.

    Returns:
        tuple: ``(text, language, confidence)`` where ``text`` is the
        stripped transcript (``""`` for empty or all-zero audio),
        ``language`` is ``spoken_lang`` or ``"unknown"``, and
        ``confidence`` is the constant placeholder ``1.0``.
    """
    import numpy as np

    language = spoken_lang or "unknown"

    # get_array_of_samples() interleaves channels and keeps the clip's own
    # frame rate, so downmix to mono and resample to the rate the model's
    # feature extractor expects before pulling out the samples.
    target_rate = whisper_pipe.feature_extractor.sampling_rate
    mono = audio_segment.set_frame_rate(target_rate).set_channels(1)
    audio = np.array(mono.get_array_of_samples()).astype("float32")

    # An empty clip makes np.max raise, and an all-zero clip would divide by
    # zero (filling the array with NaN). Neither holds speech: skip the model.
    peak = float(np.max(np.abs(audio))) if audio.size else 0.0
    if peak == 0.0:
        return "", language, 1.0
    audio = audio / peak  # peak-normalize to [-1, 1]

    # State the sampling rate explicitly instead of relying on the pipeline's
    # assumption that a bare array is already at the model's rate.
    result = whisper_pipe({"raw": audio, "sampling_rate": target_rate})
    text = (result.get("text") or "").strip()

    # Only the transcript text is read from the pipeline result, so the
    # language is the caller's hint and the confidence is a fixed placeholder.
    # NOTE(review): spoken_lang is not forwarded to the model; if hints should
    # steer decoding, confirm which language codes the model accepts first.
    return text, language, 1.0
|
| 377 |
+
|
| 378 |
|
| 379 |
|
| 380 |
|