from contextlib import asynccontextmanager
from logging import getLogger
from multiprocessing import freeze_support
from uuid import uuid1

import numpy as np
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.staticfiles import StaticFiles

from transcribe.translatepipes import TranslatePipes
from transcribe.whisper_llm_serve import PyWhiperCppServe

logger = getLogger(__name__)


async def get_audio_from_websocket(websocket: WebSocket) -> np.ndarray | None:
    """
    Receive an audio buffer from the websocket and convert it to a numpy array.

    Args:
        websocket: The websocket to receive audio from.

    Returns:
        A float32 numpy array containing the audio, normalized to [-1.0, 1.0],
        or None if the client sent the END_OF_AUDIO sentinel.
    """
    frame_data = await websocket.receive_bytes()
    if frame_data == b"END_OF_AUDIO":
        return None
    # Incoming frames are raw little-endian int16 PCM; scale to float32 in [-1, 1].
    return np.frombuffer(frame_data, dtype=np.int16).astype(np.float32) / 32768.0


@asynccontextmanager
async def lifespan(app: FastAPI):
    # Build the translation pipeline once at startup and share it across connections.
    global pipe
    pipe = TranslatePipes()
    pipe.wait_ready()
    logger.info("Pipeline is ready.")
    yield


app = FastAPI(lifespan=lifespan)
# Serve the static frontend alongside the websocket endpoint.
app.mount("/translate", StaticFiles(directory="frontend"))
pipe = None


@app.websocket("/ws")
async def translate(websocket: WebSocket):
    # Source and target languages are passed as query parameters, e.g. /ws?from=en&to=zh.
    query_params = websocket.query_params
    from_lang, to_lang = query_params.get("from"), query_params.get("to")

    client = PyWhiperCppServe(
        websocket,
        pipe,
        language="en",
        client_uid=str(uuid1()),
    )
    if from_lang and to_lang:
        client.set_lang(from_lang, to_lang)
        logger.info(f"Source lang: {from_lang} -> Dst lang: {to_lang}")

    await websocket.accept()
    try:
        while True:
            frame_data = await get_audio_from_websocket(websocket)
            if frame_data is None:
                # Client signalled END_OF_AUDIO; stop reading from this connection.
                break
            client.add_frames(frame_data)
    except WebSocketDisconnect:
        logger.info("Client disconnected.")


if __name__ == "__main__":
    freeze_support()
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=9090)
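# ---------------------------------------------------------------------------
# Example client (a sketch, kept entirely in comments so it does not change
# the module's behavior): it illustrates the wire protocol this server
# expects -- raw little-endian int16 PCM frames sent as binary websocket
# messages, terminated by b"END_OF_AUDIO" -- using the third-party
# `websockets` package, which is an assumption and not necessarily part of
# this project. The query parameter values and the 16 kHz sample rate are
# illustrative only.
#
#   import asyncio
#   import numpy as np
#   import websockets
#
#   async def send_silence():
#       uri = "ws://localhost:9090/ws?from=en&to=zh"
#       async with websockets.connect(uri) as ws:
#           # One second of silence at 16 kHz as int16 PCM (placeholder audio).
#           samples = np.zeros(16000, dtype=np.int16)
#           await ws.send(samples.tobytes())
#           await ws.send(b"END_OF_AUDIO")
#
#   asyncio.run(send_silence())
# ---------------------------------------------------------------------------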