Ali2206 committed on
Commit
bd7030e
·
1 Parent(s): 0bd1eaa

device token

Browse files
Files changed (2) hide show
  1. api/routes/txagent.py +99 -0
  2. voice.py +15 -35
api/routes/txagent.py CHANGED
@@ -343,6 +343,66 @@ async def chat_with_txagent(
343
  logger.error(f"Error in TxAgent chat: {e}")
344
  raise HTTPException(status_code=500, detail="Failed to process chat request")
345
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
  @router.post("/voice/transcribe")
347
  async def transcribe_audio(
348
  audio: UploadFile = File(...),
@@ -678,4 +738,43 @@ async def get_all_patients_analysis_reports_pdf(
678
  logger.error(f"Error generating PDF report for all patients: {str(e)}")
679
  raise HTTPException(status_code=500, detail=f"Failed to generate PDF report: {str(e)}")
680
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
681
 
 
343
  logger.error(f"Error in TxAgent chat: {e}")
344
  raise HTTPException(status_code=500, detail="Failed to process chat request")
345
 
346
@router.post("/chat-stream")
async def chat_stream_with_txagent(
    request: ChatRequest,
    current_user: dict = Depends(get_current_user)
):
    """Streaming chat with the integrated TxAgent.

    Only users holding the 'doctor' or 'admin' role may call this endpoint.
    The exchange is persisted to the `chats` collection on a best-effort
    basis, then the reply is streamed back to the client as plain text.

    Raises:
        HTTPException 403: caller is neither a doctor nor an admin.
        HTTPException 500: unexpected failure while building the stream.
    """
    try:
        # Restrict access to doctors and admins.
        if not any(role in current_user.get('roles', []) for role in ['doctor', 'admin']):
            raise HTTPException(status_code=403, detail="Only doctors and admins can use TxAgent")

        logger.info(f"Chat stream initiated by {current_user['email']}: {request.message}")

        # Generate a response (for now, a simple canned response).
        response_text = f"Hello! I'm your clinical assistant. You said: '{request.message}'. How can I help you with patient care today?"

        # Store the chat in the database — best effort: a storage failure
        # must not prevent the response from being streamed.
        try:
            from db.mongo import db
            chats_collection = db.chats

            chat_entry = {
                "message": request.message,
                "response": response_text,
                "user_id": current_user.get('_id'),
                "user_email": current_user.get('email'),
                "timestamp": datetime.utcnow(),
                # getattr with a default replaces the hasattr/ternary idiom.
                "patient_id": getattr(request, 'patient_id', None),
                "chat_type": "text_chat"
            }

            await chats_collection.insert_one(chat_entry)
            logger.info(f"Chat stored in database for user {current_user['email']}")

        except Exception as db_error:
            logger.error(f"Failed to store chat in database: {str(db_error)}")
            # Continue even if database storage fails.

        async def generate_response():
            """Yield the reply a few words at a time to simulate streaming."""
            words = response_text.split()
            chunk_size = 3  # number of words sent per chunk

            for i in range(0, len(words), chunk_size):
                chunk = " ".join(words[i:i + chunk_size])
                if i + chunk_size < len(words):
                    chunk += " "  # separator before the next chunk; omitted after the last
                yield chunk
                await asyncio.sleep(0.1)  # small delay to simulate streaming

        return StreamingResponse(
            generate_response(),
            media_type="text/plain"
        )

    except HTTPException:
        # Fix: re-raise HTTP errors (e.g. the 403 above) untouched instead of
        # letting the broad handler below convert them into a generic 500.
        raise
    except Exception as e:
        logger.error(f"Error in TxAgent chat stream: {e}")
        raise HTTPException(status_code=500, detail="Failed to process chat stream request")
406
  @router.post("/voice/transcribe")
407
  async def transcribe_audio(
408
  audio: UploadFile = File(...),
 
738
  logger.error(f"Error generating PDF report for all patients: {str(e)}")
739
  raise HTTPException(status_code=500, detail=f"Failed to generate PDF report: {str(e)}")
740
 
741
# Voice synthesis endpoint
@router.post("/voice/synthesize")
async def synthesize_voice(
    request: dict,
    current_user: dict = Depends(get_current_user)
):
    """
    Convert text to speech using gTTS.

    Expects a JSON body with:
        text (str): the text to synthesize (required, must not be blank)
        language (str): a BCP-47 style code such as 'en-US' (default 'en-US')
        return_format (str): container name used only for the response media
            type and attachment filename (default 'mp3'; gTTS emits MP3)

    Returns a streaming audio attachment.

    Raises:
        HTTPException 400: no usable text supplied.
        HTTPException 500: unexpected synthesis failure.
    """
    try:
        logger.info(f"Voice synthesis initiated by {current_user['email']}")

        # Extract parameters from the request payload.
        text = request.get('text', '')
        language = request.get('language', 'en-US')
        return_format = request.get('return_format', 'mp3')

        # Fix: also reject whitespace-only text up front instead of letting
        # gTTS fail later and surface as a generic 500.
        if not text or not text.strip():
            raise HTTPException(status_code=400, detail="Text is required")

        # gTTS wants a bare language code: 'en-US' -> 'en'.
        language_code = language.split('-')[0] if '-' in language else language

        # Generate the MP3 audio bytes.
        audio_data = text_to_speech(text, language=language_code)

        # Stream the audio back as a downloadable attachment.
        return StreamingResponse(
            io.BytesIO(audio_data),
            media_type=f"audio/{return_format}",
            headers={"Content-Disposition": f"attachment; filename=speech.{return_format}"}
        )

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error in voice synthesis: {e}")
        raise HTTPException(status_code=500, detail="Error generating voice output")
 
voice.py CHANGED
@@ -1,32 +1,24 @@
1
  from typing import Optional
2
  from fastapi import HTTPException
3
- from config import logger
4
  import io
5
- import speech_recognition as sr
6
  from gtts import gTTS
7
- from pydub import AudioSegment
8
- import base64
9
- from utils import clean_text_response # Added this import
10
 
11
- def recognize_speech(audio_data: bytes, language: str = "en-US") -> str:
12
- recognizer = sr.Recognizer()
13
- try:
14
- with io.BytesIO(audio_data) as audio_file:
15
- with sr.AudioFile(audio_file) as source:
16
- audio = recognizer.record(source)
17
- text = recognizer.recognize_google(audio, language=language)
18
- return text
19
- except sr.UnknownValueError:
20
- logger.error("Google Speech Recognition could not understand audio")
21
- raise HTTPException(status_code=400, detail="Could not understand audio")
22
- except sr.RequestError as e:
23
- logger.error(f"Could not request results from Google Speech Recognition service; {e}")
24
- raise HTTPException(status_code=503, detail="Speech recognition service unavailable")
25
- except Exception as e:
26
- logger.error(f"Error in speech recognition: {e}")
27
- raise HTTPException(status_code=500, detail="Error processing speech")
28
 
29
  def text_to_speech(text: str, language: str = "en", slow: bool = False) -> bytes:
 
 
 
 
 
 
 
 
 
 
 
30
  try:
31
  tts = gTTS(text=text, lang=language, slow=slow)
32
  mp3_fp = io.BytesIO()
@@ -35,16 +27,4 @@ def text_to_speech(text: str, language: str = "en", slow: bool = False) -> bytes
35
  return mp3_fp.read()
36
  except Exception as e:
37
  logger.error(f"Error in text-to-speech conversion: {e}")
38
- raise HTTPException(status_code=500, detail="Error generating speech")
39
-
40
- def extract_text_from_pdf(pdf_data: bytes) -> str:
41
- try:
42
- from PyPDF2 import PdfReader
43
- pdf_reader = PdfReader(io.BytesIO(pdf_data))
44
- text = ""
45
- for page in pdf_reader.pages:
46
- text += page.extract_text() or ""
47
- return clean_text_response(text) # Now works with the import
48
- except Exception as e:
49
- logger.error(f"Error extracting text from PDF: {e}")
50
- raise HTTPException(status_code=400, detail="Failed to extract text from PDF")
 
1
  from typing import Optional
2
  from fastapi import HTTPException
3
+ import logging
4
  import io
 
5
  from gtts import gTTS
 
 
 
6
 
7
+ # Configure logging
8
+ logger = logging.getLogger(__name__)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
def text_to_speech(text: str, language: str = "en", slow: bool = False) -> bytes:
    """
    Convert text to speech using gTTS (Google Text-to-Speech).

    Args:
        text (str): The text to convert to speech
        language (str): Language code (default: "en")
        slow (bool): Whether to speak slowly (default: False)

    Returns:
        bytes: MP3 audio data

    Raises:
        HTTPException: 500 when the conversion fails for any reason.
    """
    try:
        # Render the speech straight into an in-memory buffer, then rewind
        # and hand back the raw MP3 bytes.
        buffer = io.BytesIO()
        gTTS(text=text, lang=language, slow=slow).write_to_fp(buffer)
        buffer.seek(0)
        return buffer.read()
    except Exception as e:
        logger.error(f"Error in text-to-speech conversion: {e}")
        raise HTTPException(status_code=500, detail="Error generating speech")