moevis committed
Commit e6f110c · verified · 1 Parent(s): d78daff

Update app.py

Files changed (1)
app.py +72 -69
app.py CHANGED
@@ -75,7 +75,7 @@ def format_messages(system, history, user_text, audio_data_list=None):
                 continue
 
             # Check for Audio
-            is_audio = not isinstance(content, list) and content["component"] == "audio"
+            is_audio = not isinstance(content, list) and content.get("component", None) == "audio"
 
             if is_audio:
                 audio_path = content["value"]["path"]
@@ -241,7 +241,8 @@ def chat(system_prompt, user_text, audio_file, history, max_tokens, temperature,
 
     try:
         with httpx.Client(base_url=API_BASE_URL, timeout=120) as client:
-            response = client.post("/chat/completions", json={
+            # Use client.stream for better streaming control
+            with client.stream("POST", "/chat/completions", json={
                 "model": model_name,
                 "messages": messages,
                 "max_tokens": max_tokens,
@@ -250,75 +251,77 @@ def chat(system_prompt, user_text, audio_file, history, max_tokens, temperature,
                 "stream": True,
                 "repetition_penalty": 1.07,
                 "stop_token_ids": [151665]
-            })
-
-            if response.status_code != 200:
-                error_msg = f"❌ API Error {response.status_code}"
-                if response.status_code == 404:
-                    error_msg += " - vLLM service not ready"
-                elif response.status_code == 400:
-                    error_msg += " - Bad request"
-                elif response.status_code == 500:
-                    error_msg += " - Model error"
-                yield history, error_msg
-                return
-
-            # Process streaming response
-            buffer = ""
-            is_thinking = True
-
-            for line in response.iter_lines():
-                if not line:
-                    continue
-                # Ensure line is string format
-                if isinstance(line, bytes):
-                    line = line.decode('utf-8')
-                else:
-                    line = str(line)
-
-                if line.startswith('data: '):
-                    data_str = line[6:]
-                    if data_str.strip() == '[DONE]':
-                        break
-                    try:
-                        data = json.loads(data_str)
-                        if 'choices' in data and len(data['choices']) > 0:
-                            delta = data['choices'][0].get('delta', {})
-                            if 'content' in delta:
-                                content = delta['content']
-                                buffer += content
-
-                                if is_thinking:
-                                    if "</think>" in buffer:
-                                        is_thinking = False
+            }) as response:
+
+                if response.status_code != 200:
+                    error_msg = f"❌ API Error {response.status_code}"
+                    if response.status_code == 404:
+                        error_msg += " - vLLM service not ready"
+                    elif response.status_code == 400:
+                        error_msg += " - Bad request"
+                    elif response.status_code == 500:
+                        error_msg += " - Model error"
+                    yield history, error_msg
+                    return
+
+                # Process streaming response
+                buffer = ""
+                is_thinking = True
+
+                print("[DEBUG] Start receiving stream...")
+                for line in response.iter_lines():
+                    if not line:
+                        continue
+                    # Ensure line is string format
+                    if isinstance(line, bytes):
+                        line = line.decode('utf-8')
+                    else:
+                        line = str(line)
+
+                    if line.startswith('data: '):
+                        data_str = line[6:]
+                        if data_str.strip() == '[DONE]':
+                            print("[DEBUG] Stream finished [DONE]")
+                            break
+                        try:
+                            data = json.loads(data_str)
+                            if 'choices' in data and len(data['choices']) > 0:
+                                delta = data['choices'][0].get('delta', {})
+                                if 'content' in delta:
+                                    content = delta['content']
+                                    buffer += content
+
+                                    if is_thinking:
+                                        if "</think>" in buffer:
+                                            is_thinking = False
+                                            parts = buffer.split("</think>", 1)
+                                            think_content = parts[0]
+                                            response_content = parts[1]
+
+                                            if think_content.startswith("<think>"):
+                                                think_content = think_content[len("<think>"):].strip()
+
+                                            # Update thinking message
+                                            history[-1].content = think_content
+
+                                            # Add response message
+                                            history.append({"role": "assistant", "content": response_content})
+                                        else:
+                                            # Update thinking message
+                                            current_think = buffer
+                                            if current_think.startswith("<think>"):
+                                                current_think = current_think[len("<think>"):]
+                                            history[-1].content = current_think
+                                    else:
+                                        # Already split, just update response message
                                         parts = buffer.split("</think>", 1)
-                                        think_content = parts[0]
                                         response_content = parts[1]
-
-                                        if think_content.startswith("<think>"):
-                                            think_content = think_content[len("<think>"):].strip()
-
-                                        # Update thinking message
-                                        history[-1].content = think_content
-
-                                        # Add response message
-                                        history.append({"role": "assistant", "content": response_content})
-                                    else:
-                                        # Update thinking message
-                                        current_think = buffer
-                                        if current_think.startswith("<think>"):
-                                            current_think = current_think[len("<think>"):]
-                                        history[-1].content = current_think
-                                else:
-                                    # Already split, just update response message
-                                    parts = buffer.split("</think>", 1)
-                                    response_content = parts[1]
-                                    history[-1]["content"] = response_content
-
-                                yield history, ""
-
-                    except json.JSONDecodeError:
-                        continue
+                                        history[-1]["content"] = response_content
+
+                                    yield history, ""
+
+                        except json.JSONDecodeError:
+                            continue
 
     except httpx.ConnectError:
         yield history, "❌ Cannot connect to vLLM API"
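
Note on the diff: the substantive change is issuing the request with httpx.Client.stream() instead of client.post(). With client.post(), httpx reads the entire response body before returning, so iter_lines() only runs after generation has finished; client.stream() keeps the response open and yields SSE lines as the server emits them, which is what the added comment "Use client.stream for better streaming control" refers to. Below is a minimal, self-contained sketch of that pattern; the base URL, model name, and prompt are placeholder values for illustration, not values taken from this Space.

import json
import httpx

API_BASE_URL = "http://localhost:8000/v1"   # placeholder; app.py defines its own API_BASE_URL

def stream_chat(prompt: str) -> str:
    """Collect a streamed OpenAI-style chat completion into a single string."""
    buffer = ""
    with httpx.Client(base_url=API_BASE_URL, timeout=120) as client:
        # client.stream() returns a context manager and defers reading the body,
        # so each SSE line can be processed as soon as the server sends it.
        with client.stream("POST", "/chat/completions", json={
            "model": "placeholder-model",
            "messages": [{"role": "user", "content": prompt}],
            "stream": True,
        }) as response:
            response.raise_for_status()
            for line in response.iter_lines():        # httpx yields decoded str lines
                if not line.startswith("data: "):
                    continue
                data_str = line[len("data: "):]
                if data_str.strip() == "[DONE]":       # SSE terminator used by OpenAI-compatible servers
                    break
                try:
                    delta = json.loads(data_str)["choices"][0].get("delta", {})
                except (json.JSONDecodeError, IndexError, KeyError):
                    continue
                buffer += delta.get("content", "")
    return buffer

The other edit in the commit, content["component"] → content.get("component", None), is the usual dict-access trade-off: .get() returns None instead of raising KeyError when a history entry has no "component" field, so the audio check simply evaluates to False for plain text messages.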