jzhang533 sunzhongkai588 committed on
Commit
5ea0a9a
·
verified ·
1 Parent(s): 0bec3bf
Files changed (1) hide show
  1. app.py +278 -40
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import base64
 
2
  import mimetypes
3
  import os
4
  from pathlib import Path
@@ -10,6 +11,167 @@ from openai import OpenAI
10
  DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "ERNIE-4.5-VL-28B-A3B-Thinking")
11
  BASE_URL = os.getenv("BASE_URL","")
12
  api_key = os.getenv("ERNIE_API_KEY","")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  _client = OpenAI(
14
  base_url=BASE_URL,
15
  api_key=api_key,
@@ -37,6 +199,27 @@ def _text_content(text: str) -> Dict[str, Any]:
37
  def _message(role: str, content: Any) -> Dict[str, Any]:
38
  return {"role": role, "content": content}
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  def _build_user_message(message: Dict[str, Any]) -> Dict[str, Any]:
41
  files = message.get("files") or []
42
  text = (message.get("text") or "").strip()
@@ -83,48 +266,98 @@ def stream_response(message: Dict[str, Any], history: List[Dict[str, Any]], mode
83
  messages=messages,
84
  stream=True
85
  )
86
- thinking = "" # 收集推理内容
87
- answer = "" # 收集回答内容
88
- thinking_complete = False # 推理是否完成的标记
89
 
90
- # 第一阶段:只输出Thinking
91
  for chunk in stream:
92
  delta = chunk.choices[0].delta
93
- # 处理推理内容
94
- if hasattr(delta, "reasoning_content") and delta.reasoning_content:
95
- thinking += delta.reasoning_content
96
- # 直接yield Thinking(不带Answer)
97
- yield f"Thinking:\n{thinking}\n\n"
98
- # 检测到回答内容,标记推理结束
99
- if hasattr(delta, "content") and delta.content:
100
- answer += delta.content
101
- thinking_complete = True
102
- break
103
-
104
- # 若推理未完成但流已结束,强制标记完成
105
- if not thinking_complete:
106
- thinking_complete = True
107
-
108
- # 第二阶段:先输出完整Thinking,再输出Answer(流式)
109
- for chunk in stream:
110
- delta = chunk.choices[0].delta
111
- if hasattr(delta, "content") and delta.content:
112
- answer += delta.content
113
- # 直接yield 完整Thinking + 当前Answer
114
- yield f"Thinking:\n{thinking}\n\nAnswer:\n{answer}"
 
 
115
  except Exception as e:
116
  yield f"Failed to get response: {e}"
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
  def build_demo() -> gr.Blocks:
120
- with gr.Blocks(title="ERNIE-4.5-VL-28B-A3B-Thinking") as demo:
121
- chatbot = gr.Chatbot(type="messages", allow_tags=["think"],height=600)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  textbox = gr.MultimodalTextbox(
123
  show_label=False,
124
  placeholder="Enter text, or upload one or more images...",
125
  file_types=["image","video"],
126
  file_count="multiple"
127
  )
 
 
 
 
 
 
 
128
 
129
  examples = [
130
  {
@@ -132,25 +365,30 @@ def build_demo() -> gr.Blocks:
132
  "files": ["examples/case1.png"]
133
  },
134
  {
135
- "text": "图中实际上有几个真人",
136
  "files": ["examples/case2.png"]
137
  },
138
  ]
139
 
140
- chat_interface = gr.ChatInterface(
141
- fn=stream_response,
142
- type="messages",
143
- multimodal=True,
144
- chatbot=chatbot,
145
- textbox=textbox,
146
- )
147
-
148
- with gr.Row():
149
  gr.Examples(
150
  examples=examples,
151
  inputs=textbox,
152
- label="示例 Examples",
153
- examples_per_page=2,
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  )
155
 
156
 
 
1
  import base64
2
+ import html
3
  import mimetypes
4
  import os
5
  from pathlib import Path
 
11
# Endpoint configuration, all overridable via environment variables.
# DEFAULT_MODEL: model name sent with each chat-completion request.
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "ERNIE-4.5-VL-28B-A3B-Thinking")
# BASE_URL: OpenAI-compatible API base URL (empty string when unset).
BASE_URL = os.getenv("BASE_URL","")
# api_key: credential for the ERNIE endpoint (empty string when unset).
api_key = os.getenv("ERNIE_API_KEY","")
14
+
15
+
16
# Page-level CSS injected into the Gradio Blocks app.  Styles the hero header,
# the examples panel/grid, the chat wrapper, and the Thinking/Answer panels
# emitted by _format_sections; the @media block mirrors them for dark mode.
# NOTE(review): indentation inside this string was reconstructed from a
# whitespace-stripped source — CSS is whitespace-insensitive, so rendering
# is unaffected.
CUSTOM_CSS = """
body {
    background: radial-gradient(circle at top, #fdfbff 0%, #e7ecf7 45%, #dfe6f5 100%);
    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Arial, sans-serif;
    color: #0f172a;
}
.gradio-container {
    max-width: 1200px !important;
    margin: 0 auto;
}
#ernie-hero {
    padding: 12px 0 4px;
}
#ernie-hero h1 {
    font-size: 1.85rem;
    margin-bottom: 0;
    font-weight: 500;
}
#model-link {
    margin-top: 6px;
    font-size: 0.95rem;
}
#model-link a {
    color: #4c1d95;
    text-decoration: none;
    font-weight: 500;
}
#model-link a:hover {
    text-decoration: underline;
}
#examples-panel {
    margin-top: 20px;
    padding: 18px 22px;
    border-radius: 18px;
    border: 1px solid rgba(15, 23, 42, 0.12);
    background: rgba(255, 255, 255, 0.92);
    box-shadow: 0 15px 35px rgba(15, 23, 42, 0.08);
    gap: 18px;
}
#examples-panel h4 {
    margin: 0 0 8px;
    font-size: 1.1rem;
    font-weight: 500;
}
#examples-panel p {
    margin: 0;
    color: rgba(15, 23, 42, 0.7);
    font-size: 0.95rem;
}
#examples-grid table {
    width: 100%;
}
#examples-grid table tbody {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
    gap: 12px;
}
#examples-grid table tr {
    display: block;
    background: #f7f9ff;
    border-radius: 14px;
    border: 1px solid rgba(15, 23, 42, 0.08);
    padding: 14px;
    box-shadow: 0 10px 28px rgba(15, 23, 42, 0.08);
}
#examples-grid table td {
    display: block;
    padding: 0;
}
#chat-wrapper {
    margin-top: 32px;
    border-radius: 24px;
    padding: 18px;
    background: rgba(255, 255, 255, 0.95);
    border: 1px solid rgba(15, 23, 42, 0.1);
    box-shadow: 0 25px 60px rgba(15, 23, 42, 0.12);
}
.ernie-section {
    border-radius: 18px;
    margin-bottom: 14px;
    padding: 16px 18px;
    border: 1px solid rgba(15, 23, 42, 0.1);
    background: rgba(255, 255, 255, 0.95);
    box-shadow: 0 10px 24px rgba(15, 23, 42, 0.08);
}
.ernie-section-header {
    font-size: 0.85rem;
    text-transform: uppercase;
    letter-spacing: 0.08em;
    font-weight: 600;
    color: rgba(15, 23, 42, 0.65);
    display: flex;
    align-items: center;
    gap: 6px;
}
.ernie-section-body {
    margin-top: 10px;
    font-size: 1rem;
    color: rgba(15, 23, 42, 0.92);
    white-space: pre-wrap;
    line-height: 1.55;
}
.ernie-thinking {
    border-color: rgba(79, 70, 229, 0.35);
    background: rgba(129, 140, 248, 0.08);
}
.ernie-answer {
    border-color: rgba(16, 185, 129, 0.35);
    background: rgba(110, 231, 183, 0.08);
}

@media (prefers-color-scheme: dark) {
    body {
        background: radial-gradient(circle at top, #1f264b 0%, #0f172a 45%, #040713 100%);
        color: #ecf2ff;
    }
    #model-link a {
        color: #a5b4fc;
    }
    #examples-panel {
        border: 1px solid rgba(255, 255, 255, 0.05);
        background: rgba(8, 13, 30, 0.85);
        box-shadow: 0 15px 45px rgba(3, 7, 18, 0.55);
    }
    #examples-panel p {
        color: rgba(236, 242, 255, 0.75);
    }
    #examples-grid table tr {
        background: rgba(15, 23, 42, 0.7);
        border: 1px solid rgba(255, 255, 255, 0.04);
        box-shadow: 0 10px 30px rgba(4, 6, 15, 0.45);
    }
    #chat-wrapper {
        background: rgba(2, 6, 23, 0.78);
        border: 1px solid rgba(99, 102, 241, 0.25);
        box-shadow: 0 25px 70px rgba(2, 6, 23, 0.7);
    }
    .ernie-section {
        border: 1px solid rgba(255, 255, 255, 0.08);
        background: rgba(15, 23, 42, 0.85);
        box-shadow: 0 10px 30px rgba(2, 6, 23, 0.55);
    }
    .ernie-section-header {
        color: rgba(236, 242, 255, 0.75);
    }
    .ernie-section-body {
        color: rgba(248, 250, 255, 0.95);
    }
    .ernie-answer {
        border-color: rgba(45, 212, 191, 0.45);
        background: rgba(8, 47, 56, 0.65);
    }
    .ernie-thinking {
        border-color: rgba(165, 180, 252, 0.4);
        background: rgba(30, 27, 75, 0.65);
    }
}
"""
174
+
175
  _client = OpenAI(
176
  base_url=BASE_URL,
177
  api_key=api_key,
 
199
  def _message(role: str, content: Any) -> Dict[str, Any]:
200
  return {"role": role, "content": content}
201
 
202
+ def _format_sections(thinking: str, answer: str | None = None) -> str:
203
+ """Render Thinking/Answer blocks with HTML so the chatbot can style them."""
204
+ def _build_block(kind: str, label: str, text: str, icon: str) -> str:
205
+ text = (text or "").strip()
206
+ if not text:
207
+ return ""
208
+ escaped = html.escape(text)
209
+ return (
210
+ f'<div class="ernie-section ernie-{kind}">'
211
+ f'<div class="ernie-section-header">{icon} {label}</div>'
212
+ f'<div class="ernie-section-body">{escaped}</div>'
213
+ "</div>"
214
+ )
215
+
216
+ sections = [
217
+ _build_block("thinking", "Thinking", thinking, "🧠"),
218
+ _build_block("answer", "Answer", answer, "✨") if answer is not None else "",
219
+ ]
220
+ rendered = "".join(section for section in sections if section)
221
+ return rendered
222
+
223
  def _build_user_message(message: Dict[str, Any]) -> Dict[str, Any]:
224
  files = message.get("files") or []
225
  text = (message.get("text") or "").strip()
 
266
  messages=messages,
267
  stream=True
268
  )
269
+ thinking_parts: List[str] = []
270
+ answer_parts: List[str] = []
271
+ answer_started = False
272
 
 
273
  for chunk in stream:
274
  delta = chunk.choices[0].delta
275
+
276
+ if getattr(delta, "reasoning_content", None):
277
+ thinking_parts.append(delta.reasoning_content)
278
+
279
+ if getattr(delta, "content", None):
280
+ answer_started = True
281
+ answer_parts.append(delta.content)
282
+
283
+ thinking_text = "".join(thinking_parts)
284
+ answer_text = "".join(answer_parts) if answer_parts else None
285
+
286
+ if answer_started:
287
+ rendered = _format_sections(thinking_text, answer_text)
288
+ else:
289
+ rendered = _format_sections(thinking_text)
290
+
291
+ if rendered:
292
+ yield rendered
293
+
294
+ if not answer_started and thinking_parts:
295
+ # 流结束但模型未返回Answer时,至少保证Thinking被展示完全
296
+ rendered = _format_sections("".join(thinking_parts))
297
+ if rendered:
298
+ yield rendered
299
  except Exception as e:
300
  yield f"Failed to get response: {e}"
301
 
302
def run_example(message: Dict[str, Any], history: List[Dict[str, Any]] | None = None):
    """Send an Examples click straight to the model and stream into the Chatbot.

    Args:
        message: ChatInterface-style payload: {"text": ..., "files": [...]}.
        history: Current Chatbot message list (type="messages"), or None.

    Yields:
        The complete message list the Chatbot component expects:
        [{"role": ..., "content": ...}, ...] — full list per update, not a delta.
    """
    history = history or []

    # The user turn is invariant across streamed chunks, so build it once
    # instead of recomputing it for every yield.  Only the text is shown;
    # uploaded images stay in the model context without being rendered here.
    user_text = (message.get("text") or "").strip() or "[Example]"
    user_turn = {"role": "user", "content": user_text}

    # Reuse the existing streaming generator; each rendered HTML chunk is
    # wrapped into a fresh full message list for the Chatbot.
    for rendered in stream_response(message, history):
        yield history + [user_turn, {"role": "assistant", "content": rendered}]
321
 
322
  def build_demo() -> gr.Blocks:
323
+ theme = gr.themes.Soft(primary_hue="violet", secondary_hue="cyan", neutral_hue="slate")
324
+
325
+ with gr.Blocks(
326
+ title="ERNIE-4.5-VL-28B-A3B-Thinking",
327
+ theme=theme,
328
+ css=CUSTOM_CSS,
329
+ ) as demo:
330
+ with gr.Column(elem_id="ernie-hero"):
331
+ gr.Markdown(
332
+ """
333
+ <h1>Chat with ERNIE-4.5-VL-28B-A3B-Thinking</h1>
334
+ """,
335
+ elem_id="hero-text",
336
+ )
337
+ gr.Markdown(
338
+ """
339
+ <p id="model-link">
340
+ Model Repository:
341
+ <a href="https://huggingface.co/baidu/ERNIE-4.5-VL-28B-A3B-Thinking" target="_blank" rel="noopener">
342
+ ERNIE-4.5-VL-28B-A3B-Thinking
343
+ </a>
344
+ </p>
345
+ """
346
+ )
347
+
348
  textbox = gr.MultimodalTextbox(
349
  show_label=False,
350
  placeholder="Enter text, or upload one or more images...",
351
  file_types=["image","video"],
352
  file_count="multiple"
353
  )
354
+ chatbot = gr.Chatbot(
355
+ type="messages",
356
+ allow_tags=["think"],
357
+ height=560,
358
+ render_markdown=True,
359
+ show_copy_button=True,
360
+ )
361
 
362
  examples = [
363
  {
 
365
  "files": ["examples/case1.png"]
366
  },
367
  {
368
+ "text": "How many real people are actually in the picture?",
369
  "files": ["examples/case2.png"]
370
  },
371
  ]
372
 
373
+ with gr.Column(elem_id="examples-panel"):
 
 
 
 
 
 
 
 
374
  gr.Examples(
375
  examples=examples,
376
  inputs=textbox,
377
+ label=None,
378
+ examples_per_page=4,
379
+ elem_id="examples-grid",
380
+ fn=run_example, # 点击示例时,直接走大模型
381
+ outputs=chatbot,
382
+ run_on_click=True,
383
+ )
384
+
385
+ with gr.Column(elem_id="chat-wrapper"):
386
+ chat_interface = gr.ChatInterface(
387
+ fn=stream_response,
388
+ type="messages",
389
+ multimodal=True,
390
+ chatbot=chatbot,
391
+ textbox=textbox,
392
  )
393
 
394