jzhang533 sunzhongkai588 committed on
Commit
5ea0a9a
·
verified ·
1 Parent(s): 0bec3bf
Files changed (1) hide show
  1. app.py +278 -40
app.py CHANGED
@@ -1,4 +1,5 @@
1
  import base64
 
2
  import mimetypes
3
  import os
4
  from pathlib import Path
@@ -10,6 +11,167 @@ from openai import OpenAI
10
  DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "ERNIE-4.5-VL-28B-A3B-Thinking")
11
  BASE_URL = os.getenv("BASE_URL","")
12
  api_key = os.getenv("ERNIE_API_KEY","")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  _client = OpenAI(
14
  base_url=BASE_URL,
15
  api_key=api_key,
@@ -37,6 +199,27 @@ def _text_content(text: str) -> Dict[str, Any]:
37
  def _message(role: str, content: Any) -> Dict[str, Any]:
38
  return {"role": role, "content": content}
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  def _build_user_message(message: Dict[str, Any]) -> Dict[str, Any]:
41
  files = message.get("files") or []
42
  text = (message.get("text") or "").strip()
@@ -83,48 +266,98 @@ def stream_response(message: Dict[str, Any], history: List[Dict[str, Any]], mode
83
  messages=messages,
84
  stream=True
85
  )
86
- thinking = "" # 收集推理内容
87
- answer = "" # 收集回答内容
88
- thinking_complete = False # 推理是否完成的标记
89
 
90
- # 第一阶段:只输出Thinking
91
  for chunk in stream:
92
  delta = chunk.choices[0].delta
93
- # 处理推理内容
94
- if hasattr(delta, "reasoning_content") and delta.reasoning_content:
95
- thinking += delta.reasoning_content
96
- # 直接yield Thinking(不带Answer)
97
- yield f"Thinking:\n{thinking}\n\n"
98
- # 检测到回答内容,标记推理结束
99
- if hasattr(delta, "content") and delta.content:
100
- answer += delta.content
101
- thinking_complete = True
102
- break
103
-
104
- # 若推理未完成但流已结束,强制标记完成
105
- if not thinking_complete:
106
- thinking_complete = True
107
-
108
- # 第二阶段:先输出完整Thinking,再输出Answer(流式)
109
- for chunk in stream:
110
- delta = chunk.choices[0].delta
111
- if hasattr(delta, "content") and delta.content:
112
- answer += delta.content
113
- # 直接yield 完整Thinking + 当前Answer
114
- yield f"Thinking:\n{thinking}\n\nAnswer:\n{answer}"
 
 
115
  except Exception as e:
116
  yield f"Failed to get response: {e}"
117
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
  def build_demo() -> gr.Blocks:
120
- with gr.Blocks(title="ERNIE-4.5-VL-28B-A3B-Thinking") as demo:
121
- chatbot = gr.Chatbot(type="messages", allow_tags=["think"],height=600)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
  textbox = gr.MultimodalTextbox(
123
  show_label=False,
124
  placeholder="Enter text, or upload one or more images...",
125
  file_types=["image","video"],
126
  file_count="multiple"
127
  )
 
 
 
 
 
 
 
128
 
129
  examples = [
130
  {
@@ -132,25 +365,30 @@ def build_demo() -> gr.Blocks:
132
  "files": ["examples/case1.png"]
133
  },
134
  {
135
- "text": "图中实际上有几个真人",
136
  "files": ["examples/case2.png"]
137
  },
138
  ]
139
 
140
- chat_interface = gr.ChatInterface(
141
- fn=stream_response,
142
- type="messages",
143
- multimodal=True,
144
- chatbot=chatbot,
145
- textbox=textbox,
146
- )
147
-
148
- with gr.Row():
149
  gr.Examples(
150
  examples=examples,
151
  inputs=textbox,
152
- label="示例 Examples",
153
- examples_per_page=2,
 
 
 
 
 
 
 
 
 
 
 
 
 
154
  )
155
 
156
 
 
1
  import base64
2
+ import html
3
  import mimetypes
4
  import os
5
  from pathlib import Path
 
11
# Endpoint configuration, all overridable via environment variables.
# DEFAULT_MODEL: model name sent with each chat-completion request.
DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "ERNIE-4.5-VL-28B-A3B-Thinking")
# BASE_URL: OpenAI-compatible API base URL (empty string when unset).
BASE_URL = os.getenv("BASE_URL","")
# api_key: credential for the ERNIE endpoint (empty string when unset).
api_key = os.getenv("ERNIE_API_KEY","")
14
+
15
+
16
# Page-level CSS injected into the Gradio Blocks app.  Styles the hero header,
# the examples panel/grid, the chat wrapper, and the Thinking/Answer panels
# emitted by _format_sections; the @media block mirrors them for dark mode.
# NOTE(review): indentation inside this string was reconstructed from a
# whitespace-stripped source — CSS is whitespace-insensitive, so rendering
# is unaffected.
CUSTOM_CSS = """
body {
    background: radial-gradient(circle at top, #fdfbff 0%, #e7ecf7 45%, #dfe6f5 100%);
    font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Arial, sans-serif;
    color: #0f172a;
}
.gradio-container {
    max-width: 1200px !important;
    margin: 0 auto;
}
#ernie-hero {
    padding: 12px 0 4px;
}
#ernie-hero h1 {
    font-size: 1.85rem;
    margin-bottom: 0;
    font-weight: 500;
}
#model-link {
    margin-top: 6px;
    font-size: 0.95rem;
}
#model-link a {
    color: #4c1d95;
    text-decoration: none;
    font-weight: 500;
}
#model-link a:hover {
    text-decoration: underline;
}
#examples-panel {
    margin-top: 20px;
    padding: 18px 22px;
    border-radius: 18px;
    border: 1px solid rgba(15, 23, 42, 0.12);
    background: rgba(255, 255, 255, 0.92);
    box-shadow: 0 15px 35px rgba(15, 23, 42, 0.08);
    gap: 18px;
}
#examples-panel h4 {
    margin: 0 0 8px;
    font-size: 1.1rem;
    font-weight: 500;
}
#examples-panel p {
    margin: 0;
    color: rgba(15, 23, 42, 0.7);
    font-size: 0.95rem;
}
#examples-grid table {
    width: 100%;
}
#examples-grid table tbody {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
    gap: 12px;
}
#examples-grid table tr {
    display: block;
    background: #f7f9ff;
    border-radius: 14px;
    border: 1px solid rgba(15, 23, 42, 0.08);
    padding: 14px;
    box-shadow: 0 10px 28px rgba(15, 23, 42, 0.08);
}
#examples-grid table td {
    display: block;
    padding: 0;
}
#chat-wrapper {
    margin-top: 32px;
    border-radius: 24px;
    padding: 18px;
    background: rgba(255, 255, 255, 0.95);
    border: 1px solid rgba(15, 23, 42, 0.1);
    box-shadow: 0 25px 60px rgba(15, 23, 42, 0.12);
}
.ernie-section {
    border-radius: 18px;
    margin-bottom: 14px;
    padding: 16px 18px;
    border: 1px solid rgba(15, 23, 42, 0.1);
    background: rgba(255, 255, 255, 0.95);
    box-shadow: 0 10px 24px rgba(15, 23, 42, 0.08);
}
.ernie-section-header {
    font-size: 0.85rem;
    text-transform: uppercase;
    letter-spacing: 0.08em;
    font-weight: 600;
    color: rgba(15, 23, 42, 0.65);
    display: flex;
    align-items: center;
    gap: 6px;
}
.ernie-section-body {
    margin-top: 10px;
    font-size: 1rem;
    color: rgba(15, 23, 42, 0.92);
    white-space: pre-wrap;
    line-height: 1.55;
}
.ernie-thinking {
    border-color: rgba(79, 70, 229, 0.35);
    background: rgba(129, 140, 248, 0.08);
}
.ernie-answer {
    border-color: rgba(16, 185, 129, 0.35);
    background: rgba(110, 231, 183, 0.08);
}

@media (prefers-color-scheme: dark) {
    body {
        background: radial-gradient(circle at top, #1f264b 0%, #0f172a 45%, #040713 100%);
        color: #ecf2ff;
    }
    #model-link a {
        color: #a5b4fc;
    }
    #examples-panel {
        border: 1px solid rgba(255, 255, 255, 0.05);
        background: rgba(8, 13, 30, 0.85);
        box-shadow: 0 15px 45px rgba(3, 7, 18, 0.55);
    }
    #examples-panel p {
        color: rgba(236, 242, 255, 0.75);
    }
    #examples-grid table tr {
        background: rgba(15, 23, 42, 0.7);
        border: 1px solid rgba(255, 255, 255, 0.04);
        box-shadow: 0 10px 30px rgba(4, 6, 15, 0.45);
    }
    #chat-wrapper {
        background: rgba(2, 6, 23, 0.78);
        border: 1px solid rgba(99, 102, 241, 0.25);
        box-shadow: 0 25px 70px rgba(2, 6, 23, 0.7);
    }
    .ernie-section {
        border: 1px solid rgba(255, 255, 255, 0.08);
        background: rgba(15, 23, 42, 0.85);
        box-shadow: 0 10px 30px rgba(2, 6, 23, 0.55);
    }
    .ernie-section-header {
        color: rgba(236, 242, 255, 0.75);
    }
    .ernie-section-body {
        color: rgba(248, 250, 255, 0.95);
    }
    .ernie-answer {
        border-color: rgba(45, 212, 191, 0.45);
        background: rgba(8, 47, 56, 0.65);
    }
    .ernie-thinking {
        border-color: rgba(165, 180, 252, 0.4);
        background: rgba(30, 27, 75, 0.65);
    }
}
"""
174
+
175
  _client = OpenAI(
176
  base_url=BASE_URL,
177
  api_key=api_key,
 
199
  def _message(role: str, content: Any) -> Dict[str, Any]:
200
  return {"role": role, "content": content}
201
 
202
+ def _format_sections(thinking: str, answer: str | None = None) -> str:
203
+ """Render Thinking/Answer blocks with HTML so the chatbot can style them."""
204
+ def _build_block(kind: str, label: str, text: str, icon: str) -> str:
205
+ text = (text or "").strip()
206
+ if not text:
207
+ return ""
208
+ escaped = html.escape(text)
209
+ return (
210
+ f'<div class="ernie-section ernie-{kind}">'
211
+ f'<div class="ernie-section-header">{icon} {label}</div>'
212
+ f'<div class="ernie-section-body">{escaped}</div>'
213
+ "</div>"
214
+ )
215
+
216
+ sections = [
217
+ _build_block("thinking", "Thinking", thinking, "🧠"),
218
+ _build_block("answer", "Answer", answer, "✨") if answer is not None else "",
219
+ ]
220
+ rendered = "".join(section for section in sections if section)
221
+ return rendered
222
+
223
  def _build_user_message(message: Dict[str, Any]) -> Dict[str, Any]:
224
  files = message.get("files") or []
225
  text = (message.get("text") or "").strip()
 
266
  messages=messages,
267
  stream=True
268
  )
269
+ thinking_parts: List[str] = []
270
+ answer_parts: List[str] = []
271
+ answer_started = False
272
 
 
273
  for chunk in stream:
274
  delta = chunk.choices[0].delta
275
+
276
+ if getattr(delta, "reasoning_content", None):
277
+ thinking_parts.append(delta.reasoning_content)
278
+
279
+ if getattr(delta, "content", None):
280
+ answer_started = True
281
+ answer_parts.append(delta.content)
282
+
283
+ thinking_text = "".join(thinking_parts)
284
+ answer_text = "".join(answer_parts) if answer_parts else None
285
+
286
+ if answer_started:
287
+ rendered = _format_sections(thinking_text, answer_text)
288
+ else:
289
+ rendered = _format_sections(thinking_text)
290
+
291
+ if rendered:
292
+ yield rendered
293
+
294
+ if not answer_started and thinking_parts:
295
+ # 流结束但模型未返回Answer时,至少保证Thinking被展示完全
296
+ rendered = _format_sections("".join(thinking_parts))
297
+ if rendered:
298
+ yield rendered
299
  except Exception as e:
300
  yield f"Failed to get response: {e}"
301
 
302
def run_example(message: Dict[str, Any], history: List[Dict[str, Any]] | None = None):
    """Send an Examples click straight to the model and stream into the Chatbot.

    Args:
        message: ChatInterface-style payload: {"text": ..., "files": [...]}.
        history: Current Chatbot message list (type="messages"), or None.

    Yields:
        The complete message list the Chatbot component expects:
        [{"role": ..., "content": ...}, ...] — full list per update, not a delta.
    """
    history = history or []

    # The user turn is invariant across streamed chunks, so build it once
    # instead of recomputing it for every yield.  Only the text is shown;
    # uploaded images stay in the model context without being rendered here.
    user_text = (message.get("text") or "").strip() or "[Example]"
    user_turn = {"role": "user", "content": user_text}

    # Reuse the existing streaming generator; each rendered HTML chunk is
    # wrapped into a fresh full message list for the Chatbot.
    for rendered in stream_response(message, history):
        yield history + [user_turn, {"role": "assistant", "content": rendered}]
321
 
322
  def build_demo() -> gr.Blocks:
323
+ theme = gr.themes.Soft(primary_hue="violet", secondary_hue="cyan", neutral_hue="slate")
324
+
325
+ with gr.Blocks(
326
+ title="ERNIE-4.5-VL-28B-A3B-Thinking",
327
+ theme=theme,
328
+ css=CUSTOM_CSS,
329
+ ) as demo:
330
+ with gr.Column(elem_id="ernie-hero"):
331
+ gr.Markdown(
332
+ """
333
+ <h1>Chat with ERNIE-4.5-VL-28B-A3B-Thinking</h1>
334
+ """,
335
+ elem_id="hero-text",
336
+ )
337
+ gr.Markdown(
338
+ """
339
+ <p id="model-link">
340
+ Model Repository:
341
+ <a href="https://huggingface.co/baidu/ERNIE-4.5-VL-28B-A3B-Thinking" target="_blank" rel="noopener">
342
+ ERNIE-4.5-VL-28B-A3B-Thinking
343
+ </a>
344
+ </p>
345
+ """
346
+ )
347
+
348
  textbox = gr.MultimodalTextbox(
349
  show_label=False,
350
  placeholder="Enter text, or upload one or more images...",
351
  file_types=["image","video"],
352
  file_count="multiple"
353
  )
354
+ chatbot = gr.Chatbot(
355
+ type="messages",
356
+ allow_tags=["think"],
357
+ height=560,
358
+ render_markdown=True,
359
+ show_copy_button=True,
360
+ )
361
 
362
  examples = [
363
  {
 
365
  "files": ["examples/case1.png"]
366
  },
367
  {
368
+ "text": "How many real people are actually in the picture?",
369
  "files": ["examples/case2.png"]
370
  },
371
  ]
372
 
373
+ with gr.Column(elem_id="examples-panel"):
 
 
 
 
 
 
 
 
374
  gr.Examples(
375
  examples=examples,
376
  inputs=textbox,
377
+ label=None,
378
+ examples_per_page=4,
379
+ elem_id="examples-grid",
380
+ fn=run_example, # 点击示例时,直接走大模型
381
+ outputs=chatbot,
382
+ run_on_click=True,
383
+ )
384
+
385
+ with gr.Column(elem_id="chat-wrapper"):
386
+ chat_interface = gr.ChatInterface(
387
+ fn=stream_response,
388
+ type="messages",
389
+ multimodal=True,
390
+ chatbot=chatbot,
391
+ textbox=textbox,
392
  )
393
 
394