Seounghyup Claude commited on
Commit
9026aea
·
1 Parent(s): d51e161

Add streaming API endpoint for Android app

Browse files

- Add /chat/stream endpoint with SSE (Server-Sent Events)
- Prevent timeout issues for mobile clients
- Add Python test client (test_streaming.py)
- Add Android integration guide with Kotlin examples

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>

Files changed (4) hide show
  1. ANDROID_STREAMING_GUIDE.md +431 -0
  2. app.py +80 -1
  3. main.py +80 -1
  4. test_streaming.py +175 -0
ANDROID_STREAMING_GUIDE.md ADDED
@@ -0,0 +1,431 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ChatBIA 안드로이드 스트리밍 연동 가이드
2
+
3
+ ## 📡 API 엔드포인트
4
+
5
+ ### 1. 일반 채팅 (비스트리밍)
6
+ ```
7
+ POST /chat
8
+ ```
9
+ - **타임아웃 위험**: 긴 응답 시 타임아웃 발생 가능
10
+ - **안드로이드에서 권장하지 않음**
11
+
12
+ ### 2. 스트리밍 채팅 ✅ **권장**
13
+ ```
14
+ POST /chat/stream
15
+ ```
16
+ - **타임아웃 방지**: 토큰 단위로 실시간 수신
17
+ - **안드로이드에 최적화**
18
+ - **SSE (Server-Sent Events)** 방식
19
+
20
+ ---
21
+
22
+ ## 🔧 안드로이드 구현 (Kotlin)
23
+
24
+ ### 1. build.gradle 의존성 추가
25
+
26
+ ```gradle
27
+ dependencies {
28
+ // OkHttp for SSE streaming
29
+ implementation("com.squareup.okhttp3:okhttp:4.12.0")
30
+ implementation("com.squareup.okhttp3:okhttp-sse:4.12.0")
31
+
32
+ // JSON 파싱
33
+ implementation("com.google.code.gson:gson:2.10.1")
34
+
35
+ // Coroutines
36
+ implementation("org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3")
37
+ }
38
+ ```
39
+
40
+ ### 2. 데이터 모델
41
+
42
+ ```kotlin
43
// ChatRequest.kt
// Request body for POST /chat and POST /chat/stream.
data class ChatRequest(
    val message: String,
    val mode: String = "bsl", // "bsl" or "general"
    val max_tokens: Int = 1024,
    val temperature: Float = 0.7f
)

// StreamingResponse.kt
// One SSE event from /chat/stream.
// While streaming: `token` holds the next text fragment and `done` is false.
// Final event: `done=true` with the total `token_count` and the `mode` used.
// `error` is non-null only when the server failed mid-stream.
data class StreamingResponse(
    val token: String = "",
    val done: Boolean = false,
    val token_count: Int = 0,
    val mode: String = "",
    val error: String? = null
)
59
+ ```
60
+
61
+ ### 3. ChatBIA API 클라이언트
62
+
63
+ ```kotlin
64
// ChatBiaApiClient.kt
import com.google.gson.Gson
import kotlinx.coroutines.channels.awaitClose
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.callbackFlow
import okhttp3.*
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.RequestBody.Companion.toRequestBody
import okhttp3.sse.EventSource
import okhttp3.sse.EventSourceListener
import okhttp3.sse.EventSources
import java.io.IOException
import java.util.concurrent.TimeUnit
import kotlin.coroutines.resume
import kotlin.coroutines.resumeWithException
import kotlin.coroutines.suspendCoroutine

/**
 * HTTP client for the ChatBIA API.
 *
 * @param baseUrl server root, e.g. "https://your-space.hf.space" (no trailing slash)
 */
class ChatBiaApiClient(private val baseUrl: String) {

    private val client = OkHttpClient.Builder()
        .connectTimeout(30, TimeUnit.SECONDS)
        .readTimeout(60, TimeUnit.SECONDS) // streaming needs a long read timeout
        .writeTimeout(30, TimeUnit.SECONDS)
        .build()

    private val gson = Gson()

    /**
     * Streaming chat (recommended).
     *
     * Emits one [StreamingResponse] per SSE event until the server sends
     * `done=true` or the connection fails.
     *
     * Implementation note: this must be a [callbackFlow], not a plain
     * `flow { }` builder — OkHttp delivers SSE events on its own thread,
     * where `emit()` is not allowed. `trySend()` bridges the callback into
     * the flow, and [awaitClose] cancels the EventSource when the collector
     * stops collecting.
     */
    fun chatStream(request: ChatRequest): Flow<StreamingResponse> = callbackFlow {
        val jsonBody = gson.toJson(request)
        val requestBody = jsonBody.toRequestBody("application/json".toMediaType())

        val httpRequest = Request.Builder()
            .url("$baseUrl/chat/stream")
            .post(requestBody)
            .addHeader("Accept", "text/event-stream")
            .build()

        val eventSource = EventSources.createFactory(client)
            .newEventSource(httpRequest, object : EventSourceListener() {

                override fun onOpen(eventSource: EventSource, response: Response) {
                    // Connection established; nothing to emit yet.
                }

                override fun onEvent(
                    eventSource: EventSource,
                    id: String?,
                    type: String?,
                    data: String
                ) {
                    try {
                        val parsed = gson.fromJson(data, StreamingResponse::class.java)
                        trySend(parsed)
                        if (parsed.done) {
                            close() // normal completion: server sent the final event
                        }
                    } catch (e: Exception) {
                        close(e) // malformed JSON -> fail the flow
                    }
                }

                override fun onFailure(
                    eventSource: EventSource,
                    t: Throwable?,
                    response: Response?
                ) {
                    close(t ?: IOException("SSE 연결 실패: ${response?.code}"))
                }

                override fun onClosed(eventSource: EventSource) {
                    close() // server closed the stream
                }
            })

        // Runs when the collector cancels or the flow closes:
        // tear down the underlying HTTP connection.
        awaitClose { eventSource.cancel() }
    }

    /**
     * Plain (non-streaming) chat.
     *
     * The whole answer arrives in one response, so long generations can hit
     * the 60s read timeout — prefer [chatStream] on mobile.
     */
    suspend fun chat(request: ChatRequest): ChatResponse = suspendCoroutine { continuation ->
        val url = "$baseUrl/chat"

        val jsonBody = gson.toJson(request)
        val requestBody = jsonBody.toRequestBody("application/json".toMediaType())

        val httpRequest = Request.Builder()
            .url(url)
            .post(requestBody)
            .build()

        client.newCall(httpRequest).enqueue(object : Callback {
            override fun onFailure(call: Call, e: IOException) {
                continuation.resumeWithException(e)
            }

            override fun onResponse(call: Call, response: Response) {
                if (response.isSuccessful) {
                    val body = response.body?.string()
                    val chatResponse = gson.fromJson(body, ChatResponse::class.java)
                    continuation.resume(chatResponse)
                } else {
                    continuation.resumeWithException(
                        Exception("HTTP ${response.code}: ${response.message}")
                    )
                }
            }
        })
    }

    /** Response body of the non-streaming /chat endpoint. */
    data class ChatResponse(
        val response: String,
        val mode: String,
        val tokens: Int
    )
}
197
+ ```
198
+
199
+ ### 4. ViewModel 사용 예제
200
+
201
+ ```kotlin
202
// ChatViewModel.kt
import androidx.lifecycle.ViewModel
import androidx.lifecycle.viewModelScope
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.StateFlow
import kotlinx.coroutines.flow.catch
import kotlinx.coroutines.launch

/**
 * Holds the chat transcript and loading flag for the chat screen,
 * feeding streamed tokens into [chatState] as they arrive.
 */
class ChatViewModel : ViewModel() {

    private val apiClient = ChatBiaApiClient("https://your-hf-space.hf.space")

    // Accumulated assistant text; tokens are appended one by one.
    private val _chatState = MutableStateFlow("")
    val chatState: StateFlow<String> = _chatState

    // True while a streaming request is in flight.
    private val _isLoading = MutableStateFlow(false)
    val isLoading: StateFlow<Boolean> = _isLoading

    /**
     * Sends [message] via the streaming endpoint and appends each token
     * to [chatState] until the server reports completion.
     */
    fun sendStreamingMessage(message: String, mode: String = "bsl") {
        val request = ChatRequest(
            message = message,
            mode = mode,
            max_tokens = 1024,
            temperature = 0.7f
        )

        viewModelScope.launch {
            _isLoading.value = true
            _chatState.value = "" // clear the previous answer

            apiClient.chatStream(request)
                .catch { e ->
                    _chatState.value = "오류: ${e.message}"
                    _isLoading.value = false
                }
                .collect { chunk ->
                    when {
                        chunk.error != null -> {
                            _chatState.value = "서버 오류: ${chunk.error}"
                            _isLoading.value = false
                        }
                        chunk.done -> _isLoading.value = false // stream finished
                        else -> _chatState.value += chunk.token // append next token
                    }
                }
        }
    }
}
255
+ ```
256
+
257
+ ### 5. Compose UI 예제
258
+
259
+ ```kotlin
260
// ChatScreen.kt
import androidx.compose.foundation.layout.*
import androidx.compose.material3.*
import androidx.compose.runtime.*
import androidx.compose.ui.Modifier
import androidx.compose.ui.unit.dp
// Required for the viewModel() default argument below — missing from the
// original snippet, which made `viewModel()` an unresolved reference.
import androidx.lifecycle.viewmodel.compose.viewModel

/**
 * Chat screen: transcript card on top, input row at the bottom.
 * Streamed tokens appear in the transcript as [ChatViewModel.chatState] updates.
 */
@Composable
fun ChatScreen(viewModel: ChatViewModel = viewModel()) {

    val chatState by viewModel.chatState.collectAsState()
    val isLoading by viewModel.isLoading.collectAsState()
    var inputText by remember { mutableStateOf("") }

    Column(
        modifier = Modifier
            .fillMaxSize()
            .padding(16.dp)
    ) {
        // Transcript output.
        Card(
            modifier = Modifier
                .fillMaxWidth()
                .weight(1f)
        ) {
            Text(
                text = chatState,
                modifier = Modifier.padding(16.dp)
            )
        }

        Spacer(modifier = Modifier.height(16.dp))

        // Input field + send button; both disabled while a request is running.
        Row(
            modifier = Modifier.fillMaxWidth()
        ) {
            OutlinedTextField(
                value = inputText,
                onValueChange = { inputText = it },
                modifier = Modifier.weight(1f),
                placeholder = { Text("메시지 입력...") },
                enabled = !isLoading
            )

            Spacer(modifier = Modifier.width(8.dp))

            Button(
                onClick = {
                    if (inputText.isNotBlank()) {
                        viewModel.sendStreamingMessage(inputText)
                        inputText = "" // clear the field after sending
                    }
                },
                enabled = !isLoading
            ) {
                Text(if (isLoading) "전송 중..." else "전송")
            }
        }
    }
}
321
+ ```
322
+
323
+ ---
324
+
325
+ ## 🧪 테스트 방법
326
+
327
+ ### 1. 로컬 서버 실행
328
+ ```bash
329
+ cd ChatBIA-Server
330
+ uvicorn main:app --host 0.0.0.0 --port 8000
331
+ ```
332
+
333
+ ### 2. Python 테스트
334
+ ```bash
335
+ python test_streaming.py
336
+ ```
337
+
338
+ ### 3. 안드로이드 앱에서 연결
339
+ ```kotlin
340
+ // 로컬 테스트 (에뮬레이터)
341
+ val apiClient = ChatBiaApiClient("http://10.0.2.2:8000")
342
+
343
+ // 실제 디바이스 (같은 네트워크)
344
+ val apiClient = ChatBiaApiClient("http://YOUR_IP:8000")
345
+
346
+ // Hugging Face Spaces (배포 후)
347
+ val apiClient = ChatBiaApiClient("https://your-space.hf.space")
348
+ ```
349
+
350
+ ---
351
+
352
+ ## 📊 응답 형식
353
+
354
+ ### 스트리밍 응답 (SSE)
355
+ ```
356
+ data: {"token":"안녕","done":false,"token_count":1}
357
+
358
+ data: {"token":"하세요","done":false,"token_count":2}
359
+
360
+ data: {"token":"!","done":false,"token_count":3}
361
+
362
+ data: {"token":"","done":true,"token_count":3,"mode":"bsl"}
363
+ ```
364
+
365
+ ### 최종 응답
366
+ ```json
367
+ {
368
+ "token": "",
369
+ "done": true,
370
+ "token_count": 150,
371
+ "mode": "bsl"
372
+ }
373
+ ```
374
+
375
+ ### 오류 응답
376
+ ```json
377
+ {
378
+ "error": "오류 메시지",
379
+ "done": true
380
+ }
381
+ ```
382
+
383
+ ---
384
+
385
+ ## ⚡ 성능 최적화 팁
386
+
387
+ 1. **타임아웃 설정**
388
+ - Connect: 30초
389
+ - Read: 60초 (스트리밍)
390
+ - Write: 30초
391
+
392
+ 2. **재연결 로직**
393
+ ```kotlin
394
/**
 * Collects [ChatBiaApiClient.chatStream] with linear backoff on failure.
 *
 * Fixes vs. the original snippet: the function must be `suspend` (it calls
 * [delay] and `collect`), the request has to be passed in (`request` was an
 * unresolved reference), and [delay] takes a Long, hence `2000L`.
 * Requires: import kotlinx.coroutines.delay
 */
suspend fun ChatBiaApiClient.retryOnFailure(request: ChatRequest, maxRetries: Int = 3) {
    var attempts = 0
    while (attempts < maxRetries) {
        try {
            chatStream(request).collect { }
            return // success — stop retrying
        } catch (e: Exception) {
            attempts++
            delay(2000L * attempts) // linear backoff: 2s, 4s, 6s ...
        }
    }
}
406
+ ```
407
+
408
+ 3. **메모리 관리**
409
+ - Flow를 사용하여 메모리 효율적으로 처리
410
+ - UI 업데이트는 StateFlow로 최적화
411
+
412
+ ---
413
+
414
+ ## 🚀 배포 후 사용
415
+
416
+ Hugging Face Spaces에 배포 후:
417
+
418
+ ```kotlin
419
+ val BASE_URL = "https://your-username-chatbia-server.hf.space"
420
+ val apiClient = ChatBiaApiClient(BASE_URL)
421
+ ```
422
+
423
+ **주의**: Hugging Face Spaces 무료 플랜은 CPU만 제공되므로 응답 속도가 느릴 수 있습니다. 스트리밍 방식이 더욱 중요합니다!
424
+
425
+ ---
426
+
427
+ ## 🔗 관련 문서
428
+
429
+ - [FastAPI Streaming](https://fastapi.tiangolo.com/advanced/custom-response/#streamingresponse)
430
+ - [OkHttp SSE](https://square.github.io/okhttp/recipes/#server-sent-events)
431
+ - [Kotlin Flow](https://kotlinlang.org/docs/flow.html)
app.py CHANGED
@@ -4,9 +4,11 @@ ChatBIA Hugging Face Spaces API
4
  """
5
  from fastapi import FastAPI, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
 
7
  from pydantic import BaseModel
8
- from typing import Optional
9
  import os
 
10
  from llama_cpp import Llama
11
  from huggingface_hub import hf_hub_download
12
 
@@ -198,3 +200,80 @@ async def get_models():
198
  "path": bsl_model_path
199
  }
200
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  """
5
  from fastapi import FastAPI, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
7
+ from fastapi.responses import StreamingResponse
8
  from pydantic import BaseModel
9
+ from typing import Optional, AsyncGenerator
10
  import os
11
+ import json
12
  from llama_cpp import Llama
13
  from huggingface_hub import hf_hub_download
14
 
 
200
  "path": bsl_model_path
201
  }
202
  }
203
+
204
+
205
@app.post("/chat/stream")
async def chat_stream(request: ChatRequest):
    """Streaming chat endpoint (SSE) for mobile clients.

    Emits one ``data: {json}\n\n`` event per generated token so the client
    receives output continuously instead of waiting on one long request
    (timeout prevention for Android).
    """
    # Pick the model for the requested mode ("general" vs default BSL).
    if request.mode == "general":
        model = general_model
        model_name = "General"
    else:
        model = bsl_model
        model_name = "BSL"

    # 503 if the selected model failed to load at startup.
    if model is None:
        raise HTTPException(
            status_code=503,
            detail=f"{model_name} 모델이 로드되지 않았습니다."
        )

    def generate_stream():
        """Token-by-token SSE generator.

        Deliberately a *sync* generator: llama_cpp inference is blocking,
        and Starlette iterates sync generators in a threadpool
        (iterate_in_threadpool). The original ``async def`` version ran the
        blocking token loop directly on the event loop, stalling every
        other request while each token was computed.
        """
        try:
            prompt = build_prompt(request.message, request.mode)

            # Incremental inference: yields one chunk per decoded token.
            stream = model(
                prompt,
                max_tokens=request.max_tokens,
                temperature=request.temperature,
                top_p=0.9,
                top_k=40,
                repeat_penalty=1.1,
                stop=["<|im_end|>", "###", "\n\n\n"],
                stream=True  # enable streaming decode
            )

            token_count = 0
            for chunk in stream:
                choices = chunk.get("choices") or []
                if not choices:
                    continue
                delta = choices[0].get("text", "")
                if not delta:
                    continue
                token_count += 1
                # SSE wire format: "data: {json}\n\n"
                data = {
                    "token": delta,
                    "done": False,
                    "token_count": token_count
                }
                yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"

            # Final event tells the client to stop reading.
            final_data = {
                "token": "",
                "done": True,
                "token_count": token_count,
                "mode": request.mode
            }
            yield f"data: {json.dumps(final_data, ensure_ascii=False)}\n\n"

        except Exception as e:
            # Surface inference errors to the client as a terminal SSE event.
            error_data = {
                "error": str(e),
                "done": True
            }
            yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"

    return StreamingResponse(
        generate_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no"  # disable Nginx proxy buffering
        }
    )
main.py CHANGED
@@ -4,9 +4,11 @@ ChatBIA FastAPI Server
4
  """
5
  from fastapi import FastAPI, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
 
7
  from pydantic import BaseModel
8
- from typing import Optional, List
9
  import os
 
10
  from llama_cpp import Llama
11
 
12
  app = FastAPI(
@@ -183,6 +185,83 @@ async def get_models():
183
  }
184
 
185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  if __name__ == "__main__":
187
  import uvicorn
188
  uvicorn.run(
 
4
  """
5
  from fastapi import FastAPI, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
7
+ from fastapi.responses import StreamingResponse
8
  from pydantic import BaseModel
9
+ from typing import Optional, List, AsyncGenerator
10
  import os
11
+ import json
12
  from llama_cpp import Llama
13
 
14
  app = FastAPI(
 
185
  }
186
 
187
 
188
@app.post("/chat/stream")
async def chat_stream(request: ChatRequest):
    """Streaming chat endpoint (SSE) for mobile clients.

    Emits one ``data: {json}\n\n`` event per generated token so the client
    receives output continuously instead of waiting on one long request
    (timeout prevention for Android).
    """
    # Pick the model for the requested mode ("general" vs default BSL).
    if request.mode == "general":
        model = general_model
        model_name = "General"
    else:
        model = bsl_model
        model_name = "BSL"

    # 503 if the selected model failed to load at startup.
    if model is None:
        raise HTTPException(
            status_code=503,
            detail=f"{model_name} 모델이 로드되지 않았습니다."
        )

    def generate_stream():
        """Token-by-token SSE generator.

        Deliberately a *sync* generator: llama_cpp inference is blocking,
        and Starlette iterates sync generators in a threadpool
        (iterate_in_threadpool). The original ``async def`` version ran the
        blocking token loop directly on the event loop, stalling every
        other request while each token was computed.
        """
        try:
            prompt = build_prompt(request.message, request.mode)

            # Incremental inference: yields one chunk per decoded token.
            stream = model(
                prompt,
                max_tokens=request.max_tokens,
                temperature=request.temperature,
                top_p=0.9,
                top_k=40,
                repeat_penalty=1.1,
                stop=["<|im_end|>", "###", "\n\n\n"],
                stream=True  # enable streaming decode
            )

            token_count = 0
            for chunk in stream:
                choices = chunk.get("choices") or []
                if not choices:
                    continue
                delta = choices[0].get("text", "")
                if not delta:
                    continue
                token_count += 1
                # SSE wire format: "data: {json}\n\n"
                data = {
                    "token": delta,
                    "done": False,
                    "token_count": token_count
                }
                yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"

            # Final event tells the client to stop reading.
            final_data = {
                "token": "",
                "done": True,
                "token_count": token_count,
                "mode": request.mode
            }
            yield f"data: {json.dumps(final_data, ensure_ascii=False)}\n\n"

        except Exception as e:
            # Surface inference errors to the client as a terminal SSE event.
            error_data = {
                "error": str(e),
                "done": True
            }
            yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"

    return StreamingResponse(
        generate_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no"  # disable Nginx proxy buffering
        }
    )
263
+
264
+
265
  if __name__ == "__main__":
266
  import uvicorn
267
  uvicorn.run(
test_streaming.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ ChatBIA Streaming API 테스트 클라이언트
5
+ """
6
+ import requests
7
+ import json
8
+ import sys
9
+ import io
10
+
11
+ if sys.platform == 'win32':
12
+ sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
13
+
14
+
15
def test_streaming_chat(base_url: str, message: str, mode: str = "bsl"):
    """Exercise the SSE streaming endpoint and print tokens as they arrive.

    Args:
        base_url: API server URL (e.g. http://localhost:8000)
        message: user message to send
        mode: "bsl" or "general"
    """
    url = f"{base_url}/chat/stream"

    payload = {
        "message": message,
        "mode": mode,
        "max_tokens": 512,
        "temperature": 0.7
    }

    print(f"🔄 요청 전송: {message}")
    print(f"📡 모드: {mode}")
    print("=" * 60)

    try:
        # stream=True keeps the HTTP connection open while tokens arrive.
        # The `with` block guarantees the connection is released even when
        # we break out of the loop early (the original leaked it).
        with requests.post(
            url,
            json=payload,
            stream=True,
            headers={
                "Content-Type": "application/json",
                "Accept": "text/event-stream"
            },
            timeout=120  # generous 2-minute ceiling
        ) as response:
            if response.status_code != 200:
                print(f"❌ 오류: {response.status_code}")
                print(response.text)
                return

            print("✅ 응답 수신 중...\n")

            full_text = ""
            token_count = 0

            # Read the SSE stream line by line; events look like "data: {json}".
            for line in response.iter_lines():
                if not line:
                    continue
                line_str = line.decode('utf-8')
                if not line_str.startswith("data: "):
                    continue
                data_str = line_str[6:]  # strip the "data: " prefix

                try:
                    data = json.loads(data_str)
                except json.JSONDecodeError as e:
                    print(f"\n⚠️ JSON 파싱 오류: {e}")
                    print(f"   원본: {data_str}")
                    continue

                # Server-side failure arrives as a terminal error event.
                if "error" in data:
                    print(f"\n❌ 서버 오류: {data['error']}")
                    break

                if not data.get("done", False):
                    token = data.get("token", "")
                    print(token, end="", flush=True)
                    full_text += token
                    token_count = data.get("token_count", token_count)
                else:
                    # Final event: print the summary and stop reading.
                    print(f"\n\n✅ 완료!")
                    print(f"📊 토큰 수: {data.get('token_count', token_count)}")
                    print(f"🎯 모드: {data.get('mode', mode)}")
                    break

            print("\n" + "=" * 60)
            print(f"전체 응답 길이: {len(full_text)} 글자")

    except requests.exceptions.Timeout:
        print("❌ 타임아웃 오류: 서버 응답이 너무 느립니다.")
    except requests.exceptions.ConnectionError:
        print("❌ 연결 오류: 서버에 연결할 수 없습니다.")
    except Exception as e:
        print(f"❌ 예상치 못한 오류: {e}")
103
+
104
+
105
def test_regular_chat(base_url: str, message: str, mode: str = "bsl"):
    """Call the plain (non-streaming) /chat endpoint once and print the reply."""
    endpoint = f"{base_url}/chat"

    body = {
        "message": message,
        "mode": mode,
        "max_tokens": 512,
        "temperature": 0.7
    }

    print(f"🔄 일반 요청 전송: {message}")
    print(f"📡 모드: {mode}")
    print("=" * 60)

    try:
        reply = requests.post(endpoint, json=body, timeout=60)
        if reply.status_code == 200:
            result = reply.json()
            print("✅ 응답:\n")
            print(result["response"])
            print(f"\n📊 토큰 수: {result['tokens']}")
        else:
            # Non-2xx: dump status and raw body for debugging.
            print(f"❌ 오류: {reply.status_code}")
            print(reply.text)
    except Exception as e:
        print(f"❌ 오류: {e}")
136
+
137
+
138
if __name__ == "__main__":
    # Local development server.
    BASE_URL = "http://localhost:8000"

    # Hugging Face Spaces (after deployment):
    # BASE_URL = "https://your-username-chatbia-server.hf.space"

    print("=" * 60)
    print("ChatBIA Streaming API 테스트")
    print("=" * 60)

    # Test 1: streaming chat, BSL mode.
    print("\n[테스트 1] 스트리밍 채팅 - BSL 모드\n")
    test_streaming_chat(
        BASE_URL,
        "5천만원 설비의 감가상각 계산해줘. 내용연수는 10년이고 잔존가치는 10%야.",
        mode="bsl"
    )

    print("\n" + "=" * 60 + "\n")

    # Test 2: streaming chat, general mode.
    print("\n[테스트 2] 스트리밍 채팅 - 일반 모드\n")
    test_streaming_chat(
        BASE_URL,
        "회계에서 감가상각이 뭐야?",
        mode="general"
    )

    print("\n" + "=" * 60 + "\n")

    # Test 3: plain (non-streaming) chat, for comparison.
    print("\n[테스트 3] 일반 채팅 (비스트리밍) - 비교용\n")
    test_regular_chat(
        BASE_URL,
        "안녕하세요!",
        mode="bsl"
    )
+ )