Seounghyup Claude commited on
Commit
9026aea
·
1 Parent(s): d51e161

Add streaming API endpoint for Android app

Browse files

- Add /chat/stream endpoint with SSE (Server-Sent Events)
- Prevent timeout issues for mobile clients
- Add Python test client (test_streaming.py)
- Add Android integration guide with Kotlin examples

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <[email protected]>

Files changed (4) hide show
  1. ANDROID_STREAMING_GUIDE.md +431 -0
  2. app.py +80 -1
  3. main.py +80 -1
  4. test_streaming.py +175 -0
ANDROID_STREAMING_GUIDE.md ADDED
@@ -0,0 +1,431 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ChatBIA 안드로이드 스트리밍 연동 가이드
2
+
3
+ ## 📡 API 엔드포인트
4
+
5
+ ### 1. 일반 채팅 (비스트리밍)
6
+ ```
7
+ POST /chat
8
+ ```
9
+ - **타임아웃 위험**: 긴 응답 시 타임아웃 발생 가능
10
+ - **안드로이드에서 권장하지 않음**
11
+
12
+ ### 2. 스트리밍 채팅 ✅ **권장**
13
+ ```
14
+ POST /chat/stream
15
+ ```
16
+ - **타임아웃 방지**: 토큰 단위로 실시간 수신
17
+ - **안드로이드에 최적화**
18
+ - **SSE (Server-Sent Events)** 방식
19
+
20
+ ---
21
+
22
+ ## 🔧 안드로이드 구현 (Kotlin)
23
+
24
+ ### 1. build.gradle 의존성 추가
25
+
26
+ ```gradle
27
+ dependencies {
28
+ // OkHttp for SSE streaming
29
+ implementation("com.squareup.okhttp3:okhttp:4.12.0")
30
+ implementation("com.squareup.okhttp3:okhttp-sse:4.12.0")
31
+
32
+ // JSON 파싱
33
+ implementation("com.google.code.gson:gson:2.10.1")
34
+
35
+ // Coroutines
36
+ implementation("org.jetbrains.kotlinx:kotlinx-coroutines-android:1.7.3")
37
+ }
38
+ ```
39
+
40
+ ### 2. 데이터 모델
41
+
42
+ ```kotlin
43
// ChatRequest.kt
// Request body for POST /chat and POST /chat/stream.
data class ChatRequest(
    val message: String,
    val mode: String = "bsl", // "bsl" or "general"
    val max_tokens: Int = 1024,
    val temperature: Float = 0.7f
)

// StreamingResponse.kt
// One SSE event from /chat/stream.
// While streaming: `token` holds the next text fragment and `done` is false.
// Final event: `done=true` with the total `token_count` and the `mode` used.
// `error` is non-null only when the server failed mid-stream.
data class StreamingResponse(
    val token: String = "",
    val done: Boolean = false,
    val token_count: Int = 0,
    val mode: String = "",
    val error: String? = null
)
59
+ ```
60
+
61
+ ### 3. ChatBIA API 클라이언트
62
+
63
+ ```kotlin
64
// ChatBiaApiClient.kt
import com.google.gson.Gson
import kotlinx.coroutines.channels.awaitClose
import kotlinx.coroutines.flow.Flow
import kotlinx.coroutines.flow.callbackFlow
import okhttp3.*
import okhttp3.MediaType.Companion.toMediaType
import okhttp3.RequestBody.Companion.toRequestBody
import okhttp3.sse.EventSource
import okhttp3.sse.EventSourceListener
import okhttp3.sse.EventSources
import java.io.IOException
import java.util.concurrent.TimeUnit
import kotlin.coroutines.resume
import kotlin.coroutines.resumeWithException
import kotlin.coroutines.suspendCoroutine

/**
 * HTTP client for the ChatBIA API.
 *
 * @param baseUrl server root, e.g. "https://your-space.hf.space" (no trailing slash)
 */
class ChatBiaApiClient(private val baseUrl: String) {

    private val client = OkHttpClient.Builder()
        .connectTimeout(30, TimeUnit.SECONDS)
        .readTimeout(60, TimeUnit.SECONDS) // streaming needs a long read timeout
        .writeTimeout(30, TimeUnit.SECONDS)
        .build()

    private val gson = Gson()

    /**
     * Streaming chat (recommended).
     *
     * Emits one [StreamingResponse] per SSE event until the server sends
     * `done=true` or the connection fails.
     *
     * Implementation note: this must be a [callbackFlow], not a plain
     * `flow { }` builder — OkHttp delivers SSE events on its own thread,
     * where `emit()` is not allowed. `trySend()` bridges the callback into
     * the flow, and [awaitClose] cancels the EventSource when the collector
     * stops collecting.
     */
    fun chatStream(request: ChatRequest): Flow<StreamingResponse> = callbackFlow {
        val jsonBody = gson.toJson(request)
        val requestBody = jsonBody.toRequestBody("application/json".toMediaType())

        val httpRequest = Request.Builder()
            .url("$baseUrl/chat/stream")
            .post(requestBody)
            .addHeader("Accept", "text/event-stream")
            .build()

        val eventSource = EventSources.createFactory(client)
            .newEventSource(httpRequest, object : EventSourceListener() {

                override fun onOpen(eventSource: EventSource, response: Response) {
                    // Connection established; nothing to emit yet.
                }

                override fun onEvent(
                    eventSource: EventSource,
                    id: String?,
                    type: String?,
                    data: String
                ) {
                    try {
                        val parsed = gson.fromJson(data, StreamingResponse::class.java)
                        trySend(parsed)
                        if (parsed.done) {
                            close() // normal completion: server sent the final event
                        }
                    } catch (e: Exception) {
                        close(e) // malformed JSON -> fail the flow
                    }
                }

                override fun onFailure(
                    eventSource: EventSource,
                    t: Throwable?,
                    response: Response?
                ) {
                    close(t ?: IOException("SSE 연결 실패: ${response?.code}"))
                }

                override fun onClosed(eventSource: EventSource) {
                    close() // server closed the stream
                }
            })

        // Runs when the collector cancels or the flow closes:
        // tear down the underlying HTTP connection.
        awaitClose { eventSource.cancel() }
    }

    /**
     * Plain (non-streaming) chat.
     *
     * The whole answer arrives in one response, so long generations can hit
     * the 60s read timeout — prefer [chatStream] on mobile.
     */
    suspend fun chat(request: ChatRequest): ChatResponse = suspendCoroutine { continuation ->
        val url = "$baseUrl/chat"

        val jsonBody = gson.toJson(request)
        val requestBody = jsonBody.toRequestBody("application/json".toMediaType())

        val httpRequest = Request.Builder()
            .url(url)
            .post(requestBody)
            .build()

        client.newCall(httpRequest).enqueue(object : Callback {
            override fun onFailure(call: Call, e: IOException) {
                continuation.resumeWithException(e)
            }

            override fun onResponse(call: Call, response: Response) {
                if (response.isSuccessful) {
                    val body = response.body?.string()
                    val chatResponse = gson.fromJson(body, ChatResponse::class.java)
                    continuation.resume(chatResponse)
                } else {
                    continuation.resumeWithException(
                        Exception("HTTP ${response.code}: ${response.message}")
                    )
                }
            }
        })
    }

    /** Response body of the non-streaming /chat endpoint. */
    data class ChatResponse(
        val response: String,
        val mode: String,
        val tokens: Int
    )
}
197
+ ```
198
+
199
+ ### 4. ViewModel 사용 예제
200
+
201
+ ```kotlin
202
// ChatViewModel.kt
import androidx.lifecycle.ViewModel
import androidx.lifecycle.viewModelScope
import kotlinx.coroutines.flow.MutableStateFlow
import kotlinx.coroutines.flow.StateFlow
import kotlinx.coroutines.flow.catch
import kotlinx.coroutines.launch

/**
 * Holds the chat transcript and loading flag for the chat screen,
 * feeding streamed tokens into [chatState] as they arrive.
 */
class ChatViewModel : ViewModel() {

    private val apiClient = ChatBiaApiClient("https://your-hf-space.hf.space")

    // Accumulated assistant text; tokens are appended one by one.
    private val _chatState = MutableStateFlow("")
    val chatState: StateFlow<String> = _chatState

    // True while a streaming request is in flight.
    private val _isLoading = MutableStateFlow(false)
    val isLoading: StateFlow<Boolean> = _isLoading

    /**
     * Sends [message] via the streaming endpoint and appends each token
     * to [chatState] until the server reports completion.
     */
    fun sendStreamingMessage(message: String, mode: String = "bsl") {
        val request = ChatRequest(
            message = message,
            mode = mode,
            max_tokens = 1024,
            temperature = 0.7f
        )

        viewModelScope.launch {
            _isLoading.value = true
            _chatState.value = "" // clear the previous answer

            apiClient.chatStream(request)
                .catch { e ->
                    _chatState.value = "오류: ${e.message}"
                    _isLoading.value = false
                }
                .collect { chunk ->
                    when {
                        chunk.error != null -> {
                            _chatState.value = "서버 오류: ${chunk.error}"
                            _isLoading.value = false
                        }
                        chunk.done -> _isLoading.value = false // stream finished
                        else -> _chatState.value += chunk.token // append next token
                    }
                }
        }
    }
}
255
+ ```
256
+
257
+ ### 5. Compose UI 예제
258
+
259
+ ```kotlin
260
// ChatScreen.kt
import androidx.compose.foundation.layout.*
import androidx.compose.material3.*
import androidx.compose.runtime.*
import androidx.compose.ui.Modifier
import androidx.compose.ui.unit.dp
// Required for the viewModel() default argument below — missing from the
// original snippet, which made `viewModel()` an unresolved reference.
import androidx.lifecycle.viewmodel.compose.viewModel

/**
 * Chat screen: transcript card on top, input row at the bottom.
 * Streamed tokens appear in the transcript as [ChatViewModel.chatState] updates.
 */
@Composable
fun ChatScreen(viewModel: ChatViewModel = viewModel()) {

    val chatState by viewModel.chatState.collectAsState()
    val isLoading by viewModel.isLoading.collectAsState()
    var inputText by remember { mutableStateOf("") }

    Column(
        modifier = Modifier
            .fillMaxSize()
            .padding(16.dp)
    ) {
        // Transcript output.
        Card(
            modifier = Modifier
                .fillMaxWidth()
                .weight(1f)
        ) {
            Text(
                text = chatState,
                modifier = Modifier.padding(16.dp)
            )
        }

        Spacer(modifier = Modifier.height(16.dp))

        // Input field + send button; both disabled while a request is running.
        Row(
            modifier = Modifier.fillMaxWidth()
        ) {
            OutlinedTextField(
                value = inputText,
                onValueChange = { inputText = it },
                modifier = Modifier.weight(1f),
                placeholder = { Text("메시지 입력...") },
                enabled = !isLoading
            )

            Spacer(modifier = Modifier.width(8.dp))

            Button(
                onClick = {
                    if (inputText.isNotBlank()) {
                        viewModel.sendStreamingMessage(inputText)
                        inputText = "" // clear the field after sending
                    }
                },
                enabled = !isLoading
            ) {
                Text(if (isLoading) "전송 중..." else "전송")
            }
        }
    }
}
321
+ ```
322
+
323
+ ---
324
+
325
+ ## 🧪 테스트 방법
326
+
327
+ ### 1. 로컬 서버 실행
328
+ ```bash
329
+ cd ChatBIA-Server
330
+ uvicorn main:app --host 0.0.0.0 --port 8000
331
+ ```
332
+
333
+ ### 2. Python 테스트
334
+ ```bash
335
+ python test_streaming.py
336
+ ```
337
+
338
+ ### 3. 안드로이드 앱에서 연결
339
+ ```kotlin
340
+ // 로컬 테스트 (에뮬레이터)
341
+ val apiClient = ChatBiaApiClient("http://10.0.2.2:8000")
342
+
343
+ // 실제 디바이스 (같은 네트워크)
344
+ val apiClient = ChatBiaApiClient("http://YOUR_IP:8000")
345
+
346
+ // Hugging Face Spaces (배포 후)
347
+ val apiClient = ChatBiaApiClient("https://your-space.hf.space")
348
+ ```
349
+
350
+ ---
351
+
352
+ ## 📊 응답 형식
353
+
354
+ ### 스트리밍 응답 (SSE)
355
+ ```
356
+ data: {"token":"안녕","done":false,"token_count":1}
357
+
358
+ data: {"token":"하세요","done":false,"token_count":2}
359
+
360
+ data: {"token":"!","done":false,"token_count":3}
361
+
362
+ data: {"token":"","done":true,"token_count":3,"mode":"bsl"}
363
+ ```
364
+
365
+ ### 최종 응답
366
+ ```json
367
+ {
368
+ "token": "",
369
+ "done": true,
370
+ "token_count": 150,
371
+ "mode": "bsl"
372
+ }
373
+ ```
374
+
375
+ ### 오류 응답
376
+ ```json
377
+ {
378
+ "error": "오류 메시지",
379
+ "done": true
380
+ }
381
+ ```
382
+
383
+ ---
384
+
385
+ ## ⚡ 성능 최적화 팁
386
+
387
+ 1. **타임아웃 설정**
388
+ - Connect: 30초
389
+ - Read: 60초 (스트리밍)
390
+ - Write: 30초
391
+
392
+ 2. **재연결 로직**
393
+ ```kotlin
394
/**
 * Collects [ChatBiaApiClient.chatStream] with linear backoff on failure.
 *
 * Fixes vs. the original snippet: the function must be `suspend` (it calls
 * [delay] and `collect`), the request has to be passed in (`request` was an
 * unresolved reference), and [delay] takes a Long, hence `2000L`.
 * Requires: import kotlinx.coroutines.delay
 */
suspend fun ChatBiaApiClient.retryOnFailure(request: ChatRequest, maxRetries: Int = 3) {
    var attempts = 0
    while (attempts < maxRetries) {
        try {
            chatStream(request).collect { }
            return // success — stop retrying
        } catch (e: Exception) {
            attempts++
            delay(2000L * attempts) // linear backoff: 2s, 4s, 6s ...
        }
    }
}
406
+ ```
407
+
408
+ 3. **메모리 관리**
409
+ - Flow를 사용하여 메모리 효율적으로 처리
410
+ - UI 업데이트는 StateFlow로 최적화
411
+
412
+ ---
413
+
414
+ ## 🚀 배포 후 사용
415
+
416
+ Hugging Face Spaces에 배포 후:
417
+
418
+ ```kotlin
419
+ val BASE_URL = "https://your-username-chatbia-server.hf.space"
420
+ val apiClient = ChatBiaApiClient(BASE_URL)
421
+ ```
422
+
423
+ **주의**: Hugging Face Spaces 무료 플랜은 CPU만 제공되므로 응답 속도가 느릴 수 있습니다. 스트리밍 방식이 더욱 중요합니다!
424
+
425
+ ---
426
+
427
+ ## 🔗 관련 문서
428
+
429
+ - [FastAPI Streaming](https://fastapi.tiangolo.com/advanced/custom-response/#streamingresponse)
430
+ - [OkHttp SSE](https://square.github.io/okhttp/recipes/#server-sent-events)
431
+ - [Kotlin Flow](https://kotlinlang.org/docs/flow.html)
app.py CHANGED
@@ -4,9 +4,11 @@ ChatBIA Hugging Face Spaces API
4
  """
5
  from fastapi import FastAPI, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
 
7
  from pydantic import BaseModel
8
- from typing import Optional
9
  import os
 
10
  from llama_cpp import Llama
11
  from huggingface_hub import hf_hub_download
12
 
@@ -198,3 +200,80 @@ async def get_models():
198
  "path": bsl_model_path
199
  }
200
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  """
5
  from fastapi import FastAPI, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
7
+ from fastapi.responses import StreamingResponse
8
  from pydantic import BaseModel
9
+ from typing import Optional, AsyncGenerator
10
  import os
11
+ import json
12
  from llama_cpp import Llama
13
  from huggingface_hub import hf_hub_download
14
 
 
200
  "path": bsl_model_path
201
  }
202
  }
203
+
204
+
205
@app.post("/chat/stream")
async def chat_stream(request: ChatRequest):
    """Streaming chat endpoint (SSE) for mobile clients.

    Emits one ``data: {json}\n\n`` event per generated token so the client
    receives output continuously instead of waiting on one long request
    (timeout prevention for Android).
    """
    # Pick the model for the requested mode ("general" vs default BSL).
    if request.mode == "general":
        model = general_model
        model_name = "General"
    else:
        model = bsl_model
        model_name = "BSL"

    # 503 if the selected model failed to load at startup.
    if model is None:
        raise HTTPException(
            status_code=503,
            detail=f"{model_name} 모델이 로드되지 않았습니다."
        )

    def generate_stream():
        """Token-by-token SSE generator.

        Deliberately a *sync* generator: llama_cpp inference is blocking,
        and Starlette iterates sync generators in a threadpool
        (iterate_in_threadpool). The original ``async def`` version ran the
        blocking token loop directly on the event loop, stalling every
        other request while each token was computed.
        """
        try:
            prompt = build_prompt(request.message, request.mode)

            # Incremental inference: yields one chunk per decoded token.
            stream = model(
                prompt,
                max_tokens=request.max_tokens,
                temperature=request.temperature,
                top_p=0.9,
                top_k=40,
                repeat_penalty=1.1,
                stop=["<|im_end|>", "###", "\n\n\n"],
                stream=True  # enable streaming decode
            )

            token_count = 0
            for chunk in stream:
                choices = chunk.get("choices") or []
                if not choices:
                    continue
                delta = choices[0].get("text", "")
                if not delta:
                    continue
                token_count += 1
                # SSE wire format: "data: {json}\n\n"
                data = {
                    "token": delta,
                    "done": False,
                    "token_count": token_count
                }
                yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"

            # Final event tells the client to stop reading.
            final_data = {
                "token": "",
                "done": True,
                "token_count": token_count,
                "mode": request.mode
            }
            yield f"data: {json.dumps(final_data, ensure_ascii=False)}\n\n"

        except Exception as e:
            # Surface inference errors to the client as a terminal SSE event.
            error_data = {
                "error": str(e),
                "done": True
            }
            yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"

    return StreamingResponse(
        generate_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no"  # disable Nginx proxy buffering
        }
    )
main.py CHANGED
@@ -4,9 +4,11 @@ ChatBIA FastAPI Server
4
  """
5
  from fastapi import FastAPI, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
 
7
  from pydantic import BaseModel
8
- from typing import Optional, List
9
  import os
 
10
  from llama_cpp import Llama
11
 
12
  app = FastAPI(
@@ -183,6 +185,83 @@ async def get_models():
183
  }
184
 
185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  if __name__ == "__main__":
187
  import uvicorn
188
  uvicorn.run(
 
4
  """
5
  from fastapi import FastAPI, HTTPException
6
  from fastapi.middleware.cors import CORSMiddleware
7
+ from fastapi.responses import StreamingResponse
8
  from pydantic import BaseModel
9
+ from typing import Optional, List, AsyncGenerator
10
  import os
11
+ import json
12
  from llama_cpp import Llama
13
 
14
  app = FastAPI(
 
185
  }
186
 
187
 
188
@app.post("/chat/stream")
async def chat_stream(request: ChatRequest):
    """Streaming chat endpoint (SSE) for mobile clients.

    Emits one ``data: {json}\n\n`` event per generated token so the client
    receives output continuously instead of waiting on one long request
    (timeout prevention for Android).
    """
    # Pick the model for the requested mode ("general" vs default BSL).
    if request.mode == "general":
        model = general_model
        model_name = "General"
    else:
        model = bsl_model
        model_name = "BSL"

    # 503 if the selected model failed to load at startup.
    if model is None:
        raise HTTPException(
            status_code=503,
            detail=f"{model_name} 모델이 로드되지 않았습니다."
        )

    def generate_stream():
        """Token-by-token SSE generator.

        Deliberately a *sync* generator: llama_cpp inference is blocking,
        and Starlette iterates sync generators in a threadpool
        (iterate_in_threadpool). The original ``async def`` version ran the
        blocking token loop directly on the event loop, stalling every
        other request while each token was computed.
        """
        try:
            prompt = build_prompt(request.message, request.mode)

            # Incremental inference: yields one chunk per decoded token.
            stream = model(
                prompt,
                max_tokens=request.max_tokens,
                temperature=request.temperature,
                top_p=0.9,
                top_k=40,
                repeat_penalty=1.1,
                stop=["<|im_end|>", "###", "\n\n\n"],
                stream=True  # enable streaming decode
            )

            token_count = 0
            for chunk in stream:
                choices = chunk.get("choices") or []
                if not choices:
                    continue
                delta = choices[0].get("text", "")
                if not delta:
                    continue
                token_count += 1
                # SSE wire format: "data: {json}\n\n"
                data = {
                    "token": delta,
                    "done": False,
                    "token_count": token_count
                }
                yield f"data: {json.dumps(data, ensure_ascii=False)}\n\n"

            # Final event tells the client to stop reading.
            final_data = {
                "token": "",
                "done": True,
                "token_count": token_count,
                "mode": request.mode
            }
            yield f"data: {json.dumps(final_data, ensure_ascii=False)}\n\n"

        except Exception as e:
            # Surface inference errors to the client as a terminal SSE event.
            error_data = {
                "error": str(e),
                "done": True
            }
            yield f"data: {json.dumps(error_data, ensure_ascii=False)}\n\n"

    return StreamingResponse(
        generate_stream(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "X-Accel-Buffering": "no"  # disable Nginx proxy buffering
        }
    )
263
+
264
+
265
  if __name__ == "__main__":
266
  import uvicorn
267
  uvicorn.run(
test_streaming.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ ChatBIA Streaming API 테스트 클라이언트
5
+ """
6
+ import requests
7
+ import json
8
+ import sys
9
+ import io
10
+
11
+ if sys.platform == 'win32':
12
+ sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
13
+
14
+
15
def test_streaming_chat(base_url: str, message: str, mode: str = "bsl"):
    """Exercise the SSE streaming endpoint and print tokens as they arrive.

    Args:
        base_url: API server URL (e.g. http://localhost:8000)
        message: user message to send
        mode: "bsl" or "general"
    """
    url = f"{base_url}/chat/stream"

    payload = {
        "message": message,
        "mode": mode,
        "max_tokens": 512,
        "temperature": 0.7
    }

    print(f"🔄 요청 전송: {message}")
    print(f"📡 모드: {mode}")
    print("=" * 60)

    try:
        # stream=True keeps the HTTP connection open while tokens arrive.
        # The `with` block guarantees the connection is released even when
        # we break out of the loop early (the original leaked it).
        with requests.post(
            url,
            json=payload,
            stream=True,
            headers={
                "Content-Type": "application/json",
                "Accept": "text/event-stream"
            },
            timeout=120  # generous 2-minute ceiling
        ) as response:
            if response.status_code != 200:
                print(f"❌ 오류: {response.status_code}")
                print(response.text)
                return

            print("✅ 응답 수신 중...\n")

            full_text = ""
            token_count = 0

            # Read the SSE stream line by line; events look like "data: {json}".
            for line in response.iter_lines():
                if not line:
                    continue
                line_str = line.decode('utf-8')
                if not line_str.startswith("data: "):
                    continue
                data_str = line_str[6:]  # strip the "data: " prefix

                try:
                    data = json.loads(data_str)
                except json.JSONDecodeError as e:
                    print(f"\n⚠️ JSON 파싱 오류: {e}")
                    print(f"   원본: {data_str}")
                    continue

                # Server-side failure arrives as a terminal error event.
                if "error" in data:
                    print(f"\n❌ 서버 오류: {data['error']}")
                    break

                if not data.get("done", False):
                    token = data.get("token", "")
                    print(token, end="", flush=True)
                    full_text += token
                    token_count = data.get("token_count", token_count)
                else:
                    # Final event: print the summary and stop reading.
                    print(f"\n\n✅ 완료!")
                    print(f"📊 토큰 수: {data.get('token_count', token_count)}")
                    print(f"🎯 모드: {data.get('mode', mode)}")
                    break

            print("\n" + "=" * 60)
            print(f"전체 응답 길이: {len(full_text)} 글자")

    except requests.exceptions.Timeout:
        print("❌ 타임아웃 오류: 서버 응답이 너무 느립니다.")
    except requests.exceptions.ConnectionError:
        print("❌ 연결 오류: 서버에 연결할 수 없습니다.")
    except Exception as e:
        print(f"❌ 예상치 못한 오류: {e}")
103
+
104
+
105
def test_regular_chat(base_url: str, message: str, mode: str = "bsl"):
    """Call the plain (non-streaming) /chat endpoint once and print the reply."""
    endpoint = f"{base_url}/chat"

    body = {
        "message": message,
        "mode": mode,
        "max_tokens": 512,
        "temperature": 0.7
    }

    print(f"🔄 일반 요청 전송: {message}")
    print(f"📡 모드: {mode}")
    print("=" * 60)

    try:
        reply = requests.post(endpoint, json=body, timeout=60)
        if reply.status_code == 200:
            result = reply.json()
            print("✅ 응답:\n")
            print(result["response"])
            print(f"\n📊 토큰 수: {result['tokens']}")
        else:
            # Non-2xx: dump status and raw body for debugging.
            print(f"❌ 오류: {reply.status_code}")
            print(reply.text)
    except Exception as e:
        print(f"❌ 오류: {e}")
136
+
137
+
138
if __name__ == "__main__":
    # Local development server.
    BASE_URL = "http://localhost:8000"

    # Hugging Face Spaces (after deployment):
    # BASE_URL = "https://your-username-chatbia-server.hf.space"

    print("=" * 60)
    print("ChatBIA Streaming API 테스트")
    print("=" * 60)

    # Test 1: streaming chat, BSL mode.
    print("\n[테스트 1] 스트리밍 채팅 - BSL 모드\n")
    test_streaming_chat(
        BASE_URL,
        "5천만원 설비의 감가상각 계산해줘. 내용연수는 10년이고 잔존가치는 10%야.",
        mode="bsl"
    )

    print("\n" + "=" * 60 + "\n")

    # Test 2: streaming chat, general mode.
    print("\n[테스트 2] 스트리밍 채팅 - 일반 모드\n")
    test_streaming_chat(
        BASE_URL,
        "회계에서 감가상각이 뭐야?",
        mode="general"
    )

    print("\n" + "=" * 60 + "\n")

    # Test 3: plain (non-streaming) chat, for comparison.
    print("\n[테스트 3] 일반 채팅 (비스트리밍) - 비교용\n")
    test_regular_chat(
        BASE_URL,
        "안녕하세요!",
        mode="bsl"
    )
+ )