rzline committed on
Commit
2615902
·
verified ·
1 Parent(s): 3416fd6

Create app/app/main.py

Browse files
Files changed (1) hide show
  1. app/app/main.py +378 -0
app/app/main.py ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Request, Depends, status
2
+ from fastapi.responses import JSONResponse, StreamingResponse, HTMLResponse
3
+ from .models import ChatCompletionRequest, ChatCompletionResponse, ErrorResponse, ModelList
4
+ from .gemini import GeminiClient, ResponseWrapper
5
+ from .utils import handle_gemini_error, protect_from_abuse, APIKeyManager, test_api_key, format_log_message
6
+ import os
7
+ import json
8
+ import asyncio
9
+ from typing import Literal
10
+ import random
11
+ import requests
12
+ from datetime import datetime, timedelta
13
+ from apscheduler.schedulers.background import BackgroundScheduler
14
+ import sys
15
+ import logging
16
+
17
# Disable uvicorn's own loggers; this module reports through `logger` below.
logging.getLogger("uvicorn").disabled = True
logging.getLogger("uvicorn.access").disabled = True

# Application logger, accepting every record from DEBUG upwards.
logger = logging.getLogger("my_logger")
logger.setLevel(logging.DEBUG)
23
+
24
def translate_error(message: str) -> str:
    """Translate a well-known upstream error phrase into Chinese.

    Matching is a case-insensitive substring test. Phrases are tried in a
    fixed order and the first one found decides the result.

    Args:
        message: Raw error text.

    Returns:
        The Chinese description of the first known phrase contained in
        ``message``, or ``message`` unchanged when no phrase matches.
    """
    translations = (
        ("quota exceeded", "API 密钥配额已用尽"),
        ("invalid argument", "无效参数"),
        ("internal server error", "服务器内部错误"),
        ("service unavailable", "服务不可用"),
    )
    # Lower-case once instead of once per candidate phrase.
    lowered = message.lower()
    for phrase, translated in translations:
        if phrase in lowered:
            return translated
    return message
34
+
35
+
36
def handle_exception(exc_type, exc_value, exc_traceback):
    """Log an uncaught exception; intended for use as ``sys.excepthook``.

    Args:
        exc_type: Class of the uncaught exception.
        exc_value: The exception instance.
        exc_traceback: Traceback object associated with the exception.
    """
    if issubclass(exc_type, KeyboardInterrupt):
        # Delegate to the interpreter's original hook. Calling
        # ``sys.excepthook`` here would re-enter this function forever once
        # it has been installed as the hook.
        sys.__excepthook__(exc_type, exc_value, exc_traceback)
        return
    error_message = translate_error(str(exc_value))
    log_msg = format_log_message(
        'ERROR',
        f"未捕获的异常: {error_message}",
        extra={'status_code': 500, 'error_message': error_message},
    )
    logger.error(log_msg)
43
+
44
+
45
# Send uncaught exceptions through handle_exception so they reach the logger.
sys.excepthook = handle_exception

app = FastAPI()

# Runtime settings; each one can be overridden through the environment.
# NOTE(review): the fallback password "123" is trivially guessable - confirm
# that deployments always set PASSWORD explicitly.
PASSWORD = os.getenv("PASSWORD", "123")
MAX_REQUESTS_PER_MINUTE = int(os.getenv("MAX_REQUESTS_PER_MINUTE", "30"))
MAX_REQUESTS_PER_DAY_PER_IP = int(os.getenv("MAX_REQUESTS_PER_DAY_PER_IP", "600"))
# MAX_RETRIES = int(os.environ.get('MaxRetries', '3').strip() or '3')
RETRY_DELAY = 1
MAX_RETRY_DELAY = 16
56
# Harm categories sent with every request. Both tables below are generated
# from this single tuple so they cannot drift apart.
_HARM_CATEGORIES = (
    "HARM_CATEGORY_HARASSMENT",
    "HARM_CATEGORY_HATE_SPEECH",
    "HARM_CATEGORY_SEXUALLY_EXPLICIT",
    "HARM_CATEGORY_DANGEROUS_CONTENT",
    "HARM_CATEGORY_CIVIC_INTEGRITY",
)

# Default table: every category uses the "BLOCK_NONE" threshold.
safety_settings = [
    {"category": category, "threshold": "BLOCK_NONE"}
    for category in _HARM_CATEGORIES
]

# Variant using the "OFF" threshold for every category.
safety_settings_g2 = [
    {"category": category, "threshold": "OFF"}
    for category in _HARM_CATEGORIES
]
100
+
101
# Shared key manager for the whole module. Per the original author's note the
# key stack is initialised inside APIKeyManager.__init__ - not visible here.
key_manager = APIKeyManager()
# Key currently in use; rebound by switch_api_key() and process_request().
current_api_key = key_manager.get_available_key()
103
+
104
+
105
def switch_api_key():
    """Rebind the module-level ``current_api_key`` to the manager's next key.

    When the manager returns no key, the current key is left untouched and an
    error is logged instead.
    """
    global current_api_key
    # The original author notes that get_available_key handles the key-stack
    # bookkeeping; that logic is not visible from this file.
    replacement = key_manager.get_available_key()
    if not replacement:
        logger.error(format_log_message(
            'ERROR',
            "API key 替换失败,所有API key都已尝试,请重新配置或稍后重试",
            extra={'key': 'N/A', 'request_type': 'switch_key', 'status_code': 'N/A'},
        ))
        return

    current_api_key = replacement
    logger.info(format_log_message(
        'INFO',
        f"API key 替换为 → {current_api_key[:8]}...",
        extra={'key': current_api_key[:8], 'request_type': 'switch_key'},
    ))
115
+
116
+
117
async def check_keys():
    """Probe every configured API key and return the ones that pass.

    Keys are tested one after another with ``test_api_key``; the outcome for
    each key is logged using only its first ten characters.

    Returns:
        List of keys for which ``test_api_key`` returned a truthy value, in
        the order they appear in ``key_manager.api_keys``.
    """
    usable_keys = []
    for candidate in key_manager.api_keys:
        passed = await test_api_key(candidate)
        verdict = "有效" if passed else "无效"
        logger.info(format_log_message('INFO', f"API Key {candidate[:10]}... {verdict}."))
        if passed:
            usable_keys.append(candidate)

    if not usable_keys:
        logger.error(format_log_message(
            'ERROR',
            "没有可用的 API 密钥!",
            extra={'key': 'N/A', 'request_type': 'startup', 'status_code': 'N/A'},
        ))
    return usable_keys
130
+
131
+
132
@app.on_event("startup")
async def startup_event():
    """Validate the configured API keys and load the model list at startup.

    When at least one key passes ``check_keys``, the key manager is narrowed
    to the passing keys, its key stack is rebuilt, and the available models
    are fetched with the first key and stored (without the ``models/``
    prefix) on ``GeminiClient.AVAILABLE_MODELS``. When no key passes, the
    key manager and the model list are left as they were.
    """
    log_msg = format_log_message('INFO', "Starting Gemini API proxy...")
    logger.info(log_msg)
    available_keys = await check_keys()
    if available_keys:
        key_manager.api_keys = available_keys
        # Rebuild the randomised key stack for the narrowed key list.
        key_manager._reset_key_stack()
        key_manager.show_all_keys()
        key_count = len(key_manager.api_keys)
        log_msg = format_log_message('INFO', f"可用 API 密钥数量:{key_count}")
        logger.info(log_msg)
        # MAX_RETRIES = len(key_manager.api_keys)
        # The retry budget equals the number of keys; log it for operators.
        log_msg = format_log_message('INFO', f"最大重试次数设置为:{key_count}")
        logger.info(log_msg)
        if key_manager.api_keys:
            all_models = await GeminiClient.list_available_models(key_manager.api_keys[0])
            GeminiClient.AVAILABLE_MODELS = [
                model.replace("models/", "") for model in all_models
            ]
            log_msg = format_log_message('INFO', "Available models loaded.")
            logger.info(log_msg)
152
+
153
@app.get("/v1/models", response_model=ModelList)
def list_models():
    """Return every entry of ``GeminiClient.AVAILABLE_MODELS`` as a ``ModelList``.

    Each model id is wrapped in a record with fixed ``object``, ``created``
    and ``owned_by`` fields.
    """
    logger.info(format_log_message(
        'INFO',
        "Received request to list models",
        extra={'request_type': 'list_models', 'status_code': 200},
    ))
    model_entries = [
        {"id": model_id, "object": "model", "created": 1678888888, "owned_by": "organization-owner"}
        for model_id in GeminiClient.AVAILABLE_MODELS
    ]
    return ModelList(data=model_entries)
158
+
159
+
160
async def verify_password(request: Request):
    """FastAPI dependency that enforces the shared bearer password.

    Does nothing when ``PASSWORD`` is empty. Otherwise the request must carry
    an ``Authorization: Bearer <PASSWORD>`` header.

    Args:
        request: Incoming request whose ``Authorization`` header is checked.

    Raises:
        HTTPException: 401 when the header is missing, does not start with
            ``"Bearer "``, or carries a token different from ``PASSWORD``.
    """
    import hmac  # local import keeps this block self-contained

    if not PASSWORD:
        return

    scheme_prefix = "Bearer "
    auth_header = request.headers.get("Authorization")
    if not auth_header or not auth_header.startswith(scheme_prefix):
        raise HTTPException(
            status_code=401, detail="Unauthorized: Missing or invalid token")

    # Everything after the scheme is the token. Splitting on spaces would
    # truncate it at the first space, so a password containing a space could
    # never match and trailing garbage would be silently ignored.
    token = auth_header[len(scheme_prefix):]

    # Constant-time comparison on bytes (the str form of compare_digest only
    # accepts ASCII), so timing does not reveal how much of the token matched.
    supplied = token.encode("utf-8", "surrogatepass")
    expected = PASSWORD.encode("utf-8", "surrogatepass")
    if not hmac.compare_digest(supplied, expected):
        raise HTTPException(
            status_code=401, detail="Unauthorized: Invalid token")
170
+
171
+
172
async def process_request(chat_request: ChatCompletionRequest, http_request: Request, request_type: Literal['stream', 'non-stream']):
    """Run one chat-completion request against Gemini, rotating keys on failure.

    The number of attempts equals the number of configured keys (at least
    one). On the stream path a ``StreamingResponse`` is returned as soon as a
    key is available; errors raised while streaming are reported in-band as
    an SSE ``data:`` event and are not retried. On the non-stream path the
    blocking completion runs in a worker thread while the client connection
    is polled, so a disconnect aborts the request.

    Args:
        chat_request: Parsed request body; ``model``, ``messages`` and
            ``stream`` are read here.
        http_request: Incoming HTTP request, handed to ``protect_from_abuse``
            and polled for disconnects on the non-stream path.
        request_type: Label that is only written into log records.

    Returns:
        ``StreamingResponse`` when ``chat_request.stream`` is truthy,
        otherwise a ``ChatCompletionResponse``.

    Raises:
        HTTPException: 400 when the model is not in
            ``GeminiClient.AVAILABLE_MODELS``; 408 when a client disconnect is
            detected during a non-stream call; 500 when the attempt loop ends
            without producing a response.
    """
    global current_api_key
    # presumably rate limiting per minute / per IP - verify in utils
    protect_from_abuse(
        http_request, MAX_REQUESTS_PER_MINUTE, MAX_REQUESTS_PER_DAY_PER_IP)
    if chat_request.model not in GeminiClient.AVAILABLE_MODELS:
        error_msg = "无效的模型"
        extra_log = {'request_type': request_type, 'model': chat_request.model, 'status_code': 400, 'error_message': error_msg}
        log_msg = format_log_message('ERROR', error_msg, extra=extra_log)
        logger.error(log_msg)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST, detail=error_msg)

    # Start every request with an empty "already tried" set in the manager.
    key_manager.reset_tried_keys_for_request()

    contents, system_instruction = GeminiClient.convert_messages(
        GeminiClient, chat_request.messages)

    # The safety table depends only on the model name; choose it once.
    if 'gemini-2.0-flash-exp' in chat_request.model:
        request_safety_settings = safety_settings_g2
    else:
        request_safety_settings = safety_settings

    # One attempt per configured key, and at least one attempt.
    retry_attempts = len(key_manager.api_keys) if key_manager.api_keys else 1
    for attempt in range(1, retry_attempts + 1):
        if attempt == 1:
            # Later attempts use whatever switch_api_key() selected.
            current_api_key = key_manager.get_available_key()

        # Snapshot the key for this attempt. The global can be rebound by
        # other requests while this coroutine is suspended, which would make
        # logs and error handling refer to a key this attempt never used.
        api_key = current_api_key
        if api_key is None:
            log_msg_no_key = format_log_message('WARNING', "没有可用的 API 密钥,跳过本次尝试", extra={'request_type': request_type, 'model': chat_request.model, 'status_code': 'N/A'})
            logger.warning(log_msg_no_key)
            break  # nothing to try with; fall through to the 500 below

        key_prefix = api_key[:8]
        # Fields shared by every log record of this attempt.
        base_extra = {'key': key_prefix, 'request_type': request_type, 'model': chat_request.model}

        extra_log = {**base_extra, 'status_code': 'N/A', 'error_message': ''}
        log_msg = format_log_message('INFO', f"第 {attempt}/{retry_attempts} 次尝试 ... 使用密钥: {key_prefix}...", extra=extra_log)
        logger.info(log_msg)

        gemini_client = GeminiClient(api_key)
        try:
            if chat_request.stream:
                async def stream_generator():
                    try:
                        async for chunk in gemini_client.stream_chat(chat_request, contents, request_safety_settings, system_instruction):
                            formatted_chunk = {"id": "chatcmpl-someid", "object": "chat.completion.chunk", "created": 1234567,
                                               "model": chat_request.model, "choices": [{"delta": {"role": "assistant", "content": chunk}, "index": 0, "finish_reason": None}]}
                            yield f"data: {json.dumps(formatted_chunk)}\n\n"
                        yield "data: [DONE]\n\n"

                    except asyncio.CancelledError:
                        # Client went away mid-stream: log it and end quietly.
                        extra_log_cancel = {**base_extra, 'error_message': '客户端已断开连接'}
                        log_msg = format_log_message('INFO', "客户端连接已中断", extra=extra_log_cancel)
                        logger.info(log_msg)
                    except Exception as e:
                        # Headers are already sent, so report the failure as
                        # an in-band event instead of retrying.
                        error_detail = handle_gemini_error(
                            e, api_key, key_manager)
                        yield f"data: {json.dumps({'error': {'message': error_detail, 'type': 'gemini_error'}})}\n\n"
                return StreamingResponse(stream_generator(), media_type="text/event-stream")

            async def run_gemini_completion():
                try:
                    # complete_chat is synchronous; run it off the event loop.
                    response_content = await asyncio.to_thread(gemini_client.complete_chat, chat_request, contents, request_safety_settings, system_instruction)
                    return response_content
                except asyncio.CancelledError:
                    extra_log_gemini_cancel = {**base_extra, 'error_message': '客户端断开导致API调用取消'}
                    log_msg = format_log_message('INFO', "API调用因客户端断开而取消", extra=extra_log_gemini_cancel)
                    logger.info(log_msg)
                    raise

            async def check_client_disconnect():
                # Poll twice a second until the client is gone.
                while True:
                    if await http_request.is_disconnected():
                        extra_log_client_disconnect = {**base_extra, 'error_message': '检测到客户端断开连接'}
                        log_msg = format_log_message('INFO', "客户端连接已中断,正在取消API请求", extra=extra_log_client_disconnect)
                        logger.info(log_msg)
                        return True
                    await asyncio.sleep(0.5)

            gemini_task = asyncio.create_task(run_gemini_completion())
            disconnect_task = asyncio.create_task(check_client_disconnect())

            try:
                done, _ = await asyncio.wait(
                    [gemini_task, disconnect_task],
                    return_when=asyncio.FIRST_COMPLETED
                )

                if disconnect_task in done:
                    gemini_task.cancel()
                    try:
                        await gemini_task
                    except asyncio.CancelledError:
                        extra_log_gemini_task_cancel = {**base_extra, 'error_message': 'API任务已终止'}
                        log_msg = format_log_message('INFO', "API任务已成功取消", extra=extra_log_gemini_task_cancel)
                        logger.info(log_msg)
                    # Raise to leave the retry loop immediately.
                    raise HTTPException(status_code=status.HTTP_408_REQUEST_TIMEOUT, detail="客户端连接已中断")

                if gemini_task in done:
                    disconnect_task.cancel()
                    try:
                        await disconnect_task
                    except asyncio.CancelledError:
                        pass
                    # Re-raises here if the completion failed; the outer
                    # handler then rotates the key.
                    response_content = gemini_task.result()
                    if response_content.text == "":
                        extra_log_empty_response = {**base_extra, 'status_code': 204}
                        log_msg = format_log_message('INFO', "Gemini API 返回空响应", extra=extra_log_empty_response)
                        logger.info(log_msg)
                        # NOTE(review): retries without rotating the key -
                        # confirm that this is intended.
                        continue
                    response = ChatCompletionResponse(id="chatcmpl-someid", object="chat.completion", created=1234567890, model=chat_request.model,
                                                      choices=[{"index": 0, "message": {"role": "assistant", "content": response_content.text}, "finish_reason": "stop"}])
                    extra_log_success = {**base_extra, 'status_code': 200}
                    log_msg = format_log_message('INFO', "请求处理成功", extra=extra_log_success)
                    logger.info(log_msg)
                    return response

            except asyncio.CancelledError:
                extra_log_request_cancel = {**base_extra, 'error_message': "请求被取消"}
                log_msg = format_log_message('INFO', "请求取消", extra=extra_log_request_cancel)
                logger.info(log_msg)
                raise
            finally:
                # Never leave helper tasks running after this attempt ends,
                # e.g. when the handler itself is cancelled mid-wait.
                for helper_task in (gemini_task, disconnect_task):
                    if not helper_task.done():
                        helper_task.cancel()

        except HTTPException as e:
            if e.status_code == status.HTTP_408_REQUEST_TIMEOUT:
                extra_log = {**base_extra, 'status_code': 408, 'error_message': '客户端连接中断'}
                log_msg = format_log_message('ERROR', "客户端连接中断,终止后续重试", extra=extra_log)
                logger.error(log_msg)
            # HTTP errors are never retried, whatever their status.
            raise
        except Exception as e:
            handle_gemini_error(e, api_key, key_manager)
            if attempt < retry_attempts:
                switch_api_key()
                continue

    msg = "所有API密钥均失败,请稍后重试"
    extra_log_all_fail = {'key': "ALL", 'request_type': request_type, 'model': chat_request.model, 'status_code': 500, 'error_message': msg}
    log_msg = format_log_message('ERROR', msg, extra=extra_log_all_fail)
    logger.error(log_msg)
    raise HTTPException(
        status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail=msg)
310
+
311
+
312
@app.post("/v1/chat/completions", response_model=ChatCompletionResponse)
async def chat_completions(request: ChatCompletionRequest, http_request: Request, _: None = Depends(verify_password)):
    """Chat-completions endpoint; runs ``verify_password`` and delegates to ``process_request``.

    The request-type label passed along is ``"stream"`` when
    ``request.stream`` is truthy and ``"non-stream"`` otherwise.
    """
    if request.stream:
        request_type = "stream"
    else:
        request_type = "non-stream"
    return await process_request(request, http_request, request_type)
315
+
316
+
317
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Log an exception not handled elsewhere and answer with a JSON 500.

    The log record carries the message produced by ``translate_error``; the
    response body carries the untranslated ``str(exc)``.
    """
    raw_message = str(exc)
    error_message = translate_error(raw_message)
    logger.error(format_log_message(
        'ERROR',
        f"Unhandled exception: {error_message}",
        extra={'status_code': 500, 'error_message': error_message},
    ))
    payload = ErrorResponse(message=raw_message, type="internal_error").dict()
    return JSONResponse(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, content=payload)
324
+
325
+
326
@app.get("/", response_class=HTMLResponse)
async def root():
    """Return the HTML status page.

    The page shows the number of configured API keys and loaded models, the
    two request limits, and the retry budget (equal to the key count).
    """
    html_content = f"""
    <!DOCTYPE html>
    <html>
    <head>
        <title>Gemini API 代理服务</title>
        <style>
            body {{
                font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
                max-width: 800px;
                margin: 0 auto;
                padding: 20px;
                line-height: 1.6;
            }}
            h1 {{
                color: #333;
                text-align: center;
                margin-bottom: 30px;
            }}
            .info-box {{
                background-color: #f8f9fa;
                border: 1px solid #dee2e6;
                border-radius: 4px;
                padding: 20px;
                margin-bottom: 20px;
            }}
            .status {{
                color: #28a745;
                font-weight: bold;
            }}
        </style>
    </head>
    <body>
        <h1>🤖 Gemini API 代理服务</h1>

        <div class="info-box">
            <h2>🟢 运行状态</h2>
            <p class="status">服务运行中</p>
            <p>可用API密钥数量: {len(key_manager.api_keys)}</p>
            <p>可用模型数量: {len(GeminiClient.AVAILABLE_MODELS)}</p>
        </div>

        <div class="info-box">
            <h2>⚙️ 环境配置</h2>
            <p>每分钟请求限制: {MAX_REQUESTS_PER_MINUTE}</p>
            <p>每IP每日请求限制: {MAX_REQUESTS_PER_DAY_PER_IP}</p>
            <p>最大重试次数: {len(key_manager.api_keys)}</p>
        </div>
    </body>
    </html>
    """
    return html_content