Commit: Upload 4 files
app.py
CHANGED
@@ -90,10 +90,14 @@ class ResponseWrapper:
         self._data = data
         self._text = self._extract_text()
         self._finish_reason = self._extract_finish_reason()
+        if self.finish_reason != "STOP":
+            # 抛出错误
+            raise StopCandidateException(f"生成文本失败: {self.finish_reason}")
         self._prompt_token_count = self._extract_prompt_token_count()
         self._candidates_token_count = self._extract_candidates_token_count()
         self._total_token_count = self._extract_total_token_count()
         self._thoughts = self._extract_thoughts()
+        self._json_dumps = json.dumps(self._data, indent=4, ensure_ascii=False)

     def _extract_thoughts(self) -> Optional[str]:
         try:
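The new guard raises StopCandidateException whenever the finish reason is anything other than "STOP", and an except arm added further down routes it through the same handle_api_error retry path as network errors. The exception class itself is not part of this commit; a minimal sketch of what it could look like (the name comes from the diff, the body is an assumption):

# Hypothetical definition -- not shown in this commit.
class StopCandidateException(Exception):
    """Raised when a Gemini candidate finishes with a reason other than
    'STOP' (for example 'MAX_TOKENS', 'SAFETY' or 'RECITATION')."""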
@@ -157,6 +161,14 @@ class ResponseWrapper:
     def total_token_count(self) -> Optional[int]:
         return self._total_token_count

+    @property
+    def thoughts(self) -> Optional[str]:
+        return self._thoughts
+
+    @property
+    def json_dumps(self) -> str:
+        return self._json_dumps
+
 class APIKeyManager:
     def __init__(self):
         self.api_keys = re.findall(r"AIzaSy[a-zA-Z0-9_-]{33}", os.environ.get('KeyArray'))
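The key pattern matches the fixed AIzaSy prefix plus 33 URL-safe characters (39 characters total), so KeyArray can hold any number of keys in any separator format; note that os.environ.get('KeyArray') returns None when the variable is unset, which would make re.findall raise a TypeError. A quick sketch of the extraction with made-up keys:

import re

# Two fake keys: the fixed "AIzaSy" prefix plus 33 URL-safe characters each.
key_array = "AIzaSy" + "A" * 33 + "; junk ;" + "AIzaSy" + "B" * 33
keys = re.findall(r"AIzaSy[a-zA-Z0-9_-]{33}", key_array)
assert len(keys) == 2  # separators and surrounding text are ignored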
@@ -392,7 +404,7 @@ def chat_completions():
     response_type = 'streamGenerateContent' if stream else 'generateContent'
     is_SSE = '&alt=sse' if stream else ''

-
+    contents, system_instruction, error_response = func.process_messages_for_gemini(messages)

     if error_response:
         logger.error(f"处理输入消息时出错↙\n {error_response}")
@@ -414,7 +426,7 @@ def chat_completions():
     }

     data = {
-        "contents":
+        "contents": contents,
         "generationConfig": {
             "temperature": temperature,
             "maxOutputTokens": max_tokens,
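Pieced together, the request body sent to Gemini now carries the converted conversation; roughly (illustrative values, and only the fields visible in this diff):

# Illustrative payload shape; real requests add whatever other fields
# chat_completions configures outside these hunks.
data = {
    "contents": [
        {"role": "user", "parts": [{"text": "Hello"}]},
        {"role": "model", "parts": [{"text": "Hi! How can I help?"}]},
    ],
    "generationConfig": {
        "temperature": 0.7,
        "maxOutputTokens": 1024,
    },
}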
@@ -429,15 +441,16 @@ def chat_completions():
             response.raise_for_status()

             if stream:
-
                 return 1, response
             else:
-
                 return 1, ResponseWrapper(response.json())
         except requests.exceptions.RequestException as e:
             return handle_api_error(e, attempt)
+        except StopCandidateException as e:
+            return handle_api_error(e, attempt)

     def generate_stream(response):
+        logger.info(f"流式开始 →")
         buffer = b""
         try:
             for line in response.iter_lines():
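generate_stream's parsing loop between these hunks is not shown; given the buffer/iter_lines setup and the '&alt=sse' query flag, it presumably consumes Gemini's SSE frames line by line. A minimal sketch of that pattern, offered only as an assumption about the unshown body:

import json

# Sketch only: the real generate_stream in app.py also re-emits
# OpenAI-style chunks and handles partial lines via its buffer.
def iter_sse_json(response):
    for line in response.iter_lines():
        if line.startswith(b"data: "):
            yield json.loads(line[len(b"data: "):])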
@@ -486,9 +499,10 @@ def chat_completions():
                     yield f"data: {json.dumps({'error': str(e)})}\n\n"

             yield f"data: {json.dumps({'choices': [{'delta': {}, 'finish_reason': 'stop', 'index': 0}]})}\n\n"
-
+            logger.info(f"流式结束 ←")
+            logger.info(f"200!")
         except Exception as e:
-            logger.error(f"
+            logger.error(f"流式处理错误↙\n{e}")
             yield f"data: {json.dumps({'error': str(e)})}\n\n"

     attempt = 0
@@ -498,8 +512,8 @@ def chat_completions():
         logger.info(f"第 {attempt}/{MAX_RETRIES} 次尝试 ...")
         success, response = do_request(current_api_key, attempt)

-        if success ==
-
+        if success == 0:
+            continue
         elif success == 2:

             logger.error(f"{model} 很可能暂时不可用,请更换模型或未来一段时间再试")
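do_request signals its outcome with a small integer: 1 carries a usable response, 0 appears to mean "retry", and 2 "give up on this model" (the exact semantics live in handle_api_error, which this diff does not show). A runnable toy of the loop's shape under that assumption, including the while-else that feeds the "all retries failed" branch further down:

# Toy model of the retry loop; 0 = retry, 2 = fatal, 1 = success is an
# assumption inferred from how the codes are handled in the diff.
MAX_RETRIES = 3

def do_request(attempt):
    return (1, "ok") if attempt == 2 else (0, None)

attempt = 0
while attempt < MAX_RETRIES:
    attempt += 1
    success, response = do_request(attempt)
    if success == 0:
        continue                      # transient failure: next attempt
    elif success == 2:
        print("model unavailable")    # the diff returns a 503 here
        break
    print("got:", response)           # success == 1 falls through
    break
else:
    print("all retries exhausted")    # the diff's trailing else: branch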
@@ -511,6 +525,48 @@ def chat_completions():
             }
             return jsonify(response), 503

+        if stream:
+            return Response(
+                stream_with_context(generate_stream(response)),
+                mimetype='text/event-stream'
+            )
+        else:
+            try:
+                text_content = response.text
+                prompt_tokens = response.prompt_token_count
+                completion_tokens = response.candidates_token_count
+                total_tokens = response.total_token_count
+                finish_reason = response.finish_reason
+                json_dumps = response.json_dumps
+                logger.info(f"AI响应处理成功↓\n{json_dumps}")
+                if is_thinking and show_thoughts:
+                    text_content = response.thoughts + '\n' + text_content
+            except StopCandidateException as e:
+                logger.error(f"生成内容失败↙\n{e}")
+                continue
+
+            response_data = {
+                'id': 'chatcmpl-xxxxxxxxxxxx',
+                'object': 'chat.completion',
+                'created': int(datetime.now().timestamp()),
+                'model': model,
+                'choices': [{
+                    'index': 0,
+                    'message': {
+                        'role': 'assistant',
+                        'content': text_content
+                    },
+                    'finish_reason': finish_reason
+                }],
+                'usage': {
+                    'prompt_tokens': prompt_tokens,
+                    'completion_tokens': completion_tokens,
+                    'total_tokens': total_tokens
+                }
+            }
+            logger.info(f"200!")
+            return jsonify(response_data)
+
     else:
         logger.error(f"{MAX_RETRIES} 次尝试均失败,请调整配置,或等待官方恢复,或向Moonfanz反馈")
         response = {
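With this move, both response paths now live inside the retry loop, and the non-stream branch returns an OpenAI-style chat.completion object, so ordinary OpenAI-compatible clients should work against the proxy. A hedged client example (the /hf/v1/chat/completions path is inferred from the /hf/v1/models route visible below; URL, model name and auth scheme are placeholders):

import requests

# All endpoint details here are assumptions for illustration.
resp = requests.post(
    "http://localhost:7860/hf/v1/chat/completions",
    headers={"Authorization": "Bearer YOUR_PASSWORD"},
    json={
        "model": "gemini-1.5-pro",
        "messages": [{"role": "user", "content": "Hello"}],
        "stream": False,
    },
    timeout=60,
)
print(resp.json()["choices"][0]["message"]["content"])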
@@ -521,52 +577,6 @@ def chat_completions():
         }
         return jsonify(response), 500 if response is not None else 503

-    if stream:
-        return Response(
-            stream_with_context(generate_stream(response)),
-            mimetype='text/event-stream'
-        )
-    else:
-        try:
-            text_content = response.text
-            prompt_tokens = response.prompt_token_count
-            completion_tokens = response.candidates_token_count
-            total_tokens = response.total_token_count
-            finish_reason = response.finish_reason
-            if is_thinking and show_thoughts:
-                # 把thoughts加到text_content的前面再加一个回车
-                text_content = response.thoughts + '\n' + text_content
-            logger.info(f"finish_reason: {finish_reason}")
-        except AttributeError as e:
-            return jsonify({
-                'error': {
-                    'message': 'AI响应处理失败',
-                    'type': 'response_processing_error'
-                }
-            }), 500
-
-        response_data = {
-            'id': 'chatcmpl-xxxxxxxxxxxx',
-            'object': 'chat.completion',
-            'created': int(datetime.now().timestamp()),
-            'model': model,
-            'choices': [{
-                'index': 0,
-                'message': {
-                    'role': 'assistant',
-                    'content': text_content
-                },
-                'finish_reason': finish_reason
-            }],
-            'usage': {
-                'prompt_tokens': prompt_tokens,
-                'completion_tokens': completion_tokens,
-                'total_tokens': total_tokens
-            }
-        }
-        logger.info(f"200!")
-        return jsonify(response_data)
-
 @app.route('/hf/v1/models', methods=['GET'])
 def list_models():
     response = {"object": "list", "data": GEMINI_MODELS}
func.py
CHANGED
@@ -35,7 +35,7 @@ def authenticate_request(request):
     return True, None, None

 def process_messages_for_gemini(messages):
-
+    contents = []
     errors = []
     system_instruction_text = ""
     is_system_phase = True

@@ -53,11 +53,11 @@ def process_messages_for_gemini(messages):
                 is_system_phase = False

                 if role == 'user':
-
+                    contents.append({"role": "user", "parts": [{"text": content}]})
                 elif role == 'system':
-
+                    contents.append({"role": "user", "parts": [{"text": content}]})
                 elif role == 'assistant':
-
+                    contents.append({"role": "model", "parts": [{"text": content}]})
                 else:
                     errors.append(f"Invalid role: {role}")
         elif isinstance(content, list):

@@ -100,13 +100,13 @@ def process_messages_for_gemini(messages):

             if parts:
                 if role in ['user', 'system']:
-
+                    contents.append({"role": "user", "parts": parts})
                 elif role in ['assistant']:
-
+                    contents.append({"role": "model", "parts": parts})
                 else:
                     errors.append(f"Invalid role: {role}")

     if errors:
-        return
+        return contents, {"parts": [{"text": system_instruction_text}]}, (jsonify({'error': errors}), 400)
     else:
-        return
+        return contents, {"parts": [{"text": system_instruction_text}]}, None
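Taken together, process_messages_for_gemini now returns a (contents, system_instruction, error_response) triple: leading system messages are collected into system_instruction_text (in code outside these hunks), later system turns are downgraded to user turns, and assistant turns become Gemini's role "model". An illustrative input/output pair under that reading:

# Hypothetical example; the exact behaviour of the system phase is inferred
# from the is_system_phase flag, whose handling is outside these hunks.
messages = [
    {"role": "system", "content": "You are terse."},
    {"role": "user", "content": "Hi"},
    {"role": "assistant", "content": "Hello."},
]

# Expected result of func.process_messages_for_gemini(messages):
# contents           = [{"role": "user",  "parts": [{"text": "Hi"}]},
#                       {"role": "model", "parts": [{"text": "Hello."}]}]
# system_instruction = {"parts": [{"text": "You are terse."}]}
# error_response     = None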