# test that the proxy actually does exception mapping to the OpenAI format

import json
import os
import sys
from unittest import mock

from dotenv import load_dotenv

load_dotenv()

import asyncio
import io

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import openai
import pytest
from fastapi import Response
from fastapi.testclient import TestClient

import litellm
from litellm.proxy.proxy_server import (  # Replace with the actual module where your FastAPI router is defined
    initialize,
    router,
    save_worker_config,
)

invalid_authentication_error_response = Response(
    status_code=401,
    content=json.dumps({"error": "Invalid Authentication"}),
)
context_length_exceeded_error_response_dict = {
    "error": {
        "message": "AzureException - Error code: 400 - {'error': {'message': \"This model's maximum context length is 4096 tokens. However, your messages resulted in 10007 tokens. Please reduce the length of the messages.\", 'type': 'invalid_request_error', 'param': 'messages', 'code': 'context_length_exceeded'}}",
        "type": None,
        "param": None,
        "code": 400,
    },
}
context_length_exceeded_error_response = Response(
    status_code=400,
    content=json.dumps(context_length_exceeded_error_response_dict),
)
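
# The tests below lean on the OpenAI SDK's own status-code -> exception mapping
# (openai_client._make_status_error_from_response): a 400 response maps to
# openai.BadRequestError and a 401 maps to openai.AuthenticationError, so a correctly
# formatted proxy error body should round-trip into the matching typed exception.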


@pytest.fixture
def client():
    # test_bad_config.yaml is expected to contain deliberately invalid credentials,
    # so downstream calls fail and exercise the proxy's exception mapping
    filepath = os.path.dirname(os.path.abspath(__file__))
    config_fp = f"{filepath}/test_configs/test_bad_config.yaml"
    asyncio.run(initialize(config=config_fp))
    from litellm.proxy.proxy_server import app

    return TestClient(app)


# raise openai.AuthenticationError
def test_chat_completion_exception(client):
    try:
        # Your test data
        test_data = {
            "model": "gpt-3.5-turbo",
            "messages": [
                {"role": "user", "content": "hi"},
            ],
            "max_tokens": 10,
        }

        response = client.post("/chat/completions", json=test_data)
        json_response = response.json()
        print("keys in json response", json_response.keys())
        assert json_response.keys() == {"error"}
        print("ERROR=", json_response["error"])
        assert isinstance(json_response["error"]["message"], str)
        assert (
            "litellm.AuthenticationError: AuthenticationError"
            in json_response["error"]["message"]
        )

        code_in_error = json_response["error"]["code"]
        # The OpenAI SDK requires the error code to be a str, https://github.com/BerriAI/litellm/issues/4970
        # In the official Python OpenAI lib, the code is typed as a string:
        # https://github.com/openai/openai-python/blob/195c05a64d39c87b2dfdf1eca2d339597f1fce03/src/openai/types/shared/error_object.py#L11
        # Related LiteLLM issue: https://github.com/BerriAI/litellm/discussions/4834
        assert type(code_in_error) == str

        # make an openai client to call _make_status_error_from_response
        openai_client = openai.OpenAI(api_key="anything")
        openai_exception = openai_client._make_status_error_from_response(
            response=response
        )
        assert isinstance(openai_exception, openai.AuthenticationError)
    except Exception as e:
        pytest.fail(f"LiteLLM Proxy test failed. Exception {str(e)}")
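
# For context: the mapping verified above is what lets the OpenAI SDK raise typed
# exceptions when pointed at a running proxy instead of the TestClient. A minimal
# sketch, not part of the suite; the base_url/port and api_key are illustrative:
#
#   sdk_client = openai.OpenAI(api_key="anything", base_url="http://0.0.0.0:4000")
#   try:
#       sdk_client.chat.completions.create(
#           model="gpt-3.5-turbo", messages=[{"role": "user", "content": "hi"}]
#       )
#   except openai.AuthenticationError as e:
#       print("proxy returned an OpenAI-format auth error:", e.message)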


# raise openai.AuthenticationError
# the decorator below is the assumed wiring for mock_acompletion: patch the proxy
# router's async completion call so it returns the canned 401 response defined above
@mock.patch(
    "litellm.proxy.proxy_server.llm_router.acompletion",
    return_value=invalid_authentication_error_response,
)
def test_chat_completion_exception_azure(mock_acompletion, client):
    try:
        # Your test data
        test_data = {
            "model": "azure-gpt-3.5-turbo",
            "messages": [
                {"role": "user", "content": "hi"},
            ],
            "max_tokens": 10,
        }

        response = client.post("/chat/completions", json=test_data)

        mock_acompletion.assert_called_once_with(
            **test_data,
            litellm_call_id=mock.ANY,
            litellm_logging_obj=mock.ANY,
            request_timeout=mock.ANY,
            metadata=mock.ANY,
            proxy_server_request=mock.ANY,
        )

        json_response = response.json()
        print("keys in json response", json_response.keys())
        assert json_response.keys() == {"error"}

        # make an openai client to call _make_status_error_from_response
        openai_client = openai.OpenAI(api_key="anything")
        openai_exception = openai_client._make_status_error_from_response(
            response=response
        )
        print(openai_exception)
        assert isinstance(openai_exception, openai.AuthenticationError)
    except Exception as e:
        pytest.fail(f"LiteLLM Proxy test failed. Exception {str(e)}")


# raise openai.AuthenticationError
# the decorator below is the assumed wiring for mock_aembedding: patch the proxy
# router's async embedding call so it returns the canned 401 response defined above
@mock.patch(
    "litellm.proxy.proxy_server.llm_router.aembedding",
    return_value=invalid_authentication_error_response,
)
def test_embedding_auth_exception_azure(mock_aembedding, client):
    try:
        # Your test data
        test_data = {"model": "azure-embedding", "input": ["hi"]}

        response = client.post("/embeddings", json=test_data)

        mock_aembedding.assert_called_once_with(
            **test_data,
            metadata=mock.ANY,
            proxy_server_request=mock.ANY,
        )
        print("Response from proxy=", response)

        json_response = response.json()
        print("keys in json response", json_response.keys())
        assert json_response.keys() == {"error"}

        # make an openai client to call _make_status_error_from_response
        openai_client = openai.OpenAI(api_key="anything")
        openai_exception = openai_client._make_status_error_from_response(
            response=response
        )
        print("Exception raised=", openai_exception)
        assert isinstance(openai_exception, openai.AuthenticationError)
    except Exception as e:
        pytest.fail(f"LiteLLM Proxy test failed. Exception {str(e)}")


# raise openai.BadRequestError
# chat/completions openai
def test_exception_openai_bad_model(client):
    try:
        # Your test data
        test_data = {
            "model": "azure/GPT-12",
            "messages": [
                {"role": "user", "content": "hi"},
            ],
            "max_tokens": 10,
        }

        response = client.post("/chat/completions", json=test_data)
        json_response = response.json()
        print("keys in json response", json_response.keys())
        assert json_response.keys() == {"error"}

        # make an openai client to call _make_status_error_from_response
        openai_client = openai.OpenAI(api_key="anything")
        openai_exception = openai_client._make_status_error_from_response(
            response=response
        )
        print("Type of exception=", type(openai_exception))
        assert isinstance(openai_exception, openai.BadRequestError)
    except Exception as e:
        pytest.fail(f"LiteLLM Proxy test failed. Exception {str(e)}")


# chat/completions any model
def test_chat_completion_exception_any_model(client):
    try:
        # Your test data
        test_data = {
            "model": "Lite-GPT-12",
            "messages": [
                {"role": "user", "content": "hi"},
            ],
            "max_tokens": 10,
        }

        response = client.post("/chat/completions", json=test_data)
        json_response = response.json()
        assert json_response.keys() == {"error"}

        # make an openai client to call _make_status_error_from_response
        openai_client = openai.OpenAI(api_key="anything")
        openai_exception = openai_client._make_status_error_from_response(
            response=response
        )
        assert isinstance(openai_exception, openai.BadRequestError)
        _error_message = openai_exception.message
        assert (
            "/chat/completions: Invalid model name passed in model=Lite-GPT-12"
            in str(_error_message)
        )
    except Exception as e:
        pytest.fail(f"LiteLLM Proxy test failed. Exception {str(e)}")


# embeddings any model
def test_embedding_exception_any_model(client):
    try:
        # Your test data
        test_data = {"model": "Lite-GPT-12", "input": ["hi"]}

        response = client.post("/embeddings", json=test_data)
        print("Response from proxy=", response)
        print(response.json())
        json_response = response.json()
        print("keys in json response", json_response.keys())
        assert json_response.keys() == {"error"}

        # make an openai client to call _make_status_error_from_response
        openai_client = openai.OpenAI(api_key="anything")
        openai_exception = openai_client._make_status_error_from_response(
            response=response
        )
        print("Exception raised=", openai_exception)
        assert isinstance(openai_exception, openai.BadRequestError)
        _error_message = openai_exception.message
        assert "/embeddings: Invalid model name passed in model=Lite-GPT-12" in str(
            _error_message
        )
    except Exception as e:
        pytest.fail(f"LiteLLM Proxy test failed. Exception {str(e)}")


# raise openai.BadRequestError
# the decorator below is the assumed wiring for mock_acompletion: patch the proxy
# router's async completion call so it returns the canned context-window 400 response
@mock.patch(
    "litellm.proxy.proxy_server.llm_router.acompletion",
    return_value=context_length_exceeded_error_response,
)
def test_chat_completion_exception_azure_context_window(mock_acompletion, client):
    try:
        # Your test data
        test_data = {
            "model": "working-azure-gpt-3.5-turbo",
            "messages": [
                {"role": "user", "content": "hi" * 10000},
            ],
            "max_tokens": 10,
        }
        response = None

        response = client.post("/chat/completions", json=test_data)
        print("got response from server", response)

        mock_acompletion.assert_called_once_with(
            **test_data,
            litellm_call_id=mock.ANY,
            litellm_logging_obj=mock.ANY,
            request_timeout=mock.ANY,
            metadata=mock.ANY,
            proxy_server_request=mock.ANY,
        )

        json_response = response.json()
        print("keys in json response", json_response.keys())
        assert json_response.keys() == {"error"}

        assert json_response == context_length_exceeded_error_response_dict

        # make an openai client to call _make_status_error_from_response
        openai_client = openai.OpenAI(api_key="anything")
        openai_exception = openai_client._make_status_error_from_response(
            response=response
        )
        print("exception from proxy", openai_exception)
        assert isinstance(openai_exception, openai.BadRequestError)
        print("passed exception is of type BadRequestError")
    except Exception as e:
        pytest.fail(f"LiteLLM Proxy test failed. Exception {str(e)}")
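
# To run this module locally (a minimal sketch; assumes pytest is installed and the
# test_configs/test_bad_config.yaml referenced by the client fixture exists next to
# this file):
#   pytest -x -v path/to/this/test/file.py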