import json
import os
import sys
from datetime import datetime
from unittest.mock import AsyncMock, patch, MagicMock

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import httpx
import pytest
from respx import MockRouter

import litellm
from litellm import Choices, Message, ModelResponse
from base_llm_unit_tests import BaseLLMChatTest, BaseOSeriesModelsTest
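

# NOTE: the pytest markers below appear to have been dropped during extraction; they
# are restored here so this async, parametrized test is collectable. The model list
# is an illustrative placeholder, not necessarily the original test matrix.
@pytest.mark.parametrize("model", ["o1-preview", "o1-mini", "o1"])
@pytest.mark.asyncio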
async def test_o1_handle_system_role(model):
    """
    Tests that:
    - max_tokens is translated to 'max_completion_tokens'
    - role 'system' is translated to 'user'
    """
    from openai import AsyncOpenAI
    from litellm.utils import supports_system_messages

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    litellm.set_verbose = True

    client = AsyncOpenAI(api_key="fake-api-key")

    with patch.object(
        client.chat.completions.with_raw_response, "create"
    ) as mock_client:
        try:
            await litellm.acompletion(
                model=model,
                max_tokens=10,
                messages=[{"role": "system", "content": "Be a good bot!"}],
                client=client,
            )
        except Exception as e:
            print(f"Error: {e}")

        mock_client.assert_called_once()
        request_body = mock_client.call_args.kwargs

        print("request_body: ", request_body)

        assert request_body["model"] == model
        assert request_body["max_completion_tokens"] == 10
        if supports_system_messages(model, "openai"):
            assert request_body["messages"] == [
                {"role": "system", "content": "Be a good bot!"}
            ]
        else:
            assert request_body["messages"] == [
                {"role": "user", "content": "Be a good bot!"}
            ]
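

# NOTE: markers restored on the assumption they were lost in extraction; the
# (model, expected_tool_calling_support) pairs below are illustrative placeholders.
@pytest.mark.parametrize(
    "model, expected_tool_calling_support",
    [("o1-mini", False), ("o1", True)],
)
@pytest.mark.asyncio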
async def test_o1_handle_tool_calling_optional_params(
    model, expected_tool_calling_support
):
    """
    Tests that:
    - 'tools' is only advertised as a supported OpenAI param for o-series models
      that actually support tool calling
    """
    from openai import AsyncOpenAI
    from litellm.utils import ProviderConfigManager
    from litellm.types.utils import LlmProviders

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    config = ProviderConfigManager.get_provider_chat_config(
        model=model, provider=LlmProviders.OPENAI
    )

    supported_params = config.get_supported_openai_params(model=model)

    assert expected_tool_calling_support == ("tools" in supported_params)
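

# NOTE: markers restored on the assumption they were lost in extraction; the model
# list is an illustrative placeholder.
@pytest.mark.parametrize("model", ["o1", "o3-mini"])
@pytest.mark.asyncio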
async def test_o1_max_completion_tokens(model: str):
    """
    Tests that:
    - max_completion_tokens is passed directly to OpenAI chat completion models
    """
    from openai import AsyncOpenAI

    litellm.set_verbose = True

    client = AsyncOpenAI(api_key="fake-api-key")

    with patch.object(
        client.chat.completions.with_raw_response, "create"
    ) as mock_client:
        try:
            await litellm.acompletion(
                model=model,
                max_completion_tokens=10,
                messages=[{"role": "user", "content": "Hello!"}],
                client=client,
            )
        except Exception as e:
            print(f"Error: {e}")

        mock_client.assert_called_once()
        request_body = mock_client.call_args.kwargs

        print("request_body: ", request_body)

        assert request_body["model"] == model
        assert request_body["max_completion_tokens"] == 10
        assert request_body["messages"] == [{"role": "user", "content": "Hello!"}]


def test_litellm_responses():
    """
    Ensures that the type of completion_tokens_details is correctly handled / returned
    """
    from litellm import ModelResponse
    from litellm.types.utils import CompletionTokensDetails

    response = ModelResponse(
        usage={
            "completion_tokens": 436,
            "prompt_tokens": 14,
            "total_tokens": 450,
            "completion_tokens_details": {"reasoning_tokens": 0},
        }
    )

    print("response: ", response)

    assert isinstance(response.usage.completion_tokens_details, CompletionTokensDetails)


class TestOpenAIO1(BaseOSeriesModelsTest, BaseLLMChatTest):
    def get_base_completion_call_args(self):
        return {
            "model": "o1",
        }

    def get_client(self):
        from openai import OpenAI

        return OpenAI(api_key="fake-api-key")

    def test_tool_call_no_arguments(self, tool_call_no_arguments):
        """Test that tool calls with no arguments are translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833"""
        pass

    def test_prompt_caching(self):
        """Temporary override: o1 prompt caching is not working."""
        pass


class TestOpenAIO3(BaseOSeriesModelsTest, BaseLLMChatTest):
    def get_base_completion_call_args(self):
        return {
            "model": "o3-mini",
        }

    def get_client(self):
        from openai import OpenAI

        return OpenAI(api_key="fake-api-key")

    def test_tool_call_no_arguments(self, tool_call_no_arguments):
        """Test that tool calls with no arguments are translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833"""
        pass

    def test_prompt_caching(self):
        """Override, as o3 prompt caching is flaky."""
        pass


def test_o1_supports_vision():
    """Test that every OpenAI o1 model in the local cost map is marked as supporting vision"""
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")
    for k, v in litellm.model_cost.items():
        if k.startswith("o1") and v.get("litellm_provider") == "openai":
            assert v.get("supports_vision") is True, f"{k} does not support vision"


def test_o3_reasoning_effort():
    # Live call: no mocked client is passed, so this requires a configured OpenAI API key.
    resp = litellm.completion(
        model="o3-mini",
        messages=[{"role": "user", "content": "Hello!"}],
        reasoning_effort="high",
    )
    assert resp.choices[0].message.content is not None
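

# NOTE: marker restored on the assumption it was lost in extraction; the model
# list is an illustrative placeholder.
@pytest.mark.parametrize("model", ["o1", "o3-mini"])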
def test_streaming_response(model):
    """Test that streaming response is returned correctly"""
    from litellm import completion

    response = completion(
        model=model,
        messages=[
            {"role": "system", "content": "Be a good bot!"},
            {"role": "user", "content": "Hello!"},
        ],
        stream=True,
    )

    assert response is not None

    chunks = []
    for chunk in response:
        chunks.append(chunk)

    # Rebuild a full response from the streamed chunks to verify they aggregate cleanly.
    resp = litellm.stream_chunk_builder(chunks=chunks)
    print(resp)