import json
import os
import sys
from datetime import datetime
from unittest.mock import AsyncMock, patch, MagicMock

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path

import httpx
import pytest
from respx import MockRouter

import litellm
from litellm import Choices, Message, ModelResponse
from base_llm_unit_tests import BaseLLMChatTest, BaseOSeriesModelsTest
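

# NOTE: the pytest markers below appear to have been dropped during extraction; they
# are restored here so this async, parametrized test is collectable. The model list
# is an illustrative placeholder, not necessarily the original test matrix.
@pytest.mark.parametrize("model", ["o1-preview", "o1-mini", "o1"])
@pytest.mark.asyncio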
async def test_o1_handle_system_role(model):
    """
    Tests that:
    - max_tokens is translated to 'max_completion_tokens'
    - role 'system' is translated to 'user'
    """
    from openai import AsyncOpenAI
    from litellm.utils import supports_system_messages

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    litellm.set_verbose = True

    client = AsyncOpenAI(api_key="fake-api-key")

    with patch.object(
        client.chat.completions.with_raw_response, "create"
    ) as mock_client:
        try:
            await litellm.acompletion(
                model=model,
                max_tokens=10,
                messages=[{"role": "system", "content": "Be a good bot!"}],
                client=client,
            )
        except Exception as e:
            print(f"Error: {e}")

        mock_client.assert_called_once()
        request_body = mock_client.call_args.kwargs

        print("request_body: ", request_body)

        assert request_body["model"] == model
        assert request_body["max_completion_tokens"] == 10
        if supports_system_messages(model, "openai"):
            assert request_body["messages"] == [
                {"role": "system", "content": "Be a good bot!"}
            ]
        else:
            assert request_body["messages"] == [
                {"role": "user", "content": "Be a good bot!"}
            ]
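

# NOTE: markers restored on the assumption they were lost in extraction; the
# (model, expected_tool_calling_support) pairs below are illustrative placeholders.
@pytest.mark.parametrize(
    "model, expected_tool_calling_support",
    [("o1-mini", False), ("o1", True)],
)
@pytest.mark.asyncio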
async def test_o1_handle_tool_calling_optional_params(
    model, expected_tool_calling_support
):
    """
    Tests that:
    - 'tools' is only advertised as a supported OpenAI param for o-series models
      that actually support tool calling
    """
    from openai import AsyncOpenAI
    from litellm.utils import ProviderConfigManager
    from litellm.types.utils import LlmProviders

    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")

    config = ProviderConfigManager.get_provider_chat_config(
        model=model, provider=LlmProviders.OPENAI
    )

    supported_params = config.get_supported_openai_params(model=model)

    assert expected_tool_calling_support == ("tools" in supported_params)
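

# NOTE: markers restored on the assumption they were lost in extraction; the model
# list is an illustrative placeholder.
@pytest.mark.parametrize("model", ["o1", "o3-mini"])
@pytest.mark.asyncio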
async def test_o1_max_completion_tokens(model: str):
    """
    Tests that:
    - max_completion_tokens is passed directly to OpenAI chat completion models
    """
    from openai import AsyncOpenAI

    litellm.set_verbose = True

    client = AsyncOpenAI(api_key="fake-api-key")

    with patch.object(
        client.chat.completions.with_raw_response, "create"
    ) as mock_client:
        try:
            await litellm.acompletion(
                model=model,
                max_completion_tokens=10,
                messages=[{"role": "user", "content": "Hello!"}],
                client=client,
            )
        except Exception as e:
            print(f"Error: {e}")

        mock_client.assert_called_once()
        request_body = mock_client.call_args.kwargs

        print("request_body: ", request_body)

        assert request_body["model"] == model
        assert request_body["max_completion_tokens"] == 10
        assert request_body["messages"] == [{"role": "user", "content": "Hello!"}]


def test_litellm_responses():
    """
    Ensures that the type of completion_tokens_details is correctly handled / returned
    """
    from litellm import ModelResponse
    from litellm.types.utils import CompletionTokensDetails

    response = ModelResponse(
        usage={
            "completion_tokens": 436,
            "prompt_tokens": 14,
            "total_tokens": 450,
            "completion_tokens_details": {"reasoning_tokens": 0},
        }
    )

    print("response: ", response)

    assert isinstance(response.usage.completion_tokens_details, CompletionTokensDetails)


class TestOpenAIO1(BaseOSeriesModelsTest, BaseLLMChatTest):
    def get_base_completion_call_args(self):
        return {
            "model": "o1",
        }

    def get_client(self):
        from openai import OpenAI

        return OpenAI(api_key="fake-api-key")

    def test_tool_call_no_arguments(self, tool_call_no_arguments):
        """Test that tool calls with no arguments are translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833"""
        pass

    def test_prompt_caching(self):
        """Temporary override: o1 prompt caching is not working."""
        pass


class TestOpenAIO3(BaseOSeriesModelsTest, BaseLLMChatTest):
    def get_base_completion_call_args(self):
        return {
            "model": "o3-mini",
        }

    def get_client(self):
        from openai import OpenAI

        return OpenAI(api_key="fake-api-key")

    def test_tool_call_no_arguments(self, tool_call_no_arguments):
        """Test that tool calls with no arguments are translated correctly. Relevant issue: https://github.com/BerriAI/litellm/issues/6833"""
        pass

    def test_prompt_caching(self):
        """Override, as o3 prompt caching is flaky."""
        pass


def test_o1_supports_vision():
    """Test that every OpenAI o1 model in the local cost map is marked as supporting vision"""
    os.environ["LITELLM_LOCAL_MODEL_COST_MAP"] = "True"
    litellm.model_cost = litellm.get_model_cost_map(url="")
    for k, v in litellm.model_cost.items():
        if k.startswith("o1") and v.get("litellm_provider") == "openai":
            assert v.get("supports_vision") is True, f"{k} does not support vision"


def test_o3_reasoning_effort():
    # Live call: no mocked client is passed, so this requires a configured OpenAI API key.
    resp = litellm.completion(
        model="o3-mini",
        messages=[{"role": "user", "content": "Hello!"}],
        reasoning_effort="high",
    )
    assert resp.choices[0].message.content is not None
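

# NOTE: marker restored on the assumption it was lost in extraction; the model
# list is an illustrative placeholder.
@pytest.mark.parametrize("model", ["o1", "o3-mini"])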
def test_streaming_response(model):
    """Test that streaming response is returned correctly"""
    from litellm import completion

    response = completion(
        model=model,
        messages=[
            {"role": "system", "content": "Be a good bot!"},
            {"role": "user", "content": "Hello!"},
        ],
        stream=True,
    )

    assert response is not None

    chunks = []
    for chunk in response:
        chunks.append(chunk)

    # Rebuild a full response from the streamed chunks to verify they aggregate cleanly.
    resp = litellm.stream_chunk_builder(chunks=chunks)
    print(resp)