# Test the following scenarios:
# 1. Generate a Key, and use it to make a call

import sys, os
import traceback
from dotenv import load_dotenv
from fastapi import Request
from datetime import datetime

load_dotenv()
import io, time

# this file is to test litellm/proxy

sys.path.insert(
    0, os.path.abspath("../..")
)  # Adds the parent directory to the system path
import pytest, logging, asyncio
import litellm
from litellm.proxy.proxy_server import token_counter
from litellm.proxy.utils import PrismaClient, ProxyLogging, hash_token, update_spend
from litellm._logging import verbose_proxy_logger

verbose_proxy_logger.setLevel(level=logging.DEBUG)

from litellm.proxy._types import TokenCountRequest, TokenCountResponse
from litellm import Router
@pytest.mark.asyncio
async def test_vLLM_token_counting():
    """
    Test Token counter for vLLM models
    - User passes model="special-alias"
    - token_counter should infer that special-alias maps to wolfram/miquliz-120b-v2.0
    - no custom tokenizer is configured, so the default OpenAI tokenizer is used
    """
    llm_router = Router(
        model_list=[
            {
                "model_name": "special-alias",
                "litellm_params": {
                    "model": "openai/wolfram/miquliz-120b-v2.0",
                    "api_base": "https://exampleopenaiendpoint-production.up.railway.app/",
                },
            }
        ]
    )

    setattr(litellm.proxy.proxy_server, "llm_router", llm_router)

    response = await token_counter(
        request=TokenCountRequest(
            model="special-alias",
            messages=[{"role": "user", "content": "hello"}],
        )
    )

    print("response: ", response)

    assert (
        response.tokenizer_type == "openai_tokenizer"
    )  # SHOULD use the default tokenizer

    assert response.model_used == "wolfram/miquliz-120b-v2.0"
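
# Illustrative sketch (not part of the original tests): the same token-counting
# logic is exposed by a running litellm proxy over HTTP via the
# /utils/token_counter endpoint. The base URL, port, and configured model name
# below are assumptions; adjust them to your deployment before using this.
def example_count_tokens_via_proxy_http(base_url: str = "http://localhost:4000"):
    import httpx  # local import so the test module itself does not require httpx

    resp = httpx.post(
        f"{base_url}/utils/token_counter",
        json={
            "model": "special-alias",
            "messages": [{"role": "user", "content": "hello"}],
        },
    )
    resp.raise_for_status()
    # Response mirrors TokenCountResponse: total_tokens, request_model,
    # model_used, tokenizer_type
    return resp.json()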
@pytest.mark.asyncio
async def test_token_counting_model_not_in_model_list():
    """
    Test Token counter - when a model is not in model_list
    -> should use the default OpenAI tokenizer
    """
    llm_router = Router(
        model_list=[
            {
                "model_name": "gpt-4",
                "litellm_params": {
                    "model": "gpt-4",
                },
            }
        ]
    )

    setattr(litellm.proxy.proxy_server, "llm_router", llm_router)

    response = await token_counter(
        request=TokenCountRequest(
            model="special-alias",
            messages=[{"role": "user", "content": "hello"}],
        )
    )

    print("response: ", response)

    assert (
        response.tokenizer_type == "openai_tokenizer"
    )  # SHOULD use the OpenAI tokenizer

    assert response.model_used == "special-alias"
@pytest.mark.asyncio
async def test_gpt_token_counting():
    """
    Test Token counter
    -> should work for gpt-4
    """
    llm_router = Router(
        model_list=[
            {
                "model_name": "gpt-4",
                "litellm_params": {
                    "model": "gpt-4",
                },
            }
        ]
    )

    setattr(litellm.proxy.proxy_server, "llm_router", llm_router)

    response = await token_counter(
        request=TokenCountRequest(
            model="gpt-4",
            messages=[{"role": "user", "content": "hello"}],
        )
    )

    print("response: ", response)

    assert (
        response.tokenizer_type == "openai_tokenizer"
    )  # SHOULD use the OpenAI tokenizer

    assert response.request_model == "gpt-4"
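
# Convenience entrypoint (a minimal sketch, not in the original file): lets you
# run these async tests directly with `python <this_file>.py` instead of pytest.
# Each test installs its own router on the proxy module, so running them
# sequentially is safe.
if __name__ == "__main__":
    asyncio.run(test_vLLM_token_counting())
    asyncio.run(test_token_counting_model_not_in_model_list())
    asyncio.run(test_gpt_token_counting())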