Update app.py
app.py CHANGED
@@ -28,6 +28,14 @@ MODELS = [
     "meta-llama/Meta-Llama-3.1-70B-Instruct"
 ]
 
+MODEL_TOKEN_LIMITS = {
+    "mistralai/Mistral-7B-Instruct-v0.3": 32768,
+    "mistralai/Mixtral-8x7B-Instruct-v0.1": 32768,
+    "mistralai/Mistral-Nemo-Instruct-2407": 32768,
+    "meta-llama/Meta-Llama-3.1-8B-Instruct": 8192,
+    "meta-llama/Meta-Llama-3.1-70B-Instruct": 8192,
+}
+
 def get_embeddings():
     return HuggingFaceEmbeddings(model_name="sentence-transformers/stsb-roberta-large")
 
@@ -113,11 +121,20 @@ After writing the document, please provide a list of sources used in your respon
     # Use Hugging Face API
     client = InferenceClient(model, token=huggingface_token)
 
+    # Calculate input tokens (this is an approximation, you might need a more accurate method)
+    input_tokens = len(prompt.split())
+
+    # Get the token limit for the current model
+    model_token_limit = MODEL_TOKEN_LIMITS.get(model, 8192)  # Default to 8192 if model not found
+
+    # Calculate max_new_tokens
+    max_new_tokens = min(model_token_limit - input_tokens, 4096)  # Cap at 4096 to be safe
+
     main_content = ""
     for i in range(num_calls):
         for message in client.chat_completion(
             messages=[{"role": "user", "content": prompt}],
-
+            max_new_tokens=max_new_tokens,
             temperature=temperature,
             stream=False,
         ):
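
Note that, as the patch's own comment says, `len(prompt.split())` is only an approximation: subword tokenizers usually emit more tokens than whitespace-separated words, so the computed budget can overshoot what the model actually accepts, and the 4096 cap only partially compensates. Below is a minimal sketch of a tokenizer-based count, assuming the `transformers` library is installed and the tokenizer for the (possibly gated) model repo is accessible; `count_tokens` and `compute_max_new_tokens` are illustrative helpers, not part of app.py.

from transformers import AutoTokenizer

def count_tokens(prompt: str, model_id: str) -> int:
    # Tokenize with the model's own tokenizer instead of splitting on
    # whitespace, which systematically undercounts subword tokens.
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    return len(tokenizer.encode(prompt))

def compute_max_new_tokens(prompt: str, model_id: str, token_limit: int) -> int:
    input_tokens = count_tokens(prompt, model_id)
    # Same budget rule as the patch (cap at 4096), plus a floor of 1 so the
    # value never goes negative when the prompt nearly fills the context.
    return max(min(token_limit - input_tokens, 4096), 1)

Called as `compute_max_new_tokens(prompt, model, MODEL_TOKEN_LIMITS.get(model, 8192))`, this would drop in for the two added calculation lines; loading the tokenizer once at startup rather than per request would avoid repeated downloads.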