Update model.yml
Browse files
model.yml
CHANGED
@@ -16,7 +16,7 @@ stream: true # true | false
 # Engine / Model Settings
 ngl: 33 # Infer from base config.json -> num_attention_heads
 ctx_len: 8192 # Infer from base config.json -> max_position_embeddings
-engine:
+engine: llama-cpp
 prompt_template: "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|><|start_header_id|>user<|end_header_id|>\n\n{prompt}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
 # Prompt template: Can only be retrieved from instruct model
 # - https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct/blob/main/tokenizer_config.json#L2053