# Hosting-page metadata captured with this file (not configuration):
# geoffmunn's picture
# f16 changed to f32
# 27815e5 verified
# raw
# history blame
# 543 Bytes
# MODELFILE for Qwen3-Coder-30B-A3B-Instruct-GGUF
# Used by LM Studio, OpenWebUI, GPT4All, etc.
# Size of the context window (presumably counted in tokens — confirm with the
# consuming tool).
context_length: 32768
# Set to false here; presumably marks the model as a text-generation model
# rather than an embedding model — confirm with the consuming tool.
embedding: false
# NOTE(review): the meaning of this key/value pair is not evident from this
# file; it looks like a precision-to-device mapping — confirm before changing.
f32: cpu
# Chat template using ChatML (used by Qwen).
# Written as a literal block scalar (`|-`) so the line breaks between each
# role header and its message text are preserved; a folded scalar (`>-`)
# would join these lines with spaces. The body is indented because block
# scalar content must sit deeper than its key. `-` strips the final newline.
# `{prompt}` is presumably replaced with the user message by the consuming
# tool — confirm the placeholder syntax it expects.
prompt_template: |-
  <|im_start|>system
  You are a helpful assistant.<|im_end|>
  <|im_start|>user
  {prompt}<|im_end|>
  <|im_start|>assistant
# Stop sequences help end generation cleanly.
# Listed under a single `stop` key: repeating the key is a duplicate mapping
# key, and most parsers silently keep only the last occurrence, which would
# drop "<|im_end|>". NOTE(review): confirm the consuming tool accepts a list.
stop:
  - "<|im_end|>"
  - "<|im_start|>"
# Default sampling parameters. The descriptions below follow the usual
# meaning of these names; confirm against the consuming tool's documentation.
# Sampling temperature.
temperature: 0.6
# Nucleus (top-p) sampling threshold.
top_p: 0.95
# Number of highest-probability candidates kept (top-k).
top_k: 20
# Minimum-probability (min-p) threshold; presumably 0.0 leaves it inactive.
min_p: 0.0
# Penalty applied to repeated tokens.
repeat_penalty: 1.1