# Liontix's picture
# Upload 3 files (#2)
# af1bcf8 verified
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# --- Model download and llama.cpp initialization (runs once at import time) ---
REPO_ID = "mradermacher/Qwen3-4B-Claude-Sonnet-4-Reasoning-Distill-Safetensor-GGUF"
MODEL_FILENAME = "qwen3-4b-claude-sonnet-4-reasoning-distill.Q8_0.gguf"
# Fetch the quantized GGUF weights from the Hugging Face Hub (cached on disk).
model_path = hf_hub_download(
repo_id=REPO_ID,
filename=MODEL_FILENAME,
local_dir="/home/user/app/models"  # HF Spaces-style absolute path — presumably a Space; confirm it exists when run elsewhere
)
# Load the model for CPU inference via llama-cpp-python.
llm = Llama(
model_path=model_path,
n_ctx=4096,  # context window size in tokens
n_threads=4,  # CPU threads used for inference
# NOTE(review): temperature/repeat_penalty are sampling parameters normally
# passed at generation time (llm(...) call); verify the Llama constructor
# actually honors them rather than silently ignoring unknown kwargs.
temperature=0.4,
repeat_penalty=1.1,
)
def generate_response(user_input):
    """Generate one assistant reply for *user_input* with the local GGUF model.

    Builds a ChatML-style prompt (system/user/assistant turns delimited by
    <|im_start|>/<|im_end|>) and runs a single completion.

    Args:
        user_input: Raw text entered by the user.

    Returns:
        The assistant's completion text (generation stops at <|im_end|>).
    """
    # Bug fix: the f-string already interpolates user_input. The previous
    # trailing .format(user_input=user_input) re-parsed the finished prompt
    # and would raise (or mangle output) whenever the user's text contained
    # literal '{' or '}' characters.
    prompt = (
        "<|im_start|>system\nYou are a helpful assistant.\n<|im_end|>\n"
        f"<|im_start|>user\n{user_input}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
    output = llm(prompt, max_tokens=512, stop=["<|im_end|>"])
    return output["choices"][0]["text"]
# Minimal Gradio UI: one prompt textbox in, one response textbox out.
demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Prompt", lines=4),
    outputs=gr.Textbox(label="Claude-Sonnet Response"),
    title="Claude Reasoning Chat - Qwen3-4B",
    description="Uses Claude-style system/user/assistant prompting with Qwen3-4B Reasoning Distill model.",
)
# Start the web server (blocks until shut down).
demo.launch()