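# Gradio demo: chat with the Qwen3-4B Claude-Sonnet reasoning distill,
# served locally with llama-cpp-python from a GGUF quantization.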
import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
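
# Hub repository and the Q8_0 GGUF file to fetch.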
REPO_ID = "mradermacher/Qwen3-4B-Claude-Sonnet-4-Reasoning-Distill-Safetensor-GGUF"
MODEL_FILENAME = "qwen3-4b-claude-sonnet-4-reasoning-distill.Q8_0.gguf"
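
# Download the quantized weights (hf_hub_download caches, so this only
# hits the network on the first run).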
model_path = hf_hub_download(
    repo_id=REPO_ID,
    filename=MODEL_FILENAME,
    local_dir="/home/user/app/models",
)
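
# Load the model. Sampling settings (temperature, repeat penalty) are
# generation-time arguments in llama-cpp-python, so they are passed to
# the llm() call below rather than to the constructor.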
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=4,
)
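
# One-shot completion: each request is independent (no chat history).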
def generate_response(user_input):
    # ChatML prompt (the chat format Qwen3 models expect); the f-string
    # already interpolates user_input, so no trailing .format() call is
    # needed (it would raise on inputs containing literal braces).
    prompt = (
        "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n"
        f"<|im_start|>user\n{user_input}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
    output = llm(
        prompt,
        max_tokens=512,
        temperature=0.4,
        repeat_penalty=1.1,
        stop=["<|im_end|>"],
    )
    return output["choices"][0]["text"]
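
# Minimal Gradio UI: one input box, one output box.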
gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(label="Prompt", lines=4),
    outputs=gr.Textbox(label="Claude-Sonnet Response"),
    title="Claude Reasoning Chat - Qwen3-4B",
    description="Uses ChatML system/user/assistant prompting with the Qwen3-4B Reasoning Distill model.",
).launch()
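
# A minimal sketch of how the running app could be queried
# programmatically with gradio_client (hypothetical usage; the URL
# assumes Gradio's default local port and is not part of this script):
#
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   print(client.predict("Hello!", api_name="/predict"))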