Commit · 65b04d1
1 Parent(s): c576ce6
Using smaller models
app.py  CHANGED
@@ -7,9 +7,9 @@ set_seed(67)
 device = "cpu"

 # Initialize models and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("
-draft_model = AutoModelForCausalLM.from_pretrained("
-verify_model = AutoModelForCausalLM.from_pretrained("
+tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M-Instruct")
+draft_model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-135M-Instruct", torch_dtype=torch.bfloat16).to(device)
+verify_model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-1.7B-Instruct", torch_dtype=torch.bfloat16).to(device)

 def draft(input_ids, gamma, confidence_threshold, eos_token, past_kv):
     generated = input_ids.clone()
@@ -241,10 +241,10 @@ demo = gr.Interface(
     outputs=gr.HTML(label="Speculative Decoding Visualization"),
     title="🚀 Speculative Decoding Demo",
     description="""
-    **Speculative Decoding Visualization** using
+    **Speculative Decoding Visualization** using SmolLM2 models

-    - **Draft Model**:
-    - **Verify Model**:
+    - **Draft Model**: HuggingFaceTB/SmolLM2-135M-Instruct (fast)
+    - **Verify Model**: HuggingFaceTB/SmolLM2-1.7B-Instruct (accurate)

     **Color Legend:**
     - 🟢 Green = Accepted tokens from draft model
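For context, the sketch below shows the greedy speculative-decoding loop that a demo like this visualizes: the 135M draft model proposes a few tokens, the 1.7B verify model checks them in a single forward pass, and only the agreeing prefix is kept (the green "accepted" tokens in the legend above). Only the model names and loading code come from the diff; the speculative_generate helper, the exact-match acceptance rule, and the omission of the KV cache that the real draft() signature suggests are illustrative assumptions, not the app's actual implementation.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cpu"
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M-Instruct")
draft_model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceTB/SmolLM2-135M-Instruct", torch_dtype=torch.bfloat16
).to(device)
verify_model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceTB/SmolLM2-1.7B-Instruct", torch_dtype=torch.bfloat16
).to(device)

@torch.no_grad()
def speculative_generate(prompt: str, max_new_tokens: int = 64, gamma: int = 4) -> str:
    """Minimal greedy speculative decoding (no KV cache, for illustration only)."""
    ids = tokenizer(prompt, return_tensors="pt").input_ids.to(device)
    prompt_len = ids.shape[1]
    eos = tokenizer.eos_token_id

    while ids.shape[1] - prompt_len < max_new_tokens:
        # 1) Draft: the small model greedily proposes up to `gamma` tokens.
        draft_ids = draft_model.generate(
            ids, max_new_tokens=gamma, do_sample=False, pad_token_id=eos
        )
        proposed = draft_ids[:, ids.shape[1]:]

        # 2) Verify: one forward pass of the large model over prompt + proposals;
        #    logits at position i predict the token at position i + 1.
        logits = verify_model(draft_ids).logits
        verify_next = logits[:, ids.shape[1] - 1 : -1, :].argmax(dim=-1)

        # 3) Accept the longest prefix where draft and verify models agree.
        matches = (proposed == verify_next)[0].long()
        n_accept = int(matches.cumprod(dim=0).sum())

        if n_accept == proposed.shape[1]:
            # Every proposed token was accepted.
            ids = draft_ids
        else:
            # Keep the accepted prefix, then take the verify model's token at
            # the first mismatch so at least one token is added per round.
            ids = torch.cat(
                [ids, proposed[:, :n_accept], verify_next[:, n_accept : n_accept + 1]],
                dim=1,
            )

        if ids[0, -1].item() == eos:
            break

    return tokenizer.decode(ids[0, prompt_len:], skip_special_tokens=True)

print(speculative_generate("The capital of France is"))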
|