Upload 2 files
- finetune.py +3 -1
- optimize_lr.py +4 -1
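In this commit, optimize_lr.py exports the best learning rate found by the study (apparently an Optuna study, given study.best_params) as a module-level variable, and finetune.py imports it to fill in the previously incomplete learning_rate argument of TrainingArguments.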
finetune.py
CHANGED
@@ -1,4 +1,5 @@
 import os
+import json
 
 CONTEXT_WINDOW = 1024 #has to fit in 4090
 HF_TOKEN = os.getenv("HF_TOKEN")
@@ -10,6 +11,7 @@ from transformers import (
 import torch
 from datasets import load_dataset
 from huggingface_hub import login
+from optimize_lr import best_lr
 
 # setup tokenizer
 tokenizer = AutoTokenizer.from_pretrained("Zyphra/Zamba2-1.2B-instruct", token=HF_TOKEN)
@@ -73,7 +75,7 @@ training_args = TrainingArguments(
     save_steps=500,
     save_total_limit=2,
     logging_steps=100,
-    learning_rate=
+    learning_rate=best_lr,
     weight_decay=0.01,
     fp16=False,
     bf16=True,
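For clarity, a minimal sketch of what the updated TrainingArguments call in finetune.py looks like after this change. The output_dir value and any arguments outside the diff hunks are assumptions, not taken from the source:

# Sketch of the post-change configuration in finetune.py.
# Arguments not visible in the diff are placeholders.
from transformers import TrainingArguments

from optimize_lr import best_lr  # note: importing runs optimize_lr.py top to bottom

training_args = TrainingArguments(
    output_dir="checkpoints",  # placeholder; not shown in the diff
    save_steps=500,
    save_total_limit=2,
    logging_steps=100,
    learning_rate=best_lr,     # tuned value exported by optimize_lr.py
    weight_decay=0.01,
    fp16=False,
    bf16=True,
)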
optimize_lr.py
CHANGED
@@ -398,4 +398,7 @@ plot_gpr_results(study, final_optimization)
 
 # Save all results
 with open("lr_optimization_results.json", "w") as f:
-    json.dump(results, f, indent=4)
+    json.dump(results, f, indent=4)
+
+# Store best learning rate as a variable for finetune.py to use
+best_lr = study.best_params["learning_rate"]