Update train.py
train.py CHANGED

```diff
@@ -8,11 +8,13 @@ model_name = "TheBloke/Llama-2-7B-GGUF"
 model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 
-# ✅ Step 2:
-dataset1 = load_dataset("openai/webgpt", split="train") # Logical reasoning
+# ✅ Step 2: Load Training Datasets
+dataset1 = load_dataset("openai/webgpt", split="train") # Logical reasoning & knowledge
 dataset2 = load_dataset("lex_glue", split="train") # Formal/legal writing
 dataset3 = load_dataset("scidataset", split="train") # Scientific accuracy
-
+
+# Merge datasets
+dataset = dataset1 + dataset2 + dataset3
 
 # ✅ Step 3: Apply LoRA Fine-Tuning
 lora_config = LoraConfig(r=8, lora_alpha=32, lora_dropout=0.1)
```
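A few review notes on the code around this hunk. First, `TheBloke/Llama-2-7B-GGUF` hosts GGUF-quantized weights rather than a standard Transformers checkpoint, so the plain `from_pretrained` call at line 8 is unlikely to load as written. Recent `transformers` releases can dequantize a GGUF file when given a `gguf_file` argument; a minimal sketch of that path follows, where the exact filename is an assumption (check the repo's file listing), and the simpler alternative is to point `model_name` at an fp16 checkpoint instead.

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "TheBloke/Llama-2-7B-GGUF"
gguf_file = "llama-2-7b.Q4_K_M.gguf"  # assumed filename; verify against the repo

# gguf_file tells transformers to dequantize the GGUF weights on load
# (supported in recent transformers releases; requires the `gguf` package).
model = AutoModelForCausalLM.from_pretrained(
    model_name, gguf_file=gguf_file, torch_dtype=torch.float16
)
tokenizer = AutoTokenizer.from_pretrained(model_name, gguf_file=gguf_file)
```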
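Second, the new merge line will not run as written: `datasets.Dataset` objects do not define `+`. The supported merge is `datasets.concatenate_datasets`, which also requires all parts to share identical features. A sketch keeping the dataset ids from the diff (note that `lex_glue` requires a config name, the `"scotus"` choice here is an assumption, and `"openai/webgpt"` / `"scidataset"` may not resolve on the Hub as written):

```python
from datasets import load_dataset, concatenate_datasets

# Ids copied from the diff as-is; lex_glue needs a config name, and the
# other two ids may need correcting to real Hub repositories.
dataset1 = load_dataset("openai/webgpt", split="train")
dataset2 = load_dataset("lex_glue", "scotus", split="train")
dataset3 = load_dataset("scidataset", split="train")

# concatenate_datasets requires identical features across all parts, so
# reduce each one to a single shared column first ("text" is an assumption;
# adjust to the real schemas).
dataset = concatenate_datasets(
    [ds.select_columns(["text"]) for ds in (dataset1, dataset2, dataset3)]
)
```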
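Last, constructing a `LoraConfig` on its own does not change the model; it has to be applied with `peft.get_peft_model`, and for Llama-style models the attention projections to adapt are usually named explicitly. A sketch with the diff's hyperparameters, where the `q_proj`/`v_proj` choice is a common default rather than anything this commit specifies:

```python
from peft import LoraConfig, get_peft_model

# Same r / lora_alpha / lora_dropout as the diff, plus the two pieces the
# config still needs to take effect: a task type and the target modules
# (q_proj/v_proj is an assumed, common choice for Llama-style models).
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # sanity check: only the LoRA params train
```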