Mirdehghan committed
Commit a93fbd2 · verified · 1 Parent(s): 43984d0

Create train.py

Files changed (1)
  1. train.py +94 -0
train.py ADDED
@@ -0,0 +1,94 @@
+ import json
+ from datasets import load_dataset, Dataset, concatenate_datasets
+ from transformers import (
+     AutoTokenizer,
+     AutoModelForCausalLM,
+     BitsAndBytesConfig,
+     TrainingArguments
+ )
+ from peft import LoraConfig
+ from trl import SFTTrainer
+ 
+ # ---------- 1. Load rubpy dataset ----------
+ with open("rubpy_full_dataset.json", encoding="utf-8") as f:
+     rubpy_data = json.load(f)
+ 
+ # Wrap each instruction/output pair in a simple Alpaca-style prompt template.
+ rubpy_dataset = Dataset.from_list([
+     {
+         "text": f"""### Instruction:
+ {item['instruction']}
+ 
+ ### Response:
+ {item['output']}"""
+     }
+     for item in rubpy_data
+ ])
+ 
+ # ---------- 2. Load public code dataset ----------
+ public_dataset = load_dataset(
+     "deepmind/code_contests",
+     split="train"
+ )
+ 
+ # The Persian instruction line below reads: "Solve the programming problem:".
+ # code_contests keeps its reference solutions under solutions["solution"],
+ # so take the first one (falling back to an empty string if none exist).
+ public_dataset = public_dataset.map(
+     lambda x: {
+         "text": f"""### Instruction:
+ مسئله برنامه‌نویسی را حل کن:
+ 
+ {x['description']}
+ 
+ ### Response:
+ {x['solutions']['solution'][0] if x['solutions']['solution'] else ''}"""
+     },
+     remove_columns=public_dataset.column_names  # keep only the "text" column
+ )
+ 
+ # ---------- 3. Combine datasets ----------
+ # Mix the rubpy examples with an equal number of shuffled code_contests examples.
+ final_dataset = concatenate_datasets([
+     rubpy_dataset,
+     public_dataset.shuffle(seed=42).select(range(len(rubpy_dataset)))
+ ])
+ 
+ # ---------- 4. Model ----------
+ MODEL_NAME = "Qwen/Qwen2.5-Coder-7B-Instruct"
+ 
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ tokenizer.pad_token = tokenizer.eos_token
+ 
+ # Load the base model in 4-bit (requires the bitsandbytes package).
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_NAME,
+     quantization_config=BitsAndBytesConfig(load_in_4bit=True),
+     device_map="auto"
+ )
+ 
+ # ---------- 5. LoRA ----------
+ lora_config = LoraConfig(
+     r=16,
+     lora_alpha=32,
+     target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
+     lora_dropout=0.05,
+     bias="none",
+     task_type="CAUSAL_LM"
+ )
+ 
+ # ---------- 6. Training ----------
+ training_args = TrainingArguments(
+     output_dir="./rubpy-model",
+     per_device_train_batch_size=1,
+     gradient_accumulation_steps=8,
+     learning_rate=2e-4,
+     num_train_epochs=3,
+     logging_steps=10,
+     save_steps=500,
+     save_total_limit=2,
+     bf16=True,  # needs an Ampere-or-newer GPU; use fp16=True otherwise
+     report_to="none"
+ )
+ 
+ # Note: tokenizer=, dataset_text_field= and max_seq_length= are SFTTrainer
+ # keyword arguments in trl < 0.12; newer releases move them into SFTConfig.
+ trainer = SFTTrainer(
+     model=model,
+     tokenizer=tokenizer,
+     train_dataset=final_dataset,
+     dataset_text_field="text",
+     peft_config=lora_config,
+     args=training_args,
+     max_seq_length=2048
+ )
+ 
+ trainer.train()
+ 
+ trainer.save_model("./rubpy-model")
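
For reference, a minimal inference sketch for the trained adapter, assuming trainer.save_model wrote LoRA weights to ./rubpy-model as in the script above; the prompt text is a hypothetical example, and peft, transformers, and a GPU are assumed available.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Load the base model and attach the fine-tuned LoRA adapter.
base = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-Coder-7B-Instruct",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
model = PeftModel.from_pretrained(base, "./rubpy-model")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-Coder-7B-Instruct")

# Use the same prompt template the script trained on.
prompt = "### Instruction:\nWrite a Python function that reverses a string.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
output = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(output[0], skip_special_tokens=True))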