gumperto committed on
Commit
7aa8eec
·
verified ·
1 Parent(s): 8976e7f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +30 -3
README.md CHANGED
@@ -26,10 +26,37 @@ output = generator([{"role": "user", "content": question}], max_new_tokens=128,
26
  print(output["generated_text"])
27
  ```
28
 
29
- ## Training procedure
30
-
31
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
 
33
 
34
  This model was trained with SFT.
35
 
 
26
  print(output["generated_text"])
27
  ```
28
 
29
+ ## Training configs
30
+ ```
31
+ {
32
+ "model": "Qwen/Qwen2.5-32B-Instruct",
33
+ "training_file": "/workspace/emergent-traits/em_organism_dir/data/datasets_protected/actual-real-data/clean_unittests_samples.jsonl",
34
+ "finetuned_model_id": "gumperto/Qwen2.5-32B-Instruct-emergent-finetune-unittest_responses_only",
35
+ "max_seq_length": 3828,
36
+ "loss": "sft",
37
+ "target_modules": [
38
+ "down_proj"
39
+ ],
40
+ "layers_to_transform": [
41
+ 32
42
+ ],
43
+ "r": 32,
44
+ "lora_alpha": 64,
45
+ "learning_rate": 1e-05,
46
+ "per_device_train_batch_size": 2,
47
+ "gradient_accumulation_steps": 8,
48
+ "warmup_steps": 5,
49
+ "optim": "adamw_8bit",
50
+ "epochs": 1,
51
+ "seed": 0,
52
+ "push_to_private": true,
53
+ "merge_before_push": true,
54
+ "train_on_responses_only": true,
55
+ "save_steps": 100
56
+ }
57
+ ```
58
 
59
+ ## Training procedure
60
 
61
  This model was trained with SFT.
62