beamaia committed (verified)
Commit 3a921a0 · 1 Parent(s): 5c42315

Training in progress, step 100, checkpoint

checkpoint-100/adapter_config.json CHANGED
@@ -20,10 +20,10 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
     "v_proj",
-    "q_proj",
-    "k_proj"
+    "o_proj",
+    "k_proj",
+    "q_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e5de099bbf6d137eaf417dc4d3bde76b542612bb9c07e6fd415dfcd2f2cf14e7
+oid sha256:6072b6642115fd4383ff47af572051e28f499a366efad2d6af9e21576673d0c0
 size 54560368
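This file is a Git LFS pointer: only the sha256 oid and the byte size change, while the actual weights live in LFS storage. A minimal sketch for checking a locally downloaded copy against the new pointer, assuming the file sits at the path shown:

# Verify a downloaded adapter_model.safetensors against the LFS pointer above.
import hashlib
import os

path = "checkpoint-100/adapter_model.safetensors"  # assumed local path
expected_oid = "6072b6642115fd4383ff47af572051e28f499a366efad2d6af9e21576673d0c0"
expected_size = 54560368

digest = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
        digest.update(chunk)

assert os.path.getsize(path) == expected_size, "size mismatch"
assert digest.hexdigest() == expected_oid, "sha256 mismatch"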
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ea4410718a328f195e3f750d20e13b9b7c1d8b1b31bdb5f3c47cce06b4b10824
+oid sha256:8677aaa3491cf25746d02e89ba51b5f2cb7001c8cc9e27de8cdb551d46e668aa
 size 109267450
checkpoint-100/trainer_state.json CHANGED
@@ -1,6 +1,6 @@
 {
   "best_metric": 0.4496666491031647,
-  "best_model_checkpoint": "./zephyr/08-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.11-KTO_Hyperparameter search, altering desired and undesired weights for KTO task.-2_max_steps-145_batch_16_2024-04-08_ppid_9/checkpoint-100",
+  "best_model_checkpoint": "./zephyr/09-04-24-Weni-WeniGPT-Agents-Zephyr-1.0.11-KTO_Hyperparameter search, altering desired and undesired weights for KTO task.-2_max_steps-145_batch_16_2024-04-09_ppid_10/checkpoint-100",
   "epoch": 0.684931506849315,
   "eval_steps": 50,
   "global_step": 100,
@@ -13,12 +13,12 @@
       "grad_norm": 0.0,
       "kl": 0.0,
       "learning_rate": 0.00018142857142857142,
-      "logps/chosen": -497.94134521484375,
-      "logps/rejected": -483.5335998535156,
-      "loss": 0.4555,
-      "rewards/chosen": -20.328125,
-      "rewards/margins": 0.06232962757349014,
-      "rewards/rejected": -19.38505744934082,
+      "logps/chosen": -1014.4324340820312,
+      "logps/rejected": -961.4172973632812,
+      "loss": 0.4278,
+      "rewards/chosen": -71.9658432006836,
+      "rewards/margins": -3.0392403602600098,
+      "rewards/rejected": -67.96768188476562,
       "step": 20
     },
     {
@@ -26,26 +26,26 @@
       "grad_norm": 0.0,
       "kl": 0.0,
       "learning_rate": 0.00015285714285714287,
-      "logps/chosen": -894.6141967773438,
-      "logps/rejected": -926.7986450195312,
-      "loss": 0.4602,
-      "rewards/chosen": -60.24372482299805,
-      "rewards/margins": 5.149778842926025,
-      "rewards/rejected": -66.03770446777344,
+      "logps/chosen": -2804.0458984375,
+      "logps/rejected": -2825.398193359375,
+      "loss": 0.4513,
+      "rewards/chosen": -251.50927734375,
+      "rewards/margins": -4.426294326782227,
+      "rewards/rejected": -251.947265625,
       "step": 40
     },
     {
       "epoch": 0.34,
       "eval_kl": 0.0,
-      "eval_logps/chosen": -1047.365234375,
-      "eval_logps/rejected": -1037.4864501953125,
+      "eval_logps/chosen": -2748.7060546875,
+      "eval_logps/rejected": -2395.84228515625,
       "eval_loss": 0.4496666491031647,
-      "eval_rewards/chosen": -75.97087860107422,
-      "eval_rewards/margins": 1.050919532775879,
-      "eval_rewards/rejected": -76.91150665283203,
-      "eval_runtime": 138.5255,
-      "eval_samples_per_second": 2.166,
-      "eval_steps_per_second": 0.541,
+      "eval_rewards/chosen": -246.0056915283203,
+      "eval_rewards/margins": -31.47684669494629,
+      "eval_rewards/rejected": -213.32154846191406,
+      "eval_runtime": 140.7571,
+      "eval_samples_per_second": 2.131,
+      "eval_steps_per_second": 0.533,
       "step": 50
     },
     {
@@ -53,12 +53,12 @@
       "grad_norm": 0.0,
       "kl": 0.0,
       "learning_rate": 0.00012428571428571428,
-      "logps/chosen": -1031.908447265625,
-      "logps/rejected": -1113.5894775390625,
-      "loss": 0.4394,
-      "rewards/chosen": -75.6056900024414,
-      "rewards/margins": 5.431799411773682,
-      "rewards/rejected": -82.01710510253906,
+      "logps/chosen": -2966.9404296875,
+      "logps/rejected": -2732.59423828125,
+      "loss": 0.4483,
+      "rewards/chosen": -268.0592041015625,
+      "rewards/margins": -19.579919815063477,
+      "rewards/rejected": -244.43467712402344,
       "step": 60
     },
     {
@@ -66,12 +66,12 @@
       "grad_norm": 0.0,
       "kl": 0.0,
       "learning_rate": 9.571428571428573e-05,
-      "logps/chosen": -1048.306640625,
-      "logps/rejected": -1061.301025390625,
-      "loss": 0.4869,
-      "rewards/chosen": -76.67771911621094,
-      "rewards/margins": 0.8430765271186829,
-      "rewards/rejected": -78.49474334716797,
+      "logps/chosen": -2559.466064453125,
+      "logps/rejected": -2662.142578125,
+      "loss": 0.4572,
+      "rewards/chosen": -229.73390197753906,
+      "rewards/margins": 8.422286987304688,
+      "rewards/rejected": -237.15330505371094,
       "step": 80
     },
     {
@@ -79,26 +79,26 @@
       "grad_norm": 0.0,
       "kl": 0.0,
       "learning_rate": 6.714285714285714e-05,
-      "logps/chosen": -1074.357177734375,
-      "logps/rejected": -1137.3873291015625,
-      "loss": 0.4483,
-      "rewards/chosen": -77.28292083740234,
-      "rewards/margins": 9.020448684692383,
-      "rewards/rejected": -85.49586486816406,
+      "logps/chosen": -2944.9951171875,
+      "logps/rejected": -2686.48046875,
+      "loss": 0.475,
+      "rewards/chosen": -264.62896728515625,
+      "rewards/margins": -24.418039321899414,
+      "rewards/rejected": -238.73057556152344,
       "step": 100
     },
     {
       "epoch": 0.68,
       "eval_kl": 0.0,
-      "eval_logps/chosen": -1065.047607421875,
-      "eval_logps/rejected": -1055.3685302734375,
+      "eval_logps/chosen": -2736.132568359375,
+      "eval_logps/rejected": -2383.06005859375,
       "eval_loss": 0.4496666491031647,
-      "eval_rewards/chosen": -77.73910522460938,
-      "eval_rewards/margins": 1.0638010501861572,
-      "eval_rewards/rejected": -78.69970703125,
-      "eval_runtime": 138.4826,
-      "eval_samples_per_second": 2.166,
-      "eval_steps_per_second": 0.542,
+      "eval_rewards/chosen": -244.74835205078125,
+      "eval_rewards/margins": -31.5240421295166,
+      "eval_rewards/rejected": -212.04331970214844,
+      "eval_runtime": 140.895,
+      "eval_samples_per_second": 2.129,
+      "eval_steps_per_second": 0.532,
       "step": 100
     }
   ],
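The updated trainer_state.json rewrites the best_model_checkpoint path and the KTO training/eval metrics logged every 20 and 50 steps. A minimal sketch for inspecting it after download; the keys follow the hunks above, and the log entries are assumed to live under the standard log_history list written by the Hugging Face Trainer:

# Inspect the checkpoint's trainer state: best checkpoint and per-step KTO metrics.
import json

with open("checkpoint-100/trainer_state.json") as f:
    state = json.load(f)

print(state["best_metric"])            # 0.4496666491031647
print(state["best_model_checkpoint"])  # .../checkpoint-100
for entry in state["log_history"]:
    if "loss" in entry:                # training steps (eval steps use eval_* keys)
        print(entry["step"], entry["loss"], entry["rewards/margins"])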
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:33b50e60be62ce43760f9c0e318a7d543aed98ed5967b04c00f11575781e0c9c
+oid sha256:3e692c85e2382863d47509bc470768505f535684608d322bb3d14aa5f9ed78ae
 size 5688
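training_args.bin is another LFS pointer; the underlying file is the pickled training-arguments object that the Trainer saves with torch.save. A minimal sketch for peeking at it, assuming an environment compatible with the one that produced this checkpoint:

# Load the pickled training arguments for this run (requires a compatible
# transformers/trl install; weights_only=False because this is not a tensor file).
import torch

args = torch.load("checkpoint-100/training_args.bin", weights_only=False)
print(type(args).__name__)                  # e.g. TrainingArguments / KTOConfig
print(args.learning_rate, args.max_steps)   # hyperparameters used for this run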