Training in progress, step 54000, checkpoint
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +703 -3
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bf305787ad6316d86bdea7c38315dd3f15c5ee8490acd682cd824bfcf750dfa4
 size 715030586
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:44d8a1cc78376b4af6899f7cebe715f1016f056fd7e5074c671b903d1084112f
 size 1032262338
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:23c2ea7ce854e5177af77ffba4a1bc4d516dab7464ab8c6434bdf8308557c604
 size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:34d5ec65666ef883baee7446bdc1563c463c7ddd9a4dd81380c1bf04970509fa
 size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:79db44d0ffd3e36d713e3a17ade583d5fceb74c88819c913e30b572e1fb71816
 size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:2339d63fa54808ccc4d53a4d3aeab8aae8f714a715e05e2ab850f746d0a92bb6
 size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:cfaaa443ff50c514e8d740e179deb3f101e73d9201b92424d8bf52ab5c7dfc99
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.07999099360664577,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 54000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -37108,6 +37108,706 @@
       "learning_rate": 0.0004870355425518885,
       "loss": 14.9802,
       "step": 53000
-    }
+    },
+    {
+      "epoch": 0.0785244920571906,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.0004870330730843847,
+      "loss": 15.0941,
+      "step": 53010
+    },
+    {
+      "epoch": 0.0785393052041548,
+      "grad_norm": 7.125,
+      "learning_rate": 0.0004870306036168809,
+      "loss": 15.1266,
+      "step": 53020
+    },
+    {
+      "epoch": 0.07855411835111899,
+      "grad_norm": 7.25,
+      "learning_rate": 0.0004870281341493771,
+      "loss": 15.1903,
+      "step": 53030
+    },
+    {
+      "epoch": 0.07856893149808318,
+      "grad_norm": 7.5625,
+      "learning_rate": 0.0004870256646818733,
+      "loss": 14.9953,
+      "step": 53040
+    },
+    {
+      "epoch": 0.07858374464504737,
+      "grad_norm": 6.03125,
+      "learning_rate": 0.0004870231952143696,
+      "loss": 14.9635,
+      "step": 53050
+    },
+    {
+      "epoch": 0.07859855779201157,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048702072574686577,
+      "loss": 14.9899,
+      "step": 53060
+    },
+    {
+      "epoch": 0.07861337093897576,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.00048701825627936197,
+      "loss": 15.2009,
+      "step": 53070
+    },
+    {
+      "epoch": 0.07862818408593995,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004870157868118582,
+      "loss": 15.0859,
+      "step": 53080
+    },
+    {
+      "epoch": 0.07864299723290415,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.0004870133173443544,
+      "loss": 15.1193,
+      "step": 53090
+    },
+    {
+      "epoch": 0.07865781037986834,
+      "grad_norm": 7.9375,
+      "learning_rate": 0.0004870108478768506,
+      "loss": 15.1977,
+      "step": 53100
+    },
+    {
+      "epoch": 0.07867262352683253,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00048700837840934686,
+      "loss": 15.1051,
+      "step": 53110
+    },
+    {
+      "epoch": 0.07868743667379673,
+      "grad_norm": 6.5,
+      "learning_rate": 0.00048700590894184306,
+      "loss": 15.1508,
+      "step": 53120
+    },
+    {
+      "epoch": 0.07870224982076092,
+      "grad_norm": 7.53125,
+      "learning_rate": 0.00048700343947433926,
+      "loss": 15.192,
+      "step": 53130
+    },
+    {
+      "epoch": 0.07871706296772511,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004870009700068355,
+      "loss": 15.1088,
+      "step": 53140
+    },
+    {
+      "epoch": 0.0787318761146893,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004869985005393317,
+      "loss": 15.109,
+      "step": 53150
+    },
+    {
+      "epoch": 0.0787466892616535,
+      "grad_norm": 6.125,
+      "learning_rate": 0.00048699603107182796,
+      "loss": 15.1353,
+      "step": 53160
+    },
+    {
+      "epoch": 0.07876150240861769,
+      "grad_norm": 6.125,
+      "learning_rate": 0.0004869935616043241,
+      "loss": 15.0702,
+      "step": 53170
+    },
+    {
+      "epoch": 0.07877631555558189,
+      "grad_norm": 6.0625,
+      "learning_rate": 0.00048699109213682035,
+      "loss": 15.0845,
+      "step": 53180
+    },
+    {
+      "epoch": 0.07879112870254608,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.0004869886226693166,
+      "loss": 14.9745,
+      "step": 53190
+    },
+    {
+      "epoch": 0.07880594184951027,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004869861532018128,
+      "loss": 14.9899,
+      "step": 53200
+    },
+    {
+      "epoch": 0.07882075499647447,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.000486983683734309,
+      "loss": 15.1241,
+      "step": 53210
+    },
+    {
+      "epoch": 0.07883556814343866,
+      "grad_norm": 6.25,
+      "learning_rate": 0.00048698121426680524,
+      "loss": 15.0384,
+      "step": 53220
+    },
+    {
+      "epoch": 0.07885038129040285,
+      "grad_norm": 8.625,
+      "learning_rate": 0.00048697874479930144,
+      "loss": 15.1642,
+      "step": 53230
+    },
+    {
+      "epoch": 0.07886519443736706,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004869762753317977,
+      "loss": 15.1911,
+      "step": 53240
+    },
+    {
+      "epoch": 0.07888000758433125,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.0004869738058642939,
+      "loss": 15.1585,
+      "step": 53250
+    },
+    {
+      "epoch": 0.07889482073129545,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004869713363967901,
+      "loss": 15.0286,
+      "step": 53260
+    },
+    {
+      "epoch": 0.07890963387825964,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.00048696886692928634,
+      "loss": 15.0443,
+      "step": 53270
+    },
+    {
+      "epoch": 0.07892444702522383,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004869663974617825,
+      "loss": 15.0719,
+      "step": 53280
+    },
+    {
+      "epoch": 0.07893926017218802,
+      "grad_norm": 6.5,
+      "learning_rate": 0.00048696392799427873,
+      "loss": 15.0195,
+      "step": 53290
+    },
+    {
+      "epoch": 0.07895407331915222,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.000486961458526775,
+      "loss": 15.0844,
+      "step": 53300
+    },
+    {
+      "epoch": 0.07896888646611641,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004869589890592712,
+      "loss": 15.0515,
+      "step": 53310
+    },
+    {
+      "epoch": 0.0789836996130806,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004869565195917674,
+      "loss": 15.0778,
+      "step": 53320
+    },
+    {
+      "epoch": 0.0789985127600448,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004869540501242636,
+      "loss": 15.1232,
+      "step": 53330
+    },
+    {
+      "epoch": 0.07901332590700899,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.0004869515806567598,
+      "loss": 15.0529,
+      "step": 53340
+    },
+    {
+      "epoch": 0.07902813905397318,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004869491111892561,
+      "loss": 14.9971,
+      "step": 53350
+    },
+    {
+      "epoch": 0.07904295220093738,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.00048694664172175227,
+      "loss": 15.1074,
+      "step": 53360
+    },
+    {
+      "epoch": 0.07905776534790157,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.00048694417225424847,
+      "loss": 15.1246,
+      "step": 53370
+    },
+    {
+      "epoch": 0.07907257849486576,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004869417027867447,
+      "loss": 15.1153,
+      "step": 53380
+    },
+    {
+      "epoch": 0.07908739164182996,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004869392333192409,
+      "loss": 15.0776,
+      "step": 53390
+    },
+    {
+      "epoch": 0.07910220478879415,
+      "grad_norm": 10.1875,
+      "learning_rate": 0.0004869367638517371,
+      "loss": 15.1103,
+      "step": 53400
+    },
+    {
+      "epoch": 0.07911701793575834,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048693429438423336,
+      "loss": 15.0772,
+      "step": 53410
+    },
+    {
+      "epoch": 0.07913183108272254,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.00048693182491672956,
+      "loss": 15.0286,
+      "step": 53420
+    },
+    {
+      "epoch": 0.07914664422968673,
+      "grad_norm": 18.875,
+      "learning_rate": 0.0004869293554492258,
+      "loss": 15.0284,
+      "step": 53430
+    },
+    {
+      "epoch": 0.07916145737665092,
+      "grad_norm": 11.4375,
+      "learning_rate": 0.000486926885981722,
+      "loss": 15.1057,
+      "step": 53440
+    },
+    {
+      "epoch": 0.07917627052361512,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004869244165142182,
+      "loss": 15.0774,
+      "step": 53450
+    },
+    {
+      "epoch": 0.07919108367057931,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.00048692194704671446,
+      "loss": 15.0241,
+      "step": 53460
+    },
+    {
+      "epoch": 0.0792058968175435,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004869194775792106,
+      "loss": 15.0823,
+      "step": 53470
+    },
+    {
+      "epoch": 0.0792207099645077,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048691700811170685,
+      "loss": 15.0525,
+      "step": 53480
+    },
+    {
+      "epoch": 0.07923552311147189,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.0004869145386442031,
+      "loss": 15.044,
+      "step": 53490
+    },
+    {
+      "epoch": 0.07925033625843608,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.0004869120691766993,
+      "loss": 15.0362,
+      "step": 53500
+    },
+    {
+      "epoch": 0.07926514940540028,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004869095997091955,
+      "loss": 15.0063,
+      "step": 53510
+    },
+    {
+      "epoch": 0.07927996255236447,
+      "grad_norm": 6.5,
+      "learning_rate": 0.00048690713024169175,
+      "loss": 15.0569,
+      "step": 53520
+    },
+    {
+      "epoch": 0.07929477569932866,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048690466077418794,
+      "loss": 15.0869,
+      "step": 53530
+    },
+    {
+      "epoch": 0.07930958884629286,
+      "grad_norm": 7.53125,
+      "learning_rate": 0.0004869021913066842,
+      "loss": 15.0454,
+      "step": 53540
+    },
+    {
+      "epoch": 0.07932440199325705,
+      "grad_norm": 5.78125,
+      "learning_rate": 0.0004868997218391804,
+      "loss": 15.01,
+      "step": 53550
+    },
+    {
+      "epoch": 0.07933921514022126,
+      "grad_norm": 6.5,
+      "learning_rate": 0.0004868972523716766,
+      "loss": 14.9731,
+      "step": 53560
+    },
+    {
+      "epoch": 0.07935402828718545,
+      "grad_norm": 5.46875,
+      "learning_rate": 0.00048689478290417284,
+      "loss": 15.0861,
+      "step": 53570
+    },
+    {
+      "epoch": 0.07936884143414964,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.00048689231343666903,
+      "loss": 15.052,
+      "step": 53580
+    },
+    {
+      "epoch": 0.07938365458111384,
+      "grad_norm": 7.0,
+      "learning_rate": 0.00048688984396916523,
+      "loss": 15.0928,
+      "step": 53590
+    },
+    {
+      "epoch": 0.07939846772807803,
+      "grad_norm": 7.46875,
+      "learning_rate": 0.0004868873745016615,
+      "loss": 15.1057,
+      "step": 53600
+    },
+    {
+      "epoch": 0.07941328087504222,
+      "grad_norm": 9.75,
+      "learning_rate": 0.0004868849050341577,
+      "loss": 15.0796,
+      "step": 53610
+    },
+    {
+      "epoch": 0.07942809402200642,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004868824355666539,
+      "loss": 15.1789,
+      "step": 53620
+    },
+    {
+      "epoch": 0.07944290716897061,
+      "grad_norm": 6.125,
+      "learning_rate": 0.0004868799660991501,
+      "loss": 15.1064,
+      "step": 53630
+    },
+    {
+      "epoch": 0.0794577203159348,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004868774966316463,
+      "loss": 15.0454,
+      "step": 53640
+    },
+    {
+      "epoch": 0.079472533462899,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.0004868750271641426,
+      "loss": 15.0827,
+      "step": 53650
+    },
+    {
+      "epoch": 0.07948734660986319,
+      "grad_norm": 5.53125,
+      "learning_rate": 0.00048687255769663877,
+      "loss": 15.0536,
+      "step": 53660
+    },
+    {
+      "epoch": 0.07950215975682738,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.00048687008822913497,
+      "loss": 15.0194,
+      "step": 53670
+    },
+    {
+      "epoch": 0.07951697290379157,
+      "grad_norm": 7.5,
+      "learning_rate": 0.0004868676187616312,
+      "loss": 15.0533,
+      "step": 53680
+    },
+    {
+      "epoch": 0.07953178605075577,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004868651492941274,
+      "loss": 15.1152,
+      "step": 53690
+    },
+    {
+      "epoch": 0.07954659919771996,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.0004868626798266236,
+      "loss": 15.0846,
+      "step": 53700
+    },
+    {
+      "epoch": 0.07956141234468415,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048686021035911986,
+      "loss": 15.011,
+      "step": 53710
+    },
+    {
+      "epoch": 0.07957622549164835,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00048685774089161606,
+      "loss": 14.9982,
+      "step": 53720
+    },
+    {
+      "epoch": 0.07959103863861254,
+      "grad_norm": 5.78125,
+      "learning_rate": 0.0004868552714241123,
+      "loss": 14.9968,
+      "step": 53730
+    },
+    {
+      "epoch": 0.07960585178557673,
+      "grad_norm": 6.0625,
+      "learning_rate": 0.0004868528019566085,
+      "loss": 15.0537,
+      "step": 53740
+    },
+    {
+      "epoch": 0.07962066493254093,
+      "grad_norm": 5.875,
+      "learning_rate": 0.0004868503324891047,
+      "loss": 15.077,
+      "step": 53750
+    },
+    {
+      "epoch": 0.07963547807950512,
+      "grad_norm": 10.6875,
+      "learning_rate": 0.00048684786302160096,
+      "loss": 15.0749,
+      "step": 53760
+    },
+    {
+      "epoch": 0.07965029122646931,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.00048684539355409715,
+      "loss": 14.9793,
+      "step": 53770
+    },
+    {
+      "epoch": 0.0796651043734335,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00048684292408659335,
+      "loss": 14.9148,
+      "step": 53780
+    },
+    {
+      "epoch": 0.0796799175203977,
+      "grad_norm": 53.0,
+      "learning_rate": 0.0004868404546190896,
+      "loss": 15.0931,
+      "step": 53790
+    },
+    {
+      "epoch": 0.0796947306673619,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004868379851515858,
+      "loss": 15.0475,
+      "step": 53800
+    },
+    {
+      "epoch": 0.07970954381432609,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.000486835515684082,
+      "loss": 15.0885,
+      "step": 53810
+    },
+    {
+      "epoch": 0.07972435696129028,
+      "grad_norm": 12.6875,
+      "learning_rate": 0.00048683304621657825,
+      "loss": 15.0429,
+      "step": 53820
+    },
+    {
+      "epoch": 0.07973917010825447,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.00048683057674907444,
+      "loss": 15.0042,
+      "step": 53830
+    },
+    {
+      "epoch": 0.07975398325521867,
+      "grad_norm": 7.25,
+      "learning_rate": 0.0004868281072815707,
+      "loss": 15.0008,
+      "step": 53840
+    },
+    {
+      "epoch": 0.07976879640218286,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004868256378140669,
+      "loss": 15.0597,
+      "step": 53850
+    },
+    {
+      "epoch": 0.07978360954914705,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004868231683465631,
+      "loss": 14.965,
+      "step": 53860
+    },
+    {
+      "epoch": 0.07979842269611125,
+      "grad_norm": 8.125,
+      "learning_rate": 0.00048682069887905934,
+      "loss": 15.0893,
+      "step": 53870
+    },
+    {
+      "epoch": 0.07981323584307545,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00048681822941155553,
+      "loss": 15.0194,
+      "step": 53880
+    },
+    {
+      "epoch": 0.07982804899003965,
+      "grad_norm": 21.375,
+      "learning_rate": 0.00048681575994405173,
+      "loss": 15.0301,
+      "step": 53890
+    },
+    {
+      "epoch": 0.07984286213700384,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.000486813290476548,
+      "loss": 15.0558,
+      "step": 53900
+    },
+    {
+      "epoch": 0.07985767528396803,
+      "grad_norm": 9.625,
+      "learning_rate": 0.0004868108210090442,
+      "loss": 15.0841,
+      "step": 53910
+    },
+    {
+      "epoch": 0.07987248843093223,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.00048680835154154043,
+      "loss": 15.0492,
+      "step": 53920
+    },
+    {
+      "epoch": 0.07988730157789642,
+      "grad_norm": 7.5625,
+      "learning_rate": 0.00048680588207403663,
+      "loss": 15.0187,
+      "step": 53930
+    },
+    {
+      "epoch": 0.07990211472486061,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004868034126065328,
+      "loss": 14.9801,
+      "step": 53940
+    },
+    {
+      "epoch": 0.0799169278718248,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004868009431390291,
+      "loss": 14.9848,
+      "step": 53950
+    },
+    {
+      "epoch": 0.079931741018789,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004867984736715253,
+      "loss": 14.9927,
+      "step": 53960
+    },
+    {
+      "epoch": 0.07994655416575319,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00048679600420402147,
+      "loss": 15.0322,
+      "step": 53970
+    },
+    {
+      "epoch": 0.07996136731271739,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004867935347365177,
+      "loss": 15.0364,
+      "step": 53980
+    },
+    {
+      "epoch": 0.07997618045968158,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.0004867910652690139,
+      "loss": 14.9742,
+      "step": 53990
+    },
+    {
+      "epoch": 0.07999099360664577,
+      "grad_norm": 58.0,
+      "learning_rate": 0.0004867885958015101,
+      "loss": 15.0833,
+      "step": 54000
+    }
   ],
   "logging_steps": 10,
@@ -37127,7 +37827,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.1669001977630556e+20,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null