Training in progress, step 123000, checkpoint
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +703 -3
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:3e6c29c60b4be649b8c35881bfd701df8ed9ad086927795ba8119504814ccc63
 size 715030586
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e7893e25921a46906c89e0422b957011974864566379e8e520e9cb3dcb18624a
 size 1032262338
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ce2fea6d96cf72a768fa7a147391dd04b136c1b2c50e77e9675c8f55cb2b1eaa
 size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:74b4a4d321b5c5ed09a82f77da55cc3d586ac287fee30013935a69f592ce05be
 size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:c35d4bd4f1454a7b87cbf6da1f2688fe2add4104efb8d280fd7dde4cb2eaf2d6
 size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:505f4e13010a4d0cc4f9be37b8e271ade3bd1580ce75e87673ff271bd9e88c38
 size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:6cf85b6d54d63d639c68431b39fe1a75b74f180590a5701ac64352393f1a29a1
 size 1064
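Every binary file above is tracked with Git LFS, so each diff only rewrites the three-line pointer (spec version, sha256 object id, byte size); the sizes are unchanged in this commit, and the pre-commit object ids are elided in this view. As a small illustrative sketch (the helper name is ours, not part of the repo), such a pointer can be parsed like this, assuming the file on disk is still a pointer rather than the smudged blob:

    # Parse a Git LFS pointer file ("key value" per line) into a dict.
    def read_lfs_pointer(path: str) -> dict:
        fields = {}
        with open(path) as f:
            for line in f:
                key, _, value = line.strip().partition(" ")
                fields[key] = value
        return fields

    ptr = read_lfs_pointer("last-checkpoint/optimizer.pt")
    print(ptr["oid"], ptr["size"])  # sha256:3e6c29c6... 715030586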
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.06073390255319401,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 123000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -85408,6 +85408,706 @@
       "learning_rate": 0.000490000385173519,
       "loss": 14.6893,
       "step": 122000
+    },
+    {
+      "epoch": 0.06024506870337562,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004899995621531792,
+      "loss": 14.7306,
+      "step": 122010
+    },
+    {
+      "epoch": 0.060250006419030354,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004899987391328394,
+      "loss": 14.6219,
+      "step": 122020
+    },
+    {
+      "epoch": 0.06025494413468508,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004899979161124997,
+      "loss": 14.5998,
+      "step": 122030
+    },
+    {
+      "epoch": 0.060259881850339814,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004899970930921599,
+      "loss": 14.6914,
+      "step": 122040
+    },
+    {
+      "epoch": 0.06026481956599455,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004899962700718201,
+      "loss": 14.5592,
+      "step": 122050
+    },
+    {
+      "epoch": 0.060269757281649274,
+      "grad_norm": 14.9375,
+      "learning_rate": 0.0004899954470514803,
+      "loss": 14.6223,
+      "step": 122060
+    },
+    {
+      "epoch": 0.06027469499730401,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004899946240311405,
+      "loss": 14.6105,
+      "step": 122070
+    },
+    {
+      "epoch": 0.06027963271295874,
+      "grad_norm": 12.0,
+      "learning_rate": 0.0004899938010108006,
+      "loss": 14.6793,
+      "step": 122080
+    },
+    {
+      "epoch": 0.06028457042861347,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.0004899929779904609,
+      "loss": 14.6072,
+      "step": 122090
+    },
+    {
+      "epoch": 0.0602895081442682,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.000489992154970121,
+      "loss": 14.6658,
+      "step": 122100
+    },
+    {
+      "epoch": 0.060294445859922934,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004899913319497813,
+      "loss": 14.7316,
+      "step": 122110
+    },
+    {
+      "epoch": 0.06029938357557766,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004899905089294415,
+      "loss": 14.6902,
+      "step": 122120
+    },
+    {
+      "epoch": 0.06030432129123239,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004899896859091017,
+      "loss": 14.5037,
+      "step": 122130
+    },
+    {
+      "epoch": 0.06030925900688713,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004899888628887619,
+      "loss": 14.6571,
+      "step": 122140
+    },
+    {
+      "epoch": 0.06031419672254186,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004899880398684222,
+      "loss": 14.4471,
+      "step": 122150
+    },
+    {
+      "epoch": 0.06031913443819659,
+      "grad_norm": 10.125,
+      "learning_rate": 0.0004899872168480822,
+      "loss": 14.9159,
+      "step": 122160
+    },
+    {
+      "epoch": 0.06032407215385132,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.0004899863938277425,
+      "loss": 14.6641,
+      "step": 122170
+    },
+    {
+      "epoch": 0.06032900986950605,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.0004899855708074027,
+      "loss": 14.4194,
+      "step": 122180
+    },
+    {
+      "epoch": 0.06033394758516078,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004899847477870629,
+      "loss": 14.6702,
+      "step": 122190
+    },
+    {
+      "epoch": 0.06033888530081551,
+      "grad_norm": 11.3125,
+      "learning_rate": 0.0004899839247667231,
+      "loss": 14.4618,
+      "step": 122200
+    },
+    {
+      "epoch": 0.06034382301647025,
+      "grad_norm": 11.0625,
+      "learning_rate": 0.0004899831017463833,
+      "loss": 14.5465,
+      "step": 122210
+    },
+    {
+      "epoch": 0.06034876073212497,
+      "grad_norm": 9.625,
+      "learning_rate": 0.0004899822787260435,
+      "loss": 14.7497,
+      "step": 122220
+    },
+    {
+      "epoch": 0.060353698447779706,
+      "grad_norm": 11.4375,
+      "learning_rate": 0.0004899814557057038,
+      "loss": 14.5346,
+      "step": 122230
+    },
+    {
+      "epoch": 0.06035863616343444,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.000489980632685364,
+      "loss": 14.5608,
+      "step": 122240
+    },
+    {
+      "epoch": 0.060363573879089166,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004899798096650241,
+      "loss": 14.5786,
+      "step": 122250
+    },
+    {
+      "epoch": 0.0603685115947439,
+      "grad_norm": 11.3125,
+      "learning_rate": 0.0004899789866446844,
+      "loss": 14.5332,
+      "step": 122260
+    },
+    {
+      "epoch": 0.06037344931039863,
+      "grad_norm": 55.25,
+      "learning_rate": 0.0004899781636243445,
+      "loss": 14.5888,
+      "step": 122270
+    },
+    {
+      "epoch": 0.06037838702605336,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004899773406040048,
+      "loss": 14.7294,
+      "step": 122280
+    },
+    {
+      "epoch": 0.06038332474170809,
+      "grad_norm": 10.625,
+      "learning_rate": 0.000489976517583665,
+      "loss": 14.5032,
+      "step": 122290
+    },
+    {
+      "epoch": 0.060388262457362826,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004899756945633252,
+      "loss": 14.6904,
+      "step": 122300
+    },
+    {
+      "epoch": 0.06039320017301756,
+      "grad_norm": 12.25,
+      "learning_rate": 0.0004899748715429854,
+      "loss": 14.7875,
+      "step": 122310
+    },
+    {
+      "epoch": 0.060398137888672286,
+      "grad_norm": 7.75,
+      "learning_rate": 0.0004899740485226455,
+      "loss": 14.519,
+      "step": 122320
+    },
+    {
+      "epoch": 0.06040307560432702,
+      "grad_norm": 10.875,
+      "learning_rate": 0.0004899732255023057,
+      "loss": 14.7135,
+      "step": 122330
+    },
+    {
+      "epoch": 0.06040801331998175,
+      "grad_norm": 16.75,
+      "learning_rate": 0.000489972402481966,
+      "loss": 14.7901,
+      "step": 122340
+    },
+    {
+      "epoch": 0.06041295103563648,
+      "grad_norm": 12.625,
+      "learning_rate": 0.0004899715794616262,
+      "loss": 14.6928,
+      "step": 122350
+    },
+    {
+      "epoch": 0.06041788875129121,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004899707564412864,
+      "loss": 14.7452,
+      "step": 122360
+    },
+    {
+      "epoch": 0.060422826466945946,
+      "grad_norm": 10.3125,
+      "learning_rate": 0.0004899699334209466,
+      "loss": 14.8396,
+      "step": 122370
+    },
+    {
+      "epoch": 0.06042776418260067,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004899691104006068,
+      "loss": 14.6461,
+      "step": 122380
+    },
+    {
+      "epoch": 0.060432701898255406,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.000489968287380267,
+      "loss": 14.8123,
+      "step": 122390
+    },
+    {
+      "epoch": 0.06043763961391014,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004899674643599273,
+      "loss": 14.5848,
+      "step": 122400
+    },
+    {
+      "epoch": 0.060442577329564866,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004899666413395874,
+      "loss": 14.5446,
+      "step": 122410
+    },
+    {
+      "epoch": 0.0604475150452196,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004899658183192476,
+      "loss": 14.6535,
+      "step": 122420
+    },
+    {
+      "epoch": 0.06045245276087433,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.0004899649952989078,
+      "loss": 14.7343,
+      "step": 122430
+    },
+    {
+      "epoch": 0.06045739047652906,
+      "grad_norm": 9.0,
+      "learning_rate": 0.000489964172278568,
+      "loss": 14.7516,
+      "step": 122440
+    },
+    {
+      "epoch": 0.06046232819218379,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004899633492582282,
+      "loss": 14.397,
+      "step": 122450
+    },
+    {
+      "epoch": 0.060467265907838526,
+      "grad_norm": 10.3125,
+      "learning_rate": 0.0004899625262378885,
+      "loss": 14.6658,
+      "step": 122460
+    },
+    {
+      "epoch": 0.06047220362349326,
+      "grad_norm": 17.125,
+      "learning_rate": 0.0004899617032175487,
+      "loss": 14.6587,
+      "step": 122470
+    },
+    {
+      "epoch": 0.060477141339147986,
+      "grad_norm": 11.625,
+      "learning_rate": 0.0004899608801972089,
+      "loss": 14.542,
+      "step": 122480
+    },
+    {
+      "epoch": 0.06048207905480272,
+      "grad_norm": 10.75,
+      "learning_rate": 0.000489960057176869,
+      "loss": 14.8654,
+      "step": 122490
+    },
+    {
+      "epoch": 0.06048701677045745,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004899592341565292,
+      "loss": 14.757,
+      "step": 122500
+    },
+    {
+      "epoch": 0.06049195448611218,
+      "grad_norm": 11.0625,
+      "learning_rate": 0.0004899584111361894,
+      "loss": 14.659,
+      "step": 122510
+    },
+    {
+      "epoch": 0.06049689220176691,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004899575881158497,
+      "loss": 14.6703,
+      "step": 122520
+    },
+    {
+      "epoch": 0.060501829917421646,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004899567650955099,
+      "loss": 14.7973,
+      "step": 122530
+    },
+    {
+      "epoch": 0.06050676763307637,
+      "grad_norm": 7.8125,
+      "learning_rate": 0.0004899559420751701,
+      "loss": 14.6864,
+      "step": 122540
+    },
+    {
+      "epoch": 0.060511705348731105,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004899551190548303,
+      "loss": 14.7156,
+      "step": 122550
+    },
+    {
+      "epoch": 0.06051664306438584,
+      "grad_norm": 7.9375,
+      "learning_rate": 0.0004899542960344905,
+      "loss": 14.6875,
+      "step": 122560
+    },
+    {
+      "epoch": 0.060521580780040565,
+      "grad_norm": 9.5,
+      "learning_rate": 0.0004899534730141507,
+      "loss": 14.4033,
+      "step": 122570
+    },
+    {
+      "epoch": 0.0605265184956953,
+      "grad_norm": 9.9375,
+      "learning_rate": 0.0004899526499938109,
+      "loss": 14.635,
+      "step": 122580
+    },
+    {
+      "epoch": 0.06053145621135003,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004899518269734711,
+      "loss": 14.6508,
+      "step": 122590
+    },
+    {
+      "epoch": 0.06053639392700476,
+      "grad_norm": 22.25,
+      "learning_rate": 0.0004899510039531313,
+      "loss": 14.8177,
+      "step": 122600
+    },
+    {
+      "epoch": 0.06054133164265949,
+      "grad_norm": 48.0,
+      "learning_rate": 0.0004899501809327915,
+      "loss": 14.6766,
+      "step": 122610
+    },
+    {
+      "epoch": 0.060546269358314225,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004899493579124517,
+      "loss": 14.6936,
+      "step": 122620
+    },
+    {
+      "epoch": 0.06055120707396896,
+      "grad_norm": 8.75,
+      "learning_rate": 0.000489948534892112,
+      "loss": 14.6728,
+      "step": 122630
+    },
+    {
+      "epoch": 0.060556144789623685,
+      "grad_norm": 10.125,
+      "learning_rate": 0.0004899477118717722,
+      "loss": 14.6365,
+      "step": 122640
+    },
+    {
+      "epoch": 0.06056108250527842,
+      "grad_norm": 10.125,
+      "learning_rate": 0.0004899468888514323,
+      "loss": 14.4662,
+      "step": 122650
+    },
+    {
+      "epoch": 0.06056602022093315,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004899460658310925,
+      "loss": 14.5871,
+      "step": 122660
+    },
+    {
+      "epoch": 0.06057095793658788,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004899452428107527,
+      "loss": 14.8044,
+      "step": 122670
+    },
+    {
+      "epoch": 0.06057589565224261,
+      "grad_norm": 17.875,
+      "learning_rate": 0.0004899444197904129,
+      "loss": 14.7411,
+      "step": 122680
+    },
+    {
+      "epoch": 0.060580833367897345,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.0004899435967700732,
+      "loss": 14.6095,
+      "step": 122690
+    },
+    {
+      "epoch": 0.06058577108355207,
+      "grad_norm": 11.875,
+      "learning_rate": 0.0004899427737497334,
+      "loss": 14.6202,
+      "step": 122700
+    },
+    {
+      "epoch": 0.060590708799206805,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004899419507293936,
+      "loss": 14.6142,
+      "step": 122710
+    },
+    {
+      "epoch": 0.06059564651486154,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004899411277090538,
+      "loss": 14.7502,
+      "step": 122720
+    },
+    {
+      "epoch": 0.060600584230516265,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004899403046887139,
+      "loss": 14.5435,
+      "step": 122730
+    },
+    {
+      "epoch": 0.060605521946171,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.0004899394816683741,
+      "loss": 14.602,
+      "step": 122740
+    },
+    {
+      "epoch": 0.06061045966182573,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004899386586480344,
+      "loss": 14.9614,
+      "step": 122750
+    },
+    {
+      "epoch": 0.06061539737748046,
+      "grad_norm": 9.25,
+      "learning_rate": 0.0004899378356276946,
+      "loss": 14.6614,
+      "step": 122760
+    },
+    {
+      "epoch": 0.06062033509313519,
+      "grad_norm": 30.25,
+      "learning_rate": 0.0004899370126073548,
+      "loss": 14.6029,
+      "step": 122770
+    },
+    {
+      "epoch": 0.060625272808789925,
+      "grad_norm": 10.625,
+      "learning_rate": 0.000489936189587015,
+      "loss": 14.6339,
+      "step": 122780
+    },
+    {
+      "epoch": 0.06063021052444466,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004899353665666752,
+      "loss": 14.6306,
+      "step": 122790
+    },
+    {
+      "epoch": 0.060635148240099385,
+      "grad_norm": 9.75,
+      "learning_rate": 0.0004899345435463354,
+      "loss": 14.6902,
+      "step": 122800
+    },
+    {
+      "epoch": 0.06064008595575412,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004899337205259957,
+      "loss": 14.7273,
+      "step": 122810
+    },
+    {
+      "epoch": 0.06064502367140885,
+      "grad_norm": 10.75,
+      "learning_rate": 0.0004899328975056557,
+      "loss": 14.7909,
+      "step": 122820
+    },
+    {
+      "epoch": 0.06064996138706358,
+      "grad_norm": 11.0,
+      "learning_rate": 0.000489932074485316,
+      "loss": 14.5289,
+      "step": 122830
+    },
+    {
+      "epoch": 0.06065489910271831,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004899312514649762,
+      "loss": 14.6377,
+      "step": 122840
+    },
+    {
+      "epoch": 0.060659836818373045,
+      "grad_norm": 10.125,
+      "learning_rate": 0.0004899304284446364,
+      "loss": 14.6517,
+      "step": 122850
+    },
+    {
+      "epoch": 0.06066477453402777,
+      "grad_norm": 9.75,
+      "learning_rate": 0.0004899296054242966,
+      "loss": 14.5615,
+      "step": 122860
+    },
+    {
+      "epoch": 0.060669712249682504,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004899287824039569,
+      "loss": 14.6623,
+      "step": 122870
+    },
+    {
+      "epoch": 0.06067464996533724,
+      "grad_norm": 9.125,
+      "learning_rate": 0.000489927959383617,
+      "loss": 14.6153,
+      "step": 122880
+    },
+    {
+      "epoch": 0.060679587680991964,
+      "grad_norm": 49.0,
+      "learning_rate": 0.0004899271363632773,
+      "loss": 14.6059,
+      "step": 122890
+    },
+    {
+      "epoch": 0.0606845253966467,
+      "grad_norm": 11.0,
+      "learning_rate": 0.0004899263133429374,
+      "loss": 14.5997,
+      "step": 122900
+    },
+    {
+      "epoch": 0.06068946311230143,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004899254903225976,
+      "loss": 14.7843,
+      "step": 122910
+    },
+    {
+      "epoch": 0.06069440082795616,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004899246673022579,
+      "loss": 14.7203,
+      "step": 122920
+    },
+    {
+      "epoch": 0.06069933854361089,
+      "grad_norm": 10.75,
+      "learning_rate": 0.0004899238442819181,
+      "loss": 14.5264,
+      "step": 122930
+    },
+    {
+      "epoch": 0.060704276259265624,
+      "grad_norm": 12.5,
+      "learning_rate": 0.0004899230212615783,
+      "loss": 14.6797,
+      "step": 122940
+    },
+    {
+      "epoch": 0.06070921397492036,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004899221982412385,
+      "loss": 14.6443,
+      "step": 122950
+    },
+    {
+      "epoch": 0.060714151690575084,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004899213752208987,
+      "loss": 14.4232,
+      "step": 122960
+    },
+    {
+      "epoch": 0.06071908940622982,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004899205522005589,
+      "loss": 14.6483,
+      "step": 122970
+    },
+    {
+      "epoch": 0.06072402712188455,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.0004899197291802191,
+      "loss": 14.7229,
+      "step": 122980
+    },
+    {
+      "epoch": 0.06072896483753928,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004899189061598792,
+      "loss": 14.6283,
+      "step": 122990
+    },
+    {
+      "epoch": 0.06073390255319401,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004899180831395395,
+      "loss": 14.5907,
+      "step": 123000
     }
   ],
   "logging_steps": 10,
@@ -85427,7 +86127,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.
+  "total_flos": 2.6188642487454034e+20,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
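The records appended above live under the "log_history" key of trainer_state.json, one record per logging interval ("logging_steps": 10). A minimal inspection sketch, assuming the checkpoint directory is available locally (variable names here are illustrative, not part of the commit):

    import json

    # Load the Trainer state saved alongside the weights.
    with open("last-checkpoint/trainer_state.json") as f:
        state = json.load(f)

    print(state["global_step"], state["epoch"])  # 123000 0.06073390255319401

    # Collect (step, loss) pairs from the training log; eval-only
    # records without a "loss" field are skipped.
    losses = [(r["step"], r["loss"]) for r in state["log_history"] if "loss" in r]
    print(losses[-1])  # (123000, 14.5907)

Resuming from this directory with transformers restores the optimizer, scheduler, and the per-process RNG states (the four rng_state_*.pth files suggest a four-process run), e.g. trainer.train(resume_from_checkpoint="last-checkpoint").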