Training in progress, step 53000, checkpoint

- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +703 -3
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:5f27a56368056fc32d4b7e5d23c6492ac971d8672b0381112d9e6374b2bcfdd7
 size 715030586
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:40a7260a893151ae68ff9abddac367eb1c219ee970995d15c8fbd3b96152db45
 size 1032262338
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bd46a22c8c930911cd104bb7d31b99dedfb69975a9d8245b24717b65ad63864b
 size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:401180f380b7886e93ac8125388345d73ee01c7449dcd0815ad64b364f075d7b
 size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9bbd92622c25bc4db7b24f22c5a71d93f982330fe4e14e84d2a4379f3e6e3b40
 size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b64aa4d617fefae261dcad0912c0ab593e28dac4ab1727f422692cb316f9b8a1
 size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:bb2b6eb9245f28b41270f3392f30c7084537d421539e976f89ee6d918cdaa909
 size 1064
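Each binary file above is stored as a Git LFS pointer, so the diff shows only the new object hash (oid) and byte size. A minimal sketch for checking a downloaded blob against the recorded oid, assuming the file sits at the pointer's path in a local clone (scheduler.pt shown):

import hashlib

# Hash the downloaded checkpoint file in chunks and compare the digest
# with the oid recorded in the LFS pointer diff above.
EXPECTED = "bb2b6eb9245f28b41270f3392f30c7084537d421539e976f89ee6d918cdaa909"

h = hashlib.sha256()
with open("last-checkpoint/scheduler.pt", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert h.hexdigest() == EXPECTED, "blob does not match the LFS pointer"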
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.07850967891022641,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 53000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -36408,6 +36408,706 @@
       "learning_rate": 0.00048728248930226674,
       "loss": 15.0545,
       "step": 52000
+    },
+    {
+      "epoch": 0.07704317736077124,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.000487280019834763,
+      "loss": 15.0789,
+      "step": 52010
+    },
+    {
+      "epoch": 0.07705799050773543,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.00048727755036725924,
+      "loss": 15.1253,
+      "step": 52020
+    },
+    {
+      "epoch": 0.07707280365469962,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.00048727508089975544,
+      "loss": 15.152,
+      "step": 52030
+    },
+    {
+      "epoch": 0.07708761680166382,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.00048727261143225164,
+      "loss": 15.2387,
+      "step": 52040
+    },
+    {
+      "epoch": 0.07710242994862801,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004872701419647479,
+      "loss": 15.1069,
+      "step": 52050
+    },
+    {
+      "epoch": 0.0771172430955922,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.0004872676724972441,
+      "loss": 15.0691,
+      "step": 52060
+    },
+    {
+      "epoch": 0.0771320562425564,
+      "grad_norm": 7.75,
+      "learning_rate": 0.00048726520302974034,
+      "loss": 15.1991,
+      "step": 52070
+    },
+    {
+      "epoch": 0.07714686938952059,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004872627335622365,
+      "loss": 15.1124,
+      "step": 52080
+    },
+    {
+      "epoch": 0.07716168253648478,
+      "grad_norm": 6.25,
+      "learning_rate": 0.00048726026409473273,
+      "loss": 15.2277,
+      "step": 52090
+    },
+    {
+      "epoch": 0.07717649568344898,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.000487257794627229,
+      "loss": 15.171,
+      "step": 52100
+    },
+    {
+      "epoch": 0.07719130883041317,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004872553251597251,
+      "loss": 15.1479,
+      "step": 52110
+    },
+    {
+      "epoch": 0.07720612197737736,
+      "grad_norm": 6.0,
+      "learning_rate": 0.0004872528556922214,
+      "loss": 15.1086,
+      "step": 52120
+    },
+    {
+      "epoch": 0.07722093512434156,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.0004872503862247176,
+      "loss": 15.1757,
+      "step": 52130
+    },
+    {
+      "epoch": 0.07723574827130575,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.0004872479167572138,
+      "loss": 15.0438,
+      "step": 52140
+    },
+    {
+      "epoch": 0.07725056141826994,
+      "grad_norm": 6.25,
+      "learning_rate": 0.00048724544728971,
+      "loss": 15.0921,
+      "step": 52150
+    },
+    {
+      "epoch": 0.07726537456523414,
+      "grad_norm": 5.96875,
+      "learning_rate": 0.00048724297782220627,
+      "loss": 15.1225,
+      "step": 52160
+    },
+    {
+      "epoch": 0.07728018771219833,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.00048724050835470247,
+      "loss": 15.134,
+      "step": 52170
+    },
+    {
+      "epoch": 0.07729500085916252,
+      "grad_norm": 5.78125,
+      "learning_rate": 0.0004872380388871987,
+      "loss": 15.1183,
+      "step": 52180
+    },
+    {
+      "epoch": 0.07730981400612671,
+      "grad_norm": 7.4375,
+      "learning_rate": 0.00048723556941969486,
+      "loss": 15.1365,
+      "step": 52190
+    },
+    {
+      "epoch": 0.07732462715309091,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004872330999521911,
+      "loss": 15.1424,
+      "step": 52200
+    },
+    {
+      "epoch": 0.0773394403000551,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.00048723063048468736,
+      "loss": 15.2018,
+      "step": 52210
+    },
+    {
+      "epoch": 0.0773542534470193,
+      "grad_norm": 7.75,
+      "learning_rate": 0.00048722816101718356,
+      "loss": 15.1598,
+      "step": 52220
+    },
+    {
+      "epoch": 0.07736906659398349,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.00048722569154967976,
+      "loss": 15.1861,
+      "step": 52230
+    },
+    {
+      "epoch": 0.07738387974094768,
+      "grad_norm": 7.375,
+      "learning_rate": 0.000487223222082176,
+      "loss": 15.1453,
+      "step": 52240
+    },
+    {
+      "epoch": 0.07739869288791187,
+      "grad_norm": 5.84375,
+      "learning_rate": 0.0004872207526146722,
+      "loss": 15.1308,
+      "step": 52250
+    },
+    {
+      "epoch": 0.07741350603487607,
+      "grad_norm": 55.25,
+      "learning_rate": 0.00048721828314716845,
+      "loss": 15.0651,
+      "step": 52260
+    },
+    {
+      "epoch": 0.07742831918184026,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004872158136796646,
+      "loss": 15.0813,
+      "step": 52270
+    },
+    {
+      "epoch": 0.07744313232880445,
+      "grad_norm": 6.03125,
+      "learning_rate": 0.00048721334421216085,
+      "loss": 15.2112,
+      "step": 52280
+    },
+    {
+      "epoch": 0.07745794547576866,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.0004872108747446571,
+      "loss": 15.1543,
+      "step": 52290
+    },
+    {
+      "epoch": 0.07747275862273285,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048720840527715324,
+      "loss": 15.1687,
+      "step": 52300
+    },
+    {
+      "epoch": 0.07748757176969705,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.0004872059358096495,
+      "loss": 15.0261,
+      "step": 52310
+    },
+    {
+      "epoch": 0.07750238491666124,
+      "grad_norm": 5.71875,
+      "learning_rate": 0.00048720346634214574,
+      "loss": 15.1831,
+      "step": 52320
+    },
+    {
+      "epoch": 0.07751719806362543,
+      "grad_norm": 5.625,
+      "learning_rate": 0.00048720099687464194,
+      "loss": 15.1855,
+      "step": 52330
+    },
+    {
+      "epoch": 0.07753201121058963,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.00048719852740713814,
+      "loss": 15.1639,
+      "step": 52340
+    },
+    {
+      "epoch": 0.07754682435755382,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004871960579396344,
+      "loss": 15.1922,
+      "step": 52350
+    },
+    {
+      "epoch": 0.07756163750451801,
+      "grad_norm": 6.09375,
+      "learning_rate": 0.0004871935884721306,
+      "loss": 15.1293,
+      "step": 52360
+    },
+    {
+      "epoch": 0.0775764506514822,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.00048719111900462684,
+      "loss": 15.1444,
+      "step": 52370
+    },
+    {
+      "epoch": 0.0775912637984464,
+      "grad_norm": 6.09375,
+      "learning_rate": 0.000487188649537123,
+      "loss": 15.1006,
+      "step": 52380
+    },
+    {
+      "epoch": 0.0776060769454106,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00048718618006961923,
+      "loss": 15.1289,
+      "step": 52390
+    },
+    {
+      "epoch": 0.07762089009237479,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004871837106021155,
+      "loss": 15.2041,
+      "step": 52400
+    },
+    {
+      "epoch": 0.07763570323933898,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004871812411346117,
+      "loss": 15.1185,
+      "step": 52410
+    },
+    {
+      "epoch": 0.07765051638630317,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004871787716671079,
+      "loss": 15.0778,
+      "step": 52420
+    },
+    {
+      "epoch": 0.07766532953326737,
+      "grad_norm": 6.0,
+      "learning_rate": 0.0004871763021996041,
+      "loss": 15.0567,
+      "step": 52430
+    },
+    {
+      "epoch": 0.07768014268023156,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.0004871738327321003,
+      "loss": 15.0504,
+      "step": 52440
+    },
+    {
+      "epoch": 0.07769495582719575,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.0004871713632645965,
+      "loss": 15.094,
+      "step": 52450
+    },
+    {
+      "epoch": 0.07770976897415995,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.00048716889379709277,
+      "loss": 15.0712,
+      "step": 52460
+    },
+    {
+      "epoch": 0.07772458212112414,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048716642432958897,
+      "loss": 15.0745,
+      "step": 52470
+    },
+    {
+      "epoch": 0.07773939526808833,
+      "grad_norm": 5.5625,
+      "learning_rate": 0.0004871639548620852,
+      "loss": 15.0134,
+      "step": 52480
+    },
+    {
+      "epoch": 0.07775420841505253,
+      "grad_norm": 6.25,
+      "learning_rate": 0.00048716148539458136,
+      "loss": 15.1188,
+      "step": 52490
+    },
+    {
+      "epoch": 0.07776902156201672,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004871590159270776,
+      "loss": 15.0676,
+      "step": 52500
+    },
+    {
+      "epoch": 0.07778383470898091,
+      "grad_norm": 5.78125,
+      "learning_rate": 0.00048715654645957386,
+      "loss": 15.1952,
+      "step": 52510
+    },
+    {
+      "epoch": 0.0777986478559451,
+      "grad_norm": 7.5,
+      "learning_rate": 0.00048715407699207006,
+      "loss": 15.1603,
+      "step": 52520
+    },
+    {
+      "epoch": 0.0778134610029093,
+      "grad_norm": 6.0625,
+      "learning_rate": 0.00048715160752456626,
+      "loss": 15.1377,
+      "step": 52530
+    },
+    {
+      "epoch": 0.07782827414987349,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004871491380570625,
+      "loss": 15.0991,
+      "step": 52540
+    },
+    {
+      "epoch": 0.07784308729683768,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.0004871466685895587,
+      "loss": 15.0923,
+      "step": 52550
+    },
+    {
+      "epoch": 0.07785790044380188,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.00048714419912205496,
+      "loss": 15.1003,
+      "step": 52560
+    },
+    {
+      "epoch": 0.07787271359076607,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004871417296545511,
+      "loss": 15.0931,
+      "step": 52570
+    },
+    {
+      "epoch": 0.07788752673773026,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.00048713926018704735,
+      "loss": 15.0789,
+      "step": 52580
+    },
+    {
+      "epoch": 0.07790233988469446,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004871367907195436,
+      "loss": 15.0337,
+      "step": 52590
+    },
+    {
+      "epoch": 0.07791715303165865,
+      "grad_norm": 5.9375,
+      "learning_rate": 0.00048713432125203974,
+      "loss": 15.1822,
+      "step": 52600
+    },
+    {
+      "epoch": 0.07793196617862286,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.000487131851784536,
+      "loss": 15.0653,
+      "step": 52610
+    },
+    {
+      "epoch": 0.07794677932558705,
+      "grad_norm": 71.5,
+      "learning_rate": 0.00048712938231703224,
+      "loss": 15.0759,
+      "step": 52620
+    },
+    {
+      "epoch": 0.07796159247255124,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.00048712691284952844,
+      "loss": 15.1957,
+      "step": 52630
+    },
+    {
+      "epoch": 0.07797640561951544,
+      "grad_norm": 7.96875,
+      "learning_rate": 0.00048712444338202464,
+      "loss": 15.0842,
+      "step": 52640
+    },
+    {
+      "epoch": 0.07799121876647963,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004871219739145209,
+      "loss": 15.1615,
+      "step": 52650
+    },
+    {
+      "epoch": 0.07800603191344382,
+      "grad_norm": 6.375,
+      "learning_rate": 0.0004871195044470171,
+      "loss": 15.0655,
+      "step": 52660
+    },
+    {
+      "epoch": 0.07802084506040802,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048711703497951334,
+      "loss": 15.098,
+      "step": 52670
+    },
+    {
+      "epoch": 0.07803565820737221,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004871145655120095,
+      "loss": 15.1145,
+      "step": 52680
+    },
+    {
+      "epoch": 0.0780504713543364,
+      "grad_norm": 6.25,
+      "learning_rate": 0.00048711209604450573,
+      "loss": 15.0961,
+      "step": 52690
+    },
+    {
+      "epoch": 0.0780652845013006,
+      "grad_norm": 6.25,
+      "learning_rate": 0.000487109626577002,
+      "loss": 15.1241,
+      "step": 52700
+    },
+    {
+      "epoch": 0.07808009764826479,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004871071571094982,
+      "loss": 15.1092,
+      "step": 52710
+    },
+    {
+      "epoch": 0.07809491079522898,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004871046876419944,
+      "loss": 14.9957,
+      "step": 52720
+    },
+    {
+      "epoch": 0.07810972394219318,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004871022181744906,
+      "loss": 15.0922,
+      "step": 52730
+    },
+    {
+      "epoch": 0.07812453708915737,
+      "grad_norm": 26.875,
+      "learning_rate": 0.0004870997487069868,
+      "loss": 15.1285,
+      "step": 52740
+    },
+    {
+      "epoch": 0.07813935023612156,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004870972792394831,
+      "loss": 15.0511,
+      "step": 52750
+    },
+    {
+      "epoch": 0.07815416338308576,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.00048709480977197927,
+      "loss": 15.0175,
+      "step": 52760
+    },
+    {
+      "epoch": 0.07816897653004995,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.00048709234030447547,
+      "loss": 15.1444,
+      "step": 52770
+    },
+    {
+      "epoch": 0.07818378967701414,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.0004870898708369717,
+      "loss": 15.0849,
+      "step": 52780
+    },
+    {
+      "epoch": 0.07819860282397834,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00048708740136946786,
+      "loss": 15.0754,
+      "step": 52790
+    },
+    {
+      "epoch": 0.07821341597094253,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004870849319019641,
+      "loss": 14.9947,
+      "step": 52800
+    },
+    {
+      "epoch": 0.07822822911790672,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.00048708246243446036,
+      "loss": 15.0559,
+      "step": 52810
+    },
+    {
+      "epoch": 0.07824304226487092,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.00048707999296695656,
+      "loss": 15.1065,
+      "step": 52820
+    },
+    {
+      "epoch": 0.07825785541183511,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00048707752349945276,
+      "loss": 15.0698,
+      "step": 52830
+    },
+    {
+      "epoch": 0.0782726685587993,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.000487075054031949,
+      "loss": 15.0243,
+      "step": 52840
+    },
+    {
+      "epoch": 0.0782874817057635,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004870725845644452,
+      "loss": 15.0918,
+      "step": 52850
+    },
+    {
+      "epoch": 0.07830229485272769,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.00048707011509694146,
+      "loss": 15.1827,
+      "step": 52860
+    },
+    {
+      "epoch": 0.07831710799969188,
+      "grad_norm": 7.8125,
+      "learning_rate": 0.0004870676456294376,
+      "loss": 15.0726,
+      "step": 52870
+    },
+    {
+      "epoch": 0.07833192114665607,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00048706517616193385,
+      "loss": 15.0329,
+      "step": 52880
+    },
+    {
+      "epoch": 0.07834673429362027,
+      "grad_norm": 6.5,
+      "learning_rate": 0.0004870627066944301,
+      "loss": 15.0297,
+      "step": 52890
+    },
+    {
+      "epoch": 0.07836154744058446,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.0004870602372269263,
+      "loss": 15.0309,
+      "step": 52900
+    },
+    {
+      "epoch": 0.07837636058754865,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004870577677594225,
+      "loss": 15.1305,
+      "step": 52910
+    },
+    {
+      "epoch": 0.07839117373451286,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048705529829191874,
+      "loss": 15.0027,
+      "step": 52920
+    },
+    {
+      "epoch": 0.07840598688147705,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.00048705282882441494,
+      "loss": 15.0698,
+      "step": 52930
+    },
+    {
+      "epoch": 0.07842080002844125,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004870503593569112,
+      "loss": 14.9449,
+      "step": 52940
+    },
+    {
+      "epoch": 0.07843561317540544,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004870478898894074,
+      "loss": 15.162,
+      "step": 52950
+    },
+    {
+      "epoch": 0.07845042632236963,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004870454204219036,
+      "loss": 15.0779,
+      "step": 52960
+    },
+    {
+      "epoch": 0.07846523946933383,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.00048704295095439984,
+      "loss": 15.0824,
+      "step": 52970
+    },
+    {
+      "epoch": 0.07848005261629802,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.000487040481486896,
+      "loss": 15.0998,
+      "step": 52980
+    },
+    {
+      "epoch": 0.07849486576326221,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00048703801201939223,
+      "loss": 15.1059,
+      "step": 52990
+    },
+    {
+      "epoch": 0.07850967891022641,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004870355425518885,
+      "loss": 14.9802,
+      "step": 53000
     }
   ],
   "logging_steps": 10,
@@ -36427,7 +37127,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.145286727705905e+20,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null