Training in progress, step 145000, checkpoint

- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +703 -3
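
All of the binary files below are stored via Git LFS, so each diff touches only the three-line pointer file (version, oid, size), not the binary itself. As a minimal sketch (assuming a checkout where the pointers have not been smudged into the real binaries, e.g. a clone without `git lfs pull`), the pointer fields can be read like this:

```python
# Minimal sketch: parse a Git LFS pointer file such as the ones diffed below.
from pathlib import Path

def parse_lfs_pointer(path: str) -> dict:
    """Split the 'key value' lines of an LFS pointer file into a dict."""
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

ptr = parse_lfs_pointer("last-checkpoint/optimizer.pt")
print(ptr["oid"])   # after this commit: sha256:07ff84bc...
print(ptr["size"])  # byte size of the real object, e.g. 715030586
```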
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:07ff84bcfe00192074c69a4c114605c5b9e6fb38ace78798ed55a64c77b03044
 size 715030586
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9d24c9fd30d40e77558d407701661e07f72772bcc8a9ed8d75bb380c21445697
 size 1032262338
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:f35fcd7367edf1bdd8195f2a5cd469a85fc53ea56623beee72bf212bd942346a
 size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:167b94303329e07bc590b6d46ead45c44beadf9b9b799733bd817bfc53423bc3
 size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:10e62fb98f155f03d0926a07b1cd7c533ad37396e053bf0f6f00cf68f9ab669b
 size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:4421eaf2f0daad631ac0c96e0e8b2f77b41118de9d91e399e26c3c594692f003
 size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:e48e3a169afae65bdfd20b7e41e431b4600bbdd3a9abb25db698202ec341cca8
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.07159687699360269,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 145000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -100808,6 +100808,706 @@
       "learning_rate": 0.000488189740425969,
       "loss": 14.344,
       "step": 144000
+    },
+    {
+      "epoch": 0.0711080431437843,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004881889174056292,
+      "loss": 14.4996,
+      "step": 144010
+    },
+    {
+      "epoch": 0.07111298085943904,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004881880943852894,
+      "loss": 14.5047,
+      "step": 144020
+    },
+    {
+      "epoch": 0.07111791857509377,
+      "grad_norm": 11.75,
+      "learning_rate": 0.0004881872713649496,
+      "loss": 14.4017,
+      "step": 144030
+    },
+    {
+      "epoch": 0.07112285629074849,
+      "grad_norm": 7.84375,
+      "learning_rate": 0.00048818644834460986,
+      "loss": 14.4821,
+      "step": 144040
+    },
+    {
+      "epoch": 0.07112779400640323,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.00048818562532427004,
+      "loss": 14.5003,
+      "step": 144050
+    },
+    {
+      "epoch": 0.07113273172205796,
+      "grad_norm": 9.125,
+      "learning_rate": 0.00048818480230393023,
+      "loss": 14.3626,
+      "step": 144060
+    },
+    {
+      "epoch": 0.07113766943771269,
+      "grad_norm": 12.1875,
+      "learning_rate": 0.0004881839792835904,
+      "loss": 14.5504,
+      "step": 144070
+    },
+    {
+      "epoch": 0.07114260715336743,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.00048818315626325066,
+      "loss": 14.5479,
+      "step": 144080
+    },
+    {
+      "epoch": 0.07114754486902215,
+      "grad_norm": 9.375,
+      "learning_rate": 0.00048818233324291084,
+      "loss": 14.2452,
+      "step": 144090
+    },
+    {
+      "epoch": 0.07115248258467688,
+      "grad_norm": 9.875,
+      "learning_rate": 0.0004881815102225711,
+      "loss": 14.5508,
+      "step": 144100
+    },
+    {
+      "epoch": 0.07115742030033162,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.00048818068720223127,
+      "loss": 14.5438,
+      "step": 144110
+    },
+    {
+      "epoch": 0.07116235801598635,
+      "grad_norm": 9.0,
+      "learning_rate": 0.00048817986418189145,
+      "loss": 14.3662,
+      "step": 144120
+    },
+    {
+      "epoch": 0.07116729573164107,
+      "grad_norm": 9.0,
+      "learning_rate": 0.00048817904116155164,
+      "loss": 14.31,
+      "step": 144130
+    },
+    {
+      "epoch": 0.07117223344729581,
+      "grad_norm": 7.53125,
+      "learning_rate": 0.0004881782181412119,
+      "loss": 14.4795,
+      "step": 144140
+    },
+    {
+      "epoch": 0.07117717116295054,
+      "grad_norm": 9.0,
+      "learning_rate": 0.00048817739512087207,
+      "loss": 14.3714,
+      "step": 144150
+    },
+    {
+      "epoch": 0.07118210887860528,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004881765721005323,
+      "loss": 14.55,
+      "step": 144160
+    },
+    {
+      "epoch": 0.07118704659426,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.00048817574908019244,
+      "loss": 14.5383,
+      "step": 144170
+    },
+    {
+      "epoch": 0.07119198430991473,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004881749260598527,
+      "loss": 14.3071,
+      "step": 144180
+    },
+    {
+      "epoch": 0.07119692202556947,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.00048817410303951287,
+      "loss": 14.4196,
+      "step": 144190
+    },
+    {
+      "epoch": 0.0712018597412242,
+      "grad_norm": 10.1875,
+      "learning_rate": 0.0004881732800191731,
+      "loss": 14.3529,
+      "step": 144200
+    },
+    {
+      "epoch": 0.07120679745687893,
+      "grad_norm": 9.625,
+      "learning_rate": 0.0004881724569988333,
+      "loss": 14.356,
+      "step": 144210
+    },
+    {
+      "epoch": 0.07121173517253367,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.00048817163397849354,
+      "loss": 14.4222,
+      "step": 144220
+    },
+    {
+      "epoch": 0.07121667288818839,
+      "grad_norm": 8.625,
+      "learning_rate": 0.00048817081095815367,
+      "loss": 14.3854,
+      "step": 144230
+    },
+    {
+      "epoch": 0.07122161060384312,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004881699879378139,
+      "loss": 14.5936,
+      "step": 144240
+    },
+    {
+      "epoch": 0.07122654831949786,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004881691649174741,
+      "loss": 14.5264,
+      "step": 144250
+    },
+    {
+      "epoch": 0.07123148603515259,
+      "grad_norm": 11.5625,
+      "learning_rate": 0.00048816834189713433,
+      "loss": 14.5488,
+      "step": 144260
+    },
+    {
+      "epoch": 0.07123642375080731,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004881675188767945,
+      "loss": 14.5507,
+      "step": 144270
+    },
+    {
+      "epoch": 0.07124136146646205,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.0004881666958564547,
+      "loss": 14.3742,
+      "step": 144280
+    },
+    {
+      "epoch": 0.07124629918211678,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004881658728361149,
+      "loss": 14.6164,
+      "step": 144290
+    },
+    {
+      "epoch": 0.0712512368977715,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.00048816504981577513,
+      "loss": 14.4916,
+      "step": 144300
+    },
+    {
+      "epoch": 0.07125617461342625,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004881642267954353,
+      "loss": 14.5795,
+      "step": 144310
+    },
+    {
+      "epoch": 0.07126111232908097,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.00048816340377509556,
+      "loss": 14.3918,
+      "step": 144320
+    },
+    {
+      "epoch": 0.0712660500447357,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.00048816258075475575,
+      "loss": 14.5638,
+      "step": 144330
+    },
+    {
+      "epoch": 0.07127098776039044,
+      "grad_norm": 11.0,
+      "learning_rate": 0.00048816175773441593,
+      "loss": 14.3977,
+      "step": 144340
+    },
+    {
+      "epoch": 0.07127592547604517,
+      "grad_norm": 11.4375,
+      "learning_rate": 0.0004881609347140761,
+      "loss": 14.5953,
+      "step": 144350
+    },
+    {
+      "epoch": 0.07128086319169989,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.00048816011169373636,
+      "loss": 14.3013,
+      "step": 144360
+    },
+    {
+      "epoch": 0.07128580090735463,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004881592886733966,
+      "loss": 14.2362,
+      "step": 144370
+    },
+    {
+      "epoch": 0.07129073862300936,
+      "grad_norm": 7.96875,
+      "learning_rate": 0.0004881584656530568,
+      "loss": 14.5241,
+      "step": 144380
+    },
+    {
+      "epoch": 0.07129567633866409,
+      "grad_norm": 14.6875,
+      "learning_rate": 0.00048815764263271703,
+      "loss": 14.3319,
+      "step": 144390
+    },
+    {
+      "epoch": 0.07130061405431883,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.00048815681961237716,
+      "loss": 14.2392,
+      "step": 144400
+    },
+    {
+      "epoch": 0.07130555176997355,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004881559965920374,
+      "loss": 14.3983,
+      "step": 144410
+    },
+    {
+      "epoch": 0.07131048948562828,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004881551735716976,
+      "loss": 14.5738,
+      "step": 144420
+    },
+    {
+      "epoch": 0.07131542720128302,
+      "grad_norm": 10.125,
+      "learning_rate": 0.00048815435055135783,
+      "loss": 14.446,
+      "step": 144430
+    },
+    {
+      "epoch": 0.07132036491693775,
+      "grad_norm": 80.5,
+      "learning_rate": 0.000488153527531018,
+      "loss": 14.4355,
+      "step": 144440
+    },
+    {
+      "epoch": 0.07132530263259247,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.0004881527045106782,
+      "loss": 14.5617,
+      "step": 144450
+    },
+    {
+      "epoch": 0.07133024034824721,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004881518814903384,
+      "loss": 14.3302,
+      "step": 144460
+    },
+    {
+      "epoch": 0.07133517806390194,
+      "grad_norm": 10.375,
+      "learning_rate": 0.00048815105846999863,
+      "loss": 14.4372,
+      "step": 144470
+    },
+    {
+      "epoch": 0.07134011577955668,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004881502354496588,
+      "loss": 14.3109,
+      "step": 144480
+    },
+    {
+      "epoch": 0.0713450534952114,
+      "grad_norm": 11.0,
+      "learning_rate": 0.00048814941242931905,
+      "loss": 14.7036,
+      "step": 144490
+    },
+    {
+      "epoch": 0.07134999121086613,
+      "grad_norm": 7.625,
+      "learning_rate": 0.00048814858940897924,
+      "loss": 14.4057,
+      "step": 144500
+    },
+    {
+      "epoch": 0.07135492892652087,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004881477663886394,
+      "loss": 14.4329,
+      "step": 144510
+    },
+    {
+      "epoch": 0.0713598666421756,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004881469433682996,
+      "loss": 14.4195,
+      "step": 144520
+    },
+    {
+      "epoch": 0.07136480435783032,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.00048814612034795985,
+      "loss": 14.4245,
+      "step": 144530
+    },
+    {
+      "epoch": 0.07136974207348507,
+      "grad_norm": 8.875,
+      "learning_rate": 0.00048814529732762004,
+      "loss": 14.5225,
+      "step": 144540
+    },
+    {
+      "epoch": 0.07137467978913979,
+      "grad_norm": 9.75,
+      "learning_rate": 0.0004881444743072803,
+      "loss": 14.3975,
+      "step": 144550
+    },
+    {
+      "epoch": 0.07137961750479452,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004881436512869404,
+      "loss": 14.2411,
+      "step": 144560
+    },
+    {
+      "epoch": 0.07138455522044926,
+      "grad_norm": 9.875,
+      "learning_rate": 0.00048814282826660065,
+      "loss": 14.2595,
+      "step": 144570
+    },
+    {
+      "epoch": 0.07138949293610398,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.00048814200524626084,
+      "loss": 14.3683,
+      "step": 144580
+    },
+    {
+      "epoch": 0.07139443065175871,
+      "grad_norm": 9.875,
+      "learning_rate": 0.0004881411822259211,
+      "loss": 14.4,
+      "step": 144590
+    },
+    {
+      "epoch": 0.07139936836741345,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.00048814035920558127,
+      "loss": 14.6331,
+      "step": 144600
+    },
+    {
+      "epoch": 0.07140430608306818,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004881395361852415,
+      "loss": 14.5715,
+      "step": 144610
+    },
+    {
+      "epoch": 0.0714092437987229,
+      "grad_norm": 37.25,
+      "learning_rate": 0.00048813871316490164,
+      "loss": 14.3596,
+      "step": 144620
+    },
+    {
+      "epoch": 0.07141418151437764,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004881378901445619,
+      "loss": 14.3384,
+      "step": 144630
+    },
+    {
+      "epoch": 0.07141911923003237,
+      "grad_norm": 10.3125,
+      "learning_rate": 0.00048813706712422207,
+      "loss": 14.5801,
+      "step": 144640
+    },
+    {
+      "epoch": 0.0714240569456871,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004881362441038823,
+      "loss": 14.4412,
+      "step": 144650
+    },
+    {
+      "epoch": 0.07142899466134184,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004881354210835425,
+      "loss": 14.5864,
+      "step": 144660
+    },
+    {
+      "epoch": 0.07143393237699656,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004881345980632027,
+      "loss": 14.5571,
+      "step": 144670
+    },
+    {
+      "epoch": 0.07143887009265129,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.00048813377504286287,
+      "loss": 14.4896,
+      "step": 144680
+    },
+    {
+      "epoch": 0.07144380780830603,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004881329520225231,
+      "loss": 14.509,
+      "step": 144690
+    },
+    {
+      "epoch": 0.07144874552396076,
+      "grad_norm": 20.0,
+      "learning_rate": 0.00048813212900218335,
+      "loss": 14.4541,
+      "step": 144700
+    },
+    {
+      "epoch": 0.07145368323961548,
+      "grad_norm": 11.6875,
+      "learning_rate": 0.00048813130598184353,
+      "loss": 14.5157,
+      "step": 144710
+    },
+    {
+      "epoch": 0.07145862095527022,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004881304829615038,
+      "loss": 14.4132,
+      "step": 144720
+    },
+    {
+      "epoch": 0.07146355867092495,
+      "grad_norm": 9.25,
+      "learning_rate": 0.0004881296599411639,
+      "loss": 14.3134,
+      "step": 144730
+    },
+    {
+      "epoch": 0.07146849638657968,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.00048812883692082415,
+      "loss": 14.5941,
+      "step": 144740
+    },
+    {
+      "epoch": 0.07147343410223442,
+      "grad_norm": 8.5,
+      "learning_rate": 0.00048812801390048433,
+      "loss": 14.4122,
+      "step": 144750
+    },
+    {
+      "epoch": 0.07147837181788914,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004881271908801446,
+      "loss": 14.4311,
+      "step": 144760
+    },
+    {
+      "epoch": 0.07148330953354387,
+      "grad_norm": 8.875,
+      "learning_rate": 0.00048812636785980476,
+      "loss": 14.6046,
+      "step": 144770
+    },
+    {
+      "epoch": 0.07148824724919861,
+      "grad_norm": 9.625,
+      "learning_rate": 0.000488125544839465,
+      "loss": 14.6219,
+      "step": 144780
+    },
+    {
+      "epoch": 0.07149318496485334,
+      "grad_norm": 11.25,
+      "learning_rate": 0.00048812472181912513,
+      "loss": 14.3943,
+      "step": 144790
+    },
+    {
+      "epoch": 0.07149812268050808,
+      "grad_norm": 11.125,
+      "learning_rate": 0.0004881238987987854,
+      "loss": 14.3531,
+      "step": 144800
+    },
+    {
+      "epoch": 0.0715030603961628,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.00048812307577844556,
+      "loss": 14.4368,
+      "step": 144810
+    },
+    {
+      "epoch": 0.07150799811181753,
+      "grad_norm": 10.25,
+      "learning_rate": 0.0004881222527581058,
+      "loss": 14.3903,
+      "step": 144820
+    },
+    {
+      "epoch": 0.07151293582747227,
+      "grad_norm": 7.8125,
+      "learning_rate": 0.000488121429737766,
+      "loss": 14.4398,
+      "step": 144830
+    },
+    {
+      "epoch": 0.071517873543127,
+      "grad_norm": 9.375,
+      "learning_rate": 0.00048812060671742617,
+      "loss": 14.4256,
+      "step": 144840
+    },
+    {
+      "epoch": 0.07152281125878172,
+      "grad_norm": 9.0,
+      "learning_rate": 0.00048811978369708636,
+      "loss": 14.3634,
+      "step": 144850
+    },
+    {
+      "epoch": 0.07152774897443646,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.0004881189606767466,
+      "loss": 14.4939,
+      "step": 144860
+    },
+    {
+      "epoch": 0.07153268669009119,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004881181376564068,
+      "loss": 14.397,
+      "step": 144870
+    },
+    {
+      "epoch": 0.07153762440574592,
+      "grad_norm": 7.75,
+      "learning_rate": 0.000488117314636067,
+      "loss": 14.5173,
+      "step": 144880
+    },
+    {
+      "epoch": 0.07154256212140066,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004881164916157272,
+      "loss": 14.4283,
+      "step": 144890
+    },
+    {
+      "epoch": 0.07154749983705538,
+      "grad_norm": 10.0,
+      "learning_rate": 0.0004881156685953874,
+      "loss": 14.6299,
+      "step": 144900
+    },
+    {
+      "epoch": 0.07155243755271011,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004881148455750476,
+      "loss": 14.4452,
+      "step": 144910
+    },
+    {
+      "epoch": 0.07155737526836485,
+      "grad_norm": 10.375,
+      "learning_rate": 0.0004881140225547078,
+      "loss": 14.3081,
+      "step": 144920
+    },
+    {
+      "epoch": 0.07156231298401958,
+      "grad_norm": 9.5,
+      "learning_rate": 0.000488113199534368,
+      "loss": 14.5014,
+      "step": 144930
+    },
+    {
+      "epoch": 0.0715672506996743,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.00048811237651402825,
+      "loss": 14.3405,
+      "step": 144940
+    },
+    {
+      "epoch": 0.07157218841532904,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004881115534936884,
+      "loss": 14.4595,
+      "step": 144950
+    },
+    {
+      "epoch": 0.07157712613098377,
+      "grad_norm": 24.5,
+      "learning_rate": 0.0004881107304733486,
+      "loss": 14.3812,
+      "step": 144960
+    },
+    {
+      "epoch": 0.0715820638466385,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004881099074530088,
+      "loss": 14.5181,
+      "step": 144970
+    },
+    {
+      "epoch": 0.07158700156229324,
+      "grad_norm": 10.375,
+      "learning_rate": 0.00048810908443266905,
+      "loss": 14.4102,
+      "step": 144980
+    },
+    {
+      "epoch": 0.07159193927794796,
+      "grad_norm": 9.75,
+      "learning_rate": 0.00048810826141232924,
+      "loss": 14.3786,
+      "step": 144990
+    },
+    {
+      "epoch": 0.07159687699360269,
+      "grad_norm": 14.0,
+      "learning_rate": 0.0004881074383919895,
+      "loss": 14.3526,
+      "step": 145000
     }
   ],
   "logging_steps": 10,
@@ -100827,7 +101527,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.
+  "total_flos": 2.9977459018887175e+20,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
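
For reference, a short sketch (assuming the checkpoint directory is checked out locally with the updated trainer_state.json) that loads the file and summarizes the 100 log entries this commit appends for steps 144010-145000:

```python
# Minimal sketch: summarize the log entries appended in this commit.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

# Keep only the entries added here (steps 144010..145000).
window = [e for e in state["log_history"] if 144000 < e.get("step", 0) <= 145000]

avg_loss = sum(e["loss"] for e in window) / len(window)
max_grad = max(e["grad_norm"] for e in window)
print(f"{len(window)} entries, mean loss {avg_loss:.4f}, max grad_norm {max_grad}")
```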