Training in progress, step 95000, checkpoint
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +703 -3
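The checkpoint binaries are stored with Git LFS, so each diff below touches only a three-line pointer file (spec version, sha256 oid, byte size) rather than the binary itself; between checkpoints only the oid line changes. A minimal sketch of reading such a pointer, assuming just the three-line "key value" format visible in these diffs (parse_lfs_pointer is an illustrative helper, not a library call):

# Illustrative helper: parse a Git LFS pointer file of the form shown
# in the diffs below (version / oid / size, one "key value" per line).
from pathlib import Path

def parse_lfs_pointer(path: str) -> dict:
    fields = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

# e.g. parse_lfs_pointer("last-checkpoint/optimizer.pt") would return
# {"version": "https://git-lfs.github.com/spec/v1",
#  "oid": "sha256:248b2882...", "size": "715030586"}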
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:248b28822a3145fc2f6193ec7ba37b31470701e449bd9c308191df07f57c85c3
 size 715030586

last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1414cb3a52afd00748c40a10edd86f3990dcbdfb6536ec482813a861d6bd393d
 size 1032262338

last-checkpoint/rng_state_0.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:83461f3cb631035bb80570fe8ff5f003da6889c3b4c5b07ae3097b40b998cc74
 size 14960

last-checkpoint/rng_state_1.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:9b1a3a3a92a32908a931fd058a89bd5a31451a6c67bff5977120ddb2fbd625f2
 size 14960

last-checkpoint/rng_state_2.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:117ca0bb908b4df12aafc342e2a9002967a120b9e6b2bc1d22f28fe482f613e7
 size 14960

last-checkpoint/rng_state_3.pth
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:8ac12b6daed9ec4e4bf445463e78941442596bd18188c92ccb0b9804c0d0a5af
 size 14960

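The four rng_state_*.pth files (one per rank) suggest a four-process run, e.g. four GPUs; saving each process's RNG snapshot lets a resumed run reproduce the same shuffling and dropout. A hedged sketch for inspecting one snapshot, assuming only that it is a torch-serialized dict (the exact keys vary by transformers version, so none are assumed here):

# Sketch: inspect one per-rank RNG snapshot from this checkpoint.
# weights_only=False is needed because the snapshot holds arbitrary
# Python/NumPy state objects, not just tensors.
import torch

rng_state = torch.load("last-checkpoint/rng_state_0.pth", weights_only=False)
for name, value in rng_state.items():
    print(f"{name}: {type(value).__name__}")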
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:da8924fd9ebf1bfc34c9d222c0eeb1de5a903b56bd5f2b099e5c970eea697fbe
 size 1064

last-checkpoint/trainer_state.json
CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.14072489615983977,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 95000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -65808,6 +65808,706 @@
       "learning_rate": 0.0004769107257863772,
       "loss": 13.9491,
       "step": 94000
+    },
+    {
+      "epoch": 0.1392583946103846,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.00047690825631887347,
+      "loss": 13.9904,
+      "step": 94010
+    },
+    {
+      "epoch": 0.1392732077573488,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.00047690578685136967,
+      "loss": 13.8586,
+      "step": 94020
+    },
+    {
+      "epoch": 0.139288020904313,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.00047690331738386586,
+      "loss": 13.8061,
+      "step": 94030
+    },
+    {
+      "epoch": 0.1393028340512772,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.0004769008479163621,
+      "loss": 13.9197,
+      "step": 94040
+    },
+    {
+      "epoch": 0.13931764719824138,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.0004768983784488583,
+      "loss": 13.8176,
+      "step": 94050
+    },
+    {
+      "epoch": 0.13933246034520558,
+      "grad_norm": 32.25,
+      "learning_rate": 0.00047689590898135456,
+      "loss": 13.8751,
+      "step": 94060
+    },
+    {
+      "epoch": 0.13934727349216977,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.00047689343951385076,
+      "loss": 13.8901,
+      "step": 94070
+    },
+    {
+      "epoch": 0.13936208663913396,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.00047689097004634696,
+      "loss": 13.914,
+      "step": 94080
+    },
+    {
+      "epoch": 0.13937689978609816,
+      "grad_norm": 5.875,
+      "learning_rate": 0.0004768885005788432,
+      "loss": 13.7749,
+      "step": 94090
+    },
+    {
+      "epoch": 0.13939171293306235,
+      "grad_norm": 7.875,
+      "learning_rate": 0.00047688603111133946,
+      "loss": 13.8745,
+      "step": 94100
+    },
+    {
+      "epoch": 0.13940652608002654,
+      "grad_norm": 5.84375,
+      "learning_rate": 0.0004768835616438356,
+      "loss": 13.8541,
+      "step": 94110
+    },
+    {
+      "epoch": 0.13942133922699074,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.00047688109217633185,
+      "loss": 13.9397,
+      "step": 94120
+    },
+    {
+      "epoch": 0.13943615237395493,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00047687862270882805,
+      "loss": 13.9031,
+      "step": 94130
+    },
+    {
+      "epoch": 0.13945096552091912,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004768761532413243,
+      "loss": 13.9231,
+      "step": 94140
+    },
+    {
+      "epoch": 0.13946577866788332,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004768736837738205,
+      "loss": 13.9564,
+      "step": 94150
+    },
+    {
+      "epoch": 0.1394805918148475,
+      "grad_norm": 5.5625,
+      "learning_rate": 0.0004768712143063167,
+      "loss": 13.7778,
+      "step": 94160
+    },
+    {
+      "epoch": 0.1394954049618117,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.00047686874483881294,
+      "loss": 13.8651,
+      "step": 94170
+    },
+    {
+      "epoch": 0.1395102181087759,
+      "grad_norm": 8.375,
+      "learning_rate": 0.00047686627537130914,
+      "loss": 13.8363,
+      "step": 94180
+    },
+    {
+      "epoch": 0.1395250312557401,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.00047686380590380534,
+      "loss": 13.8701,
+      "step": 94190
+    },
+    {
+      "epoch": 0.13953984440270428,
+      "grad_norm": 5.59375,
+      "learning_rate": 0.0004768613364363016,
+      "loss": 13.8929,
+      "step": 94200
+    },
+    {
+      "epoch": 0.13955465754966848,
+      "grad_norm": 5.75,
+      "learning_rate": 0.0004768588669687978,
+      "loss": 13.8003,
+      "step": 94210
+    },
+    {
+      "epoch": 0.13956947069663267,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.000476856397501294,
+      "loss": 13.8948,
+      "step": 94220
+    },
+    {
+      "epoch": 0.13958428384359686,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.00047685392803379023,
+      "loss": 13.9076,
+      "step": 94230
+    },
+    {
+      "epoch": 0.13959909699056106,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.00047685145856628643,
+      "loss": 13.9548,
+      "step": 94240
+    },
+    {
+      "epoch": 0.13961391013752525,
+      "grad_norm": 7.125,
+      "learning_rate": 0.0004768489890987827,
+      "loss": 13.9511,
+      "step": 94250
+    },
+    {
+      "epoch": 0.13962872328448944,
+      "grad_norm": 5.78125,
+      "learning_rate": 0.0004768465196312789,
+      "loss": 13.8748,
+      "step": 94260
+    },
+    {
+      "epoch": 0.13964353643145364,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.0004768440501637751,
+      "loss": 13.9524,
+      "step": 94270
+    },
+    {
+      "epoch": 0.13965834957841783,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004768415806962713,
+      "loss": 13.8717,
+      "step": 94280
+    },
+    {
+      "epoch": 0.13967316272538202,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004768391112287676,
+      "loss": 13.9239,
+      "step": 94290
+    },
+    {
+      "epoch": 0.13968797587234622,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004768366417612637,
+      "loss": 13.9367,
+      "step": 94300
+    },
+    {
+      "epoch": 0.1397027890193104,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.00047683417229375997,
+      "loss": 13.896,
+      "step": 94310
+    },
+    {
+      "epoch": 0.1397176021662746,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.00047683170282625617,
+      "loss": 13.9525,
+      "step": 94320
+    },
+    {
+      "epoch": 0.1397324153132388,
+      "grad_norm": 8.25,
+      "learning_rate": 0.00047682923335875236,
+      "loss": 13.8564,
+      "step": 94330
+    },
+    {
+      "epoch": 0.139747228460203,
+      "grad_norm": 7.53125,
+      "learning_rate": 0.0004768267638912486,
+      "loss": 13.7938,
+      "step": 94340
+    },
+    {
+      "epoch": 0.13976204160716718,
+      "grad_norm": 42.75,
+      "learning_rate": 0.0004768242944237448,
+      "loss": 13.941,
+      "step": 94350
+    },
+    {
+      "epoch": 0.13977685475413137,
+      "grad_norm": 5.625,
+      "learning_rate": 0.00047682182495624106,
+      "loss": 13.8934,
+      "step": 94360
+    },
+    {
+      "epoch": 0.13979166790109557,
+      "grad_norm": 5.8125,
+      "learning_rate": 0.00047681935548873726,
+      "loss": 13.8162,
+      "step": 94370
+    },
+    {
+      "epoch": 0.13980648104805976,
+      "grad_norm": 5.65625,
+      "learning_rate": 0.00047681688602123346,
+      "loss": 13.769,
+      "step": 94380
+    },
+    {
+      "epoch": 0.13982129419502395,
+      "grad_norm": 6.03125,
+      "learning_rate": 0.0004768144165537297,
+      "loss": 13.8411,
+      "step": 94390
+    },
+    {
+      "epoch": 0.13983610734198818,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.00047681194708622596,
+      "loss": 13.7353,
+      "step": 94400
+    },
+    {
+      "epoch": 0.13985092048895237,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004768094776187221,
+      "loss": 13.9244,
+      "step": 94410
+    },
+    {
+      "epoch": 0.13986573363591656,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.00047680700815121835,
+      "loss": 13.9016,
+      "step": 94420
+    },
+    {
+      "epoch": 0.13988054678288075,
+      "grad_norm": 5.53125,
+      "learning_rate": 0.00047680453868371455,
+      "loss": 13.9302,
+      "step": 94430
+    },
+    {
+      "epoch": 0.13989535992984495,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004768020692162108,
+      "loss": 13.8326,
+      "step": 94440
+    },
+    {
+      "epoch": 0.13991017307680914,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.000476799599748707,
+      "loss": 13.9934,
+      "step": 94450
+    },
+    {
+      "epoch": 0.13992498622377333,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.0004767971302812032,
+      "loss": 13.9006,
+      "step": 94460
+    },
+    {
+      "epoch": 0.13993979937073753,
+      "grad_norm": 6.0625,
+      "learning_rate": 0.00047679466081369944,
+      "loss": 13.8914,
+      "step": 94470
+    },
+    {
+      "epoch": 0.13995461251770172,
+      "grad_norm": 5.65625,
+      "learning_rate": 0.0004767921913461957,
+      "loss": 13.9409,
+      "step": 94480
+    },
+    {
+      "epoch": 0.13996942566466591,
+      "grad_norm": 7.53125,
+      "learning_rate": 0.00047678972187869184,
+      "loss": 13.7752,
+      "step": 94490
+    },
+    {
+      "epoch": 0.1399842388116301,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.0004767872524111881,
+      "loss": 13.8204,
+      "step": 94500
+    },
+    {
+      "epoch": 0.1399990519585943,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.0004767847829436843,
+      "loss": 13.9536,
+      "step": 94510
+    },
+    {
+      "epoch": 0.1400138651055585,
+      "grad_norm": 6.25,
+      "learning_rate": 0.0004767823134761805,
+      "loss": 13.8974,
+      "step": 94520
+    },
+    {
+      "epoch": 0.1400286782525227,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.00047677984400867673,
+      "loss": 13.8577,
+      "step": 94530
+    },
+    {
+      "epoch": 0.14004349139948688,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.00047677737454117293,
+      "loss": 13.8667,
+      "step": 94540
+    },
+    {
+      "epoch": 0.14005830454645107,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004767749050736692,
+      "loss": 13.8806,
+      "step": 94550
+    },
+    {
+      "epoch": 0.14007311769341527,
+      "grad_norm": 5.84375,
+      "learning_rate": 0.0004767724356061654,
+      "loss": 13.9042,
+      "step": 94560
+    },
+    {
+      "epoch": 0.14008793084037946,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.0004767699661386616,
+      "loss": 13.8953,
+      "step": 94570
+    },
+    {
+      "epoch": 0.14010274398734365,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.0004767674966711578,
+      "loss": 13.8876,
+      "step": 94580
+    },
+    {
+      "epoch": 0.14011755713430785,
+      "grad_norm": 13.4375,
+      "learning_rate": 0.0004767650272036541,
+      "loss": 13.8303,
+      "step": 94590
+    },
+    {
+      "epoch": 0.14013237028127204,
+      "grad_norm": 6.09375,
+      "learning_rate": 0.0004767625577361502,
+      "loss": 13.8466,
+      "step": 94600
+    },
+    {
+      "epoch": 0.14014718342823623,
+      "grad_norm": 6.125,
+      "learning_rate": 0.00047676008826864647,
+      "loss": 13.885,
+      "step": 94610
+    },
+    {
+      "epoch": 0.14016199657520043,
+      "grad_norm": 10.25,
+      "learning_rate": 0.00047675761880114267,
+      "loss": 13.8564,
+      "step": 94620
+    },
+    {
+      "epoch": 0.14017680972216462,
+      "grad_norm": 5.9375,
+      "learning_rate": 0.0004767551493336389,
+      "loss": 13.9354,
+      "step": 94630
+    },
+    {
+      "epoch": 0.1401916228691288,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.0004767526798661351,
+      "loss": 13.8914,
+      "step": 94640
+    },
+    {
+      "epoch": 0.140206436016093,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004767502103986313,
+      "loss": 13.859,
+      "step": 94650
+    },
+    {
+      "epoch": 0.1402212491630572,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.00047674774093112756,
+      "loss": 13.8946,
+      "step": 94660
+    },
+    {
+      "epoch": 0.1402360623100214,
+      "grad_norm": 12.0,
+      "learning_rate": 0.00047674527146362376,
+      "loss": 13.8798,
+      "step": 94670
+    },
+    {
+      "epoch": 0.14025087545698559,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00047674280199611996,
+      "loss": 13.9638,
+      "step": 94680
+    },
+    {
+      "epoch": 0.14026568860394978,
+      "grad_norm": 6.125,
+      "learning_rate": 0.0004767403325286162,
+      "loss": 13.99,
+      "step": 94690
+    },
+    {
+      "epoch": 0.14028050175091397,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.00047673786306111246,
+      "loss": 13.9276,
+      "step": 94700
+    },
+    {
+      "epoch": 0.14029531489787817,
+      "grad_norm": 6.0,
+      "learning_rate": 0.0004767353935936086,
+      "loss": 13.9056,
+      "step": 94710
+    },
+    {
+      "epoch": 0.14031012804484236,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.00047673292412610485,
+      "loss": 13.8641,
+      "step": 94720
+    },
+    {
+      "epoch": 0.14032494119180655,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.00047673045465860105,
+      "loss": 13.8805,
+      "step": 94730
+    },
+    {
+      "epoch": 0.14033975433877074,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.0004767279851910973,
+      "loss": 13.938,
+      "step": 94740
+    },
+    {
+      "epoch": 0.14035456748573494,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004767255157235935,
+      "loss": 13.8394,
+      "step": 94750
+    },
+    {
+      "epoch": 0.14036938063269913,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.0004767230462560897,
+      "loss": 13.907,
+      "step": 94760
+    },
+    {
+      "epoch": 0.14038419377966332,
+      "grad_norm": 13.25,
+      "learning_rate": 0.00047672057678858594,
+      "loss": 13.8627,
+      "step": 94770
+    },
+    {
+      "epoch": 0.14039900692662752,
+      "grad_norm": 7.6875,
+      "learning_rate": 0.0004767181073210822,
+      "loss": 13.9326,
+      "step": 94780
+    },
+    {
+      "epoch": 0.1404138200735917,
+      "grad_norm": 6.125,
+      "learning_rate": 0.00047671563785357834,
+      "loss": 13.9368,
+      "step": 94790
+    },
+    {
+      "epoch": 0.1404286332205559,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004767131683860746,
+      "loss": 13.8717,
+      "step": 94800
+    },
+    {
+      "epoch": 0.1404434463675201,
+      "grad_norm": 7.5625,
+      "learning_rate": 0.0004767106989185708,
+      "loss": 13.9224,
+      "step": 94810
+    },
+    {
+      "epoch": 0.1404582595144843,
+      "grad_norm": 6.0,
+      "learning_rate": 0.00047670822945106704,
+      "loss": 13.8873,
+      "step": 94820
+    },
+    {
+      "epoch": 0.14047307266144848,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.00047670575998356323,
+      "loss": 13.9651,
+      "step": 94830
+    },
+    {
+      "epoch": 0.14048788580841268,
+      "grad_norm": 5.625,
+      "learning_rate": 0.00047670329051605943,
+      "loss": 13.8329,
+      "step": 94840
+    },
+    {
+      "epoch": 0.14050269895537687,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.0004767008210485557,
+      "loss": 13.8635,
+      "step": 94850
+    },
+    {
+      "epoch": 0.14051751210234106,
+      "grad_norm": 5.875,
+      "learning_rate": 0.0004766983515810519,
+      "loss": 13.8424,
+      "step": 94860
+    },
+    {
+      "epoch": 0.14053232524930526,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.0004766958821135481,
+      "loss": 13.8477,
+      "step": 94870
+    },
+    {
+      "epoch": 0.14054713839626945,
+      "grad_norm": 6.09375,
+      "learning_rate": 0.0004766934126460443,
+      "loss": 13.9522,
+      "step": 94880
+    },
+    {
+      "epoch": 0.14056195154323364,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004766909431785406,
+      "loss": 13.8646,
+      "step": 94890
+    },
+    {
+      "epoch": 0.14057676469019784,
+      "grad_norm": 5.96875,
+      "learning_rate": 0.0004766884737110367,
+      "loss": 13.8809,
+      "step": 94900
+    },
+    {
+      "epoch": 0.14059157783716203,
+      "grad_norm": 6.0,
+      "learning_rate": 0.00047668600424353297,
+      "loss": 13.8384,
+      "step": 94910
+    },
+    {
+      "epoch": 0.14060639098412622,
+      "grad_norm": 6.5,
+      "learning_rate": 0.00047668353477602917,
+      "loss": 13.8713,
+      "step": 94920
+    },
+    {
+      "epoch": 0.14062120413109042,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004766810653085254,
+      "loss": 13.8549,
+      "step": 94930
+    },
+    {
+      "epoch": 0.1406360172780546,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.0004766785958410216,
+      "loss": 13.855,
+      "step": 94940
+    },
+    {
+      "epoch": 0.1406508304250188,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004766761263735178,
+      "loss": 13.7981,
+      "step": 94950
+    },
+    {
+      "epoch": 0.140665643571983,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.00047667365690601406,
+      "loss": 13.7989,
+      "step": 94960
+    },
+    {
+      "epoch": 0.1406804567189472,
+      "grad_norm": 6.0625,
+      "learning_rate": 0.0004766711874385103,
+      "loss": 13.8386,
+      "step": 94970
+    },
+    {
+      "epoch": 0.14069526986591138,
+      "grad_norm": 5.75,
+      "learning_rate": 0.00047666871797100646,
+      "loss": 13.8376,
+      "step": 94980
+    },
+    {
+      "epoch": 0.14071008301287558,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.0004766662485035027,
+      "loss": 13.8766,
+      "step": 94990
+    },
+    {
+      "epoch": 0.14072489615983977,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.00047666377903599896,
+      "loss": 13.9248,
+      "step": 95000
     }
   ],
   "logging_steps": 10,
@@ -65827,7 +66527,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.
+  "total_flos": 2.0530642742225456e+20,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null
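The updated trainer_state.json pins this checkpoint at global step 95000 and epoch 0.14072489615983977, so one epoch corresponds to roughly 95000 / 0.1407 ≈ 675,000 optimizer steps at the recorded per-device batch size of 48. A minimal sketch of recovering that figure from the file (the path assumes the last-checkpoint layout shown in this commit):

# Sketch: derive approximate steps-per-epoch from the (epoch, global_step)
# pair recorded in the updated trainer_state.json.
import json

with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

steps_per_epoch = state["global_step"] / state["epoch"]  # 95000 / 0.14072489615983977
print(f"approx. optimizer steps per epoch: {steps_per_epoch:,.0f}")  # ~675,076

Assuming the directory follows the standard transformers Trainer checkpoint layout (which the optimizer.pt / scheduler.pt / rng_state_*.pth file set suggests), the run can be continued with trainer.train(resume_from_checkpoint="last-checkpoint"), which restores the optimizer, scheduler, and per-rank RNG states saved alongside this file.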