Training in progress, step 98000, checkpoint
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +353 -3
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 248812730
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3723b9e158f7d7c1b221ad913b7f555d0d264515799090bc69e60850f0a9fa6b
|
| 3 |
size 248812730
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 383474230
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:440f5a9d8875f8e95803b17e9cf6de595e79c54073354d96d86cea394292a079
|
| 3 |
size 383474230
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b31b9ddb9cefffec91b179d35f46e0650b36eb5e7d6b179cc2315e9755fde24
|
| 3 |
size 14960
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b897e4c715d4685a715f56e531cd128aa1c6fa09393166cc3a0438301c30a741
|
| 3 |
size 14960
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84c2a641b88be414d1ece9cf1d942bc724b5f52e33f3e4c339f8cc7d9b2e8364
|
| 3 |
size 14960
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14960
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a17d0f6c880a6c8e0ab8f462efb3f688277303e688570c80d2d94eb3374124e9
|
| 3 |
size 14960
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5924b0a98cf4382d8ddc0bce15fa8820b2d4d5a4d9504a3386f0e9d32485919a
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -33958,6 +33958,356 @@
|
|
| 33958 |
"learning_rate": 0.0004761698855352423,
|
| 33959 |
"loss": 17.5145,
|
| 33960 |
"step": 97000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33961 |
}
|
| 33962 |
],
|
| 33963 |
"logging_steps": 20,
|
|
@@ -33977,7 +34327,7 @@
|
|
| 33977 |
"attributes": {}
|
| 33978 |
}
|
| 33979 |
},
|
| 33980 |
-
"total_flos": 6.
|
| 33981 |
"train_batch_size": 48,
|
| 33982 |
"trial_name": null,
|
| 33983 |
"trial_params": null
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.14516884024909787,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 98000,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 33958 |
"learning_rate": 0.0004761698855352423,
|
| 33959 |
"loss": 17.5145,
|
| 33960 |
"step": 97000
|
| 33961 |
+
},
|
| 33962 |
+
{
|
| 33963 |
+
"epoch": 0.1437171518466069,
|
| 33964 |
+
"grad_norm": 7.84375,
|
| 33965 |
+
"learning_rate": 0.00047616494660023467,
|
| 33966 |
+
"loss": 17.5301,
|
| 33967 |
+
"step": 97020
|
| 33968 |
+
},
|
| 33969 |
+
{
|
| 33970 |
+
"epoch": 0.1437467781405353,
|
| 33971 |
+
"grad_norm": 8.875,
|
| 33972 |
+
"learning_rate": 0.00047616000766522717,
|
| 33973 |
+
"loss": 17.4752,
|
| 33974 |
+
"step": 97040
|
| 33975 |
+
},
|
| 33976 |
+
{
|
| 33977 |
+
"epoch": 0.14377640443446368,
|
| 33978 |
+
"grad_norm": 10.1875,
|
| 33979 |
+
"learning_rate": 0.00047615506873021957,
|
| 33980 |
+
"loss": 17.5011,
|
| 33981 |
+
"step": 97060
|
| 33982 |
+
},
|
| 33983 |
+
{
|
| 33984 |
+
"epoch": 0.14380603072839207,
|
| 33985 |
+
"grad_norm": 9.625,
|
| 33986 |
+
"learning_rate": 0.000476150129795212,
|
| 33987 |
+
"loss": 17.5621,
|
| 33988 |
+
"step": 97080
|
| 33989 |
+
},
|
| 33990 |
+
{
|
| 33991 |
+
"epoch": 0.14383565702232046,
|
| 33992 |
+
"grad_norm": 9.125,
|
| 33993 |
+
"learning_rate": 0.00047614519086020446,
|
| 33994 |
+
"loss": 17.5082,
|
| 33995 |
+
"step": 97100
|
| 33996 |
+
},
|
| 33997 |
+
{
|
| 33998 |
+
"epoch": 0.14386528331624884,
|
| 33999 |
+
"grad_norm": 9.0,
|
| 34000 |
+
"learning_rate": 0.0004761402519251969,
|
| 34001 |
+
"loss": 17.4921,
|
| 34002 |
+
"step": 97120
|
| 34003 |
+
},
|
| 34004 |
+
{
|
| 34005 |
+
"epoch": 0.14389490961017723,
|
| 34006 |
+
"grad_norm": 9.0625,
|
| 34007 |
+
"learning_rate": 0.0004761353129901893,
|
| 34008 |
+
"loss": 17.5339,
|
| 34009 |
+
"step": 97140
|
| 34010 |
+
},
|
| 34011 |
+
{
|
| 34012 |
+
"epoch": 0.14392453590410562,
|
| 34013 |
+
"grad_norm": 8.4375,
|
| 34014 |
+
"learning_rate": 0.00047613037405518175,
|
| 34015 |
+
"loss": 17.5687,
|
| 34016 |
+
"step": 97160
|
| 34017 |
+
},
|
| 34018 |
+
{
|
| 34019 |
+
"epoch": 0.143954162198034,
|
| 34020 |
+
"grad_norm": 9.0,
|
| 34021 |
+
"learning_rate": 0.0004761254351201742,
|
| 34022 |
+
"loss": 17.4704,
|
| 34023 |
+
"step": 97180
|
| 34024 |
+
},
|
| 34025 |
+
{
|
| 34026 |
+
"epoch": 0.1439837884919624,
|
| 34027 |
+
"grad_norm": 9.25,
|
| 34028 |
+
"learning_rate": 0.0004761204961851666,
|
| 34029 |
+
"loss": 17.5618,
|
| 34030 |
+
"step": 97200
|
| 34031 |
+
},
|
| 34032 |
+
{
|
| 34033 |
+
"epoch": 0.14401341478589078,
|
| 34034 |
+
"grad_norm": 10.25,
|
| 34035 |
+
"learning_rate": 0.00047611555725015904,
|
| 34036 |
+
"loss": 17.5355,
|
| 34037 |
+
"step": 97220
|
| 34038 |
+
},
|
| 34039 |
+
{
|
| 34040 |
+
"epoch": 0.14404304107981916,
|
| 34041 |
+
"grad_norm": 7.875,
|
| 34042 |
+
"learning_rate": 0.00047611061831515143,
|
| 34043 |
+
"loss": 17.5152,
|
| 34044 |
+
"step": 97240
|
| 34045 |
+
},
|
| 34046 |
+
{
|
| 34047 |
+
"epoch": 0.14407266737374755,
|
| 34048 |
+
"grad_norm": 8.5625,
|
| 34049 |
+
"learning_rate": 0.00047610567938014393,
|
| 34050 |
+
"loss": 17.5023,
|
| 34051 |
+
"step": 97260
|
| 34052 |
+
},
|
| 34053 |
+
{
|
| 34054 |
+
"epoch": 0.14410229366767593,
|
| 34055 |
+
"grad_norm": 6.96875,
|
| 34056 |
+
"learning_rate": 0.00047610074044513633,
|
| 34057 |
+
"loss": 17.531,
|
| 34058 |
+
"step": 97280
|
| 34059 |
+
},
|
| 34060 |
+
{
|
| 34061 |
+
"epoch": 0.14413191996160432,
|
| 34062 |
+
"grad_norm": 8.5625,
|
| 34063 |
+
"learning_rate": 0.0004760958015101288,
|
| 34064 |
+
"loss": 17.5073,
|
| 34065 |
+
"step": 97300
|
| 34066 |
+
},
|
| 34067 |
+
{
|
| 34068 |
+
"epoch": 0.1441615462555327,
|
| 34069 |
+
"grad_norm": 8.5625,
|
| 34070 |
+
"learning_rate": 0.00047609086257512117,
|
| 34071 |
+
"loss": 17.4898,
|
| 34072 |
+
"step": 97320
|
| 34073 |
+
},
|
| 34074 |
+
{
|
| 34075 |
+
"epoch": 0.1441911725494611,
|
| 34076 |
+
"grad_norm": 8.0,
|
| 34077 |
+
"learning_rate": 0.00047608592364011367,
|
| 34078 |
+
"loss": 17.5595,
|
| 34079 |
+
"step": 97340
|
| 34080 |
+
},
|
| 34081 |
+
{
|
| 34082 |
+
"epoch": 0.14422079884338948,
|
| 34083 |
+
"grad_norm": 9.8125,
|
| 34084 |
+
"learning_rate": 0.00047608098470510607,
|
| 34085 |
+
"loss": 17.558,
|
| 34086 |
+
"step": 97360
|
| 34087 |
+
},
|
| 34088 |
+
{
|
| 34089 |
+
"epoch": 0.14425042513731787,
|
| 34090 |
+
"grad_norm": 7.78125,
|
| 34091 |
+
"learning_rate": 0.0004760760457700985,
|
| 34092 |
+
"loss": 17.5134,
|
| 34093 |
+
"step": 97380
|
| 34094 |
+
},
|
| 34095 |
+
{
|
| 34096 |
+
"epoch": 0.14428005143124625,
|
| 34097 |
+
"grad_norm": 8.75,
|
| 34098 |
+
"learning_rate": 0.00047607110683509096,
|
| 34099 |
+
"loss": 17.5447,
|
| 34100 |
+
"step": 97400
|
| 34101 |
+
},
|
| 34102 |
+
{
|
| 34103 |
+
"epoch": 0.14430967772517464,
|
| 34104 |
+
"grad_norm": 9.3125,
|
| 34105 |
+
"learning_rate": 0.0004760661679000834,
|
| 34106 |
+
"loss": 17.5099,
|
| 34107 |
+
"step": 97420
|
| 34108 |
+
},
|
| 34109 |
+
{
|
| 34110 |
+
"epoch": 0.14433930401910303,
|
| 34111 |
+
"grad_norm": 8.0625,
|
| 34112 |
+
"learning_rate": 0.0004760612289650758,
|
| 34113 |
+
"loss": 17.4835,
|
| 34114 |
+
"step": 97440
|
| 34115 |
+
},
|
| 34116 |
+
{
|
| 34117 |
+
"epoch": 0.1443689303130314,
|
| 34118 |
+
"grad_norm": 9.125,
|
| 34119 |
+
"learning_rate": 0.00047605629003006825,
|
| 34120 |
+
"loss": 17.519,
|
| 34121 |
+
"step": 97460
|
| 34122 |
+
},
|
| 34123 |
+
{
|
| 34124 |
+
"epoch": 0.1443985566069598,
|
| 34125 |
+
"grad_norm": 7.5,
|
| 34126 |
+
"learning_rate": 0.0004760513510950607,
|
| 34127 |
+
"loss": 17.4952,
|
| 34128 |
+
"step": 97480
|
| 34129 |
+
},
|
| 34130 |
+
{
|
| 34131 |
+
"epoch": 0.14442818290088819,
|
| 34132 |
+
"grad_norm": 9.625,
|
| 34133 |
+
"learning_rate": 0.00047604641216005315,
|
| 34134 |
+
"loss": 17.4943,
|
| 34135 |
+
"step": 97500
|
| 34136 |
+
},
|
| 34137 |
+
{
|
| 34138 |
+
"epoch": 0.14445780919481657,
|
| 34139 |
+
"grad_norm": 8.3125,
|
| 34140 |
+
"learning_rate": 0.00047604147322504554,
|
| 34141 |
+
"loss": 17.537,
|
| 34142 |
+
"step": 97520
|
| 34143 |
+
},
|
| 34144 |
+
{
|
| 34145 |
+
"epoch": 0.14448743548874496,
|
| 34146 |
+
"grad_norm": 9.5625,
|
| 34147 |
+
"learning_rate": 0.00047603653429003793,
|
| 34148 |
+
"loss": 17.4928,
|
| 34149 |
+
"step": 97540
|
| 34150 |
+
},
|
| 34151 |
+
{
|
| 34152 |
+
"epoch": 0.14451706178267335,
|
| 34153 |
+
"grad_norm": 8.3125,
|
| 34154 |
+
"learning_rate": 0.00047603159535503044,
|
| 34155 |
+
"loss": 17.4929,
|
| 34156 |
+
"step": 97560
|
| 34157 |
+
},
|
| 34158 |
+
{
|
| 34159 |
+
"epoch": 0.14454668807660176,
|
| 34160 |
+
"grad_norm": 10.0625,
|
| 34161 |
+
"learning_rate": 0.00047602665642002283,
|
| 34162 |
+
"loss": 17.4739,
|
| 34163 |
+
"step": 97580
|
| 34164 |
+
},
|
| 34165 |
+
{
|
| 34166 |
+
"epoch": 0.14457631437053015,
|
| 34167 |
+
"grad_norm": 8.1875,
|
| 34168 |
+
"learning_rate": 0.0004760217174850153,
|
| 34169 |
+
"loss": 17.5714,
|
| 34170 |
+
"step": 97600
|
| 34171 |
+
},
|
| 34172 |
+
{
|
| 34173 |
+
"epoch": 0.14460594066445853,
|
| 34174 |
+
"grad_norm": 7.71875,
|
| 34175 |
+
"learning_rate": 0.00047601677855000767,
|
| 34176 |
+
"loss": 17.4411,
|
| 34177 |
+
"step": 97620
|
| 34178 |
+
},
|
| 34179 |
+
{
|
| 34180 |
+
"epoch": 0.14463556695838692,
|
| 34181 |
+
"grad_norm": 8.3125,
|
| 34182 |
+
"learning_rate": 0.00047601183961500017,
|
| 34183 |
+
"loss": 17.5361,
|
| 34184 |
+
"step": 97640
|
| 34185 |
+
},
|
| 34186 |
+
{
|
| 34187 |
+
"epoch": 0.1446651932523153,
|
| 34188 |
+
"grad_norm": 8.8125,
|
| 34189 |
+
"learning_rate": 0.00047600690067999257,
|
| 34190 |
+
"loss": 17.5253,
|
| 34191 |
+
"step": 97660
|
| 34192 |
+
},
|
| 34193 |
+
{
|
| 34194 |
+
"epoch": 0.1446948195462437,
|
| 34195 |
+
"grad_norm": 10.0625,
|
| 34196 |
+
"learning_rate": 0.000476001961744985,
|
| 34197 |
+
"loss": 17.5119,
|
| 34198 |
+
"step": 97680
|
| 34199 |
+
},
|
| 34200 |
+
{
|
| 34201 |
+
"epoch": 0.14472444584017208,
|
| 34202 |
+
"grad_norm": 9.1875,
|
| 34203 |
+
"learning_rate": 0.00047599702280997746,
|
| 34204 |
+
"loss": 17.5493,
|
| 34205 |
+
"step": 97700
|
| 34206 |
+
},
|
| 34207 |
+
{
|
| 34208 |
+
"epoch": 0.14475407213410046,
|
| 34209 |
+
"grad_norm": 7.53125,
|
| 34210 |
+
"learning_rate": 0.0004759920838749699,
|
| 34211 |
+
"loss": 17.5685,
|
| 34212 |
+
"step": 97720
|
| 34213 |
+
},
|
| 34214 |
+
{
|
| 34215 |
+
"epoch": 0.14478369842802885,
|
| 34216 |
+
"grad_norm": 9.1875,
|
| 34217 |
+
"learning_rate": 0.0004759871449399623,
|
| 34218 |
+
"loss": 17.4322,
|
| 34219 |
+
"step": 97740
|
| 34220 |
+
},
|
| 34221 |
+
{
|
| 34222 |
+
"epoch": 0.14481332472195724,
|
| 34223 |
+
"grad_norm": 9.4375,
|
| 34224 |
+
"learning_rate": 0.00047598220600495475,
|
| 34225 |
+
"loss": 17.4662,
|
| 34226 |
+
"step": 97760
|
| 34227 |
+
},
|
| 34228 |
+
{
|
| 34229 |
+
"epoch": 0.14484295101588562,
|
| 34230 |
+
"grad_norm": 8.0625,
|
| 34231 |
+
"learning_rate": 0.0004759772670699472,
|
| 34232 |
+
"loss": 17.4853,
|
| 34233 |
+
"step": 97780
|
| 34234 |
+
},
|
| 34235 |
+
{
|
| 34236 |
+
"epoch": 0.144872577309814,
|
| 34237 |
+
"grad_norm": 8.5,
|
| 34238 |
+
"learning_rate": 0.00047597232813493965,
|
| 34239 |
+
"loss": 17.5302,
|
| 34240 |
+
"step": 97800
|
| 34241 |
+
},
|
| 34242 |
+
{
|
| 34243 |
+
"epoch": 0.1449022036037424,
|
| 34244 |
+
"grad_norm": 9.5,
|
| 34245 |
+
"learning_rate": 0.00047596738919993204,
|
| 34246 |
+
"loss": 17.5049,
|
| 34247 |
+
"step": 97820
|
| 34248 |
+
},
|
| 34249 |
+
{
|
| 34250 |
+
"epoch": 0.14493182989767078,
|
| 34251 |
+
"grad_norm": 8.625,
|
| 34252 |
+
"learning_rate": 0.0004759624502649245,
|
| 34253 |
+
"loss": 17.5703,
|
| 34254 |
+
"step": 97840
|
| 34255 |
+
},
|
| 34256 |
+
{
|
| 34257 |
+
"epoch": 0.14496145619159917,
|
| 34258 |
+
"grad_norm": 9.625,
|
| 34259 |
+
"learning_rate": 0.00047595751132991694,
|
| 34260 |
+
"loss": 17.5346,
|
| 34261 |
+
"step": 97860
|
| 34262 |
+
},
|
| 34263 |
+
{
|
| 34264 |
+
"epoch": 0.14499108248552756,
|
| 34265 |
+
"grad_norm": 9.0,
|
| 34266 |
+
"learning_rate": 0.00047595257239490933,
|
| 34267 |
+
"loss": 17.4759,
|
| 34268 |
+
"step": 97880
|
| 34269 |
+
},
|
| 34270 |
+
{
|
| 34271 |
+
"epoch": 0.14502070877945594,
|
| 34272 |
+
"grad_norm": 8.125,
|
| 34273 |
+
"learning_rate": 0.0004759476334599018,
|
| 34274 |
+
"loss": 17.5284,
|
| 34275 |
+
"step": 97900
|
| 34276 |
+
},
|
| 34277 |
+
{
|
| 34278 |
+
"epoch": 0.14505033507338433,
|
| 34279 |
+
"grad_norm": 8.875,
|
| 34280 |
+
"learning_rate": 0.00047594269452489417,
|
| 34281 |
+
"loss": 17.488,
|
| 34282 |
+
"step": 97920
|
| 34283 |
+
},
|
| 34284 |
+
{
|
| 34285 |
+
"epoch": 0.14507996136731272,
|
| 34286 |
+
"grad_norm": 8.875,
|
| 34287 |
+
"learning_rate": 0.00047593775558988667,
|
| 34288 |
+
"loss": 17.4954,
|
| 34289 |
+
"step": 97940
|
| 34290 |
+
},
|
| 34291 |
+
{
|
| 34292 |
+
"epoch": 0.1451095876612411,
|
| 34293 |
+
"grad_norm": 8.9375,
|
| 34294 |
+
"learning_rate": 0.00047593281665487907,
|
| 34295 |
+
"loss": 17.522,
|
| 34296 |
+
"step": 97960
|
| 34297 |
+
},
|
| 34298 |
+
{
|
| 34299 |
+
"epoch": 0.1451392139551695,
|
| 34300 |
+
"grad_norm": 7.78125,
|
| 34301 |
+
"learning_rate": 0.0004759278777198715,
|
| 34302 |
+
"loss": 17.525,
|
| 34303 |
+
"step": 97980
|
| 34304 |
+
},
|
| 34305 |
+
{
|
| 34306 |
+
"epoch": 0.14516884024909787,
|
| 34307 |
+
"grad_norm": 9.6875,
|
| 34308 |
+
"learning_rate": 0.00047592293878486396,
|
| 34309 |
+
"loss": 17.539,
|
| 34310 |
+
"step": 98000
|
| 34311 |
}
|
| 34312 |
],
|
| 34313 |
"logging_steps": 20,
|
|
|
|
| 34327 |
"attributes": {}
|
| 34328 |
}
|
| 34329 |
},
|
| 34330 |
+
"total_flos": 6.777681616389643e+19,
|
| 34331 |
"train_batch_size": 48,
|
| 34332 |
"trial_name": null,
|
| 34333 |
"trial_params": null
|