Training in progress, step 145000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +703 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a315f19ceb0648e9cb5b3f598caf7ca7290780ca60fefd3020f7a9216fe7258a
 size 715030586

 version https://git-lfs.github.com/spec/v1
+oid sha256:07ff84bcfe00192074c69a4c114605c5b9e6fb38ace78798ed55a64c77b03044
 size 715030586

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:581b2870c57fb5d656dcc0d66480daf6064ebe61b748c188a8a15197d3a22d24
 size 1032262338

 version https://git-lfs.github.com/spec/v1
+oid sha256:9d24c9fd30d40e77558d407701661e07f72772bcc8a9ed8d75bb380c21445697
 size 1032262338

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:09cc1ad45e55d3c155bc979af2eb6c3796d5af0ee8ef8aae04cea9fbf7fb01db
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:f35fcd7367edf1bdd8195f2a5cd469a85fc53ea56623beee72bf212bd942346a
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:88e0b5b08c4d47940824a2b4da2f02a0a4db5f154dba4969b7537e3154b1ccd5
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:167b94303329e07bc590b6d46ead45c44beadf9b9b799733bd817bfc53423bc3
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9cf828f4527193492b06fdc3f679bb8efad30913cd122cd5fd6a9a945df41a71
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:10e62fb98f155f03d0926a07b1cd7c533ad37396e053bf0f6f00cf68f9ab669b
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:de81ae7d35e9dc8d8e75c9950f1c42daf9e6d68f40b106c4a02d140a42d12523
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:4421eaf2f0daad631ac0c96e0e8b2f77b41118de9d91e399e26c3c594692f003
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:324a5fb8018daf72f8e29112a209540b21897f282c8ec7b4d93cf890083185fa
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:e48e3a169afae65bdfd20b7e41e431b4600bbdd3a9abb25db698202ec341cca8
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.07110310542812957,
   "eval_steps": 500,
-  "global_step": 144000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -100808,6 +100808,706 @@
       "learning_rate": 0.000488189740425969,
       "loss": 14.344,
       "step": 144000
     }
   ],
   "logging_steps": 10,
@@ -100827,7 +101527,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.980492281126186e+20,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.07159687699360269,
   "eval_steps": 500,
+  "global_step": 145000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.000488189740425969,
       "loss": 14.344,
       "step": 144000
+    },
+    {
+      "epoch": 0.0711080431437843,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004881889174056292,
+      "loss": 14.4996,
+      "step": 144010
+    },
+    {
+      "epoch": 0.07111298085943904,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004881880943852894,
+      "loss": 14.5047,
+      "step": 144020
+    },
+    {
+      "epoch": 0.07111791857509377,
+      "grad_norm": 11.75,
+      "learning_rate": 0.0004881872713649496,
+      "loss": 14.4017,
+      "step": 144030
+    },
+    {
+      "epoch": 0.07112285629074849,
+      "grad_norm": 7.84375,
+      "learning_rate": 0.00048818644834460986,
+      "loss": 14.4821,
+      "step": 144040
+    },
+    {
+      "epoch": 0.07112779400640323,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.00048818562532427004,
+      "loss": 14.5003,
+      "step": 144050
+    },
+    {
+      "epoch": 0.07113273172205796,
+      "grad_norm": 9.125,
+      "learning_rate": 0.00048818480230393023,
+      "loss": 14.3626,
+      "step": 144060
+    },
+    {
+      "epoch": 0.07113766943771269,
+      "grad_norm": 12.1875,
+      "learning_rate": 0.0004881839792835904,
+      "loss": 14.5504,
+      "step": 144070
+    },
+    {
+      "epoch": 0.07114260715336743,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.00048818315626325066,
+      "loss": 14.5479,
+      "step": 144080
+    },
+    {
+      "epoch": 0.07114754486902215,
+      "grad_norm": 9.375,
+      "learning_rate": 0.00048818233324291084,
+      "loss": 14.2452,
+      "step": 144090
+    },
+    {
+      "epoch": 0.07115248258467688,
+      "grad_norm": 9.875,
+      "learning_rate": 0.0004881815102225711,
+      "loss": 14.5508,
+      "step": 144100
+    },
+    {
+      "epoch": 0.07115742030033162,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.00048818068720223127,
+      "loss": 14.5438,
+      "step": 144110
+    },
+    {
+      "epoch": 0.07116235801598635,
+      "grad_norm": 9.0,
+      "learning_rate": 0.00048817986418189145,
+      "loss": 14.3662,
+      "step": 144120
+    },
+    {
+      "epoch": 0.07116729573164107,
+      "grad_norm": 9.0,
+      "learning_rate": 0.00048817904116155164,
+      "loss": 14.31,
+      "step": 144130
+    },
+    {
+      "epoch": 0.07117223344729581,
+      "grad_norm": 7.53125,
+      "learning_rate": 0.0004881782181412119,
+      "loss": 14.4795,
+      "step": 144140
+    },
+    {
+      "epoch": 0.07117717116295054,
+      "grad_norm": 9.0,
+      "learning_rate": 0.00048817739512087207,
+      "loss": 14.3714,
+      "step": 144150
+    },
+    {
+      "epoch": 0.07118210887860528,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004881765721005323,
+      "loss": 14.55,
+      "step": 144160
+    },
+    {
+      "epoch": 0.07118704659426,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.00048817574908019244,
+      "loss": 14.5383,
+      "step": 144170
+    },
+    {
+      "epoch": 0.07119198430991473,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004881749260598527,
+      "loss": 14.3071,
+      "step": 144180
+    },
+    {
+      "epoch": 0.07119692202556947,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.00048817410303951287,
+      "loss": 14.4196,
+      "step": 144190
+    },
+    {
+      "epoch": 0.0712018597412242,
+      "grad_norm": 10.1875,
+      "learning_rate": 0.0004881732800191731,
+      "loss": 14.3529,
+      "step": 144200
+    },
+    {
+      "epoch": 0.07120679745687893,
+      "grad_norm": 9.625,
+      "learning_rate": 0.0004881724569988333,
+      "loss": 14.356,
+      "step": 144210
+    },
+    {
+      "epoch": 0.07121173517253367,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.00048817163397849354,
+      "loss": 14.4222,
+      "step": 144220
+    },
+    {
+      "epoch": 0.07121667288818839,
+      "grad_norm": 8.625,
+      "learning_rate": 0.00048817081095815367,
+      "loss": 14.3854,
+      "step": 144230
+    },
+    {
+      "epoch": 0.07122161060384312,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004881699879378139,
+      "loss": 14.5936,
+      "step": 144240
+    },
+    {
+      "epoch": 0.07122654831949786,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004881691649174741,
+      "loss": 14.5264,
+      "step": 144250
+    },
+    {
+      "epoch": 0.07123148603515259,
+      "grad_norm": 11.5625,
+      "learning_rate": 0.00048816834189713433,
+      "loss": 14.5488,
+      "step": 144260
+    },
+    {
+      "epoch": 0.07123642375080731,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004881675188767945,
+      "loss": 14.5507,
+      "step": 144270
+    },
+    {
+      "epoch": 0.07124136146646205,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.0004881666958564547,
+      "loss": 14.3742,
+      "step": 144280
+    },
+    {
+      "epoch": 0.07124629918211678,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004881658728361149,
+      "loss": 14.6164,
+      "step": 144290
+    },
+    {
+      "epoch": 0.0712512368977715,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.00048816504981577513,
+      "loss": 14.4916,
+      "step": 144300
+    },
+    {
+      "epoch": 0.07125617461342625,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004881642267954353,
+      "loss": 14.5795,
+      "step": 144310
+    },
+    {
+      "epoch": 0.07126111232908097,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.00048816340377509556,
+      "loss": 14.3918,
+      "step": 144320
+    },
+    {
+      "epoch": 0.0712660500447357,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.00048816258075475575,
+      "loss": 14.5638,
+      "step": 144330
+    },
+    {
+      "epoch": 0.07127098776039044,
+      "grad_norm": 11.0,
+      "learning_rate": 0.00048816175773441593,
+      "loss": 14.3977,
+      "step": 144340
+    },
+    {
+      "epoch": 0.07127592547604517,
+      "grad_norm": 11.4375,
+      "learning_rate": 0.0004881609347140761,
+      "loss": 14.5953,
+      "step": 144350
+    },
+    {
+      "epoch": 0.07128086319169989,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.00048816011169373636,
+      "loss": 14.3013,
+      "step": 144360
+    },
+    {
+      "epoch": 0.07128580090735463,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004881592886733966,
+      "loss": 14.2362,
+      "step": 144370
+    },
+    {
+      "epoch": 0.07129073862300936,
+      "grad_norm": 7.96875,
+      "learning_rate": 0.0004881584656530568,
+      "loss": 14.5241,
+      "step": 144380
+    },
+    {
+      "epoch": 0.07129567633866409,
+      "grad_norm": 14.6875,
+      "learning_rate": 0.00048815764263271703,
+      "loss": 14.3319,
+      "step": 144390
+    },
+    {
+      "epoch": 0.07130061405431883,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.00048815681961237716,
+      "loss": 14.2392,
+      "step": 144400
+    },
+    {
+      "epoch": 0.07130555176997355,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004881559965920374,
+      "loss": 14.3983,
+      "step": 144410
+    },
+    {
+      "epoch": 0.07131048948562828,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004881551735716976,
+      "loss": 14.5738,
+      "step": 144420
+    },
+    {
+      "epoch": 0.07131542720128302,
+      "grad_norm": 10.125,
+      "learning_rate": 0.00048815435055135783,
+      "loss": 14.446,
+      "step": 144430
+    },
+    {
+      "epoch": 0.07132036491693775,
+      "grad_norm": 80.5,
+      "learning_rate": 0.000488153527531018,
+      "loss": 14.4355,
+      "step": 144440
+    },
+    {
+      "epoch": 0.07132530263259247,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.0004881527045106782,
+      "loss": 14.5617,
+      "step": 144450
+    },
+    {
+      "epoch": 0.07133024034824721,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004881518814903384,
+      "loss": 14.3302,
+      "step": 144460
+    },
+    {
+      "epoch": 0.07133517806390194,
+      "grad_norm": 10.375,
+      "learning_rate": 0.00048815105846999863,
+      "loss": 14.4372,
+      "step": 144470
+    },
+    {
+      "epoch": 0.07134011577955668,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004881502354496588,
+      "loss": 14.3109,
+      "step": 144480
+    },
+    {
+      "epoch": 0.0713450534952114,
+      "grad_norm": 11.0,
+      "learning_rate": 0.00048814941242931905,
+      "loss": 14.7036,
+      "step": 144490
+    },
+    {
+      "epoch": 0.07134999121086613,
+      "grad_norm": 7.625,
+      "learning_rate": 0.00048814858940897924,
+      "loss": 14.4057,
+      "step": 144500
+    },
+    {
+      "epoch": 0.07135492892652087,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004881477663886394,
+      "loss": 14.4329,
+      "step": 144510
+    },
+    {
+      "epoch": 0.0713598666421756,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004881469433682996,
+      "loss": 14.4195,
+      "step": 144520
+    },
+    {
+      "epoch": 0.07136480435783032,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.00048814612034795985,
+      "loss": 14.4245,
+      "step": 144530
+    },
+    {
+      "epoch": 0.07136974207348507,
+      "grad_norm": 8.875,
+      "learning_rate": 0.00048814529732762004,
+      "loss": 14.5225,
+      "step": 144540
+    },
+    {
+      "epoch": 0.07137467978913979,
+      "grad_norm": 9.75,
+      "learning_rate": 0.0004881444743072803,
+      "loss": 14.3975,
+      "step": 144550
+    },
+    {
+      "epoch": 0.07137961750479452,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004881436512869404,
+      "loss": 14.2411,
+      "step": 144560
+    },
+    {
+      "epoch": 0.07138455522044926,
+      "grad_norm": 9.875,
+      "learning_rate": 0.00048814282826660065,
+      "loss": 14.2595,
+      "step": 144570
+    },
+    {
+      "epoch": 0.07138949293610398,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.00048814200524626084,
+      "loss": 14.3683,
+      "step": 144580
+    },
+    {
+      "epoch": 0.07139443065175871,
+      "grad_norm": 9.875,
+      "learning_rate": 0.0004881411822259211,
+      "loss": 14.4,
+      "step": 144590
+    },
+    {
+      "epoch": 0.07139936836741345,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.00048814035920558127,
+      "loss": 14.6331,
+      "step": 144600
+    },
+    {
+      "epoch": 0.07140430608306818,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004881395361852415,
+      "loss": 14.5715,
+      "step": 144610
+    },
+    {
+      "epoch": 0.0714092437987229,
+      "grad_norm": 37.25,
+      "learning_rate": 0.00048813871316490164,
+      "loss": 14.3596,
+      "step": 144620
+    },
+    {
+      "epoch": 0.07141418151437764,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004881378901445619,
+      "loss": 14.3384,
+      "step": 144630
+    },
+    {
+      "epoch": 0.07141911923003237,
+      "grad_norm": 10.3125,
+      "learning_rate": 0.00048813706712422207,
+      "loss": 14.5801,
+      "step": 144640
+    },
+    {
+      "epoch": 0.0714240569456871,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004881362441038823,
+      "loss": 14.4412,
+      "step": 144650
+    },
+    {
+      "epoch": 0.07142899466134184,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004881354210835425,
+      "loss": 14.5864,
+      "step": 144660
+    },
+    {
+      "epoch": 0.07143393237699656,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004881345980632027,
+      "loss": 14.5571,
+      "step": 144670
+    },
+    {
+      "epoch": 0.07143887009265129,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.00048813377504286287,
+      "loss": 14.4896,
+      "step": 144680
+    },
+    {
+      "epoch": 0.07144380780830603,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004881329520225231,
+      "loss": 14.509,
+      "step": 144690
+    },
+    {
+      "epoch": 0.07144874552396076,
+      "grad_norm": 20.0,
+      "learning_rate": 0.00048813212900218335,
+      "loss": 14.4541,
+      "step": 144700
+    },
+    {
+      "epoch": 0.07145368323961548,
+      "grad_norm": 11.6875,
+      "learning_rate": 0.00048813130598184353,
+      "loss": 14.5157,
+      "step": 144710
+    },
+    {
+      "epoch": 0.07145862095527022,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004881304829615038,
+      "loss": 14.4132,
+      "step": 144720
+    },
+    {
+      "epoch": 0.07146355867092495,
+      "grad_norm": 9.25,
+      "learning_rate": 0.0004881296599411639,
+      "loss": 14.3134,
+      "step": 144730
+    },
+    {
+      "epoch": 0.07146849638657968,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.00048812883692082415,
+      "loss": 14.5941,
+      "step": 144740
+    },
+    {
+      "epoch": 0.07147343410223442,
+      "grad_norm": 8.5,
+      "learning_rate": 0.00048812801390048433,
+      "loss": 14.4122,
+      "step": 144750
+    },
+    {
+      "epoch": 0.07147837181788914,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004881271908801446,
+      "loss": 14.4311,
+      "step": 144760
+    },
+    {
+      "epoch": 0.07148330953354387,
+      "grad_norm": 8.875,
+      "learning_rate": 0.00048812636785980476,
+      "loss": 14.6046,
+      "step": 144770
+    },
+    {
+      "epoch": 0.07148824724919861,
+      "grad_norm": 9.625,
+      "learning_rate": 0.000488125544839465,
+      "loss": 14.6219,
+      "step": 144780
+    },
+    {
+      "epoch": 0.07149318496485334,
+      "grad_norm": 11.25,
+      "learning_rate": 0.00048812472181912513,
+      "loss": 14.3943,
+      "step": 144790
+    },
+    {
+      "epoch": 0.07149812268050808,
+      "grad_norm": 11.125,
+      "learning_rate": 0.0004881238987987854,
+      "loss": 14.3531,
+      "step": 144800
+    },
+    {
+      "epoch": 0.0715030603961628,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.00048812307577844556,
+      "loss": 14.4368,
+      "step": 144810
+    },
+    {
+      "epoch": 0.07150799811181753,
+      "grad_norm": 10.25,
+      "learning_rate": 0.0004881222527581058,
+      "loss": 14.3903,
+      "step": 144820
+    },
+    {
+      "epoch": 0.07151293582747227,
+      "grad_norm": 7.8125,
+      "learning_rate": 0.000488121429737766,
+      "loss": 14.4398,
+      "step": 144830
+    },
+    {
+      "epoch": 0.071517873543127,
+      "grad_norm": 9.375,
+      "learning_rate": 0.00048812060671742617,
+      "loss": 14.4256,
+      "step": 144840
+    },
+    {
+      "epoch": 0.07152281125878172,
+      "grad_norm": 9.0,
+      "learning_rate": 0.00048811978369708636,
+      "loss": 14.3634,
+      "step": 144850
+    },
+    {
+      "epoch": 0.07152774897443646,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.0004881189606767466,
+      "loss": 14.4939,
+      "step": 144860
+    },
+    {
+      "epoch": 0.07153268669009119,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004881181376564068,
+      "loss": 14.397,
+      "step": 144870
+    },
+    {
+      "epoch": 0.07153762440574592,
+      "grad_norm": 7.75,
+      "learning_rate": 0.000488117314636067,
+      "loss": 14.5173,
+      "step": 144880
+    },
+    {
+      "epoch": 0.07154256212140066,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004881164916157272,
+      "loss": 14.4283,
+      "step": 144890
+    },
+    {
+      "epoch": 0.07154749983705538,
+      "grad_norm": 10.0,
+      "learning_rate": 0.0004881156685953874,
+      "loss": 14.6299,
+      "step": 144900
+    },
+    {
+      "epoch": 0.07155243755271011,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004881148455750476,
+      "loss": 14.4452,
+      "step": 144910
+    },
+    {
+      "epoch": 0.07155737526836485,
+      "grad_norm": 10.375,
+      "learning_rate": 0.0004881140225547078,
+      "loss": 14.3081,
+      "step": 144920
+    },
+    {
+      "epoch": 0.07156231298401958,
+      "grad_norm": 9.5,
+      "learning_rate": 0.000488113199534368,
+      "loss": 14.5014,
+      "step": 144930
+    },
+    {
+      "epoch": 0.0715672506996743,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.00048811237651402825,
+      "loss": 14.3405,
+      "step": 144940
+    },
+    {
+      "epoch": 0.07157218841532904,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004881115534936884,
+      "loss": 14.4595,
+      "step": 144950
+    },
+    {
+      "epoch": 0.07157712613098377,
+      "grad_norm": 24.5,
+      "learning_rate": 0.0004881107304733486,
+      "loss": 14.3812,
+      "step": 144960
+    },
+    {
+      "epoch": 0.0715820638466385,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004881099074530088,
+      "loss": 14.5181,
+      "step": 144970
+    },
+    {
+      "epoch": 0.07158700156229324,
+      "grad_norm": 10.375,
+      "learning_rate": 0.00048810908443266905,
+      "loss": 14.4102,
+      "step": 144980
+    },
+    {
+      "epoch": 0.07159193927794796,
+      "grad_norm": 9.75,
+      "learning_rate": 0.00048810826141232924,
+      "loss": 14.3786,
+      "step": 144990
+    },
+    {
+      "epoch": 0.07159687699360269,
+      "grad_norm": 14.0,
+      "learning_rate": 0.0004881074383919895,
+      "loss": 14.3526,
+      "step": 145000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 2.9977459018887175e+20,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null