Training in progress, step 53000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +703 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ee5740b2df90f300ab8740af00b7f7bf24cb5091cfa1f62216c5ed8a2443ab87
 size 715030586

 version https://git-lfs.github.com/spec/v1
+oid sha256:5f27a56368056fc32d4b7e5d23c6492ac971d8672b0381112d9e6374b2bcfdd7
 size 715030586

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:95a29447fc5f4ac111f832f6f571c823477dfb8434cb938866e147b0f6dc865f
 size 1032262338

 version https://git-lfs.github.com/spec/v1
+oid sha256:40a7260a893151ae68ff9abddac367eb1c219ee970995d15c8fbd3b96152db45
 size 1032262338

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c20ace688e8f339a8465ba9a9599ce24601515d3a5480209803a259f2a022bae
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:bd46a22c8c930911cd104bb7d31b99dedfb69975a9d8245b24717b65ad63864b
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:948b6e9a0f20419167cfbfa65380f04637e36c4763cae8be7b73204ddaedffd2
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:401180f380b7886e93ac8125388345d73ee01c7449dcd0815ad64b364f075d7b
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60b30daf0eee892badc9289c740e442352d44dc59e2f6b50ae78895efb9f5d6a
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:9bbd92622c25bc4db7b24f22c5a71d93f982330fe4e14e84d2a4379f3e6e3b40
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27c5c66975e0a3a1d12fbec1f84e7217d14934a505e257a59525485efe3931e0
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:b64aa4d617fefae261dcad0912c0ab593e28dac4ab1727f422692cb316f9b8a1
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9948379edb2ae231aacd92fa9d3b4ab412c6756f9dd9b04618a3eccf04ef2931
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:bb2b6eb9245f28b41270f3392f30c7084537d421539e976f89ee6d918cdaa909
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.07702836421380704,
   "eval_steps": 500,
-  "global_step": 52000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -36408,6 +36408,706 @@
       "learning_rate": 0.00048728248930226674,
       "loss": 15.0545,
       "step": 52000
     }
   ],
   "logging_steps": 10,
@@ -36427,7 +37127,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.1236730320126476e+20,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.07850967891022641,
   "eval_steps": 500,
+  "global_step": 53000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.00048728248930226674,
       "loss": 15.0545,
       "step": 52000
+    },
+    {
+      "epoch": 0.07704317736077124,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.000487280019834763,
+      "loss": 15.0789,
+      "step": 52010
+    },
+    {
+      "epoch": 0.07705799050773543,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.00048727755036725924,
+      "loss": 15.1253,
+      "step": 52020
+    },
+    {
+      "epoch": 0.07707280365469962,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.00048727508089975544,
+      "loss": 15.152,
+      "step": 52030
+    },
+    {
+      "epoch": 0.07708761680166382,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.00048727261143225164,
+      "loss": 15.2387,
+      "step": 52040
+    },
+    {
+      "epoch": 0.07710242994862801,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004872701419647479,
+      "loss": 15.1069,
+      "step": 52050
+    },
+    {
+      "epoch": 0.0771172430955922,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.0004872676724972441,
+      "loss": 15.0691,
+      "step": 52060
+    },
+    {
+      "epoch": 0.0771320562425564,
+      "grad_norm": 7.75,
+      "learning_rate": 0.00048726520302974034,
+      "loss": 15.1991,
+      "step": 52070
+    },
+    {
+      "epoch": 0.07714686938952059,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004872627335622365,
+      "loss": 15.1124,
+      "step": 52080
+    },
+    {
+      "epoch": 0.07716168253648478,
+      "grad_norm": 6.25,
+      "learning_rate": 0.00048726026409473273,
+      "loss": 15.2277,
+      "step": 52090
+    },
+    {
+      "epoch": 0.07717649568344898,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.000487257794627229,
+      "loss": 15.171,
+      "step": 52100
+    },
+    {
+      "epoch": 0.07719130883041317,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004872553251597251,
+      "loss": 15.1479,
+      "step": 52110
+    },
+    {
+      "epoch": 0.07720612197737736,
+      "grad_norm": 6.0,
+      "learning_rate": 0.0004872528556922214,
+      "loss": 15.1086,
+      "step": 52120
+    },
+    {
+      "epoch": 0.07722093512434156,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.0004872503862247176,
+      "loss": 15.1757,
+      "step": 52130
+    },
+    {
+      "epoch": 0.07723574827130575,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.0004872479167572138,
+      "loss": 15.0438,
+      "step": 52140
+    },
+    {
+      "epoch": 0.07725056141826994,
+      "grad_norm": 6.25,
+      "learning_rate": 0.00048724544728971,
+      "loss": 15.0921,
+      "step": 52150
+    },
+    {
+      "epoch": 0.07726537456523414,
+      "grad_norm": 5.96875,
+      "learning_rate": 0.00048724297782220627,
+      "loss": 15.1225,
+      "step": 52160
+    },
+    {
+      "epoch": 0.07728018771219833,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.00048724050835470247,
+      "loss": 15.134,
+      "step": 52170
+    },
+    {
+      "epoch": 0.07729500085916252,
+      "grad_norm": 5.78125,
+      "learning_rate": 0.0004872380388871987,
+      "loss": 15.1183,
+      "step": 52180
+    },
+    {
+      "epoch": 0.07730981400612671,
+      "grad_norm": 7.4375,
+      "learning_rate": 0.00048723556941969486,
+      "loss": 15.1365,
+      "step": 52190
+    },
+    {
+      "epoch": 0.07732462715309091,
+      "grad_norm": 8.875,
+      "learning_rate": 0.0004872330999521911,
+      "loss": 15.1424,
+      "step": 52200
+    },
+    {
+      "epoch": 0.0773394403000551,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.00048723063048468736,
+      "loss": 15.2018,
+      "step": 52210
+    },
+    {
+      "epoch": 0.0773542534470193,
+      "grad_norm": 7.75,
+      "learning_rate": 0.00048722816101718356,
+      "loss": 15.1598,
+      "step": 52220
+    },
+    {
+      "epoch": 0.07736906659398349,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.00048722569154967976,
+      "loss": 15.1861,
+      "step": 52230
+    },
+    {
+      "epoch": 0.07738387974094768,
+      "grad_norm": 7.375,
+      "learning_rate": 0.000487223222082176,
+      "loss": 15.1453,
+      "step": 52240
+    },
+    {
+      "epoch": 0.07739869288791187,
+      "grad_norm": 5.84375,
+      "learning_rate": 0.0004872207526146722,
+      "loss": 15.1308,
+      "step": 52250
+    },
+    {
+      "epoch": 0.07741350603487607,
+      "grad_norm": 55.25,
+      "learning_rate": 0.00048721828314716845,
+      "loss": 15.0651,
+      "step": 52260
+    },
+    {
+      "epoch": 0.07742831918184026,
+      "grad_norm": 6.84375,
+      "learning_rate": 0.0004872158136796646,
+      "loss": 15.0813,
+      "step": 52270
+    },
+    {
+      "epoch": 0.07744313232880445,
+      "grad_norm": 6.03125,
+      "learning_rate": 0.00048721334421216085,
+      "loss": 15.2112,
+      "step": 52280
+    },
+    {
+      "epoch": 0.07745794547576866,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.0004872108747446571,
+      "loss": 15.1543,
+      "step": 52290
+    },
+    {
+      "epoch": 0.07747275862273285,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048720840527715324,
+      "loss": 15.1687,
+      "step": 52300
+    },
+    {
+      "epoch": 0.07748757176969705,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.0004872059358096495,
+      "loss": 15.0261,
+      "step": 52310
+    },
+    {
+      "epoch": 0.07750238491666124,
+      "grad_norm": 5.71875,
+      "learning_rate": 0.00048720346634214574,
+      "loss": 15.1831,
+      "step": 52320
+    },
+    {
+      "epoch": 0.07751719806362543,
+      "grad_norm": 5.625,
+      "learning_rate": 0.00048720099687464194,
+      "loss": 15.1855,
+      "step": 52330
+    },
+    {
+      "epoch": 0.07753201121058963,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.00048719852740713814,
+      "loss": 15.1639,
+      "step": 52340
+    },
+    {
+      "epoch": 0.07754682435755382,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004871960579396344,
+      "loss": 15.1922,
+      "step": 52350
+    },
+    {
+      "epoch": 0.07756163750451801,
+      "grad_norm": 6.09375,
+      "learning_rate": 0.0004871935884721306,
+      "loss": 15.1293,
+      "step": 52360
+    },
+    {
+      "epoch": 0.0775764506514822,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.00048719111900462684,
+      "loss": 15.1444,
+      "step": 52370
+    },
+    {
+      "epoch": 0.0775912637984464,
+      "grad_norm": 6.09375,
+      "learning_rate": 0.000487188649537123,
+      "loss": 15.1006,
+      "step": 52380
+    },
+    {
+      "epoch": 0.0776060769454106,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00048718618006961923,
+      "loss": 15.1289,
+      "step": 52390
+    },
+    {
+      "epoch": 0.07762089009237479,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004871837106021155,
+      "loss": 15.2041,
+      "step": 52400
+    },
+    {
+      "epoch": 0.07763570323933898,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004871812411346117,
+      "loss": 15.1185,
+      "step": 52410
+    },
+    {
+      "epoch": 0.07765051638630317,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004871787716671079,
+      "loss": 15.0778,
+      "step": 52420
+    },
+    {
+      "epoch": 0.07766532953326737,
+      "grad_norm": 6.0,
+      "learning_rate": 0.0004871763021996041,
+      "loss": 15.0567,
+      "step": 52430
+    },
+    {
+      "epoch": 0.07768014268023156,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.0004871738327321003,
+      "loss": 15.0504,
+      "step": 52440
+    },
+    {
+      "epoch": 0.07769495582719575,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.0004871713632645965,
+      "loss": 15.094,
+      "step": 52450
+    },
+    {
+      "epoch": 0.07770976897415995,
+      "grad_norm": 7.09375,
+      "learning_rate": 0.00048716889379709277,
+      "loss": 15.0712,
+      "step": 52460
+    },
+    {
+      "epoch": 0.07772458212112414,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048716642432958897,
+      "loss": 15.0745,
+      "step": 52470
+    },
+    {
+      "epoch": 0.07773939526808833,
+      "grad_norm": 5.5625,
+      "learning_rate": 0.0004871639548620852,
+      "loss": 15.0134,
+      "step": 52480
+    },
+    {
+      "epoch": 0.07775420841505253,
+      "grad_norm": 6.25,
+      "learning_rate": 0.00048716148539458136,
+      "loss": 15.1188,
+      "step": 52490
+    },
+    {
+      "epoch": 0.07776902156201672,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004871590159270776,
+      "loss": 15.0676,
+      "step": 52500
+    },
+    {
+      "epoch": 0.07778383470898091,
+      "grad_norm": 5.78125,
+      "learning_rate": 0.00048715654645957386,
+      "loss": 15.1952,
+      "step": 52510
+    },
+    {
+      "epoch": 0.0777986478559451,
+      "grad_norm": 7.5,
+      "learning_rate": 0.00048715407699207006,
+      "loss": 15.1603,
+      "step": 52520
+    },
+    {
+      "epoch": 0.0778134610029093,
+      "grad_norm": 6.0625,
+      "learning_rate": 0.00048715160752456626,
+      "loss": 15.1377,
+      "step": 52530
+    },
+    {
+      "epoch": 0.07782827414987349,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004871491380570625,
+      "loss": 15.0991,
+      "step": 52540
+    },
+    {
+      "epoch": 0.07784308729683768,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.0004871466685895587,
+      "loss": 15.0923,
+      "step": 52550
+    },
+    {
+      "epoch": 0.07785790044380188,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.00048714419912205496,
+      "loss": 15.1003,
+      "step": 52560
+    },
+    {
+      "epoch": 0.07787271359076607,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004871417296545511,
+      "loss": 15.0931,
+      "step": 52570
+    },
+    {
+      "epoch": 0.07788752673773026,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.00048713926018704735,
+      "loss": 15.0789,
+      "step": 52580
+    },
+    {
+      "epoch": 0.07790233988469446,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004871367907195436,
+      "loss": 15.0337,
+      "step": 52590
+    },
+    {
+      "epoch": 0.07791715303165865,
+      "grad_norm": 5.9375,
+      "learning_rate": 0.00048713432125203974,
+      "loss": 15.1822,
+      "step": 52600
+    },
+    {
+      "epoch": 0.07793196617862286,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.000487131851784536,
+      "loss": 15.0653,
+      "step": 52610
+    },
+    {
+      "epoch": 0.07794677932558705,
+      "grad_norm": 71.5,
+      "learning_rate": 0.00048712938231703224,
+      "loss": 15.0759,
+      "step": 52620
+    },
+    {
+      "epoch": 0.07796159247255124,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.00048712691284952844,
+      "loss": 15.1957,
+      "step": 52630
+    },
+    {
+      "epoch": 0.07797640561951544,
+      "grad_norm": 7.96875,
+      "learning_rate": 0.00048712444338202464,
+      "loss": 15.0842,
+      "step": 52640
+    },
+    {
+      "epoch": 0.07799121876647963,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004871219739145209,
+      "loss": 15.1615,
+      "step": 52650
+    },
+    {
+      "epoch": 0.07800603191344382,
+      "grad_norm": 6.375,
+      "learning_rate": 0.0004871195044470171,
+      "loss": 15.0655,
+      "step": 52660
+    },
+    {
+      "epoch": 0.07802084506040802,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048711703497951334,
+      "loss": 15.098,
+      "step": 52670
+    },
+    {
+      "epoch": 0.07803565820737221,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004871145655120095,
+      "loss": 15.1145,
+      "step": 52680
+    },
+    {
+      "epoch": 0.0780504713543364,
+      "grad_norm": 6.25,
+      "learning_rate": 0.00048711209604450573,
+      "loss": 15.0961,
+      "step": 52690
+    },
+    {
+      "epoch": 0.0780652845013006,
+      "grad_norm": 6.25,
+      "learning_rate": 0.000487109626577002,
+      "loss": 15.1241,
+      "step": 52700
+    },
+    {
+      "epoch": 0.07808009764826479,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004871071571094982,
+      "loss": 15.1092,
+      "step": 52710
+    },
+    {
+      "epoch": 0.07809491079522898,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004871046876419944,
+      "loss": 14.9957,
+      "step": 52720
+    },
+    {
+      "epoch": 0.07810972394219318,
+      "grad_norm": 7.0,
+      "learning_rate": 0.0004871022181744906,
+      "loss": 15.0922,
+      "step": 52730
+    },
+    {
+      "epoch": 0.07812453708915737,
+      "grad_norm": 26.875,
+      "learning_rate": 0.0004870997487069868,
+      "loss": 15.1285,
+      "step": 52740
+    },
+    {
+      "epoch": 0.07813935023612156,
+      "grad_norm": 6.78125,
+      "learning_rate": 0.0004870972792394831,
+      "loss": 15.0511,
+      "step": 52750
+    },
+    {
+      "epoch": 0.07815416338308576,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.00048709480977197927,
+      "loss": 15.0175,
+      "step": 52760
+    },
+    {
+      "epoch": 0.07816897653004995,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.00048709234030447547,
+      "loss": 15.1444,
+      "step": 52770
+    },
+    {
+      "epoch": 0.07818378967701414,
+      "grad_norm": 6.4375,
+      "learning_rate": 0.0004870898708369717,
+      "loss": 15.0849,
+      "step": 52780
+    },
+    {
+      "epoch": 0.07819860282397834,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00048708740136946786,
+      "loss": 15.0754,
+      "step": 52790
+    },
+    {
+      "epoch": 0.07821341597094253,
+      "grad_norm": 7.0625,
+      "learning_rate": 0.0004870849319019641,
+      "loss": 14.9947,
+      "step": 52800
+    },
+    {
+      "epoch": 0.07822822911790672,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.00048708246243446036,
+      "loss": 15.0559,
+      "step": 52810
+    },
+    {
+      "epoch": 0.07824304226487092,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.00048707999296695656,
+      "loss": 15.1065,
+      "step": 52820
+    },
+    {
+      "epoch": 0.07825785541183511,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00048707752349945276,
+      "loss": 15.0698,
+      "step": 52830
+    },
+    {
+      "epoch": 0.0782726685587993,
+      "grad_norm": 7.3125,
+      "learning_rate": 0.000487075054031949,
+      "loss": 15.0243,
+      "step": 52840
+    },
+    {
+      "epoch": 0.0782874817057635,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004870725845644452,
+      "loss": 15.0918,
+      "step": 52850
+    },
+    {
+      "epoch": 0.07830229485272769,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.00048707011509694146,
+      "loss": 15.1827,
+      "step": 52860
+    },
+    {
+      "epoch": 0.07831710799969188,
+      "grad_norm": 7.8125,
+      "learning_rate": 0.0004870676456294376,
+      "loss": 15.0726,
+      "step": 52870
+    },
+    {
+      "epoch": 0.07833192114665607,
+      "grad_norm": 6.59375,
+      "learning_rate": 0.00048706517616193385,
+      "loss": 15.0329,
+      "step": 52880
+    },
+    {
+      "epoch": 0.07834673429362027,
+      "grad_norm": 6.5,
+      "learning_rate": 0.0004870627066944301,
+      "loss": 15.0297,
+      "step": 52890
+    },
+    {
+      "epoch": 0.07836154744058446,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.0004870602372269263,
+      "loss": 15.0309,
+      "step": 52900
+    },
+    {
+      "epoch": 0.07837636058754865,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004870577677594225,
+      "loss": 15.1305,
+      "step": 52910
+    },
+    {
+      "epoch": 0.07839117373451286,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048705529829191874,
+      "loss": 15.0027,
+      "step": 52920
+    },
+    {
+      "epoch": 0.07840598688147705,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.00048705282882441494,
+      "loss": 15.0698,
+      "step": 52930
+    },
+    {
+      "epoch": 0.07842080002844125,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004870503593569112,
+      "loss": 14.9449,
+      "step": 52940
+    },
+    {
+      "epoch": 0.07843561317540544,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.0004870478898894074,
+      "loss": 15.162,
+      "step": 52950
+    },
+    {
+      "epoch": 0.07845042632236963,
+      "grad_norm": 6.625,
+      "learning_rate": 0.0004870454204219036,
+      "loss": 15.0779,
+      "step": 52960
+    },
+    {
+      "epoch": 0.07846523946933383,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.00048704295095439984,
+      "loss": 15.0824,
+      "step": 52970
+    },
+    {
+      "epoch": 0.07848005261629802,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.000487040481486896,
+      "loss": 15.0998,
+      "step": 52980
+    },
+    {
+      "epoch": 0.07849486576326221,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.00048703801201939223,
+      "loss": 15.1059,
+      "step": 52990
+    },
+    {
+      "epoch": 0.07850967891022641,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004870355425518885,
+      "loss": 14.9802,
+      "step": 53000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.145286727705905e+20,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null