Training in progress, step 54000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +703 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5f27a56368056fc32d4b7e5d23c6492ac971d8672b0381112d9e6374b2bcfdd7
 size 715030586

 version https://git-lfs.github.com/spec/v1
+oid sha256:bf305787ad6316d86bdea7c38315dd3f15c5ee8490acd682cd824bfcf750dfa4
 size 715030586

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:40a7260a893151ae68ff9abddac367eb1c219ee970995d15c8fbd3b96152db45
 size 1032262338

 version https://git-lfs.github.com/spec/v1
+oid sha256:44d8a1cc78376b4af6899f7cebe715f1016f056fd7e5074c671b903d1084112f
 size 1032262338

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bd46a22c8c930911cd104bb7d31b99dedfb69975a9d8245b24717b65ad63864b
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:23c2ea7ce854e5177af77ffba4a1bc4d516dab7464ab8c6434bdf8308557c604
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:401180f380b7886e93ac8125388345d73ee01c7449dcd0815ad64b364f075d7b
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:34d5ec65666ef883baee7446bdc1563c463c7ddd9a4dd81380c1bf04970509fa
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9bbd92622c25bc4db7b24f22c5a71d93f982330fe4e14e84d2a4379f3e6e3b40
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:79db44d0ffd3e36d713e3a17ade583d5fceb74c88819c913e30b572e1fb71816
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b64aa4d617fefae261dcad0912c0ab593e28dac4ab1727f422692cb316f9b8a1
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:2339d63fa54808ccc4d53a4d3aeab8aae8f714a715e05e2ab850f746d0a92bb6
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bb2b6eb9245f28b41270f3392f30c7084537d421539e976f89ee6d918cdaa909
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:cfaaa443ff50c514e8d740e179deb3f101e73d9201b92424d8bf52ab5c7dfc99
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.07850967891022641,
   "eval_steps": 500,
-  "global_step": 53000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -37108,6 +37108,706 @@
       "learning_rate": 0.0004870355425518885,
       "loss": 14.9802,
       "step": 53000
     }
   ],
   "logging_steps": 10,
@@ -37127,7 +37827,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1.145286727705905e+20,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.07999099360664577,
   "eval_steps": 500,
+  "global_step": 54000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004870355425518885,
       "loss": 14.9802,
       "step": 53000
+    },
+    {
+      "epoch": 0.0785244920571906,
+      "grad_norm": 7.03125,
+      "learning_rate": 0.0004870330730843847,
+      "loss": 15.0941,
+      "step": 53010
+    },
+    {
+      "epoch": 0.0785393052041548,
+      "grad_norm": 7.125,
+      "learning_rate": 0.0004870306036168809,
+      "loss": 15.1266,
+      "step": 53020
+    },
+    {
+      "epoch": 0.07855411835111899,
+      "grad_norm": 7.25,
+      "learning_rate": 0.0004870281341493771,
+      "loss": 15.1903,
+      "step": 53030
+    },
+    {
+      "epoch": 0.07856893149808318,
+      "grad_norm": 7.5625,
+      "learning_rate": 0.0004870256646818733,
+      "loss": 14.9953,
+      "step": 53040
+    },
+    {
+      "epoch": 0.07858374464504737,
+      "grad_norm": 6.03125,
+      "learning_rate": 0.0004870231952143696,
+      "loss": 14.9635,
+      "step": 53050
+    },
+    {
+      "epoch": 0.07859855779201157,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048702072574686577,
+      "loss": 14.9899,
+      "step": 53060
+    },
+    {
+      "epoch": 0.07861337093897576,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.00048701825627936197,
+      "loss": 15.2009,
+      "step": 53070
+    },
+    {
+      "epoch": 0.07862818408593995,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004870157868118582,
+      "loss": 15.0859,
+      "step": 53080
+    },
+    {
+      "epoch": 0.07864299723290415,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.0004870133173443544,
+      "loss": 15.1193,
+      "step": 53090
+    },
+    {
+      "epoch": 0.07865781037986834,
+      "grad_norm": 7.9375,
+      "learning_rate": 0.0004870108478768506,
+      "loss": 15.1977,
+      "step": 53100
+    },
+    {
+      "epoch": 0.07867262352683253,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00048700837840934686,
+      "loss": 15.1051,
+      "step": 53110
+    },
+    {
+      "epoch": 0.07868743667379673,
+      "grad_norm": 6.5,
+      "learning_rate": 0.00048700590894184306,
+      "loss": 15.1508,
+      "step": 53120
+    },
+    {
+      "epoch": 0.07870224982076092,
+      "grad_norm": 7.53125,
+      "learning_rate": 0.00048700343947433926,
+      "loss": 15.192,
+      "step": 53130
+    },
+    {
+      "epoch": 0.07871706296772511,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004870009700068355,
+      "loss": 15.1088,
+      "step": 53140
+    },
+    {
+      "epoch": 0.0787318761146893,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004869985005393317,
+      "loss": 15.109,
+      "step": 53150
+    },
+    {
+      "epoch": 0.0787466892616535,
+      "grad_norm": 6.125,
+      "learning_rate": 0.00048699603107182796,
+      "loss": 15.1353,
+      "step": 53160
+    },
+    {
+      "epoch": 0.07876150240861769,
+      "grad_norm": 6.125,
+      "learning_rate": 0.0004869935616043241,
+      "loss": 15.0702,
+      "step": 53170
+    },
+    {
+      "epoch": 0.07877631555558189,
+      "grad_norm": 6.0625,
+      "learning_rate": 0.00048699109213682035,
+      "loss": 15.0845,
+      "step": 53180
+    },
+    {
+      "epoch": 0.07879112870254608,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.0004869886226693166,
+      "loss": 14.9745,
+      "step": 53190
+    },
+    {
+      "epoch": 0.07880594184951027,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004869861532018128,
+      "loss": 14.9899,
+      "step": 53200
+    },
+    {
+      "epoch": 0.07882075499647447,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.000486983683734309,
+      "loss": 15.1241,
+      "step": 53210
+    },
+    {
+      "epoch": 0.07883556814343866,
+      "grad_norm": 6.25,
+      "learning_rate": 0.00048698121426680524,
+      "loss": 15.0384,
+      "step": 53220
+    },
+    {
+      "epoch": 0.07885038129040285,
+      "grad_norm": 8.625,
+      "learning_rate": 0.00048697874479930144,
+      "loss": 15.1642,
+      "step": 53230
+    },
+    {
+      "epoch": 0.07886519443736706,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004869762753317977,
+      "loss": 15.1911,
+      "step": 53240
+    },
+    {
+      "epoch": 0.07888000758433125,
+      "grad_norm": 7.59375,
+      "learning_rate": 0.0004869738058642939,
+      "loss": 15.1585,
+      "step": 53250
+    },
+    {
+      "epoch": 0.07889482073129545,
+      "grad_norm": 6.75,
+      "learning_rate": 0.0004869713363967901,
+      "loss": 15.0286,
+      "step": 53260
+    },
+    {
+      "epoch": 0.07890963387825964,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.00048696886692928634,
+      "loss": 15.0443,
+      "step": 53270
+    },
+    {
+      "epoch": 0.07892444702522383,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004869663974617825,
+      "loss": 15.0719,
+      "step": 53280
+    },
+    {
+      "epoch": 0.07893926017218802,
+      "grad_norm": 6.5,
+      "learning_rate": 0.00048696392799427873,
+      "loss": 15.0195,
+      "step": 53290
+    },
+    {
+      "epoch": 0.07895407331915222,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.000486961458526775,
+      "loss": 15.0844,
+      "step": 53300
+    },
+    {
+      "epoch": 0.07896888646611641,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004869589890592712,
+      "loss": 15.0515,
+      "step": 53310
+    },
+    {
+      "epoch": 0.0789836996130806,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004869565195917674,
+      "loss": 15.0778,
+      "step": 53320
+    },
+    {
+      "epoch": 0.0789985127600448,
+      "grad_norm": 6.875,
+      "learning_rate": 0.0004869540501242636,
+      "loss": 15.1232,
+      "step": 53330
+    },
+    {
+      "epoch": 0.07901332590700899,
+      "grad_norm": 6.28125,
+      "learning_rate": 0.0004869515806567598,
+      "loss": 15.0529,
+      "step": 53340
+    },
+    {
+      "epoch": 0.07902813905397318,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004869491111892561,
+      "loss": 14.9971,
+      "step": 53350
+    },
+    {
+      "epoch": 0.07904295220093738,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.00048694664172175227,
+      "loss": 15.1074,
+      "step": 53360
+    },
+    {
+      "epoch": 0.07905776534790157,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.00048694417225424847,
+      "loss": 15.1246,
+      "step": 53370
+    },
+    {
+      "epoch": 0.07907257849486576,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004869417027867447,
+      "loss": 15.1153,
+      "step": 53380
+    },
+    {
+      "epoch": 0.07908739164182996,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004869392333192409,
+      "loss": 15.0776,
+      "step": 53390
+    },
+    {
+      "epoch": 0.07910220478879415,
+      "grad_norm": 10.1875,
+      "learning_rate": 0.0004869367638517371,
+      "loss": 15.1103,
+      "step": 53400
+    },
+    {
+      "epoch": 0.07911701793575834,
+      "grad_norm": 7.125,
+      "learning_rate": 0.00048693429438423336,
+      "loss": 15.0772,
+      "step": 53410
+    },
+    {
+      "epoch": 0.07913183108272254,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.00048693182491672956,
+      "loss": 15.0286,
+      "step": 53420
+    },
+    {
+      "epoch": 0.07914664422968673,
+      "grad_norm": 18.875,
+      "learning_rate": 0.0004869293554492258,
+      "loss": 15.0284,
+      "step": 53430
+    },
+    {
+      "epoch": 0.07916145737665092,
+      "grad_norm": 11.4375,
+      "learning_rate": 0.000486926885981722,
+      "loss": 15.1057,
+      "step": 53440
+    },
+    {
+      "epoch": 0.07917627052361512,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0004869244165142182,
+      "loss": 15.0774,
+      "step": 53450
+    },
+    {
+      "epoch": 0.07919108367057931,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.00048692194704671446,
+      "loss": 15.0241,
+      "step": 53460
+    },
+    {
+      "epoch": 0.0792058968175435,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004869194775792106,
+      "loss": 15.0823,
+      "step": 53470
+    },
+    {
+      "epoch": 0.0792207099645077,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048691700811170685,
+      "loss": 15.0525,
+      "step": 53480
+    },
+    {
+      "epoch": 0.07923552311147189,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.0004869145386442031,
+      "loss": 15.044,
+      "step": 53490
+    },
+    {
+      "epoch": 0.07925033625843608,
+      "grad_norm": 6.34375,
+      "learning_rate": 0.0004869120691766993,
+      "loss": 15.0362,
+      "step": 53500
+    },
+    {
+      "epoch": 0.07926514940540028,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004869095997091955,
+      "loss": 15.0063,
+      "step": 53510
+    },
+    {
+      "epoch": 0.07927996255236447,
+      "grad_norm": 6.5,
+      "learning_rate": 0.00048690713024169175,
+      "loss": 15.0569,
+      "step": 53520
+    },
+    {
+      "epoch": 0.07929477569932866,
+      "grad_norm": 7.1875,
+      "learning_rate": 0.00048690466077418794,
+      "loss": 15.0869,
+      "step": 53530
+    },
+    {
+      "epoch": 0.07930958884629286,
+      "grad_norm": 7.53125,
+      "learning_rate": 0.0004869021913066842,
+      "loss": 15.0454,
+      "step": 53540
+    },
+    {
+      "epoch": 0.07932440199325705,
+      "grad_norm": 5.78125,
+      "learning_rate": 0.0004868997218391804,
+      "loss": 15.01,
+      "step": 53550
+    },
+    {
+      "epoch": 0.07933921514022126,
+      "grad_norm": 6.5,
+      "learning_rate": 0.0004868972523716766,
+      "loss": 14.9731,
+      "step": 53560
+    },
+    {
+      "epoch": 0.07935402828718545,
+      "grad_norm": 5.46875,
+      "learning_rate": 0.00048689478290417284,
+      "loss": 15.0861,
+      "step": 53570
+    },
+    {
+      "epoch": 0.07936884143414964,
+      "grad_norm": 7.34375,
+      "learning_rate": 0.00048689231343666903,
+      "loss": 15.052,
+      "step": 53580
+    },
+    {
+      "epoch": 0.07938365458111384,
+      "grad_norm": 7.0,
+      "learning_rate": 0.00048688984396916523,
+      "loss": 15.0928,
+      "step": 53590
+    },
+    {
+      "epoch": 0.07939846772807803,
+      "grad_norm": 7.46875,
+      "learning_rate": 0.0004868873745016615,
+      "loss": 15.1057,
+      "step": 53600
+    },
+    {
+      "epoch": 0.07941328087504222,
+      "grad_norm": 9.75,
+      "learning_rate": 0.0004868849050341577,
+      "loss": 15.0796,
+      "step": 53610
+    },
+    {
+      "epoch": 0.07942809402200642,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004868824355666539,
+      "loss": 15.1789,
+      "step": 53620
+    },
+    {
+      "epoch": 0.07944290716897061,
+      "grad_norm": 6.125,
+      "learning_rate": 0.0004868799660991501,
+      "loss": 15.1064,
+      "step": 53630
+    },
+    {
+      "epoch": 0.0794577203159348,
+      "grad_norm": 6.1875,
+      "learning_rate": 0.0004868774966316463,
+      "loss": 15.0454,
+      "step": 53640
+    },
+    {
+      "epoch": 0.079472533462899,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.0004868750271641426,
+      "loss": 15.0827,
+      "step": 53650
+    },
+    {
+      "epoch": 0.07948734660986319,
+      "grad_norm": 5.53125,
+      "learning_rate": 0.00048687255769663877,
+      "loss": 15.0536,
+      "step": 53660
+    },
+    {
+      "epoch": 0.07950215975682738,
+      "grad_norm": 6.71875,
+      "learning_rate": 0.00048687008822913497,
+      "loss": 15.0194,
+      "step": 53670
+    },
+    {
+      "epoch": 0.07951697290379157,
+      "grad_norm": 7.5,
+      "learning_rate": 0.0004868676187616312,
+      "loss": 15.0533,
+      "step": 53680
+    },
+    {
+      "epoch": 0.07953178605075577,
+      "grad_norm": 6.15625,
+      "learning_rate": 0.0004868651492941274,
+      "loss": 15.1152,
+      "step": 53690
+    },
+    {
+      "epoch": 0.07954659919771996,
+      "grad_norm": 6.9375,
+      "learning_rate": 0.0004868626798266236,
+      "loss": 15.0846,
+      "step": 53700
+    },
+    {
+      "epoch": 0.07956141234468415,
+      "grad_norm": 6.625,
+      "learning_rate": 0.00048686021035911986,
+      "loss": 15.011,
+      "step": 53710
+    },
+    {
+      "epoch": 0.07957622549164835,
+      "grad_norm": 6.5625,
+      "learning_rate": 0.00048685774089161606,
+      "loss": 14.9982,
+      "step": 53720
+    },
+    {
+      "epoch": 0.07959103863861254,
+      "grad_norm": 5.78125,
+      "learning_rate": 0.0004868552714241123,
+      "loss": 14.9968,
+      "step": 53730
+    },
+    {
+      "epoch": 0.07960585178557673,
+      "grad_norm": 6.0625,
+      "learning_rate": 0.0004868528019566085,
+      "loss": 15.0537,
+      "step": 53740
+    },
+    {
+      "epoch": 0.07962066493254093,
+      "grad_norm": 5.875,
+      "learning_rate": 0.0004868503324891047,
+      "loss": 15.077,
+      "step": 53750
+    },
+    {
+      "epoch": 0.07963547807950512,
+      "grad_norm": 10.6875,
+      "learning_rate": 0.00048684786302160096,
+      "loss": 15.0749,
+      "step": 53760
+    },
+    {
+      "epoch": 0.07965029122646931,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.00048684539355409715,
+      "loss": 14.9793,
+      "step": 53770
+    },
+    {
+      "epoch": 0.0796651043734335,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00048684292408659335,
+      "loss": 14.9148,
+      "step": 53780
+    },
+    {
+      "epoch": 0.0796799175203977,
+      "grad_norm": 53.0,
+      "learning_rate": 0.0004868404546190896,
+      "loss": 15.0931,
+      "step": 53790
+    },
+    {
+      "epoch": 0.0796947306673619,
+      "grad_norm": 6.46875,
+      "learning_rate": 0.0004868379851515858,
+      "loss": 15.0475,
+      "step": 53800
+    },
+    {
+      "epoch": 0.07970954381432609,
+      "grad_norm": 7.28125,
+      "learning_rate": 0.000486835515684082,
+      "loss": 15.0885,
+      "step": 53810
+    },
+    {
+      "epoch": 0.07972435696129028,
+      "grad_norm": 12.6875,
+      "learning_rate": 0.00048683304621657825,
+      "loss": 15.0429,
+      "step": 53820
+    },
+    {
+      "epoch": 0.07973917010825447,
+      "grad_norm": 7.21875,
+      "learning_rate": 0.00048683057674907444,
+      "loss": 15.0042,
+      "step": 53830
+    },
+    {
+      "epoch": 0.07975398325521867,
+      "grad_norm": 7.25,
+      "learning_rate": 0.0004868281072815707,
+      "loss": 15.0008,
+      "step": 53840
+    },
+    {
+      "epoch": 0.07976879640218286,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004868256378140669,
+      "loss": 15.0597,
+      "step": 53850
+    },
+    {
+      "epoch": 0.07978360954914705,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004868231683465631,
+      "loss": 14.965,
+      "step": 53860
+    },
+    {
+      "epoch": 0.07979842269611125,
+      "grad_norm": 8.125,
+      "learning_rate": 0.00048682069887905934,
+      "loss": 15.0893,
+      "step": 53870
+    },
+    {
+      "epoch": 0.07981323584307545,
+      "grad_norm": 6.90625,
+      "learning_rate": 0.00048681822941155553,
+      "loss": 15.0194,
+      "step": 53880
+    },
+    {
+      "epoch": 0.07982804899003965,
+      "grad_norm": 21.375,
+      "learning_rate": 0.00048681575994405173,
+      "loss": 15.0301,
+      "step": 53890
+    },
+    {
+      "epoch": 0.07984286213700384,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.000486813290476548,
+      "loss": 15.0558,
+      "step": 53900
+    },
+    {
+      "epoch": 0.07985767528396803,
+      "grad_norm": 9.625,
+      "learning_rate": 0.0004868108210090442,
+      "loss": 15.0841,
+      "step": 53910
+    },
+    {
+      "epoch": 0.07987248843093223,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.00048680835154154043,
+      "loss": 15.0492,
+      "step": 53920
+    },
+    {
+      "epoch": 0.07988730157789642,
+      "grad_norm": 7.5625,
+      "learning_rate": 0.00048680588207403663,
+      "loss": 15.0187,
+      "step": 53930
+    },
+    {
+      "epoch": 0.07990211472486061,
+      "grad_norm": 6.6875,
+      "learning_rate": 0.0004868034126065328,
+      "loss": 14.9801,
+      "step": 53940
+    },
+    {
+      "epoch": 0.0799169278718248,
+      "grad_norm": 6.65625,
+      "learning_rate": 0.0004868009431390291,
+      "loss": 14.9848,
+      "step": 53950
+    },
+    {
+      "epoch": 0.079931741018789,
+      "grad_norm": 6.21875,
+      "learning_rate": 0.0004867984736715253,
+      "loss": 14.9927,
+      "step": 53960
+    },
+    {
+      "epoch": 0.07994655416575319,
+      "grad_norm": 6.375,
+      "learning_rate": 0.00048679600420402147,
+      "loss": 15.0322,
+      "step": 53970
+    },
+    {
+      "epoch": 0.07996136731271739,
+      "grad_norm": 6.8125,
+      "learning_rate": 0.0004867935347365177,
+      "loss": 15.0364,
+      "step": 53980
+    },
+    {
+      "epoch": 0.07997618045968158,
+      "grad_norm": 6.53125,
+      "learning_rate": 0.0004867910652690139,
+      "loss": 14.9742,
+      "step": 53990
+    },
+    {
+      "epoch": 0.07999099360664577,
+      "grad_norm": 58.0,
+      "learning_rate": 0.0004867885958015101,
+      "loss": 15.0833,
+      "step": 54000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 1.1669001977630556e+20,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null