Training in progress, step 123000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +703 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bbeaafd6ccfc1a71df631284d94737e176690a5d53963ee816742d55c66f65c3
 size 715030586

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e6c29c60b4be649b8c35881bfd701df8ed9ad086927795ba8119504814ccc63
 size 715030586

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d105f7bc65410e3f121dcaf59b93fed762c9a65fe7e4c8955d1292cb6a6876c9
 size 1032262338

 version https://git-lfs.github.com/spec/v1
+oid sha256:e7893e25921a46906c89e0422b957011974864566379e8e520e9cb3dcb18624a
 size 1032262338

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9da9fa9d20ad8eaec174be663669ed0dd6272da27b984848d5af57376e63a91f
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:ce2fea6d96cf72a768fa7a147391dd04b136c1b2c50e77e9675c8f55cb2b1eaa
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4fa62ee06d52a6750aacfff038383024cfa35b60c5b93fdacff2bca27d4639e6
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:74b4a4d321b5c5ed09a82f77da55cc3d586ac287fee30013935a69f592ce05be
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1810a8bef166e692355d67f304bf8cfd105103f952547985645833c2feea07b4
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:c35d4bd4f1454a7b87cbf6da1f2688fe2add4104efb8d280fd7dde4cb2eaf2d6
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e06c20da7b3de893663276090538aadc1b5a365c5cfce0a0140a2548043a2773
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:505f4e13010a4d0cc4f9be37b8e271ade3bd1580ce75e87673ff271bd9e88c38
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60ff0e39fa2ada0903d0841edad35ce944a197d8f614422d9d9915f5101a0a12
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:6cf85b6d54d63d639c68431b39fe1a75b74f180590a5701ac64352393f1a29a1
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.06024013098772089,
   "eval_steps": 500,
-  "global_step": 122000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -85408,6 +85408,706 @@
       "learning_rate": 0.000490000385173519,
       "loss": 14.6893,
       "step": 122000
     }
   ],
   "logging_steps": 10,
@@ -85427,7 +86127,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.6015940864424647e+20,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.06073390255319401,
   "eval_steps": 500,
+  "global_step": 123000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.000490000385173519,
       "loss": 14.6893,
       "step": 122000
+    },
+    {
+      "epoch": 0.06024506870337562,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004899995621531792,
+      "loss": 14.7306,
+      "step": 122010
+    },
+    {
+      "epoch": 0.060250006419030354,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004899987391328394,
+      "loss": 14.6219,
+      "step": 122020
+    },
+    {
+      "epoch": 0.06025494413468508,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004899979161124997,
+      "loss": 14.5998,
+      "step": 122030
+    },
+    {
+      "epoch": 0.060259881850339814,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004899970930921599,
+      "loss": 14.6914,
+      "step": 122040
+    },
+    {
+      "epoch": 0.06026481956599455,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004899962700718201,
+      "loss": 14.5592,
+      "step": 122050
+    },
+    {
+      "epoch": 0.060269757281649274,
+      "grad_norm": 14.9375,
+      "learning_rate": 0.0004899954470514803,
+      "loss": 14.6223,
+      "step": 122060
+    },
+    {
+      "epoch": 0.06027469499730401,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004899946240311405,
+      "loss": 14.6105,
+      "step": 122070
+    },
+    {
+      "epoch": 0.06027963271295874,
+      "grad_norm": 12.0,
+      "learning_rate": 0.0004899938010108006,
+      "loss": 14.6793,
+      "step": 122080
+    },
+    {
+      "epoch": 0.06028457042861347,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.0004899929779904609,
+      "loss": 14.6072,
+      "step": 122090
+    },
+    {
+      "epoch": 0.0602895081442682,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.000489992154970121,
+      "loss": 14.6658,
+      "step": 122100
+    },
+    {
+      "epoch": 0.060294445859922934,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004899913319497813,
+      "loss": 14.7316,
+      "step": 122110
+    },
+    {
+      "epoch": 0.06029938357557766,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004899905089294415,
+      "loss": 14.6902,
+      "step": 122120
+    },
+    {
+      "epoch": 0.06030432129123239,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004899896859091017,
+      "loss": 14.5037,
+      "step": 122130
+    },
+    {
+      "epoch": 0.06030925900688713,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004899888628887619,
+      "loss": 14.6571,
+      "step": 122140
+    },
+    {
+      "epoch": 0.06031419672254186,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004899880398684222,
+      "loss": 14.4471,
+      "step": 122150
+    },
+    {
+      "epoch": 0.06031913443819659,
+      "grad_norm": 10.125,
+      "learning_rate": 0.0004899872168480822,
+      "loss": 14.9159,
+      "step": 122160
+    },
+    {
+      "epoch": 0.06032407215385132,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.0004899863938277425,
+      "loss": 14.6641,
+      "step": 122170
+    },
+    {
+      "epoch": 0.06032900986950605,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.0004899855708074027,
+      "loss": 14.4194,
+      "step": 122180
+    },
+    {
+      "epoch": 0.06033394758516078,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.0004899847477870629,
+      "loss": 14.6702,
+      "step": 122190
+    },
+    {
+      "epoch": 0.06033888530081551,
+      "grad_norm": 11.3125,
+      "learning_rate": 0.0004899839247667231,
+      "loss": 14.4618,
+      "step": 122200
+    },
+    {
+      "epoch": 0.06034382301647025,
+      "grad_norm": 11.0625,
+      "learning_rate": 0.0004899831017463833,
+      "loss": 14.5465,
+      "step": 122210
+    },
+    {
+      "epoch": 0.06034876073212497,
+      "grad_norm": 9.625,
+      "learning_rate": 0.0004899822787260435,
+      "loss": 14.7497,
+      "step": 122220
+    },
+    {
+      "epoch": 0.060353698447779706,
+      "grad_norm": 11.4375,
+      "learning_rate": 0.0004899814557057038,
+      "loss": 14.5346,
+      "step": 122230
+    },
+    {
+      "epoch": 0.06035863616343444,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.000489980632685364,
+      "loss": 14.5608,
+      "step": 122240
+    },
+    {
+      "epoch": 0.060363573879089166,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004899798096650241,
+      "loss": 14.5786,
+      "step": 122250
+    },
+    {
+      "epoch": 0.0603685115947439,
+      "grad_norm": 11.3125,
+      "learning_rate": 0.0004899789866446844,
+      "loss": 14.5332,
+      "step": 122260
+    },
+    {
+      "epoch": 0.06037344931039863,
+      "grad_norm": 55.25,
+      "learning_rate": 0.0004899781636243445,
+      "loss": 14.5888,
+      "step": 122270
+    },
+    {
+      "epoch": 0.06037838702605336,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004899773406040048,
+      "loss": 14.7294,
+      "step": 122280
+    },
+    {
+      "epoch": 0.06038332474170809,
+      "grad_norm": 10.625,
+      "learning_rate": 0.000489976517583665,
+      "loss": 14.5032,
+      "step": 122290
+    },
+    {
+      "epoch": 0.060388262457362826,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004899756945633252,
+      "loss": 14.6904,
+      "step": 122300
+    },
+    {
+      "epoch": 0.06039320017301756,
+      "grad_norm": 12.25,
+      "learning_rate": 0.0004899748715429854,
+      "loss": 14.7875,
+      "step": 122310
+    },
+    {
+      "epoch": 0.060398137888672286,
+      "grad_norm": 7.75,
+      "learning_rate": 0.0004899740485226455,
+      "loss": 14.519,
+      "step": 122320
+    },
+    {
+      "epoch": 0.06040307560432702,
+      "grad_norm": 10.875,
+      "learning_rate": 0.0004899732255023057,
+      "loss": 14.7135,
+      "step": 122330
+    },
+    {
+      "epoch": 0.06040801331998175,
+      "grad_norm": 16.75,
+      "learning_rate": 0.000489972402481966,
+      "loss": 14.7901,
+      "step": 122340
+    },
+    {
+      "epoch": 0.06041295103563648,
+      "grad_norm": 12.625,
+      "learning_rate": 0.0004899715794616262,
+      "loss": 14.6928,
+      "step": 122350
+    },
+    {
+      "epoch": 0.06041788875129121,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004899707564412864,
+      "loss": 14.7452,
+      "step": 122360
+    },
+    {
+      "epoch": 0.060422826466945946,
+      "grad_norm": 10.3125,
+      "learning_rate": 0.0004899699334209466,
+      "loss": 14.8396,
+      "step": 122370
+    },
+    {
+      "epoch": 0.06042776418260067,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004899691104006068,
+      "loss": 14.6461,
+      "step": 122380
+    },
+    {
+      "epoch": 0.060432701898255406,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.000489968287380267,
+      "loss": 14.8123,
+      "step": 122390
+    },
+    {
+      "epoch": 0.06043763961391014,
+      "grad_norm": 8.75,
+      "learning_rate": 0.0004899674643599273,
+      "loss": 14.5848,
+      "step": 122400
+    },
+    {
+      "epoch": 0.060442577329564866,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004899666413395874,
+      "loss": 14.5446,
+      "step": 122410
+    },
+    {
+      "epoch": 0.0604475150452196,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004899658183192476,
+      "loss": 14.6535,
+      "step": 122420
+    },
+    {
+      "epoch": 0.06045245276087433,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.0004899649952989078,
+      "loss": 14.7343,
+      "step": 122430
+    },
+    {
+      "epoch": 0.06045739047652906,
+      "grad_norm": 9.0,
+      "learning_rate": 0.000489964172278568,
+      "loss": 14.7516,
+      "step": 122440
+    },
+    {
+      "epoch": 0.06046232819218379,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004899633492582282,
+      "loss": 14.397,
+      "step": 122450
+    },
+    {
+      "epoch": 0.060467265907838526,
+      "grad_norm": 10.3125,
+      "learning_rate": 0.0004899625262378885,
+      "loss": 14.6658,
+      "step": 122460
+    },
+    {
+      "epoch": 0.06047220362349326,
+      "grad_norm": 17.125,
+      "learning_rate": 0.0004899617032175487,
+      "loss": 14.6587,
+      "step": 122470
+    },
+    {
+      "epoch": 0.060477141339147986,
+      "grad_norm": 11.625,
+      "learning_rate": 0.0004899608801972089,
+      "loss": 14.542,
+      "step": 122480
+    },
+    {
+      "epoch": 0.06048207905480272,
+      "grad_norm": 10.75,
+      "learning_rate": 0.000489960057176869,
+      "loss": 14.8654,
+      "step": 122490
+    },
+    {
+      "epoch": 0.06048701677045745,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004899592341565292,
+      "loss": 14.757,
+      "step": 122500
+    },
+    {
+      "epoch": 0.06049195448611218,
+      "grad_norm": 11.0625,
+      "learning_rate": 0.0004899584111361894,
+      "loss": 14.659,
+      "step": 122510
+    },
+    {
+      "epoch": 0.06049689220176691,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.0004899575881158497,
+      "loss": 14.6703,
+      "step": 122520
+    },
+    {
+      "epoch": 0.060501829917421646,
+      "grad_norm": 9.125,
+      "learning_rate": 0.0004899567650955099,
+      "loss": 14.7973,
+      "step": 122530
+    },
+    {
+      "epoch": 0.06050676763307637,
+      "grad_norm": 7.8125,
+      "learning_rate": 0.0004899559420751701,
+      "loss": 14.6864,
+      "step": 122540
+    },
+    {
+      "epoch": 0.060511705348731105,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004899551190548303,
+      "loss": 14.7156,
+      "step": 122550
+    },
+    {
+      "epoch": 0.06051664306438584,
+      "grad_norm": 7.9375,
+      "learning_rate": 0.0004899542960344905,
+      "loss": 14.6875,
+      "step": 122560
+    },
+    {
+      "epoch": 0.060521580780040565,
+      "grad_norm": 9.5,
+      "learning_rate": 0.0004899534730141507,
+      "loss": 14.4033,
+      "step": 122570
+    },
+    {
+      "epoch": 0.0605265184956953,
+      "grad_norm": 9.9375,
+      "learning_rate": 0.0004899526499938109,
+      "loss": 14.635,
+      "step": 122580
+    },
+    {
+      "epoch": 0.06053145621135003,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004899518269734711,
+      "loss": 14.6508,
+      "step": 122590
+    },
+    {
+      "epoch": 0.06053639392700476,
+      "grad_norm": 22.25,
+      "learning_rate": 0.0004899510039531313,
+      "loss": 14.8177,
+      "step": 122600
+    },
+    {
+      "epoch": 0.06054133164265949,
+      "grad_norm": 48.0,
+      "learning_rate": 0.0004899501809327915,
+      "loss": 14.6766,
+      "step": 122610
+    },
+    {
+      "epoch": 0.060546269358314225,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004899493579124517,
+      "loss": 14.6936,
+      "step": 122620
+    },
+    {
+      "epoch": 0.06055120707396896,
+      "grad_norm": 8.75,
+      "learning_rate": 0.000489948534892112,
+      "loss": 14.6728,
+      "step": 122630
+    },
+    {
+      "epoch": 0.060556144789623685,
+      "grad_norm": 10.125,
+      "learning_rate": 0.0004899477118717722,
+      "loss": 14.6365,
+      "step": 122640
+    },
+    {
+      "epoch": 0.06056108250527842,
+      "grad_norm": 10.125,
+      "learning_rate": 0.0004899468888514323,
+      "loss": 14.4662,
+      "step": 122650
+    },
+    {
+      "epoch": 0.06056602022093315,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004899460658310925,
+      "loss": 14.5871,
+      "step": 122660
+    },
+    {
+      "epoch": 0.06057095793658788,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004899452428107527,
+      "loss": 14.8044,
+      "step": 122670
+    },
+    {
+      "epoch": 0.06057589565224261,
+      "grad_norm": 17.875,
+      "learning_rate": 0.0004899444197904129,
+      "loss": 14.7411,
+      "step": 122680
+    },
+    {
+      "epoch": 0.060580833367897345,
+      "grad_norm": 7.65625,
+      "learning_rate": 0.0004899435967700732,
+      "loss": 14.6095,
+      "step": 122690
+    },
+    {
+      "epoch": 0.06058577108355207,
+      "grad_norm": 11.875,
+      "learning_rate": 0.0004899427737497334,
+      "loss": 14.6202,
+      "step": 122700
+    },
+    {
+      "epoch": 0.060590708799206805,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004899419507293936,
+      "loss": 14.6142,
+      "step": 122710
+    },
+    {
+      "epoch": 0.06059564651486154,
+      "grad_norm": 8.375,
+      "learning_rate": 0.0004899411277090538,
+      "loss": 14.7502,
+      "step": 122720
+    },
+    {
+      "epoch": 0.060600584230516265,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.0004899403046887139,
+      "loss": 14.5435,
+      "step": 122730
+    },
+    {
+      "epoch": 0.060605521946171,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.0004899394816683741,
+      "loss": 14.602,
+      "step": 122740
+    },
+    {
+      "epoch": 0.06061045966182573,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004899386586480344,
+      "loss": 14.9614,
+      "step": 122750
+    },
+    {
+      "epoch": 0.06061539737748046,
+      "grad_norm": 9.25,
+      "learning_rate": 0.0004899378356276946,
+      "loss": 14.6614,
+      "step": 122760
+    },
+    {
+      "epoch": 0.06062033509313519,
+      "grad_norm": 30.25,
+      "learning_rate": 0.0004899370126073548,
+      "loss": 14.6029,
+      "step": 122770
+    },
+    {
+      "epoch": 0.060625272808789925,
+      "grad_norm": 10.625,
+      "learning_rate": 0.000489936189587015,
+      "loss": 14.6339,
+      "step": 122780
+    },
+    {
+      "epoch": 0.06063021052444466,
+      "grad_norm": 8.5,
+      "learning_rate": 0.0004899353665666752,
+      "loss": 14.6306,
+      "step": 122790
+    },
+    {
+      "epoch": 0.060635148240099385,
+      "grad_norm": 9.75,
+      "learning_rate": 0.0004899345435463354,
+      "loss": 14.6902,
+      "step": 122800
+    },
+    {
+      "epoch": 0.06064008595575412,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004899337205259957,
+      "loss": 14.7273,
+      "step": 122810
+    },
+    {
+      "epoch": 0.06064502367140885,
+      "grad_norm": 10.75,
+      "learning_rate": 0.0004899328975056557,
+      "loss": 14.7909,
+      "step": 122820
+    },
+    {
+      "epoch": 0.06064996138706358,
+      "grad_norm": 11.0,
+      "learning_rate": 0.000489932074485316,
+      "loss": 14.5289,
+      "step": 122830
+    },
+    {
+      "epoch": 0.06065489910271831,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.0004899312514649762,
+      "loss": 14.6377,
+      "step": 122840
+    },
+    {
+      "epoch": 0.060659836818373045,
+      "grad_norm": 10.125,
+      "learning_rate": 0.0004899304284446364,
+      "loss": 14.6517,
+      "step": 122850
+    },
+    {
+      "epoch": 0.06066477453402777,
+      "grad_norm": 9.75,
+      "learning_rate": 0.0004899296054242966,
+      "loss": 14.5615,
+      "step": 122860
+    },
+    {
+      "epoch": 0.060669712249682504,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004899287824039569,
+      "loss": 14.6623,
+      "step": 122870
+    },
+    {
+      "epoch": 0.06067464996533724,
+      "grad_norm": 9.125,
+      "learning_rate": 0.000489927959383617,
+      "loss": 14.6153,
+      "step": 122880
+    },
+    {
+      "epoch": 0.060679587680991964,
+      "grad_norm": 49.0,
+      "learning_rate": 0.0004899271363632773,
+      "loss": 14.6059,
+      "step": 122890
+    },
+    {
+      "epoch": 0.0606845253966467,
+      "grad_norm": 11.0,
+      "learning_rate": 0.0004899263133429374,
+      "loss": 14.5997,
+      "step": 122900
+    },
+    {
+      "epoch": 0.06068946311230143,
+      "grad_norm": 8.25,
+      "learning_rate": 0.0004899254903225976,
+      "loss": 14.7843,
+      "step": 122910
+    },
+    {
+      "epoch": 0.06069440082795616,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.0004899246673022579,
+      "loss": 14.7203,
+      "step": 122920
+    },
+    {
+      "epoch": 0.06069933854361089,
+      "grad_norm": 10.75,
+      "learning_rate": 0.0004899238442819181,
+      "loss": 14.5264,
+      "step": 122930
+    },
+    {
+      "epoch": 0.060704276259265624,
+      "grad_norm": 12.5,
+      "learning_rate": 0.0004899230212615783,
+      "loss": 14.6797,
+      "step": 122940
+    },
+    {
+      "epoch": 0.06070921397492036,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.0004899221982412385,
+      "loss": 14.6443,
+      "step": 122950
+    },
+    {
+      "epoch": 0.060714151690575084,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004899213752208987,
+      "loss": 14.4232,
+      "step": 122960
+    },
+    {
+      "epoch": 0.06071908940622982,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004899205522005589,
+      "loss": 14.6483,
+      "step": 122970
+    },
+    {
+      "epoch": 0.06072402712188455,
+      "grad_norm": 7.90625,
+      "learning_rate": 0.0004899197291802191,
+      "loss": 14.7229,
+      "step": 122980
+    },
+    {
+      "epoch": 0.06072896483753928,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.0004899189061598792,
+      "loss": 14.6283,
+      "step": 122990
+    },
+    {
+      "epoch": 0.06073390255319401,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.0004899180831395395,
+      "loss": 14.5907,
+      "step": 123000
     }
   ],
   "logging_steps": 10,
       "attributes": {}
     }
   },
+  "total_flos": 2.6188642487454034e+20,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null