Training in progress, step 98000, checkpoint

Browse files

Files changed (8) hide show

last-checkpoint/optimizer.pt +1 -1
last-checkpoint/pytorch_model.bin +1 -1
last-checkpoint/rng_state_0.pth +1 -1
last-checkpoint/rng_state_1.pth +1 -1
last-checkpoint/rng_state_2.pth +1 -1
last-checkpoint/rng_state_3.pth +1 -1
last-checkpoint/scheduler.pt +1 -1
last-checkpoint/trainer_state.json +353 -3

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6a69ecab5904980e7f3a94d5dc1448f313472762461c45ddbba6c4eae6b123b1
 size 248812730

 version https://git-lfs.github.com/spec/v1
+oid sha256:3723b9e158f7d7c1b221ad913b7f555d0d264515799090bc69e60850f0a9fa6b
 size 248812730

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1c03a6622528358a407d62165f752ea46b6bec53d09b9bd411f76f21f6f68bbf
 size 383474230

 version https://git-lfs.github.com/spec/v1
+oid sha256:440f5a9d8875f8e95803b17e9cf6de595e79c54073354d96d86cea394292a079
 size 383474230

last-checkpoint/rng_state_0.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b422e12da0bc6e9f22a27fec14cb97965395cb85dde2d8505a83b9d029f07e84
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b31b9ddb9cefffec91b179d35f46e0650b36eb5e7d6b179cc2315e9755fde24
 size 14960

last-checkpoint/rng_state_1.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:995166856d98c479a56ca8761344498c514ca5115dba8261f6471644d1ca2655
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:b897e4c715d4685a715f56e531cd128aa1c6fa09393166cc3a0438301c30a741
 size 14960

last-checkpoint/rng_state_2.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f74bfea6e1851682b4a81ec7118825ebff477f53d4b975c76c346a787c9c9ee0
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:84c2a641b88be414d1ece9cf1d942bc724b5f52e33f3e4c339f8cc7d9b2e8364
 size 14960

last-checkpoint/rng_state_3.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13571a671c73d9ce62ec8f6f7f7a3c6bc8122295dafccc52172c28cf4e16bc48
 size 14960

 version https://git-lfs.github.com/spec/v1
+oid sha256:a17d0f6c880a6c8e0ab8f462efb3f688277303e688570c80d2d94eb3374124e9
 size 14960

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0785d0a7b3c66ff29dd15672c3d686a1c82aa2bc9ffe45d135ce07de7a805b3c
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:5924b0a98cf4382d8ddc0bce15fa8820b2d4d5a4d9504a3386f0e9d32485919a
 size 1064

last-checkpoint/trainer_state.json CHANGED Viewed

@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.14368752555267852,
   "eval_steps": 500,
-  "global_step": 97000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -33958,6 +33958,356 @@
       "learning_rate": 0.0004761698855352423,
       "loss": 17.5145,
       "step": 97000
     }
   ],
   "logging_steps": 20,
@@ -33977,7 +34327,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 6.708521159255235e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 0.14516884024909787,
   "eval_steps": 500,
+  "global_step": 98000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "learning_rate": 0.0004761698855352423,
       "loss": 17.5145,
       "step": 97000
+    },
+    {
+      "epoch": 0.1437171518466069,
+      "grad_norm": 7.84375,
+      "learning_rate": 0.00047616494660023467,
+      "loss": 17.5301,
+      "step": 97020
+    },
+    {
+      "epoch": 0.1437467781405353,
+      "grad_norm": 8.875,
+      "learning_rate": 0.00047616000766522717,
+      "loss": 17.4752,
+      "step": 97040
+    },
+    {
+      "epoch": 0.14377640443446368,
+      "grad_norm": 10.1875,
+      "learning_rate": 0.00047615506873021957,
+      "loss": 17.5011,
+      "step": 97060
+    },
+    {
+      "epoch": 0.14380603072839207,
+      "grad_norm": 9.625,
+      "learning_rate": 0.000476150129795212,
+      "loss": 17.5621,
+      "step": 97080
+    },
+    {
+      "epoch": 0.14383565702232046,
+      "grad_norm": 9.125,
+      "learning_rate": 0.00047614519086020446,
+      "loss": 17.5082,
+      "step": 97100
+    },
+    {
+      "epoch": 0.14386528331624884,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004761402519251969,
+      "loss": 17.4921,
+      "step": 97120
+    },
+    {
+      "epoch": 0.14389490961017723,
+      "grad_norm": 9.0625,
+      "learning_rate": 0.0004761353129901893,
+      "loss": 17.5339,
+      "step": 97140
+    },
+    {
+      "epoch": 0.14392453590410562,
+      "grad_norm": 8.4375,
+      "learning_rate": 0.00047613037405518175,
+      "loss": 17.5687,
+      "step": 97160
+    },
+    {
+      "epoch": 0.143954162198034,
+      "grad_norm": 9.0,
+      "learning_rate": 0.0004761254351201742,
+      "loss": 17.4704,
+      "step": 97180
+    },
+    {
+      "epoch": 0.1439837884919624,
+      "grad_norm": 9.25,
+      "learning_rate": 0.0004761204961851666,
+      "loss": 17.5618,
+      "step": 97200
+    },
+    {
+      "epoch": 0.14401341478589078,
+      "grad_norm": 10.25,
+      "learning_rate": 0.00047611555725015904,
+      "loss": 17.5355,
+      "step": 97220
+    },
+    {
+      "epoch": 0.14404304107981916,
+      "grad_norm": 7.875,
+      "learning_rate": 0.00047611061831515143,
+      "loss": 17.5152,
+      "step": 97240
+    },
+    {
+      "epoch": 0.14407266737374755,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.00047610567938014393,
+      "loss": 17.5023,
+      "step": 97260
+    },
+    {
+      "epoch": 0.14410229366767593,
+      "grad_norm": 6.96875,
+      "learning_rate": 0.00047610074044513633,
+      "loss": 17.531,
+      "step": 97280
+    },
+    {
+      "epoch": 0.14413191996160432,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.0004760958015101288,
+      "loss": 17.5073,
+      "step": 97300
+    },
+    {
+      "epoch": 0.1441615462555327,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.00047609086257512117,
+      "loss": 17.4898,
+      "step": 97320
+    },
+    {
+      "epoch": 0.1441911725494611,
+      "grad_norm": 8.0,
+      "learning_rate": 0.00047608592364011367,
+      "loss": 17.5595,
+      "step": 97340
+    },
+    {
+      "epoch": 0.14422079884338948,
+      "grad_norm": 9.8125,
+      "learning_rate": 0.00047608098470510607,
+      "loss": 17.558,
+      "step": 97360
+    },
+    {
+      "epoch": 0.14425042513731787,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.0004760760457700985,
+      "loss": 17.5134,
+      "step": 97380
+    },
+    {
+      "epoch": 0.14428005143124625,
+      "grad_norm": 8.75,
+      "learning_rate": 0.00047607110683509096,
+      "loss": 17.5447,
+      "step": 97400
+    },
+    {
+      "epoch": 0.14430967772517464,
+      "grad_norm": 9.3125,
+      "learning_rate": 0.0004760661679000834,
+      "loss": 17.5099,
+      "step": 97420
+    },
+    {
+      "epoch": 0.14433930401910303,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004760612289650758,
+      "loss": 17.4835,
+      "step": 97440
+    },
+    {
+      "epoch": 0.1443689303130314,
+      "grad_norm": 9.125,
+      "learning_rate": 0.00047605629003006825,
+      "loss": 17.519,
+      "step": 97460
+    },
+    {
+      "epoch": 0.1443985566069598,
+      "grad_norm": 7.5,
+      "learning_rate": 0.0004760513510950607,
+      "loss": 17.4952,
+      "step": 97480
+    },
+    {
+      "epoch": 0.14442818290088819,
+      "grad_norm": 9.625,
+      "learning_rate": 0.00047604641216005315,
+      "loss": 17.4943,
+      "step": 97500
+    },
+    {
+      "epoch": 0.14445780919481657,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.00047604147322504554,
+      "loss": 17.537,
+      "step": 97520
+    },
+    {
+      "epoch": 0.14448743548874496,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.00047603653429003793,
+      "loss": 17.4928,
+      "step": 97540
+    },
+    {
+      "epoch": 0.14451706178267335,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.00047603159535503044,
+      "loss": 17.4929,
+      "step": 97560
+    },
+    {
+      "epoch": 0.14454668807660176,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.00047602665642002283,
+      "loss": 17.4739,
+      "step": 97580
+    },
+    {
+      "epoch": 0.14457631437053015,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.0004760217174850153,
+      "loss": 17.5714,
+      "step": 97600
+    },
+    {
+      "epoch": 0.14460594066445853,
+      "grad_norm": 7.71875,
+      "learning_rate": 0.00047601677855000767,
+      "loss": 17.4411,
+      "step": 97620
+    },
+    {
+      "epoch": 0.14463556695838692,
+      "grad_norm": 8.3125,
+      "learning_rate": 0.00047601183961500017,
+      "loss": 17.5361,
+      "step": 97640
+    },
+    {
+      "epoch": 0.1446651932523153,
+      "grad_norm": 8.8125,
+      "learning_rate": 0.00047600690067999257,
+      "loss": 17.5253,
+      "step": 97660
+    },
+    {
+      "epoch": 0.1446948195462437,
+      "grad_norm": 10.0625,
+      "learning_rate": 0.000476001961744985,
+      "loss": 17.5119,
+      "step": 97680
+    },
+    {
+      "epoch": 0.14472444584017208,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.00047599702280997746,
+      "loss": 17.5493,
+      "step": 97700
+    },
+    {
+      "epoch": 0.14475407213410046,
+      "grad_norm": 7.53125,
+      "learning_rate": 0.0004759920838749699,
+      "loss": 17.5685,
+      "step": 97720
+    },
+    {
+      "epoch": 0.14478369842802885,
+      "grad_norm": 9.1875,
+      "learning_rate": 0.0004759871449399623,
+      "loss": 17.4322,
+      "step": 97740
+    },
+    {
+      "epoch": 0.14481332472195724,
+      "grad_norm": 9.4375,
+      "learning_rate": 0.00047598220600495475,
+      "loss": 17.4662,
+      "step": 97760
+    },
+    {
+      "epoch": 0.14484295101588562,
+      "grad_norm": 8.0625,
+      "learning_rate": 0.0004759772670699472,
+      "loss": 17.4853,
+      "step": 97780
+    },
+    {
+      "epoch": 0.144872577309814,
+      "grad_norm": 8.5,
+      "learning_rate": 0.00047597232813493965,
+      "loss": 17.5302,
+      "step": 97800
+    },
+    {
+      "epoch": 0.1449022036037424,
+      "grad_norm": 9.5,
+      "learning_rate": 0.00047596738919993204,
+      "loss": 17.5049,
+      "step": 97820
+    },
+    {
+      "epoch": 0.14493182989767078,
+      "grad_norm": 8.625,
+      "learning_rate": 0.0004759624502649245,
+      "loss": 17.5703,
+      "step": 97840
+    },
+    {
+      "epoch": 0.14496145619159917,
+      "grad_norm": 9.625,
+      "learning_rate": 0.00047595751132991694,
+      "loss": 17.5346,
+      "step": 97860
+    },
+    {
+      "epoch": 0.14499108248552756,
+      "grad_norm": 9.0,
+      "learning_rate": 0.00047595257239490933,
+      "loss": 17.4759,
+      "step": 97880
+    },
+    {
+      "epoch": 0.14502070877945594,
+      "grad_norm": 8.125,
+      "learning_rate": 0.0004759476334599018,
+      "loss": 17.5284,
+      "step": 97900
+    },
+    {
+      "epoch": 0.14505033507338433,
+      "grad_norm": 8.875,
+      "learning_rate": 0.00047594269452489417,
+      "loss": 17.488,
+      "step": 97920
+    },
+    {
+      "epoch": 0.14507996136731272,
+      "grad_norm": 8.875,
+      "learning_rate": 0.00047593775558988667,
+      "loss": 17.4954,
+      "step": 97940
+    },
+    {
+      "epoch": 0.1451095876612411,
+      "grad_norm": 8.9375,
+      "learning_rate": 0.00047593281665487907,
+      "loss": 17.522,
+      "step": 97960
+    },
+    {
+      "epoch": 0.1451392139551695,
+      "grad_norm": 7.78125,
+      "learning_rate": 0.0004759278777198715,
+      "loss": 17.525,
+      "step": 97980
+    },
+    {
+      "epoch": 0.14516884024909787,
+      "grad_norm": 9.6875,
+      "learning_rate": 0.00047592293878486396,
+      "loss": 17.539,
+      "step": 98000
     }
   ],
   "logging_steps": 20,
       "attributes": {}
     }
   },
+  "total_flos": 6.777681616389643e+19,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": null