{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 0, "global_step": 172, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.005813953488372093, "grad_norm": 0.62890625, "learning_rate": 1e-05, "loss": 2.1255, "step": 1 }, { "epoch": 0.011627906976744186, "grad_norm": 0.62109375, "learning_rate": 9.94186046511628e-06, "loss": 2.1177, "step": 2 }, { "epoch": 0.01744186046511628, "grad_norm": 0.58203125, "learning_rate": 9.883720930232558e-06, "loss": 2.0582, "step": 3 }, { "epoch": 0.023255813953488372, "grad_norm": 0.5625, "learning_rate": 9.825581395348838e-06, "loss": 2.1849, "step": 4 }, { "epoch": 0.029069767441860465, "grad_norm": 0.5546875, "learning_rate": 9.767441860465117e-06, "loss": 2.0211, "step": 5 }, { "epoch": 0.03488372093023256, "grad_norm": 0.56640625, "learning_rate": 9.709302325581395e-06, "loss": 2.1479, "step": 6 }, { "epoch": 0.040697674418604654, "grad_norm": 0.9140625, "learning_rate": 9.651162790697676e-06, "loss": 2.0632, "step": 7 }, { "epoch": 0.046511627906976744, "grad_norm": 0.48828125, "learning_rate": 9.593023255813954e-06, "loss": 1.9462, "step": 8 }, { "epoch": 0.05232558139534884, "grad_norm": 0.478515625, "learning_rate": 9.534883720930234e-06, "loss": 1.959, "step": 9 }, { "epoch": 0.05813953488372093, "grad_norm": 0.453125, "learning_rate": 9.476744186046513e-06, "loss": 2.0065, "step": 10 }, { "epoch": 0.06395348837209303, "grad_norm": 0.435546875, "learning_rate": 9.418604651162791e-06, "loss": 1.9662, "step": 11 }, { "epoch": 0.06976744186046512, "grad_norm": 0.416015625, "learning_rate": 9.36046511627907e-06, "loss": 1.9764, "step": 12 }, { "epoch": 0.0755813953488372, "grad_norm": 0.412109375, "learning_rate": 9.30232558139535e-06, "loss": 1.8858, "step": 13 }, { "epoch": 0.08139534883720931, "grad_norm": 0.43359375, "learning_rate": 9.244186046511628e-06, "loss": 1.9769, "step": 14 }, { "epoch": 0.0872093023255814, "grad_norm": 0.435546875, "learning_rate": 9.186046511627908e-06, "loss": 1.9238, "step": 15 }, { "epoch": 0.09302325581395349, "grad_norm": 0.408203125, "learning_rate": 9.127906976744186e-06, "loss": 1.8298, "step": 16 }, { "epoch": 0.09883720930232558, "grad_norm": 0.41015625, "learning_rate": 9.069767441860465e-06, "loss": 1.865, "step": 17 }, { "epoch": 0.10465116279069768, "grad_norm": 0.408203125, "learning_rate": 9.011627906976745e-06, "loss": 1.7912, "step": 18 }, { "epoch": 0.11046511627906977, "grad_norm": 0.361328125, "learning_rate": 8.953488372093024e-06, "loss": 1.7787, "step": 19 }, { "epoch": 0.11627906976744186, "grad_norm": 0.37890625, "learning_rate": 8.895348837209304e-06, "loss": 1.804, "step": 20 }, { "epoch": 0.12209302325581395, "grad_norm": 0.349609375, "learning_rate": 8.837209302325582e-06, "loss": 1.7814, "step": 21 }, { "epoch": 0.12790697674418605, "grad_norm": 0.33203125, "learning_rate": 8.779069767441861e-06, "loss": 1.7648, "step": 22 }, { "epoch": 0.13372093023255813, "grad_norm": 0.34375, "learning_rate": 8.72093023255814e-06, "loss": 1.7671, "step": 23 }, { "epoch": 0.13953488372093023, "grad_norm": 0.318359375, "learning_rate": 8.662790697674419e-06, "loss": 1.7183, "step": 24 }, { "epoch": 0.14534883720930233, "grad_norm": 0.376953125, "learning_rate": 8.604651162790698e-06, "loss": 1.76, "step": 25 }, { "epoch": 0.1511627906976744, "grad_norm": 0.34765625, "learning_rate": 8.546511627906978e-06, "loss": 1.6914, "step": 26 }, { "epoch": 0.1569767441860465, "grad_norm": 0.328125, "learning_rate": 8.488372093023256e-06, "loss": 1.64, "step": 27 }, { "epoch": 0.16279069767441862, "grad_norm": 0.314453125, "learning_rate": 8.430232558139537e-06, "loss": 1.7007, "step": 28 }, { "epoch": 0.1686046511627907, "grad_norm": 0.31640625, "learning_rate": 8.372093023255815e-06, "loss": 1.7119, "step": 29 }, { "epoch": 0.1744186046511628, "grad_norm": 0.291015625, "learning_rate": 8.313953488372094e-06, "loss": 1.6822, "step": 30 }, { "epoch": 0.18023255813953487, "grad_norm": 0.314453125, "learning_rate": 8.255813953488374e-06, "loss": 1.5851, "step": 31 }, { "epoch": 0.18604651162790697, "grad_norm": 0.265625, "learning_rate": 8.197674418604652e-06, "loss": 1.5466, "step": 32 }, { "epoch": 0.19186046511627908, "grad_norm": 0.2890625, "learning_rate": 8.139534883720931e-06, "loss": 1.6026, "step": 33 }, { "epoch": 0.19767441860465115, "grad_norm": 0.2890625, "learning_rate": 8.08139534883721e-06, "loss": 1.5484, "step": 34 }, { "epoch": 0.20348837209302326, "grad_norm": 0.26953125, "learning_rate": 8.023255813953488e-06, "loss": 1.5747, "step": 35 }, { "epoch": 0.20930232558139536, "grad_norm": 0.2490234375, "learning_rate": 7.965116279069768e-06, "loss": 1.557, "step": 36 }, { "epoch": 0.21511627906976744, "grad_norm": 0.265625, "learning_rate": 7.906976744186048e-06, "loss": 1.643, "step": 37 }, { "epoch": 0.22093023255813954, "grad_norm": 0.25390625, "learning_rate": 7.848837209302325e-06, "loss": 1.5923, "step": 38 }, { "epoch": 0.22674418604651161, "grad_norm": 0.29296875, "learning_rate": 7.790697674418605e-06, "loss": 1.5476, "step": 39 }, { "epoch": 0.23255813953488372, "grad_norm": 0.251953125, "learning_rate": 7.732558139534885e-06, "loss": 1.5802, "step": 40 }, { "epoch": 0.23837209302325582, "grad_norm": 0.259765625, "learning_rate": 7.674418604651164e-06, "loss": 1.5444, "step": 41 }, { "epoch": 0.2441860465116279, "grad_norm": 0.275390625, "learning_rate": 7.616279069767443e-06, "loss": 1.5437, "step": 42 }, { "epoch": 0.25, "grad_norm": 0.240234375, "learning_rate": 7.5581395348837215e-06, "loss": 1.5196, "step": 43 }, { "epoch": 0.2558139534883721, "grad_norm": 0.2890625, "learning_rate": 7.500000000000001e-06, "loss": 1.5173, "step": 44 }, { "epoch": 0.2616279069767442, "grad_norm": 0.296875, "learning_rate": 7.44186046511628e-06, "loss": 1.5559, "step": 45 }, { "epoch": 0.26744186046511625, "grad_norm": 0.234375, "learning_rate": 7.3837209302325584e-06, "loss": 1.5289, "step": 46 }, { "epoch": 0.27325581395348836, "grad_norm": 0.27734375, "learning_rate": 7.325581395348837e-06, "loss": 1.4879, "step": 47 }, { "epoch": 0.27906976744186046, "grad_norm": 0.2333984375, "learning_rate": 7.267441860465117e-06, "loss": 1.5055, "step": 48 }, { "epoch": 0.28488372093023256, "grad_norm": 0.2373046875, "learning_rate": 7.209302325581395e-06, "loss": 1.4491, "step": 49 }, { "epoch": 0.29069767441860467, "grad_norm": 0.27734375, "learning_rate": 7.151162790697676e-06, "loss": 1.4216, "step": 50 }, { "epoch": 0.29651162790697677, "grad_norm": 0.34375, "learning_rate": 7.0930232558139545e-06, "loss": 1.5556, "step": 51 }, { "epoch": 0.3023255813953488, "grad_norm": 0.28125, "learning_rate": 7.034883720930233e-06, "loss": 1.5028, "step": 52 }, { "epoch": 0.3081395348837209, "grad_norm": 0.2314453125, "learning_rate": 6.976744186046513e-06, "loss": 1.5071, "step": 53 }, { "epoch": 0.313953488372093, "grad_norm": 0.212890625, "learning_rate": 6.9186046511627914e-06, "loss": 1.4472, "step": 54 }, { "epoch": 0.31976744186046513, "grad_norm": 0.2158203125, "learning_rate": 6.86046511627907e-06, "loss": 1.4962, "step": 55 }, { "epoch": 0.32558139534883723, "grad_norm": 0.203125, "learning_rate": 6.80232558139535e-06, "loss": 1.4593, "step": 56 }, { "epoch": 0.3313953488372093, "grad_norm": 0.2197265625, "learning_rate": 6.744186046511628e-06, "loss": 1.4118, "step": 57 }, { "epoch": 0.3372093023255814, "grad_norm": 0.1953125, "learning_rate": 6.686046511627907e-06, "loss": 1.4391, "step": 58 }, { "epoch": 0.3430232558139535, "grad_norm": 0.263671875, "learning_rate": 6.627906976744186e-06, "loss": 1.423, "step": 59 }, { "epoch": 0.3488372093023256, "grad_norm": 0.251953125, "learning_rate": 6.569767441860465e-06, "loss": 1.3889, "step": 60 }, { "epoch": 0.3546511627906977, "grad_norm": 0.20703125, "learning_rate": 6.511627906976745e-06, "loss": 1.4268, "step": 61 }, { "epoch": 0.36046511627906974, "grad_norm": 0.244140625, "learning_rate": 6.4534883720930244e-06, "loss": 1.4263, "step": 62 }, { "epoch": 0.36627906976744184, "grad_norm": 0.173828125, "learning_rate": 6.395348837209303e-06, "loss": 1.4093, "step": 63 }, { "epoch": 0.37209302325581395, "grad_norm": 0.365234375, "learning_rate": 6.337209302325582e-06, "loss": 1.366, "step": 64 }, { "epoch": 0.37790697674418605, "grad_norm": 0.283203125, "learning_rate": 6.279069767441861e-06, "loss": 1.4102, "step": 65 }, { "epoch": 0.38372093023255816, "grad_norm": 0.1904296875, "learning_rate": 6.22093023255814e-06, "loss": 1.3519, "step": 66 }, { "epoch": 0.38953488372093026, "grad_norm": 0.2109375, "learning_rate": 6.162790697674419e-06, "loss": 1.4374, "step": 67 }, { "epoch": 0.3953488372093023, "grad_norm": 0.208984375, "learning_rate": 6.104651162790698e-06, "loss": 1.4138, "step": 68 }, { "epoch": 0.4011627906976744, "grad_norm": 0.2109375, "learning_rate": 6.046511627906977e-06, "loss": 1.3773, "step": 69 }, { "epoch": 0.4069767441860465, "grad_norm": 0.173828125, "learning_rate": 5.988372093023256e-06, "loss": 1.377, "step": 70 }, { "epoch": 0.4127906976744186, "grad_norm": 0.1962890625, "learning_rate": 5.930232558139536e-06, "loss": 1.3981, "step": 71 }, { "epoch": 0.4186046511627907, "grad_norm": 0.33984375, "learning_rate": 5.872093023255815e-06, "loss": 1.423, "step": 72 }, { "epoch": 0.42441860465116277, "grad_norm": 0.2041015625, "learning_rate": 5.8139534883720935e-06, "loss": 1.4068, "step": 73 }, { "epoch": 0.43023255813953487, "grad_norm": 0.26171875, "learning_rate": 5.755813953488373e-06, "loss": 1.3924, "step": 74 }, { "epoch": 0.436046511627907, "grad_norm": 0.1787109375, "learning_rate": 5.697674418604652e-06, "loss": 1.3788, "step": 75 }, { "epoch": 0.4418604651162791, "grad_norm": 0.16796875, "learning_rate": 5.6395348837209305e-06, "loss": 1.3655, "step": 76 }, { "epoch": 0.4476744186046512, "grad_norm": 0.2373046875, "learning_rate": 5.58139534883721e-06, "loss": 1.3145, "step": 77 }, { "epoch": 0.45348837209302323, "grad_norm": 0.21484375, "learning_rate": 5.523255813953489e-06, "loss": 1.3831, "step": 78 }, { "epoch": 0.45930232558139533, "grad_norm": 0.1787109375, "learning_rate": 5.465116279069767e-06, "loss": 1.362, "step": 79 }, { "epoch": 0.46511627906976744, "grad_norm": 0.2421875, "learning_rate": 5.406976744186047e-06, "loss": 1.4256, "step": 80 }, { "epoch": 0.47093023255813954, "grad_norm": 0.1884765625, "learning_rate": 5.348837209302326e-06, "loss": 1.3952, "step": 81 }, { "epoch": 0.47674418604651164, "grad_norm": 0.271484375, "learning_rate": 5.290697674418605e-06, "loss": 1.4481, "step": 82 }, { "epoch": 0.48255813953488375, "grad_norm": 0.1748046875, "learning_rate": 5.232558139534885e-06, "loss": 1.4001, "step": 83 }, { "epoch": 0.4883720930232558, "grad_norm": 0.203125, "learning_rate": 5.1744186046511635e-06, "loss": 1.4217, "step": 84 }, { "epoch": 0.4941860465116279, "grad_norm": 0.16796875, "learning_rate": 5.116279069767442e-06, "loss": 1.3261, "step": 85 }, { "epoch": 0.5, "grad_norm": 0.166015625, "learning_rate": 5.058139534883722e-06, "loss": 1.3314, "step": 86 }, { "epoch": 0.5058139534883721, "grad_norm": 0.1728515625, "learning_rate": 5e-06, "loss": 1.3385, "step": 87 }, { "epoch": 0.5116279069767442, "grad_norm": 0.201171875, "learning_rate": 4.941860465116279e-06, "loss": 1.3199, "step": 88 }, { "epoch": 0.5174418604651163, "grad_norm": 0.1796875, "learning_rate": 4.883720930232559e-06, "loss": 1.3675, "step": 89 }, { "epoch": 0.5232558139534884, "grad_norm": 0.224609375, "learning_rate": 4.825581395348838e-06, "loss": 1.3755, "step": 90 }, { "epoch": 0.5290697674418605, "grad_norm": 0.193359375, "learning_rate": 4.767441860465117e-06, "loss": 1.3449, "step": 91 }, { "epoch": 0.5348837209302325, "grad_norm": 0.177734375, "learning_rate": 4.709302325581396e-06, "loss": 1.3212, "step": 92 }, { "epoch": 0.5406976744186046, "grad_norm": 0.2109375, "learning_rate": 4.651162790697675e-06, "loss": 1.3461, "step": 93 }, { "epoch": 0.5465116279069767, "grad_norm": 0.2236328125, "learning_rate": 4.593023255813954e-06, "loss": 1.3907, "step": 94 }, { "epoch": 0.5523255813953488, "grad_norm": 0.1865234375, "learning_rate": 4.5348837209302326e-06, "loss": 1.3363, "step": 95 }, { "epoch": 0.5581395348837209, "grad_norm": 0.240234375, "learning_rate": 4.476744186046512e-06, "loss": 1.3027, "step": 96 }, { "epoch": 0.563953488372093, "grad_norm": 0.27734375, "learning_rate": 4.418604651162791e-06, "loss": 1.3903, "step": 97 }, { "epoch": 0.5697674418604651, "grad_norm": 0.1669921875, "learning_rate": 4.36046511627907e-06, "loss": 1.3387, "step": 98 }, { "epoch": 0.5755813953488372, "grad_norm": 0.2431640625, "learning_rate": 4.302325581395349e-06, "loss": 1.3576, "step": 99 }, { "epoch": 0.5813953488372093, "grad_norm": 0.2041015625, "learning_rate": 4.244186046511628e-06, "loss": 1.2968, "step": 100 }, { "epoch": 0.5872093023255814, "grad_norm": 0.189453125, "learning_rate": 4.186046511627907e-06, "loss": 1.3493, "step": 101 }, { "epoch": 0.5930232558139535, "grad_norm": 0.18359375, "learning_rate": 4.127906976744187e-06, "loss": 1.3133, "step": 102 }, { "epoch": 0.5988372093023255, "grad_norm": 0.224609375, "learning_rate": 4.0697674418604655e-06, "loss": 1.2797, "step": 103 }, { "epoch": 0.6046511627906976, "grad_norm": 0.1767578125, "learning_rate": 4.011627906976744e-06, "loss": 1.3235, "step": 104 }, { "epoch": 0.6104651162790697, "grad_norm": 0.2158203125, "learning_rate": 3.953488372093024e-06, "loss": 1.3204, "step": 105 }, { "epoch": 0.6162790697674418, "grad_norm": 0.22265625, "learning_rate": 3.8953488372093025e-06, "loss": 1.3177, "step": 106 }, { "epoch": 0.622093023255814, "grad_norm": 0.1904296875, "learning_rate": 3.837209302325582e-06, "loss": 1.3459, "step": 107 }, { "epoch": 0.627906976744186, "grad_norm": 0.205078125, "learning_rate": 3.7790697674418607e-06, "loss": 1.3779, "step": 108 }, { "epoch": 0.6337209302325582, "grad_norm": 0.1982421875, "learning_rate": 3.72093023255814e-06, "loss": 1.3279, "step": 109 }, { "epoch": 0.6395348837209303, "grad_norm": 0.1904296875, "learning_rate": 3.6627906976744186e-06, "loss": 1.2917, "step": 110 }, { "epoch": 0.6453488372093024, "grad_norm": 0.203125, "learning_rate": 3.6046511627906977e-06, "loss": 1.3324, "step": 111 }, { "epoch": 0.6511627906976745, "grad_norm": 0.279296875, "learning_rate": 3.5465116279069772e-06, "loss": 1.3494, "step": 112 }, { "epoch": 0.6569767441860465, "grad_norm": 0.294921875, "learning_rate": 3.4883720930232564e-06, "loss": 1.3521, "step": 113 }, { "epoch": 0.6627906976744186, "grad_norm": 0.2236328125, "learning_rate": 3.430232558139535e-06, "loss": 1.363, "step": 114 }, { "epoch": 0.6686046511627907, "grad_norm": 0.2080078125, "learning_rate": 3.372093023255814e-06, "loss": 1.3405, "step": 115 }, { "epoch": 0.6744186046511628, "grad_norm": 0.185546875, "learning_rate": 3.313953488372093e-06, "loss": 1.316, "step": 116 }, { "epoch": 0.6802325581395349, "grad_norm": 0.2060546875, "learning_rate": 3.2558139534883724e-06, "loss": 1.3212, "step": 117 }, { "epoch": 0.686046511627907, "grad_norm": 0.19921875, "learning_rate": 3.1976744186046516e-06, "loss": 1.3167, "step": 118 }, { "epoch": 0.6918604651162791, "grad_norm": 0.2119140625, "learning_rate": 3.1395348837209307e-06, "loss": 1.3247, "step": 119 }, { "epoch": 0.6976744186046512, "grad_norm": 0.169921875, "learning_rate": 3.0813953488372094e-06, "loss": 1.298, "step": 120 }, { "epoch": 0.7034883720930233, "grad_norm": 0.1767578125, "learning_rate": 3.0232558139534885e-06, "loss": 1.3146, "step": 121 }, { "epoch": 0.7093023255813954, "grad_norm": 0.197265625, "learning_rate": 2.965116279069768e-06, "loss": 1.3587, "step": 122 }, { "epoch": 0.7151162790697675, "grad_norm": 0.2138671875, "learning_rate": 2.9069767441860468e-06, "loss": 1.3082, "step": 123 }, { "epoch": 0.7209302325581395, "grad_norm": 0.2265625, "learning_rate": 2.848837209302326e-06, "loss": 1.34, "step": 124 }, { "epoch": 0.7267441860465116, "grad_norm": 0.61328125, "learning_rate": 2.790697674418605e-06, "loss": 1.3018, "step": 125 }, { "epoch": 0.7325581395348837, "grad_norm": 0.265625, "learning_rate": 2.7325581395348837e-06, "loss": 1.3533, "step": 126 }, { "epoch": 0.7383720930232558, "grad_norm": 0.1748046875, "learning_rate": 2.674418604651163e-06, "loss": 1.3081, "step": 127 }, { "epoch": 0.7441860465116279, "grad_norm": 0.23828125, "learning_rate": 2.6162790697674424e-06, "loss": 1.3223, "step": 128 }, { "epoch": 0.75, "grad_norm": 0.294921875, "learning_rate": 2.558139534883721e-06, "loss": 1.2797, "step": 129 }, { "epoch": 0.7558139534883721, "grad_norm": 0.51953125, "learning_rate": 2.5e-06, "loss": 1.4022, "step": 130 }, { "epoch": 0.7616279069767442, "grad_norm": 0.1826171875, "learning_rate": 2.4418604651162793e-06, "loss": 1.2445, "step": 131 }, { "epoch": 0.7674418604651163, "grad_norm": 0.1728515625, "learning_rate": 2.3837209302325585e-06, "loss": 1.3638, "step": 132 }, { "epoch": 0.7732558139534884, "grad_norm": 0.271484375, "learning_rate": 2.3255813953488376e-06, "loss": 1.3506, "step": 133 }, { "epoch": 0.7790697674418605, "grad_norm": 0.1923828125, "learning_rate": 2.2674418604651163e-06, "loss": 1.3623, "step": 134 }, { "epoch": 0.7848837209302325, "grad_norm": 0.30078125, "learning_rate": 2.2093023255813954e-06, "loss": 1.3084, "step": 135 }, { "epoch": 0.7906976744186046, "grad_norm": 0.201171875, "learning_rate": 2.1511627906976745e-06, "loss": 1.2856, "step": 136 }, { "epoch": 0.7965116279069767, "grad_norm": 0.1953125, "learning_rate": 2.0930232558139536e-06, "loss": 1.3131, "step": 137 }, { "epoch": 0.8023255813953488, "grad_norm": 0.1943359375, "learning_rate": 2.0348837209302328e-06, "loss": 1.3509, "step": 138 }, { "epoch": 0.8081395348837209, "grad_norm": 0.208984375, "learning_rate": 1.976744186046512e-06, "loss": 1.3268, "step": 139 }, { "epoch": 0.813953488372093, "grad_norm": 0.1953125, "learning_rate": 1.918604651162791e-06, "loss": 1.2989, "step": 140 }, { "epoch": 0.8197674418604651, "grad_norm": 0.1748046875, "learning_rate": 1.86046511627907e-06, "loss": 1.2802, "step": 141 }, { "epoch": 0.8255813953488372, "grad_norm": 0.169921875, "learning_rate": 1.8023255813953488e-06, "loss": 1.3031, "step": 142 }, { "epoch": 0.8313953488372093, "grad_norm": 0.212890625, "learning_rate": 1.7441860465116282e-06, "loss": 1.3096, "step": 143 }, { "epoch": 0.8372093023255814, "grad_norm": 0.216796875, "learning_rate": 1.686046511627907e-06, "loss": 1.2951, "step": 144 }, { "epoch": 0.8430232558139535, "grad_norm": 0.1640625, "learning_rate": 1.6279069767441862e-06, "loss": 1.2677, "step": 145 }, { "epoch": 0.8488372093023255, "grad_norm": 0.1875, "learning_rate": 1.5697674418604653e-06, "loss": 1.3207, "step": 146 }, { "epoch": 0.8546511627906976, "grad_norm": 0.171875, "learning_rate": 1.5116279069767443e-06, "loss": 1.2703, "step": 147 }, { "epoch": 0.8604651162790697, "grad_norm": 0.169921875, "learning_rate": 1.4534883720930234e-06, "loss": 1.2819, "step": 148 }, { "epoch": 0.8662790697674418, "grad_norm": 0.1640625, "learning_rate": 1.3953488372093025e-06, "loss": 1.2856, "step": 149 }, { "epoch": 0.872093023255814, "grad_norm": 0.19921875, "learning_rate": 1.3372093023255814e-06, "loss": 1.3392, "step": 150 }, { "epoch": 0.877906976744186, "grad_norm": 0.25390625, "learning_rate": 1.2790697674418605e-06, "loss": 1.3336, "step": 151 }, { "epoch": 0.8837209302325582, "grad_norm": 0.20703125, "learning_rate": 1.2209302325581397e-06, "loss": 1.2752, "step": 152 }, { "epoch": 0.8895348837209303, "grad_norm": 0.216796875, "learning_rate": 1.1627906976744188e-06, "loss": 1.2794, "step": 153 }, { "epoch": 0.8953488372093024, "grad_norm": 0.201171875, "learning_rate": 1.1046511627906977e-06, "loss": 1.3641, "step": 154 }, { "epoch": 0.9011627906976745, "grad_norm": 0.21875, "learning_rate": 1.0465116279069768e-06, "loss": 1.3098, "step": 155 }, { "epoch": 0.9069767441860465, "grad_norm": 0.265625, "learning_rate": 9.88372093023256e-07, "loss": 1.2457, "step": 156 }, { "epoch": 0.9127906976744186, "grad_norm": 0.2041015625, "learning_rate": 9.30232558139535e-07, "loss": 1.3696, "step": 157 }, { "epoch": 0.9186046511627907, "grad_norm": 0.275390625, "learning_rate": 8.720930232558141e-07, "loss": 1.2726, "step": 158 }, { "epoch": 0.9244186046511628, "grad_norm": 0.158203125, "learning_rate": 8.139534883720931e-07, "loss": 1.3204, "step": 159 }, { "epoch": 0.9302325581395349, "grad_norm": 0.1611328125, "learning_rate": 7.558139534883721e-07, "loss": 1.2949, "step": 160 }, { "epoch": 0.936046511627907, "grad_norm": 0.1533203125, "learning_rate": 6.976744186046513e-07, "loss": 1.3007, "step": 161 }, { "epoch": 0.9418604651162791, "grad_norm": 0.1728515625, "learning_rate": 6.395348837209303e-07, "loss": 1.2964, "step": 162 }, { "epoch": 0.9476744186046512, "grad_norm": 0.1796875, "learning_rate": 5.813953488372094e-07, "loss": 1.3142, "step": 163 }, { "epoch": 0.9534883720930233, "grad_norm": 0.1708984375, "learning_rate": 5.232558139534884e-07, "loss": 1.2951, "step": 164 }, { "epoch": 0.9593023255813954, "grad_norm": 0.1640625, "learning_rate": 4.651162790697675e-07, "loss": 1.2901, "step": 165 }, { "epoch": 0.9651162790697675, "grad_norm": 0.1630859375, "learning_rate": 4.0697674418604655e-07, "loss": 1.285, "step": 166 }, { "epoch": 0.9709302325581395, "grad_norm": 0.25390625, "learning_rate": 3.488372093023256e-07, "loss": 1.2164, "step": 167 }, { "epoch": 0.9767441860465116, "grad_norm": 0.25, "learning_rate": 2.906976744186047e-07, "loss": 1.28, "step": 168 }, { "epoch": 0.9825581395348837, "grad_norm": 0.26953125, "learning_rate": 2.3255813953488374e-07, "loss": 1.339, "step": 169 }, { "epoch": 0.9883720930232558, "grad_norm": 0.1943359375, "learning_rate": 1.744186046511628e-07, "loss": 1.3168, "step": 170 }, { "epoch": 0.9941860465116279, "grad_norm": 0.259765625, "learning_rate": 1.1627906976744187e-07, "loss": 1.3515, "step": 171 }, { "epoch": 1.0, "grad_norm": 0.1630859375, "learning_rate": 5.8139534883720935e-08, "loss": 1.2961, "step": 172 } ], "logging_steps": 1.0, "max_steps": 172, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.2407067975509606e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }