| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 0, | |
| "global_step": 238, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.004201680672268907, | |
| "grad_norm": 0.421875, | |
| "learning_rate": 9.957983193277312e-06, | |
| "loss": 1.9667, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.008403361344537815, | |
| "grad_norm": 0.396484375, | |
| "learning_rate": 9.915966386554622e-06, | |
| "loss": 1.824, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.012605042016806723, | |
| "grad_norm": 0.388671875, | |
| "learning_rate": 9.873949579831935e-06, | |
| "loss": 1.8351, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.01680672268907563, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 9.831932773109244e-06, | |
| "loss": 1.8546, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.02100840336134454, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 9.789915966386554e-06, | |
| "loss": 1.7778, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.025210084033613446, | |
| "grad_norm": 0.380859375, | |
| "learning_rate": 9.747899159663867e-06, | |
| "loss": 1.7951, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.029411764705882353, | |
| "grad_norm": 0.36328125, | |
| "learning_rate": 9.705882352941177e-06, | |
| "loss": 1.8846, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.03361344537815126, | |
| "grad_norm": 0.353515625, | |
| "learning_rate": 9.663865546218488e-06, | |
| "loss": 1.8174, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.037815126050420166, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 9.621848739495799e-06, | |
| "loss": 1.7087, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.04201680672268908, | |
| "grad_norm": 0.322265625, | |
| "learning_rate": 9.579831932773111e-06, | |
| "loss": 1.8442, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.046218487394957986, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 9.537815126050422e-06, | |
| "loss": 1.6948, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.05042016806722689, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 9.49579831932773e-06, | |
| "loss": 1.7909, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0546218487394958, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 9.453781512605043e-06, | |
| "loss": 1.6094, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.058823529411764705, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 9.411764705882354e-06, | |
| "loss": 1.6826, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.06302521008403361, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 9.369747899159664e-06, | |
| "loss": 1.6556, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.06722689075630252, | |
| "grad_norm": 0.28515625, | |
| "learning_rate": 9.327731092436975e-06, | |
| "loss": 1.6881, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.07142857142857142, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 9.285714285714288e-06, | |
| "loss": 1.6551, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.07563025210084033, | |
| "grad_norm": 0.40625, | |
| "learning_rate": 9.243697478991598e-06, | |
| "loss": 1.6694, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.07983193277310924, | |
| "grad_norm": 0.265625, | |
| "learning_rate": 9.201680672268907e-06, | |
| "loss": 1.568, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.08403361344537816, | |
| "grad_norm": 0.275390625, | |
| "learning_rate": 9.15966386554622e-06, | |
| "loss": 1.6109, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.08823529411764706, | |
| "grad_norm": 0.31640625, | |
| "learning_rate": 9.11764705882353e-06, | |
| "loss": 1.5474, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.09243697478991597, | |
| "grad_norm": 0.26171875, | |
| "learning_rate": 9.075630252100841e-06, | |
| "loss": 1.6264, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.09663865546218488, | |
| "grad_norm": 0.259765625, | |
| "learning_rate": 9.033613445378152e-06, | |
| "loss": 1.5904, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.10084033613445378, | |
| "grad_norm": 0.2373046875, | |
| "learning_rate": 8.991596638655462e-06, | |
| "loss": 1.5749, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.10504201680672269, | |
| "grad_norm": 0.216796875, | |
| "learning_rate": 8.949579831932775e-06, | |
| "loss": 1.4887, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.1092436974789916, | |
| "grad_norm": 0.240234375, | |
| "learning_rate": 8.907563025210085e-06, | |
| "loss": 1.5623, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.1134453781512605, | |
| "grad_norm": 0.234375, | |
| "learning_rate": 8.865546218487396e-06, | |
| "loss": 1.5463, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.11764705882352941, | |
| "grad_norm": 0.2275390625, | |
| "learning_rate": 8.823529411764707e-06, | |
| "loss": 1.5493, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.12184873949579832, | |
| "grad_norm": 0.216796875, | |
| "learning_rate": 8.781512605042017e-06, | |
| "loss": 1.5453, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.12605042016806722, | |
| "grad_norm": 0.2333984375, | |
| "learning_rate": 8.739495798319328e-06, | |
| "loss": 1.4993, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.13025210084033614, | |
| "grad_norm": 0.21875, | |
| "learning_rate": 8.697478991596639e-06, | |
| "loss": 1.529, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.13445378151260504, | |
| "grad_norm": 0.2099609375, | |
| "learning_rate": 8.655462184873951e-06, | |
| "loss": 1.5383, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.13865546218487396, | |
| "grad_norm": 0.220703125, | |
| "learning_rate": 8.613445378151262e-06, | |
| "loss": 1.4512, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.14285714285714285, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 1.4346, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.14705882352941177, | |
| "grad_norm": 0.197265625, | |
| "learning_rate": 8.529411764705883e-06, | |
| "loss": 1.427, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.15126050420168066, | |
| "grad_norm": 0.2275390625, | |
| "learning_rate": 8.487394957983194e-06, | |
| "loss": 1.4203, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.15546218487394958, | |
| "grad_norm": 0.2578125, | |
| "learning_rate": 8.445378151260505e-06, | |
| "loss": 1.4899, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.15966386554621848, | |
| "grad_norm": 0.1923828125, | |
| "learning_rate": 8.403361344537815e-06, | |
| "loss": 1.411, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.1638655462184874, | |
| "grad_norm": 0.291015625, | |
| "learning_rate": 8.361344537815128e-06, | |
| "loss": 1.5021, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.16806722689075632, | |
| "grad_norm": 0.212890625, | |
| "learning_rate": 8.319327731092438e-06, | |
| "loss": 1.4182, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.1722689075630252, | |
| "grad_norm": 0.1875, | |
| "learning_rate": 8.277310924369747e-06, | |
| "loss": 1.3553, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.17647058823529413, | |
| "grad_norm": 0.18359375, | |
| "learning_rate": 8.23529411764706e-06, | |
| "loss": 1.3181, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.18067226890756302, | |
| "grad_norm": 0.1796875, | |
| "learning_rate": 8.19327731092437e-06, | |
| "loss": 1.342, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.18487394957983194, | |
| "grad_norm": 0.18359375, | |
| "learning_rate": 8.151260504201681e-06, | |
| "loss": 1.3872, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.18907563025210083, | |
| "grad_norm": 0.19140625, | |
| "learning_rate": 8.109243697478992e-06, | |
| "loss": 1.3402, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.19327731092436976, | |
| "grad_norm": 0.19921875, | |
| "learning_rate": 8.067226890756303e-06, | |
| "loss": 1.4122, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.19747899159663865, | |
| "grad_norm": 0.2080078125, | |
| "learning_rate": 8.025210084033615e-06, | |
| "loss": 1.3514, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.20168067226890757, | |
| "grad_norm": 0.189453125, | |
| "learning_rate": 7.983193277310926e-06, | |
| "loss": 1.3439, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.20588235294117646, | |
| "grad_norm": 0.171875, | |
| "learning_rate": 7.941176470588236e-06, | |
| "loss": 1.3554, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.21008403361344538, | |
| "grad_norm": 0.1875, | |
| "learning_rate": 7.899159663865547e-06, | |
| "loss": 1.3353, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.21428571428571427, | |
| "grad_norm": 0.185546875, | |
| "learning_rate": 7.857142857142858e-06, | |
| "loss": 1.3246, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.2184873949579832, | |
| "grad_norm": 0.2109375, | |
| "learning_rate": 7.815126050420168e-06, | |
| "loss": 1.3579, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.22268907563025211, | |
| "grad_norm": 0.181640625, | |
| "learning_rate": 7.773109243697479e-06, | |
| "loss": 1.3769, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.226890756302521, | |
| "grad_norm": 0.2158203125, | |
| "learning_rate": 7.731092436974791e-06, | |
| "loss": 1.3993, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.23109243697478993, | |
| "grad_norm": 0.169921875, | |
| "learning_rate": 7.689075630252102e-06, | |
| "loss": 1.3721, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.23529411764705882, | |
| "grad_norm": 0.1923828125, | |
| "learning_rate": 7.647058823529411e-06, | |
| "loss": 1.3358, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.23949579831932774, | |
| "grad_norm": 0.1826171875, | |
| "learning_rate": 7.605042016806723e-06, | |
| "loss": 1.3472, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.24369747899159663, | |
| "grad_norm": 0.2470703125, | |
| "learning_rate": 7.563025210084034e-06, | |
| "loss": 1.3561, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.24789915966386555, | |
| "grad_norm": 0.173828125, | |
| "learning_rate": 7.521008403361345e-06, | |
| "loss": 1.3105, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.25210084033613445, | |
| "grad_norm": 0.1669921875, | |
| "learning_rate": 7.478991596638656e-06, | |
| "loss": 1.3066, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.25630252100840334, | |
| "grad_norm": 0.1689453125, | |
| "learning_rate": 7.436974789915967e-06, | |
| "loss": 1.2938, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.2605042016806723, | |
| "grad_norm": 0.169921875, | |
| "learning_rate": 7.394957983193279e-06, | |
| "loss": 1.3339, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.2647058823529412, | |
| "grad_norm": 0.162109375, | |
| "learning_rate": 7.352941176470589e-06, | |
| "loss": 1.2754, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.2689075630252101, | |
| "grad_norm": 0.205078125, | |
| "learning_rate": 7.310924369747899e-06, | |
| "loss": 1.378, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.27310924369747897, | |
| "grad_norm": 0.16015625, | |
| "learning_rate": 7.268907563025211e-06, | |
| "loss": 1.3103, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.2773109243697479, | |
| "grad_norm": 0.1806640625, | |
| "learning_rate": 7.226890756302521e-06, | |
| "loss": 1.3127, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.2815126050420168, | |
| "grad_norm": 0.212890625, | |
| "learning_rate": 7.184873949579833e-06, | |
| "loss": 1.2806, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.2857142857142857, | |
| "grad_norm": 0.171875, | |
| "learning_rate": 7.1428571428571436e-06, | |
| "loss": 1.3568, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.28991596638655465, | |
| "grad_norm": 0.224609375, | |
| "learning_rate": 7.100840336134454e-06, | |
| "loss": 1.2426, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.29411764705882354, | |
| "grad_norm": 0.169921875, | |
| "learning_rate": 7.058823529411766e-06, | |
| "loss": 1.2839, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.29831932773109243, | |
| "grad_norm": 0.224609375, | |
| "learning_rate": 7.016806722689076e-06, | |
| "loss": 1.2791, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.3025210084033613, | |
| "grad_norm": 0.1953125, | |
| "learning_rate": 6.974789915966387e-06, | |
| "loss": 1.1977, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.3067226890756303, | |
| "grad_norm": 0.154296875, | |
| "learning_rate": 6.932773109243698e-06, | |
| "loss": 1.2612, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.31092436974789917, | |
| "grad_norm": 0.1630859375, | |
| "learning_rate": 6.8907563025210085e-06, | |
| "loss": 1.327, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.31512605042016806, | |
| "grad_norm": 0.15234375, | |
| "learning_rate": 6.84873949579832e-06, | |
| "loss": 1.2307, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.31932773109243695, | |
| "grad_norm": 0.15234375, | |
| "learning_rate": 6.806722689075631e-06, | |
| "loss": 1.2599, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.3235294117647059, | |
| "grad_norm": 0.2177734375, | |
| "learning_rate": 6.764705882352942e-06, | |
| "loss": 1.2699, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.3277310924369748, | |
| "grad_norm": 0.1572265625, | |
| "learning_rate": 6.722689075630253e-06, | |
| "loss": 1.219, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.3319327731092437, | |
| "grad_norm": 0.15625, | |
| "learning_rate": 6.680672268907563e-06, | |
| "loss": 1.2979, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.33613445378151263, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 6.638655462184874e-06, | |
| "loss": 1.3005, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.3403361344537815, | |
| "grad_norm": 0.1630859375, | |
| "learning_rate": 6.596638655462185e-06, | |
| "loss": 1.2593, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.3445378151260504, | |
| "grad_norm": 0.33203125, | |
| "learning_rate": 6.5546218487394966e-06, | |
| "loss": 1.2254, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.3487394957983193, | |
| "grad_norm": 0.189453125, | |
| "learning_rate": 6.512605042016807e-06, | |
| "loss": 1.2631, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.35294117647058826, | |
| "grad_norm": 0.1513671875, | |
| "learning_rate": 6.470588235294119e-06, | |
| "loss": 1.2265, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.35714285714285715, | |
| "grad_norm": 0.1923828125, | |
| "learning_rate": 6.4285714285714295e-06, | |
| "loss": 1.2601, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.36134453781512604, | |
| "grad_norm": 0.20703125, | |
| "learning_rate": 6.386554621848739e-06, | |
| "loss": 1.2146, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.36554621848739494, | |
| "grad_norm": 0.2412109375, | |
| "learning_rate": 6.344537815126051e-06, | |
| "loss": 1.3096, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.3697478991596639, | |
| "grad_norm": 0.150390625, | |
| "learning_rate": 6.3025210084033615e-06, | |
| "loss": 1.2107, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.3739495798319328, | |
| "grad_norm": 0.16015625, | |
| "learning_rate": 6.260504201680673e-06, | |
| "loss": 1.1964, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.37815126050420167, | |
| "grad_norm": 0.162109375, | |
| "learning_rate": 6.218487394957984e-06, | |
| "loss": 1.2482, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.38235294117647056, | |
| "grad_norm": 0.162109375, | |
| "learning_rate": 6.176470588235295e-06, | |
| "loss": 1.2196, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.3865546218487395, | |
| "grad_norm": 0.166015625, | |
| "learning_rate": 6.134453781512606e-06, | |
| "loss": 1.272, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.3907563025210084, | |
| "grad_norm": 0.1611328125, | |
| "learning_rate": 6.092436974789916e-06, | |
| "loss": 1.2588, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.3949579831932773, | |
| "grad_norm": 0.1787109375, | |
| "learning_rate": 6.050420168067227e-06, | |
| "loss": 1.2266, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.39915966386554624, | |
| "grad_norm": 0.162109375, | |
| "learning_rate": 6.008403361344538e-06, | |
| "loss": 1.2445, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.40336134453781514, | |
| "grad_norm": 0.1494140625, | |
| "learning_rate": 5.9663865546218495e-06, | |
| "loss": 1.174, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.40756302521008403, | |
| "grad_norm": 0.154296875, | |
| "learning_rate": 5.92436974789916e-06, | |
| "loss": 1.2413, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.4117647058823529, | |
| "grad_norm": 0.251953125, | |
| "learning_rate": 5.882352941176471e-06, | |
| "loss": 1.2257, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.41596638655462187, | |
| "grad_norm": 0.2138671875, | |
| "learning_rate": 5.8403361344537825e-06, | |
| "loss": 1.3162, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.42016806722689076, | |
| "grad_norm": 0.1689453125, | |
| "learning_rate": 5.798319327731093e-06, | |
| "loss": 1.2659, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.42436974789915966, | |
| "grad_norm": 0.181640625, | |
| "learning_rate": 5.756302521008403e-06, | |
| "loss": 1.1955, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.42857142857142855, | |
| "grad_norm": 0.1728515625, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 1.1625, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.4327731092436975, | |
| "grad_norm": 0.1943359375, | |
| "learning_rate": 5.672268907563025e-06, | |
| "loss": 1.1772, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.4369747899159664, | |
| "grad_norm": 0.1650390625, | |
| "learning_rate": 5.630252100840337e-06, | |
| "loss": 1.1818, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.4411764705882353, | |
| "grad_norm": 0.1796875, | |
| "learning_rate": 5.588235294117647e-06, | |
| "loss": 1.2223, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.44537815126050423, | |
| "grad_norm": 0.162109375, | |
| "learning_rate": 5.546218487394959e-06, | |
| "loss": 1.2245, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.4495798319327731, | |
| "grad_norm": 0.1640625, | |
| "learning_rate": 5.50420168067227e-06, | |
| "loss": 1.1702, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.453781512605042, | |
| "grad_norm": 0.166015625, | |
| "learning_rate": 5.4621848739495795e-06, | |
| "loss": 1.1421, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.4579831932773109, | |
| "grad_norm": 0.146484375, | |
| "learning_rate": 5.420168067226891e-06, | |
| "loss": 1.1457, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.46218487394957986, | |
| "grad_norm": 0.1640625, | |
| "learning_rate": 5.378151260504202e-06, | |
| "loss": 1.2542, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.46638655462184875, | |
| "grad_norm": 0.1904296875, | |
| "learning_rate": 5.336134453781513e-06, | |
| "loss": 1.228, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": 0.1669921875, | |
| "learning_rate": 5.294117647058824e-06, | |
| "loss": 1.1863, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.47478991596638653, | |
| "grad_norm": 0.1513671875, | |
| "learning_rate": 5.2521008403361354e-06, | |
| "loss": 1.2029, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.4789915966386555, | |
| "grad_norm": 0.1923828125, | |
| "learning_rate": 5.210084033613446e-06, | |
| "loss": 1.2012, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.4831932773109244, | |
| "grad_norm": 0.1650390625, | |
| "learning_rate": 5.168067226890757e-06, | |
| "loss": 1.2052, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.48739495798319327, | |
| "grad_norm": 0.1552734375, | |
| "learning_rate": 5.1260504201680675e-06, | |
| "loss": 1.1896, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.49159663865546216, | |
| "grad_norm": 0.1962890625, | |
| "learning_rate": 5.084033613445378e-06, | |
| "loss": 1.1569, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.4957983193277311, | |
| "grad_norm": 0.1875, | |
| "learning_rate": 5.04201680672269e-06, | |
| "loss": 1.1901, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.1572265625, | |
| "learning_rate": 5e-06, | |
| "loss": 1.2029, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.5042016806722689, | |
| "grad_norm": 0.193359375, | |
| "learning_rate": 4.957983193277311e-06, | |
| "loss": 1.1294, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.5084033613445378, | |
| "grad_norm": 0.16796875, | |
| "learning_rate": 4.915966386554622e-06, | |
| "loss": 1.2037, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.5126050420168067, | |
| "grad_norm": 0.2060546875, | |
| "learning_rate": 4.873949579831933e-06, | |
| "loss": 1.1893, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.5168067226890757, | |
| "grad_norm": 0.1826171875, | |
| "learning_rate": 4.831932773109244e-06, | |
| "loss": 1.2425, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.5210084033613446, | |
| "grad_norm": 0.21875, | |
| "learning_rate": 4.7899159663865555e-06, | |
| "loss": 1.2155, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.5252100840336135, | |
| "grad_norm": 0.1572265625, | |
| "learning_rate": 4.747899159663865e-06, | |
| "loss": 1.2264, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.5294117647058824, | |
| "grad_norm": 0.1650390625, | |
| "learning_rate": 4.705882352941177e-06, | |
| "loss": 1.1903, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.5336134453781513, | |
| "grad_norm": 0.158203125, | |
| "learning_rate": 4.663865546218488e-06, | |
| "loss": 1.1519, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.5378151260504201, | |
| "grad_norm": 0.166015625, | |
| "learning_rate": 4.621848739495799e-06, | |
| "loss": 1.2086, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.542016806722689, | |
| "grad_norm": 0.21484375, | |
| "learning_rate": 4.57983193277311e-06, | |
| "loss": 1.1449, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.5462184873949579, | |
| "grad_norm": 0.1650390625, | |
| "learning_rate": 4.5378151260504205e-06, | |
| "loss": 1.1914, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.5504201680672269, | |
| "grad_norm": 0.1884765625, | |
| "learning_rate": 4.495798319327731e-06, | |
| "loss": 1.2032, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.5546218487394958, | |
| "grad_norm": 0.244140625, | |
| "learning_rate": 4.453781512605043e-06, | |
| "loss": 1.1796, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.5588235294117647, | |
| "grad_norm": 0.1650390625, | |
| "learning_rate": 4.411764705882353e-06, | |
| "loss": 1.1884, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.5630252100840336, | |
| "grad_norm": 0.15625, | |
| "learning_rate": 4.369747899159664e-06, | |
| "loss": 1.1705, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.5672268907563025, | |
| "grad_norm": 0.1669921875, | |
| "learning_rate": 4.327731092436976e-06, | |
| "loss": 1.1137, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 4.2857142857142855e-06, | |
| "loss": 1.189, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.5756302521008403, | |
| "grad_norm": 0.2373046875, | |
| "learning_rate": 4.243697478991597e-06, | |
| "loss": 1.1762, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.5798319327731093, | |
| "grad_norm": 0.2265625, | |
| "learning_rate": 4.201680672268908e-06, | |
| "loss": 1.2506, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.5840336134453782, | |
| "grad_norm": 0.181640625, | |
| "learning_rate": 4.159663865546219e-06, | |
| "loss": 1.1834, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 0.154296875, | |
| "learning_rate": 4.11764705882353e-06, | |
| "loss": 1.1359, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.592436974789916, | |
| "grad_norm": 0.1630859375, | |
| "learning_rate": 4.075630252100841e-06, | |
| "loss": 1.179, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.5966386554621849, | |
| "grad_norm": 0.2080078125, | |
| "learning_rate": 4.033613445378151e-06, | |
| "loss": 1.1377, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.6008403361344538, | |
| "grad_norm": 0.1689453125, | |
| "learning_rate": 3.991596638655463e-06, | |
| "loss": 1.2201, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.6050420168067226, | |
| "grad_norm": 0.91015625, | |
| "learning_rate": 3.9495798319327735e-06, | |
| "loss": 1.2268, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.6092436974789915, | |
| "grad_norm": 0.1611328125, | |
| "learning_rate": 3.907563025210084e-06, | |
| "loss": 1.1362, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.6134453781512605, | |
| "grad_norm": 0.1904296875, | |
| "learning_rate": 3.865546218487396e-06, | |
| "loss": 1.1543, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.6176470588235294, | |
| "grad_norm": 0.1728515625, | |
| "learning_rate": 3.8235294117647055e-06, | |
| "loss": 1.1358, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.6218487394957983, | |
| "grad_norm": 0.1748046875, | |
| "learning_rate": 3.781512605042017e-06, | |
| "loss": 1.1957, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.6260504201680672, | |
| "grad_norm": 0.18359375, | |
| "learning_rate": 3.739495798319328e-06, | |
| "loss": 1.2334, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.6302521008403361, | |
| "grad_norm": 0.1591796875, | |
| "learning_rate": 3.6974789915966393e-06, | |
| "loss": 1.1978, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.634453781512605, | |
| "grad_norm": 0.1630859375, | |
| "learning_rate": 3.6554621848739496e-06, | |
| "loss": 1.129, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.6386554621848739, | |
| "grad_norm": 0.1650390625, | |
| "learning_rate": 3.6134453781512607e-06, | |
| "loss": 1.1494, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.6428571428571429, | |
| "grad_norm": 0.158203125, | |
| "learning_rate": 3.5714285714285718e-06, | |
| "loss": 1.1591, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.6470588235294118, | |
| "grad_norm": 0.1572265625, | |
| "learning_rate": 3.529411764705883e-06, | |
| "loss": 1.1577, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.6512605042016807, | |
| "grad_norm": 0.2119140625, | |
| "learning_rate": 3.4873949579831936e-06, | |
| "loss": 1.1167, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.6554621848739496, | |
| "grad_norm": 0.162109375, | |
| "learning_rate": 3.4453781512605043e-06, | |
| "loss": 1.1735, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.6596638655462185, | |
| "grad_norm": 0.1689453125, | |
| "learning_rate": 3.4033613445378154e-06, | |
| "loss": 1.1976, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.6638655462184874, | |
| "grad_norm": 0.1904296875, | |
| "learning_rate": 3.3613445378151265e-06, | |
| "loss": 1.1395, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.6680672268907563, | |
| "grad_norm": 0.181640625, | |
| "learning_rate": 3.319327731092437e-06, | |
| "loss": 1.1184, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.6722689075630253, | |
| "grad_norm": 0.1552734375, | |
| "learning_rate": 3.2773109243697483e-06, | |
| "loss": 1.1342, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6764705882352942, | |
| "grad_norm": 0.224609375, | |
| "learning_rate": 3.2352941176470594e-06, | |
| "loss": 1.1525, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.680672268907563, | |
| "grad_norm": 0.228515625, | |
| "learning_rate": 3.1932773109243696e-06, | |
| "loss": 1.145, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.6848739495798319, | |
| "grad_norm": 0.236328125, | |
| "learning_rate": 3.1512605042016808e-06, | |
| "loss": 1.1455, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.6890756302521008, | |
| "grad_norm": 0.162109375, | |
| "learning_rate": 3.109243697478992e-06, | |
| "loss": 1.142, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.6932773109243697, | |
| "grad_norm": 0.208984375, | |
| "learning_rate": 3.067226890756303e-06, | |
| "loss": 1.312, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.6974789915966386, | |
| "grad_norm": 0.2119140625, | |
| "learning_rate": 3.0252100840336137e-06, | |
| "loss": 1.1366, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.7016806722689075, | |
| "grad_norm": 0.1611328125, | |
| "learning_rate": 2.9831932773109248e-06, | |
| "loss": 1.1589, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": 0.189453125, | |
| "learning_rate": 2.9411764705882355e-06, | |
| "loss": 1.1543, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.7100840336134454, | |
| "grad_norm": 0.181640625, | |
| "learning_rate": 2.8991596638655466e-06, | |
| "loss": 1.1478, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.7142857142857143, | |
| "grad_norm": 0.2275390625, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 1.1859, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.7184873949579832, | |
| "grad_norm": 0.15234375, | |
| "learning_rate": 2.8151260504201684e-06, | |
| "loss": 1.133, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.7226890756302521, | |
| "grad_norm": 0.2021484375, | |
| "learning_rate": 2.7731092436974795e-06, | |
| "loss": 1.1666, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.726890756302521, | |
| "grad_norm": 0.1865234375, | |
| "learning_rate": 2.7310924369747897e-06, | |
| "loss": 1.1273, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.7310924369747899, | |
| "grad_norm": 0.1630859375, | |
| "learning_rate": 2.689075630252101e-06, | |
| "loss": 1.1314, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.7352941176470589, | |
| "grad_norm": 0.1650390625, | |
| "learning_rate": 2.647058823529412e-06, | |
| "loss": 1.1191, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.7394957983193278, | |
| "grad_norm": 0.1640625, | |
| "learning_rate": 2.605042016806723e-06, | |
| "loss": 1.146, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.7436974789915967, | |
| "grad_norm": 0.1572265625, | |
| "learning_rate": 2.5630252100840338e-06, | |
| "loss": 1.1189, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.7478991596638656, | |
| "grad_norm": 0.2109375, | |
| "learning_rate": 2.521008403361345e-06, | |
| "loss": 1.1404, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.7521008403361344, | |
| "grad_norm": 0.17578125, | |
| "learning_rate": 2.4789915966386555e-06, | |
| "loss": 1.1705, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.7563025210084033, | |
| "grad_norm": 0.2421875, | |
| "learning_rate": 2.4369747899159667e-06, | |
| "loss": 1.151, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7605042016806722, | |
| "grad_norm": 0.1943359375, | |
| "learning_rate": 2.3949579831932778e-06, | |
| "loss": 1.1666, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.7647058823529411, | |
| "grad_norm": 0.1767578125, | |
| "learning_rate": 2.3529411764705885e-06, | |
| "loss": 1.1164, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.7689075630252101, | |
| "grad_norm": 0.1630859375, | |
| "learning_rate": 2.3109243697478996e-06, | |
| "loss": 1.1545, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.773109243697479, | |
| "grad_norm": 0.1806640625, | |
| "learning_rate": 2.2689075630252102e-06, | |
| "loss": 1.1057, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.7773109243697479, | |
| "grad_norm": 0.171875, | |
| "learning_rate": 2.2268907563025214e-06, | |
| "loss": 1.1267, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.7815126050420168, | |
| "grad_norm": 0.1708984375, | |
| "learning_rate": 2.184873949579832e-06, | |
| "loss": 1.1491, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.7857142857142857, | |
| "grad_norm": 0.1865234375, | |
| "learning_rate": 2.1428571428571427e-06, | |
| "loss": 1.1636, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.7899159663865546, | |
| "grad_norm": 0.1611328125, | |
| "learning_rate": 2.100840336134454e-06, | |
| "loss": 1.17, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.7941176470588235, | |
| "grad_norm": 0.1884765625, | |
| "learning_rate": 2.058823529411765e-06, | |
| "loss": 1.1465, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.7983193277310925, | |
| "grad_norm": 0.1728515625, | |
| "learning_rate": 2.0168067226890756e-06, | |
| "loss": 1.1298, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.8025210084033614, | |
| "grad_norm": 0.1591796875, | |
| "learning_rate": 1.9747899159663867e-06, | |
| "loss": 1.1192, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.8067226890756303, | |
| "grad_norm": 0.203125, | |
| "learning_rate": 1.932773109243698e-06, | |
| "loss": 1.1329, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.8109243697478992, | |
| "grad_norm": 0.166015625, | |
| "learning_rate": 1.8907563025210085e-06, | |
| "loss": 1.1601, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.8151260504201681, | |
| "grad_norm": 0.20703125, | |
| "learning_rate": 1.8487394957983196e-06, | |
| "loss": 1.2322, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.819327731092437, | |
| "grad_norm": 0.177734375, | |
| "learning_rate": 1.8067226890756303e-06, | |
| "loss": 1.1707, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.8235294117647058, | |
| "grad_norm": 0.1630859375, | |
| "learning_rate": 1.7647058823529414e-06, | |
| "loss": 1.1262, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.8277310924369747, | |
| "grad_norm": 0.1650390625, | |
| "learning_rate": 1.7226890756302521e-06, | |
| "loss": 1.1084, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.8319327731092437, | |
| "grad_norm": 0.1640625, | |
| "learning_rate": 1.6806722689075632e-06, | |
| "loss": 1.1811, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.8361344537815126, | |
| "grad_norm": 0.2392578125, | |
| "learning_rate": 1.6386554621848741e-06, | |
| "loss": 1.1382, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.8403361344537815, | |
| "grad_norm": 0.1767578125, | |
| "learning_rate": 1.5966386554621848e-06, | |
| "loss": 1.1338, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.8445378151260504, | |
| "grad_norm": 0.1923828125, | |
| "learning_rate": 1.554621848739496e-06, | |
| "loss": 1.1232, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.8487394957983193, | |
| "grad_norm": 0.1787109375, | |
| "learning_rate": 1.5126050420168068e-06, | |
| "loss": 1.1517, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.8529411764705882, | |
| "grad_norm": 0.1748046875, | |
| "learning_rate": 1.4705882352941177e-06, | |
| "loss": 1.073, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 0.16796875, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 1.1056, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.8613445378151261, | |
| "grad_norm": 0.244140625, | |
| "learning_rate": 1.3865546218487397e-06, | |
| "loss": 1.1336, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.865546218487395, | |
| "grad_norm": 0.45703125, | |
| "learning_rate": 1.3445378151260504e-06, | |
| "loss": 1.1677, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.8697478991596639, | |
| "grad_norm": 0.265625, | |
| "learning_rate": 1.3025210084033615e-06, | |
| "loss": 1.1393, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.8739495798319328, | |
| "grad_norm": 0.1728515625, | |
| "learning_rate": 1.2605042016806724e-06, | |
| "loss": 1.1574, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.8781512605042017, | |
| "grad_norm": 0.1982421875, | |
| "learning_rate": 1.2184873949579833e-06, | |
| "loss": 1.1864, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.8823529411764706, | |
| "grad_norm": 0.1640625, | |
| "learning_rate": 1.1764705882352942e-06, | |
| "loss": 1.134, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.8865546218487395, | |
| "grad_norm": 0.16015625, | |
| "learning_rate": 1.1344537815126051e-06, | |
| "loss": 1.1345, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.8907563025210085, | |
| "grad_norm": 0.154296875, | |
| "learning_rate": 1.092436974789916e-06, | |
| "loss": 1.1375, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.8949579831932774, | |
| "grad_norm": 0.1591796875, | |
| "learning_rate": 1.050420168067227e-06, | |
| "loss": 1.1274, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.8991596638655462, | |
| "grad_norm": 0.1572265625, | |
| "learning_rate": 1.0084033613445378e-06, | |
| "loss": 1.0815, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.9033613445378151, | |
| "grad_norm": 0.166015625, | |
| "learning_rate": 9.66386554621849e-07, | |
| "loss": 1.1312, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.907563025210084, | |
| "grad_norm": 0.203125, | |
| "learning_rate": 9.243697478991598e-07, | |
| "loss": 1.1053, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.9117647058823529, | |
| "grad_norm": 0.171875, | |
| "learning_rate": 8.823529411764707e-07, | |
| "loss": 1.1693, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.9159663865546218, | |
| "grad_norm": 0.2734375, | |
| "learning_rate": 8.403361344537816e-07, | |
| "loss": 1.1403, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.9201680672268907, | |
| "grad_norm": 0.1875, | |
| "learning_rate": 7.983193277310924e-07, | |
| "loss": 1.1303, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.9243697478991597, | |
| "grad_norm": 0.2177734375, | |
| "learning_rate": 7.563025210084034e-07, | |
| "loss": 1.1731, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.9285714285714286, | |
| "grad_norm": 0.255859375, | |
| "learning_rate": 7.142857142857143e-07, | |
| "loss": 1.1134, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.9327731092436975, | |
| "grad_norm": 0.1630859375, | |
| "learning_rate": 6.722689075630252e-07, | |
| "loss": 1.2129, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.9369747899159664, | |
| "grad_norm": 0.228515625, | |
| "learning_rate": 6.302521008403362e-07, | |
| "loss": 1.1562, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 0.18359375, | |
| "learning_rate": 5.882352941176471e-07, | |
| "loss": 1.1096, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.9453781512605042, | |
| "grad_norm": 0.162109375, | |
| "learning_rate": 5.46218487394958e-07, | |
| "loss": 1.1225, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.9495798319327731, | |
| "grad_norm": 0.1767578125, | |
| "learning_rate": 5.042016806722689e-07, | |
| "loss": 1.1967, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.9537815126050421, | |
| "grad_norm": 0.25390625, | |
| "learning_rate": 4.621848739495799e-07, | |
| "loss": 1.1275, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.957983193277311, | |
| "grad_norm": 0.220703125, | |
| "learning_rate": 4.201680672268908e-07, | |
| "loss": 1.1286, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.9621848739495799, | |
| "grad_norm": 0.18359375, | |
| "learning_rate": 3.781512605042017e-07, | |
| "loss": 1.1502, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.9663865546218487, | |
| "grad_norm": 0.162109375, | |
| "learning_rate": 3.361344537815126e-07, | |
| "loss": 1.1189, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.9705882352941176, | |
| "grad_norm": 0.17578125, | |
| "learning_rate": 2.9411764705882356e-07, | |
| "loss": 1.1171, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.9747899159663865, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 2.5210084033613445e-07, | |
| "loss": 1.1872, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.9789915966386554, | |
| "grad_norm": 0.158203125, | |
| "learning_rate": 2.100840336134454e-07, | |
| "loss": 1.1548, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.9831932773109243, | |
| "grad_norm": 0.19140625, | |
| "learning_rate": 1.680672268907563e-07, | |
| "loss": 1.1651, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.9873949579831933, | |
| "grad_norm": 0.162109375, | |
| "learning_rate": 1.2605042016806723e-07, | |
| "loss": 1.1386, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.9915966386554622, | |
| "grad_norm": 0.16796875, | |
| "learning_rate": 8.403361344537815e-08, | |
| "loss": 1.1592, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.9957983193277311, | |
| "grad_norm": 0.26953125, | |
| "learning_rate": 4.2016806722689076e-08, | |
| "loss": 1.0834, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.169921875, | |
| "learning_rate": 0.0, | |
| "loss": 1.1298, | |
| "step": 238 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 238, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 0, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7.323826266979697e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |