| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 372, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.002688172043010753, | |
| "grad_norm": 1.6433222600981285, | |
| "learning_rate": 0.0, | |
| "loss": 1.562, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.005376344086021506, | |
| "grad_norm": 1.6862631068558513, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 1.4706, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.008064516129032258, | |
| "grad_norm": 1.7423201097805276, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 1.5406, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.010752688172043012, | |
| "grad_norm": 1.7727625055064622, | |
| "learning_rate": 3e-06, | |
| "loss": 1.5182, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.013440860215053764, | |
| "grad_norm": 1.5457482765192463, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 1.5169, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.016129032258064516, | |
| "grad_norm": 1.5659007249743502, | |
| "learning_rate": 5e-06, | |
| "loss": 1.4922, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.01881720430107527, | |
| "grad_norm": 1.3878881126089677, | |
| "learning_rate": 6e-06, | |
| "loss": 1.4863, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.021505376344086023, | |
| "grad_norm": 1.295368020848385, | |
| "learning_rate": 7e-06, | |
| "loss": 1.4839, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.024193548387096774, | |
| "grad_norm": 1.589857887668944, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 1.4303, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.026881720430107527, | |
| "grad_norm": 2.60679604894195, | |
| "learning_rate": 9e-06, | |
| "loss": 1.3744, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02956989247311828, | |
| "grad_norm": 0.8410885692002656, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3498, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.03225806451612903, | |
| "grad_norm": 0.7927855266728604, | |
| "learning_rate": 1.1000000000000001e-05, | |
| "loss": 1.3179, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.03494623655913978, | |
| "grad_norm": 0.6808035050220127, | |
| "learning_rate": 1.2e-05, | |
| "loss": 1.3268, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.03763440860215054, | |
| "grad_norm": 0.6602967909334083, | |
| "learning_rate": 1.3000000000000001e-05, | |
| "loss": 1.2784, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.04032258064516129, | |
| "grad_norm": 0.5797556052811048, | |
| "learning_rate": 1.4e-05, | |
| "loss": 1.2949, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.043010752688172046, | |
| "grad_norm": 0.6000541560518325, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 1.288, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0456989247311828, | |
| "grad_norm": 0.6494981992893607, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 1.2449, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.04838709677419355, | |
| "grad_norm": 0.6723097988215474, | |
| "learning_rate": 1.7e-05, | |
| "loss": 1.2102, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.051075268817204304, | |
| "grad_norm": 0.6702835925568053, | |
| "learning_rate": 1.8e-05, | |
| "loss": 1.2025, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.053763440860215055, | |
| "grad_norm": 0.625636082792655, | |
| "learning_rate": 1.9e-05, | |
| "loss": 1.2777, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.056451612903225805, | |
| "grad_norm": 0.6253912624763358, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2669, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.05913978494623656, | |
| "grad_norm": 0.5910337660829342, | |
| "learning_rate": 2.1000000000000002e-05, | |
| "loss": 1.2654, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.06182795698924731, | |
| "grad_norm": 0.6304908028391322, | |
| "learning_rate": 2.2000000000000003e-05, | |
| "loss": 1.2413, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.06451612903225806, | |
| "grad_norm": 0.5377853121890415, | |
| "learning_rate": 2.3e-05, | |
| "loss": 1.2109, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.06720430107526881, | |
| "grad_norm": 0.4970873703549533, | |
| "learning_rate": 2.4e-05, | |
| "loss": 1.1359, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.06989247311827956, | |
| "grad_norm": 0.5292734885521813, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.2236, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.07258064516129033, | |
| "grad_norm": 0.5428754620149544, | |
| "learning_rate": 2.6000000000000002e-05, | |
| "loss": 1.2083, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.07526881720430108, | |
| "grad_norm": 0.5711123503896314, | |
| "learning_rate": 2.7000000000000002e-05, | |
| "loss": 1.2161, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.07795698924731183, | |
| "grad_norm": 0.49149041488377043, | |
| "learning_rate": 2.8e-05, | |
| "loss": 1.1454, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.08064516129032258, | |
| "grad_norm": 0.5285852530799724, | |
| "learning_rate": 2.9e-05, | |
| "loss": 1.1194, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.08333333333333333, | |
| "grad_norm": 0.5295555329242986, | |
| "learning_rate": 3.0000000000000004e-05, | |
| "loss": 1.1688, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.08602150537634409, | |
| "grad_norm": 0.465354706566009, | |
| "learning_rate": 3.1e-05, | |
| "loss": 1.1743, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.08870967741935484, | |
| "grad_norm": 0.4486072933924605, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 1.0818, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.0913978494623656, | |
| "grad_norm": 0.496727888984662, | |
| "learning_rate": 3.3e-05, | |
| "loss": 1.2101, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.09408602150537634, | |
| "grad_norm": 0.43899748210993167, | |
| "learning_rate": 3.4e-05, | |
| "loss": 1.1884, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.0967741935483871, | |
| "grad_norm": 0.4147227405541853, | |
| "learning_rate": 3.5000000000000004e-05, | |
| "loss": 1.0814, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.09946236559139784, | |
| "grad_norm": 0.48760701758721925, | |
| "learning_rate": 3.6e-05, | |
| "loss": 1.1212, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.10215053763440861, | |
| "grad_norm": 0.49917378567432974, | |
| "learning_rate": 3.7000000000000005e-05, | |
| "loss": 1.1984, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.10483870967741936, | |
| "grad_norm": 0.5304015628409972, | |
| "learning_rate": 3.8e-05, | |
| "loss": 1.1274, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.10752688172043011, | |
| "grad_norm": 0.4726408598975661, | |
| "learning_rate": 3.9e-05, | |
| "loss": 1.1323, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.11021505376344086, | |
| "grad_norm": 0.44174146995469904, | |
| "learning_rate": 4e-05, | |
| "loss": 1.1898, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.11290322580645161, | |
| "grad_norm": 0.5087279682773094, | |
| "learning_rate": 3.999980086219931e-05, | |
| "loss": 1.1469, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.11559139784946236, | |
| "grad_norm": 0.5626510931079601, | |
| "learning_rate": 3.999920345276283e-05, | |
| "loss": 1.1321, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.11827956989247312, | |
| "grad_norm": 0.47565220090788773, | |
| "learning_rate": 3.999820778358724e-05, | |
| "loss": 1.1453, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.12096774193548387, | |
| "grad_norm": 0.4431044005508681, | |
| "learning_rate": 3.999681387450007e-05, | |
| "loss": 1.1408, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.12365591397849462, | |
| "grad_norm": 0.47942624390584926, | |
| "learning_rate": 3.999502175325932e-05, | |
| "loss": 1.168, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.12634408602150538, | |
| "grad_norm": 0.43166434321061714, | |
| "learning_rate": 3.999283145555291e-05, | |
| "loss": 1.1087, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.12903225806451613, | |
| "grad_norm": 0.47105749411720044, | |
| "learning_rate": 3.999024302499794e-05, | |
| "loss": 1.0752, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.13172043010752688, | |
| "grad_norm": 0.3959072081415341, | |
| "learning_rate": 3.998725651313984e-05, | |
| "loss": 1.1011, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.13440860215053763, | |
| "grad_norm": 0.4416535692834609, | |
| "learning_rate": 3.998387197945135e-05, | |
| "loss": 1.1306, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.13709677419354838, | |
| "grad_norm": 0.4272647809985287, | |
| "learning_rate": 3.9980089491331344e-05, | |
| "loss": 1.1381, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.13978494623655913, | |
| "grad_norm": 0.47769854993592265, | |
| "learning_rate": 3.997590912410345e-05, | |
| "loss": 1.0976, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.1424731182795699, | |
| "grad_norm": 0.3877500456630632, | |
| "learning_rate": 3.997133096101458e-05, | |
| "loss": 1.128, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.14516129032258066, | |
| "grad_norm": 0.3869721085588235, | |
| "learning_rate": 3.996635509323327e-05, | |
| "loss": 1.1225, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.1478494623655914, | |
| "grad_norm": 0.47271590281090886, | |
| "learning_rate": 3.9960981619847856e-05, | |
| "loss": 1.1141, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.15053763440860216, | |
| "grad_norm": 0.4368206211090345, | |
| "learning_rate": 3.99552106478645e-05, | |
| "loss": 1.0872, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.1532258064516129, | |
| "grad_norm": 0.3872679475185707, | |
| "learning_rate": 3.994904229220507e-05, | |
| "loss": 1.1514, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.15591397849462366, | |
| "grad_norm": 0.406268890860899, | |
| "learning_rate": 3.9942476675704854e-05, | |
| "loss": 1.0965, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.1586021505376344, | |
| "grad_norm": 0.43172418498531184, | |
| "learning_rate": 3.993551392911009e-05, | |
| "loss": 1.1192, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.16129032258064516, | |
| "grad_norm": 0.4258357918752704, | |
| "learning_rate": 3.9928154191075375e-05, | |
| "loss": 1.0623, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1639784946236559, | |
| "grad_norm": 0.4585556740184179, | |
| "learning_rate": 3.9920397608160925e-05, | |
| "loss": 1.1076, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 0.44452627464263844, | |
| "learning_rate": 3.991224433482961e-05, | |
| "loss": 1.1107, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.1693548387096774, | |
| "grad_norm": 0.4787003491624029, | |
| "learning_rate": 3.990369453344394e-05, | |
| "loss": 1.1165, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.17204301075268819, | |
| "grad_norm": 0.4704549745433953, | |
| "learning_rate": 3.989474837426277e-05, | |
| "loss": 1.1541, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.17473118279569894, | |
| "grad_norm": 0.4026214434021435, | |
| "learning_rate": 3.9885406035437953e-05, | |
| "loss": 1.1166, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.1774193548387097, | |
| "grad_norm": 0.40057979364796353, | |
| "learning_rate": 3.987566770301076e-05, | |
| "loss": 1.0626, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.18010752688172044, | |
| "grad_norm": 0.4340486368362563, | |
| "learning_rate": 3.98655335709082e-05, | |
| "loss": 1.104, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.1827956989247312, | |
| "grad_norm": 0.42609639195543936, | |
| "learning_rate": 3.985500384093917e-05, | |
| "loss": 1.0893, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.18548387096774194, | |
| "grad_norm": 0.381378569874383, | |
| "learning_rate": 3.984407872279037e-05, | |
| "loss": 1.0433, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.1881720430107527, | |
| "grad_norm": 0.3903976348529897, | |
| "learning_rate": 3.983275843402222e-05, | |
| "loss": 1.1019, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.19086021505376344, | |
| "grad_norm": 0.3648695348221521, | |
| "learning_rate": 3.982104320006446e-05, | |
| "loss": 1.0992, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.1935483870967742, | |
| "grad_norm": 1.8993059639660952, | |
| "learning_rate": 3.9808933254211665e-05, | |
| "loss": 1.1056, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.19623655913978494, | |
| "grad_norm": 0.46580843289168206, | |
| "learning_rate": 3.979642883761866e-05, | |
| "loss": 1.1031, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.1989247311827957, | |
| "grad_norm": 0.449285515287558, | |
| "learning_rate": 3.978353019929562e-05, | |
| "loss": 1.1068, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.20161290322580644, | |
| "grad_norm": 0.5567418056951845, | |
| "learning_rate": 3.977023759610321e-05, | |
| "loss": 1.0446, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.20430107526881722, | |
| "grad_norm": 0.38684392317210076, | |
| "learning_rate": 3.9756551292747405e-05, | |
| "loss": 1.0377, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.20698924731182797, | |
| "grad_norm": 0.473773440244898, | |
| "learning_rate": 3.974247156177423e-05, | |
| "loss": 1.1396, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.20967741935483872, | |
| "grad_norm": 0.4177520757238314, | |
| "learning_rate": 3.9727998683564355e-05, | |
| "loss": 1.1008, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.21236559139784947, | |
| "grad_norm": 0.39719194878309766, | |
| "learning_rate": 3.9713132946327494e-05, | |
| "loss": 1.0215, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.21505376344086022, | |
| "grad_norm": 0.4105085260167095, | |
| "learning_rate": 3.9697874646096675e-05, | |
| "loss": 1.1115, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.21774193548387097, | |
| "grad_norm": 0.4087045401288919, | |
| "learning_rate": 3.968222408672232e-05, | |
| "loss": 1.0579, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.22043010752688172, | |
| "grad_norm": 0.39033402258475636, | |
| "learning_rate": 3.9666181579866244e-05, | |
| "loss": 1.0692, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.22311827956989247, | |
| "grad_norm": 0.41439706526743936, | |
| "learning_rate": 3.964974744499539e-05, | |
| "loss": 1.0865, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.22580645161290322, | |
| "grad_norm": 0.38234297411695073, | |
| "learning_rate": 3.963292200937551e-05, | |
| "loss": 1.0173, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.22849462365591397, | |
| "grad_norm": 0.5308750280660687, | |
| "learning_rate": 3.961570560806461e-05, | |
| "loss": 1.067, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.23118279569892472, | |
| "grad_norm": 0.43351295582441124, | |
| "learning_rate": 3.959809858390634e-05, | |
| "loss": 1.086, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.23387096774193547, | |
| "grad_norm": 0.42069712201952686, | |
| "learning_rate": 3.9580101287523105e-05, | |
| "loss": 1.1064, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.23655913978494625, | |
| "grad_norm": 0.42821523209412365, | |
| "learning_rate": 3.95617140773091e-05, | |
| "loss": 1.0263, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.239247311827957, | |
| "grad_norm": 0.4114502165683399, | |
| "learning_rate": 3.954293731942319e-05, | |
| "loss": 1.0729, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.24193548387096775, | |
| "grad_norm": 0.4131919780645225, | |
| "learning_rate": 3.95237713877816e-05, | |
| "loss": 1.0621, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.2446236559139785, | |
| "grad_norm": 0.4433939594965718, | |
| "learning_rate": 3.950421666405048e-05, | |
| "loss": 1.0805, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.24731182795698925, | |
| "grad_norm": 0.4056188018789589, | |
| "learning_rate": 3.948427353763829e-05, | |
| "loss": 1.0784, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.4642044159391645, | |
| "learning_rate": 3.946394240568807e-05, | |
| "loss": 1.0406, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.25268817204301075, | |
| "grad_norm": 0.4280982724994961, | |
| "learning_rate": 3.944322367306951e-05, | |
| "loss": 1.1117, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.2553763440860215, | |
| "grad_norm": 0.41758547723414086, | |
| "learning_rate": 3.942211775237089e-05, | |
| "loss": 1.0747, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.25806451612903225, | |
| "grad_norm": 0.4344009299837567, | |
| "learning_rate": 3.940062506389089e-05, | |
| "loss": 1.1249, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.260752688172043, | |
| "grad_norm": 0.3847297194838658, | |
| "learning_rate": 3.937874603563015e-05, | |
| "loss": 1.0977, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.26344086021505375, | |
| "grad_norm": 0.4959083398122344, | |
| "learning_rate": 3.935648110328285e-05, | |
| "loss": 1.041, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.2661290322580645, | |
| "grad_norm": 0.46262720954521647, | |
| "learning_rate": 3.933383071022795e-05, | |
| "loss": 1.0926, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.26881720430107525, | |
| "grad_norm": 0.4789561041937064, | |
| "learning_rate": 3.93107953075204e-05, | |
| "loss": 1.0701, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.271505376344086, | |
| "grad_norm": 0.4229869803365367, | |
| "learning_rate": 3.928737535388214e-05, | |
| "loss": 1.063, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.27419354838709675, | |
| "grad_norm": 0.43404703473814416, | |
| "learning_rate": 3.9263571315692976e-05, | |
| "loss": 1.0696, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.2768817204301075, | |
| "grad_norm": 0.4396716028324381, | |
| "learning_rate": 3.923938366698129e-05, | |
| "loss": 1.0317, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.27956989247311825, | |
| "grad_norm": 0.6860340156482403, | |
| "learning_rate": 3.921481288941459e-05, | |
| "loss": 1.0611, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.28225806451612906, | |
| "grad_norm": 0.39601683185098385, | |
| "learning_rate": 3.9189859472289956e-05, | |
| "loss": 1.0294, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.2849462365591398, | |
| "grad_norm": 0.39641986440862376, | |
| "learning_rate": 3.9164523912524224e-05, | |
| "loss": 1.0663, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.28763440860215056, | |
| "grad_norm": 0.3898209322812333, | |
| "learning_rate": 3.913880671464418e-05, | |
| "loss": 1.0671, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.2903225806451613, | |
| "grad_norm": 0.408678962590762, | |
| "learning_rate": 3.911270839077644e-05, | |
| "loss": 1.0224, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.29301075268817206, | |
| "grad_norm": 0.4681397312637908, | |
| "learning_rate": 3.908622946063728e-05, | |
| "loss": 1.091, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.2956989247311828, | |
| "grad_norm": 0.47955178042664964, | |
| "learning_rate": 3.9059370451522295e-05, | |
| "loss": 1.0961, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.29838709677419356, | |
| "grad_norm": 0.4229760577312693, | |
| "learning_rate": 3.903213189829589e-05, | |
| "loss": 1.0386, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.3010752688172043, | |
| "grad_norm": 0.39011319960684926, | |
| "learning_rate": 3.900451434338062e-05, | |
| "loss": 1.067, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.30376344086021506, | |
| "grad_norm": 0.39672904488910227, | |
| "learning_rate": 3.8976518336746396e-05, | |
| "loss": 1.0424, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.3064516129032258, | |
| "grad_norm": 0.49393594827425025, | |
| "learning_rate": 3.894814443589954e-05, | |
| "loss": 1.0695, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.30913978494623656, | |
| "grad_norm": 0.38254416729289076, | |
| "learning_rate": 3.8919393205871676e-05, | |
| "loss": 1.0801, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.3118279569892473, | |
| "grad_norm": 0.4456422459103533, | |
| "learning_rate": 3.889026521920847e-05, | |
| "loss": 1.0934, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.31451612903225806, | |
| "grad_norm": 0.39398196216047476, | |
| "learning_rate": 3.886076105595825e-05, | |
| "loss": 1.1011, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.3172043010752688, | |
| "grad_norm": 0.3949327527665007, | |
| "learning_rate": 3.883088130366042e-05, | |
| "loss": 1.018, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.31989247311827956, | |
| "grad_norm": 0.39254792724729387, | |
| "learning_rate": 3.88006265573338e-05, | |
| "loss": 1.0607, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.3225806451612903, | |
| "grad_norm": 0.5007199853312655, | |
| "learning_rate": 3.876999741946478e-05, | |
| "loss": 1.0609, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.32526881720430106, | |
| "grad_norm": 0.4619751408736227, | |
| "learning_rate": 3.873899449999524e-05, | |
| "loss": 1.0955, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.3279569892473118, | |
| "grad_norm": 0.48219172224114765, | |
| "learning_rate": 3.870761841631051e-05, | |
| "loss": 1.063, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.33064516129032256, | |
| "grad_norm": 0.4054037874416271, | |
| "learning_rate": 3.867586979322703e-05, | |
| "loss": 1.0907, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 0.43161457507331874, | |
| "learning_rate": 3.8643749262979896e-05, | |
| "loss": 1.0666, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.33602150537634407, | |
| "grad_norm": 0.36751029685084174, | |
| "learning_rate": 3.861125746521028e-05, | |
| "loss": 1.0557, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.3387096774193548, | |
| "grad_norm": 0.46690938120869707, | |
| "learning_rate": 3.8578395046952686e-05, | |
| "loss": 1.1023, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.34139784946236557, | |
| "grad_norm": 0.3988094995343537, | |
| "learning_rate": 3.85451626626221e-05, | |
| "loss": 1.0717, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.34408602150537637, | |
| "grad_norm": 0.48432619617982536, | |
| "learning_rate": 3.85115609740009e-05, | |
| "loss": 1.0271, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.3467741935483871, | |
| "grad_norm": 0.5127948499632843, | |
| "learning_rate": 3.8477590650225735e-05, | |
| "loss": 1.0575, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.34946236559139787, | |
| "grad_norm": 0.4132091412639387, | |
| "learning_rate": 3.8443252367774164e-05, | |
| "loss": 1.0355, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.3521505376344086, | |
| "grad_norm": 0.4439631972175399, | |
| "learning_rate": 3.8408546810451176e-05, | |
| "loss": 1.0541, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.3548387096774194, | |
| "grad_norm": 0.3956247259769062, | |
| "learning_rate": 3.837347466937562e-05, | |
| "loss": 1.0672, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.3575268817204301, | |
| "grad_norm": 0.44952249373265674, | |
| "learning_rate": 3.8338036642966396e-05, | |
| "loss": 1.0444, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.3602150537634409, | |
| "grad_norm": 0.4449484078947791, | |
| "learning_rate": 3.830223343692857e-05, | |
| "loss": 1.0514, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.3629032258064516, | |
| "grad_norm": 0.3905509358873801, | |
| "learning_rate": 3.826606576423931e-05, | |
| "loss": 1.0394, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.3655913978494624, | |
| "grad_norm": 0.4183744146790331, | |
| "learning_rate": 3.8229534345133695e-05, | |
| "loss": 1.0212, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.3682795698924731, | |
| "grad_norm": 0.46086732418604737, | |
| "learning_rate": 3.819263990709037e-05, | |
| "loss": 0.994, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.3709677419354839, | |
| "grad_norm": 0.4468564375555911, | |
| "learning_rate": 3.8155383184817064e-05, | |
| "loss": 1.0279, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.3736559139784946, | |
| "grad_norm": 0.3966511312736679, | |
| "learning_rate": 3.8117764920235945e-05, | |
| "loss": 0.9992, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.3763440860215054, | |
| "grad_norm": 0.46461846433833476, | |
| "learning_rate": 3.807978586246887e-05, | |
| "loss": 1.088, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.3790322580645161, | |
| "grad_norm": 0.4254641795470929, | |
| "learning_rate": 3.804144676782243e-05, | |
| "loss": 1.0764, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.3817204301075269, | |
| "grad_norm": 0.42137203485219293, | |
| "learning_rate": 3.800274839977293e-05, | |
| "loss": 1.0422, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.3844086021505376, | |
| "grad_norm": 0.4172681789743796, | |
| "learning_rate": 3.796369152895117e-05, | |
| "loss": 1.0453, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.3870967741935484, | |
| "grad_norm": 0.4531431509751161, | |
| "learning_rate": 3.792427693312707e-05, | |
| "loss": 1.0389, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.3897849462365591, | |
| "grad_norm": 0.3782466419505299, | |
| "learning_rate": 3.788450539719423e-05, | |
| "loss": 1.025, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.3924731182795699, | |
| "grad_norm": 0.4655605897605627, | |
| "learning_rate": 3.7844377713154264e-05, | |
| "loss": 1.064, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.3951612903225806, | |
| "grad_norm": 0.4384836890227208, | |
| "learning_rate": 3.780389468010106e-05, | |
| "loss": 1.0397, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.3978494623655914, | |
| "grad_norm": 0.4844715439450037, | |
| "learning_rate": 3.776305710420482e-05, | |
| "loss": 1.1193, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.40053763440860213, | |
| "grad_norm": 0.41760675460607827, | |
| "learning_rate": 3.7721865798696056e-05, | |
| "loss": 1.0124, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.4032258064516129, | |
| "grad_norm": 0.7337537478769387, | |
| "learning_rate": 3.7680321583849365e-05, | |
| "loss": 1.0508, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.40591397849462363, | |
| "grad_norm": 0.44725816367920673, | |
| "learning_rate": 3.76384252869671e-05, | |
| "loss": 1.0434, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.40860215053763443, | |
| "grad_norm": 0.40870612635720194, | |
| "learning_rate": 3.759617774236292e-05, | |
| "loss": 1.068, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.4112903225806452, | |
| "grad_norm": 0.4534649483932217, | |
| "learning_rate": 3.755357979134511e-05, | |
| "loss": 1.0614, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.41397849462365593, | |
| "grad_norm": 0.41986572053185917, | |
| "learning_rate": 3.751063228219993e-05, | |
| "loss": 1.0391, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.4166666666666667, | |
| "grad_norm": 0.3717380879536067, | |
| "learning_rate": 3.7467336070174604e-05, | |
| "loss": 1.0378, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.41935483870967744, | |
| "grad_norm": 0.41848537015206944, | |
| "learning_rate": 3.742369201746038e-05, | |
| "loss": 1.0439, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.4220430107526882, | |
| "grad_norm": 0.43443932018052933, | |
| "learning_rate": 3.737970099317535e-05, | |
| "loss": 1.0197, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.42473118279569894, | |
| "grad_norm": 0.421554546653683, | |
| "learning_rate": 3.7335363873347056e-05, | |
| "loss": 1.0487, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.4274193548387097, | |
| "grad_norm": 0.8430023271255561, | |
| "learning_rate": 3.729068154089519e-05, | |
| "loss": 1.0333, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.43010752688172044, | |
| "grad_norm": 0.4363044724173691, | |
| "learning_rate": 3.724565488561387e-05, | |
| "loss": 1.0213, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.4327956989247312, | |
| "grad_norm": 0.5335682969510431, | |
| "learning_rate": 3.720028480415401e-05, | |
| "loss": 1.0205, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.43548387096774194, | |
| "grad_norm": 0.4056834135687678, | |
| "learning_rate": 3.7154572200005446e-05, | |
| "loss": 1.0311, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.4381720430107527, | |
| "grad_norm": 0.5322107401886871, | |
| "learning_rate": 3.710851798347891e-05, | |
| "loss": 1.0601, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.44086021505376344, | |
| "grad_norm": 0.4138677278304246, | |
| "learning_rate": 3.7062123071687944e-05, | |
| "loss": 1.0361, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.4435483870967742, | |
| "grad_norm": 0.4775100325512625, | |
| "learning_rate": 3.701538838853062e-05, | |
| "loss": 1.0194, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.44623655913978494, | |
| "grad_norm": 0.40839482534046995, | |
| "learning_rate": 3.696831486467114e-05, | |
| "loss": 1.0463, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.4489247311827957, | |
| "grad_norm": 0.3963093446633738, | |
| "learning_rate": 3.6920903437521305e-05, | |
| "loss": 1.0238, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.45161290322580644, | |
| "grad_norm": 0.4344752184390704, | |
| "learning_rate": 3.6873155051221846e-05, | |
| "loss": 1.0472, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.4543010752688172, | |
| "grad_norm": 0.4167014186949368, | |
| "learning_rate": 3.6825070656623626e-05, | |
| "loss": 1.0599, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.45698924731182794, | |
| "grad_norm": 0.43904590007956124, | |
| "learning_rate": 3.677665121126871e-05, | |
| "loss": 1.0559, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.4596774193548387, | |
| "grad_norm": 0.372185063148541, | |
| "learning_rate": 3.6727897679371276e-05, | |
| "loss": 1.0012, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.46236559139784944, | |
| "grad_norm": 0.43086731351488916, | |
| "learning_rate": 3.667881103179844e-05, | |
| "loss": 1.0133, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.4650537634408602, | |
| "grad_norm": 0.5796354347464544, | |
| "learning_rate": 3.662939224605091e-05, | |
| "loss": 1.0517, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.46774193548387094, | |
| "grad_norm": 0.4587453684541154, | |
| "learning_rate": 3.657964230624351e-05, | |
| "loss": 1.0164, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.47043010752688175, | |
| "grad_norm": 0.5102852182866393, | |
| "learning_rate": 3.6529562203085595e-05, | |
| "loss": 1.052, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.4731182795698925, | |
| "grad_norm": 0.4469591346380821, | |
| "learning_rate": 3.6479152933861336e-05, | |
| "loss": 1.0905, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.47580645161290325, | |
| "grad_norm": 0.45277428352010624, | |
| "learning_rate": 3.642841550240983e-05, | |
| "loss": 1.0961, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.478494623655914, | |
| "grad_norm": 0.45588595960031525, | |
| "learning_rate": 3.6377350919105136e-05, | |
| "loss": 1.0178, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.48118279569892475, | |
| "grad_norm": 0.6147997034643559, | |
| "learning_rate": 3.632596020083612e-05, | |
| "loss": 1.0148, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.4838709677419355, | |
| "grad_norm": 0.3734326271789308, | |
| "learning_rate": 3.627424437098625e-05, | |
| "loss": 1.0006, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.48655913978494625, | |
| "grad_norm": 0.4564187594173089, | |
| "learning_rate": 3.6222204459413186e-05, | |
| "loss": 1.0635, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.489247311827957, | |
| "grad_norm": 0.42811733614493086, | |
| "learning_rate": 3.6169841502428285e-05, | |
| "loss": 1.0469, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.49193548387096775, | |
| "grad_norm": 0.4227875509642681, | |
| "learning_rate": 3.611715654277596e-05, | |
| "loss": 1.0446, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.4946236559139785, | |
| "grad_norm": 0.40548546169007965, | |
| "learning_rate": 3.60641506296129e-05, | |
| "loss": 1.0564, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.49731182795698925, | |
| "grad_norm": 0.4161116484325749, | |
| "learning_rate": 3.601082481848721e-05, | |
| "loss": 0.9917, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.39180067540636987, | |
| "learning_rate": 3.595718017131736e-05, | |
| "loss": 1.0081, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.5026881720430108, | |
| "grad_norm": 0.5307122561583237, | |
| "learning_rate": 3.5903217756371066e-05, | |
| "loss": 0.9972, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.5053763440860215, | |
| "grad_norm": 0.4633315164676552, | |
| "learning_rate": 3.5848938648243976e-05, | |
| "loss": 1.0196, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.5080645161290323, | |
| "grad_norm": 0.43457272116367207, | |
| "learning_rate": 3.579434392783832e-05, | |
| "loss": 1.0429, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.510752688172043, | |
| "grad_norm": 0.42602042879132207, | |
| "learning_rate": 3.5739434682341355e-05, | |
| "loss": 1.0355, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.5134408602150538, | |
| "grad_norm": 0.37328410492227004, | |
| "learning_rate": 3.568421200520371e-05, | |
| "loss": 1.0158, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.5161290322580645, | |
| "grad_norm": 0.47901349260363574, | |
| "learning_rate": 3.562867699611764e-05, | |
| "loss": 1.006, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.5188172043010753, | |
| "grad_norm": 0.6800894155552869, | |
| "learning_rate": 3.55728307609951e-05, | |
| "loss": 1.0819, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.521505376344086, | |
| "grad_norm": 0.6815573295093794, | |
| "learning_rate": 3.5516674411945747e-05, | |
| "loss": 0.9767, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.5241935483870968, | |
| "grad_norm": 0.40923877696875666, | |
| "learning_rate": 3.546020906725474e-05, | |
| "loss": 1.0048, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.5268817204301075, | |
| "grad_norm": 0.39166638466881304, | |
| "learning_rate": 3.540343585136056e-05, | |
| "loss": 1.0115, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.5295698924731183, | |
| "grad_norm": 0.46039879078749524, | |
| "learning_rate": 3.5346355894832515e-05, | |
| "loss": 1.0274, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.532258064516129, | |
| "grad_norm": 0.435003701062386, | |
| "learning_rate": 3.5288970334348324e-05, | |
| "loss": 1.0262, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.5349462365591398, | |
| "grad_norm": 0.46422099557675184, | |
| "learning_rate": 3.5231280312671426e-05, | |
| "loss": 1.0406, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.5376344086021505, | |
| "grad_norm": 0.3946242892533647, | |
| "learning_rate": 3.51732869786282e-05, | |
| "loss": 1.0351, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.5403225806451613, | |
| "grad_norm": 0.4593963303455073, | |
| "learning_rate": 3.511499148708517e-05, | |
| "loss": 1.0161, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.543010752688172, | |
| "grad_norm": 0.43211273427185715, | |
| "learning_rate": 3.505639499892591e-05, | |
| "loss": 1.0339, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.5456989247311828, | |
| "grad_norm": 0.4638011311631454, | |
| "learning_rate": 3.499749868102802e-05, | |
| "loss": 1.0195, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.5483870967741935, | |
| "grad_norm": 0.4606785516075864, | |
| "learning_rate": 3.4938303706239814e-05, | |
| "loss": 1.0809, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.5510752688172043, | |
| "grad_norm": 0.4750835163830621, | |
| "learning_rate": 3.487881125335699e-05, | |
| "loss": 1.0104, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.553763440860215, | |
| "grad_norm": 0.48069623342657913, | |
| "learning_rate": 3.4819022507099184e-05, | |
| "loss": 1.0534, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.5564516129032258, | |
| "grad_norm": 0.4485052357605267, | |
| "learning_rate": 3.475893865808633e-05, | |
| "loss": 1.008, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.5591397849462365, | |
| "grad_norm": 0.45226568470539963, | |
| "learning_rate": 3.4698560902815e-05, | |
| "loss": 0.9859, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.5618279569892473, | |
| "grad_norm": 0.4556713744237398, | |
| "learning_rate": 3.463789044363451e-05, | |
| "loss": 1.0468, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.5645161290322581, | |
| "grad_norm": 0.40515419542450315, | |
| "learning_rate": 3.4576928488723056e-05, | |
| "loss": 1.0069, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5672043010752689, | |
| "grad_norm": 0.407850239298829, | |
| "learning_rate": 3.4515676252063595e-05, | |
| "loss": 1.024, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.5698924731182796, | |
| "grad_norm": 0.4245125668059516, | |
| "learning_rate": 3.445413495341971e-05, | |
| "loss": 0.9842, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.5725806451612904, | |
| "grad_norm": 0.5282266357639802, | |
| "learning_rate": 3.439230581831126e-05, | |
| "loss": 1.0511, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.5752688172043011, | |
| "grad_norm": 0.46721556238008377, | |
| "learning_rate": 3.433019007799007e-05, | |
| "loss": 1.0722, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.5779569892473119, | |
| "grad_norm": 0.3998174935596331, | |
| "learning_rate": 3.4267788969415315e-05, | |
| "loss": 1.0417, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.5806451612903226, | |
| "grad_norm": 0.39836497217157424, | |
| "learning_rate": 3.420510373522896e-05, | |
| "loss": 0.9522, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.5833333333333334, | |
| "grad_norm": 0.5604060165845736, | |
| "learning_rate": 3.4142135623730954e-05, | |
| "loss": 1.0406, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.5860215053763441, | |
| "grad_norm": 0.4626752931850209, | |
| "learning_rate": 3.4078885888854436e-05, | |
| "loss": 1.0403, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.5887096774193549, | |
| "grad_norm": 0.4119865874583256, | |
| "learning_rate": 3.4015355790140715e-05, | |
| "loss": 0.974, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.5913978494623656, | |
| "grad_norm": 0.41688760669607, | |
| "learning_rate": 3.39515465927142e-05, | |
| "loss": 1.0354, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5940860215053764, | |
| "grad_norm": 0.47263736408876167, | |
| "learning_rate": 3.388745956725722e-05, | |
| "loss": 1.0438, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.5967741935483871, | |
| "grad_norm": 0.48712838990373963, | |
| "learning_rate": 3.3823095989984697e-05, | |
| "loss": 0.9847, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.5994623655913979, | |
| "grad_norm": 0.39317905049275836, | |
| "learning_rate": 3.3758457142618754e-05, | |
| "loss": 0.9806, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.6021505376344086, | |
| "grad_norm": 0.484001386994586, | |
| "learning_rate": 3.369354431236319e-05, | |
| "loss": 1.0003, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.6048387096774194, | |
| "grad_norm": 0.3896751020684252, | |
| "learning_rate": 3.362835879187783e-05, | |
| "loss": 0.9314, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.6075268817204301, | |
| "grad_norm": 0.402131340210077, | |
| "learning_rate": 3.356290187925278e-05, | |
| "loss": 0.957, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.6102150537634409, | |
| "grad_norm": 0.4442069284277535, | |
| "learning_rate": 3.349717487798261e-05, | |
| "loss": 1.0651, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.6129032258064516, | |
| "grad_norm": 0.4075067959077034, | |
| "learning_rate": 3.3431179096940375e-05, | |
| "loss": 1.0117, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.6155913978494624, | |
| "grad_norm": 0.4595977891340027, | |
| "learning_rate": 3.3364915850351525e-05, | |
| "loss": 1.0277, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.6182795698924731, | |
| "grad_norm": 0.41565240224286376, | |
| "learning_rate": 3.3298386457767804e-05, | |
| "loss": 0.9873, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.6209677419354839, | |
| "grad_norm": 0.400290934516727, | |
| "learning_rate": 3.3231592244040885e-05, | |
| "loss": 1.0503, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.6236559139784946, | |
| "grad_norm": 0.43593503744528256, | |
| "learning_rate": 3.3164534539296056e-05, | |
| "loss": 1.0256, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.6263440860215054, | |
| "grad_norm": 0.4297576409774745, | |
| "learning_rate": 3.309721467890571e-05, | |
| "loss": 0.9873, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.6290322580645161, | |
| "grad_norm": 0.5286155107560961, | |
| "learning_rate": 3.302963400346272e-05, | |
| "loss": 1.0526, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.6317204301075269, | |
| "grad_norm": 0.4080215430723157, | |
| "learning_rate": 3.296179385875381e-05, | |
| "loss": 0.993, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.6344086021505376, | |
| "grad_norm": 0.4666697414536282, | |
| "learning_rate": 3.2893695595732705e-05, | |
| "loss": 0.9855, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.6370967741935484, | |
| "grad_norm": 0.44576593027115785, | |
| "learning_rate": 3.282534057049322e-05, | |
| "loss": 0.994, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.6397849462365591, | |
| "grad_norm": 0.45875921319019286, | |
| "learning_rate": 3.275673014424231e-05, | |
| "loss": 1.0695, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.6424731182795699, | |
| "grad_norm": 0.4483391985101821, | |
| "learning_rate": 3.268786568327291e-05, | |
| "loss": 1.0413, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.6451612903225806, | |
| "grad_norm": 0.3823024947210084, | |
| "learning_rate": 3.261874855893675e-05, | |
| "loss": 1.0634, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.6478494623655914, | |
| "grad_norm": 0.42590418591004187, | |
| "learning_rate": 3.254938014761704e-05, | |
| "loss": 1.1039, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.6505376344086021, | |
| "grad_norm": 0.4436207874701427, | |
| "learning_rate": 3.2479761830701075e-05, | |
| "loss": 1.0797, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.6532258064516129, | |
| "grad_norm": 0.5436242022516592, | |
| "learning_rate": 3.240989499455269e-05, | |
| "loss": 0.998, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.6559139784946236, | |
| "grad_norm": 0.42461660808494955, | |
| "learning_rate": 3.2339781030484715e-05, | |
| "loss": 1.0014, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.6586021505376344, | |
| "grad_norm": 0.4147658974390641, | |
| "learning_rate": 3.2269421334731196e-05, | |
| "loss": 1.0047, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.6612903225806451, | |
| "grad_norm": 0.3702000902999608, | |
| "learning_rate": 3.219881730841964e-05, | |
| "loss": 1.0057, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.6639784946236559, | |
| "grad_norm": 0.37405944820555137, | |
| "learning_rate": 3.212797035754311e-05, | |
| "loss": 0.9881, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 0.39789221907192235, | |
| "learning_rate": 3.205688189293219e-05, | |
| "loss": 1.002, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.6693548387096774, | |
| "grad_norm": 0.35269099760384387, | |
| "learning_rate": 3.198555333022694e-05, | |
| "loss": 1.0445, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.6720430107526881, | |
| "grad_norm": 0.39171670743365294, | |
| "learning_rate": 3.191398608984867e-05, | |
| "loss": 0.9873, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.6747311827956989, | |
| "grad_norm": 0.36377972714827284, | |
| "learning_rate": 3.184218159697166e-05, | |
| "loss": 0.9678, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.6774193548387096, | |
| "grad_norm": 0.4760701686418637, | |
| "learning_rate": 3.177014128149479e-05, | |
| "loss": 1.0475, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.6801075268817204, | |
| "grad_norm": 0.36306748600915323, | |
| "learning_rate": 3.169786657801306e-05, | |
| "loss": 0.9737, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.6827956989247311, | |
| "grad_norm": 0.36397370143939106, | |
| "learning_rate": 3.162535892578903e-05, | |
| "loss": 1.0009, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.6854838709677419, | |
| "grad_norm": 0.41923544253489314, | |
| "learning_rate": 3.155261976872412e-05, | |
| "loss": 0.9855, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.6881720430107527, | |
| "grad_norm": 0.4349008134787599, | |
| "learning_rate": 3.147965055532991e-05, | |
| "loss": 0.9843, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.6908602150537635, | |
| "grad_norm": 0.4403161475473632, | |
| "learning_rate": 3.1406452738699284e-05, | |
| "loss": 0.9932, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.6935483870967742, | |
| "grad_norm": 0.4088632034626185, | |
| "learning_rate": 3.1333027776477454e-05, | |
| "loss": 1.0175, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.696236559139785, | |
| "grad_norm": 0.4089626667866183, | |
| "learning_rate": 3.125937713083296e-05, | |
| "loss": 0.9957, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.6989247311827957, | |
| "grad_norm": 0.44005061948101687, | |
| "learning_rate": 3.118550226842857e-05, | |
| "loss": 0.9902, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.7016129032258065, | |
| "grad_norm": 1.1016022022748841, | |
| "learning_rate": 3.111140466039205e-05, | |
| "loss": 0.991, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.7043010752688172, | |
| "grad_norm": 0.39448956783294353, | |
| "learning_rate": 3.103708578228686e-05, | |
| "loss": 1.0041, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.706989247311828, | |
| "grad_norm": 0.41388488702273174, | |
| "learning_rate": 3.0962547114082804e-05, | |
| "loss": 0.9928, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.7096774193548387, | |
| "grad_norm": 0.4065224464102798, | |
| "learning_rate": 3.088779014012652e-05, | |
| "loss": 0.9859, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.7123655913978495, | |
| "grad_norm": 0.39952347811781436, | |
| "learning_rate": 3.0812816349111956e-05, | |
| "loss": 0.9613, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.7150537634408602, | |
| "grad_norm": 0.43554876713734897, | |
| "learning_rate": 3.073762723405069e-05, | |
| "loss": 1.0289, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.717741935483871, | |
| "grad_norm": 0.469813057633801, | |
| "learning_rate": 3.066222429224221e-05, | |
| "loss": 1.0438, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.7204301075268817, | |
| "grad_norm": 0.4353123605440106, | |
| "learning_rate": 3.0586609025244144e-05, | |
| "loss": 1.0017, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.7231182795698925, | |
| "grad_norm": 0.40010712539262144, | |
| "learning_rate": 3.051078293884226e-05, | |
| "loss": 1.0254, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.7258064516129032, | |
| "grad_norm": 0.41179768187019394, | |
| "learning_rate": 3.0434747543020585e-05, | |
| "loss": 1.0167, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.728494623655914, | |
| "grad_norm": 0.39261397155250993, | |
| "learning_rate": 3.0358504351931265e-05, | |
| "loss": 0.9987, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.7311827956989247, | |
| "grad_norm": 0.4037853365263608, | |
| "learning_rate": 3.0282054883864434e-05, | |
| "loss": 1.0016, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.7338709677419355, | |
| "grad_norm": 0.3920371074761728, | |
| "learning_rate": 3.0205400661218e-05, | |
| "loss": 0.9427, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.7365591397849462, | |
| "grad_norm": 0.4525036893342772, | |
| "learning_rate": 3.0128543210467273e-05, | |
| "loss": 1.0566, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.739247311827957, | |
| "grad_norm": 0.41264407607647574, | |
| "learning_rate": 3.0051484062134632e-05, | |
| "loss": 0.9899, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.7419354838709677, | |
| "grad_norm": 0.37437706613357397, | |
| "learning_rate": 2.9974224750759017e-05, | |
| "loss": 0.9817, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.7446236559139785, | |
| "grad_norm": 0.3844600838817203, | |
| "learning_rate": 2.9896766814865355e-05, | |
| "loss": 1.0263, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.7473118279569892, | |
| "grad_norm": 0.4310511049000039, | |
| "learning_rate": 2.9819111796933948e-05, | |
| "loss": 0.9781, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 0.40281595760365946, | |
| "learning_rate": 2.9741261243369746e-05, | |
| "loss": 1.0273, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.7526881720430108, | |
| "grad_norm": 0.4498302856339957, | |
| "learning_rate": 2.9663216704471547e-05, | |
| "loss": 0.9886, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.7553763440860215, | |
| "grad_norm": 0.4350406167421517, | |
| "learning_rate": 2.958497973440114e-05, | |
| "loss": 1.0247, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.7580645161290323, | |
| "grad_norm": 0.46748351737565624, | |
| "learning_rate": 2.9506551891152334e-05, | |
| "loss": 1.0072, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.760752688172043, | |
| "grad_norm": 0.3998308958015181, | |
| "learning_rate": 2.9427934736519962e-05, | |
| "loss": 1.076, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.7634408602150538, | |
| "grad_norm": 0.42326867383664013, | |
| "learning_rate": 2.9349129836068732e-05, | |
| "loss": 0.9895, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.7661290322580645, | |
| "grad_norm": 0.3949205497118407, | |
| "learning_rate": 2.9270138759102108e-05, | |
| "loss": 1.027, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.7688172043010753, | |
| "grad_norm": 0.40826149975955933, | |
| "learning_rate": 2.919096307863104e-05, | |
| "loss": 1.0128, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.771505376344086, | |
| "grad_norm": 0.6045575439891937, | |
| "learning_rate": 2.9111604371342593e-05, | |
| "loss": 0.9806, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.7741935483870968, | |
| "grad_norm": 0.3906743864943639, | |
| "learning_rate": 2.903206421756862e-05, | |
| "loss": 1.0126, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.7768817204301075, | |
| "grad_norm": 0.37994713789537804, | |
| "learning_rate": 2.8952344201254253e-05, | |
| "loss": 0.9984, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.7795698924731183, | |
| "grad_norm": 0.4560671009564336, | |
| "learning_rate": 2.8872445909926358e-05, | |
| "loss": 0.9846, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.782258064516129, | |
| "grad_norm": 0.40231158085064994, | |
| "learning_rate": 2.8792370934661948e-05, | |
| "loss": 1.0403, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.7849462365591398, | |
| "grad_norm": 0.4776678536973747, | |
| "learning_rate": 2.8712120870056455e-05, | |
| "loss": 1.0327, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.7876344086021505, | |
| "grad_norm": 0.45302618010000684, | |
| "learning_rate": 2.8631697314192012e-05, | |
| "loss": 1.0126, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.7903225806451613, | |
| "grad_norm": 0.4332121059542856, | |
| "learning_rate": 2.8551101868605644e-05, | |
| "loss": 1.0475, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.793010752688172, | |
| "grad_norm": 0.4498441085262953, | |
| "learning_rate": 2.8470336138257315e-05, | |
| "loss": 1.0178, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.7956989247311828, | |
| "grad_norm": 0.39208633969875073, | |
| "learning_rate": 2.8389401731498018e-05, | |
| "loss": 1.0127, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.7983870967741935, | |
| "grad_norm": 0.4042053763726035, | |
| "learning_rate": 2.8308300260037734e-05, | |
| "loss": 0.9732, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.8010752688172043, | |
| "grad_norm": 0.42842239164240437, | |
| "learning_rate": 2.8227033338913318e-05, | |
| "loss": 1.0152, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.803763440860215, | |
| "grad_norm": 0.3807866452863404, | |
| "learning_rate": 2.814560258645638e-05, | |
| "loss": 1.0189, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.8064516129032258, | |
| "grad_norm": 0.43852909963759557, | |
| "learning_rate": 2.8064009624260994e-05, | |
| "loss": 1.0084, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.8091397849462365, | |
| "grad_norm": 0.5122035327018767, | |
| "learning_rate": 2.7982256077151482e-05, | |
| "loss": 1.0098, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.8118279569892473, | |
| "grad_norm": 0.38079784946729706, | |
| "learning_rate": 2.7900343573150003e-05, | |
| "loss": 1.0097, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.8145161290322581, | |
| "grad_norm": 0.3583539130301541, | |
| "learning_rate": 2.7818273743444132e-05, | |
| "loss": 0.9964, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.8172043010752689, | |
| "grad_norm": 0.3813956107048218, | |
| "learning_rate": 2.7736048222354414e-05, | |
| "loss": 0.9761, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.8198924731182796, | |
| "grad_norm": 0.3901758217275271, | |
| "learning_rate": 2.7653668647301797e-05, | |
| "loss": 1.0117, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.8225806451612904, | |
| "grad_norm": 0.41237780052722667, | |
| "learning_rate": 2.757113665877502e-05, | |
| "loss": 0.9653, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.8252688172043011, | |
| "grad_norm": 0.457306901223017, | |
| "learning_rate": 2.748845390029794e-05, | |
| "loss": 1.0524, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.8279569892473119, | |
| "grad_norm": 0.3791723859065832, | |
| "learning_rate": 2.740562201839684e-05, | |
| "loss": 0.9861, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.8306451612903226, | |
| "grad_norm": 0.500338650948681, | |
| "learning_rate": 2.7322642662567592e-05, | |
| "loss": 0.9705, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 0.4052884593861236, | |
| "learning_rate": 2.7239517485242836e-05, | |
| "loss": 0.9892, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.8360215053763441, | |
| "grad_norm": 0.3969000439893693, | |
| "learning_rate": 2.715624814175907e-05, | |
| "loss": 0.9883, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.8387096774193549, | |
| "grad_norm": 0.5254585071566374, | |
| "learning_rate": 2.7072836290323698e-05, | |
| "loss": 1.08, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.8413978494623656, | |
| "grad_norm": 0.5111475952965409, | |
| "learning_rate": 2.698928359198197e-05, | |
| "loss": 1.0526, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.8440860215053764, | |
| "grad_norm": 0.4717493748353866, | |
| "learning_rate": 2.6905591710583957e-05, | |
| "loss": 1.0137, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.8467741935483871, | |
| "grad_norm": 0.3838063749897804, | |
| "learning_rate": 2.6821762312751368e-05, | |
| "loss": 0.9901, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.8494623655913979, | |
| "grad_norm": 0.3456617314343378, | |
| "learning_rate": 2.6737797067844403e-05, | |
| "loss": 1.0034, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.8521505376344086, | |
| "grad_norm": 0.37971130684639953, | |
| "learning_rate": 2.6653697647928485e-05, | |
| "loss": 0.9552, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.8548387096774194, | |
| "grad_norm": 0.3820801267530888, | |
| "learning_rate": 2.656946572774095e-05, | |
| "loss": 0.9236, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.8575268817204301, | |
| "grad_norm": 0.4114917943590629, | |
| "learning_rate": 2.648510298465775e-05, | |
| "loss": 1.0, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.8602150537634409, | |
| "grad_norm": 0.4185665498381875, | |
| "learning_rate": 2.6400611098659988e-05, | |
| "loss": 1.0435, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.8629032258064516, | |
| "grad_norm": 0.36227121606774076, | |
| "learning_rate": 2.6315991752300503e-05, | |
| "loss": 0.9797, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.8655913978494624, | |
| "grad_norm": 0.40186567244596927, | |
| "learning_rate": 2.623124663067034e-05, | |
| "loss": 1.0071, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.8682795698924731, | |
| "grad_norm": 0.3833356371805648, | |
| "learning_rate": 2.6146377421365225e-05, | |
| "loss": 1.0159, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.8709677419354839, | |
| "grad_norm": 0.41469411381713683, | |
| "learning_rate": 2.6061385814451913e-05, | |
| "loss": 1.0277, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.8736559139784946, | |
| "grad_norm": 0.92622435409038, | |
| "learning_rate": 2.5976273502434584e-05, | |
| "loss": 1.0001, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.8763440860215054, | |
| "grad_norm": 0.4316506228630945, | |
| "learning_rate": 2.5891042180221094e-05, | |
| "loss": 1.0712, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.8790322580645161, | |
| "grad_norm": 0.42656057546508047, | |
| "learning_rate": 2.580569354508925e-05, | |
| "loss": 1.0074, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.8817204301075269, | |
| "grad_norm": 0.3789318712710433, | |
| "learning_rate": 2.5720229296653006e-05, | |
| "loss": 1.0355, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.8844086021505376, | |
| "grad_norm": 0.367154670317836, | |
| "learning_rate": 2.5634651136828597e-05, | |
| "loss": 1.0394, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.8870967741935484, | |
| "grad_norm": 0.4735001007157819, | |
| "learning_rate": 2.554896076980069e-05, | |
| "loss": 1.0552, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.8897849462365591, | |
| "grad_norm": 0.4390567460028508, | |
| "learning_rate": 2.54631599019884e-05, | |
| "loss": 1.0043, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.8924731182795699, | |
| "grad_norm": 0.3642787415401991, | |
| "learning_rate": 2.5377250242011338e-05, | |
| "loss": 0.9854, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.8951612903225806, | |
| "grad_norm": 0.4524235630593109, | |
| "learning_rate": 2.5291233500655584e-05, | |
| "loss": 1.0029, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.8978494623655914, | |
| "grad_norm": 0.4097887869063476, | |
| "learning_rate": 2.52051113908396e-05, | |
| "loss": 1.0122, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.9005376344086021, | |
| "grad_norm": 0.3852040955735104, | |
| "learning_rate": 2.5118885627580155e-05, | |
| "loss": 0.9779, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.9032258064516129, | |
| "grad_norm": 0.40481656602470306, | |
| "learning_rate": 2.5032557927958116e-05, | |
| "loss": 1.0125, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.9059139784946236, | |
| "grad_norm": 0.4118716752579493, | |
| "learning_rate": 2.494613001108431e-05, | |
| "loss": 1.0364, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.9086021505376344, | |
| "grad_norm": 0.4489453038959667, | |
| "learning_rate": 2.485960359806528e-05, | |
| "loss": 1.0436, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.9112903225806451, | |
| "grad_norm": 0.41112406404210244, | |
| "learning_rate": 2.4772980411968975e-05, | |
| "loss": 0.9545, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.9139784946236559, | |
| "grad_norm": 0.4856093390929945, | |
| "learning_rate": 2.468626217779047e-05, | |
| "loss": 0.9854, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.9166666666666666, | |
| "grad_norm": 0.37523760134058665, | |
| "learning_rate": 2.4599450622417615e-05, | |
| "loss": 0.9699, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.9193548387096774, | |
| "grad_norm": 0.4064413347216363, | |
| "learning_rate": 2.4512547474596624e-05, | |
| "loss": 1.0083, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.9220430107526881, | |
| "grad_norm": 0.44550717714004195, | |
| "learning_rate": 2.4425554464897675e-05, | |
| "loss": 1.0175, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.9247311827956989, | |
| "grad_norm": 0.44076297740074416, | |
| "learning_rate": 2.433847332568042e-05, | |
| "loss": 0.9718, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.9274193548387096, | |
| "grad_norm": 0.4971040038925624, | |
| "learning_rate": 2.4251305791059533e-05, | |
| "loss": 1.0317, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.9301075268817204, | |
| "grad_norm": 0.35978037050758516, | |
| "learning_rate": 2.416405359687012e-05, | |
| "loss": 0.9693, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.9327956989247311, | |
| "grad_norm": 0.41817202738352904, | |
| "learning_rate": 2.4076718480633178e-05, | |
| "loss": 0.9764, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.9354838709677419, | |
| "grad_norm": 0.4130988765844788, | |
| "learning_rate": 2.398930218152101e-05, | |
| "loss": 0.9548, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.9381720430107527, | |
| "grad_norm": 0.47899471351234146, | |
| "learning_rate": 2.390180644032257e-05, | |
| "loss": 0.9965, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.9408602150537635, | |
| "grad_norm": 0.3639159912649112, | |
| "learning_rate": 2.38142329994088e-05, | |
| "loss": 0.945, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.9435483870967742, | |
| "grad_norm": 0.41552533932477614, | |
| "learning_rate": 2.372658360269796e-05, | |
| "loss": 0.976, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.946236559139785, | |
| "grad_norm": 0.4127471276078075, | |
| "learning_rate": 2.363885999562084e-05, | |
| "loss": 1.0493, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.9489247311827957, | |
| "grad_norm": 0.42874463629780296, | |
| "learning_rate": 2.3551063925086072e-05, | |
| "loss": 1.0003, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.9516129032258065, | |
| "grad_norm": 0.4542236208271591, | |
| "learning_rate": 2.3463197139445284e-05, | |
| "loss": 1.0189, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.9543010752688172, | |
| "grad_norm": 0.8840248169596676, | |
| "learning_rate": 2.3375261388458318e-05, | |
| "loss": 1.0006, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.956989247311828, | |
| "grad_norm": 0.47762507803159143, | |
| "learning_rate": 2.3287258423258405e-05, | |
| "loss": 1.0101, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.9596774193548387, | |
| "grad_norm": 0.42765004964798886, | |
| "learning_rate": 2.3199189996317205e-05, | |
| "loss": 0.9896, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.9623655913978495, | |
| "grad_norm": 0.4236101839000849, | |
| "learning_rate": 2.3111057861410026e-05, | |
| "loss": 0.9931, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.9650537634408602, | |
| "grad_norm": 0.38884571703952686, | |
| "learning_rate": 2.3022863773580813e-05, | |
| "loss": 0.9394, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.967741935483871, | |
| "grad_norm": 0.5378824587688318, | |
| "learning_rate": 2.2934609489107236e-05, | |
| "loss": 0.9842, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.9704301075268817, | |
| "grad_norm": 0.39925462372416454, | |
| "learning_rate": 2.2846296765465708e-05, | |
| "loss": 1.0026, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.9731182795698925, | |
| "grad_norm": 0.9592078982505338, | |
| "learning_rate": 2.2757927361296376e-05, | |
| "loss": 1.0332, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.9758064516129032, | |
| "grad_norm": 0.4396877320552629, | |
| "learning_rate": 2.2669503036368124e-05, | |
| "loss": 0.9971, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.978494623655914, | |
| "grad_norm": 0.38966539914800313, | |
| "learning_rate": 2.2581025551543516e-05, | |
| "loss": 0.9469, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.9811827956989247, | |
| "grad_norm": 0.4216276354211585, | |
| "learning_rate": 2.249249666874372e-05, | |
| "loss": 1.0322, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.9838709677419355, | |
| "grad_norm": 0.4351959975704115, | |
| "learning_rate": 2.240391815091344e-05, | |
| "loss": 0.962, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.9865591397849462, | |
| "grad_norm": 0.35811079366878923, | |
| "learning_rate": 2.2315291761985803e-05, | |
| "loss": 0.9937, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.989247311827957, | |
| "grad_norm": 0.3605918004740936, | |
| "learning_rate": 2.222661926684722e-05, | |
| "loss": 0.991, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.9919354838709677, | |
| "grad_norm": 0.4176512601533839, | |
| "learning_rate": 2.2137902431302264e-05, | |
| "loss": 1.0332, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.9946236559139785, | |
| "grad_norm": 0.42340462982190896, | |
| "learning_rate": 2.2049143022038472e-05, | |
| "loss": 0.9922, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.9973118279569892, | |
| "grad_norm": 0.420010163587815, | |
| "learning_rate": 2.196034280659122e-05, | |
| "loss": 1.0155, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.41657151819377736, | |
| "learning_rate": 2.1871503553308447e-05, | |
| "loss": 0.9901, | |
| "step": 372 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 744, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 186, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1543405087752192.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |