{"current_steps": 5, "total_steps": 5610, "loss": 0.3871, "lr": 3.5650623885918005e-07, "epoch": 0.008912655971479501, "percentage": 0.09, "elapsed_time": "0:00:01", "remaining_time": "0:30:56", "throughput": 1912.68, "total_tokens": 3168} {"current_steps": 10, "total_steps": 5610, "loss": 0.3469, "lr": 8.021390374331552e-07, "epoch": 0.017825311942959002, "percentage": 0.18, "elapsed_time": "0:00:02", "remaining_time": "0:25:53", "throughput": 2261.12, "total_tokens": 6272} {"current_steps": 15, "total_steps": 5610, "loss": 0.1418, "lr": 1.2477718360071302e-06, "epoch": 0.026737967914438502, "percentage": 0.27, "elapsed_time": "0:00:04", "remaining_time": "0:24:55", "throughput": 2530.77, "total_tokens": 10144} {"current_steps": 20, "total_steps": 5610, "loss": 0.345, "lr": 1.6934046345811053e-06, "epoch": 0.035650623885918005, "percentage": 0.36, "elapsed_time": "0:00:05", "remaining_time": "0:24:03", "throughput": 2620.51, "total_tokens": 13536} {"current_steps": 25, "total_steps": 5610, "loss": 0.3125, "lr": 2.1390374331550802e-06, "epoch": 0.044563279857397504, "percentage": 0.45, "elapsed_time": "0:00:06", "remaining_time": "0:23:05", "throughput": 2600.0, "total_tokens": 16128} {"current_steps": 30, "total_steps": 5610, "loss": 0.0692, "lr": 2.5846702317290554e-06, "epoch": 0.053475935828877004, "percentage": 0.53, "elapsed_time": "0:00:07", "remaining_time": "0:22:24", "throughput": 2599.47, "total_tokens": 18784} {"current_steps": 35, "total_steps": 5610, "loss": 0.2449, "lr": 3.0303030303030305e-06, "epoch": 0.062388591800356503, "percentage": 0.62, "elapsed_time": "0:00:08", "remaining_time": "0:22:19", "throughput": 2656.87, "total_tokens": 22336} {"current_steps": 40, "total_steps": 5610, "loss": 0.3998, "lr": 3.4759358288770056e-06, "epoch": 0.07130124777183601, "percentage": 0.71, "elapsed_time": "0:00:09", "remaining_time": "0:22:02", "throughput": 2675.33, "total_tokens": 25408} {"current_steps": 45, "total_steps": 5610, "loss": 0.2471, "lr": 3.92156862745098e-06, "epoch": 0.08021390374331551, "percentage": 0.8, "elapsed_time": "0:00:10", "remaining_time": "0:21:41", "throughput": 2656.69, "total_tokens": 27968} {"current_steps": 50, "total_steps": 5610, "loss": 0.4143, "lr": 4.3672014260249555e-06, "epoch": 0.08912655971479501, "percentage": 0.89, "elapsed_time": "0:00:11", "remaining_time": "0:21:27", "throughput": 2655.45, "total_tokens": 30752} {"current_steps": 55, "total_steps": 5610, "loss": 0.2401, "lr": 4.812834224598931e-06, "epoch": 0.09803921568627451, "percentage": 0.98, "elapsed_time": "0:00:12", "remaining_time": "0:21:11", "throughput": 2651.21, "total_tokens": 33376} {"current_steps": 60, "total_steps": 5610, "loss": 0.1603, "lr": 5.258467023172906e-06, "epoch": 0.10695187165775401, "percentage": 1.07, "elapsed_time": "0:00:13", "remaining_time": "0:21:19", "throughput": 2695.36, "total_tokens": 37280} {"current_steps": 65, "total_steps": 5610, "loss": 0.2917, "lr": 5.704099821746881e-06, "epoch": 0.11586452762923351, "percentage": 1.16, "elapsed_time": "0:00:14", "remaining_time": "0:21:18", "throughput": 2712.71, "total_tokens": 40640} {"current_steps": 70, "total_steps": 5610, "loss": 0.4022, "lr": 6.149732620320856e-06, "epoch": 0.12477718360071301, "percentage": 1.25, "elapsed_time": "0:00:16", "remaining_time": "0:21:16", "throughput": 2736.6, "total_tokens": 44128} {"current_steps": 75, "total_steps": 5610, "loss": 0.2311, "lr": 6.59536541889483e-06, "epoch": 0.13368983957219252, "percentage": 1.34, "elapsed_time": "0:00:17", "remaining_time": "0:21:16", "throughput": 2753.84, "total_tokens": 47648} {"current_steps": 80, "total_steps": 5610, "loss": 0.134, "lr": 7.040998217468805e-06, "epoch": 0.14260249554367202, "percentage": 1.43, "elapsed_time": "0:00:18", "remaining_time": "0:21:13", "throughput": 2758.69, "total_tokens": 50816} {"current_steps": 85, "total_steps": 5610, "loss": 0.3235, "lr": 7.4866310160427806e-06, "epoch": 0.15151515151515152, "percentage": 1.52, "elapsed_time": "0:00:19", "remaining_time": "0:21:07", "throughput": 2755.3, "total_tokens": 53728} {"current_steps": 90, "total_steps": 5610, "loss": 0.1043, "lr": 7.932263814616755e-06, "epoch": 0.16042780748663102, "percentage": 1.6, "elapsed_time": "0:00:20", "remaining_time": "0:21:06", "throughput": 2762.85, "total_tokens": 57056} {"current_steps": 95, "total_steps": 5610, "loss": 0.128, "lr": 8.377896613190733e-06, "epoch": 0.16934046345811052, "percentage": 1.69, "elapsed_time": "0:00:21", "remaining_time": "0:21:00", "throughput": 2755.44, "total_tokens": 59808} {"current_steps": 100, "total_steps": 5610, "loss": 0.0951, "lr": 8.823529411764707e-06, "epoch": 0.17825311942959002, "percentage": 1.78, "elapsed_time": "0:00:22", "remaining_time": "0:20:56", "throughput": 2756.06, "total_tokens": 62848} {"current_steps": 105, "total_steps": 5610, "loss": 0.2309, "lr": 9.269162210338681e-06, "epoch": 0.18716577540106952, "percentage": 1.87, "elapsed_time": "0:00:23", "remaining_time": "0:20:52", "throughput": 2755.88, "total_tokens": 65856} {"current_steps": 110, "total_steps": 5610, "loss": 0.1133, "lr": 9.714795008912657e-06, "epoch": 0.19607843137254902, "percentage": 1.96, "elapsed_time": "0:00:24", "remaining_time": "0:20:48", "throughput": 2750.91, "total_tokens": 68672} {"current_steps": 115, "total_steps": 5610, "loss": 0.0673, "lr": 1.0160427807486631e-05, "epoch": 0.20499108734402852, "percentage": 2.05, "elapsed_time": "0:00:26", "remaining_time": "0:20:46", "throughput": 2754.54, "total_tokens": 71840} {"current_steps": 120, "total_steps": 5610, "loss": 0.2123, "lr": 1.0606060606060607e-05, "epoch": 0.21390374331550802, "percentage": 2.14, "elapsed_time": "0:00:27", "remaining_time": "0:20:41", "throughput": 2749.69, "total_tokens": 74624} {"current_steps": 125, "total_steps": 5610, "loss": 0.2054, "lr": 1.1051693404634582e-05, "epoch": 0.22281639928698752, "percentage": 2.23, "elapsed_time": "0:00:28", "remaining_time": "0:20:41", "throughput": 2759.07, "total_tokens": 78080} {"current_steps": 130, "total_steps": 5610, "loss": 0.2639, "lr": 1.1497326203208558e-05, "epoch": 0.23172905525846701, "percentage": 2.32, "elapsed_time": "0:00:29", "remaining_time": "0:20:41", "throughput": 2765.23, "total_tokens": 81408} {"current_steps": 135, "total_steps": 5610, "loss": 0.0858, "lr": 1.1942959001782532e-05, "epoch": 0.24064171122994651, "percentage": 2.41, "elapsed_time": "0:00:30", "remaining_time": "0:20:36", "throughput": 2760.32, "total_tokens": 84192} {"current_steps": 140, "total_steps": 5610, "loss": 0.2855, "lr": 1.2388591800356506e-05, "epoch": 0.24955436720142601, "percentage": 2.5, "elapsed_time": "0:00:31", "remaining_time": "0:20:34", "throughput": 2762.59, "total_tokens": 87264} {"current_steps": 145, "total_steps": 5610, "loss": 0.034, "lr": 1.2834224598930484e-05, "epoch": 0.25846702317290554, "percentage": 2.58, "elapsed_time": "0:00:32", "remaining_time": "0:20:32", "throughput": 2763.57, "total_tokens": 90336} {"current_steps": 150, "total_steps": 5610, "loss": 0.2305, "lr": 1.3279857397504458e-05, "epoch": 0.26737967914438504, "percentage": 2.67, "elapsed_time": "0:00:33", "remaining_time": "0:20:32", "throughput": 2770.15, "total_tokens": 93760} {"current_steps": 155, "total_steps": 5610, "loss": 0.1568, "lr": 1.3725490196078432e-05, "epoch": 0.27629233511586454, "percentage": 2.76, "elapsed_time": "0:00:35", "remaining_time": "0:20:31", "throughput": 2774.6, "total_tokens": 97120} {"current_steps": 160, "total_steps": 5610, "loss": 0.5335, "lr": 1.4171122994652408e-05, "epoch": 0.28520499108734404, "percentage": 2.85, "elapsed_time": "0:00:36", "remaining_time": "0:20:29", "throughput": 2774.5, "total_tokens": 100160} {"current_steps": 165, "total_steps": 5610, "loss": 0.2975, "lr": 1.4616755793226383e-05, "epoch": 0.29411764705882354, "percentage": 2.94, "elapsed_time": "0:00:37", "remaining_time": "0:20:27", "throughput": 2773.19, "total_tokens": 103136} {"current_steps": 170, "total_steps": 5610, "loss": 0.1237, "lr": 1.5062388591800359e-05, "epoch": 0.30303030303030304, "percentage": 3.03, "elapsed_time": "0:00:38", "remaining_time": "0:20:22", "throughput": 2766.48, "total_tokens": 105696} {"current_steps": 175, "total_steps": 5610, "loss": 0.3044, "lr": 1.5508021390374333e-05, "epoch": 0.31194295900178254, "percentage": 3.12, "elapsed_time": "0:00:39", "remaining_time": "0:20:21", "throughput": 2766.52, "total_tokens": 108800} {"current_steps": 180, "total_steps": 5610, "loss": 0.1746, "lr": 1.5953654188948307e-05, "epoch": 0.32085561497326204, "percentage": 3.21, "elapsed_time": "0:00:40", "remaining_time": "0:20:19", "throughput": 2766.05, "total_tokens": 111808} {"current_steps": 185, "total_steps": 5610, "loss": 0.2148, "lr": 1.639928698752228e-05, "epoch": 0.32976827094474154, "percentage": 3.3, "elapsed_time": "0:00:41", "remaining_time": "0:20:17", "throughput": 2768.12, "total_tokens": 114944} {"current_steps": 190, "total_steps": 5610, "loss": 0.1727, "lr": 1.684491978609626e-05, "epoch": 0.33868092691622104, "percentage": 3.39, "elapsed_time": "0:00:42", "remaining_time": "0:20:16", "throughput": 2770.33, "total_tokens": 118112} {"current_steps": 195, "total_steps": 5610, "loss": 0.1062, "lr": 1.7290552584670233e-05, "epoch": 0.34759358288770054, "percentage": 3.48, "elapsed_time": "0:00:43", "remaining_time": "0:20:12", "throughput": 2768.68, "total_tokens": 120896} {"current_steps": 200, "total_steps": 5610, "loss": 0.1255, "lr": 1.7736185383244208e-05, "epoch": 0.35650623885918004, "percentage": 3.57, "elapsed_time": "0:00:44", "remaining_time": "0:20:10", "throughput": 2768.69, "total_tokens": 123904} {"current_steps": 205, "total_steps": 5610, "loss": 0.1552, "lr": 1.8181818181818182e-05, "epoch": 0.36541889483065954, "percentage": 3.65, "elapsed_time": "0:00:45", "remaining_time": "0:20:09", "throughput": 2769.15, "total_tokens": 127008} {"current_steps": 210, "total_steps": 5610, "loss": 0.2139, "lr": 1.862745098039216e-05, "epoch": 0.37433155080213903, "percentage": 3.74, "elapsed_time": "0:00:46", "remaining_time": "0:20:07", "throughput": 2767.96, "total_tokens": 129984} {"current_steps": 215, "total_steps": 5610, "loss": 0.1131, "lr": 1.9073083778966134e-05, "epoch": 0.38324420677361853, "percentage": 3.83, "elapsed_time": "0:00:48", "remaining_time": "0:20:06", "throughput": 2770.18, "total_tokens": 133152} {"current_steps": 220, "total_steps": 5610, "loss": 0.1154, "lr": 1.951871657754011e-05, "epoch": 0.39215686274509803, "percentage": 3.92, "elapsed_time": "0:00:49", "remaining_time": "0:20:03", "throughput": 2769.57, "total_tokens": 136096} {"current_steps": 225, "total_steps": 5610, "loss": 0.2696, "lr": 1.9964349376114083e-05, "epoch": 0.40106951871657753, "percentage": 4.01, "elapsed_time": "0:00:50", "remaining_time": "0:20:02", "throughput": 2769.16, "total_tokens": 139136} {"current_steps": 230, "total_steps": 5610, "loss": 0.2524, "lr": 2.0409982174688057e-05, "epoch": 0.40998217468805703, "percentage": 4.1, "elapsed_time": "0:00:51", "remaining_time": "0:20:00", "throughput": 2769.18, "total_tokens": 142080} {"current_steps": 235, "total_steps": 5610, "loss": 0.221, "lr": 2.0855614973262035e-05, "epoch": 0.41889483065953653, "percentage": 4.19, "elapsed_time": "0:00:52", "remaining_time": "0:20:01", "throughput": 2777.01, "total_tokens": 145824} {"current_steps": 240, "total_steps": 5610, "loss": 0.118, "lr": 2.130124777183601e-05, "epoch": 0.42780748663101603, "percentage": 4.28, "elapsed_time": "0:00:53", "remaining_time": "0:20:00", "throughput": 2781.95, "total_tokens": 149280} {"current_steps": 245, "total_steps": 5610, "loss": 0.2365, "lr": 2.1746880570409983e-05, "epoch": 0.43672014260249553, "percentage": 4.37, "elapsed_time": "0:00:54", "remaining_time": "0:19:59", "throughput": 2783.78, "total_tokens": 152544} {"current_steps": 250, "total_steps": 5610, "loss": 0.3776, "lr": 2.2192513368983957e-05, "epoch": 0.44563279857397503, "percentage": 4.46, "elapsed_time": "0:00:56", "remaining_time": "0:20:01", "throughput": 2791.14, "total_tokens": 156416} {"current_steps": 255, "total_steps": 5610, "loss": 0.1431, "lr": 2.2638146167557932e-05, "epoch": 0.45454545454545453, "percentage": 4.55, "elapsed_time": "0:00:57", "remaining_time": "0:20:00", "throughput": 2793.23, "total_tokens": 159712} {"current_steps": 260, "total_steps": 5610, "loss": 0.0348, "lr": 2.308377896613191e-05, "epoch": 0.46345811051693403, "percentage": 4.63, "elapsed_time": "0:00:58", "remaining_time": "0:19:57", "throughput": 2790.21, "total_tokens": 162400} {"current_steps": 265, "total_steps": 5610, "loss": 0.1799, "lr": 2.3529411764705884e-05, "epoch": 0.47237076648841353, "percentage": 4.72, "elapsed_time": "0:00:59", "remaining_time": "0:19:57", "throughput": 2796.14, "total_tokens": 166048} {"current_steps": 270, "total_steps": 5610, "loss": 0.1163, "lr": 2.3975044563279858e-05, "epoch": 0.48128342245989303, "percentage": 4.81, "elapsed_time": "0:01:00", "remaining_time": "0:19:53", "throughput": 2792.58, "total_tokens": 168576} {"current_steps": 275, "total_steps": 5610, "loss": 0.155, "lr": 2.4420677361853832e-05, "epoch": 0.49019607843137253, "percentage": 4.9, "elapsed_time": "0:01:01", "remaining_time": "0:19:54", "throughput": 2798.75, "total_tokens": 172320} {"current_steps": 280, "total_steps": 5610, "loss": 0.2025, "lr": 2.4866310160427807e-05, "epoch": 0.49910873440285203, "percentage": 4.99, "elapsed_time": "0:01:02", "remaining_time": "0:19:53", "throughput": 2798.78, "total_tokens": 175424} {"current_steps": 281, "total_steps": 5610, "eval_loss": 0.1983698159456253, "epoch": 0.5008912655971479, "percentage": 5.01, "elapsed_time": "0:01:09", "remaining_time": "0:21:51", "throughput": 2544.64, "total_tokens": 176032} {"current_steps": 285, "total_steps": 5610, "loss": 0.1648, "lr": 2.5311942959001784e-05, "epoch": 0.5080213903743316, "percentage": 5.08, "elapsed_time": "0:01:10", "remaining_time": "0:22:00", "throughput": 2518.5, "total_tokens": 178016} {"current_steps": 290, "total_steps": 5610, "loss": 0.0759, "lr": 2.575757575757576e-05, "epoch": 0.5169340463458111, "percentage": 5.17, "elapsed_time": "0:01:11", "remaining_time": "0:21:59", "throughput": 2528.97, "total_tokens": 181888} {"current_steps": 295, "total_steps": 5610, "loss": 0.1507, "lr": 2.6203208556149733e-05, "epoch": 0.5258467023172906, "percentage": 5.26, "elapsed_time": "0:01:13", "remaining_time": "0:21:55", "throughput": 2532.51, "total_tokens": 184960} {"current_steps": 300, "total_steps": 5610, "loss": 0.0901, "lr": 2.6648841354723707e-05, "epoch": 0.5347593582887701, "percentage": 5.35, "elapsed_time": "0:01:14", "remaining_time": "0:21:50", "throughput": 2531.53, "total_tokens": 187488} {"current_steps": 305, "total_steps": 5610, "loss": 0.0708, "lr": 2.7094474153297685e-05, "epoch": 0.5436720142602496, "percentage": 5.44, "elapsed_time": "0:01:15", "remaining_time": "0:21:49", "throughput": 2540.53, "total_tokens": 191232} {"current_steps": 310, "total_steps": 5610, "loss": 0.1351, "lr": 2.754010695187166e-05, "epoch": 0.5525846702317291, "percentage": 5.53, "elapsed_time": "0:01:16", "remaining_time": "0:21:45", "throughput": 2544.2, "total_tokens": 194272} {"current_steps": 315, "total_steps": 5610, "loss": 0.0961, "lr": 2.7985739750445633e-05, "epoch": 0.5614973262032086, "percentage": 5.61, "elapsed_time": "0:01:17", "remaining_time": "0:21:41", "throughput": 2546.73, "total_tokens": 197184} {"current_steps": 320, "total_steps": 5610, "loss": 0.1903, "lr": 2.8431372549019608e-05, "epoch": 0.5704099821746881, "percentage": 5.7, "elapsed_time": "0:01:18", "remaining_time": "0:21:36", "throughput": 2547.34, "total_tokens": 199840} {"current_steps": 325, "total_steps": 5610, "loss": 0.0495, "lr": 2.8877005347593582e-05, "epoch": 0.5793226381461676, "percentage": 5.79, "elapsed_time": "0:01:19", "remaining_time": "0:21:33", "throughput": 2551.85, "total_tokens": 203008} {"current_steps": 330, "total_steps": 5610, "loss": 0.1168, "lr": 2.932263814616756e-05, "epoch": 0.5882352941176471, "percentage": 5.88, "elapsed_time": "0:01:20", "remaining_time": "0:21:31", "throughput": 2557.19, "total_tokens": 206400} {"current_steps": 335, "total_steps": 5610, "loss": 0.1421, "lr": 2.9768270944741534e-05, "epoch": 0.5971479500891266, "percentage": 5.97, "elapsed_time": "0:01:21", "remaining_time": "0:21:28", "throughput": 2560.09, "total_tokens": 209440} {"current_steps": 340, "total_steps": 5610, "loss": 0.2025, "lr": 3.0213903743315508e-05, "epoch": 0.6060606060606061, "percentage": 6.06, "elapsed_time": "0:01:22", "remaining_time": "0:21:25", "throughput": 2564.36, "total_tokens": 212736} {"current_steps": 345, "total_steps": 5610, "loss": 0.2427, "lr": 3.065953654188948e-05, "epoch": 0.6149732620320856, "percentage": 6.15, "elapsed_time": "0:01:24", "remaining_time": "0:21:23", "throughput": 2569.5, "total_tokens": 216096} {"current_steps": 350, "total_steps": 5610, "loss": 0.153, "lr": 3.110516934046346e-05, "epoch": 0.6238859180035651, "percentage": 6.24, "elapsed_time": "0:01:25", "remaining_time": "0:21:20", "throughput": 2572.97, "total_tokens": 219200} {"current_steps": 355, "total_steps": 5610, "loss": 0.1216, "lr": 3.155080213903743e-05, "epoch": 0.6327985739750446, "percentage": 6.33, "elapsed_time": "0:01:26", "remaining_time": "0:21:16", "throughput": 2573.37, "total_tokens": 221952} {"current_steps": 360, "total_steps": 5610, "loss": 0.1366, "lr": 3.199643493761141e-05, "epoch": 0.6417112299465241, "percentage": 6.42, "elapsed_time": "0:01:27", "remaining_time": "0:21:14", "throughput": 2578.55, "total_tokens": 225376} {"current_steps": 365, "total_steps": 5610, "loss": 0.0615, "lr": 3.2442067736185386e-05, "epoch": 0.6506238859180036, "percentage": 6.51, "elapsed_time": "0:01:28", "remaining_time": "0:21:12", "throughput": 2582.96, "total_tokens": 228736} {"current_steps": 370, "total_steps": 5610, "loss": 0.0691, "lr": 3.288770053475936e-05, "epoch": 0.6595365418894831, "percentage": 6.6, "elapsed_time": "0:01:29", "remaining_time": "0:21:09", "throughput": 2584.25, "total_tokens": 231648} {"current_steps": 375, "total_steps": 5610, "loss": 0.0868, "lr": 3.3333333333333335e-05, "epoch": 0.6684491978609626, "percentage": 6.68, "elapsed_time": "0:01:30", "remaining_time": "0:21:07", "throughput": 2588.8, "total_tokens": 234976} {"current_steps": 380, "total_steps": 5610, "loss": 0.1143, "lr": 3.3778966131907306e-05, "epoch": 0.6773618538324421, "percentage": 6.77, "elapsed_time": "0:01:31", "remaining_time": "0:21:05", "throughput": 2592.92, "total_tokens": 238368} {"current_steps": 385, "total_steps": 5610, "loss": 0.0611, "lr": 3.4224598930481284e-05, "epoch": 0.6862745098039216, "percentage": 6.86, "elapsed_time": "0:01:33", "remaining_time": "0:21:02", "throughput": 2595.9, "total_tokens": 241440} {"current_steps": 390, "total_steps": 5610, "loss": 0.2413, "lr": 3.467023172905526e-05, "epoch": 0.6951871657754011, "percentage": 6.95, "elapsed_time": "0:01:34", "remaining_time": "0:20:59", "throughput": 2597.6, "total_tokens": 244448} {"current_steps": 395, "total_steps": 5610, "loss": 0.0962, "lr": 3.511586452762923e-05, "epoch": 0.7040998217468806, "percentage": 7.04, "elapsed_time": "0:01:35", "remaining_time": "0:20:55", "throughput": 2596.07, "total_tokens": 246880} {"current_steps": 400, "total_steps": 5610, "loss": 0.289, "lr": 3.556149732620321e-05, "epoch": 0.7130124777183601, "percentage": 7.13, "elapsed_time": "0:01:36", "remaining_time": "0:20:53", "throughput": 2600.04, "total_tokens": 250240} {"current_steps": 405, "total_steps": 5610, "loss": 0.1467, "lr": 3.600713012477718e-05, "epoch": 0.7219251336898396, "percentage": 7.22, "elapsed_time": "0:01:37", "remaining_time": "0:20:50", "throughput": 2601.48, "total_tokens": 253184} {"current_steps": 410, "total_steps": 5610, "loss": 0.1721, "lr": 3.645276292335116e-05, "epoch": 0.7308377896613191, "percentage": 7.31, "elapsed_time": "0:01:38", "remaining_time": "0:20:47", "throughput": 2601.79, "total_tokens": 255968} {"current_steps": 415, "total_steps": 5610, "loss": 0.1705, "lr": 3.6898395721925136e-05, "epoch": 0.7397504456327986, "percentage": 7.4, "elapsed_time": "0:01:39", "remaining_time": "0:20:44", "throughput": 2602.16, "total_tokens": 258688} {"current_steps": 420, "total_steps": 5610, "loss": 0.1499, "lr": 3.734402852049911e-05, "epoch": 0.7486631016042781, "percentage": 7.49, "elapsed_time": "0:01:40", "remaining_time": "0:20:42", "throughput": 2607.46, "total_tokens": 262240} {"current_steps": 425, "total_steps": 5610, "loss": 0.102, "lr": 3.7789661319073085e-05, "epoch": 0.7575757575757576, "percentage": 7.58, "elapsed_time": "0:01:41", "remaining_time": "0:20:41", "throughput": 2613.09, "total_tokens": 265952} {"current_steps": 430, "total_steps": 5610, "loss": 0.0561, "lr": 3.8235294117647055e-05, "epoch": 0.7664884135472371, "percentage": 7.66, "elapsed_time": "0:01:42", "remaining_time": "0:20:39", "throughput": 2617.0, "total_tokens": 269312} {"current_steps": 435, "total_steps": 5610, "loss": 0.1028, "lr": 3.868092691622103e-05, "epoch": 0.7754010695187166, "percentage": 7.75, "elapsed_time": "0:01:43", "remaining_time": "0:20:37", "throughput": 2617.11, "total_tokens": 272128} {"current_steps": 440, "total_steps": 5610, "loss": 0.1513, "lr": 3.912655971479501e-05, "epoch": 0.7843137254901961, "percentage": 7.84, "elapsed_time": "0:01:45", "remaining_time": "0:20:35", "throughput": 2620.83, "total_tokens": 275552} {"current_steps": 445, "total_steps": 5610, "loss": 0.1387, "lr": 3.957219251336899e-05, "epoch": 0.7932263814616756, "percentage": 7.93, "elapsed_time": "0:01:46", "remaining_time": "0:20:33", "throughput": 2622.63, "total_tokens": 278720} {"current_steps": 450, "total_steps": 5610, "loss": 0.1824, "lr": 4.0017825311942966e-05, "epoch": 0.8021390374331551, "percentage": 8.02, "elapsed_time": "0:01:47", "remaining_time": "0:20:30", "throughput": 2622.9, "total_tokens": 281536} {"current_steps": 455, "total_steps": 5610, "loss": 0.2449, "lr": 4.046345811051694e-05, "epoch": 0.8110516934046346, "percentage": 8.11, "elapsed_time": "0:01:48", "remaining_time": "0:20:28", "throughput": 2624.74, "total_tokens": 284672} {"current_steps": 460, "total_steps": 5610, "loss": 0.1029, "lr": 4.0909090909090915e-05, "epoch": 0.8199643493761141, "percentage": 8.2, "elapsed_time": "0:01:49", "remaining_time": "0:20:27", "throughput": 2629.86, "total_tokens": 288416} {"current_steps": 465, "total_steps": 5610, "loss": 0.0824, "lr": 4.1354723707664886e-05, "epoch": 0.8288770053475936, "percentage": 8.29, "elapsed_time": "0:01:50", "remaining_time": "0:20:25", "throughput": 2629.9, "total_tokens": 291232} {"current_steps": 470, "total_steps": 5610, "loss": 0.1585, "lr": 4.180035650623886e-05, "epoch": 0.8377896613190731, "percentage": 8.38, "elapsed_time": "0:01:51", "remaining_time": "0:20:24", "throughput": 2633.66, "total_tokens": 294784} {"current_steps": 475, "total_steps": 5610, "loss": 0.1035, "lr": 4.224598930481284e-05, "epoch": 0.8467023172905526, "percentage": 8.47, "elapsed_time": "0:01:53", "remaining_time": "0:20:21", "throughput": 2633.88, "total_tokens": 297632} {"current_steps": 480, "total_steps": 5610, "loss": 0.1197, "lr": 4.269162210338681e-05, "epoch": 0.8556149732620321, "percentage": 8.56, "elapsed_time": "0:01:54", "remaining_time": "0:20:18", "throughput": 2634.0, "total_tokens": 300416} {"current_steps": 485, "total_steps": 5610, "loss": 0.1252, "lr": 4.313725490196079e-05, "epoch": 0.8645276292335116, "percentage": 8.65, "elapsed_time": "0:01:55", "remaining_time": "0:20:16", "throughput": 2633.98, "total_tokens": 303232} {"current_steps": 490, "total_steps": 5610, "loss": 0.1306, "lr": 4.358288770053476e-05, "epoch": 0.8734402852049911, "percentage": 8.73, "elapsed_time": "0:01:56", "remaining_time": "0:20:14", "throughput": 2634.68, "total_tokens": 306144} {"current_steps": 495, "total_steps": 5610, "loss": 0.0748, "lr": 4.402852049910874e-05, "epoch": 0.8823529411764706, "percentage": 8.82, "elapsed_time": "0:01:57", "remaining_time": "0:20:10", "throughput": 2633.06, "total_tokens": 308576} {"current_steps": 500, "total_steps": 5610, "loss": 0.1202, "lr": 4.4474153297682716e-05, "epoch": 0.8912655971479501, "percentage": 8.91, "elapsed_time": "0:01:58", "remaining_time": "0:20:09", "throughput": 2636.26, "total_tokens": 312000} {"current_steps": 505, "total_steps": 5610, "loss": 0.1321, "lr": 4.491978609625669e-05, "epoch": 0.9001782531194296, "percentage": 9.0, "elapsed_time": "0:01:59", "remaining_time": "0:20:07", "throughput": 2636.55, "total_tokens": 314848} {"current_steps": 510, "total_steps": 5610, "loss": 0.0629, "lr": 4.5365418894830664e-05, "epoch": 0.9090909090909091, "percentage": 9.09, "elapsed_time": "0:02:00", "remaining_time": "0:20:05", "throughput": 2638.85, "total_tokens": 318112} {"current_steps": 515, "total_steps": 5610, "loss": 0.0765, "lr": 4.5811051693404635e-05, "epoch": 0.9180035650623886, "percentage": 9.18, "elapsed_time": "0:02:01", "remaining_time": "0:20:03", "throughput": 2640.11, "total_tokens": 321152} {"current_steps": 520, "total_steps": 5610, "loss": 0.0566, "lr": 4.625668449197861e-05, "epoch": 0.9269162210338681, "percentage": 9.27, "elapsed_time": "0:02:02", "remaining_time": "0:20:00", "throughput": 2638.68, "total_tokens": 323552} {"current_steps": 525, "total_steps": 5610, "loss": 0.058, "lr": 4.670231729055259e-05, "epoch": 0.9358288770053476, "percentage": 9.36, "elapsed_time": "0:02:03", "remaining_time": "0:19:57", "throughput": 2637.95, "total_tokens": 326112} {"current_steps": 530, "total_steps": 5610, "loss": 0.1965, "lr": 4.714795008912656e-05, "epoch": 0.9447415329768271, "percentage": 9.45, "elapsed_time": "0:02:04", "remaining_time": "0:19:54", "throughput": 2637.68, "total_tokens": 328800} {"current_steps": 535, "total_steps": 5610, "loss": 0.1333, "lr": 4.759358288770054e-05, "epoch": 0.9536541889483066, "percentage": 9.54, "elapsed_time": "0:02:05", "remaining_time": "0:19:53", "throughput": 2642.0, "total_tokens": 332512} {"current_steps": 540, "total_steps": 5610, "loss": 0.1171, "lr": 4.803921568627452e-05, "epoch": 0.9625668449197861, "percentage": 9.63, "elapsed_time": "0:02:06", "remaining_time": "0:19:51", "throughput": 2642.25, "total_tokens": 335360} {"current_steps": 545, "total_steps": 5610, "loss": 0.0733, "lr": 4.848484848484849e-05, "epoch": 0.9714795008912656, "percentage": 9.71, "elapsed_time": "0:02:08", "remaining_time": "0:19:51", "throughput": 2647.65, "total_tokens": 339488} {"current_steps": 550, "total_steps": 5610, "loss": 0.0912, "lr": 4.8930481283422465e-05, "epoch": 0.9803921568627451, "percentage": 9.8, "elapsed_time": "0:02:09", "remaining_time": "0:19:48", "throughput": 2648.02, "total_tokens": 342176} {"current_steps": 555, "total_steps": 5610, "loss": 0.109, "lr": 4.9376114081996436e-05, "epoch": 0.9893048128342246, "percentage": 9.89, "elapsed_time": "0:02:10", "remaining_time": "0:19:47", "throughput": 2650.49, "total_tokens": 345568} {"current_steps": 560, "total_steps": 5610, "loss": 0.1763, "lr": 4.9821746880570414e-05, "epoch": 0.9982174688057041, "percentage": 9.98, "elapsed_time": "0:02:11", "remaining_time": "0:19:44", "throughput": 2649.5, "total_tokens": 348000} {"current_steps": 562, "total_steps": 5610, "eval_loss": 0.13491526246070862, "epoch": 1.0017825311942958, "percentage": 10.02, "elapsed_time": "0:02:18", "remaining_time": "0:20:40", "throughput": 2528.02, "total_tokens": 349200} {"current_steps": 565, "total_steps": 5610, "loss": 0.182, "lr": 4.99999564446608e-05, "epoch": 1.0071301247771836, "percentage": 10.07, "elapsed_time": "0:02:19", "remaining_time": "0:20:46", "throughput": 2514.86, "total_tokens": 350960} {"current_steps": 570, "total_steps": 5610, "loss": 0.0825, "lr": 4.9999690273693036e-05, "epoch": 1.0160427807486632, "percentage": 10.16, "elapsed_time": "0:02:20", "remaining_time": "0:20:44", "throughput": 2518.2, "total_tokens": 354288} {"current_steps": 575, "total_steps": 5610, "loss": 0.0908, "lr": 4.999918213174131e-05, "epoch": 1.0249554367201426, "percentage": 10.25, "elapsed_time": "0:02:21", "remaining_time": "0:20:42", "throughput": 2521.47, "total_tokens": 357648} {"current_steps": 580, "total_steps": 5610, "loss": 0.1092, "lr": 4.9998432023723915e-05, "epoch": 1.0338680926916222, "percentage": 10.34, "elapsed_time": "0:02:22", "remaining_time": "0:20:39", "throughput": 2522.55, "total_tokens": 360496} {"current_steps": 585, "total_steps": 5610, "loss": 0.0588, "lr": 4.9997439956901106e-05, "epoch": 1.0427807486631016, "percentage": 10.43, "elapsed_time": "0:02:23", "remaining_time": "0:20:36", "throughput": 2524.11, "total_tokens": 363376} {"current_steps": 590, "total_steps": 5610, "loss": 0.1617, "lr": 4.999620594087507e-05, "epoch": 1.0516934046345812, "percentage": 10.52, "elapsed_time": "0:02:25", "remaining_time": "0:20:34", "throughput": 2525.69, "total_tokens": 366320} {"current_steps": 595, "total_steps": 5610, "loss": 0.1548, "lr": 4.999472998758978e-05, "epoch": 1.0606060606060606, "percentage": 10.61, "elapsed_time": "0:02:26", "remaining_time": "0:20:31", "throughput": 2528.1, "total_tokens": 369488} {"current_steps": 600, "total_steps": 5610, "loss": 0.0569, "lr": 4.999301211133095e-05, "epoch": 1.0695187165775402, "percentage": 10.7, "elapsed_time": "0:02:27", "remaining_time": "0:20:29", "throughput": 2530.12, "total_tokens": 372656} {"current_steps": 605, "total_steps": 5610, "loss": 0.1505, "lr": 4.999105232872582e-05, "epoch": 1.0784313725490196, "percentage": 10.78, "elapsed_time": "0:02:28", "remaining_time": "0:20:28", "throughput": 2532.96, "total_tokens": 376048} {"current_steps": 610, "total_steps": 5610, "loss": 0.2991, "lr": 4.998885065874305e-05, "epoch": 1.0873440285204992, "percentage": 10.87, "elapsed_time": "0:02:29", "remaining_time": "0:20:26", "throughput": 2536.53, "total_tokens": 379472} {"current_steps": 615, "total_steps": 5610, "loss": 0.2261, "lr": 4.9986407122692504e-05, "epoch": 1.0962566844919786, "percentage": 10.96, "elapsed_time": "0:02:30", "remaining_time": "0:20:23", "throughput": 2537.38, "total_tokens": 382288} {"current_steps": 620, "total_steps": 5610, "loss": 0.2298, "lr": 4.998372174422507e-05, "epoch": 1.1051693404634582, "percentage": 11.05, "elapsed_time": "0:02:31", "remaining_time": "0:20:21", "throughput": 2539.46, "total_tokens": 385392} {"current_steps": 625, "total_steps": 5610, "loss": 0.0593, "lr": 4.998079454933244e-05, "epoch": 1.1140819964349375, "percentage": 11.14, "elapsed_time": "0:02:32", "remaining_time": "0:20:20", "throughput": 2544.12, "total_tokens": 389200} {"current_steps": 630, "total_steps": 5610, "loss": 0.1117, "lr": 4.99776255663468e-05, "epoch": 1.1229946524064172, "percentage": 11.23, "elapsed_time": "0:02:33", "remaining_time": "0:20:17", "throughput": 2543.87, "total_tokens": 391664} {"current_steps": 635, "total_steps": 5610, "loss": 0.0618, "lr": 4.997421482594059e-05, "epoch": 1.1319073083778965, "percentage": 11.32, "elapsed_time": "0:02:34", "remaining_time": "0:20:14", "throughput": 2544.76, "total_tokens": 394416} {"current_steps": 640, "total_steps": 5610, "loss": 0.0573, "lr": 4.997056236112625e-05, "epoch": 1.1408199643493762, "percentage": 11.41, "elapsed_time": "0:02:36", "remaining_time": "0:20:14", "throughput": 2552.97, "total_tokens": 399248} {"current_steps": 645, "total_steps": 5610, "loss": 0.0672, "lr": 4.9966668207255826e-05, "epoch": 1.1497326203208555, "percentage": 11.5, "elapsed_time": "0:02:37", "remaining_time": "0:20:11", "throughput": 2553.46, "total_tokens": 402032} {"current_steps": 650, "total_steps": 5610, "loss": 0.081, "lr": 4.996253240202069e-05, "epoch": 1.1586452762923352, "percentage": 11.59, "elapsed_time": "0:02:38", "remaining_time": "0:20:10", "throughput": 2555.75, "total_tokens": 405296} {"current_steps": 655, "total_steps": 5610, "loss": 0.0902, "lr": 4.9958154985451114e-05, "epoch": 1.1675579322638145, "percentage": 11.68, "elapsed_time": "0:02:39", "remaining_time": "0:20:07", "throughput": 2557.61, "total_tokens": 408400} {"current_steps": 660, "total_steps": 5610, "loss": 0.062, "lr": 4.995353599991595e-05, "epoch": 1.1764705882352942, "percentage": 11.76, "elapsed_time": "0:02:40", "remaining_time": "0:20:06", "throughput": 2561.14, "total_tokens": 412016} {"current_steps": 665, "total_steps": 5610, "loss": 0.2733, "lr": 4.994867549012215e-05, "epoch": 1.1853832442067735, "percentage": 11.85, "elapsed_time": "0:02:42", "remaining_time": "0:20:04", "throughput": 2564.48, "total_tokens": 415504} {"current_steps": 670, "total_steps": 5610, "loss": 0.1307, "lr": 4.99435735031144e-05, "epoch": 1.1942959001782532, "percentage": 11.94, "elapsed_time": "0:02:43", "remaining_time": "0:20:02", "throughput": 2565.37, "total_tokens": 418448} {"current_steps": 675, "total_steps": 5610, "loss": 0.0424, "lr": 4.993823008827465e-05, "epoch": 1.2032085561497325, "percentage": 12.03, "elapsed_time": "0:02:44", "remaining_time": "0:20:00", "throughput": 2565.53, "total_tokens": 421168} {"current_steps": 680, "total_steps": 5610, "loss": 0.1269, "lr": 4.9932645297321555e-05, "epoch": 1.2121212121212122, "percentage": 12.12, "elapsed_time": "0:02:45", "remaining_time": "0:19:57", "throughput": 2565.13, "total_tokens": 423632} {"current_steps": 685, "total_steps": 5610, "loss": 0.0861, "lr": 4.9926819184310103e-05, "epoch": 1.2210338680926915, "percentage": 12.21, "elapsed_time": "0:02:46", "remaining_time": "0:19:55", "throughput": 2566.51, "total_tokens": 426640} {"current_steps": 690, "total_steps": 5610, "loss": 0.0286, "lr": 4.9920751805631e-05, "epoch": 1.2299465240641712, "percentage": 12.3, "elapsed_time": "0:02:47", "remaining_time": "0:19:53", "throughput": 2569.03, "total_tokens": 430032} {"current_steps": 695, "total_steps": 5610, "loss": 0.0113, "lr": 4.991444322001014e-05, "epoch": 1.2388591800356505, "percentage": 12.39, "elapsed_time": "0:02:48", "remaining_time": "0:19:51", "throughput": 2570.41, "total_tokens": 433008} {"current_steps": 700, "total_steps": 5610, "loss": 0.1576, "lr": 4.99078934885081e-05, "epoch": 1.2477718360071302, "percentage": 12.48, "elapsed_time": "0:02:49", "remaining_time": "0:19:49", "throughput": 2572.93, "total_tokens": 436400} {"current_steps": 705, "total_steps": 5610, "loss": 0.1207, "lr": 4.990110267451944e-05, "epoch": 1.2566844919786098, "percentage": 12.57, "elapsed_time": "0:02:50", "remaining_time": "0:19:47", "throughput": 2573.66, "total_tokens": 439248} {"current_steps": 710, "total_steps": 5610, "loss": 0.0847, "lr": 4.989407084377218e-05, "epoch": 1.2655971479500892, "percentage": 12.66, "elapsed_time": "0:02:51", "remaining_time": "0:19:45", "throughput": 2575.34, "total_tokens": 442416} {"current_steps": 715, "total_steps": 5610, "loss": 0.003, "lr": 4.988679806432712e-05, "epoch": 1.2745098039215685, "percentage": 12.75, "elapsed_time": "0:02:52", "remaining_time": "0:19:43", "throughput": 2577.04, "total_tokens": 445616} {"current_steps": 720, "total_steps": 5610, "loss": 0.1655, "lr": 4.9879284406577195e-05, "epoch": 1.2834224598930482, "percentage": 12.83, "elapsed_time": "0:02:53", "remaining_time": "0:19:41", "throughput": 2578.0, "total_tokens": 448528} {"current_steps": 725, "total_steps": 5610, "loss": 0.0605, "lr": 4.98715299432468e-05, "epoch": 1.2923351158645278, "percentage": 12.92, "elapsed_time": "0:02:55", "remaining_time": "0:19:39", "throughput": 2579.4, "total_tokens": 451664} {"current_steps": 730, "total_steps": 5610, "loss": 0.0255, "lr": 4.986353474939106e-05, "epoch": 1.3012477718360071, "percentage": 13.01, "elapsed_time": "0:02:56", "remaining_time": "0:19:38", "throughput": 2581.79, "total_tokens": 455120} {"current_steps": 735, "total_steps": 5610, "loss": 0.0086, "lr": 4.9855298902395134e-05, "epoch": 1.3101604278074865, "percentage": 13.1, "elapsed_time": "0:02:57", "remaining_time": "0:19:36", "throughput": 2583.52, "total_tokens": 458352} {"current_steps": 740, "total_steps": 5610, "loss": 0.1697, "lr": 4.9846822481973455e-05, "epoch": 1.3190730837789661, "percentage": 13.19, "elapsed_time": "0:02:58", "remaining_time": "0:19:34", "throughput": 2585.0, "total_tokens": 461488} {"current_steps": 745, "total_steps": 5610, "loss": 0.0289, "lr": 4.9838105570168946e-05, "epoch": 1.3279857397504458, "percentage": 13.28, "elapsed_time": "0:02:59", "remaining_time": "0:19:33", "throughput": 2587.14, "total_tokens": 464848} {"current_steps": 750, "total_steps": 5610, "loss": 0.3793, "lr": 4.982914825135224e-05, "epoch": 1.3368983957219251, "percentage": 13.37, "elapsed_time": "0:03:00", "remaining_time": "0:19:32", "throughput": 2591.59, "total_tokens": 468944} {"current_steps": 755, "total_steps": 5610, "loss": 0.2285, "lr": 4.981995061222087e-05, "epoch": 1.3458110516934045, "percentage": 13.46, "elapsed_time": "0:03:01", "remaining_time": "0:19:29", "throughput": 2590.43, "total_tokens": 471312} {"current_steps": 760, "total_steps": 5610, "loss": 0.0773, "lr": 4.98105127417984e-05, "epoch": 1.3547237076648841, "percentage": 13.55, "elapsed_time": "0:03:03", "remaining_time": "0:19:27", "throughput": 2590.66, "total_tokens": 474128} {"current_steps": 765, "total_steps": 5610, "loss": 0.1334, "lr": 4.9800834731433596e-05, "epoch": 1.3636363636363638, "percentage": 13.64, "elapsed_time": "0:03:04", "remaining_time": "0:19:25", "throughput": 2589.8, "total_tokens": 476592} {"current_steps": 770, "total_steps": 5610, "loss": 0.0512, "lr": 4.9790916674799526e-05, "epoch": 1.3725490196078431, "percentage": 13.73, "elapsed_time": "0:03:05", "remaining_time": "0:19:24", "throughput": 2592.22, "total_tokens": 480240} {"current_steps": 775, "total_steps": 5610, "loss": 0.0634, "lr": 4.9780758667892656e-05, "epoch": 1.3814616755793225, "percentage": 13.81, "elapsed_time": "0:03:06", "remaining_time": "0:19:22", "throughput": 2593.87, "total_tokens": 483472} {"current_steps": 780, "total_steps": 5610, "loss": 0.2462, "lr": 4.977036080903193e-05, "epoch": 1.3903743315508021, "percentage": 13.9, "elapsed_time": "0:03:07", "remaining_time": "0:19:21", "throughput": 2595.65, "total_tokens": 486768} {"current_steps": 785, "total_steps": 5610, "loss": 0.0367, "lr": 4.975972319885779e-05, "epoch": 1.3992869875222818, "percentage": 13.99, "elapsed_time": "0:03:08", "remaining_time": "0:19:18", "throughput": 2595.87, "total_tokens": 489392} {"current_steps": 790, "total_steps": 5610, "loss": 0.0759, "lr": 4.974884594033123e-05, "epoch": 1.4081996434937611, "percentage": 14.08, "elapsed_time": "0:03:09", "remaining_time": "0:19:17", "throughput": 2597.29, "total_tokens": 492560} {"current_steps": 795, "total_steps": 5610, "loss": 0.1909, "lr": 4.9737729138732805e-05, "epoch": 1.4171122994652405, "percentage": 14.17, "elapsed_time": "0:03:10", "remaining_time": "0:19:15", "throughput": 2597.49, "total_tokens": 495344} {"current_steps": 800, "total_steps": 5610, "loss": 0.1073, "lr": 4.972637290166158e-05, "epoch": 1.4260249554367201, "percentage": 14.26, "elapsed_time": "0:03:11", "remaining_time": "0:19:12", "throughput": 2597.59, "total_tokens": 498128} {"current_steps": 805, "total_steps": 5610, "loss": 0.0423, "lr": 4.97147773390341e-05, "epoch": 1.4349376114081998, "percentage": 14.35, "elapsed_time": "0:03:12", "remaining_time": "0:19:11", "throughput": 2599.43, "total_tokens": 501488} {"current_steps": 810, "total_steps": 5610, "loss": 0.1348, "lr": 4.9702942563083356e-05, "epoch": 1.4438502673796791, "percentage": 14.44, "elapsed_time": "0:03:13", "remaining_time": "0:19:09", "throughput": 2599.59, "total_tokens": 504272} {"current_steps": 815, "total_steps": 5610, "loss": 0.2365, "lr": 4.969086868835765e-05, "epoch": 1.4527629233511585, "percentage": 14.53, "elapsed_time": "0:03:14", "remaining_time": "0:19:07", "throughput": 2598.77, "total_tokens": 506672} {"current_steps": 820, "total_steps": 5610, "loss": 0.1498, "lr": 4.967855583171954e-05, "epoch": 1.4616755793226381, "percentage": 14.62, "elapsed_time": "0:03:15", "remaining_time": "0:19:04", "throughput": 2598.42, "total_tokens": 509232} {"current_steps": 825, "total_steps": 5610, "loss": 0.1283, "lr": 4.9666004112344656e-05, "epoch": 1.4705882352941178, "percentage": 14.71, "elapsed_time": "0:03:17", "remaining_time": "0:19:03", "throughput": 2600.11, "total_tokens": 512528} {"current_steps": 830, "total_steps": 5610, "loss": 0.0449, "lr": 4.965321365172057e-05, "epoch": 1.4795008912655971, "percentage": 14.8, "elapsed_time": "0:03:18", "remaining_time": "0:19:00", "throughput": 2598.97, "total_tokens": 514896} {"current_steps": 835, "total_steps": 5610, "loss": 0.0548, "lr": 4.9640184573645646e-05, "epoch": 1.4884135472370765, "percentage": 14.88, "elapsed_time": "0:03:19", "remaining_time": "0:18:59", "throughput": 2601.17, "total_tokens": 518384} {"current_steps": 840, "total_steps": 5610, "loss": 0.0961, "lr": 4.962691700422778e-05, "epoch": 1.4973262032085561, "percentage": 14.97, "elapsed_time": "0:03:20", "remaining_time": "0:18:58", "throughput": 2605.06, "total_tokens": 522448} {"current_steps": 843, "total_steps": 5610, "eval_loss": 0.12453080713748932, "epoch": 1.5026737967914439, "percentage": 15.03, "elapsed_time": "0:03:27", "remaining_time": "0:19:33", "throughput": 2526.67, "total_tokens": 524208} {"current_steps": 845, "total_steps": 5610, "loss": 0.0782, "lr": 4.9613411071883267e-05, "epoch": 1.5062388591800357, "percentage": 15.06, "elapsed_time": "0:03:28", "remaining_time": "0:19:36", "throughput": 2518.4, "total_tokens": 525264} {"current_steps": 850, "total_steps": 5610, "loss": 0.1161, "lr": 4.959966690733544e-05, "epoch": 1.5151515151515151, "percentage": 15.15, "elapsed_time": "0:03:29", "remaining_time": "0:19:34", "throughput": 2520.51, "total_tokens": 528528} {"current_steps": 855, "total_steps": 5610, "loss": 0.0688, "lr": 4.958568464361353e-05, "epoch": 1.5240641711229945, "percentage": 15.24, "elapsed_time": "0:03:30", "remaining_time": "0:19:32", "throughput": 2521.86, "total_tokens": 531536} {"current_steps": 860, "total_steps": 5610, "loss": 0.0848, "lr": 4.9571464416051294e-05, "epoch": 1.5329768270944741, "percentage": 15.33, "elapsed_time": "0:03:31", "remaining_time": "0:19:30", "throughput": 2523.65, "total_tokens": 534704} {"current_steps": 865, "total_steps": 5610, "loss": 0.0262, "lr": 4.955700636228573e-05, "epoch": 1.5418894830659537, "percentage": 15.42, "elapsed_time": "0:03:32", "remaining_time": "0:19:27", "throughput": 2523.62, "total_tokens": 537264} {"current_steps": 870, "total_steps": 5610, "loss": 0.0095, "lr": 4.954231062225576e-05, "epoch": 1.5508021390374331, "percentage": 15.51, "elapsed_time": "0:03:34", "remaining_time": "0:19:26", "throughput": 2527.71, "total_tokens": 541328} {"current_steps": 875, "total_steps": 5610, "loss": 0.0443, "lr": 4.9527377338200855e-05, "epoch": 1.5597147950089125, "percentage": 15.6, "elapsed_time": "0:03:35", "remaining_time": "0:19:24", "throughput": 2529.6, "total_tokens": 544496} {"current_steps": 880, "total_steps": 5610, "loss": 0.0583, "lr": 4.951220665465964e-05, "epoch": 1.5686274509803921, "percentage": 15.69, "elapsed_time": "0:03:36", "remaining_time": "0:19:23", "throughput": 2531.16, "total_tokens": 547696} {"current_steps": 885, "total_steps": 5610, "loss": 0.3259, "lr": 4.949679871846857e-05, "epoch": 1.5775401069518717, "percentage": 15.78, "elapsed_time": "0:03:37", "remaining_time": "0:19:20", "throughput": 2531.3, "total_tokens": 550416} {"current_steps": 890, "total_steps": 5610, "loss": 0.155, "lr": 4.948115367876043e-05, "epoch": 1.5864527629233511, "percentage": 15.86, "elapsed_time": "0:03:38", "remaining_time": "0:19:19", "throughput": 2534.07, "total_tokens": 553968} {"current_steps": 895, "total_steps": 5610, "loss": 0.0245, "lr": 4.94652716869629e-05, "epoch": 1.5953654188948305, "percentage": 15.95, "elapsed_time": "0:03:39", "remaining_time": "0:19:16", "throughput": 2534.64, "total_tokens": 556656} {"current_steps": 900, "total_steps": 5610, "loss": 0.1417, "lr": 4.944915289679716e-05, "epoch": 1.6042780748663101, "percentage": 16.04, "elapsed_time": "0:03:40", "remaining_time": "0:19:14", "throughput": 2535.83, "total_tokens": 559536} {"current_steps": 905, "total_steps": 5610, "loss": 0.2026, "lr": 4.94327974642763e-05, "epoch": 1.6131907308377897, "percentage": 16.13, "elapsed_time": "0:03:41", "remaining_time": "0:19:12", "throughput": 2537.52, "total_tokens": 562704} {"current_steps": 910, "total_steps": 5610, "loss": 0.0659, "lr": 4.94162055477039e-05, "epoch": 1.6221033868092691, "percentage": 16.22, "elapsed_time": "0:03:42", "remaining_time": "0:19:11", "throughput": 2540.23, "total_tokens": 566352} {"current_steps": 915, "total_steps": 5610, "loss": 0.0742, "lr": 4.939937730767243e-05, "epoch": 1.6310160427807485, "percentage": 16.31, "elapsed_time": "0:03:44", "remaining_time": "0:19:09", "throughput": 2541.87, "total_tokens": 569584} {"current_steps": 920, "total_steps": 5610, "loss": 0.1525, "lr": 4.9382312907061755e-05, "epoch": 1.6399286987522281, "percentage": 16.4, "elapsed_time": "0:03:45", "remaining_time": "0:19:07", "throughput": 2540.67, "total_tokens": 571824} {"current_steps": 925, "total_steps": 5610, "loss": 0.0786, "lr": 4.9365012511037514e-05, "epoch": 1.6488413547237077, "percentage": 16.49, "elapsed_time": "0:03:46", "remaining_time": "0:19:05", "throughput": 2542.78, "total_tokens": 575248} {"current_steps": 930, "total_steps": 5610, "loss": 0.1018, "lr": 4.934747628704952e-05, "epoch": 1.6577540106951871, "percentage": 16.58, "elapsed_time": "0:03:47", "remaining_time": "0:19:03", "throughput": 2543.3, "total_tokens": 578032} {"current_steps": 935, "total_steps": 5610, "loss": 0.2083, "lr": 4.932970440483018e-05, "epoch": 1.6666666666666665, "percentage": 16.67, "elapsed_time": "0:03:48", "remaining_time": "0:19:02", "throughput": 2546.16, "total_tokens": 581744} {"current_steps": 940, "total_steps": 5610, "loss": 0.2742, "lr": 4.931169703639282e-05, "epoch": 1.6755793226381461, "percentage": 16.76, "elapsed_time": "0:03:49", "remaining_time": "0:19:00", "throughput": 2547.29, "total_tokens": 584880} {"current_steps": 945, "total_steps": 5610, "loss": 0.1039, "lr": 4.929345435603003e-05, "epoch": 1.6844919786096257, "percentage": 16.84, "elapsed_time": "0:03:50", "remaining_time": "0:18:58", "throughput": 2548.03, "total_tokens": 587856} {"current_steps": 950, "total_steps": 5610, "loss": 0.0851, "lr": 4.9274976540311956e-05, "epoch": 1.6934046345811051, "percentage": 16.93, "elapsed_time": "0:03:51", "remaining_time": "0:18:57", "throughput": 2549.22, "total_tokens": 590928} {"current_steps": 955, "total_steps": 5610, "loss": 0.0727, "lr": 4.9256263768084635e-05, "epoch": 1.7023172905525845, "percentage": 17.02, "elapsed_time": "0:03:52", "remaining_time": "0:18:55", "throughput": 2550.55, "total_tokens": 594096} {"current_steps": 960, "total_steps": 5610, "loss": 0.0307, "lr": 4.923731622046823e-05, "epoch": 1.7112299465240641, "percentage": 17.11, "elapsed_time": "0:03:54", "remaining_time": "0:18:53", "throughput": 2551.58, "total_tokens": 597136} {"current_steps": 965, "total_steps": 5610, "loss": 0.1659, "lr": 4.9218134080855273e-05, "epoch": 1.7201426024955437, "percentage": 17.2, "elapsed_time": "0:03:55", "remaining_time": "0:18:52", "throughput": 2554.5, "total_tokens": 600912} {"current_steps": 970, "total_steps": 5610, "loss": 0.1413, "lr": 4.919871753490891e-05, "epoch": 1.7290552584670231, "percentage": 17.29, "elapsed_time": "0:03:56", "remaining_time": "0:18:50", "throughput": 2556.37, "total_tokens": 604240} {"current_steps": 975, "total_steps": 5610, "loss": 0.0924, "lr": 4.917906677056111e-05, "epoch": 1.7379679144385025, "percentage": 17.38, "elapsed_time": "0:03:57", "remaining_time": "0:18:48", "throughput": 2557.5, "total_tokens": 607248} {"current_steps": 980, "total_steps": 5610, "loss": 0.1579, "lr": 4.9159181978010814e-05, "epoch": 1.7468805704099821, "percentage": 17.47, "elapsed_time": "0:03:58", "remaining_time": "0:18:47", "throughput": 2559.64, "total_tokens": 610736} {"current_steps": 985, "total_steps": 5610, "loss": 0.0559, "lr": 4.9139063349722113e-05, "epoch": 1.7557932263814617, "percentage": 17.56, "elapsed_time": "0:03:59", "remaining_time": "0:18:45", "throughput": 2561.38, "total_tokens": 614128} {"current_steps": 990, "total_steps": 5610, "loss": 0.0736, "lr": 4.911871108042241e-05, "epoch": 1.7647058823529411, "percentage": 17.65, "elapsed_time": "0:04:00", "remaining_time": "0:18:44", "throughput": 2562.37, "total_tokens": 617232} {"current_steps": 995, "total_steps": 5610, "loss": 0.064, "lr": 4.909812536710048e-05, "epoch": 1.7736185383244205, "percentage": 17.74, "elapsed_time": "0:04:02", "remaining_time": "0:18:42", "throughput": 2564.78, "total_tokens": 620880} {"current_steps": 1000, "total_steps": 5610, "loss": 0.0652, "lr": 4.9077306409004585e-05, "epoch": 1.7825311942959001, "percentage": 17.83, "elapsed_time": "0:04:03", "remaining_time": "0:18:41", "throughput": 2566.93, "total_tokens": 624368} {"current_steps": 1005, "total_steps": 5610, "loss": 0.0429, "lr": 4.9056254407640604e-05, "epoch": 1.7914438502673797, "percentage": 17.91, "elapsed_time": "0:04:04", "remaining_time": "0:18:39", "throughput": 2567.23, "total_tokens": 627152} {"current_steps": 1010, "total_steps": 5610, "loss": 0.0436, "lr": 4.903496956676998e-05, "epoch": 1.8003565062388591, "percentage": 18.0, "elapsed_time": "0:04:05", "remaining_time": "0:18:37", "throughput": 2567.16, "total_tokens": 629680} {"current_steps": 1015, "total_steps": 5610, "loss": 0.1416, "lr": 4.901345209240784e-05, "epoch": 1.8092691622103387, "percentage": 18.09, "elapsed_time": "0:04:06", "remaining_time": "0:18:35", "throughput": 2568.54, "total_tokens": 632848} {"current_steps": 1020, "total_steps": 5610, "loss": 0.0298, "lr": 4.8991702192820924e-05, "epoch": 1.8181818181818183, "percentage": 18.18, "elapsed_time": "0:04:07", "remaining_time": "0:18:33", "throughput": 2569.65, "total_tokens": 635920} {"current_steps": 1025, "total_steps": 5610, "loss": 0.0748, "lr": 4.896972007852563e-05, "epoch": 1.8270944741532977, "percentage": 18.27, "elapsed_time": "0:04:08", "remaining_time": "0:18:31", "throughput": 2570.79, "total_tokens": 639056} {"current_steps": 1030, "total_steps": 5610, "loss": 0.0602, "lr": 4.894750596228594e-05, "epoch": 1.8360071301247771, "percentage": 18.36, "elapsed_time": "0:04:09", "remaining_time": "0:18:30", "throughput": 2571.88, "total_tokens": 642192} {"current_steps": 1035, "total_steps": 5610, "loss": 0.0054, "lr": 4.8925060059111394e-05, "epoch": 1.8449197860962567, "percentage": 18.45, "elapsed_time": "0:04:10", "remaining_time": "0:18:28", "throughput": 2573.36, "total_tokens": 645488} {"current_steps": 1040, "total_steps": 5610, "loss": 0.1834, "lr": 4.890238258625496e-05, "epoch": 1.8538324420677363, "percentage": 18.54, "elapsed_time": "0:04:11", "remaining_time": "0:18:26", "throughput": 2573.68, "total_tokens": 648336} {"current_steps": 1045, "total_steps": 5610, "loss": 0.0974, "lr": 4.887947376321099e-05, "epoch": 1.8627450980392157, "percentage": 18.63, "elapsed_time": "0:04:13", "remaining_time": "0:18:25", "throughput": 2575.4, "total_tokens": 651696} {"current_steps": 1050, "total_steps": 5610, "loss": 0.0829, "lr": 4.885633381171304e-05, "epoch": 1.8716577540106951, "percentage": 18.72, "elapsed_time": "0:04:14", "remaining_time": "0:18:23", "throughput": 2576.21, "total_tokens": 654640} {"current_steps": 1055, "total_steps": 5610, "loss": 0.157, "lr": 4.883296295573176e-05, "epoch": 1.8805704099821747, "percentage": 18.81, "elapsed_time": "0:04:15", "remaining_time": "0:18:22", "throughput": 2578.14, "total_tokens": 658128} {"current_steps": 1060, "total_steps": 5610, "loss": 0.1809, "lr": 4.880936142147271e-05, "epoch": 1.8894830659536543, "percentage": 18.89, "elapsed_time": "0:04:16", "remaining_time": "0:18:20", "throughput": 2578.16, "total_tokens": 660848} {"current_steps": 1065, "total_steps": 5610, "loss": 0.1404, "lr": 4.878552943737418e-05, "epoch": 1.8983957219251337, "percentage": 18.98, "elapsed_time": "0:04:17", "remaining_time": "0:18:18", "throughput": 2577.19, "total_tokens": 663120} {"current_steps": 1070, "total_steps": 5610, "loss": 0.05, "lr": 4.876146723410498e-05, "epoch": 1.9073083778966131, "percentage": 19.07, "elapsed_time": "0:04:18", "remaining_time": "0:18:16", "throughput": 2578.15, "total_tokens": 666288} {"current_steps": 1075, "total_steps": 5610, "loss": 0.0693, "lr": 4.873717504456219e-05, "epoch": 1.9162210338680927, "percentage": 19.16, "elapsed_time": "0:04:19", "remaining_time": "0:18:14", "throughput": 2579.09, "total_tokens": 669360} {"current_steps": 1080, "total_steps": 5610, "loss": 0.1736, "lr": 4.8712653103868916e-05, "epoch": 1.9251336898395723, "percentage": 19.25, "elapsed_time": "0:04:20", "remaining_time": "0:18:12", "throughput": 2578.02, "total_tokens": 671344} {"current_steps": 1085, "total_steps": 5610, "loss": 0.0195, "lr": 4.868790164937204e-05, "epoch": 1.9340463458110517, "percentage": 19.34, "elapsed_time": "0:04:21", "remaining_time": "0:18:10", "throughput": 2579.52, "total_tokens": 674672} {"current_steps": 1090, "total_steps": 5610, "loss": 0.1089, "lr": 4.8662920920639866e-05, "epoch": 1.9429590017825311, "percentage": 19.43, "elapsed_time": "0:04:22", "remaining_time": "0:18:09", "throughput": 2581.14, "total_tokens": 677968} {"current_steps": 1095, "total_steps": 5610, "loss": 0.0145, "lr": 4.8637711159459855e-05, "epoch": 1.9518716577540107, "percentage": 19.52, "elapsed_time": "0:04:23", "remaining_time": "0:18:07", "throughput": 2581.09, "total_tokens": 680560} {"current_steps": 1100, "total_steps": 5610, "loss": 0.0347, "lr": 4.8612272609836263e-05, "epoch": 1.9607843137254903, "percentage": 19.61, "elapsed_time": "0:04:24", "remaining_time": "0:18:05", "throughput": 2582.45, "total_tokens": 683824} {"current_steps": 1105, "total_steps": 5610, "loss": 0.0616, "lr": 4.858660551798778e-05, "epoch": 1.9696969696969697, "percentage": 19.7, "elapsed_time": "0:04:25", "remaining_time": "0:18:04", "throughput": 2584.02, "total_tokens": 687216} {"current_steps": 1110, "total_steps": 5610, "loss": 0.1747, "lr": 4.856071013234513e-05, "epoch": 1.9786096256684491, "percentage": 19.79, "elapsed_time": "0:04:27", "remaining_time": "0:18:02", "throughput": 2584.44, "total_tokens": 690128} {"current_steps": 1115, "total_steps": 5610, "loss": 0.1298, "lr": 4.85345867035487e-05, "epoch": 1.9875222816399287, "percentage": 19.88, "elapsed_time": "0:04:28", "remaining_time": "0:18:00", "throughput": 2585.38, "total_tokens": 693232} {"current_steps": 1120, "total_steps": 5610, "loss": 0.1243, "lr": 4.8508235484446095e-05, "epoch": 1.9964349376114083, "percentage": 19.96, "elapsed_time": "0:04:29", "remaining_time": "0:17:59", "throughput": 2587.31, "total_tokens": 696880} {"current_steps": 1124, "total_steps": 5610, "eval_loss": 0.15137933194637299, "epoch": 2.0035650623885917, "percentage": 20.04, "elapsed_time": "0:04:36", "remaining_time": "0:18:23", "throughput": 2528.08, "total_tokens": 699264} {"current_steps": 1125, "total_steps": 5610, "loss": 0.0738, "lr": 4.8481656730089695e-05, "epoch": 2.0053475935828877, "percentage": 20.05, "elapsed_time": "0:04:37", "remaining_time": "0:18:26", "throughput": 2521.99, "total_tokens": 700096} {"current_steps": 1130, "total_steps": 5610, "loss": 0.1989, "lr": 4.8454850697734174e-05, "epoch": 2.014260249554367, "percentage": 20.14, "elapsed_time": "0:04:38", "remaining_time": "0:18:25", "throughput": 2523.44, "total_tokens": 703360} {"current_steps": 1135, "total_steps": 5610, "loss": 0.0507, "lr": 4.842781764683403e-05, "epoch": 2.0231729055258465, "percentage": 20.23, "elapsed_time": "0:04:39", "remaining_time": "0:18:23", "throughput": 2524.93, "total_tokens": 706624} {"current_steps": 1140, "total_steps": 5610, "loss": 0.1906, "lr": 4.8400557839041064e-05, "epoch": 2.0320855614973263, "percentage": 20.32, "elapsed_time": "0:04:40", "remaining_time": "0:18:21", "throughput": 2525.5, "total_tokens": 709472} {"current_steps": 1145, "total_steps": 5610, "loss": 0.0311, "lr": 4.837307153820184e-05, "epoch": 2.0409982174688057, "percentage": 20.41, "elapsed_time": "0:04:42", "remaining_time": "0:18:20", "throughput": 2527.8, "total_tokens": 713152} {"current_steps": 1150, "total_steps": 5610, "loss": 0.0409, "lr": 4.8345359010355155e-05, "epoch": 2.049910873440285, "percentage": 20.5, "elapsed_time": "0:04:43", "remaining_time": "0:18:18", "throughput": 2529.26, "total_tokens": 716480} {"current_steps": 1155, "total_steps": 5610, "loss": 0.0037, "lr": 4.831742052372943e-05, "epoch": 2.0588235294117645, "percentage": 20.59, "elapsed_time": "0:04:44", "remaining_time": "0:18:16", "throughput": 2529.28, "total_tokens": 719104} {"current_steps": 1160, "total_steps": 5610, "loss": 0.0205, "lr": 4.828925634874014e-05, "epoch": 2.0677361853832443, "percentage": 20.68, "elapsed_time": "0:04:45", "remaining_time": "0:18:14", "throughput": 2529.93, "total_tokens": 722016} {"current_steps": 1165, "total_steps": 5610, "loss": 0.1525, "lr": 4.8260866757987177e-05, "epoch": 2.0766488413547237, "percentage": 20.77, "elapsed_time": "0:04:46", "remaining_time": "0:18:13", "throughput": 2531.21, "total_tokens": 725184} {"current_steps": 1170, "total_steps": 5610, "loss": 0.1205, "lr": 4.823225202625226e-05, "epoch": 2.085561497326203, "percentage": 20.86, "elapsed_time": "0:04:47", "remaining_time": "0:18:11", "throughput": 2532.37, "total_tokens": 728352} {"current_steps": 1175, "total_steps": 5610, "loss": 0.1138, "lr": 4.820341243049618e-05, "epoch": 2.0944741532976825, "percentage": 20.94, "elapsed_time": "0:04:48", "remaining_time": "0:18:09", "throughput": 2533.9, "total_tokens": 731712} {"current_steps": 1180, "total_steps": 5610, "loss": 0.0186, "lr": 4.8174348249856236e-05, "epoch": 2.1033868092691623, "percentage": 21.03, "elapsed_time": "0:04:49", "remaining_time": "0:18:08", "throughput": 2534.98, "total_tokens": 734880} {"current_steps": 1185, "total_steps": 5610, "loss": 0.066, "lr": 4.814505976564343e-05, "epoch": 2.1122994652406417, "percentage": 21.12, "elapsed_time": "0:04:50", "remaining_time": "0:18:06", "throughput": 2535.41, "total_tokens": 737728} {"current_steps": 1190, "total_steps": 5610, "loss": 0.0528, "lr": 4.8115547261339824e-05, "epoch": 2.121212121212121, "percentage": 21.21, "elapsed_time": "0:04:52", "remaining_time": "0:18:05", "throughput": 2537.5, "total_tokens": 741376} {"current_steps": 1195, "total_steps": 5610, "loss": 0.1943, "lr": 4.808581102259573e-05, "epoch": 2.1301247771836005, "percentage": 21.3, "elapsed_time": "0:04:53", "remaining_time": "0:18:03", "throughput": 2538.12, "total_tokens": 744256} {"current_steps": 1200, "total_steps": 5610, "loss": 0.1003, "lr": 4.8055851337227006e-05, "epoch": 2.1390374331550803, "percentage": 21.39, "elapsed_time": "0:04:54", "remaining_time": "0:18:01", "throughput": 2538.11, "total_tokens": 746944} {"current_steps": 1205, "total_steps": 5610, "loss": 0.1651, "lr": 4.802566849521222e-05, "epoch": 2.1479500891265597, "percentage": 21.48, "elapsed_time": "0:04:55", "remaining_time": "0:18:00", "throughput": 2539.48, "total_tokens": 750272} {"current_steps": 1210, "total_steps": 5610, "loss": 0.2084, "lr": 4.799526278868987e-05, "epoch": 2.156862745098039, "percentage": 21.57, "elapsed_time": "0:04:56", "remaining_time": "0:17:58", "throughput": 2539.82, "total_tokens": 753024} {"current_steps": 1215, "total_steps": 5610, "loss": 0.0164, "lr": 4.796463451195554e-05, "epoch": 2.165775401069519, "percentage": 21.66, "elapsed_time": "0:04:57", "remaining_time": "0:17:56", "throughput": 2541.59, "total_tokens": 756576} {"current_steps": 1220, "total_steps": 5610, "loss": 0.0881, "lr": 4.7933783961459094e-05, "epoch": 2.1746880570409983, "percentage": 21.75, "elapsed_time": "0:04:58", "remaining_time": "0:17:55", "throughput": 2542.59, "total_tokens": 759680} {"current_steps": 1225, "total_steps": 5610, "loss": 0.0008, "lr": 4.790271143580174e-05, "epoch": 2.1836007130124777, "percentage": 21.84, "elapsed_time": "0:04:59", "remaining_time": "0:17:53", "throughput": 2543.58, "total_tokens": 762880} {"current_steps": 1230, "total_steps": 5610, "loss": 0.0389, "lr": 4.7871417235733196e-05, "epoch": 2.192513368983957, "percentage": 21.93, "elapsed_time": "0:05:01", "remaining_time": "0:17:51", "throughput": 2544.36, "total_tokens": 765920} {"current_steps": 1235, "total_steps": 5610, "loss": 0.0392, "lr": 4.783990166414875e-05, "epoch": 2.2014260249554365, "percentage": 22.01, "elapsed_time": "0:05:02", "remaining_time": "0:17:50", "throughput": 2546.65, "total_tokens": 769728} {"current_steps": 1240, "total_steps": 5610, "loss": 0.109, "lr": 4.780816502608632e-05, "epoch": 2.2103386809269163, "percentage": 22.1, "elapsed_time": "0:05:03", "remaining_time": "0:17:49", "throughput": 2547.61, "total_tokens": 772832} {"current_steps": 1245, "total_steps": 5610, "loss": 0.1271, "lr": 4.777620762872355e-05, "epoch": 2.2192513368983957, "percentage": 22.19, "elapsed_time": "0:05:04", "remaining_time": "0:17:47", "throughput": 2549.41, "total_tokens": 776352} {"current_steps": 1250, "total_steps": 5610, "loss": 0.0143, "lr": 4.774402978137479e-05, "epoch": 2.228163992869875, "percentage": 22.28, "elapsed_time": "0:05:05", "remaining_time": "0:17:45", "throughput": 2550.46, "total_tokens": 779456} {"current_steps": 1255, "total_steps": 5610, "loss": 0.0065, "lr": 4.7711631795488096e-05, "epoch": 2.237076648841355, "percentage": 22.37, "elapsed_time": "0:05:06", "remaining_time": "0:17:44", "throughput": 2550.61, "total_tokens": 782112} {"current_steps": 1260, "total_steps": 5610, "loss": 0.0725, "lr": 4.767901398464227e-05, "epoch": 2.2459893048128343, "percentage": 22.46, "elapsed_time": "0:05:07", "remaining_time": "0:17:42", "throughput": 2550.7, "total_tokens": 784864} {"current_steps": 1265, "total_steps": 5610, "loss": 0.0415, "lr": 4.7646176664543763e-05, "epoch": 2.2549019607843137, "percentage": 22.55, "elapsed_time": "0:05:08", "remaining_time": "0:17:40", "throughput": 2551.56, "total_tokens": 787936} {"current_steps": 1270, "total_steps": 5610, "loss": 0.0875, "lr": 4.761312015302367e-05, "epoch": 2.263814616755793, "percentage": 22.64, "elapsed_time": "0:05:09", "remaining_time": "0:17:39", "throughput": 2552.32, "total_tokens": 790976} {"current_steps": 1275, "total_steps": 5610, "loss": 0.0067, "lr": 4.757984477003462e-05, "epoch": 2.2727272727272725, "percentage": 22.73, "elapsed_time": "0:05:11", "remaining_time": "0:17:37", "throughput": 2553.01, "total_tokens": 794016} {"current_steps": 1280, "total_steps": 5610, "loss": 0.0988, "lr": 4.7546350837647666e-05, "epoch": 2.2816399286987523, "percentage": 22.82, "elapsed_time": "0:05:12", "remaining_time": "0:17:35", "throughput": 2553.27, "total_tokens": 796864} {"current_steps": 1285, "total_steps": 5610, "loss": 0.0008, "lr": 4.7512638680049245e-05, "epoch": 2.2905525846702317, "percentage": 22.91, "elapsed_time": "0:05:13", "remaining_time": "0:17:34", "throughput": 2554.34, "total_tokens": 800096} {"current_steps": 1290, "total_steps": 5610, "loss": 0.141, "lr": 4.7478708623537956e-05, "epoch": 2.299465240641711, "percentage": 22.99, "elapsed_time": "0:05:14", "remaining_time": "0:17:32", "throughput": 2555.45, "total_tokens": 803392} {"current_steps": 1295, "total_steps": 5610, "loss": 0.0787, "lr": 4.7444560996521415e-05, "epoch": 2.308377896613191, "percentage": 23.08, "elapsed_time": "0:05:15", "remaining_time": "0:17:31", "throughput": 2556.2, "total_tokens": 806400} {"current_steps": 1300, "total_steps": 5610, "loss": 0.3434, "lr": 4.741019612951312e-05, "epoch": 2.3172905525846703, "percentage": 23.17, "elapsed_time": "0:05:16", "remaining_time": "0:17:29", "throughput": 2557.03, "total_tokens": 809568} {"current_steps": 1305, "total_steps": 5610, "loss": 0.0448, "lr": 4.737561435512923e-05, "epoch": 2.3262032085561497, "percentage": 23.26, "elapsed_time": "0:05:17", "remaining_time": "0:17:28", "throughput": 2558.03, "total_tokens": 812768} {"current_steps": 1310, "total_steps": 5610, "loss": 0.0257, "lr": 4.734081600808531e-05, "epoch": 2.335115864527629, "percentage": 23.35, "elapsed_time": "0:05:18", "remaining_time": "0:17:26", "throughput": 2559.09, "total_tokens": 815968} {"current_steps": 1315, "total_steps": 5610, "loss": 0.0109, "lr": 4.7305801425193165e-05, "epoch": 2.344028520499109, "percentage": 23.44, "elapsed_time": "0:05:19", "remaining_time": "0:17:24", "throughput": 2559.74, "total_tokens": 818976} {"current_steps": 1320, "total_steps": 5610, "loss": 0.1287, "lr": 4.727057094535749e-05, "epoch": 2.3529411764705883, "percentage": 23.53, "elapsed_time": "0:05:20", "remaining_time": "0:17:23", "throughput": 2560.16, "total_tokens": 821760} {"current_steps": 1325, "total_steps": 5610, "loss": 0.1399, "lr": 4.72351249095727e-05, "epoch": 2.3618538324420677, "percentage": 23.62, "elapsed_time": "0:05:21", "remaining_time": "0:17:21", "throughput": 2560.0, "total_tokens": 824288} {"current_steps": 1330, "total_steps": 5610, "loss": 0.0546, "lr": 4.7199463660919514e-05, "epoch": 2.370766488413547, "percentage": 23.71, "elapsed_time": "0:05:23", "remaining_time": "0:17:19", "throughput": 2560.94, "total_tokens": 827424} {"current_steps": 1335, "total_steps": 5610, "loss": 0.1861, "lr": 4.7163587544561705e-05, "epoch": 2.379679144385027, "percentage": 23.8, "elapsed_time": "0:05:24", "remaining_time": "0:17:17", "throughput": 2561.22, "total_tokens": 830176} {"current_steps": 1340, "total_steps": 5610, "loss": 0.0112, "lr": 4.7127496907742734e-05, "epoch": 2.3885918003565063, "percentage": 23.89, "elapsed_time": "0:05:25", "remaining_time": "0:17:16", "throughput": 2562.76, "total_tokens": 833664} {"current_steps": 1345, "total_steps": 5610, "loss": 0.108, "lr": 4.709119209978242e-05, "epoch": 2.3975044563279857, "percentage": 23.98, "elapsed_time": "0:05:26", "remaining_time": "0:17:15", "throughput": 2563.56, "total_tokens": 836736} {"current_steps": 1350, "total_steps": 5610, "loss": 0.0531, "lr": 4.7054673472073506e-05, "epoch": 2.406417112299465, "percentage": 24.06, "elapsed_time": "0:05:27", "remaining_time": "0:17:13", "throughput": 2564.93, "total_tokens": 840160} {"current_steps": 1355, "total_steps": 5610, "loss": 0.1849, "lr": 4.7017941378078314e-05, "epoch": 2.415329768270945, "percentage": 24.15, "elapsed_time": "0:05:28", "remaining_time": "0:17:12", "throughput": 2565.59, "total_tokens": 843168} {"current_steps": 1360, "total_steps": 5610, "loss": 0.0017, "lr": 4.698099617332528e-05, "epoch": 2.4242424242424243, "percentage": 24.24, "elapsed_time": "0:05:29", "remaining_time": "0:17:10", "throughput": 2565.79, "total_tokens": 845952} {"current_steps": 1365, "total_steps": 5610, "loss": 0.005, "lr": 4.694383821540555e-05, "epoch": 2.4331550802139037, "percentage": 24.33, "elapsed_time": "0:05:30", "remaining_time": "0:17:08", "throughput": 2565.67, "total_tokens": 848448} {"current_steps": 1370, "total_steps": 5610, "loss": 0.0923, "lr": 4.690646786396945e-05, "epoch": 2.442067736185383, "percentage": 24.42, "elapsed_time": "0:05:31", "remaining_time": "0:17:06", "throughput": 2566.46, "total_tokens": 851552} {"current_steps": 1375, "total_steps": 5610, "loss": 0.0954, "lr": 4.686888548072312e-05, "epoch": 2.450980392156863, "percentage": 24.51, "elapsed_time": "0:05:32", "remaining_time": "0:17:05", "throughput": 2567.35, "total_tokens": 854752} {"current_steps": 1380, "total_steps": 5610, "loss": 0.0676, "lr": 4.683109142942492e-05, "epoch": 2.4598930481283423, "percentage": 24.6, "elapsed_time": "0:05:33", "remaining_time": "0:17:03", "throughput": 2567.91, "total_tokens": 857600} {"current_steps": 1385, "total_steps": 5610, "loss": 0.0957, "lr": 4.679308607588192e-05, "epoch": 2.4688057040998217, "percentage": 24.69, "elapsed_time": "0:05:35", "remaining_time": "0:17:02", "throughput": 2569.62, "total_tokens": 861248} {"current_steps": 1390, "total_steps": 5610, "loss": 0.1243, "lr": 4.6754869787946386e-05, "epoch": 2.477718360071301, "percentage": 24.78, "elapsed_time": "0:05:36", "remaining_time": "0:17:01", "throughput": 2571.56, "total_tokens": 865056} {"current_steps": 1395, "total_steps": 5610, "loss": 0.1454, "lr": 4.6716442935512214e-05, "epoch": 2.486631016042781, "percentage": 24.87, "elapsed_time": "0:05:37", "remaining_time": "0:16:59", "throughput": 2571.82, "total_tokens": 867936} {"current_steps": 1400, "total_steps": 5610, "loss": 0.1073, "lr": 4.6677805890511354e-05, "epoch": 2.4955436720142603, "percentage": 24.96, "elapsed_time": "0:05:38", "remaining_time": "0:16:58", "throughput": 2572.72, "total_tokens": 871136} {"current_steps": 1405, "total_steps": 5610, "loss": 0.0632, "lr": 4.663895902691018e-05, "epoch": 2.5044563279857397, "percentage": 25.04, "elapsed_time": "0:05:39", "remaining_time": "0:16:56", "throughput": 2572.51, "total_tokens": 873600} {"current_steps": 1405, "total_steps": 5610, "eval_loss": 0.1448797881603241, "epoch": 2.5044563279857397, "percentage": 25.04, "elapsed_time": "0:05:45", "remaining_time": "0:17:15", "throughput": 2525.64, "total_tokens": 873600} {"current_steps": 1410, "total_steps": 5610, "loss": 0.0324, "lr": 4.659990272070591e-05, "epoch": 2.5133689839572195, "percentage": 25.13, "elapsed_time": "0:05:47", "remaining_time": "0:17:16", "throughput": 2521.5, "total_tokens": 877152} {"current_steps": 1415, "total_steps": 5610, "loss": 0.0354, "lr": 4.656063734992294e-05, "epoch": 2.522281639928699, "percentage": 25.22, "elapsed_time": "0:05:48", "remaining_time": "0:17:14", "throughput": 2522.03, "total_tokens": 880096} {"current_steps": 1420, "total_steps": 5610, "loss": 0.0523, "lr": 4.6521163294609196e-05, "epoch": 2.5311942959001783, "percentage": 25.31, "elapsed_time": "0:05:49", "remaining_time": "0:17:12", "throughput": 2522.71, "total_tokens": 882944} {"current_steps": 1425, "total_steps": 5610, "loss": 0.0934, "lr": 4.6481480936832444e-05, "epoch": 2.5401069518716577, "percentage": 25.4, "elapsed_time": "0:05:51", "remaining_time": "0:17:11", "throughput": 2524.85, "total_tokens": 886848} {"current_steps": 1430, "total_steps": 5610, "loss": 0.0791, "lr": 4.644159066067662e-05, "epoch": 2.549019607843137, "percentage": 25.49, "elapsed_time": "0:05:52", "remaining_time": "0:17:10", "throughput": 2526.24, "total_tokens": 890272} {"current_steps": 1435, "total_steps": 5610, "loss": 0.0603, "lr": 4.640149285223806e-05, "epoch": 2.557932263814617, "percentage": 25.58, "elapsed_time": "0:05:53", "remaining_time": "0:17:08", "throughput": 2527.55, "total_tokens": 893600} {"current_steps": 1440, "total_steps": 5610, "loss": 0.0062, "lr": 4.636118789962184e-05, "epoch": 2.5668449197860963, "percentage": 25.67, "elapsed_time": "0:05:54", "remaining_time": "0:17:06", "throughput": 2527.91, "total_tokens": 896448} {"current_steps": 1445, "total_steps": 5610, "loss": 0.2594, "lr": 4.632067619293795e-05, "epoch": 2.5757575757575757, "percentage": 25.76, "elapsed_time": "0:05:55", "remaining_time": "0:17:05", "throughput": 2528.53, "total_tokens": 899424} {"current_steps": 1450, "total_steps": 5610, "loss": 0.0197, "lr": 4.6279958124297554e-05, "epoch": 2.5846702317290555, "percentage": 25.85, "elapsed_time": "0:05:56", "remaining_time": "0:17:03", "throughput": 2529.54, "total_tokens": 902624} {"current_steps": 1455, "total_steps": 5610, "loss": 0.0287, "lr": 4.623903408780916e-05, "epoch": 2.593582887700535, "percentage": 25.94, "elapsed_time": "0:05:57", "remaining_time": "0:17:02", "throughput": 2530.04, "total_tokens": 905568} {"current_steps": 1460, "total_steps": 5610, "loss": 0.0068, "lr": 4.619790447957488e-05, "epoch": 2.6024955436720143, "percentage": 26.02, "elapsed_time": "0:05:59", "remaining_time": "0:17:00", "throughput": 2531.25, "total_tokens": 908960} {"current_steps": 1465, "total_steps": 5610, "loss": 0.0148, "lr": 4.615656969768649e-05, "epoch": 2.6114081996434937, "percentage": 26.11, "elapsed_time": "0:06:00", "remaining_time": "0:16:59", "throughput": 2533.01, "total_tokens": 912640} {"current_steps": 1470, "total_steps": 5610, "loss": 0.0079, "lr": 4.611503014222168e-05, "epoch": 2.620320855614973, "percentage": 26.2, "elapsed_time": "0:06:01", "remaining_time": "0:16:57", "throughput": 2533.13, "total_tokens": 915328} {"current_steps": 1475, "total_steps": 5610, "loss": 0.0921, "lr": 4.6073286215240105e-05, "epoch": 2.629233511586453, "percentage": 26.29, "elapsed_time": "0:06:02", "remaining_time": "0:16:56", "throughput": 2534.42, "total_tokens": 918656} {"current_steps": 1480, "total_steps": 5610, "loss": 0.1445, "lr": 4.6031338320779534e-05, "epoch": 2.6381461675579323, "percentage": 26.38, "elapsed_time": "0:06:03", "remaining_time": "0:16:54", "throughput": 2534.45, "total_tokens": 921344} {"current_steps": 1485, "total_steps": 5610, "loss": 0.1085, "lr": 4.598918686485193e-05, "epoch": 2.6470588235294117, "percentage": 26.47, "elapsed_time": "0:06:04", "remaining_time": "0:16:52", "throughput": 2534.92, "total_tokens": 924192} {"current_steps": 1490, "total_steps": 5610, "loss": 0.0044, "lr": 4.594683225543952e-05, "epoch": 2.6559714795008915, "percentage": 26.56, "elapsed_time": "0:06:05", "remaining_time": "0:16:51", "throughput": 2535.84, "total_tokens": 927424} {"current_steps": 1495, "total_steps": 5610, "loss": 0.088, "lr": 4.590427490249084e-05, "epoch": 2.664884135472371, "percentage": 26.65, "elapsed_time": "0:06:06", "remaining_time": "0:16:49", "throughput": 2535.87, "total_tokens": 930080} {"current_steps": 1500, "total_steps": 5610, "loss": 0.227, "lr": 4.5861515217916785e-05, "epoch": 2.6737967914438503, "percentage": 26.74, "elapsed_time": "0:06:07", "remaining_time": "0:16:47", "throughput": 2536.0, "total_tokens": 932768} {"current_steps": 1505, "total_steps": 5610, "loss": 0.095, "lr": 4.581855361558659e-05, "epoch": 2.6827094474153297, "percentage": 26.83, "elapsed_time": "0:06:08", "remaining_time": "0:16:46", "throughput": 2536.83, "total_tokens": 935904} {"current_steps": 1510, "total_steps": 5610, "loss": 0.2288, "lr": 4.577539051132386e-05, "epoch": 2.691622103386809, "percentage": 26.92, "elapsed_time": "0:06:09", "remaining_time": "0:16:44", "throughput": 2537.3, "total_tokens": 938784} {"current_steps": 1515, "total_steps": 5610, "loss": 0.0119, "lr": 4.573202632290252e-05, "epoch": 2.700534759358289, "percentage": 27.01, "elapsed_time": "0:06:10", "remaining_time": "0:16:42", "throughput": 2537.15, "total_tokens": 941280} {"current_steps": 1520, "total_steps": 5610, "loss": 0.0493, "lr": 4.568846147004279e-05, "epoch": 2.7094474153297683, "percentage": 27.09, "elapsed_time": "0:06:12", "remaining_time": "0:16:41", "throughput": 2538.44, "total_tokens": 944672} {"current_steps": 1525, "total_steps": 5610, "loss": 0.0657, "lr": 4.5644696374407105e-05, "epoch": 2.7183600713012477, "percentage": 27.18, "elapsed_time": "0:06:13", "remaining_time": "0:16:39", "throughput": 2539.62, "total_tokens": 948032} {"current_steps": 1530, "total_steps": 5610, "loss": 0.0595, "lr": 4.560073145959602e-05, "epoch": 2.7272727272727275, "percentage": 27.27, "elapsed_time": "0:06:14", "remaining_time": "0:16:38", "throughput": 2541.84, "total_tokens": 952000} {"current_steps": 1535, "total_steps": 5610, "loss": 0.1502, "lr": 4.555656715114419e-05, "epoch": 2.736185383244207, "percentage": 27.36, "elapsed_time": "0:06:15", "remaining_time": "0:16:37", "throughput": 2543.16, "total_tokens": 955456} {"current_steps": 1540, "total_steps": 5610, "loss": 0.1952, "lr": 4.551220387651615e-05, "epoch": 2.7450980392156863, "percentage": 27.45, "elapsed_time": "0:06:16", "remaining_time": "0:16:36", "throughput": 2545.04, "total_tokens": 959232} {"current_steps": 1545, "total_steps": 5610, "loss": 0.1091, "lr": 4.546764206510221e-05, "epoch": 2.7540106951871657, "percentage": 27.54, "elapsed_time": "0:06:18", "remaining_time": "0:16:34", "throughput": 2545.71, "total_tokens": 962304} {"current_steps": 1550, "total_steps": 5610, "loss": 0.0678, "lr": 4.542288214821433e-05, "epoch": 2.762923351158645, "percentage": 27.63, "elapsed_time": "0:06:19", "remaining_time": "0:16:33", "throughput": 2546.33, "total_tokens": 965344} {"current_steps": 1555, "total_steps": 5610, "loss": 0.1246, "lr": 4.5377924559081946e-05, "epoch": 2.771836007130125, "percentage": 27.72, "elapsed_time": "0:06:20", "remaining_time": "0:16:31", "throughput": 2546.6, "total_tokens": 968032} {"current_steps": 1560, "total_steps": 5610, "loss": 0.1625, "lr": 4.533276973284771e-05, "epoch": 2.7807486631016043, "percentage": 27.81, "elapsed_time": "0:06:21", "remaining_time": "0:16:29", "throughput": 2546.4, "total_tokens": 970624} {"current_steps": 1565, "total_steps": 5610, "loss": 0.0861, "lr": 4.528741810656336e-05, "epoch": 2.7896613190730837, "percentage": 27.9, "elapsed_time": "0:06:22", "remaining_time": "0:16:28", "throughput": 2547.15, "total_tokens": 973760} {"current_steps": 1570, "total_steps": 5610, "loss": 0.0216, "lr": 4.5241870119185426e-05, "epoch": 2.7985739750445635, "percentage": 27.99, "elapsed_time": "0:06:23", "remaining_time": "0:16:26", "throughput": 2547.29, "total_tokens": 976480} {"current_steps": 1575, "total_steps": 5610, "loss": 0.0769, "lr": 4.519612621157103e-05, "epoch": 2.807486631016043, "percentage": 28.07, "elapsed_time": "0:06:24", "remaining_time": "0:16:24", "throughput": 2547.56, "total_tokens": 979328} {"current_steps": 1580, "total_steps": 5610, "loss": 0.0051, "lr": 4.515018682647359e-05, "epoch": 2.8163992869875223, "percentage": 28.16, "elapsed_time": "0:06:25", "remaining_time": "0:16:23", "throughput": 2548.56, "total_tokens": 982624} {"current_steps": 1585, "total_steps": 5610, "loss": 0.1352, "lr": 4.510405240853854e-05, "epoch": 2.8253119429590017, "percentage": 28.25, "elapsed_time": "0:06:26", "remaining_time": "0:16:21", "throughput": 2549.11, "total_tokens": 985664} {"current_steps": 1590, "total_steps": 5610, "loss": 0.0066, "lr": 4.505772340429905e-05, "epoch": 2.834224598930481, "percentage": 28.34, "elapsed_time": "0:06:27", "remaining_time": "0:16:20", "throughput": 2550.08, "total_tokens": 989024} {"current_steps": 1595, "total_steps": 5610, "loss": 0.1211, "lr": 4.501120026217164e-05, "epoch": 2.843137254901961, "percentage": 28.43, "elapsed_time": "0:06:28", "remaining_time": "0:16:19", "throughput": 2550.83, "total_tokens": 992160} {"current_steps": 1600, "total_steps": 5610, "loss": 0.0308, "lr": 4.496448343245192e-05, "epoch": 2.8520499108734403, "percentage": 28.52, "elapsed_time": "0:06:30", "remaining_time": "0:16:17", "throughput": 2551.62, "total_tokens": 995328} {"current_steps": 1605, "total_steps": 5610, "loss": 0.011, "lr": 4.4917573367310184e-05, "epoch": 2.8609625668449197, "percentage": 28.61, "elapsed_time": "0:06:31", "remaining_time": "0:16:16", "throughput": 2553.37, "total_tokens": 999136} {"current_steps": 1610, "total_steps": 5610, "loss": 0.1852, "lr": 4.4870470520787035e-05, "epoch": 2.8698752228163995, "percentage": 28.7, "elapsed_time": "0:06:32", "remaining_time": "0:16:14", "throughput": 2553.52, "total_tokens": 1001920} {"current_steps": 1615, "total_steps": 5610, "loss": 0.0288, "lr": 4.482317534878901e-05, "epoch": 2.878787878787879, "percentage": 28.79, "elapsed_time": "0:06:33", "remaining_time": "0:16:13", "throughput": 2555.02, "total_tokens": 1005632} {"current_steps": 1620, "total_steps": 5610, "loss": 0.1688, "lr": 4.477568830908415e-05, "epoch": 2.8877005347593583, "percentage": 28.88, "elapsed_time": "0:06:34", "remaining_time": "0:16:12", "throughput": 2556.77, "total_tokens": 1009408} {"current_steps": 1625, "total_steps": 5610, "loss": 0.0989, "lr": 4.4728009861297586e-05, "epoch": 2.8966131907308377, "percentage": 28.97, "elapsed_time": "0:06:35", "remaining_time": "0:16:10", "throughput": 2557.34, "total_tokens": 1012448} {"current_steps": 1630, "total_steps": 5610, "loss": 0.0401, "lr": 4.468014046690707e-05, "epoch": 2.905525846702317, "percentage": 29.06, "elapsed_time": "0:06:37", "remaining_time": "0:16:09", "throughput": 2558.18, "total_tokens": 1015616} {"current_steps": 1635, "total_steps": 5610, "loss": 0.0336, "lr": 4.463208058923851e-05, "epoch": 2.914438502673797, "percentage": 29.14, "elapsed_time": "0:06:38", "remaining_time": "0:16:07", "throughput": 2559.34, "total_tokens": 1018944} {"current_steps": 1640, "total_steps": 5610, "loss": 0.0084, "lr": 4.458383069346152e-05, "epoch": 2.9233511586452763, "percentage": 29.23, "elapsed_time": "0:06:39", "remaining_time": "0:16:06", "throughput": 2559.56, "total_tokens": 1021696} {"current_steps": 1645, "total_steps": 5610, "loss": 0.1792, "lr": 4.453539124658486e-05, "epoch": 2.9322638146167557, "percentage": 29.32, "elapsed_time": "0:06:40", "remaining_time": "0:16:04", "throughput": 2560.29, "total_tokens": 1024832} {"current_steps": 1650, "total_steps": 5610, "loss": 0.055, "lr": 4.4486762717451975e-05, "epoch": 2.9411764705882355, "percentage": 29.41, "elapsed_time": "0:06:41", "remaining_time": "0:16:03", "throughput": 2560.69, "total_tokens": 1027712} {"current_steps": 1655, "total_steps": 5610, "loss": 0.2018, "lr": 4.443794557673641e-05, "epoch": 2.950089126559715, "percentage": 29.5, "elapsed_time": "0:06:42", "remaining_time": "0:16:01", "throughput": 2561.69, "total_tokens": 1031040} {"current_steps": 1660, "total_steps": 5610, "loss": 0.0196, "lr": 4.43889402969373e-05, "epoch": 2.9590017825311943, "percentage": 29.59, "elapsed_time": "0:06:43", "remaining_time": "0:16:00", "throughput": 2561.49, "total_tokens": 1033440} {"current_steps": 1665, "total_steps": 5610, "loss": 0.0174, "lr": 4.4339747352374726e-05, "epoch": 2.9679144385026737, "percentage": 29.68, "elapsed_time": "0:06:44", "remaining_time": "0:15:58", "throughput": 2562.58, "total_tokens": 1036864} {"current_steps": 1670, "total_steps": 5610, "loss": 0.0023, "lr": 4.4290367219185206e-05, "epoch": 2.976827094474153, "percentage": 29.77, "elapsed_time": "0:06:45", "remaining_time": "0:15:57", "throughput": 2562.99, "total_tokens": 1039808} {"current_steps": 1675, "total_steps": 5610, "loss": 0.0292, "lr": 4.424080037531705e-05, "epoch": 2.985739750445633, "percentage": 29.86, "elapsed_time": "0:06:46", "remaining_time": "0:15:55", "throughput": 2564.04, "total_tokens": 1043200} {"current_steps": 1680, "total_steps": 5610, "loss": 0.0496, "lr": 4.4191047300525704e-05, "epoch": 2.9946524064171123, "percentage": 29.95, "elapsed_time": "0:06:47", "remaining_time": "0:15:54", "throughput": 2563.45, "total_tokens": 1045504} {"current_steps": 1685, "total_steps": 5610, "loss": 0.0335, "lr": 4.414110847636916e-05, "epoch": 3.0035650623885917, "percentage": 30.04, "elapsed_time": "0:06:48", "remaining_time": "0:15:52", "throughput": 2562.14, "total_tokens": 1047768} {"current_steps": 1686, "total_steps": 5610, "eval_loss": 0.14105600118637085, "epoch": 3.0053475935828877, "percentage": 30.05, "elapsed_time": "0:06:55", "remaining_time": "0:16:06", "throughput": 2523.19, "total_tokens": 1048184} {"current_steps": 1690, "total_steps": 5610, "loss": 0.0955, "lr": 4.409098438620326e-05, "epoch": 3.0124777183600715, "percentage": 30.12, "elapsed_time": "0:06:56", "remaining_time": "0:16:07", "throughput": 2519.11, "total_tokens": 1050456} {"current_steps": 1695, "total_steps": 5610, "loss": 0.034, "lr": 4.404067551517703e-05, "epoch": 3.021390374331551, "percentage": 30.21, "elapsed_time": "0:06:58", "remaining_time": "0:16:05", "throughput": 2519.95, "total_tokens": 1053592} {"current_steps": 1700, "total_steps": 5610, "loss": 0.0236, "lr": 4.399018235022799e-05, "epoch": 3.0303030303030303, "percentage": 30.3, "elapsed_time": "0:06:59", "remaining_time": "0:16:04", "throughput": 2520.57, "total_tokens": 1056664} {"current_steps": 1705, "total_steps": 5610, "loss": 0.1618, "lr": 4.393950538007743e-05, "epoch": 3.0392156862745097, "percentage": 30.39, "elapsed_time": "0:07:00", "remaining_time": "0:16:02", "throughput": 2520.77, "total_tokens": 1059384} {"current_steps": 1710, "total_steps": 5610, "loss": 0.0008, "lr": 4.3888645095225675e-05, "epoch": 3.0481283422459895, "percentage": 30.48, "elapsed_time": "0:07:01", "remaining_time": "0:16:00", "throughput": 2521.05, "total_tokens": 1062168} {"current_steps": 1715, "total_steps": 5610, "loss": 0.1569, "lr": 4.383760198794734e-05, "epoch": 3.057040998217469, "percentage": 30.57, "elapsed_time": "0:07:02", "remaining_time": "0:15:59", "throughput": 2521.42, "total_tokens": 1064952} {"current_steps": 1720, "total_steps": 5610, "loss": 0.0255, "lr": 4.37863765522866e-05, "epoch": 3.0659536541889483, "percentage": 30.66, "elapsed_time": "0:07:03", "remaining_time": "0:15:57", "throughput": 2521.18, "total_tokens": 1067416} {"current_steps": 1725, "total_steps": 5610, "loss": 0.0085, "lr": 4.3734969284052345e-05, "epoch": 3.0748663101604277, "percentage": 30.75, "elapsed_time": "0:07:04", "remaining_time": "0:15:56", "throughput": 2521.93, "total_tokens": 1070552} {"current_steps": 1730, "total_steps": 5610, "loss": 0.0115, "lr": 4.368338068081343e-05, "epoch": 3.0837789661319075, "percentage": 30.84, "elapsed_time": "0:07:05", "remaining_time": "0:15:54", "throughput": 2523.32, "total_tokens": 1074136} {"current_steps": 1735, "total_steps": 5610, "loss": 0.0007, "lr": 4.3631611241893874e-05, "epoch": 3.092691622103387, "percentage": 30.93, "elapsed_time": "0:07:06", "remaining_time": "0:15:53", "throughput": 2524.85, "total_tokens": 1077848} {"current_steps": 1740, "total_steps": 5610, "loss": 0.0336, "lr": 4.3579661468367924e-05, "epoch": 3.1016042780748663, "percentage": 31.02, "elapsed_time": "0:07:07", "remaining_time": "0:15:51", "throughput": 2525.27, "total_tokens": 1080664} {"current_steps": 1745, "total_steps": 5610, "loss": 0.1682, "lr": 4.352753186305536e-05, "epoch": 3.1105169340463457, "percentage": 31.11, "elapsed_time": "0:07:09", "remaining_time": "0:15:50", "throughput": 2526.2, "total_tokens": 1083992} {"current_steps": 1750, "total_steps": 5610, "loss": 0.2081, "lr": 4.347522293051648e-05, "epoch": 3.1194295900178255, "percentage": 31.19, "elapsed_time": "0:07:10", "remaining_time": "0:15:49", "throughput": 2527.94, "total_tokens": 1087800} {"current_steps": 1755, "total_steps": 5610, "loss": 0.0736, "lr": 4.3422735177047324e-05, "epoch": 3.128342245989305, "percentage": 31.28, "elapsed_time": "0:07:11", "remaining_time": "0:15:47", "throughput": 2528.41, "total_tokens": 1090776} {"current_steps": 1760, "total_steps": 5610, "loss": 0.0485, "lr": 4.337006911067473e-05, "epoch": 3.1372549019607843, "percentage": 31.37, "elapsed_time": "0:07:12", "remaining_time": "0:15:46", "throughput": 2528.68, "total_tokens": 1093624} {"current_steps": 1765, "total_steps": 5610, "loss": 0.0722, "lr": 4.331722524115139e-05, "epoch": 3.1461675579322637, "percentage": 31.46, "elapsed_time": "0:07:13", "remaining_time": "0:15:44", "throughput": 2529.04, "total_tokens": 1096472} {"current_steps": 1770, "total_steps": 5610, "loss": 0.1286, "lr": 4.3264204079950975e-05, "epoch": 3.1550802139037435, "percentage": 31.55, "elapsed_time": "0:07:14", "remaining_time": "0:15:43", "throughput": 2529.96, "total_tokens": 1099736} {"current_steps": 1775, "total_steps": 5610, "loss": 0.0243, "lr": 4.321100614026315e-05, "epoch": 3.163992869875223, "percentage": 31.64, "elapsed_time": "0:07:15", "remaining_time": "0:15:41", "throughput": 2531.15, "total_tokens": 1103384} {"current_steps": 1780, "total_steps": 5610, "loss": 0.0609, "lr": 4.31576319369886e-05, "epoch": 3.1729055258467023, "percentage": 31.73, "elapsed_time": "0:07:17", "remaining_time": "0:15:40", "throughput": 2531.76, "total_tokens": 1106520} {"current_steps": 1785, "total_steps": 5610, "loss": 0.0166, "lr": 4.310408198673406e-05, "epoch": 3.1818181818181817, "percentage": 31.82, "elapsed_time": "0:07:18", "remaining_time": "0:15:38", "throughput": 2531.87, "total_tokens": 1109208} {"current_steps": 1790, "total_steps": 5610, "loss": 0.0331, "lr": 4.305035680780732e-05, "epoch": 3.1907308377896615, "percentage": 31.91, "elapsed_time": "0:07:19", "remaining_time": "0:15:37", "throughput": 2532.87, "total_tokens": 1112536} {"current_steps": 1795, "total_steps": 5610, "loss": 0.1358, "lr": 4.299645692021221e-05, "epoch": 3.199643493761141, "percentage": 32.0, "elapsed_time": "0:07:20", "remaining_time": "0:15:35", "throughput": 2534.08, "total_tokens": 1115992} {"current_steps": 1800, "total_steps": 5610, "loss": 0.0381, "lr": 4.294238284564354e-05, "epoch": 3.2085561497326203, "percentage": 32.09, "elapsed_time": "0:07:21", "remaining_time": "0:15:34", "throughput": 2534.8, "total_tokens": 1119192} {"current_steps": 1805, "total_steps": 5610, "loss": 0.0293, "lr": 4.2888135107482067e-05, "epoch": 3.2174688057040997, "percentage": 32.17, "elapsed_time": "0:07:22", "remaining_time": "0:15:33", "throughput": 2535.65, "total_tokens": 1122552} {"current_steps": 1810, "total_steps": 5610, "loss": 0.0364, "lr": 4.283371423078945e-05, "epoch": 3.2263814616755795, "percentage": 32.26, "elapsed_time": "0:07:23", "remaining_time": "0:15:31", "throughput": 2536.81, "total_tokens": 1126072} {"current_steps": 1815, "total_steps": 5610, "loss": 0.1569, "lr": 4.277912074230312e-05, "epoch": 3.235294117647059, "percentage": 32.35, "elapsed_time": "0:07:24", "remaining_time": "0:15:30", "throughput": 2536.97, "total_tokens": 1128792} {"current_steps": 1820, "total_steps": 5610, "loss": 0.0284, "lr": 4.272435517043125e-05, "epoch": 3.2442067736185383, "percentage": 32.44, "elapsed_time": "0:07:26", "remaining_time": "0:15:28", "throughput": 2537.97, "total_tokens": 1132152} {"current_steps": 1825, "total_steps": 5610, "loss": 0.0444, "lr": 4.2669418045247576e-05, "epoch": 3.2531194295900177, "percentage": 32.53, "elapsed_time": "0:07:27", "remaining_time": "0:15:27", "throughput": 2538.35, "total_tokens": 1135064} {"current_steps": 1830, "total_steps": 5610, "loss": 0.0877, "lr": 4.2614309898486297e-05, "epoch": 3.2620320855614975, "percentage": 32.62, "elapsed_time": "0:07:28", "remaining_time": "0:15:25", "throughput": 2538.79, "total_tokens": 1137976} {"current_steps": 1835, "total_steps": 5610, "loss": 0.0404, "lr": 4.25590312635369e-05, "epoch": 3.270944741532977, "percentage": 32.71, "elapsed_time": "0:07:29", "remaining_time": "0:15:24", "throughput": 2539.55, "total_tokens": 1141080} {"current_steps": 1840, "total_steps": 5610, "loss": 0.0572, "lr": 4.250358267543907e-05, "epoch": 3.2798573975044563, "percentage": 32.8, "elapsed_time": "0:07:30", "remaining_time": "0:15:22", "throughput": 2540.43, "total_tokens": 1144376} {"current_steps": 1845, "total_steps": 5610, "loss": 0.136, "lr": 4.244796467087741e-05, "epoch": 3.2887700534759357, "percentage": 32.89, "elapsed_time": "0:07:31", "remaining_time": "0:15:21", "throughput": 2540.77, "total_tokens": 1147224} {"current_steps": 1850, "total_steps": 5610, "loss": 0.0632, "lr": 4.2392177788176335e-05, "epoch": 3.2976827094474155, "percentage": 32.98, "elapsed_time": "0:07:32", "remaining_time": "0:15:19", "throughput": 2541.41, "total_tokens": 1150360} {"current_steps": 1855, "total_steps": 5610, "loss": 0.0068, "lr": 4.2336222567294804e-05, "epoch": 3.306595365418895, "percentage": 33.07, "elapsed_time": "0:07:33", "remaining_time": "0:15:18", "throughput": 2542.35, "total_tokens": 1153688} {"current_steps": 1860, "total_steps": 5610, "loss": 0.169, "lr": 4.228009954982112e-05, "epoch": 3.3155080213903743, "percentage": 33.16, "elapsed_time": "0:07:34", "remaining_time": "0:15:17", "throughput": 2543.34, "total_tokens": 1157016} {"current_steps": 1865, "total_steps": 5610, "loss": 0.0277, "lr": 4.22238092789677e-05, "epoch": 3.3244206773618536, "percentage": 33.24, "elapsed_time": "0:07:35", "remaining_time": "0:15:15", "throughput": 2543.5, "total_tokens": 1159768} {"current_steps": 1870, "total_steps": 5610, "loss": 0.0685, "lr": 4.2167352299565746e-05, "epoch": 3.3333333333333335, "percentage": 33.33, "elapsed_time": "0:07:37", "remaining_time": "0:15:14", "throughput": 2543.57, "total_tokens": 1162520} {"current_steps": 1875, "total_steps": 5610, "loss": 0.0219, "lr": 4.21107291580601e-05, "epoch": 3.342245989304813, "percentage": 33.42, "elapsed_time": "0:07:38", "remaining_time": "0:15:12", "throughput": 2543.84, "total_tokens": 1165336} {"current_steps": 1880, "total_steps": 5610, "loss": 0.0469, "lr": 4.205394040250382e-05, "epoch": 3.3511586452762923, "percentage": 33.51, "elapsed_time": "0:07:39", "remaining_time": "0:15:11", "throughput": 2544.7, "total_tokens": 1168632} {"current_steps": 1885, "total_steps": 5610, "loss": 0.0652, "lr": 4.199698658255298e-05, "epoch": 3.3600713012477716, "percentage": 33.6, "elapsed_time": "0:07:40", "remaining_time": "0:15:09", "throughput": 2544.97, "total_tokens": 1171352} {"current_steps": 1890, "total_steps": 5610, "loss": 0.0775, "lr": 4.193986824946125e-05, "epoch": 3.3689839572192515, "percentage": 33.69, "elapsed_time": "0:07:41", "remaining_time": "0:15:08", "throughput": 2545.44, "total_tokens": 1174360} {"current_steps": 1895, "total_steps": 5610, "loss": 0.0253, "lr": 4.188258595607468e-05, "epoch": 3.377896613190731, "percentage": 33.78, "elapsed_time": "0:07:42", "remaining_time": "0:15:06", "throughput": 2545.96, "total_tokens": 1177368} {"current_steps": 1900, "total_steps": 5610, "loss": 0.0291, "lr": 4.182514025682625e-05, "epoch": 3.3868092691622103, "percentage": 33.87, "elapsed_time": "0:07:43", "remaining_time": "0:15:05", "throughput": 2546.99, "total_tokens": 1180824} {"current_steps": 1905, "total_steps": 5610, "loss": 0.1351, "lr": 4.176753170773052e-05, "epoch": 3.3957219251336896, "percentage": 33.96, "elapsed_time": "0:07:44", "remaining_time": "0:15:03", "throughput": 2547.19, "total_tokens": 1183544} {"current_steps": 1910, "total_steps": 5610, "loss": 0.0833, "lr": 4.170976086637832e-05, "epoch": 3.4046345811051695, "percentage": 34.05, "elapsed_time": "0:07:45", "remaining_time": "0:15:02", "throughput": 2546.62, "total_tokens": 1185848} {"current_steps": 1915, "total_steps": 5610, "loss": 0.1992, "lr": 4.1651828291931264e-05, "epoch": 3.413547237076649, "percentage": 34.14, "elapsed_time": "0:07:46", "remaining_time": "0:15:00", "throughput": 2547.52, "total_tokens": 1189176} {"current_steps": 1920, "total_steps": 5610, "loss": 0.0105, "lr": 4.159373454511636e-05, "epoch": 3.4224598930481283, "percentage": 34.22, "elapsed_time": "0:07:48", "remaining_time": "0:14:59", "throughput": 2548.98, "total_tokens": 1192984} {"current_steps": 1925, "total_steps": 5610, "loss": 0.1187, "lr": 4.1535480188220636e-05, "epoch": 3.431372549019608, "percentage": 34.31, "elapsed_time": "0:07:49", "remaining_time": "0:14:58", "throughput": 2550.6, "total_tokens": 1196888} {"current_steps": 1930, "total_steps": 5610, "loss": 0.1077, "lr": 4.1477065785085634e-05, "epoch": 3.4402852049910875, "percentage": 34.4, "elapsed_time": "0:07:50", "remaining_time": "0:14:57", "throughput": 2552.22, "total_tokens": 1200792} {"current_steps": 1935, "total_steps": 5610, "loss": 0.0855, "lr": 4.141849190110199e-05, "epoch": 3.449197860962567, "percentage": 34.49, "elapsed_time": "0:07:51", "remaining_time": "0:14:55", "throughput": 2552.71, "total_tokens": 1203832} {"current_steps": 1940, "total_steps": 5610, "loss": 0.1094, "lr": 4.1359759103203935e-05, "epoch": 3.4581105169340463, "percentage": 34.58, "elapsed_time": "0:07:52", "remaining_time": "0:14:54", "throughput": 2553.59, "total_tokens": 1207160} {"current_steps": 1945, "total_steps": 5610, "loss": 0.083, "lr": 4.130086795986383e-05, "epoch": 3.4670231729055256, "percentage": 34.67, "elapsed_time": "0:07:53", "remaining_time": "0:14:52", "throughput": 2554.56, "total_tokens": 1210616} {"current_steps": 1950, "total_steps": 5610, "loss": 0.0812, "lr": 4.124181904108664e-05, "epoch": 3.4759358288770055, "percentage": 34.76, "elapsed_time": "0:07:54", "remaining_time": "0:14:51", "throughput": 2554.93, "total_tokens": 1213528} {"current_steps": 1955, "total_steps": 5610, "loss": 0.1351, "lr": 4.1182612918404466e-05, "epoch": 3.484848484848485, "percentage": 34.85, "elapsed_time": "0:07:56", "remaining_time": "0:14:50", "throughput": 2555.42, "total_tokens": 1216568} {"current_steps": 1960, "total_steps": 5610, "loss": 0.0371, "lr": 4.1123250164870955e-05, "epoch": 3.4937611408199643, "percentage": 34.94, "elapsed_time": "0:07:57", "remaining_time": "0:14:48", "throughput": 2556.18, "total_tokens": 1219896} {"current_steps": 1965, "total_steps": 5610, "loss": 0.098, "lr": 4.1063731355055763e-05, "epoch": 3.502673796791444, "percentage": 35.03, "elapsed_time": "0:07:58", "remaining_time": "0:14:47", "throughput": 2556.65, "total_tokens": 1222904} {"current_steps": 1967, "total_steps": 5610, "eval_loss": 0.11949615180492401, "epoch": 3.5062388591800357, "percentage": 35.06, "elapsed_time": "0:08:05", "remaining_time": "0:14:58", "throughput": 2523.22, "total_tokens": 1223864} {"current_steps": 1970, "total_steps": 5610, "loss": 0.032, "lr": 4.100405706503904e-05, "epoch": 3.5115864527629235, "percentage": 35.12, "elapsed_time": "0:08:06", "remaining_time": "0:14:58", "throughput": 2519.21, "total_tokens": 1225496} {"current_steps": 1975, "total_steps": 5610, "loss": 0.0615, "lr": 4.094422787240581e-05, "epoch": 3.520499108734403, "percentage": 35.2, "elapsed_time": "0:08:07", "remaining_time": "0:14:57", "throughput": 2519.55, "total_tokens": 1228280} {"current_steps": 1980, "total_steps": 5610, "loss": 0.0089, "lr": 4.088424435624038e-05, "epoch": 3.5294117647058822, "percentage": 35.29, "elapsed_time": "0:08:08", "remaining_time": "0:14:55", "throughput": 2520.05, "total_tokens": 1231288} {"current_steps": 1985, "total_steps": 5610, "loss": 0.097, "lr": 4.082410709712077e-05, "epoch": 3.5383244206773616, "percentage": 35.38, "elapsed_time": "0:08:09", "remaining_time": "0:14:54", "throughput": 2520.73, "total_tokens": 1234456} {"current_steps": 1990, "total_steps": 5610, "loss": 0.0988, "lr": 4.0763816677113064e-05, "epoch": 3.5472370766488415, "percentage": 35.47, "elapsed_time": "0:08:10", "remaining_time": "0:14:52", "throughput": 2521.72, "total_tokens": 1237912} {"current_steps": 1995, "total_steps": 5610, "loss": 0.0549, "lr": 4.070337367976578e-05, "epoch": 3.556149732620321, "percentage": 35.56, "elapsed_time": "0:08:11", "remaining_time": "0:14:51", "throughput": 2522.4, "total_tokens": 1240984} {"current_steps": 2000, "total_steps": 5610, "loss": 0.0523, "lr": 4.064277869010421e-05, "epoch": 3.5650623885918002, "percentage": 35.65, "elapsed_time": "0:08:13", "remaining_time": "0:14:50", "throughput": 2523.26, "total_tokens": 1244280} {"current_steps": 2005, "total_steps": 5610, "loss": 0.0064, "lr": 4.058203229462482e-05, "epoch": 3.57397504456328, "percentage": 35.74, "elapsed_time": "0:08:14", "remaining_time": "0:14:48", "throughput": 2523.34, "total_tokens": 1246904} {"current_steps": 2010, "total_steps": 5610, "loss": 0.2163, "lr": 4.052113508128948e-05, "epoch": 3.5828877005347595, "percentage": 35.83, "elapsed_time": "0:08:15", "remaining_time": "0:14:46", "throughput": 2523.9, "total_tokens": 1249880} {"current_steps": 2015, "total_steps": 5610, "loss": 0.0497, "lr": 4.0460087639519836e-05, "epoch": 3.591800356506239, "percentage": 35.92, "elapsed_time": "0:08:16", "remaining_time": "0:14:45", "throughput": 2523.81, "total_tokens": 1252408} {"current_steps": 2020, "total_steps": 5610, "loss": 0.0485, "lr": 4.039889056019159e-05, "epoch": 3.6007130124777182, "percentage": 36.01, "elapsed_time": "0:08:17", "remaining_time": "0:14:43", "throughput": 2524.86, "total_tokens": 1255800} {"current_steps": 2025, "total_steps": 5610, "loss": 0.0137, "lr": 4.03375444356288e-05, "epoch": 3.6096256684491976, "percentage": 36.1, "elapsed_time": "0:08:18", "remaining_time": "0:14:42", "throughput": 2525.8, "total_tokens": 1259160} {"current_steps": 2030, "total_steps": 5610, "loss": 0.153, "lr": 4.0276049859598084e-05, "epoch": 3.6185383244206775, "percentage": 36.19, "elapsed_time": "0:08:19", "remaining_time": "0:14:41", "throughput": 2526.63, "total_tokens": 1262488} {"current_steps": 2035, "total_steps": 5610, "loss": 0.0235, "lr": 4.021440742730295e-05, "epoch": 3.627450980392157, "percentage": 36.27, "elapsed_time": "0:08:20", "remaining_time": "0:14:39", "throughput": 2526.98, "total_tokens": 1265368} {"current_steps": 2040, "total_steps": 5610, "loss": 0.1157, "lr": 4.015261773537799e-05, "epoch": 3.6363636363636362, "percentage": 36.36, "elapsed_time": "0:08:21", "remaining_time": "0:14:38", "throughput": 2528.36, "total_tokens": 1269112} {"current_steps": 2045, "total_steps": 5610, "loss": 0.0795, "lr": 4.009068138188311e-05, "epoch": 3.645276292335116, "percentage": 36.45, "elapsed_time": "0:08:23", "remaining_time": "0:14:37", "throughput": 2529.15, "total_tokens": 1272408} {"current_steps": 2050, "total_steps": 5610, "loss": 0.069, "lr": 4.002859896629776e-05, "epoch": 3.6541889483065955, "percentage": 36.54, "elapsed_time": "0:08:24", "remaining_time": "0:14:35", "throughput": 2529.93, "total_tokens": 1275640} {"current_steps": 2055, "total_steps": 5610, "loss": 0.041, "lr": 3.99663710895151e-05, "epoch": 3.663101604278075, "percentage": 36.63, "elapsed_time": "0:08:25", "remaining_time": "0:14:34", "throughput": 2530.39, "total_tokens": 1278616} {"current_steps": 2060, "total_steps": 5610, "loss": 0.2036, "lr": 3.990399835383623e-05, "epoch": 3.6720142602495542, "percentage": 36.72, "elapsed_time": "0:08:26", "remaining_time": "0:14:32", "throughput": 2530.77, "total_tokens": 1281624} {"current_steps": 2065, "total_steps": 5610, "loss": 0.1056, "lr": 3.984148136296431e-05, "epoch": 3.6809269162210336, "percentage": 36.81, "elapsed_time": "0:08:27", "remaining_time": "0:14:31", "throughput": 2530.76, "total_tokens": 1284216} {"current_steps": 2070, "total_steps": 5610, "loss": 0.0509, "lr": 3.977882072199874e-05, "epoch": 3.6898395721925135, "percentage": 36.9, "elapsed_time": "0:08:28", "remaining_time": "0:14:29", "throughput": 2530.78, "total_tokens": 1286808} {"current_steps": 2075, "total_steps": 5610, "loss": 0.0604, "lr": 3.971601703742932e-05, "epoch": 3.698752228163993, "percentage": 36.99, "elapsed_time": "0:08:29", "remaining_time": "0:14:28", "throughput": 2531.46, "total_tokens": 1289944} {"current_steps": 2080, "total_steps": 5610, "loss": 0.076, "lr": 3.965307091713037e-05, "epoch": 3.7076648841354722, "percentage": 37.08, "elapsed_time": "0:08:30", "remaining_time": "0:14:26", "throughput": 2531.82, "total_tokens": 1292856} {"current_steps": 2085, "total_steps": 5610, "loss": 0.0219, "lr": 3.95899829703548e-05, "epoch": 3.716577540106952, "percentage": 37.17, "elapsed_time": "0:08:31", "remaining_time": "0:14:25", "throughput": 2533.26, "total_tokens": 1296792} {"current_steps": 2090, "total_steps": 5610, "loss": 0.0188, "lr": 3.9526753807728295e-05, "epoch": 3.7254901960784315, "percentage": 37.25, "elapsed_time": "0:08:33", "remaining_time": "0:14:24", "throughput": 2533.71, "total_tokens": 1299800} {"current_steps": 2095, "total_steps": 5610, "loss": 0.018, "lr": 3.946338404124334e-05, "epoch": 3.734402852049911, "percentage": 37.34, "elapsed_time": "0:08:34", "remaining_time": "0:14:22", "throughput": 2533.95, "total_tokens": 1302648} {"current_steps": 2100, "total_steps": 5610, "loss": 0.0049, "lr": 3.939987428425331e-05, "epoch": 3.7433155080213902, "percentage": 37.43, "elapsed_time": "0:08:35", "remaining_time": "0:14:20", "throughput": 2533.63, "total_tokens": 1305016} {"current_steps": 2105, "total_steps": 5610, "loss": 0.108, "lr": 3.933622515146658e-05, "epoch": 3.7522281639928696, "percentage": 37.52, "elapsed_time": "0:08:36", "remaining_time": "0:14:19", "throughput": 2534.11, "total_tokens": 1308024} {"current_steps": 2110, "total_steps": 5610, "loss": 0.0332, "lr": 3.9272437258940494e-05, "epoch": 3.7611408199643495, "percentage": 37.61, "elapsed_time": "0:08:37", "remaining_time": "0:14:17", "throughput": 2534.09, "total_tokens": 1310552} {"current_steps": 2115, "total_steps": 5610, "loss": 0.0833, "lr": 3.9208511224075484e-05, "epoch": 3.770053475935829, "percentage": 37.7, "elapsed_time": "0:08:38", "remaining_time": "0:14:16", "throughput": 2534.67, "total_tokens": 1313656} {"current_steps": 2120, "total_steps": 5610, "loss": 0.0751, "lr": 3.914444766560902e-05, "epoch": 3.7789661319073082, "percentage": 37.79, "elapsed_time": "0:08:39", "remaining_time": "0:14:15", "throughput": 2535.13, "total_tokens": 1316728} {"current_steps": 2125, "total_steps": 5610, "loss": 0.2379, "lr": 3.908024720360968e-05, "epoch": 3.787878787878788, "percentage": 37.88, "elapsed_time": "0:08:40", "remaining_time": "0:14:13", "throughput": 2536.22, "total_tokens": 1320344} {"current_steps": 2130, "total_steps": 5610, "loss": 0.1123, "lr": 3.9015910459471126e-05, "epoch": 3.7967914438502675, "percentage": 37.97, "elapsed_time": "0:08:41", "remaining_time": "0:14:12", "throughput": 2536.69, "total_tokens": 1323416} {"current_steps": 2135, "total_steps": 5610, "loss": 0.0029, "lr": 3.8951438055906084e-05, "epoch": 3.805704099821747, "percentage": 38.06, "elapsed_time": "0:08:42", "remaining_time": "0:14:10", "throughput": 2537.07, "total_tokens": 1326360} {"current_steps": 2140, "total_steps": 5610, "loss": 0.0255, "lr": 3.888683061694032e-05, "epoch": 3.8146167557932262, "percentage": 38.15, "elapsed_time": "0:08:43", "remaining_time": "0:14:09", "throughput": 2538.12, "total_tokens": 1329944} {"current_steps": 2145, "total_steps": 5610, "loss": 0.0605, "lr": 3.882208876790661e-05, "epoch": 3.8235294117647056, "percentage": 38.24, "elapsed_time": "0:08:45", "remaining_time": "0:14:08", "throughput": 2538.8, "total_tokens": 1333080} {"current_steps": 2150, "total_steps": 5610, "loss": 0.1111, "lr": 3.8757213135438655e-05, "epoch": 3.8324420677361855, "percentage": 38.32, "elapsed_time": "0:08:46", "remaining_time": "0:14:06", "throughput": 2539.73, "total_tokens": 1336504} {"current_steps": 2155, "total_steps": 5610, "loss": 0.0818, "lr": 3.869220434746509e-05, "epoch": 3.841354723707665, "percentage": 38.41, "elapsed_time": "0:08:47", "remaining_time": "0:14:05", "throughput": 2540.4, "total_tokens": 1339704} {"current_steps": 2160, "total_steps": 5610, "loss": 0.0514, "lr": 3.862706303320329e-05, "epoch": 3.8502673796791442, "percentage": 38.5, "elapsed_time": "0:08:48", "remaining_time": "0:14:04", "throughput": 2541.2, "total_tokens": 1343032} {"current_steps": 2165, "total_steps": 5610, "loss": 0.1118, "lr": 3.856178982315342e-05, "epoch": 3.859180035650624, "percentage": 38.59, "elapsed_time": "0:08:49", "remaining_time": "0:14:02", "throughput": 2541.78, "total_tokens": 1346104} {"current_steps": 2170, "total_steps": 5610, "loss": 0.2059, "lr": 3.849638534909219e-05, "epoch": 3.8680926916221035, "percentage": 38.68, "elapsed_time": "0:08:50", "remaining_time": "0:14:01", "throughput": 2542.17, "total_tokens": 1348984} {"current_steps": 2175, "total_steps": 5610, "loss": 0.0437, "lr": 3.843085024406686e-05, "epoch": 3.877005347593583, "percentage": 38.77, "elapsed_time": "0:08:51", "remaining_time": "0:13:59", "throughput": 2541.95, "total_tokens": 1351480} {"current_steps": 2180, "total_steps": 5610, "loss": 0.1014, "lr": 3.836518514238903e-05, "epoch": 3.8859180035650622, "percentage": 38.86, "elapsed_time": "0:08:52", "remaining_time": "0:13:58", "throughput": 2543.36, "total_tokens": 1355448} {"current_steps": 2185, "total_steps": 5610, "loss": 0.0669, "lr": 3.8299390679628555e-05, "epoch": 3.8948306595365416, "percentage": 38.95, "elapsed_time": "0:08:54", "remaining_time": "0:13:57", "throughput": 2543.73, "total_tokens": 1358392} {"current_steps": 2190, "total_steps": 5610, "loss": 0.0723, "lr": 3.8233467492607354e-05, "epoch": 3.9037433155080214, "percentage": 39.04, "elapsed_time": "0:08:55", "remaining_time": "0:13:55", "throughput": 2544.08, "total_tokens": 1361368} {"current_steps": 2195, "total_steps": 5610, "loss": 0.0095, "lr": 3.816741621939327e-05, "epoch": 3.912655971479501, "percentage": 39.13, "elapsed_time": "0:08:56", "remaining_time": "0:13:54", "throughput": 2544.75, "total_tokens": 1364536} {"current_steps": 2200, "total_steps": 5610, "loss": 0.0823, "lr": 3.81012374992939e-05, "epoch": 3.9215686274509802, "percentage": 39.22, "elapsed_time": "0:08:57", "remaining_time": "0:13:52", "throughput": 2545.34, "total_tokens": 1367800} {"current_steps": 2205, "total_steps": 5610, "loss": 0.0229, "lr": 3.803493197285036e-05, "epoch": 3.93048128342246, "percentage": 39.3, "elapsed_time": "0:08:58", "remaining_time": "0:13:51", "throughput": 2546.22, "total_tokens": 1371224} {"current_steps": 2210, "total_steps": 5610, "loss": 0.0061, "lr": 3.7968500281831146e-05, "epoch": 3.9393939393939394, "percentage": 39.39, "elapsed_time": "0:08:59", "remaining_time": "0:13:50", "throughput": 2546.29, "total_tokens": 1373944} {"current_steps": 2215, "total_steps": 5610, "loss": 0.0799, "lr": 3.79019430692259e-05, "epoch": 3.948306595365419, "percentage": 39.48, "elapsed_time": "0:09:00", "remaining_time": "0:13:48", "throughput": 2547.13, "total_tokens": 1377240} {"current_steps": 2220, "total_steps": 5610, "loss": 0.0031, "lr": 3.783526097923915e-05, "epoch": 3.9572192513368982, "percentage": 39.57, "elapsed_time": "0:09:01", "remaining_time": "0:13:47", "throughput": 2547.57, "total_tokens": 1380248} {"current_steps": 2225, "total_steps": 5610, "loss": 0.1341, "lr": 3.7768454657284154e-05, "epoch": 3.966131907308378, "percentage": 39.66, "elapsed_time": "0:09:02", "remaining_time": "0:13:45", "throughput": 2547.34, "total_tokens": 1382712} {"current_steps": 2230, "total_steps": 5610, "loss": 0.0958, "lr": 3.770152474997657e-05, "epoch": 3.9750445632798574, "percentage": 39.75, "elapsed_time": "0:09:03", "remaining_time": "0:13:44", "throughput": 2547.95, "total_tokens": 1385976} {"current_steps": 2235, "total_steps": 5610, "loss": 0.0098, "lr": 3.763447190512824e-05, "epoch": 3.983957219251337, "percentage": 39.84, "elapsed_time": "0:09:05", "remaining_time": "0:13:43", "throughput": 2549.03, "total_tokens": 1389624} {"current_steps": 2240, "total_steps": 5610, "loss": 0.102, "lr": 3.7567296771740925e-05, "epoch": 3.9928698752228167, "percentage": 39.93, "elapsed_time": "0:09:06", "remaining_time": "0:13:41", "throughput": 2549.51, "total_tokens": 1392728} {"current_steps": 2245, "total_steps": 5610, "loss": 0.0022, "lr": 3.7500000000000003e-05, "epoch": 4.001782531194296, "percentage": 40.02, "elapsed_time": "0:09:07", "remaining_time": "0:13:40", "throughput": 2549.34, "total_tokens": 1395704} {"current_steps": 2248, "total_steps": 5610, "eval_loss": 0.14008468389511108, "epoch": 4.007130124777183, "percentage": 40.07, "elapsed_time": "0:09:14", "remaining_time": "0:13:49", "throughput": 2520.7, "total_tokens": 1397624} {"current_steps": 2250, "total_steps": 5610, "loss": 0.0091, "lr": 3.743258224126819e-05, "epoch": 4.010695187165775, "percentage": 40.11, "elapsed_time": "0:09:15", "remaining_time": "0:13:49", "throughput": 2517.28, "total_tokens": 1398584} {"current_steps": 2255, "total_steps": 5610, "loss": 0.0594, "lr": 3.736504414807922e-05, "epoch": 4.019607843137255, "percentage": 40.2, "elapsed_time": "0:09:16", "remaining_time": "0:13:48", "throughput": 2517.96, "total_tokens": 1401784} {"current_steps": 2260, "total_steps": 5610, "loss": 0.0225, "lr": 3.729738637413156e-05, "epoch": 4.028520499108734, "percentage": 40.29, "elapsed_time": "0:09:17", "remaining_time": "0:13:46", "throughput": 2517.83, "total_tokens": 1404312} {"current_steps": 2265, "total_steps": 5610, "loss": 0.1722, "lr": 3.722960957428203e-05, "epoch": 4.037433155080214, "percentage": 40.37, "elapsed_time": "0:09:18", "remaining_time": "0:13:45", "throughput": 2518.43, "total_tokens": 1407352} {"current_steps": 2270, "total_steps": 5610, "loss": 0.0917, "lr": 3.716171440453952e-05, "epoch": 4.046345811051693, "percentage": 40.46, "elapsed_time": "0:09:19", "remaining_time": "0:13:43", "throughput": 2519.18, "total_tokens": 1410648} {"current_steps": 2275, "total_steps": 5610, "loss": 0.1049, "lr": 3.709370152205863e-05, "epoch": 4.055258467023173, "percentage": 40.55, "elapsed_time": "0:09:21", "remaining_time": "0:13:42", "throughput": 2519.85, "total_tokens": 1413816} {"current_steps": 2280, "total_steps": 5610, "loss": 0.0348, "lr": 3.7025571585133254e-05, "epoch": 4.064171122994653, "percentage": 40.64, "elapsed_time": "0:09:22", "remaining_time": "0:13:40", "throughput": 2519.41, "total_tokens": 1416024} {"current_steps": 2285, "total_steps": 5610, "loss": 0.0132, "lr": 3.69573252531903e-05, "epoch": 4.073083778966132, "percentage": 40.73, "elapsed_time": "0:09:23", "remaining_time": "0:13:39", "throughput": 2519.91, "total_tokens": 1419128} {"current_steps": 2290, "total_steps": 5610, "loss": 0.0493, "lr": 3.6888963186783224e-05, "epoch": 4.081996434937611, "percentage": 40.82, "elapsed_time": "0:09:24", "remaining_time": "0:13:37", "throughput": 2519.97, "total_tokens": 1421720} {"current_steps": 2295, "total_steps": 5610, "loss": 0.1269, "lr": 3.682048604758567e-05, "epoch": 4.090909090909091, "percentage": 40.91, "elapsed_time": "0:09:25", "remaining_time": "0:13:36", "throughput": 2520.3, "total_tokens": 1424632} {"current_steps": 2300, "total_steps": 5610, "loss": 0.1213, "lr": 3.67518944983851e-05, "epoch": 4.09982174688057, "percentage": 41.0, "elapsed_time": "0:09:26", "remaining_time": "0:13:35", "throughput": 2520.57, "total_tokens": 1427480} {"current_steps": 2305, "total_steps": 5610, "loss": 0.225, "lr": 3.668318920307632e-05, "epoch": 4.10873440285205, "percentage": 41.09, "elapsed_time": "0:09:27", "remaining_time": "0:13:33", "throughput": 2520.88, "total_tokens": 1430296} {"current_steps": 2310, "total_steps": 5610, "loss": 0.0336, "lr": 3.6614370826655074e-05, "epoch": 4.117647058823529, "percentage": 41.18, "elapsed_time": "0:09:28", "remaining_time": "0:13:31", "throughput": 2521.09, "total_tokens": 1432920} {"current_steps": 2315, "total_steps": 5610, "loss": 0.0948, "lr": 3.654544003521164e-05, "epoch": 4.126559714795009, "percentage": 41.27, "elapsed_time": "0:09:29", "remaining_time": "0:13:30", "throughput": 2521.13, "total_tokens": 1435544} {"current_steps": 2320, "total_steps": 5610, "loss": 0.0656, "lr": 3.647639749592433e-05, "epoch": 4.135472370766489, "percentage": 41.35, "elapsed_time": "0:09:30", "remaining_time": "0:13:28", "throughput": 2521.01, "total_tokens": 1438040} {"current_steps": 2325, "total_steps": 5610, "loss": 0.0154, "lr": 3.640724387705308e-05, "epoch": 4.144385026737968, "percentage": 41.44, "elapsed_time": "0:09:31", "remaining_time": "0:13:27", "throughput": 2521.99, "total_tokens": 1441528} {"current_steps": 2330, "total_steps": 5610, "loss": 0.0054, "lr": 3.633797984793294e-05, "epoch": 4.153297682709447, "percentage": 41.53, "elapsed_time": "0:09:32", "remaining_time": "0:13:26", "throughput": 2522.86, "total_tokens": 1444920} {"current_steps": 2335, "total_steps": 5610, "loss": 0.0077, "lr": 3.626860607896764e-05, "epoch": 4.162210338680927, "percentage": 41.62, "elapsed_time": "0:09:33", "remaining_time": "0:13:24", "throughput": 2523.26, "total_tokens": 1447896} {"current_steps": 2340, "total_steps": 5610, "loss": 0.0303, "lr": 3.6199123241623046e-05, "epoch": 4.171122994652406, "percentage": 41.71, "elapsed_time": "0:09:34", "remaining_time": "0:13:23", "throughput": 2524.08, "total_tokens": 1451256} {"current_steps": 2345, "total_steps": 5610, "loss": 0.1724, "lr": 3.6129532008420715e-05, "epoch": 4.180035650623886, "percentage": 41.8, "elapsed_time": "0:09:36", "remaining_time": "0:13:22", "throughput": 2524.36, "total_tokens": 1454136} {"current_steps": 2350, "total_steps": 5610, "loss": 0.1272, "lr": 3.605983305293137e-05, "epoch": 4.188948306595365, "percentage": 41.89, "elapsed_time": "0:09:37", "remaining_time": "0:13:20", "throughput": 2524.16, "total_tokens": 1456504} {"current_steps": 2355, "total_steps": 5610, "loss": 0.0893, "lr": 3.599002704976835e-05, "epoch": 4.197860962566845, "percentage": 41.98, "elapsed_time": "0:09:38", "remaining_time": "0:13:19", "throughput": 2524.87, "total_tokens": 1459768} {"current_steps": 2360, "total_steps": 5610, "loss": 0.0046, "lr": 3.592011467458113e-05, "epoch": 4.206773618538325, "percentage": 42.07, "elapsed_time": "0:09:39", "remaining_time": "0:13:17", "throughput": 2524.87, "total_tokens": 1462392} {"current_steps": 2365, "total_steps": 5610, "loss": 0.0609, "lr": 3.585009660404873e-05, "epoch": 4.215686274509804, "percentage": 42.16, "elapsed_time": "0:09:40", "remaining_time": "0:13:16", "throughput": 2525.94, "total_tokens": 1466040} {"current_steps": 2370, "total_steps": 5610, "loss": 0.1036, "lr": 3.577997351587322e-05, "epoch": 4.224598930481283, "percentage": 42.25, "elapsed_time": "0:09:41", "remaining_time": "0:13:14", "throughput": 2526.51, "total_tokens": 1469208} {"current_steps": 2375, "total_steps": 5610, "loss": 0.0649, "lr": 3.5709746088773085e-05, "epoch": 4.233511586452763, "percentage": 42.34, "elapsed_time": "0:09:42", "remaining_time": "0:13:13", "throughput": 2527.24, "total_tokens": 1472536} {"current_steps": 2380, "total_steps": 5610, "loss": 0.0242, "lr": 3.563941500247676e-05, "epoch": 4.242424242424242, "percentage": 42.42, "elapsed_time": "0:09:43", "remaining_time": "0:13:12", "throughput": 2527.74, "total_tokens": 1475608} {"current_steps": 2385, "total_steps": 5610, "loss": 0.0043, "lr": 3.5568980937715945e-05, "epoch": 4.251336898395722, "percentage": 42.51, "elapsed_time": "0:09:44", "remaining_time": "0:13:10", "throughput": 2528.8, "total_tokens": 1479256} {"current_steps": 2390, "total_steps": 5610, "loss": 0.0546, "lr": 3.54984445762191e-05, "epoch": 4.260249554367201, "percentage": 42.6, "elapsed_time": "0:09:46", "remaining_time": "0:13:09", "throughput": 2530.07, "total_tokens": 1483064} {"current_steps": 2395, "total_steps": 5610, "loss": 0.0818, "lr": 3.5427806600704785e-05, "epoch": 4.269162210338681, "percentage": 42.69, "elapsed_time": "0:09:47", "remaining_time": "0:13:08", "throughput": 2530.24, "total_tokens": 1485880} {"current_steps": 2400, "total_steps": 5610, "loss": 0.017, "lr": 3.535706769487509e-05, "epoch": 4.278074866310161, "percentage": 42.78, "elapsed_time": "0:09:48", "remaining_time": "0:13:06", "throughput": 2530.93, "total_tokens": 1489208} {"current_steps": 2405, "total_steps": 5610, "loss": 0.0034, "lr": 3.5286228543409004e-05, "epoch": 4.28698752228164, "percentage": 42.87, "elapsed_time": "0:09:49", "remaining_time": "0:13:05", "throughput": 2531.31, "total_tokens": 1492216} {"current_steps": 2410, "total_steps": 5610, "loss": 0.0008, "lr": 3.5215289831955786e-05, "epoch": 4.295900178253119, "percentage": 42.96, "elapsed_time": "0:09:50", "remaining_time": "0:13:04", "throughput": 2532.4, "total_tokens": 1495960} {"current_steps": 2415, "total_steps": 5610, "loss": 0.0146, "lr": 3.514425224712835e-05, "epoch": 4.304812834224599, "percentage": 43.05, "elapsed_time": "0:09:51", "remaining_time": "0:13:02", "throughput": 2532.37, "total_tokens": 1498584} {"current_steps": 2420, "total_steps": 5610, "loss": 0.168, "lr": 3.507311647649657e-05, "epoch": 4.313725490196078, "percentage": 43.14, "elapsed_time": "0:09:52", "remaining_time": "0:13:01", "throughput": 2533.14, "total_tokens": 1501880} {"current_steps": 2425, "total_steps": 5610, "loss": 0.063, "lr": 3.5001883208580665e-05, "epoch": 4.322638146167558, "percentage": 43.23, "elapsed_time": "0:09:54", "remaining_time": "0:13:00", "throughput": 2533.77, "total_tokens": 1505112} {"current_steps": 2430, "total_steps": 5610, "loss": 0.1401, "lr": 3.493055313284456e-05, "epoch": 4.331550802139038, "percentage": 43.32, "elapsed_time": "0:09:55", "remaining_time": "0:12:58", "throughput": 2533.86, "total_tokens": 1507768} {"current_steps": 2435, "total_steps": 5610, "loss": 0.0711, "lr": 3.485912693968913e-05, "epoch": 4.340463458110517, "percentage": 43.4, "elapsed_time": "0:09:56", "remaining_time": "0:12:57", "throughput": 2534.69, "total_tokens": 1511224} {"current_steps": 2440, "total_steps": 5610, "loss": 0.0693, "lr": 3.478760532044561e-05, "epoch": 4.349376114081997, "percentage": 43.49, "elapsed_time": "0:09:57", "remaining_time": "0:12:56", "throughput": 2535.26, "total_tokens": 1514456} {"current_steps": 2445, "total_steps": 5610, "loss": 0.0348, "lr": 3.471598896736881e-05, "epoch": 4.358288770053476, "percentage": 43.58, "elapsed_time": "0:09:58", "remaining_time": "0:12:54", "throughput": 2535.58, "total_tokens": 1517400} {"current_steps": 2450, "total_steps": 5610, "loss": 0.1231, "lr": 3.464427857363052e-05, "epoch": 4.367201426024955, "percentage": 43.67, "elapsed_time": "0:09:59", "remaining_time": "0:12:53", "throughput": 2536.14, "total_tokens": 1520664} {"current_steps": 2455, "total_steps": 5610, "loss": 0.0743, "lr": 3.457247483331272e-05, "epoch": 4.376114081996435, "percentage": 43.76, "elapsed_time": "0:10:00", "remaining_time": "0:12:52", "throughput": 2536.85, "total_tokens": 1523960} {"current_steps": 2460, "total_steps": 5610, "loss": 0.0609, "lr": 3.4500578441400876e-05, "epoch": 4.385026737967914, "percentage": 43.85, "elapsed_time": "0:10:01", "remaining_time": "0:12:50", "throughput": 2537.05, "total_tokens": 1526616} {"current_steps": 2465, "total_steps": 5610, "loss": 0.1118, "lr": 3.4428590093777244e-05, "epoch": 4.393939393939394, "percentage": 43.94, "elapsed_time": "0:10:03", "remaining_time": "0:12:49", "throughput": 2538.47, "total_tokens": 1530808} {"current_steps": 2470, "total_steps": 5610, "loss": 0.0235, "lr": 3.43565104872141e-05, "epoch": 4.402852049910873, "percentage": 44.03, "elapsed_time": "0:10:04", "remaining_time": "0:12:47", "throughput": 2538.39, "total_tokens": 1533336} {"current_steps": 2475, "total_steps": 5610, "loss": 0.0906, "lr": 3.428434031936704e-05, "epoch": 4.411764705882353, "percentage": 44.12, "elapsed_time": "0:10:05", "remaining_time": "0:12:46", "throughput": 2538.23, "total_tokens": 1535864} {"current_steps": 2480, "total_steps": 5610, "loss": 0.0636, "lr": 3.421208028876815e-05, "epoch": 4.420677361853833, "percentage": 44.21, "elapsed_time": "0:10:06", "remaining_time": "0:12:45", "throughput": 2538.87, "total_tokens": 1539192} {"current_steps": 2485, "total_steps": 5610, "loss": 0.0018, "lr": 3.413973109481935e-05, "epoch": 4.429590017825312, "percentage": 44.3, "elapsed_time": "0:10:07", "remaining_time": "0:12:43", "throughput": 2539.72, "total_tokens": 1542712} {"current_steps": 2490, "total_steps": 5610, "loss": 0.038, "lr": 3.406729343778552e-05, "epoch": 4.438502673796791, "percentage": 44.39, "elapsed_time": "0:10:08", "remaining_time": "0:12:42", "throughput": 2539.67, "total_tokens": 1545272} {"current_steps": 2495, "total_steps": 5610, "loss": 0.0472, "lr": 3.3994768018787815e-05, "epoch": 4.447415329768271, "percentage": 44.47, "elapsed_time": "0:10:09", "remaining_time": "0:12:41", "throughput": 2541.15, "total_tokens": 1549464} {"current_steps": 2500, "total_steps": 5610, "loss": 0.0049, "lr": 3.392215553979679e-05, "epoch": 4.45632798573975, "percentage": 44.56, "elapsed_time": "0:10:10", "remaining_time": "0:12:39", "throughput": 2541.33, "total_tokens": 1552280} {"current_steps": 2505, "total_steps": 5610, "loss": 0.1194, "lr": 3.38494567036257e-05, "epoch": 4.46524064171123, "percentage": 44.65, "elapsed_time": "0:10:11", "remaining_time": "0:12:38", "throughput": 2541.96, "total_tokens": 1555448} {"current_steps": 2510, "total_steps": 5610, "loss": 0.0021, "lr": 3.3776672213923587e-05, "epoch": 4.47415329768271, "percentage": 44.74, "elapsed_time": "0:10:13", "remaining_time": "0:12:37", "throughput": 2543.28, "total_tokens": 1559480} {"current_steps": 2515, "total_steps": 5610, "loss": 0.078, "lr": 3.370380277516858e-05, "epoch": 4.483065953654189, "percentage": 44.83, "elapsed_time": "0:10:14", "remaining_time": "0:12:35", "throughput": 2544.05, "total_tokens": 1562872} {"current_steps": 2520, "total_steps": 5610, "loss": 0.0329, "lr": 3.3630849092661e-05, "epoch": 4.491978609625669, "percentage": 44.92, "elapsed_time": "0:10:15", "remaining_time": "0:12:34", "throughput": 2544.23, "total_tokens": 1565752} {"current_steps": 2525, "total_steps": 5610, "loss": 0.0106, "lr": 3.355781187251657e-05, "epoch": 4.500891265597148, "percentage": 45.01, "elapsed_time": "0:10:16", "remaining_time": "0:12:33", "throughput": 2544.47, "total_tokens": 1568600} {"current_steps": 2529, "total_steps": 5610, "eval_loss": 0.13448920845985413, "epoch": 4.508021390374331, "percentage": 45.08, "elapsed_time": "0:10:23", "remaining_time": "0:12:39", "throughput": 2518.96, "total_tokens": 1570936} {"current_steps": 2530, "total_steps": 5610, "loss": 0.0823, "lr": 3.3484691821659584e-05, "epoch": 4.509803921568627, "percentage": 45.1, "elapsed_time": "0:10:24", "remaining_time": "0:12:40", "throughput": 2516.06, "total_tokens": 1571512} {"current_steps": 2535, "total_steps": 5610, "loss": 0.076, "lr": 3.3411489647816016e-05, "epoch": 4.518716577540107, "percentage": 45.19, "elapsed_time": "0:10:25", "remaining_time": "0:12:38", "throughput": 2516.16, "total_tokens": 1574232} {"current_steps": 2540, "total_steps": 5610, "loss": 0.0182, "lr": 3.3338206059506736e-05, "epoch": 4.527629233511586, "percentage": 45.28, "elapsed_time": "0:10:26", "remaining_time": "0:12:37", "throughput": 2517.11, "total_tokens": 1577816} {"current_steps": 2545, "total_steps": 5610, "loss": 0.0118, "lr": 3.326484176604061e-05, "epoch": 4.536541889483066, "percentage": 45.37, "elapsed_time": "0:10:28", "remaining_time": "0:12:36", "throughput": 2517.99, "total_tokens": 1581368} {"current_steps": 2550, "total_steps": 5610, "loss": 0.0055, "lr": 3.3191397477507655e-05, "epoch": 4.545454545454545, "percentage": 45.45, "elapsed_time": "0:10:29", "remaining_time": "0:12:34", "throughput": 2517.92, "total_tokens": 1583800} {"current_steps": 2555, "total_steps": 5610, "loss": 0.0965, "lr": 3.3117873904772123e-05, "epoch": 4.554367201426025, "percentage": 45.54, "elapsed_time": "0:10:30", "remaining_time": "0:12:33", "throughput": 2518.85, "total_tokens": 1587384} {"current_steps": 2560, "total_steps": 5610, "loss": 0.0006, "lr": 3.30442717594657e-05, "epoch": 4.563279857397505, "percentage": 45.63, "elapsed_time": "0:10:31", "remaining_time": "0:12:32", "throughput": 2519.17, "total_tokens": 1590328} {"current_steps": 2565, "total_steps": 5610, "loss": 0.0157, "lr": 3.297059175398056e-05, "epoch": 4.572192513368984, "percentage": 45.72, "elapsed_time": "0:10:32", "remaining_time": "0:12:30", "throughput": 2520.26, "total_tokens": 1594136} {"current_steps": 2570, "total_steps": 5610, "loss": 0.0866, "lr": 3.289683460146244e-05, "epoch": 4.581105169340463, "percentage": 45.81, "elapsed_time": "0:10:33", "remaining_time": "0:12:29", "throughput": 2521.16, "total_tokens": 1597656} {"current_steps": 2575, "total_steps": 5610, "loss": 0.0065, "lr": 3.282300101580386e-05, "epoch": 4.590017825311943, "percentage": 45.9, "elapsed_time": "0:10:34", "remaining_time": "0:12:28", "throughput": 2521.37, "total_tokens": 1600536} {"current_steps": 2580, "total_steps": 5610, "loss": 0.0603, "lr": 3.274909171163706e-05, "epoch": 4.598930481283422, "percentage": 45.99, "elapsed_time": "0:10:35", "remaining_time": "0:12:26", "throughput": 2521.97, "total_tokens": 1603832} {"current_steps": 2585, "total_steps": 5610, "loss": 0.2128, "lr": 3.2675107404327194e-05, "epoch": 4.607843137254902, "percentage": 46.08, "elapsed_time": "0:10:37", "remaining_time": "0:12:25", "throughput": 2522.94, "total_tokens": 1607480} {"current_steps": 2590, "total_steps": 5610, "loss": 0.0019, "lr": 3.2601048809965355e-05, "epoch": 4.616755793226382, "percentage": 46.17, "elapsed_time": "0:10:38", "remaining_time": "0:12:24", "throughput": 2523.17, "total_tokens": 1610296} {"current_steps": 2595, "total_steps": 5610, "loss": 0.023, "lr": 3.2526916645361666e-05, "epoch": 4.625668449197861, "percentage": 46.26, "elapsed_time": "0:10:39", "remaining_time": "0:12:22", "throughput": 2523.56, "total_tokens": 1613336} {"current_steps": 2600, "total_steps": 5610, "loss": 0.1726, "lr": 3.2452711628038324e-05, "epoch": 4.634581105169341, "percentage": 46.35, "elapsed_time": "0:10:40", "remaining_time": "0:12:21", "throughput": 2523.72, "total_tokens": 1616152} {"current_steps": 2605, "total_steps": 5610, "loss": 0.0814, "lr": 3.2378434476222666e-05, "epoch": 4.64349376114082, "percentage": 46.43, "elapsed_time": "0:10:41", "remaining_time": "0:12:20", "throughput": 2524.93, "total_tokens": 1620024} {"current_steps": 2610, "total_steps": 5610, "loss": 0.0128, "lr": 3.2304085908840244e-05, "epoch": 4.652406417112299, "percentage": 46.52, "elapsed_time": "0:10:42", "remaining_time": "0:12:18", "throughput": 2525.79, "total_tokens": 1623544} {"current_steps": 2615, "total_steps": 5610, "loss": 0.1778, "lr": 3.222966664550777e-05, "epoch": 4.661319073083779, "percentage": 46.61, "elapsed_time": "0:10:43", "remaining_time": "0:12:17", "throughput": 2525.91, "total_tokens": 1626296} {"current_steps": 2620, "total_steps": 5610, "loss": 0.0401, "lr": 3.2155177406526304e-05, "epoch": 4.670231729055258, "percentage": 46.7, "elapsed_time": "0:10:44", "remaining_time": "0:12:16", "throughput": 2526.3, "total_tokens": 1629336} {"current_steps": 2625, "total_steps": 5610, "loss": 0.0363, "lr": 3.208061891287414e-05, "epoch": 4.6791443850267385, "percentage": 46.79, "elapsed_time": "0:10:46", "remaining_time": "0:12:14", "throughput": 2527.18, "total_tokens": 1632888} {"current_steps": 2630, "total_steps": 5610, "loss": 0.158, "lr": 3.200599188619989e-05, "epoch": 4.688057040998218, "percentage": 46.88, "elapsed_time": "0:10:47", "remaining_time": "0:12:13", "throughput": 2527.48, "total_tokens": 1635768} {"current_steps": 2635, "total_steps": 5610, "loss": 0.021, "lr": 3.1931297048815534e-05, "epoch": 4.696969696969697, "percentage": 46.97, "elapsed_time": "0:10:48", "remaining_time": "0:12:12", "throughput": 2528.26, "total_tokens": 1639256} {"current_steps": 2640, "total_steps": 5610, "loss": 0.0132, "lr": 3.185653512368933e-05, "epoch": 4.705882352941177, "percentage": 47.06, "elapsed_time": "0:10:49", "remaining_time": "0:12:10", "throughput": 2529.38, "total_tokens": 1643128} {"current_steps": 2645, "total_steps": 5610, "loss": 0.0264, "lr": 3.178170683443893e-05, "epoch": 4.714795008912656, "percentage": 47.15, "elapsed_time": "0:10:50", "remaining_time": "0:12:09", "throughput": 2530.06, "total_tokens": 1646424} {"current_steps": 2650, "total_steps": 5610, "loss": 0.1036, "lr": 3.1706812905324276e-05, "epoch": 4.723707664884135, "percentage": 47.24, "elapsed_time": "0:10:51", "remaining_time": "0:12:08", "throughput": 2530.3, "total_tokens": 1649240} {"current_steps": 2655, "total_steps": 5610, "loss": 0.1607, "lr": 3.1631854061240684e-05, "epoch": 4.732620320855615, "percentage": 47.33, "elapsed_time": "0:10:52", "remaining_time": "0:12:06", "throughput": 2530.56, "total_tokens": 1652184} {"current_steps": 2660, "total_steps": 5610, "loss": 0.0155, "lr": 3.155683102771173e-05, "epoch": 4.741532976827094, "percentage": 47.42, "elapsed_time": "0:10:54", "remaining_time": "0:12:05", "throughput": 2531.18, "total_tokens": 1655480} {"current_steps": 2665, "total_steps": 5610, "loss": 0.0032, "lr": 3.1481744530882305e-05, "epoch": 4.750445632798574, "percentage": 47.5, "elapsed_time": "0:10:55", "remaining_time": "0:12:04", "throughput": 2532.34, "total_tokens": 1659352} {"current_steps": 2670, "total_steps": 5610, "loss": 0.0295, "lr": 3.1406595297511566e-05, "epoch": 4.759358288770054, "percentage": 47.59, "elapsed_time": "0:10:56", "remaining_time": "0:12:02", "throughput": 2532.38, "total_tokens": 1661976} {"current_steps": 2675, "total_steps": 5610, "loss": 0.0018, "lr": 3.133138405496587e-05, "epoch": 4.768270944741533, "percentage": 47.68, "elapsed_time": "0:10:57", "remaining_time": "0:12:01", "throughput": 2532.29, "total_tokens": 1664504} {"current_steps": 2680, "total_steps": 5610, "loss": 0.1255, "lr": 3.125611153121178e-05, "epoch": 4.777183600713013, "percentage": 47.77, "elapsed_time": "0:10:58", "remaining_time": "0:11:59", "throughput": 2532.44, "total_tokens": 1667288} {"current_steps": 2685, "total_steps": 5610, "loss": 0.0822, "lr": 3.118077845480897e-05, "epoch": 4.786096256684492, "percentage": 47.86, "elapsed_time": "0:10:59", "remaining_time": "0:11:58", "throughput": 2532.87, "total_tokens": 1670360} {"current_steps": 2690, "total_steps": 5610, "loss": 0.106, "lr": 3.110538555490324e-05, "epoch": 4.795008912655971, "percentage": 47.95, "elapsed_time": "0:11:00", "remaining_time": "0:11:57", "throughput": 2533.51, "total_tokens": 1673624} {"current_steps": 2695, "total_steps": 5610, "loss": 0.0022, "lr": 3.1029933561219375e-05, "epoch": 4.803921568627451, "percentage": 48.04, "elapsed_time": "0:11:01", "remaining_time": "0:11:55", "throughput": 2533.63, "total_tokens": 1676440} {"current_steps": 2700, "total_steps": 5610, "loss": 0.071, "lr": 3.095442320405418e-05, "epoch": 4.81283422459893, "percentage": 48.13, "elapsed_time": "0:11:02", "remaining_time": "0:11:54", "throughput": 2533.98, "total_tokens": 1679448} {"current_steps": 2705, "total_steps": 5610, "loss": 0.0424, "lr": 3.0878855214269293e-05, "epoch": 4.8217468805704105, "percentage": 48.22, "elapsed_time": "0:11:03", "remaining_time": "0:11:52", "throughput": 2534.38, "total_tokens": 1682520} {"current_steps": 2710, "total_steps": 5610, "loss": 0.0025, "lr": 3.0803230323284225e-05, "epoch": 4.83065953654189, "percentage": 48.31, "elapsed_time": "0:11:05", "remaining_time": "0:11:51", "throughput": 2534.81, "total_tokens": 1685656} {"current_steps": 2715, "total_steps": 5610, "loss": 0.0027, "lr": 3.0727549263069224e-05, "epoch": 4.839572192513369, "percentage": 48.4, "elapsed_time": "0:11:06", "remaining_time": "0:11:50", "throughput": 2535.33, "total_tokens": 1688856} {"current_steps": 2720, "total_steps": 5610, "loss": 0.0328, "lr": 3.065181276613817e-05, "epoch": 4.848484848484849, "percentage": 48.48, "elapsed_time": "0:11:07", "remaining_time": "0:11:48", "throughput": 2535.7, "total_tokens": 1691768} {"current_steps": 2725, "total_steps": 5610, "loss": 0.0004, "lr": 3.057602156554155e-05, "epoch": 4.857397504456328, "percentage": 48.57, "elapsed_time": "0:11:08", "remaining_time": "0:11:47", "throughput": 2535.78, "total_tokens": 1694488} {"current_steps": 2730, "total_steps": 5610, "loss": 0.0695, "lr": 3.0500176394859293e-05, "epoch": 4.866310160427807, "percentage": 48.66, "elapsed_time": "0:11:09", "remaining_time": "0:11:46", "throughput": 2536.38, "total_tokens": 1697752} {"current_steps": 2735, "total_steps": 5610, "loss": 0.1446, "lr": 3.042427798819373e-05, "epoch": 4.875222816399287, "percentage": 48.75, "elapsed_time": "0:11:10", "remaining_time": "0:11:44", "throughput": 2536.41, "total_tokens": 1700408} {"current_steps": 2740, "total_steps": 5610, "loss": 0.1087, "lr": 3.0348327080162435e-05, "epoch": 4.884135472370766, "percentage": 48.84, "elapsed_time": "0:11:11", "remaining_time": "0:11:43", "throughput": 2536.88, "total_tokens": 1703512} {"current_steps": 2745, "total_steps": 5610, "loss": 0.0457, "lr": 3.0272324405891172e-05, "epoch": 4.893048128342246, "percentage": 48.93, "elapsed_time": "0:11:12", "remaining_time": "0:11:42", "throughput": 2537.66, "total_tokens": 1707032} {"current_steps": 2750, "total_steps": 5610, "loss": 0.0006, "lr": 3.0196270701006706e-05, "epoch": 4.901960784313726, "percentage": 49.02, "elapsed_time": "0:11:13", "remaining_time": "0:11:40", "throughput": 2538.26, "total_tokens": 1710328} {"current_steps": 2755, "total_steps": 5610, "loss": 0.0688, "lr": 3.012016670162977e-05, "epoch": 4.910873440285205, "percentage": 49.11, "elapsed_time": "0:11:14", "remaining_time": "0:11:39", "throughput": 2538.04, "total_tokens": 1712632} {"current_steps": 2760, "total_steps": 5610, "loss": 0.1258, "lr": 3.0044013144367866e-05, "epoch": 4.919786096256685, "percentage": 49.2, "elapsed_time": "0:11:16", "remaining_time": "0:11:38", "throughput": 2538.97, "total_tokens": 1716344} {"current_steps": 2765, "total_steps": 5610, "loss": 0.0331, "lr": 2.996781076630816e-05, "epoch": 4.928698752228164, "percentage": 49.29, "elapsed_time": "0:11:16", "remaining_time": "0:11:36", "throughput": 2538.76, "total_tokens": 1718712} {"current_steps": 2770, "total_steps": 5610, "loss": 0.0966, "lr": 2.9891560305010392e-05, "epoch": 4.937611408199643, "percentage": 49.38, "elapsed_time": "0:11:18", "remaining_time": "0:11:35", "throughput": 2539.6, "total_tokens": 1722328} {"current_steps": 2775, "total_steps": 5610, "loss": 0.0005, "lr": 2.9815262498499657e-05, "epoch": 4.946524064171123, "percentage": 49.47, "elapsed_time": "0:11:19", "remaining_time": "0:11:33", "throughput": 2540.08, "total_tokens": 1725464} {"current_steps": 2780, "total_steps": 5610, "loss": 0.0312, "lr": 2.9738918085259314e-05, "epoch": 4.955436720142602, "percentage": 49.55, "elapsed_time": "0:11:20", "remaining_time": "0:11:32", "throughput": 2540.44, "total_tokens": 1728472} {"current_steps": 2785, "total_steps": 5610, "loss": 0.1607, "lr": 2.9662527804223827e-05, "epoch": 4.9643493761140824, "percentage": 49.64, "elapsed_time": "0:11:21", "remaining_time": "0:11:31", "throughput": 2540.55, "total_tokens": 1731160} {"current_steps": 2790, "total_steps": 5610, "loss": 0.1619, "lr": 2.9586092394771637e-05, "epoch": 4.973262032085562, "percentage": 49.73, "elapsed_time": "0:11:22", "remaining_time": "0:11:29", "throughput": 2540.94, "total_tokens": 1734264} {"current_steps": 2795, "total_steps": 5610, "loss": 0.1195, "lr": 2.950961259671793e-05, "epoch": 4.982174688057041, "percentage": 49.82, "elapsed_time": "0:11:23", "remaining_time": "0:11:28", "throughput": 2541.2, "total_tokens": 1737144} {"current_steps": 2800, "total_steps": 5610, "loss": 0.0633, "lr": 2.943308915030757e-05, "epoch": 4.991087344028521, "percentage": 49.91, "elapsed_time": "0:11:24", "remaining_time": "0:11:27", "throughput": 2541.93, "total_tokens": 1740664} {"current_steps": 2805, "total_steps": 5610, "loss": 0.0716, "lr": 2.935652279620788e-05, "epoch": 5.0, "percentage": 50.0, "elapsed_time": "0:11:25", "remaining_time": "0:11:25", "throughput": 2541.85, "total_tokens": 1743216} {"current_steps": 2810, "total_steps": 5610, "loss": 0.077, "lr": 2.9279914275501473e-05, "epoch": 5.008912655971479, "percentage": 50.09, "elapsed_time": "0:11:27", "remaining_time": "0:11:24", "throughput": 2541.87, "total_tokens": 1746384} {"current_steps": 2810, "total_steps": 5610, "eval_loss": 0.1459943950176239, "epoch": 5.008912655971479, "percentage": 50.09, "elapsed_time": "0:11:33", "remaining_time": "0:11:30", "throughput": 2518.79, "total_tokens": 1746384} {"current_steps": 2815, "total_steps": 5610, "loss": 0.014, "lr": 2.9203264329679115e-05, "epoch": 5.017825311942959, "percentage": 50.18, "elapsed_time": "0:11:35", "remaining_time": "0:11:30", "throughput": 2516.71, "total_tokens": 1749680} {"current_steps": 2820, "total_steps": 5610, "loss": 0.0014, "lr": 2.9126573700632504e-05, "epoch": 5.026737967914438, "percentage": 50.27, "elapsed_time": "0:11:36", "remaining_time": "0:11:28", "throughput": 2517.42, "total_tokens": 1753104} {"current_steps": 2825, "total_steps": 5610, "loss": 0.0459, "lr": 2.9049843130647112e-05, "epoch": 5.035650623885918, "percentage": 50.36, "elapsed_time": "0:11:37", "remaining_time": "0:11:27", "throughput": 2517.78, "total_tokens": 1756112} {"current_steps": 2830, "total_steps": 5610, "loss": 0.0925, "lr": 2.8973073362394998e-05, "epoch": 5.044563279857398, "percentage": 50.45, "elapsed_time": "0:11:38", "remaining_time": "0:11:26", "throughput": 2518.33, "total_tokens": 1759344} {"current_steps": 2835, "total_steps": 5610, "loss": 0.0094, "lr": 2.8896265138927638e-05, "epoch": 5.053475935828877, "percentage": 50.53, "elapsed_time": "0:11:39", "remaining_time": "0:11:24", "throughput": 2518.64, "total_tokens": 1762288} {"current_steps": 2840, "total_steps": 5610, "loss": 0.0154, "lr": 2.881941920366868e-05, "epoch": 5.062388591800357, "percentage": 50.62, "elapsed_time": "0:11:40", "remaining_time": "0:11:23", "throughput": 2518.77, "total_tokens": 1765072} {"current_steps": 2845, "total_steps": 5610, "loss": 0.1486, "lr": 2.8742536300406804e-05, "epoch": 5.071301247771836, "percentage": 50.71, "elapsed_time": "0:11:41", "remaining_time": "0:11:22", "throughput": 2519.04, "total_tokens": 1767952} {"current_steps": 2850, "total_steps": 5610, "loss": 0.0356, "lr": 2.8665617173288516e-05, "epoch": 5.080213903743315, "percentage": 50.8, "elapsed_time": "0:11:42", "remaining_time": "0:11:20", "throughput": 2519.31, "total_tokens": 1770896} {"current_steps": 2855, "total_steps": 5610, "loss": 0.1233, "lr": 2.8588662566810893e-05, "epoch": 5.089126559714795, "percentage": 50.89, "elapsed_time": "0:11:43", "remaining_time": "0:11:19", "throughput": 2519.66, "total_tokens": 1773840} {"current_steps": 2860, "total_steps": 5610, "loss": 0.0232, "lr": 2.851167322581445e-05, "epoch": 5.098039215686274, "percentage": 50.98, "elapsed_time": "0:11:45", "remaining_time": "0:11:17", "throughput": 2519.93, "total_tokens": 1776720} {"current_steps": 2865, "total_steps": 5610, "loss": 0.044, "lr": 2.8434649895475877e-05, "epoch": 5.106951871657754, "percentage": 51.07, "elapsed_time": "0:11:46", "remaining_time": "0:11:16", "throughput": 2519.77, "total_tokens": 1779088} {"current_steps": 2870, "total_steps": 5610, "loss": 0.0049, "lr": 2.8357593321300856e-05, "epoch": 5.115864527629234, "percentage": 51.16, "elapsed_time": "0:11:47", "remaining_time": "0:11:15", "throughput": 2519.91, "total_tokens": 1781776} {"current_steps": 2875, "total_steps": 5610, "loss": 0.009, "lr": 2.828050424911683e-05, "epoch": 5.124777183600713, "percentage": 51.25, "elapsed_time": "0:11:48", "remaining_time": "0:11:13", "throughput": 2520.17, "total_tokens": 1784720} {"current_steps": 2880, "total_steps": 5610, "loss": 0.0564, "lr": 2.8203383425065787e-05, "epoch": 5.133689839572193, "percentage": 51.34, "elapsed_time": "0:11:49", "remaining_time": "0:11:12", "throughput": 2520.57, "total_tokens": 1787856} {"current_steps": 2885, "total_steps": 5610, "loss": 0.001, "lr": 2.812623159559704e-05, "epoch": 5.142602495543672, "percentage": 51.43, "elapsed_time": "0:11:50", "remaining_time": "0:11:11", "throughput": 2521.08, "total_tokens": 1791088} {"current_steps": 2890, "total_steps": 5610, "loss": 0.0714, "lr": 2.8049049507460003e-05, "epoch": 5.151515151515151, "percentage": 51.52, "elapsed_time": "0:11:51", "remaining_time": "0:11:09", "throughput": 2522.21, "total_tokens": 1795056} {"current_steps": 2895, "total_steps": 5610, "loss": 0.1153, "lr": 2.7971837907696973e-05, "epoch": 5.160427807486631, "percentage": 51.6, "elapsed_time": "0:11:52", "remaining_time": "0:11:08", "throughput": 2523.25, "total_tokens": 1798928} {"current_steps": 2900, "total_steps": 5610, "loss": 0.0276, "lr": 2.7894597543635863e-05, "epoch": 5.16934046345811, "percentage": 51.69, "elapsed_time": "0:11:54", "remaining_time": "0:11:07", "throughput": 2523.96, "total_tokens": 1802384} {"current_steps": 2905, "total_steps": 5610, "loss": 0.0015, "lr": 2.781732916288303e-05, "epoch": 5.17825311942959, "percentage": 51.78, "elapsed_time": "0:11:55", "remaining_time": "0:11:06", "throughput": 2524.47, "total_tokens": 1805616} {"current_steps": 2910, "total_steps": 5610, "loss": 0.0121, "lr": 2.774003351331597e-05, "epoch": 5.18716577540107, "percentage": 51.87, "elapsed_time": "0:11:56", "remaining_time": "0:11:04", "throughput": 2525.13, "total_tokens": 1809008} {"current_steps": 2915, "total_steps": 5610, "loss": 0.0308, "lr": 2.7662711343076135e-05, "epoch": 5.196078431372549, "percentage": 51.96, "elapsed_time": "0:11:57", "remaining_time": "0:11:03", "throughput": 2526.07, "total_tokens": 1812784} {"current_steps": 2920, "total_steps": 5610, "loss": 0.0108, "lr": 2.7585363400561658e-05, "epoch": 5.204991087344029, "percentage": 52.05, "elapsed_time": "0:11:58", "remaining_time": "0:11:02", "throughput": 2526.0, "total_tokens": 1815248} {"current_steps": 2925, "total_steps": 5610, "loss": 0.0895, "lr": 2.7507990434420126e-05, "epoch": 5.213903743315508, "percentage": 52.14, "elapsed_time": "0:11:59", "remaining_time": "0:11:00", "throughput": 2526.1, "total_tokens": 1818032} {"current_steps": 2930, "total_steps": 5610, "loss": 0.0009, "lr": 2.7430593193541325e-05, "epoch": 5.222816399286987, "percentage": 52.23, "elapsed_time": "0:12:00", "remaining_time": "0:10:59", "throughput": 2526.59, "total_tokens": 1821232} {"current_steps": 2935, "total_steps": 5610, "loss": 0.0073, "lr": 2.7353172427049995e-05, "epoch": 5.231729055258467, "percentage": 52.32, "elapsed_time": "0:12:02", "remaining_time": "0:10:58", "throughput": 2527.34, "total_tokens": 1824784} {"current_steps": 2940, "total_steps": 5610, "loss": 0.0176, "lr": 2.7275728884298596e-05, "epoch": 5.240641711229946, "percentage": 52.41, "elapsed_time": "0:12:03", "remaining_time": "0:10:56", "throughput": 2527.05, "total_tokens": 1827088} {"current_steps": 2945, "total_steps": 5610, "loss": 0.0017, "lr": 2.719826331486e-05, "epoch": 5.249554367201426, "percentage": 52.5, "elapsed_time": "0:12:04", "remaining_time": "0:10:55", "throughput": 2526.68, "total_tokens": 1829328} {"current_steps": 2950, "total_steps": 5610, "loss": 0.0699, "lr": 2.7120776468520314e-05, "epoch": 5.258467023172906, "percentage": 52.58, "elapsed_time": "0:12:05", "remaining_time": "0:10:53", "throughput": 2527.66, "total_tokens": 1833136} {"current_steps": 2955, "total_steps": 5610, "loss": 0.0647, "lr": 2.7043269095271573e-05, "epoch": 5.267379679144385, "percentage": 52.67, "elapsed_time": "0:12:06", "remaining_time": "0:10:52", "throughput": 2527.59, "total_tokens": 1835632} {"current_steps": 2960, "total_steps": 5610, "loss": 0.0064, "lr": 2.6965741945304467e-05, "epoch": 5.276292335115865, "percentage": 52.76, "elapsed_time": "0:12:07", "remaining_time": "0:10:51", "throughput": 2528.22, "total_tokens": 1838992} {"current_steps": 2965, "total_steps": 5610, "loss": 0.0289, "lr": 2.6888195769001146e-05, "epoch": 5.285204991087344, "percentage": 52.85, "elapsed_time": "0:12:08", "remaining_time": "0:10:49", "throughput": 2528.38, "total_tokens": 1841840} {"current_steps": 2970, "total_steps": 5610, "loss": 0.0796, "lr": 2.681063131692787e-05, "epoch": 5.294117647058823, "percentage": 52.94, "elapsed_time": "0:12:09", "remaining_time": "0:10:48", "throughput": 2528.49, "total_tokens": 1844560} {"current_steps": 2975, "total_steps": 5610, "loss": 0.0756, "lr": 2.673304933982783e-05, "epoch": 5.303030303030303, "percentage": 53.03, "elapsed_time": "0:12:10", "remaining_time": "0:10:47", "throughput": 2529.64, "total_tokens": 1848624} {"current_steps": 2980, "total_steps": 5610, "loss": 0.2812, "lr": 2.6655450588613806e-05, "epoch": 5.311942959001782, "percentage": 53.12, "elapsed_time": "0:12:11", "remaining_time": "0:10:45", "throughput": 2530.18, "total_tokens": 1851952} {"current_steps": 2985, "total_steps": 5610, "loss": 0.1026, "lr": 2.657783581436097e-05, "epoch": 5.320855614973262, "percentage": 53.21, "elapsed_time": "0:12:13", "remaining_time": "0:10:44", "throughput": 2531.09, "total_tokens": 1855696} {"current_steps": 2990, "total_steps": 5610, "loss": 0.107, "lr": 2.6500205768299535e-05, "epoch": 5.329768270944742, "percentage": 53.3, "elapsed_time": "0:12:14", "remaining_time": "0:10:43", "throughput": 2532.0, "total_tokens": 1859408} {"current_steps": 2995, "total_steps": 5610, "loss": 0.1455, "lr": 2.642256120180758e-05, "epoch": 5.338680926916221, "percentage": 53.39, "elapsed_time": "0:12:15", "remaining_time": "0:10:42", "throughput": 2531.87, "total_tokens": 1861936} {"current_steps": 3000, "total_steps": 5610, "loss": 0.0684, "lr": 2.6344902866403687e-05, "epoch": 5.347593582887701, "percentage": 53.48, "elapsed_time": "0:12:16", "remaining_time": "0:10:40", "throughput": 2532.0, "total_tokens": 1864624} {"current_steps": 3005, "total_steps": 5610, "loss": 0.1061, "lr": 2.6267231513739726e-05, "epoch": 5.35650623885918, "percentage": 53.57, "elapsed_time": "0:12:17", "remaining_time": "0:10:39", "throughput": 2532.32, "total_tokens": 1867600} {"current_steps": 3010, "total_steps": 5610, "loss": 0.0031, "lr": 2.6189547895593562e-05, "epoch": 5.365418894830659, "percentage": 53.65, "elapsed_time": "0:12:18", "remaining_time": "0:10:37", "throughput": 2532.7, "total_tokens": 1870672} {"current_steps": 3015, "total_steps": 5610, "loss": 0.0391, "lr": 2.611185276386176e-05, "epoch": 5.374331550802139, "percentage": 53.74, "elapsed_time": "0:12:19", "remaining_time": "0:10:36", "throughput": 2533.38, "total_tokens": 1874160} {"current_steps": 3020, "total_steps": 5610, "loss": 0.0856, "lr": 2.6034146870552346e-05, "epoch": 5.383244206773618, "percentage": 53.83, "elapsed_time": "0:12:20", "remaining_time": "0:10:35", "throughput": 2534.11, "total_tokens": 1877616} {"current_steps": 3025, "total_steps": 5610, "loss": 0.1954, "lr": 2.595643096777748e-05, "epoch": 5.392156862745098, "percentage": 53.92, "elapsed_time": "0:12:22", "remaining_time": "0:10:34", "throughput": 2534.24, "total_tokens": 1880432} {"current_steps": 3030, "total_steps": 5610, "loss": 0.0227, "lr": 2.5878705807746245e-05, "epoch": 5.401069518716578, "percentage": 54.01, "elapsed_time": "0:12:23", "remaining_time": "0:10:32", "throughput": 2535.43, "total_tokens": 1884528} {"current_steps": 3035, "total_steps": 5610, "loss": 0.1689, "lr": 2.580097214275727e-05, "epoch": 5.409982174688057, "percentage": 54.1, "elapsed_time": "0:12:24", "remaining_time": "0:10:31", "throughput": 2535.38, "total_tokens": 1887152} {"current_steps": 3040, "total_steps": 5610, "loss": 0.0036, "lr": 2.5723230725191554e-05, "epoch": 5.418894830659537, "percentage": 54.19, "elapsed_time": "0:12:25", "remaining_time": "0:10:30", "throughput": 2535.61, "total_tokens": 1890032} {"current_steps": 3045, "total_steps": 5610, "loss": 0.0668, "lr": 2.5645482307505108e-05, "epoch": 5.427807486631016, "percentage": 54.28, "elapsed_time": "0:12:26", "remaining_time": "0:10:28", "throughput": 2535.38, "total_tokens": 1892304} {"current_steps": 3050, "total_steps": 5610, "loss": 0.1114, "lr": 2.55677276422217e-05, "epoch": 5.436720142602495, "percentage": 54.37, "elapsed_time": "0:12:27", "remaining_time": "0:10:27", "throughput": 2535.99, "total_tokens": 1895728} {"current_steps": 3055, "total_steps": 5610, "loss": 0.0155, "lr": 2.548996748192556e-05, "epoch": 5.445632798573975, "percentage": 54.46, "elapsed_time": "0:12:28", "remaining_time": "0:10:26", "throughput": 2536.1, "total_tokens": 1898384} {"current_steps": 3060, "total_steps": 5610, "loss": 0.0471, "lr": 2.541220257925412e-05, "epoch": 5.454545454545454, "percentage": 54.55, "elapsed_time": "0:12:29", "remaining_time": "0:10:24", "throughput": 2536.25, "total_tokens": 1901104} {"current_steps": 3065, "total_steps": 5610, "loss": 0.0239, "lr": 2.5334433686890702e-05, "epoch": 5.463458110516934, "percentage": 54.63, "elapsed_time": "0:12:30", "remaining_time": "0:10:23", "throughput": 2537.26, "total_tokens": 1904976} {"current_steps": 3070, "total_steps": 5610, "loss": 0.0133, "lr": 2.5256661557557247e-05, "epoch": 5.472370766488414, "percentage": 54.72, "elapsed_time": "0:12:32", "remaining_time": "0:10:22", "throughput": 2538.09, "total_tokens": 1908688} {"current_steps": 3075, "total_steps": 5610, "loss": 0.0521, "lr": 2.517888694400704e-05, "epoch": 5.481283422459893, "percentage": 54.81, "elapsed_time": "0:12:33", "remaining_time": "0:10:20", "throughput": 2538.44, "total_tokens": 1911792} {"current_steps": 3080, "total_steps": 5610, "loss": 0.0028, "lr": 2.5101110599017374e-05, "epoch": 5.490196078431373, "percentage": 54.9, "elapsed_time": "0:12:34", "remaining_time": "0:10:19", "throughput": 2539.05, "total_tokens": 1915248} {"current_steps": 3085, "total_steps": 5610, "loss": 0.0221, "lr": 2.502333327538235e-05, "epoch": 5.499108734402852, "percentage": 54.99, "elapsed_time": "0:12:35", "remaining_time": "0:10:18", "throughput": 2539.56, "total_tokens": 1918544} {"current_steps": 3090, "total_steps": 5610, "loss": 0.0712, "lr": 2.4945555725905502e-05, "epoch": 5.508021390374331, "percentage": 55.08, "elapsed_time": "0:12:36", "remaining_time": "0:10:17", "throughput": 2540.26, "total_tokens": 1922032} {"current_steps": 3091, "total_steps": 5610, "eval_loss": 0.15239505469799042, "epoch": 5.509803921568627, "percentage": 55.1, "elapsed_time": "0:12:43", "remaining_time": "0:10:21", "throughput": 2519.18, "total_tokens": 1922384} {"current_steps": 3095, "total_steps": 5610, "loss": 0.1049, "lr": 2.4867778703392554e-05, "epoch": 5.516934046345811, "percentage": 55.17, "elapsed_time": "0:12:44", "remaining_time": "0:10:21", "throughput": 2516.56, "total_tokens": 1924400} {"current_steps": 3100, "total_steps": 5610, "loss": 0.0076, "lr": 2.479000296064417e-05, "epoch": 5.52584670231729, "percentage": 55.26, "elapsed_time": "0:12:45", "remaining_time": "0:10:20", "throughput": 2516.87, "total_tokens": 1927376} {"current_steps": 3105, "total_steps": 5610, "loss": 0.04, "lr": 2.4712229250448567e-05, "epoch": 5.53475935828877, "percentage": 55.35, "elapsed_time": "0:12:46", "remaining_time": "0:10:18", "throughput": 2517.25, "total_tokens": 1930352} {"current_steps": 3110, "total_steps": 5610, "loss": 0.0013, "lr": 2.4634458325574323e-05, "epoch": 5.54367201426025, "percentage": 55.44, "elapsed_time": "0:12:47", "remaining_time": "0:10:17", "throughput": 2517.87, "total_tokens": 1933680} {"current_steps": 3115, "total_steps": 5610, "loss": 0.0016, "lr": 2.4556690938763062e-05, "epoch": 5.552584670231729, "percentage": 55.53, "elapsed_time": "0:12:49", "remaining_time": "0:10:16", "throughput": 2518.83, "total_tokens": 1937488} {"current_steps": 3120, "total_steps": 5610, "loss": 0.0034, "lr": 2.4478927842722154e-05, "epoch": 5.561497326203209, "percentage": 55.61, "elapsed_time": "0:12:50", "remaining_time": "0:10:14", "throughput": 2519.06, "total_tokens": 1940368} {"current_steps": 3125, "total_steps": 5610, "loss": 0.022, "lr": 2.4401169790117427e-05, "epoch": 5.570409982174688, "percentage": 55.7, "elapsed_time": "0:12:51", "remaining_time": "0:10:13", "throughput": 2519.62, "total_tokens": 1943728} {"current_steps": 3130, "total_steps": 5610, "loss": 0.1843, "lr": 2.4323417533565916e-05, "epoch": 5.579322638146167, "percentage": 55.79, "elapsed_time": "0:12:52", "remaining_time": "0:10:12", "throughput": 2519.99, "total_tokens": 1946832} {"current_steps": 3135, "total_steps": 5610, "loss": 0.0948, "lr": 2.424567182562854e-05, "epoch": 5.588235294117647, "percentage": 55.88, "elapsed_time": "0:12:53", "remaining_time": "0:10:10", "throughput": 2520.32, "total_tokens": 1949904} {"current_steps": 3140, "total_steps": 5610, "loss": 0.0667, "lr": 2.4167933418802837e-05, "epoch": 5.597147950089127, "percentage": 55.97, "elapsed_time": "0:12:54", "remaining_time": "0:10:09", "throughput": 2520.23, "total_tokens": 1952432} {"current_steps": 3145, "total_steps": 5610, "loss": 0.0711, "lr": 2.4090203065515695e-05, "epoch": 5.606060606060606, "percentage": 56.06, "elapsed_time": "0:12:55", "remaining_time": "0:10:08", "throughput": 2520.37, "total_tokens": 1955216} {"current_steps": 3150, "total_steps": 5610, "loss": 0.0889, "lr": 2.4012481518116022e-05, "epoch": 5.614973262032086, "percentage": 56.15, "elapsed_time": "0:12:56", "remaining_time": "0:10:06", "throughput": 2520.71, "total_tokens": 1958096} {"current_steps": 3155, "total_steps": 5610, "loss": 0.0592, "lr": 2.3934769528867513e-05, "epoch": 5.623885918003565, "percentage": 56.24, "elapsed_time": "0:12:57", "remaining_time": "0:10:05", "throughput": 2521.28, "total_tokens": 1961456} {"current_steps": 3160, "total_steps": 5610, "loss": 0.1017, "lr": 2.385706784994135e-05, "epoch": 5.632798573975045, "percentage": 56.33, "elapsed_time": "0:12:59", "remaining_time": "0:10:03", "throughput": 2521.42, "total_tokens": 1964272} {"current_steps": 3165, "total_steps": 5610, "loss": 0.0376, "lr": 2.3779377233408923e-05, "epoch": 5.641711229946524, "percentage": 56.42, "elapsed_time": "0:13:00", "remaining_time": "0:10:02", "throughput": 2521.64, "total_tokens": 1967120} {"current_steps": 3170, "total_steps": 5610, "loss": 0.1132, "lr": 2.3701698431234528e-05, "epoch": 5.650623885918003, "percentage": 56.51, "elapsed_time": "0:13:01", "remaining_time": "0:10:01", "throughput": 2521.75, "total_tokens": 1969872} {"current_steps": 3175, "total_steps": 5610, "loss": 0.1088, "lr": 2.362403219526815e-05, "epoch": 5.659536541889483, "percentage": 56.6, "elapsed_time": "0:13:02", "remaining_time": "0:09:59", "throughput": 2522.17, "total_tokens": 1972944} {"current_steps": 3180, "total_steps": 5610, "loss": 0.3807, "lr": 2.3546379277238107e-05, "epoch": 5.668449197860962, "percentage": 56.68, "elapsed_time": "0:13:03", "remaining_time": "0:09:58", "throughput": 2522.43, "total_tokens": 1975888} {"current_steps": 3185, "total_steps": 5610, "loss": 0.0442, "lr": 2.3468740428743833e-05, "epoch": 5.677361853832442, "percentage": 56.77, "elapsed_time": "0:13:04", "remaining_time": "0:09:57", "throughput": 2522.94, "total_tokens": 1979088} {"current_steps": 3190, "total_steps": 5610, "loss": 0.0184, "lr": 2.339111640124859e-05, "epoch": 5.686274509803922, "percentage": 56.86, "elapsed_time": "0:13:05", "remaining_time": "0:09:55", "throughput": 2522.86, "total_tokens": 1981520} {"current_steps": 3195, "total_steps": 5610, "loss": 0.0387, "lr": 2.3313507946072172e-05, "epoch": 5.695187165775401, "percentage": 56.95, "elapsed_time": "0:13:06", "remaining_time": "0:09:54", "throughput": 2523.45, "total_tokens": 1984880} {"current_steps": 3200, "total_steps": 5610, "loss": 0.0526, "lr": 2.323591581438365e-05, "epoch": 5.704099821746881, "percentage": 57.04, "elapsed_time": "0:13:07", "remaining_time": "0:09:53", "throughput": 2523.42, "total_tokens": 1987440} {"current_steps": 3205, "total_steps": 5610, "loss": 0.0058, "lr": 2.3158340757194116e-05, "epoch": 5.71301247771836, "percentage": 57.13, "elapsed_time": "0:13:08", "remaining_time": "0:09:51", "throughput": 2523.86, "total_tokens": 1990640} {"current_steps": 3210, "total_steps": 5610, "loss": 0.0267, "lr": 2.3080783525349388e-05, "epoch": 5.721925133689839, "percentage": 57.22, "elapsed_time": "0:13:09", "remaining_time": "0:09:50", "throughput": 2524.24, "total_tokens": 1993808} {"current_steps": 3215, "total_steps": 5610, "loss": 0.0237, "lr": 2.3003244869522743e-05, "epoch": 5.730837789661319, "percentage": 57.31, "elapsed_time": "0:13:10", "remaining_time": "0:09:49", "throughput": 2524.48, "total_tokens": 1996688} {"current_steps": 3220, "total_steps": 5610, "loss": 0.0987, "lr": 2.2925725540207688e-05, "epoch": 5.739750445632799, "percentage": 57.4, "elapsed_time": "0:13:12", "remaining_time": "0:09:47", "throughput": 2524.76, "total_tokens": 1999696} {"current_steps": 3225, "total_steps": 5610, "loss": 0.0499, "lr": 2.2848226287710645e-05, "epoch": 5.748663101604278, "percentage": 57.49, "elapsed_time": "0:13:13", "remaining_time": "0:09:46", "throughput": 2524.62, "total_tokens": 2002032} {"current_steps": 3230, "total_steps": 5610, "loss": 0.1424, "lr": 2.277074786214372e-05, "epoch": 5.757575757575758, "percentage": 57.58, "elapsed_time": "0:13:14", "remaining_time": "0:09:45", "throughput": 2525.33, "total_tokens": 2005584} {"current_steps": 3235, "total_steps": 5610, "loss": 0.097, "lr": 2.2693291013417453e-05, "epoch": 5.766488413547237, "percentage": 57.66, "elapsed_time": "0:13:15", "remaining_time": "0:09:43", "throughput": 2525.3, "total_tokens": 2008176} {"current_steps": 3240, "total_steps": 5610, "loss": 0.0027, "lr": 2.2615856491233513e-05, "epoch": 5.775401069518717, "percentage": 57.75, "elapsed_time": "0:13:16", "remaining_time": "0:09:42", "throughput": 2525.72, "total_tokens": 2011376} {"current_steps": 3245, "total_steps": 5610, "loss": 0.0006, "lr": 2.2538445045077488e-05, "epoch": 5.784313725490196, "percentage": 57.84, "elapsed_time": "0:13:17", "remaining_time": "0:09:41", "throughput": 2525.97, "total_tokens": 2014224} {"current_steps": 3250, "total_steps": 5610, "loss": 0.0911, "lr": 2.246105742421162e-05, "epoch": 5.793226381461675, "percentage": 57.93, "elapsed_time": "0:13:18", "remaining_time": "0:09:39", "throughput": 2526.08, "total_tokens": 2016912} {"current_steps": 3255, "total_steps": 5610, "loss": 0.1258, "lr": 2.2383694377667543e-05, "epoch": 5.802139037433155, "percentage": 58.02, "elapsed_time": "0:13:19", "remaining_time": "0:09:38", "throughput": 2526.51, "total_tokens": 2020048} {"current_steps": 3260, "total_steps": 5610, "loss": 0.0003, "lr": 2.2306356654239012e-05, "epoch": 5.811051693404634, "percentage": 58.11, "elapsed_time": "0:13:20", "remaining_time": "0:09:37", "throughput": 2526.86, "total_tokens": 2023216} {"current_steps": 3265, "total_steps": 5610, "loss": 0.0009, "lr": 2.222904500247473e-05, "epoch": 5.819964349376114, "percentage": 58.2, "elapsed_time": "0:13:21", "remaining_time": "0:09:35", "throughput": 2527.69, "total_tokens": 2026928} {"current_steps": 3270, "total_steps": 5610, "loss": 0.004, "lr": 2.2151760170671004e-05, "epoch": 5.828877005347594, "percentage": 58.29, "elapsed_time": "0:13:22", "remaining_time": "0:09:34", "throughput": 2527.73, "total_tokens": 2029584} {"current_steps": 3275, "total_steps": 5610, "loss": 0.0417, "lr": 2.207450290686458e-05, "epoch": 5.837789661319073, "percentage": 58.38, "elapsed_time": "0:13:24", "remaining_time": "0:09:33", "throughput": 2528.11, "total_tokens": 2032720} {"current_steps": 3280, "total_steps": 5610, "loss": 0.088, "lr": 2.1997273958825375e-05, "epoch": 5.846702317290553, "percentage": 58.47, "elapsed_time": "0:13:25", "remaining_time": "0:09:31", "throughput": 2528.74, "total_tokens": 2036176} {"current_steps": 3285, "total_steps": 5610, "loss": 0.1004, "lr": 2.1920074074049225e-05, "epoch": 5.855614973262032, "percentage": 58.56, "elapsed_time": "0:13:26", "remaining_time": "0:09:30", "throughput": 2529.38, "total_tokens": 2039632} {"current_steps": 3290, "total_steps": 5610, "loss": 0.0648, "lr": 2.1842903999750665e-05, "epoch": 5.864527629233511, "percentage": 58.65, "elapsed_time": "0:13:27", "remaining_time": "0:09:29", "throughput": 2530.06, "total_tokens": 2043184} {"current_steps": 3295, "total_steps": 5610, "loss": 0.0579, "lr": 2.1765764482855715e-05, "epoch": 5.873440285204991, "percentage": 58.73, "elapsed_time": "0:13:28", "remaining_time": "0:09:28", "throughput": 2530.58, "total_tokens": 2046416} {"current_steps": 3300, "total_steps": 5610, "loss": 0.001, "lr": 2.1688656269994612e-05, "epoch": 5.882352941176471, "percentage": 58.82, "elapsed_time": "0:13:29", "remaining_time": "0:09:26", "throughput": 2530.53, "total_tokens": 2049008} {"current_steps": 3305, "total_steps": 5610, "loss": 0.0239, "lr": 2.1611580107494597e-05, "epoch": 5.89126559714795, "percentage": 58.91, "elapsed_time": "0:13:30", "remaining_time": "0:09:25", "throughput": 2531.28, "total_tokens": 2052656} {"current_steps": 3310, "total_steps": 5610, "loss": 0.0937, "lr": 2.153453674137272e-05, "epoch": 5.90017825311943, "percentage": 59.0, "elapsed_time": "0:13:32", "remaining_time": "0:09:24", "throughput": 2531.74, "total_tokens": 2055888} {"current_steps": 3315, "total_steps": 5610, "loss": 0.1453, "lr": 2.1457526917328588e-05, "epoch": 5.909090909090909, "percentage": 59.09, "elapsed_time": "0:13:33", "remaining_time": "0:09:22", "throughput": 2532.15, "total_tokens": 2059056} {"current_steps": 3320, "total_steps": 5610, "loss": 0.0477, "lr": 2.1380551380737128e-05, "epoch": 5.918003565062389, "percentage": 59.18, "elapsed_time": "0:13:34", "remaining_time": "0:09:21", "throughput": 2532.5, "total_tokens": 2062096} {"current_steps": 3325, "total_steps": 5610, "loss": 0.0597, "lr": 2.130361087664145e-05, "epoch": 5.926916221033868, "percentage": 59.27, "elapsed_time": "0:13:35", "remaining_time": "0:09:20", "throughput": 2532.83, "total_tokens": 2065168} {"current_steps": 3330, "total_steps": 5610, "loss": 0.0187, "lr": 2.122670614974555e-05, "epoch": 5.935828877005347, "percentage": 59.36, "elapsed_time": "0:13:36", "remaining_time": "0:09:18", "throughput": 2532.98, "total_tokens": 2067856} {"current_steps": 3335, "total_steps": 5610, "loss": 0.0907, "lr": 2.1149837944407136e-05, "epoch": 5.944741532976827, "percentage": 59.45, "elapsed_time": "0:13:37", "remaining_time": "0:09:17", "throughput": 2533.41, "total_tokens": 2071056} {"current_steps": 3340, "total_steps": 5610, "loss": 0.0003, "lr": 2.107300700463045e-05, "epoch": 5.953654188948306, "percentage": 59.54, "elapsed_time": "0:13:38", "remaining_time": "0:09:16", "throughput": 2533.77, "total_tokens": 2074192} {"current_steps": 3345, "total_steps": 5610, "loss": 0.0006, "lr": 2.0996214074059034e-05, "epoch": 5.962566844919786, "percentage": 59.63, "elapsed_time": "0:13:39", "remaining_time": "0:09:15", "throughput": 2533.88, "total_tokens": 2077040} {"current_steps": 3350, "total_steps": 5610, "loss": 0.2074, "lr": 2.0919459895968517e-05, "epoch": 5.971479500891266, "percentage": 59.71, "elapsed_time": "0:13:40", "remaining_time": "0:09:13", "throughput": 2533.67, "total_tokens": 2079312} {"current_steps": 3355, "total_steps": 5610, "loss": 0.0026, "lr": 2.084274521325948e-05, "epoch": 5.980392156862745, "percentage": 59.8, "elapsed_time": "0:13:41", "remaining_time": "0:09:12", "throughput": 2534.33, "total_tokens": 2082864} {"current_steps": 3360, "total_steps": 5610, "loss": 0.0012, "lr": 2.0766070768450206e-05, "epoch": 5.989304812834225, "percentage": 59.89, "elapsed_time": "0:13:42", "remaining_time": "0:09:11", "throughput": 2534.6, "total_tokens": 2085872} {"current_steps": 3365, "total_steps": 5610, "loss": 0.0005, "lr": 2.0689437303669508e-05, "epoch": 5.998217468805704, "percentage": 59.98, "elapsed_time": "0:13:43", "remaining_time": "0:09:09", "throughput": 2534.49, "total_tokens": 2088272} {"current_steps": 3370, "total_steps": 5610, "loss": 0.0046, "lr": 2.0612845560649603e-05, "epoch": 6.007130124777183, "percentage": 60.07, "elapsed_time": "0:13:45", "remaining_time": "0:09:08", "throughput": 2534.39, "total_tokens": 2091232} {"current_steps": 3372, "total_steps": 5610, "eval_loss": 0.15147051215171814, "epoch": 6.010695187165775, "percentage": 60.11, "elapsed_time": "0:13:51", "remaining_time": "0:09:12", "throughput": 2515.25, "total_tokens": 2092320} {"current_steps": 3375, "total_steps": 5610, "loss": 0.021, "lr": 2.0536296280718825e-05, "epoch": 6.016042780748663, "percentage": 60.16, "elapsed_time": "0:13:53", "remaining_time": "0:09:11", "throughput": 2513.08, "total_tokens": 2093952} {"current_steps": 3380, "total_steps": 5610, "loss": 0.0566, "lr": 2.0459790204794545e-05, "epoch": 6.024955436720143, "percentage": 60.25, "elapsed_time": "0:13:54", "remaining_time": "0:09:10", "throughput": 2513.97, "total_tokens": 2097728} {"current_steps": 3385, "total_steps": 5610, "loss": 0.0017, "lr": 2.0383328073375955e-05, "epoch": 6.033868092691622, "percentage": 60.34, "elapsed_time": "0:13:55", "remaining_time": "0:09:09", "throughput": 2514.2, "total_tokens": 2100736} {"current_steps": 3390, "total_steps": 5610, "loss": 0.0363, "lr": 2.0306910626536926e-05, "epoch": 6.042780748663102, "percentage": 60.43, "elapsed_time": "0:13:56", "remaining_time": "0:09:07", "throughput": 2514.72, "total_tokens": 2104032} {"current_steps": 3395, "total_steps": 5610, "loss": 0.0004, "lr": 2.0230538603918787e-05, "epoch": 6.051693404634581, "percentage": 60.52, "elapsed_time": "0:13:57", "remaining_time": "0:09:06", "throughput": 2515.2, "total_tokens": 2107264} {"current_steps": 3400, "total_steps": 5610, "loss": 0.0012, "lr": 2.015421274472325e-05, "epoch": 6.0606060606060606, "percentage": 60.61, "elapsed_time": "0:13:58", "remaining_time": "0:09:05", "throughput": 2515.59, "total_tokens": 2110336} {"current_steps": 3405, "total_steps": 5610, "loss": 0.0009, "lr": 2.0077933787705204e-05, "epoch": 6.06951871657754, "percentage": 60.7, "elapsed_time": "0:13:59", "remaining_time": "0:09:03", "throughput": 2515.83, "total_tokens": 2113248} {"current_steps": 3410, "total_steps": 5610, "loss": 0.0291, "lr": 2.000170247116554e-05, "epoch": 6.078431372549019, "percentage": 60.78, "elapsed_time": "0:14:01", "remaining_time": "0:09:02", "throughput": 2515.97, "total_tokens": 2116032} {"current_steps": 3415, "total_steps": 5610, "loss": 0.0975, "lr": 1.9925519532944104e-05, "epoch": 6.087344028520499, "percentage": 60.87, "elapsed_time": "0:14:02", "remaining_time": "0:09:01", "throughput": 2516.2, "total_tokens": 2118848} {"current_steps": 3420, "total_steps": 5610, "loss": 0.0013, "lr": 1.9849385710412424e-05, "epoch": 6.096256684491979, "percentage": 60.96, "elapsed_time": "0:14:03", "remaining_time": "0:08:59", "throughput": 2516.62, "total_tokens": 2122208} {"current_steps": 3425, "total_steps": 5610, "loss": 0.0451, "lr": 1.977330174046667e-05, "epoch": 6.105169340463458, "percentage": 61.05, "elapsed_time": "0:14:04", "remaining_time": "0:08:58", "throughput": 2516.95, "total_tokens": 2125248} {"current_steps": 3430, "total_steps": 5610, "loss": 0.0574, "lr": 1.9697268359520506e-05, "epoch": 6.114081996434938, "percentage": 61.14, "elapsed_time": "0:14:05", "remaining_time": "0:08:57", "throughput": 2517.91, "total_tokens": 2129248} {"current_steps": 3435, "total_steps": 5610, "loss": 0.0008, "lr": 1.9621286303497915e-05, "epoch": 6.122994652406417, "percentage": 61.23, "elapsed_time": "0:14:06", "remaining_time": "0:08:56", "throughput": 2517.91, "total_tokens": 2131904} {"current_steps": 3440, "total_steps": 5610, "loss": 0.0006, "lr": 1.954535630782612e-05, "epoch": 6.1319073083778965, "percentage": 61.32, "elapsed_time": "0:14:07", "remaining_time": "0:08:54", "throughput": 2518.65, "total_tokens": 2135552} {"current_steps": 3445, "total_steps": 5610, "loss": 0.0861, "lr": 1.9469479107428463e-05, "epoch": 6.140819964349376, "percentage": 61.41, "elapsed_time": "0:14:09", "remaining_time": "0:08:53", "throughput": 2519.03, "total_tokens": 2138688} {"current_steps": 3450, "total_steps": 5610, "loss": 0.1345, "lr": 1.9393655436717283e-05, "epoch": 6.149732620320855, "percentage": 61.5, "elapsed_time": "0:14:10", "remaining_time": "0:08:52", "throughput": 2519.02, "total_tokens": 2141248} {"current_steps": 3455, "total_steps": 5610, "loss": 0.0748, "lr": 1.9317886029586778e-05, "epoch": 6.158645276292335, "percentage": 61.59, "elapsed_time": "0:14:11", "remaining_time": "0:08:50", "throughput": 2519.67, "total_tokens": 2144768} {"current_steps": 3460, "total_steps": 5610, "loss": 0.0015, "lr": 1.9242171619405986e-05, "epoch": 6.167557932263815, "percentage": 61.68, "elapsed_time": "0:14:12", "remaining_time": "0:08:49", "throughput": 2519.81, "total_tokens": 2147552} {"current_steps": 3465, "total_steps": 5610, "loss": 0.0288, "lr": 1.916651293901157e-05, "epoch": 6.176470588235294, "percentage": 61.76, "elapsed_time": "0:14:13", "remaining_time": "0:08:48", "throughput": 2520.42, "total_tokens": 2151040} {"current_steps": 3470, "total_steps": 5610, "loss": 0.1793, "lr": 1.909091072070083e-05, "epoch": 6.185383244206774, "percentage": 61.85, "elapsed_time": "0:14:14", "remaining_time": "0:08:47", "throughput": 2521.4, "total_tokens": 2155040} {"current_steps": 3475, "total_steps": 5610, "loss": 0.0771, "lr": 1.9015365696224564e-05, "epoch": 6.194295900178253, "percentage": 61.94, "elapsed_time": "0:14:15", "remaining_time": "0:08:45", "throughput": 2521.53, "total_tokens": 2157824} {"current_steps": 3480, "total_steps": 5610, "loss": 0.0006, "lr": 1.893987859677997e-05, "epoch": 6.2032085561497325, "percentage": 62.03, "elapsed_time": "0:14:16", "remaining_time": "0:08:44", "throughput": 2521.71, "total_tokens": 2160672} {"current_steps": 3485, "total_steps": 5610, "loss": 0.1115, "lr": 1.886445015300362e-05, "epoch": 6.212121212121212, "percentage": 62.12, "elapsed_time": "0:14:17", "remaining_time": "0:08:43", "throughput": 2521.98, "total_tokens": 2163552} {"current_steps": 3490, "total_steps": 5610, "loss": 0.0291, "lr": 1.8789081094964347e-05, "epoch": 6.221033868092691, "percentage": 62.21, "elapsed_time": "0:14:19", "remaining_time": "0:08:41", "throughput": 2522.86, "total_tokens": 2167456} {"current_steps": 3495, "total_steps": 5610, "loss": 0.0421, "lr": 1.8713772152156205e-05, "epoch": 6.229946524064171, "percentage": 62.3, "elapsed_time": "0:14:20", "remaining_time": "0:08:40", "throughput": 2523.24, "total_tokens": 2170560} {"current_steps": 3500, "total_steps": 5610, "loss": 0.0437, "lr": 1.863852405349135e-05, "epoch": 6.238859180035651, "percentage": 62.39, "elapsed_time": "0:14:21", "remaining_time": "0:08:39", "throughput": 2523.31, "total_tokens": 2173152} {"current_steps": 3505, "total_steps": 5610, "loss": 0.076, "lr": 1.856333752729311e-05, "epoch": 6.24777183600713, "percentage": 62.48, "elapsed_time": "0:14:22", "remaining_time": "0:08:37", "throughput": 2523.33, "total_tokens": 2175808} {"current_steps": 3510, "total_steps": 5610, "loss": 0.1015, "lr": 1.848821330128878e-05, "epoch": 6.25668449197861, "percentage": 62.57, "elapsed_time": "0:14:23", "remaining_time": "0:08:36", "throughput": 2523.28, "total_tokens": 2178304} {"current_steps": 3515, "total_steps": 5610, "loss": 0.0089, "lr": 1.8413152102602687e-05, "epoch": 6.265597147950089, "percentage": 62.66, "elapsed_time": "0:14:24", "remaining_time": "0:08:35", "throughput": 2523.64, "total_tokens": 2181312} {"current_steps": 3520, "total_steps": 5610, "loss": 0.008, "lr": 1.8338154657749128e-05, "epoch": 6.2745098039215685, "percentage": 62.75, "elapsed_time": "0:14:25", "remaining_time": "0:08:33", "throughput": 2523.84, "total_tokens": 2184128} {"current_steps": 3525, "total_steps": 5610, "loss": 0.0022, "lr": 1.826322169262531e-05, "epoch": 6.283422459893048, "percentage": 62.83, "elapsed_time": "0:14:26", "remaining_time": "0:08:32", "throughput": 2524.44, "total_tokens": 2187584} {"current_steps": 3530, "total_steps": 5610, "loss": 0.0009, "lr": 1.818835393250434e-05, "epoch": 6.292335115864527, "percentage": 62.92, "elapsed_time": "0:14:27", "remaining_time": "0:08:31", "throughput": 2525.12, "total_tokens": 2191168} {"current_steps": 3535, "total_steps": 5610, "loss": 0.0091, "lr": 1.8113552102028236e-05, "epoch": 6.301247771836007, "percentage": 63.01, "elapsed_time": "0:14:28", "remaining_time": "0:08:30", "throughput": 2525.92, "total_tokens": 2194880} {"current_steps": 3540, "total_steps": 5610, "loss": 0.0559, "lr": 1.803881692520087e-05, "epoch": 6.310160427807487, "percentage": 63.1, "elapsed_time": "0:14:29", "remaining_time": "0:08:28", "throughput": 2525.72, "total_tokens": 2197184} {"current_steps": 3545, "total_steps": 5610, "loss": 0.0004, "lr": 1.796414912538095e-05, "epoch": 6.319073083778966, "percentage": 63.19, "elapsed_time": "0:14:31", "remaining_time": "0:08:27", "throughput": 2525.98, "total_tokens": 2200160} {"current_steps": 3550, "total_steps": 5610, "loss": 0.1829, "lr": 1.7889549425275093e-05, "epoch": 6.327985739750446, "percentage": 63.28, "elapsed_time": "0:14:32", "remaining_time": "0:08:26", "throughput": 2526.66, "total_tokens": 2203776} {"current_steps": 3555, "total_steps": 5610, "loss": 0.176, "lr": 1.7815018546930754e-05, "epoch": 6.336898395721925, "percentage": 63.37, "elapsed_time": "0:14:33", "remaining_time": "0:08:24", "throughput": 2527.13, "total_tokens": 2207104} {"current_steps": 3560, "total_steps": 5610, "loss": 0.0602, "lr": 1.7740557211729258e-05, "epoch": 6.3458110516934045, "percentage": 63.46, "elapsed_time": "0:14:34", "remaining_time": "0:08:23", "throughput": 2527.6, "total_tokens": 2210400} {"current_steps": 3565, "total_steps": 5610, "loss": 0.1188, "lr": 1.7666166140378852e-05, "epoch": 6.354723707664884, "percentage": 63.55, "elapsed_time": "0:14:35", "remaining_time": "0:08:22", "throughput": 2528.07, "total_tokens": 2213728} {"current_steps": 3570, "total_steps": 5610, "loss": 0.0346, "lr": 1.7591846052907673e-05, "epoch": 6.363636363636363, "percentage": 63.64, "elapsed_time": "0:14:36", "remaining_time": "0:08:20", "throughput": 2528.14, "total_tokens": 2216416} {"current_steps": 3575, "total_steps": 5610, "loss": 0.0174, "lr": 1.7517597668656823e-05, "epoch": 6.372549019607844, "percentage": 63.73, "elapsed_time": "0:14:37", "remaining_time": "0:08:19", "throughput": 2528.3, "total_tokens": 2219328} {"current_steps": 3580, "total_steps": 5610, "loss": 0.0271, "lr": 1.7443421706273395e-05, "epoch": 6.381461675579323, "percentage": 63.81, "elapsed_time": "0:14:38", "remaining_time": "0:08:18", "throughput": 2528.68, "total_tokens": 2222496} {"current_steps": 3585, "total_steps": 5610, "loss": 0.1009, "lr": 1.7369318883703506e-05, "epoch": 6.390374331550802, "percentage": 63.9, "elapsed_time": "0:14:40", "remaining_time": "0:08:17", "throughput": 2528.98, "total_tokens": 2225504} {"current_steps": 3590, "total_steps": 5610, "loss": 0.1496, "lr": 1.7295289918185348e-05, "epoch": 6.399286987522282, "percentage": 63.99, "elapsed_time": "0:14:41", "remaining_time": "0:08:15", "throughput": 2529.79, "total_tokens": 2229312} {"current_steps": 3595, "total_steps": 5610, "loss": 0.0354, "lr": 1.722133552624227e-05, "epoch": 6.408199643493761, "percentage": 64.08, "elapsed_time": "0:14:42", "remaining_time": "0:08:14", "throughput": 2530.27, "total_tokens": 2232544} {"current_steps": 3600, "total_steps": 5610, "loss": 0.0023, "lr": 1.714745642367583e-05, "epoch": 6.4171122994652405, "percentage": 64.17, "elapsed_time": "0:14:43", "remaining_time": "0:08:13", "throughput": 2530.76, "total_tokens": 2235808} {"current_steps": 3605, "total_steps": 5610, "loss": 0.1309, "lr": 1.707365332555883e-05, "epoch": 6.42602495543672, "percentage": 64.26, "elapsed_time": "0:14:44", "remaining_time": "0:08:11", "throughput": 2531.13, "total_tokens": 2239040} {"current_steps": 3610, "total_steps": 5610, "loss": 0.1223, "lr": 1.699992694622847e-05, "epoch": 6.434937611408199, "percentage": 64.35, "elapsed_time": "0:14:45", "remaining_time": "0:08:10", "throughput": 2531.18, "total_tokens": 2241728} {"current_steps": 3615, "total_steps": 5610, "loss": 0.1168, "lr": 1.6926277999279372e-05, "epoch": 6.443850267379679, "percentage": 64.44, "elapsed_time": "0:14:46", "remaining_time": "0:08:09", "throughput": 2531.56, "total_tokens": 2244928} {"current_steps": 3620, "total_steps": 5610, "loss": 0.0013, "lr": 1.6852707197556677e-05, "epoch": 6.452762923351159, "percentage": 64.53, "elapsed_time": "0:14:47", "remaining_time": "0:08:08", "throughput": 2531.81, "total_tokens": 2247936} {"current_steps": 3625, "total_steps": 5610, "loss": 0.1828, "lr": 1.67792152531492e-05, "epoch": 6.461675579322638, "percentage": 64.62, "elapsed_time": "0:14:48", "remaining_time": "0:08:06", "throughput": 2531.85, "total_tokens": 2250560} {"current_steps": 3630, "total_steps": 5610, "loss": 0.021, "lr": 1.6705802877382464e-05, "epoch": 6.470588235294118, "percentage": 64.71, "elapsed_time": "0:14:49", "remaining_time": "0:08:05", "throughput": 2531.92, "total_tokens": 2253248} {"current_steps": 3635, "total_steps": 5610, "loss": 0.0776, "lr": 1.6632470780811866e-05, "epoch": 6.479500891265597, "percentage": 64.8, "elapsed_time": "0:14:51", "remaining_time": "0:08:04", "throughput": 2532.2, "total_tokens": 2256320} {"current_steps": 3640, "total_steps": 5610, "loss": 0.0297, "lr": 1.6559219673215784e-05, "epoch": 6.4884135472370765, "percentage": 64.88, "elapsed_time": "0:14:52", "remaining_time": "0:08:02", "throughput": 2532.39, "total_tokens": 2259168} {"current_steps": 3645, "total_steps": 5610, "loss": 0.0353, "lr": 1.6486050263588702e-05, "epoch": 6.497326203208556, "percentage": 64.97, "elapsed_time": "0:14:53", "remaining_time": "0:08:01", "throughput": 2532.75, "total_tokens": 2262240} {"current_steps": 3650, "total_steps": 5610, "loss": 0.0674, "lr": 1.641296326013436e-05, "epoch": 6.506238859180035, "percentage": 65.06, "elapsed_time": "0:14:54", "remaining_time": "0:08:00", "throughput": 2533.22, "total_tokens": 2265600} {"current_steps": 3653, "total_steps": 5610, "eval_loss": 0.15169650316238403, "epoch": 6.5115864527629235, "percentage": 65.12, "elapsed_time": "0:15:01", "remaining_time": "0:08:02", "throughput": 2515.81, "total_tokens": 2267520} {"current_steps": 3655, "total_steps": 5610, "loss": 0.0634, "lr": 1.633995937025889e-05, "epoch": 6.515151515151516, "percentage": 65.15, "elapsed_time": "0:15:02", "remaining_time": "0:08:02", "throughput": 2513.78, "total_tokens": 2268768} {"current_steps": 3660, "total_steps": 5610, "loss": 0.0202, "lr": 1.6267039300563965e-05, "epoch": 6.524064171122995, "percentage": 65.24, "elapsed_time": "0:15:03", "remaining_time": "0:08:01", "throughput": 2514.37, "total_tokens": 2272256} {"current_steps": 3665, "total_steps": 5610, "loss": 0.0034, "lr": 1.619420375683996e-05, "epoch": 6.532976827094474, "percentage": 65.33, "elapsed_time": "0:15:04", "remaining_time": "0:08:00", "throughput": 2515.09, "total_tokens": 2275968} {"current_steps": 3670, "total_steps": 5610, "loss": 0.008, "lr": 1.6121453444059153e-05, "epoch": 6.541889483065954, "percentage": 65.42, "elapsed_time": "0:15:05", "remaining_time": "0:07:58", "throughput": 2515.24, "total_tokens": 2278784} {"current_steps": 3675, "total_steps": 5610, "loss": 0.0209, "lr": 1.6048789066368858e-05, "epoch": 6.550802139037433, "percentage": 65.51, "elapsed_time": "0:15:07", "remaining_time": "0:07:57", "throughput": 2515.32, "total_tokens": 2281472} {"current_steps": 3680, "total_steps": 5610, "loss": 0.0158, "lr": 1.5976211327084606e-05, "epoch": 6.5597147950089125, "percentage": 65.6, "elapsed_time": "0:15:08", "remaining_time": "0:07:56", "throughput": 2515.69, "total_tokens": 2284608} {"current_steps": 3685, "total_steps": 5610, "loss": 0.0469, "lr": 1.59037209286834e-05, "epoch": 6.568627450980392, "percentage": 65.69, "elapsed_time": "0:15:09", "remaining_time": "0:07:54", "throughput": 2515.82, "total_tokens": 2287296} {"current_steps": 3690, "total_steps": 5610, "loss": 0.0839, "lr": 1.583131857279685e-05, "epoch": 6.577540106951871, "percentage": 65.78, "elapsed_time": "0:15:10", "remaining_time": "0:07:53", "throughput": 2516.02, "total_tokens": 2290176} {"current_steps": 3695, "total_steps": 5610, "loss": 0.0791, "lr": 1.57590049602044e-05, "epoch": 6.586452762923351, "percentage": 65.86, "elapsed_time": "0:15:11", "remaining_time": "0:07:52", "throughput": 2516.15, "total_tokens": 2292960} {"current_steps": 3700, "total_steps": 5610, "loss": 0.0513, "lr": 1.5686780790826574e-05, "epoch": 6.595365418894831, "percentage": 65.95, "elapsed_time": "0:15:12", "remaining_time": "0:07:51", "throughput": 2516.55, "total_tokens": 2296192} {"current_steps": 3705, "total_steps": 5610, "loss": 0.0003, "lr": 1.561464676371816e-05, "epoch": 6.60427807486631, "percentage": 66.04, "elapsed_time": "0:15:13", "remaining_time": "0:07:49", "throughput": 2517.47, "total_tokens": 2300224} {"current_steps": 3710, "total_steps": 5610, "loss": 0.0658, "lr": 1.5542603577061464e-05, "epoch": 6.61319073083779, "percentage": 66.13, "elapsed_time": "0:15:14", "remaining_time": "0:07:48", "throughput": 2517.67, "total_tokens": 2303040} {"current_steps": 3715, "total_steps": 5610, "loss": 0.01, "lr": 1.5470651928159564e-05, "epoch": 6.622103386809269, "percentage": 66.22, "elapsed_time": "0:15:15", "remaining_time": "0:07:47", "throughput": 2517.66, "total_tokens": 2305600} {"current_steps": 3720, "total_steps": 5610, "loss": 0.0104, "lr": 1.539879251342954e-05, "epoch": 6.6310160427807485, "percentage": 66.31, "elapsed_time": "0:15:16", "remaining_time": "0:07:45", "throughput": 2518.02, "total_tokens": 2308736} {"current_steps": 3725, "total_steps": 5610, "loss": 0.0303, "lr": 1.5327026028395724e-05, "epoch": 6.639928698752228, "percentage": 66.4, "elapsed_time": "0:15:17", "remaining_time": "0:07:44", "throughput": 2518.38, "total_tokens": 2311840} {"current_steps": 3730, "total_steps": 5610, "loss": 0.0151, "lr": 1.5255353167683017e-05, "epoch": 6.648841354723707, "percentage": 66.49, "elapsed_time": "0:15:19", "remaining_time": "0:07:43", "throughput": 2519.27, "total_tokens": 2315808} {"current_steps": 3735, "total_steps": 5610, "loss": 0.0755, "lr": 1.5183774625010119e-05, "epoch": 6.657754010695188, "percentage": 66.58, "elapsed_time": "0:15:20", "remaining_time": "0:07:42", "throughput": 2519.74, "total_tokens": 2319072} {"current_steps": 3740, "total_steps": 5610, "loss": 0.1207, "lr": 1.5112291093182818e-05, "epoch": 6.666666666666667, "percentage": 66.67, "elapsed_time": "0:15:21", "remaining_time": "0:07:40", "throughput": 2520.71, "total_tokens": 2323104} {"current_steps": 3745, "total_steps": 5610, "loss": 0.0103, "lr": 1.5040903264087328e-05, "epoch": 6.675579322638146, "percentage": 66.76, "elapsed_time": "0:15:22", "remaining_time": "0:07:39", "throughput": 2520.91, "total_tokens": 2325984} {"current_steps": 3750, "total_steps": 5610, "loss": 0.0045, "lr": 1.4969611828683517e-05, "epoch": 6.684491978609626, "percentage": 66.84, "elapsed_time": "0:15:23", "remaining_time": "0:07:38", "throughput": 2521.36, "total_tokens": 2329152} {"current_steps": 3755, "total_steps": 5610, "loss": 0.0771, "lr": 1.4898417476998289e-05, "epoch": 6.693404634581105, "percentage": 66.93, "elapsed_time": "0:15:24", "remaining_time": "0:07:36", "throughput": 2521.99, "total_tokens": 2332768} {"current_steps": 3760, "total_steps": 5610, "loss": 0.0004, "lr": 1.4827320898118884e-05, "epoch": 6.7023172905525845, "percentage": 67.02, "elapsed_time": "0:15:26", "remaining_time": "0:07:35", "throughput": 2522.22, "total_tokens": 2335680} {"current_steps": 3765, "total_steps": 5610, "loss": 0.1187, "lr": 1.4756322780186193e-05, "epoch": 6.711229946524064, "percentage": 67.11, "elapsed_time": "0:15:27", "remaining_time": "0:07:34", "throughput": 2522.45, "total_tokens": 2338656} {"current_steps": 3770, "total_steps": 5610, "loss": 0.0343, "lr": 1.4685423810388094e-05, "epoch": 6.720142602495543, "percentage": 67.2, "elapsed_time": "0:15:28", "remaining_time": "0:07:33", "throughput": 2522.91, "total_tokens": 2342016} {"current_steps": 3775, "total_steps": 5610, "loss": 0.0842, "lr": 1.4614624674952842e-05, "epoch": 6.729055258467023, "percentage": 67.29, "elapsed_time": "0:15:29", "remaining_time": "0:07:31", "throughput": 2523.26, "total_tokens": 2345120} {"current_steps": 3780, "total_steps": 5610, "loss": 0.081, "lr": 1.4543926059142379e-05, "epoch": 6.737967914438503, "percentage": 67.38, "elapsed_time": "0:15:30", "remaining_time": "0:07:30", "throughput": 2523.77, "total_tokens": 2348512} {"current_steps": 3785, "total_steps": 5610, "loss": 0.1136, "lr": 1.4473328647245726e-05, "epoch": 6.746880570409982, "percentage": 67.47, "elapsed_time": "0:15:31", "remaining_time": "0:07:29", "throughput": 2523.47, "total_tokens": 2350688} {"current_steps": 3790, "total_steps": 5610, "loss": 0.0185, "lr": 1.4402833122572368e-05, "epoch": 6.755793226381462, "percentage": 67.56, "elapsed_time": "0:15:32", "remaining_time": "0:07:27", "throughput": 2523.63, "total_tokens": 2353504} {"current_steps": 3795, "total_steps": 5610, "loss": 0.0589, "lr": 1.4332440167445613e-05, "epoch": 6.764705882352941, "percentage": 67.65, "elapsed_time": "0:15:33", "remaining_time": "0:07:26", "throughput": 2524.04, "total_tokens": 2356672} {"current_steps": 3800, "total_steps": 5610, "loss": 0.0831, "lr": 1.4262150463195981e-05, "epoch": 6.7736185383244205, "percentage": 67.74, "elapsed_time": "0:15:34", "remaining_time": "0:07:25", "throughput": 2524.7, "total_tokens": 2360288} {"current_steps": 3805, "total_steps": 5610, "loss": 0.0163, "lr": 1.4191964690154702e-05, "epoch": 6.7825311942959, "percentage": 67.83, "elapsed_time": "0:15:35", "remaining_time": "0:07:23", "throughput": 2524.71, "total_tokens": 2362944} {"current_steps": 3810, "total_steps": 5610, "loss": 0.0017, "lr": 1.412188352764699e-05, "epoch": 6.791443850267379, "percentage": 67.91, "elapsed_time": "0:15:37", "remaining_time": "0:07:22", "throughput": 2525.09, "total_tokens": 2366080} {"current_steps": 3815, "total_steps": 5610, "loss": 0.1283, "lr": 1.4051907653985552e-05, "epoch": 6.80035650623886, "percentage": 68.0, "elapsed_time": "0:15:38", "remaining_time": "0:07:21", "throughput": 2525.7, "total_tokens": 2369632} {"current_steps": 3820, "total_steps": 5610, "loss": 0.1444, "lr": 1.3982037746464043e-05, "epoch": 6.809269162210339, "percentage": 68.09, "elapsed_time": "0:15:39", "remaining_time": "0:07:20", "throughput": 2526.53, "total_tokens": 2373504} {"current_steps": 3825, "total_steps": 5610, "loss": 0.0177, "lr": 1.3912274481350433e-05, "epoch": 6.818181818181818, "percentage": 68.18, "elapsed_time": "0:15:40", "remaining_time": "0:07:18", "throughput": 2526.86, "total_tokens": 2376480} {"current_steps": 3830, "total_steps": 5610, "loss": 0.0341, "lr": 1.3842618533880531e-05, "epoch": 6.827094474153298, "percentage": 68.27, "elapsed_time": "0:15:41", "remaining_time": "0:07:17", "throughput": 2527.11, "total_tokens": 2379488} {"current_steps": 3835, "total_steps": 5610, "loss": 0.1742, "lr": 1.3773070578251424e-05, "epoch": 6.836007130124777, "percentage": 68.36, "elapsed_time": "0:15:42", "remaining_time": "0:07:16", "throughput": 2527.35, "total_tokens": 2382496} {"current_steps": 3840, "total_steps": 5610, "loss": 0.0996, "lr": 1.3703631287614935e-05, "epoch": 6.8449197860962565, "percentage": 68.45, "elapsed_time": "0:15:43", "remaining_time": "0:07:15", "throughput": 2528.09, "total_tokens": 2386304} {"current_steps": 3845, "total_steps": 5610, "loss": 0.0696, "lr": 1.363430133407112e-05, "epoch": 6.853832442067736, "percentage": 68.54, "elapsed_time": "0:15:45", "remaining_time": "0:07:13", "throughput": 2528.51, "total_tokens": 2389504} {"current_steps": 3850, "total_steps": 5610, "loss": 0.0125, "lr": 1.3565081388661782e-05, "epoch": 6.862745098039216, "percentage": 68.63, "elapsed_time": "0:15:46", "remaining_time": "0:07:12", "throughput": 2528.66, "total_tokens": 2392320} {"current_steps": 3855, "total_steps": 5610, "loss": 0.1099, "lr": 1.3495972121363968e-05, "epoch": 6.871657754010696, "percentage": 68.72, "elapsed_time": "0:15:47", "remaining_time": "0:07:11", "throughput": 2529.1, "total_tokens": 2395648} {"current_steps": 3860, "total_steps": 5610, "loss": 0.0357, "lr": 1.3426974201083439e-05, "epoch": 6.880570409982175, "percentage": 68.81, "elapsed_time": "0:15:48", "remaining_time": "0:07:09", "throughput": 2528.96, "total_tokens": 2398080} {"current_steps": 3865, "total_steps": 5610, "loss": 0.0005, "lr": 1.3358088295648274e-05, "epoch": 6.889483065953654, "percentage": 68.89, "elapsed_time": "0:15:49", "remaining_time": "0:07:08", "throughput": 2528.88, "total_tokens": 2400448} {"current_steps": 3870, "total_steps": 5610, "loss": 0.0008, "lr": 1.328931507180233e-05, "epoch": 6.898395721925134, "percentage": 68.98, "elapsed_time": "0:15:50", "remaining_time": "0:07:07", "throughput": 2529.06, "total_tokens": 2403424} {"current_steps": 3875, "total_steps": 5610, "loss": 0.0087, "lr": 1.3220655195198847e-05, "epoch": 6.907308377896613, "percentage": 69.07, "elapsed_time": "0:15:51", "remaining_time": "0:07:05", "throughput": 2529.14, "total_tokens": 2405984} {"current_steps": 3880, "total_steps": 5610, "loss": 0.0832, "lr": 1.3152109330393985e-05, "epoch": 6.9162210338680925, "percentage": 69.16, "elapsed_time": "0:15:52", "remaining_time": "0:07:04", "throughput": 2529.7, "total_tokens": 2409472} {"current_steps": 3885, "total_steps": 5610, "loss": 0.0044, "lr": 1.3083678140840366e-05, "epoch": 6.925133689839572, "percentage": 69.25, "elapsed_time": "0:15:53", "remaining_time": "0:07:03", "throughput": 2529.92, "total_tokens": 2412384} {"current_steps": 3890, "total_steps": 5610, "loss": 0.0957, "lr": 1.3015362288880678e-05, "epoch": 6.934046345811051, "percentage": 69.34, "elapsed_time": "0:15:54", "remaining_time": "0:07:02", "throughput": 2530.14, "total_tokens": 2415328} {"current_steps": 3895, "total_steps": 5610, "loss": 0.0202, "lr": 1.2947162435741278e-05, "epoch": 6.942959001782532, "percentage": 69.43, "elapsed_time": "0:15:55", "remaining_time": "0:07:00", "throughput": 2530.69, "total_tokens": 2418848} {"current_steps": 3900, "total_steps": 5610, "loss": 0.2008, "lr": 1.2879079241525783e-05, "epoch": 6.951871657754011, "percentage": 69.52, "elapsed_time": "0:15:56", "remaining_time": "0:06:59", "throughput": 2530.97, "total_tokens": 2421824} {"current_steps": 3905, "total_steps": 5610, "loss": 0.2242, "lr": 1.2811113365208627e-05, "epoch": 6.96078431372549, "percentage": 69.61, "elapsed_time": "0:15:57", "remaining_time": "0:06:58", "throughput": 2530.81, "total_tokens": 2424224} {"current_steps": 3910, "total_steps": 5610, "loss": 0.0045, "lr": 1.2743265464628786e-05, "epoch": 6.96969696969697, "percentage": 69.7, "elapsed_time": "0:15:59", "remaining_time": "0:06:56", "throughput": 2531.32, "total_tokens": 2427616} {"current_steps": 3915, "total_steps": 5610, "loss": 0.0024, "lr": 1.2675536196483306e-05, "epoch": 6.978609625668449, "percentage": 69.79, "elapsed_time": "0:16:00", "remaining_time": "0:06:55", "throughput": 2531.41, "total_tokens": 2430368} {"current_steps": 3920, "total_steps": 5610, "loss": 0.0026, "lr": 1.260792621632102e-05, "epoch": 6.9875222816399285, "percentage": 69.88, "elapsed_time": "0:16:01", "remaining_time": "0:06:54", "throughput": 2531.73, "total_tokens": 2433376} {"current_steps": 3925, "total_steps": 5610, "loss": 0.003, "lr": 1.2540436178536186e-05, "epoch": 6.996434937611408, "percentage": 69.96, "elapsed_time": "0:16:02", "remaining_time": "0:06:53", "throughput": 2532.18, "total_tokens": 2436608} {"current_steps": 3930, "total_steps": 5610, "loss": 0.0127, "lr": 1.2473066736362124e-05, "epoch": 7.005347593582887, "percentage": 70.05, "elapsed_time": "0:16:03", "remaining_time": "0:06:51", "throughput": 2531.78, "total_tokens": 2439064} {"current_steps": 3934, "total_steps": 5610, "eval_loss": 0.15626700222492218, "epoch": 7.0124777183600715, "percentage": 70.12, "elapsed_time": "0:16:10", "remaining_time": "0:06:53", "throughput": 2515.71, "total_tokens": 2441688} {"current_steps": 3935, "total_steps": 5610, "loss": 0.0017, "lr": 1.2405818541864905e-05, "epoch": 7.0142602495543676, "percentage": 70.14, "elapsed_time": "0:16:11", "remaining_time": "0:06:53", "throughput": 2513.21, "total_tokens": 2442328} {"current_steps": 3940, "total_steps": 5610, "loss": 0.1579, "lr": 1.2338692245937077e-05, "epoch": 7.023172905525847, "percentage": 70.23, "elapsed_time": "0:16:12", "remaining_time": "0:06:52", "throughput": 2513.46, "total_tokens": 2445272} {"current_steps": 3945, "total_steps": 5610, "loss": 0.0009, "lr": 1.2271688498291335e-05, "epoch": 7.032085561497326, "percentage": 70.32, "elapsed_time": "0:16:13", "remaining_time": "0:06:51", "throughput": 2513.74, "total_tokens": 2448216} {"current_steps": 3950, "total_steps": 5610, "loss": 0.0329, "lr": 1.2204807947454203e-05, "epoch": 7.040998217468806, "percentage": 70.41, "elapsed_time": "0:16:15", "remaining_time": "0:06:49", "throughput": 2514.29, "total_tokens": 2451704} {"current_steps": 3955, "total_steps": 5610, "loss": 0.0814, "lr": 1.2138051240759826e-05, "epoch": 7.049910873440285, "percentage": 70.5, "elapsed_time": "0:16:16", "remaining_time": "0:06:48", "throughput": 2514.44, "total_tokens": 2454392} {"current_steps": 3960, "total_steps": 5610, "loss": 0.0202, "lr": 1.2071419024343633e-05, "epoch": 7.0588235294117645, "percentage": 70.59, "elapsed_time": "0:16:17", "remaining_time": "0:06:47", "throughput": 2514.47, "total_tokens": 2457112} {"current_steps": 3965, "total_steps": 5610, "loss": 0.0494, "lr": 1.2004911943136143e-05, "epoch": 7.067736185383244, "percentage": 70.68, "elapsed_time": "0:16:18", "remaining_time": "0:06:45", "throughput": 2514.84, "total_tokens": 2460312} {"current_steps": 3970, "total_steps": 5610, "loss": 0.0192, "lr": 1.1938530640856696e-05, "epoch": 7.076648841354723, "percentage": 70.77, "elapsed_time": "0:16:19", "remaining_time": "0:06:44", "throughput": 2515.08, "total_tokens": 2463224} {"current_steps": 3975, "total_steps": 5610, "loss": 0.0011, "lr": 1.1872275760007198e-05, "epoch": 7.0855614973262036, "percentage": 70.86, "elapsed_time": "0:16:20", "remaining_time": "0:06:43", "throughput": 2515.25, "total_tokens": 2466008} {"current_steps": 3980, "total_steps": 5610, "loss": 0.0015, "lr": 1.1806147941865938e-05, "epoch": 7.094474153297683, "percentage": 70.94, "elapsed_time": "0:16:21", "remaining_time": "0:06:41", "throughput": 2515.59, "total_tokens": 2469176} {"current_steps": 3985, "total_steps": 5610, "loss": 0.1977, "lr": 1.1740147826481385e-05, "epoch": 7.103386809269162, "percentage": 71.03, "elapsed_time": "0:16:22", "remaining_time": "0:06:40", "throughput": 2515.97, "total_tokens": 2472408} {"current_steps": 3990, "total_steps": 5610, "loss": 0.0318, "lr": 1.1674276052665973e-05, "epoch": 7.112299465240642, "percentage": 71.12, "elapsed_time": "0:16:23", "remaining_time": "0:06:39", "throughput": 2516.37, "total_tokens": 2475608} {"current_steps": 3995, "total_steps": 5610, "loss": 0.0146, "lr": 1.1608533257989901e-05, "epoch": 7.121212121212121, "percentage": 71.21, "elapsed_time": "0:16:24", "remaining_time": "0:06:38", "throughput": 2516.64, "total_tokens": 2478680} {"current_steps": 4000, "total_steps": 5610, "loss": 0.1046, "lr": 1.1542920078775018e-05, "epoch": 7.1301247771836005, "percentage": 71.3, "elapsed_time": "0:16:25", "remaining_time": "0:06:36", "throughput": 2516.91, "total_tokens": 2481592} {"current_steps": 4005, "total_steps": 5610, "loss": 0.0188, "lr": 1.14774371500886e-05, "epoch": 7.13903743315508, "percentage": 71.39, "elapsed_time": "0:16:27", "remaining_time": "0:06:35", "throughput": 2517.54, "total_tokens": 2485176} {"current_steps": 4010, "total_steps": 5610, "loss": 0.0005, "lr": 1.141208510573725e-05, "epoch": 7.14795008912656, "percentage": 71.48, "elapsed_time": "0:16:28", "remaining_time": "0:06:34", "throughput": 2517.81, "total_tokens": 2488152} {"current_steps": 4015, "total_steps": 5610, "loss": 0.0989, "lr": 1.1346864578260758e-05, "epoch": 7.1568627450980395, "percentage": 71.57, "elapsed_time": "0:16:29", "remaining_time": "0:06:33", "throughput": 2518.12, "total_tokens": 2491320} {"current_steps": 4020, "total_steps": 5610, "loss": 0.0352, "lr": 1.1281776198925939e-05, "epoch": 7.165775401069519, "percentage": 71.66, "elapsed_time": "0:16:30", "remaining_time": "0:06:31", "throughput": 2518.16, "total_tokens": 2493944} {"current_steps": 4025, "total_steps": 5610, "loss": 0.001, "lr": 1.121682059772056e-05, "epoch": 7.174688057040998, "percentage": 71.75, "elapsed_time": "0:16:31", "remaining_time": "0:06:30", "throughput": 2518.26, "total_tokens": 2496664} {"current_steps": 4030, "total_steps": 5610, "loss": 0.0003, "lr": 1.1151998403347244e-05, "epoch": 7.183600713012478, "percentage": 71.84, "elapsed_time": "0:16:32", "remaining_time": "0:06:29", "throughput": 2518.75, "total_tokens": 2500216} {"current_steps": 4035, "total_steps": 5610, "loss": 0.0176, "lr": 1.1087310243217386e-05, "epoch": 7.192513368983957, "percentage": 71.93, "elapsed_time": "0:16:33", "remaining_time": "0:06:27", "throughput": 2519.22, "total_tokens": 2503544} {"current_steps": 4040, "total_steps": 5610, "loss": 0.0753, "lr": 1.1022756743445028e-05, "epoch": 7.2014260249554365, "percentage": 72.01, "elapsed_time": "0:16:34", "remaining_time": "0:06:26", "throughput": 2519.81, "total_tokens": 2507160} {"current_steps": 4045, "total_steps": 5610, "loss": 0.1054, "lr": 1.0958338528840893e-05, "epoch": 7.210338680926916, "percentage": 72.1, "elapsed_time": "0:16:36", "remaining_time": "0:06:25", "throughput": 2520.12, "total_tokens": 2510232} {"current_steps": 4050, "total_steps": 5610, "loss": 0.0075, "lr": 1.0894056222906226e-05, "epoch": 7.219251336898395, "percentage": 72.19, "elapsed_time": "0:16:37", "remaining_time": "0:06:24", "throughput": 2520.32, "total_tokens": 2513144} {"current_steps": 4055, "total_steps": 5610, "loss": 0.0433, "lr": 1.0829910447826868e-05, "epoch": 7.2281639928698755, "percentage": 72.28, "elapsed_time": "0:16:38", "remaining_time": "0:06:22", "throughput": 2520.79, "total_tokens": 2516504} {"current_steps": 4060, "total_steps": 5610, "loss": 0.0225, "lr": 1.0765901824467167e-05, "epoch": 7.237076648841355, "percentage": 72.37, "elapsed_time": "0:16:39", "remaining_time": "0:06:21", "throughput": 2520.51, "total_tokens": 2518648} {"current_steps": 4065, "total_steps": 5610, "loss": 0.0218, "lr": 1.0702030972363963e-05, "epoch": 7.245989304812834, "percentage": 72.46, "elapsed_time": "0:16:40", "remaining_time": "0:06:20", "throughput": 2520.94, "total_tokens": 2521880} {"current_steps": 4070, "total_steps": 5610, "loss": 0.0101, "lr": 1.063829850972065e-05, "epoch": 7.254901960784314, "percentage": 72.55, "elapsed_time": "0:16:41", "remaining_time": "0:06:18", "throughput": 2521.44, "total_tokens": 2525336} {"current_steps": 4075, "total_steps": 5610, "loss": 0.1253, "lr": 1.0574705053401127e-05, "epoch": 7.263814616755793, "percentage": 72.64, "elapsed_time": "0:16:42", "remaining_time": "0:06:17", "throughput": 2521.57, "total_tokens": 2528184} {"current_steps": 4080, "total_steps": 5610, "loss": 0.0922, "lr": 1.0511251218923868e-05, "epoch": 7.2727272727272725, "percentage": 72.73, "elapsed_time": "0:16:43", "remaining_time": "0:06:16", "throughput": 2521.64, "total_tokens": 2530904} {"current_steps": 4085, "total_steps": 5610, "loss": 0.0206, "lr": 1.0447937620455964e-05, "epoch": 7.281639928698752, "percentage": 72.82, "elapsed_time": "0:16:44", "remaining_time": "0:06:15", "throughput": 2521.72, "total_tokens": 2533656} {"current_steps": 4090, "total_steps": 5610, "loss": 0.082, "lr": 1.0384764870807149e-05, "epoch": 7.290552584670232, "percentage": 72.91, "elapsed_time": "0:16:45", "remaining_time": "0:06:13", "throughput": 2521.5, "total_tokens": 2535928} {"current_steps": 4095, "total_steps": 5610, "loss": 0.0186, "lr": 1.0321733581423884e-05, "epoch": 7.2994652406417115, "percentage": 72.99, "elapsed_time": "0:16:46", "remaining_time": "0:06:12", "throughput": 2522.01, "total_tokens": 2539352} {"current_steps": 4100, "total_steps": 5610, "loss": 0.1287, "lr": 1.025884436238346e-05, "epoch": 7.308377896613191, "percentage": 73.08, "elapsed_time": "0:16:47", "remaining_time": "0:06:11", "throughput": 2522.32, "total_tokens": 2542456} {"current_steps": 4105, "total_steps": 5610, "loss": 0.0221, "lr": 1.0196097822388075e-05, "epoch": 7.31729055258467, "percentage": 73.17, "elapsed_time": "0:16:49", "remaining_time": "0:06:09", "throughput": 2522.76, "total_tokens": 2545816} {"current_steps": 4110, "total_steps": 5610, "loss": 0.0755, "lr": 1.013349456875892e-05, "epoch": 7.32620320855615, "percentage": 73.26, "elapsed_time": "0:16:50", "remaining_time": "0:06:08", "throughput": 2523.06, "total_tokens": 2548824} {"current_steps": 4115, "total_steps": 5610, "loss": 0.0006, "lr": 1.0071035207430352e-05, "epoch": 7.335115864527629, "percentage": 73.35, "elapsed_time": "0:16:51", "remaining_time": "0:06:07", "throughput": 2523.49, "total_tokens": 2552152} {"current_steps": 4120, "total_steps": 5610, "loss": 0.0005, "lr": 1.0008720342943966e-05, "epoch": 7.3440285204991085, "percentage": 73.44, "elapsed_time": "0:16:52", "remaining_time": "0:06:06", "throughput": 2524.06, "total_tokens": 2555768} {"current_steps": 4125, "total_steps": 5610, "loss": 0.0011, "lr": 9.94655057844281e-06, "epoch": 7.352941176470588, "percentage": 73.53, "elapsed_time": "0:16:53", "remaining_time": "0:06:04", "throughput": 2524.04, "total_tokens": 2558328} {"current_steps": 4130, "total_steps": 5610, "loss": 0.0646, "lr": 9.884526515665508e-06, "epoch": 7.361853832442068, "percentage": 73.62, "elapsed_time": "0:16:54", "remaining_time": "0:06:03", "throughput": 2524.33, "total_tokens": 2561368} {"current_steps": 4135, "total_steps": 5610, "loss": 0.0886, "lr": 9.822648754940431e-06, "epoch": 7.3707664884135475, "percentage": 73.71, "elapsed_time": "0:16:55", "remaining_time": "0:06:02", "throughput": 2524.4, "total_tokens": 2564056} {"current_steps": 4140, "total_steps": 5610, "loss": 0.0008, "lr": 9.760917895179894e-06, "epoch": 7.379679144385027, "percentage": 73.8, "elapsed_time": "0:16:56", "remaining_time": "0:06:01", "throughput": 2524.5, "total_tokens": 2566744} {"current_steps": 4145, "total_steps": 5610, "loss": 0.0011, "lr": 9.699334533874386e-06, "epoch": 7.388591800356506, "percentage": 73.89, "elapsed_time": "0:16:57", "remaining_time": "0:05:59", "throughput": 2524.75, "total_tokens": 2569656} {"current_steps": 4150, "total_steps": 5610, "loss": 0.0823, "lr": 9.637899267086758e-06, "epoch": 7.397504456327986, "percentage": 73.98, "elapsed_time": "0:16:58", "remaining_time": "0:05:58", "throughput": 2525.25, "total_tokens": 2573112} {"current_steps": 4155, "total_steps": 5610, "loss": 0.123, "lr": 9.576612689446444e-06, "epoch": 7.406417112299465, "percentage": 74.06, "elapsed_time": "0:17:00", "remaining_time": "0:05:57", "throughput": 2525.95, "total_tokens": 2576952} {"current_steps": 4160, "total_steps": 5610, "loss": 0.0006, "lr": 9.515475394143742e-06, "epoch": 7.4153297682709445, "percentage": 74.15, "elapsed_time": "0:17:01", "remaining_time": "0:05:55", "throughput": 2526.19, "total_tokens": 2579896} {"current_steps": 4165, "total_steps": 5610, "loss": 0.0705, "lr": 9.45448797292403e-06, "epoch": 7.424242424242424, "percentage": 74.24, "elapsed_time": "0:17:02", "remaining_time": "0:05:54", "throughput": 2526.78, "total_tokens": 2583544} {"current_steps": 4170, "total_steps": 5610, "loss": 0.1237, "lr": 9.393651016082083e-06, "epoch": 7.433155080213904, "percentage": 74.33, "elapsed_time": "0:17:03", "remaining_time": "0:05:53", "throughput": 2526.81, "total_tokens": 2586200} {"current_steps": 4175, "total_steps": 5610, "loss": 0.0532, "lr": 9.332965112456337e-06, "epoch": 7.4420677361853835, "percentage": 74.42, "elapsed_time": "0:17:04", "remaining_time": "0:05:52", "throughput": 2527.22, "total_tokens": 2589496} {"current_steps": 4180, "total_steps": 5610, "loss": 0.0375, "lr": 9.272430849423174e-06, "epoch": 7.450980392156863, "percentage": 74.51, "elapsed_time": "0:17:05", "remaining_time": "0:05:50", "throughput": 2527.16, "total_tokens": 2591928} {"current_steps": 4185, "total_steps": 5610, "loss": 0.0419, "lr": 9.21204881289125e-06, "epoch": 7.459893048128342, "percentage": 74.6, "elapsed_time": "0:17:06", "remaining_time": "0:05:49", "throughput": 2527.46, "total_tokens": 2595064} {"current_steps": 4190, "total_steps": 5610, "loss": 0.0159, "lr": 9.151819587295845e-06, "epoch": 7.468805704099822, "percentage": 74.69, "elapsed_time": "0:17:07", "remaining_time": "0:05:48", "throughput": 2527.6, "total_tokens": 2597944} {"current_steps": 4195, "total_steps": 5610, "loss": 0.0023, "lr": 9.09174375559319e-06, "epoch": 7.477718360071301, "percentage": 74.78, "elapsed_time": "0:17:09", "remaining_time": "0:05:47", "throughput": 2528.25, "total_tokens": 2601656} {"current_steps": 4200, "total_steps": 5610, "loss": 0.1474, "lr": 9.031821899254796e-06, "epoch": 7.4866310160427805, "percentage": 74.87, "elapsed_time": "0:17:10", "remaining_time": "0:05:45", "throughput": 2528.42, "total_tokens": 2604472} {"current_steps": 4205, "total_steps": 5610, "loss": 0.1761, "lr": 8.972054598261892e-06, "epoch": 7.49554367201426, "percentage": 74.96, "elapsed_time": "0:17:11", "remaining_time": "0:05:44", "throughput": 2528.99, "total_tokens": 2607992} {"current_steps": 4210, "total_steps": 5610, "loss": 0.1577, "lr": 8.912442431099724e-06, "epoch": 7.50445632798574, "percentage": 75.04, "elapsed_time": "0:17:12", "remaining_time": "0:05:43", "throughput": 2529.68, "total_tokens": 2611800} {"current_steps": 4215, "total_steps": 5610, "loss": 0.0054, "lr": 8.852985974752045e-06, "epoch": 7.5133689839572195, "percentage": 75.13, "elapsed_time": "0:17:13", "remaining_time": "0:05:42", "throughput": 2529.96, "total_tokens": 2614936} {"current_steps": 4215, "total_steps": 5610, "eval_loss": 0.1573449969291687, "epoch": 7.5133689839572195, "percentage": 75.13, "elapsed_time": "0:17:19", "remaining_time": "0:05:44", "throughput": 2514.66, "total_tokens": 2614936} {"current_steps": 4220, "total_steps": 5610, "loss": 0.0687, "lr": 8.793685804695482e-06, "epoch": 7.522281639928699, "percentage": 75.22, "elapsed_time": "0:17:21", "remaining_time": "0:05:43", "throughput": 2513.61, "total_tokens": 2618744} {"current_steps": 4225, "total_steps": 5610, "loss": 0.1056, "lr": 8.734542494893955e-06, "epoch": 7.531194295900178, "percentage": 75.31, "elapsed_time": "0:17:22", "remaining_time": "0:05:41", "throughput": 2513.73, "total_tokens": 2621496} {"current_steps": 4230, "total_steps": 5610, "loss": 0.0174, "lr": 8.675556617793143e-06, "epoch": 7.540106951871658, "percentage": 75.4, "elapsed_time": "0:17:23", "remaining_time": "0:05:40", "throughput": 2513.99, "total_tokens": 2624568} {"current_steps": 4235, "total_steps": 5610, "loss": 0.0416, "lr": 8.616728744314956e-06, "epoch": 7.549019607843137, "percentage": 75.49, "elapsed_time": "0:17:25", "remaining_time": "0:05:39", "throughput": 2514.4, "total_tokens": 2627832} {"current_steps": 4240, "total_steps": 5610, "loss": 0.0847, "lr": 8.558059443851998e-06, "epoch": 7.5579322638146165, "percentage": 75.58, "elapsed_time": "0:17:26", "remaining_time": "0:05:38", "throughput": 2514.81, "total_tokens": 2631160} {"current_steps": 4245, "total_steps": 5610, "loss": 0.0763, "lr": 8.499549284262017e-06, "epoch": 7.566844919786096, "percentage": 75.67, "elapsed_time": "0:17:27", "remaining_time": "0:05:36", "throughput": 2515.25, "total_tokens": 2634488} {"current_steps": 4250, "total_steps": 5610, "loss": 0.05, "lr": 8.441198831862485e-06, "epoch": 7.575757575757576, "percentage": 75.76, "elapsed_time": "0:17:28", "remaining_time": "0:05:35", "throughput": 2515.37, "total_tokens": 2637240} {"current_steps": 4255, "total_steps": 5610, "loss": 0.005, "lr": 8.383008651425035e-06, "epoch": 7.5846702317290555, "percentage": 75.85, "elapsed_time": "0:17:29", "remaining_time": "0:05:34", "throughput": 2515.48, "total_tokens": 2639992} {"current_steps": 4260, "total_steps": 5610, "loss": 0.0504, "lr": 8.32497930617006e-06, "epoch": 7.593582887700535, "percentage": 75.94, "elapsed_time": "0:17:30", "remaining_time": "0:05:32", "throughput": 2516.16, "total_tokens": 2643832} {"current_steps": 4265, "total_steps": 5610, "loss": 0.0035, "lr": 8.267111357761243e-06, "epoch": 7.602495543672014, "percentage": 76.02, "elapsed_time": "0:17:31", "remaining_time": "0:05:31", "throughput": 2516.33, "total_tokens": 2646712} {"current_steps": 4270, "total_steps": 5610, "loss": 0.0828, "lr": 8.209405366300088e-06, "epoch": 7.611408199643494, "percentage": 76.11, "elapsed_time": "0:17:32", "remaining_time": "0:05:30", "throughput": 2516.75, "total_tokens": 2650072} {"current_steps": 4275, "total_steps": 5610, "loss": 0.0014, "lr": 8.151861890320528e-06, "epoch": 7.620320855614973, "percentage": 76.2, "elapsed_time": "0:17:34", "remaining_time": "0:05:29", "throughput": 2517.3, "total_tokens": 2653656} {"current_steps": 4280, "total_steps": 5610, "loss": 0.0652, "lr": 8.094481486783534e-06, "epoch": 7.6292335115864525, "percentage": 76.29, "elapsed_time": "0:17:35", "remaining_time": "0:05:27", "throughput": 2517.96, "total_tokens": 2657464} {"current_steps": 4285, "total_steps": 5610, "loss": 0.1452, "lr": 8.0372647110717e-06, "epoch": 7.638146167557933, "percentage": 76.38, "elapsed_time": "0:17:36", "remaining_time": "0:05:26", "throughput": 2518.24, "total_tokens": 2660568} {"current_steps": 4290, "total_steps": 5610, "loss": 0.0047, "lr": 7.98021211698385e-06, "epoch": 7.647058823529412, "percentage": 76.47, "elapsed_time": "0:17:37", "remaining_time": "0:05:25", "throughput": 2518.42, "total_tokens": 2663448} {"current_steps": 4295, "total_steps": 5610, "loss": 0.1367, "lr": 7.923324256729738e-06, "epoch": 7.6559714795008915, "percentage": 76.56, "elapsed_time": "0:17:38", "remaining_time": "0:05:24", "throughput": 2518.55, "total_tokens": 2666136} {"current_steps": 4300, "total_steps": 5610, "loss": 0.0119, "lr": 7.866601680924633e-06, "epoch": 7.664884135472371, "percentage": 76.65, "elapsed_time": "0:17:39", "remaining_time": "0:05:22", "throughput": 2518.76, "total_tokens": 2669048} {"current_steps": 4305, "total_steps": 5610, "loss": 0.0011, "lr": 7.810044938584038e-06, "epoch": 7.67379679144385, "percentage": 76.74, "elapsed_time": "0:17:40", "remaining_time": "0:05:21", "throughput": 2518.89, "total_tokens": 2671800} {"current_steps": 4310, "total_steps": 5610, "loss": 0.0006, "lr": 7.75365457711837e-06, "epoch": 7.68270944741533, "percentage": 76.83, "elapsed_time": "0:17:41", "remaining_time": "0:05:20", "throughput": 2519.46, "total_tokens": 2675448} {"current_steps": 4315, "total_steps": 5610, "loss": 0.0008, "lr": 7.697431142327632e-06, "epoch": 7.691622103386809, "percentage": 76.92, "elapsed_time": "0:17:43", "remaining_time": "0:05:19", "throughput": 2519.63, "total_tokens": 2678392} {"current_steps": 4320, "total_steps": 5610, "loss": 0.0742, "lr": 7.641375178396151e-06, "epoch": 7.7005347593582885, "percentage": 77.01, "elapsed_time": "0:17:44", "remaining_time": "0:05:17", "throughput": 2519.71, "total_tokens": 2681112} {"current_steps": 4325, "total_steps": 5610, "loss": 0.0172, "lr": 7.585487227887328e-06, "epoch": 7.709447415329768, "percentage": 77.09, "elapsed_time": "0:17:45", "remaining_time": "0:05:16", "throughput": 2520.35, "total_tokens": 2684856} {"current_steps": 4330, "total_steps": 5610, "loss": 0.0057, "lr": 7.529767831738366e-06, "epoch": 7.718360071301248, "percentage": 77.18, "elapsed_time": "0:17:46", "remaining_time": "0:05:15", "throughput": 2520.44, "total_tokens": 2687576} {"current_steps": 4335, "total_steps": 5610, "loss": 0.057, "lr": 7.474217529255018e-06, "epoch": 7.7272727272727275, "percentage": 77.27, "elapsed_time": "0:17:47", "remaining_time": "0:05:13", "throughput": 2520.59, "total_tokens": 2690328} {"current_steps": 4340, "total_steps": 5610, "loss": 0.0017, "lr": 7.4188368581064124e-06, "epoch": 7.736185383244207, "percentage": 77.36, "elapsed_time": "0:17:48", "remaining_time": "0:05:12", "throughput": 2521.29, "total_tokens": 2694168} {"current_steps": 4345, "total_steps": 5610, "loss": 0.0594, "lr": 7.3636263543197945e-06, "epoch": 7.745098039215686, "percentage": 77.45, "elapsed_time": "0:17:49", "remaining_time": "0:05:11", "throughput": 2521.52, "total_tokens": 2697208} {"current_steps": 4350, "total_steps": 5610, "loss": 0.0541, "lr": 7.30858655227539e-06, "epoch": 7.754010695187166, "percentage": 77.54, "elapsed_time": "0:17:50", "remaining_time": "0:05:10", "throughput": 2521.84, "total_tokens": 2700376} {"current_steps": 4355, "total_steps": 5610, "loss": 0.0507, "lr": 7.253717984701208e-06, "epoch": 7.762923351158645, "percentage": 77.63, "elapsed_time": "0:17:51", "remaining_time": "0:05:08", "throughput": 2522.03, "total_tokens": 2703256} {"current_steps": 4360, "total_steps": 5610, "loss": 0.1346, "lr": 7.199021182667873e-06, "epoch": 7.7718360071301245, "percentage": 77.72, "elapsed_time": "0:17:52", "remaining_time": "0:05:07", "throughput": 2521.97, "total_tokens": 2705752} {"current_steps": 4365, "total_steps": 5610, "loss": 0.1539, "lr": 7.1444966755834954e-06, "epoch": 7.780748663101605, "percentage": 77.81, "elapsed_time": "0:17:53", "remaining_time": "0:05:06", "throughput": 2522.28, "total_tokens": 2708888} {"current_steps": 4370, "total_steps": 5610, "loss": 0.0009, "lr": 7.0901449911885685e-06, "epoch": 7.789661319073084, "percentage": 77.9, "elapsed_time": "0:17:55", "remaining_time": "0:05:05", "throughput": 2522.39, "total_tokens": 2711576} {"current_steps": 4375, "total_steps": 5610, "loss": 0.0309, "lr": 7.035966655550838e-06, "epoch": 7.7985739750445635, "percentage": 77.99, "elapsed_time": "0:17:56", "remaining_time": "0:05:03", "throughput": 2522.89, "total_tokens": 2715000} {"current_steps": 4380, "total_steps": 5610, "loss": 0.0008, "lr": 6.98196219306019e-06, "epoch": 7.807486631016043, "percentage": 78.07, "elapsed_time": "0:17:57", "remaining_time": "0:05:02", "throughput": 2523.04, "total_tokens": 2717880} {"current_steps": 4385, "total_steps": 5610, "loss": 0.0408, "lr": 6.928132126423636e-06, "epoch": 7.816399286987522, "percentage": 78.16, "elapsed_time": "0:17:58", "remaining_time": "0:05:01", "throughput": 2523.49, "total_tokens": 2721240} {"current_steps": 4390, "total_steps": 5610, "loss": 0.1936, "lr": 6.8744769766601854e-06, "epoch": 7.825311942959002, "percentage": 78.25, "elapsed_time": "0:17:59", "remaining_time": "0:05:00", "throughput": 2523.95, "total_tokens": 2724696} {"current_steps": 4395, "total_steps": 5610, "loss": 0.0644, "lr": 6.820997263095849e-06, "epoch": 7.834224598930481, "percentage": 78.34, "elapsed_time": "0:18:00", "remaining_time": "0:04:58", "throughput": 2524.34, "total_tokens": 2727960} {"current_steps": 4400, "total_steps": 5610, "loss": 0.0025, "lr": 6.767693503358608e-06, "epoch": 7.8431372549019605, "percentage": 78.43, "elapsed_time": "0:18:01", "remaining_time": "0:04:57", "throughput": 2524.6, "total_tokens": 2731000} {"current_steps": 4405, "total_steps": 5610, "loss": 0.0377, "lr": 6.7145662133733715e-06, "epoch": 7.85204991087344, "percentage": 78.52, "elapsed_time": "0:18:02", "remaining_time": "0:04:56", "throughput": 2525.03, "total_tokens": 2734264} {"current_steps": 4410, "total_steps": 5610, "loss": 0.0446, "lr": 6.6616159073570135e-06, "epoch": 7.86096256684492, "percentage": 78.61, "elapsed_time": "0:18:03", "remaining_time": "0:04:54", "throughput": 2524.96, "total_tokens": 2736664} {"current_steps": 4415, "total_steps": 5610, "loss": 0.0262, "lr": 6.6088430978133914e-06, "epoch": 7.8698752228163995, "percentage": 78.7, "elapsed_time": "0:18:04", "remaining_time": "0:04:53", "throughput": 2525.21, "total_tokens": 2739672} {"current_steps": 4420, "total_steps": 5610, "loss": 0.0968, "lr": 6.556248295528389e-06, "epoch": 7.878787878787879, "percentage": 78.79, "elapsed_time": "0:18:06", "remaining_time": "0:04:52", "throughput": 2525.36, "total_tokens": 2742552} {"current_steps": 4425, "total_steps": 5610, "loss": 0.0121, "lr": 6.5038320095649395e-06, "epoch": 7.887700534759358, "percentage": 78.88, "elapsed_time": "0:18:07", "remaining_time": "0:04:51", "throughput": 2525.78, "total_tokens": 2745880} {"current_steps": 4430, "total_steps": 5610, "loss": 0.0374, "lr": 6.451594747258155e-06, "epoch": 7.896613190730838, "percentage": 78.97, "elapsed_time": "0:18:08", "remaining_time": "0:04:49", "throughput": 2526.53, "total_tokens": 2749912} {"current_steps": 4435, "total_steps": 5610, "loss": 0.0212, "lr": 6.399537014210355e-06, "epoch": 7.905525846702317, "percentage": 79.06, "elapsed_time": "0:18:09", "remaining_time": "0:04:48", "throughput": 2527.04, "total_tokens": 2753368} {"current_steps": 4440, "total_steps": 5610, "loss": 0.0653, "lr": 6.3476593142862275e-06, "epoch": 7.9144385026737964, "percentage": 79.14, "elapsed_time": "0:18:10", "remaining_time": "0:04:47", "throughput": 2527.35, "total_tokens": 2756568} {"current_steps": 4445, "total_steps": 5610, "loss": 0.0618, "lr": 6.29596214960792e-06, "epoch": 7.923351158645277, "percentage": 79.23, "elapsed_time": "0:18:11", "remaining_time": "0:04:46", "throughput": 2527.67, "total_tokens": 2759704} {"current_steps": 4450, "total_steps": 5610, "loss": 0.0238, "lr": 6.244446020550182e-06, "epoch": 7.932263814616756, "percentage": 79.32, "elapsed_time": "0:18:12", "remaining_time": "0:04:44", "throughput": 2527.85, "total_tokens": 2762584} {"current_steps": 4455, "total_steps": 5610, "loss": 0.0035, "lr": 6.193111425735515e-06, "epoch": 7.9411764705882355, "percentage": 79.41, "elapsed_time": "0:18:13", "remaining_time": "0:04:43", "throughput": 2528.16, "total_tokens": 2765752} {"current_steps": 4460, "total_steps": 5610, "loss": 0.0055, "lr": 6.141958862029384e-06, "epoch": 7.950089126559715, "percentage": 79.5, "elapsed_time": "0:18:15", "remaining_time": "0:04:42", "throughput": 2528.34, "total_tokens": 2768696} {"current_steps": 4465, "total_steps": 5610, "loss": 0.062, "lr": 6.090988824535374e-06, "epoch": 7.959001782531194, "percentage": 79.59, "elapsed_time": "0:18:16", "remaining_time": "0:04:41", "throughput": 2528.79, "total_tokens": 2772120} {"current_steps": 4470, "total_steps": 5610, "loss": 0.2793, "lr": 6.040201806590387e-06, "epoch": 7.967914438502674, "percentage": 79.68, "elapsed_time": "0:18:17", "remaining_time": "0:04:39", "throughput": 2529.18, "total_tokens": 2775384} {"current_steps": 4475, "total_steps": 5610, "loss": 0.0087, "lr": 5.989598299759919e-06, "epoch": 7.976827094474153, "percentage": 79.77, "elapsed_time": "0:18:18", "remaining_time": "0:04:38", "throughput": 2529.47, "total_tokens": 2778520} {"current_steps": 4480, "total_steps": 5610, "loss": 0.0137, "lr": 5.939178793833233e-06, "epoch": 7.9857397504456324, "percentage": 79.86, "elapsed_time": "0:18:19", "remaining_time": "0:04:37", "throughput": 2529.37, "total_tokens": 2780888} {"current_steps": 4485, "total_steps": 5610, "loss": 0.0554, "lr": 5.888943776818684e-06, "epoch": 7.994652406417112, "percentage": 79.95, "elapsed_time": "0:18:20", "remaining_time": "0:04:36", "throughput": 2529.85, "total_tokens": 2784312} {"current_steps": 4490, "total_steps": 5610, "loss": 0.0004, "lr": 5.83889373493896e-06, "epoch": 8.003565062388592, "percentage": 80.04, "elapsed_time": "0:18:21", "remaining_time": "0:04:34", "throughput": 2529.65, "total_tokens": 2787056} {"current_steps": 4495, "total_steps": 5610, "loss": 0.0794, "lr": 5.789029152626374e-06, "epoch": 8.01247771836007, "percentage": 80.12, "elapsed_time": "0:18:22", "remaining_time": "0:04:33", "throughput": 2529.98, "total_tokens": 2790288} {"current_steps": 4496, "total_steps": 5610, "eval_loss": 0.1566127985715866, "epoch": 8.014260249554367, "percentage": 80.14, "elapsed_time": "0:18:29", "remaining_time": "0:04:34", "throughput": 2515.65, "total_tokens": 2790832} {"current_steps": 4500, "total_steps": 5610, "loss": 0.0913, "lr": 5.73935051251818e-06, "epoch": 8.02139037433155, "percentage": 80.21, "elapsed_time": "0:18:30", "remaining_time": "0:04:34", "throughput": 2514.1, "total_tokens": 2793136} {"current_steps": 4505, "total_steps": 5610, "loss": 0.0212, "lr": 5.689858295451914e-06, "epoch": 8.030303030303031, "percentage": 80.3, "elapsed_time": "0:18:32", "remaining_time": "0:04:32", "throughput": 2514.55, "total_tokens": 2796464} {"current_steps": 4510, "total_steps": 5610, "loss": 0.0003, "lr": 5.640552980460742e-06, "epoch": 8.03921568627451, "percentage": 80.39, "elapsed_time": "0:18:33", "remaining_time": "0:04:31", "throughput": 2514.68, "total_tokens": 2799344} {"current_steps": 4515, "total_steps": 5610, "loss": 0.1263, "lr": 5.591435044768783e-06, "epoch": 8.04812834224599, "percentage": 80.48, "elapsed_time": "0:18:34", "remaining_time": "0:04:30", "throughput": 2514.57, "total_tokens": 2801648} {"current_steps": 4520, "total_steps": 5610, "loss": 0.0286, "lr": 5.542504963786552e-06, "epoch": 8.057040998217468, "percentage": 80.57, "elapsed_time": "0:18:35", "remaining_time": "0:04:28", "throughput": 2514.96, "total_tokens": 2804976} {"current_steps": 4525, "total_steps": 5610, "loss": 0.0051, "lr": 5.493763211106293e-06, "epoch": 8.065953654188949, "percentage": 80.66, "elapsed_time": "0:18:36", "remaining_time": "0:04:27", "throughput": 2515.02, "total_tokens": 2807472} {"current_steps": 4530, "total_steps": 5610, "loss": 0.0008, "lr": 5.4452102584974545e-06, "epoch": 8.074866310160427, "percentage": 80.75, "elapsed_time": "0:18:37", "remaining_time": "0:04:26", "throughput": 2515.44, "total_tokens": 2810768} {"current_steps": 4535, "total_steps": 5610, "loss": 0.0798, "lr": 5.396846575902095e-06, "epoch": 8.083778966131907, "percentage": 80.84, "elapsed_time": "0:18:38", "remaining_time": "0:04:25", "throughput": 2516.05, "total_tokens": 2814480} {"current_steps": 4540, "total_steps": 5610, "loss": 0.0871, "lr": 5.348672631430318e-06, "epoch": 8.092691622103386, "percentage": 80.93, "elapsed_time": "0:18:39", "remaining_time": "0:04:23", "throughput": 2516.55, "total_tokens": 2817968} {"current_steps": 4545, "total_steps": 5610, "loss": 0.0298, "lr": 5.300688891355765e-06, "epoch": 8.101604278074866, "percentage": 81.02, "elapsed_time": "0:18:40", "remaining_time": "0:04:22", "throughput": 2516.68, "total_tokens": 2820784} {"current_steps": 4550, "total_steps": 5610, "loss": 0.0013, "lr": 5.252895820111112e-06, "epoch": 8.110516934046347, "percentage": 81.11, "elapsed_time": "0:18:41", "remaining_time": "0:04:21", "throughput": 2516.96, "total_tokens": 2823824} {"current_steps": 4555, "total_steps": 5610, "loss": 0.0577, "lr": 5.205293880283552e-06, "epoch": 8.119429590017825, "percentage": 81.19, "elapsed_time": "0:18:43", "remaining_time": "0:04:20", "throughput": 2517.21, "total_tokens": 2826832} {"current_steps": 4560, "total_steps": 5610, "loss": 0.1202, "lr": 5.157883532610305e-06, "epoch": 8.128342245989305, "percentage": 81.28, "elapsed_time": "0:18:44", "remaining_time": "0:04:18", "throughput": 2517.65, "total_tokens": 2830256} {"current_steps": 4565, "total_steps": 5610, "loss": 0.0166, "lr": 5.110665235974219e-06, "epoch": 8.137254901960784, "percentage": 81.37, "elapsed_time": "0:18:45", "remaining_time": "0:04:17", "throughput": 2517.7, "total_tokens": 2832848} {"current_steps": 4570, "total_steps": 5610, "loss": 0.0079, "lr": 5.06363944739924e-06, "epoch": 8.146167557932264, "percentage": 81.46, "elapsed_time": "0:18:46", "remaining_time": "0:04:16", "throughput": 2517.84, "total_tokens": 2835664} {"current_steps": 4575, "total_steps": 5610, "loss": 0.0216, "lr": 5.0168066220460715e-06, "epoch": 8.155080213903743, "percentage": 81.55, "elapsed_time": "0:18:47", "remaining_time": "0:04:15", "throughput": 2518.12, "total_tokens": 2838864} {"current_steps": 4580, "total_steps": 5610, "loss": 0.0084, "lr": 4.97016721320773e-06, "epoch": 8.163992869875223, "percentage": 81.64, "elapsed_time": "0:18:48", "remaining_time": "0:04:13", "throughput": 2518.28, "total_tokens": 2841840} {"current_steps": 4585, "total_steps": 5610, "loss": 0.0398, "lr": 4.9237216723051485e-06, "epoch": 8.172905525846703, "percentage": 81.73, "elapsed_time": "0:18:49", "remaining_time": "0:04:12", "throughput": 2518.6, "total_tokens": 2844976} {"current_steps": 4590, "total_steps": 5610, "loss": 0.1104, "lr": 4.877470448882815e-06, "epoch": 8.181818181818182, "percentage": 81.82, "elapsed_time": "0:18:50", "remaining_time": "0:04:11", "throughput": 2518.75, "total_tokens": 2847856} {"current_steps": 4595, "total_steps": 5610, "loss": 0.005, "lr": 4.831413990604447e-06, "epoch": 8.190730837789662, "percentage": 81.91, "elapsed_time": "0:18:51", "remaining_time": "0:04:09", "throughput": 2518.62, "total_tokens": 2850192} {"current_steps": 4600, "total_steps": 5610, "loss": 0.0028, "lr": 4.7855527432486336e-06, "epoch": 8.19964349376114, "percentage": 82.0, "elapsed_time": "0:18:52", "remaining_time": "0:04:08", "throughput": 2518.73, "total_tokens": 2853008} {"current_steps": 4605, "total_steps": 5610, "loss": 0.0306, "lr": 4.739887150704508e-06, "epoch": 8.20855614973262, "percentage": 82.09, "elapsed_time": "0:18:53", "remaining_time": "0:04:07", "throughput": 2519.18, "total_tokens": 2856464} {"current_steps": 4610, "total_steps": 5610, "loss": 0.1142, "lr": 4.694417654967492e-06, "epoch": 8.2174688057041, "percentage": 82.17, "elapsed_time": "0:18:54", "remaining_time": "0:04:06", "throughput": 2519.06, "total_tokens": 2858864} {"current_steps": 4615, "total_steps": 5610, "loss": 0.1039, "lr": 4.649144696134972e-06, "epoch": 8.22638146167558, "percentage": 82.26, "elapsed_time": "0:18:55", "remaining_time": "0:04:04", "throughput": 2519.07, "total_tokens": 2861488} {"current_steps": 4620, "total_steps": 5610, "loss": 0.0168, "lr": 4.6040687124020794e-06, "epoch": 8.235294117647058, "percentage": 82.35, "elapsed_time": "0:18:57", "remaining_time": "0:04:03", "throughput": 2519.62, "total_tokens": 2865136} {"current_steps": 4625, "total_steps": 5610, "loss": 0.0975, "lr": 4.5591901400574285e-06, "epoch": 8.244206773618538, "percentage": 82.44, "elapsed_time": "0:18:58", "remaining_time": "0:04:02", "throughput": 2519.74, "total_tokens": 2867984} {"current_steps": 4630, "total_steps": 5610, "loss": 0.045, "lr": 4.514509413478888e-06, "epoch": 8.253119429590019, "percentage": 82.53, "elapsed_time": "0:18:59", "remaining_time": "0:04:01", "throughput": 2520.02, "total_tokens": 2871088} {"current_steps": 4635, "total_steps": 5610, "loss": 0.0047, "lr": 4.470026965129384e-06, "epoch": 8.262032085561497, "percentage": 82.62, "elapsed_time": "0:19:00", "remaining_time": "0:03:59", "throughput": 2520.37, "total_tokens": 2874352} {"current_steps": 4640, "total_steps": 5610, "loss": 0.1698, "lr": 4.425743225552731e-06, "epoch": 8.270944741532977, "percentage": 82.71, "elapsed_time": "0:19:01", "remaining_time": "0:03:58", "throughput": 2520.88, "total_tokens": 2877840} {"current_steps": 4645, "total_steps": 5610, "loss": 0.04, "lr": 4.381658623369445e-06, "epoch": 8.279857397504456, "percentage": 82.8, "elapsed_time": "0:19:02", "remaining_time": "0:03:57", "throughput": 2521.4, "total_tokens": 2881456} {"current_steps": 4650, "total_steps": 5610, "loss": 0.0493, "lr": 4.337773585272581e-06, "epoch": 8.288770053475936, "percentage": 82.89, "elapsed_time": "0:19:03", "remaining_time": "0:03:56", "throughput": 2521.65, "total_tokens": 2884400} {"current_steps": 4655, "total_steps": 5610, "loss": 0.0691, "lr": 4.294088536023652e-06, "epoch": 8.297682709447415, "percentage": 82.98, "elapsed_time": "0:19:04", "remaining_time": "0:03:54", "throughput": 2521.96, "total_tokens": 2887536} {"current_steps": 4660, "total_steps": 5610, "loss": 0.0404, "lr": 4.250603898448455e-06, "epoch": 8.306595365418895, "percentage": 83.07, "elapsed_time": "0:19:06", "remaining_time": "0:03:53", "throughput": 2522.05, "total_tokens": 2890352} {"current_steps": 4665, "total_steps": 5610, "loss": 0.0727, "lr": 4.2073200934330315e-06, "epoch": 8.315508021390375, "percentage": 83.16, "elapsed_time": "0:19:07", "remaining_time": "0:03:52", "throughput": 2522.35, "total_tokens": 2893520} {"current_steps": 4670, "total_steps": 5610, "loss": 0.1344, "lr": 4.164237539919577e-06, "epoch": 8.324420677361854, "percentage": 83.24, "elapsed_time": "0:19:08", "remaining_time": "0:03:51", "throughput": 2522.35, "total_tokens": 2896048} {"current_steps": 4675, "total_steps": 5610, "loss": 0.0411, "lr": 4.121356654902364e-06, "epoch": 8.333333333333334, "percentage": 83.33, "elapsed_time": "0:19:09", "remaining_time": "0:03:49", "throughput": 2522.76, "total_tokens": 2899472} {"current_steps": 4680, "total_steps": 5610, "loss": 0.0012, "lr": 4.078677853423724e-06, "epoch": 8.342245989304812, "percentage": 83.42, "elapsed_time": "0:19:10", "remaining_time": "0:03:48", "throughput": 2523.15, "total_tokens": 2902832} {"current_steps": 4685, "total_steps": 5610, "loss": 0.08, "lr": 4.036201548570049e-06, "epoch": 8.351158645276293, "percentage": 83.51, "elapsed_time": "0:19:11", "remaining_time": "0:03:47", "throughput": 2523.74, "total_tokens": 2906576} {"current_steps": 4690, "total_steps": 5610, "loss": 0.0402, "lr": 3.993928151467766e-06, "epoch": 8.360071301247771, "percentage": 83.6, "elapsed_time": "0:19:12", "remaining_time": "0:03:46", "throughput": 2524.14, "total_tokens": 2909840} {"current_steps": 4695, "total_steps": 5610, "loss": 0.0201, "lr": 3.951858071279352e-06, "epoch": 8.368983957219251, "percentage": 83.69, "elapsed_time": "0:19:13", "remaining_time": "0:03:44", "throughput": 2524.3, "total_tokens": 2912752} {"current_steps": 4700, "total_steps": 5610, "loss": 0.0012, "lr": 3.909991715199412e-06, "epoch": 8.37789661319073, "percentage": 83.78, "elapsed_time": "0:19:14", "remaining_time": "0:03:43", "throughput": 2524.13, "total_tokens": 2915024} {"current_steps": 4705, "total_steps": 5610, "loss": 0.0012, "lr": 3.8683294884506945e-06, "epoch": 8.38680926916221, "percentage": 83.87, "elapsed_time": "0:19:16", "remaining_time": "0:03:42", "throughput": 2524.58, "total_tokens": 2918480} {"current_steps": 4710, "total_steps": 5610, "loss": 0.0716, "lr": 3.826871794280193e-06, "epoch": 8.39572192513369, "percentage": 83.96, "elapsed_time": "0:19:17", "remaining_time": "0:03:41", "throughput": 2524.92, "total_tokens": 2921712} {"current_steps": 4715, "total_steps": 5610, "loss": 0.0972, "lr": 3.7856190339552513e-06, "epoch": 8.404634581105169, "percentage": 84.05, "elapsed_time": "0:19:18", "remaining_time": "0:03:39", "throughput": 2525.34, "total_tokens": 2925040} {"current_steps": 4720, "total_steps": 5610, "loss": 0.0277, "lr": 3.7445716067596503e-06, "epoch": 8.41354723707665, "percentage": 84.14, "elapsed_time": "0:19:19", "remaining_time": "0:03:38", "throughput": 2525.6, "total_tokens": 2928112} {"current_steps": 4725, "total_steps": 5610, "loss": 0.0825, "lr": 3.7037299099897586e-06, "epoch": 8.422459893048128, "percentage": 84.22, "elapsed_time": "0:19:20", "remaining_time": "0:03:37", "throughput": 2526.46, "total_tokens": 2932368} {"current_steps": 4730, "total_steps": 5610, "loss": 0.055, "lr": 3.663094338950704e-06, "epoch": 8.431372549019608, "percentage": 84.31, "elapsed_time": "0:19:21", "remaining_time": "0:03:36", "throughput": 2526.54, "total_tokens": 2935088} {"current_steps": 4735, "total_steps": 5610, "loss": 0.0191, "lr": 3.6226652869525285e-06, "epoch": 8.440285204991087, "percentage": 84.4, "elapsed_time": "0:19:22", "remaining_time": "0:03:34", "throughput": 2526.64, "total_tokens": 2937840} {"current_steps": 4740, "total_steps": 5610, "loss": 0.0527, "lr": 3.5824431453063662e-06, "epoch": 8.449197860962567, "percentage": 84.49, "elapsed_time": "0:19:23", "remaining_time": "0:03:33", "throughput": 2526.92, "total_tokens": 2941008} {"current_steps": 4745, "total_steps": 5610, "loss": 0.1851, "lr": 3.5424283033207024e-06, "epoch": 8.458110516934047, "percentage": 84.58, "elapsed_time": "0:19:25", "remaining_time": "0:03:32", "throughput": 2527.37, "total_tokens": 2944464} {"current_steps": 4750, "total_steps": 5610, "loss": 0.0429, "lr": 3.5026211482975497e-06, "epoch": 8.467023172905526, "percentage": 84.67, "elapsed_time": "0:19:26", "remaining_time": "0:03:31", "throughput": 2527.53, "total_tokens": 2947376} {"current_steps": 4755, "total_steps": 5610, "loss": 0.0462, "lr": 3.463022065528748e-06, "epoch": 8.475935828877006, "percentage": 84.76, "elapsed_time": "0:19:27", "remaining_time": "0:03:29", "throughput": 2527.78, "total_tokens": 2950480} {"current_steps": 4760, "total_steps": 5610, "loss": 0.0008, "lr": 3.4236314382922103e-06, "epoch": 8.484848484848484, "percentage": 84.85, "elapsed_time": "0:19:28", "remaining_time": "0:03:28", "throughput": 2527.99, "total_tokens": 2953392} {"current_steps": 4765, "total_steps": 5610, "loss": 0.0007, "lr": 3.3844496478482064e-06, "epoch": 8.493761140819965, "percentage": 84.94, "elapsed_time": "0:19:29", "remaining_time": "0:03:27", "throughput": 2528.22, "total_tokens": 2956272} {"current_steps": 4770, "total_steps": 5610, "loss": 0.1504, "lr": 3.345477073435685e-06, "epoch": 8.502673796791443, "percentage": 85.03, "elapsed_time": "0:19:30", "remaining_time": "0:03:26", "throughput": 2528.34, "total_tokens": 2959056} {"current_steps": 4775, "total_steps": 5610, "loss": 0.0063, "lr": 3.3067140922686174e-06, "epoch": 8.511586452762923, "percentage": 85.12, "elapsed_time": "0:19:31", "remaining_time": "0:03:24", "throughput": 2528.76, "total_tokens": 2962480} {"current_steps": 4777, "total_steps": 5610, "eval_loss": 0.1583101898431778, "epoch": 8.515151515151516, "percentage": 85.15, "elapsed_time": "0:19:38", "remaining_time": "0:03:25", "throughput": 2515.44, "total_tokens": 2963888} {"current_steps": 4780, "total_steps": 5610, "loss": 0.0027, "lr": 3.268161079532317e-06, "epoch": 8.520499108734402, "percentage": 85.2, "elapsed_time": "0:19:39", "remaining_time": "0:03:24", "throughput": 2513.8, "total_tokens": 2965360} {"current_steps": 4785, "total_steps": 5610, "loss": 0.0006, "lr": 3.22981840837982e-06, "epoch": 8.529411764705882, "percentage": 85.29, "elapsed_time": "0:19:40", "remaining_time": "0:03:23", "throughput": 2514.08, "total_tokens": 2968464} {"current_steps": 4790, "total_steps": 5610, "loss": 0.0389, "lr": 3.1916864499282856e-06, "epoch": 8.538324420677363, "percentage": 85.38, "elapsed_time": "0:19:41", "remaining_time": "0:03:22", "throughput": 2514.64, "total_tokens": 2972144} {"current_steps": 4795, "total_steps": 5610, "loss": 0.0008, "lr": 3.1537655732553768e-06, "epoch": 8.547237076648841, "percentage": 85.47, "elapsed_time": "0:19:42", "remaining_time": "0:03:21", "throughput": 2514.44, "total_tokens": 2974384} {"current_steps": 4800, "total_steps": 5610, "loss": 0.063, "lr": 3.1160561453957183e-06, "epoch": 8.556149732620321, "percentage": 85.56, "elapsed_time": "0:19:43", "remaining_time": "0:03:19", "throughput": 2514.55, "total_tokens": 2977104} {"current_steps": 4805, "total_steps": 5610, "loss": 0.0639, "lr": 3.078558531337336e-06, "epoch": 8.5650623885918, "percentage": 85.65, "elapsed_time": "0:19:45", "remaining_time": "0:03:18", "throughput": 2514.92, "total_tokens": 2980464} {"current_steps": 4810, "total_steps": 5610, "loss": 0.0005, "lr": 3.0412730940181015e-06, "epoch": 8.57397504456328, "percentage": 85.74, "elapsed_time": "0:19:46", "remaining_time": "0:03:17", "throughput": 2515.01, "total_tokens": 2983248} {"current_steps": 4815, "total_steps": 5610, "loss": 0.0015, "lr": 3.0042001943222376e-06, "epoch": 8.582887700534759, "percentage": 85.83, "elapsed_time": "0:19:47", "remaining_time": "0:03:16", "throughput": 2515.2, "total_tokens": 2986256} {"current_steps": 4820, "total_steps": 5610, "loss": 0.0002, "lr": 2.967340191076834e-06, "epoch": 8.591800356506239, "percentage": 85.92, "elapsed_time": "0:19:48", "remaining_time": "0:03:14", "throughput": 2515.9, "total_tokens": 2990256} {"current_steps": 4825, "total_steps": 5610, "loss": 0.0588, "lr": 2.930693441048371e-06, "epoch": 8.60071301247772, "percentage": 86.01, "elapsed_time": "0:19:49", "remaining_time": "0:03:13", "throughput": 2515.75, "total_tokens": 2992592} {"current_steps": 4830, "total_steps": 5610, "loss": 0.1028, "lr": 2.8942602989392386e-06, "epoch": 8.609625668449198, "percentage": 86.1, "elapsed_time": "0:19:50", "remaining_time": "0:03:12", "throughput": 2516.14, "total_tokens": 2995888} {"current_steps": 4835, "total_steps": 5610, "loss": 0.0167, "lr": 2.858041117384341e-06, "epoch": 8.618538324420678, "percentage": 86.19, "elapsed_time": "0:19:51", "remaining_time": "0:03:11", "throughput": 2516.58, "total_tokens": 2999280} {"current_steps": 4840, "total_steps": 5610, "loss": 0.1074, "lr": 2.8220362469476624e-06, "epoch": 8.627450980392156, "percentage": 86.27, "elapsed_time": "0:19:53", "remaining_time": "0:03:09", "throughput": 2517.05, "total_tokens": 3002864} {"current_steps": 4845, "total_steps": 5610, "loss": 0.0163, "lr": 2.7862460361188614e-06, "epoch": 8.636363636363637, "percentage": 86.36, "elapsed_time": "0:19:53", "remaining_time": "0:03:08", "throughput": 2516.82, "total_tokens": 3004944} {"current_steps": 4850, "total_steps": 5610, "loss": 0.0012, "lr": 2.750670831309957e-06, "epoch": 8.645276292335115, "percentage": 86.45, "elapsed_time": "0:19:55", "remaining_time": "0:03:07", "throughput": 2517.28, "total_tokens": 3008464} {"current_steps": 4855, "total_steps": 5610, "loss": 0.0027, "lr": 2.7153109768518925e-06, "epoch": 8.654188948306595, "percentage": 86.54, "elapsed_time": "0:19:56", "remaining_time": "0:03:06", "throughput": 2517.91, "total_tokens": 3012240} {"current_steps": 4860, "total_steps": 5610, "loss": 0.0144, "lr": 2.680166814991256e-06, "epoch": 8.663101604278076, "percentage": 86.63, "elapsed_time": "0:19:57", "remaining_time": "0:03:04", "throughput": 2518.01, "total_tokens": 3015056} {"current_steps": 4865, "total_steps": 5610, "loss": 0.0146, "lr": 2.645238685886961e-06, "epoch": 8.672014260249554, "percentage": 86.72, "elapsed_time": "0:19:58", "remaining_time": "0:03:03", "throughput": 2518.28, "total_tokens": 3018160} {"current_steps": 4870, "total_steps": 5610, "loss": 0.1039, "lr": 2.6105269276069573e-06, "epoch": 8.680926916221035, "percentage": 86.81, "elapsed_time": "0:19:59", "remaining_time": "0:03:02", "throughput": 2518.59, "total_tokens": 3021392} {"current_steps": 4875, "total_steps": 5610, "loss": 0.0219, "lr": 2.5760318761249263e-06, "epoch": 8.689839572192513, "percentage": 86.9, "elapsed_time": "0:20:00", "remaining_time": "0:03:01", "throughput": 2518.74, "total_tokens": 3024240} {"current_steps": 4880, "total_steps": 5610, "loss": 0.0881, "lr": 2.541753865317076e-06, "epoch": 8.698752228163993, "percentage": 86.99, "elapsed_time": "0:20:01", "remaining_time": "0:02:59", "throughput": 2518.76, "total_tokens": 3026800} {"current_steps": 4885, "total_steps": 5610, "loss": 0.0007, "lr": 2.507693226958871e-06, "epoch": 8.707664884135472, "percentage": 87.08, "elapsed_time": "0:20:02", "remaining_time": "0:02:58", "throughput": 2519.06, "total_tokens": 3029968} {"current_steps": 4890, "total_steps": 5610, "loss": 0.1465, "lr": 2.473850290721838e-06, "epoch": 8.716577540106952, "percentage": 87.17, "elapsed_time": "0:20:03", "remaining_time": "0:02:57", "throughput": 2519.14, "total_tokens": 3032656} {"current_steps": 4895, "total_steps": 5610, "loss": 0.0205, "lr": 2.4402253841703914e-06, "epoch": 8.72549019607843, "percentage": 87.25, "elapsed_time": "0:20:04", "remaining_time": "0:02:55", "throughput": 2519.22, "total_tokens": 3035376} {"current_steps": 4900, "total_steps": 5610, "loss": 0.1735, "lr": 2.4068188327586257e-06, "epoch": 8.73440285204991, "percentage": 87.34, "elapsed_time": "0:20:06", "remaining_time": "0:02:54", "throughput": 2519.49, "total_tokens": 3038512} {"current_steps": 4905, "total_steps": 5610, "loss": 0.0055, "lr": 2.373630959827186e-06, "epoch": 8.743315508021391, "percentage": 87.43, "elapsed_time": "0:20:07", "remaining_time": "0:02:53", "throughput": 2519.79, "total_tokens": 3041744} {"current_steps": 4910, "total_steps": 5610, "loss": 0.0884, "lr": 2.3406620866001485e-06, "epoch": 8.75222816399287, "percentage": 87.52, "elapsed_time": "0:20:08", "remaining_time": "0:02:52", "throughput": 2520.22, "total_tokens": 3045232} {"current_steps": 4915, "total_steps": 5610, "loss": 0.0012, "lr": 2.3079125321818996e-06, "epoch": 8.76114081996435, "percentage": 87.61, "elapsed_time": "0:20:09", "remaining_time": "0:02:51", "throughput": 2520.17, "total_tokens": 3047728} {"current_steps": 4920, "total_steps": 5610, "loss": 0.1771, "lr": 2.275382613554031e-06, "epoch": 8.770053475935828, "percentage": 87.7, "elapsed_time": "0:20:10", "remaining_time": "0:02:49", "throughput": 2520.41, "total_tokens": 3050864} {"current_steps": 4925, "total_steps": 5610, "loss": 0.0071, "lr": 2.2430726455723113e-06, "epoch": 8.778966131907309, "percentage": 87.79, "elapsed_time": "0:20:11", "remaining_time": "0:02:48", "throughput": 2520.51, "total_tokens": 3053680} {"current_steps": 4930, "total_steps": 5610, "loss": 0.0768, "lr": 2.210982940963596e-06, "epoch": 8.787878787878787, "percentage": 87.88, "elapsed_time": "0:20:12", "remaining_time": "0:02:47", "throughput": 2520.95, "total_tokens": 3057136} {"current_steps": 4935, "total_steps": 5610, "loss": 0.0193, "lr": 2.1791138103228275e-06, "epoch": 8.796791443850267, "percentage": 87.97, "elapsed_time": "0:20:13", "remaining_time": "0:02:46", "throughput": 2521.15, "total_tokens": 3060144} {"current_steps": 4940, "total_steps": 5610, "loss": 0.1267, "lr": 2.1474655621100347e-06, "epoch": 8.805704099821746, "percentage": 88.06, "elapsed_time": "0:20:14", "remaining_time": "0:02:44", "throughput": 2521.29, "total_tokens": 3063024} {"current_steps": 4945, "total_steps": 5610, "loss": 0.001, "lr": 2.116038502647319e-06, "epoch": 8.814616755793226, "percentage": 88.15, "elapsed_time": "0:20:16", "remaining_time": "0:02:43", "throughput": 2521.64, "total_tokens": 3066320} {"current_steps": 4950, "total_steps": 5610, "loss": 0.0392, "lr": 2.084832936115902e-06, "epoch": 8.823529411764707, "percentage": 88.24, "elapsed_time": "0:20:17", "remaining_time": "0:02:42", "throughput": 2521.81, "total_tokens": 3069296} {"current_steps": 4955, "total_steps": 5610, "loss": 0.0302, "lr": 2.0538491645531982e-06, "epoch": 8.832442067736185, "percentage": 88.32, "elapsed_time": "0:20:18", "remaining_time": "0:02:41", "throughput": 2521.81, "total_tokens": 3071888} {"current_steps": 4960, "total_steps": 5610, "loss": 0.0071, "lr": 2.0230874878498648e-06, "epoch": 8.841354723707665, "percentage": 88.41, "elapsed_time": "0:20:19", "remaining_time": "0:02:39", "throughput": 2522.48, "total_tokens": 3075984} {"current_steps": 4965, "total_steps": 5610, "loss": 0.024, "lr": 1.9925482037469188e-06, "epoch": 8.850267379679144, "percentage": 88.5, "elapsed_time": "0:20:20", "remaining_time": "0:02:38", "throughput": 2522.76, "total_tokens": 3079152} {"current_steps": 4970, "total_steps": 5610, "loss": 0.0991, "lr": 1.9622316078328566e-06, "epoch": 8.859180035650624, "percentage": 88.59, "elapsed_time": "0:20:21", "remaining_time": "0:02:37", "throughput": 2523.13, "total_tokens": 3082544} {"current_steps": 4975, "total_steps": 5610, "loss": 0.0211, "lr": 1.9321379935407697e-06, "epoch": 8.868092691622103, "percentage": 88.68, "elapsed_time": "0:20:22", "remaining_time": "0:02:36", "throughput": 2523.38, "total_tokens": 3085680} {"current_steps": 4980, "total_steps": 5610, "loss": 0.1267, "lr": 1.9022676521455117e-06, "epoch": 8.877005347593583, "percentage": 88.77, "elapsed_time": "0:20:24", "remaining_time": "0:02:34", "throughput": 2523.93, "total_tokens": 3089392} {"current_steps": 4985, "total_steps": 5610, "loss": 0.0369, "lr": 1.8726208727609219e-06, "epoch": 8.885918003565063, "percentage": 88.86, "elapsed_time": "0:20:25", "remaining_time": "0:02:33", "throughput": 2524.25, "total_tokens": 3092656} {"current_steps": 4990, "total_steps": 5610, "loss": 0.0336, "lr": 1.8431979423369604e-06, "epoch": 8.894830659536542, "percentage": 88.95, "elapsed_time": "0:20:26", "remaining_time": "0:02:32", "throughput": 2524.44, "total_tokens": 3095600} {"current_steps": 4995, "total_steps": 5610, "loss": 0.0862, "lr": 1.8139991456569694e-06, "epoch": 8.903743315508022, "percentage": 89.04, "elapsed_time": "0:20:27", "remaining_time": "0:02:31", "throughput": 2524.49, "total_tokens": 3098320} {"current_steps": 5000, "total_steps": 5610, "loss": 0.0456, "lr": 1.7850247653349223e-06, "epoch": 8.9126559714795, "percentage": 89.13, "elapsed_time": "0:20:28", "remaining_time": "0:02:29", "throughput": 2524.82, "total_tokens": 3101520} {"current_steps": 5005, "total_steps": 5610, "loss": 0.0171, "lr": 1.7562750818126556e-06, "epoch": 8.92156862745098, "percentage": 89.22, "elapsed_time": "0:20:29", "remaining_time": "0:02:28", "throughput": 2525.13, "total_tokens": 3104816} {"current_steps": 5010, "total_steps": 5610, "loss": 0.0012, "lr": 1.727750373357187e-06, "epoch": 8.93048128342246, "percentage": 89.3, "elapsed_time": "0:20:30", "remaining_time": "0:02:27", "throughput": 2525.52, "total_tokens": 3108176} {"current_steps": 5015, "total_steps": 5610, "loss": 0.1332, "lr": 1.699450916058018e-06, "epoch": 8.93939393939394, "percentage": 89.39, "elapsed_time": "0:20:31", "remaining_time": "0:02:26", "throughput": 2525.74, "total_tokens": 3111248} {"current_steps": 5020, "total_steps": 5610, "loss": 0.1009, "lr": 1.6713769838244325e-06, "epoch": 8.94830659536542, "percentage": 89.48, "elapsed_time": "0:20:32", "remaining_time": "0:02:24", "throughput": 2525.97, "total_tokens": 3114224} {"current_steps": 5025, "total_steps": 5610, "loss": 0.0024, "lr": 1.6435288483828748e-06, "epoch": 8.957219251336898, "percentage": 89.57, "elapsed_time": "0:20:33", "remaining_time": "0:02:23", "throughput": 2526.19, "total_tokens": 3117232} {"current_steps": 5030, "total_steps": 5610, "loss": 0.0626, "lr": 1.615906779274326e-06, "epoch": 8.966131907308379, "percentage": 89.66, "elapsed_time": "0:20:35", "remaining_time": "0:02:22", "throughput": 2526.35, "total_tokens": 3120240} {"current_steps": 5035, "total_steps": 5610, "loss": 0.2263, "lr": 1.588511043851662e-06, "epoch": 8.975044563279857, "percentage": 89.75, "elapsed_time": "0:20:36", "remaining_time": "0:02:21", "throughput": 2526.86, "total_tokens": 3123792} {"current_steps": 5040, "total_steps": 5610, "loss": 0.0026, "lr": 1.5613419072770864e-06, "epoch": 8.983957219251337, "percentage": 89.84, "elapsed_time": "0:20:37", "remaining_time": "0:02:19", "throughput": 2527.25, "total_tokens": 3127184} {"current_steps": 5045, "total_steps": 5610, "loss": 0.0026, "lr": 1.534399632519573e-06, "epoch": 8.992869875222816, "percentage": 89.93, "elapsed_time": "0:20:38", "remaining_time": "0:02:18", "throughput": 2527.59, "total_tokens": 3130480} {"current_steps": 5050, "total_steps": 5610, "loss": 0.0006, "lr": 1.5076844803522922e-06, "epoch": 9.001782531194296, "percentage": 90.02, "elapsed_time": "0:20:39", "remaining_time": "0:02:17", "throughput": 2527.16, "total_tokens": 3132712} {"current_steps": 5055, "total_steps": 5610, "loss": 0.0007, "lr": 1.4811967093501189e-06, "epoch": 9.010695187165775, "percentage": 90.11, "elapsed_time": "0:20:40", "remaining_time": "0:02:16", "throughput": 2527.21, "total_tokens": 3135400} {"current_steps": 5058, "total_steps": 5610, "eval_loss": 0.15806140005588531, "epoch": 9.016042780748663, "percentage": 90.16, "elapsed_time": "0:20:47", "remaining_time": "0:02:16", "throughput": 2514.68, "total_tokens": 3137352} {"current_steps": 5060, "total_steps": 5610, "loss": 0.0742, "lr": 1.4549365758871142e-06, "epoch": 9.019607843137255, "percentage": 90.2, "elapsed_time": "0:20:48", "remaining_time": "0:02:15", "throughput": 2513.13, "total_tokens": 3138248} {"current_steps": 5065, "total_steps": 5610, "loss": 0.0338, "lr": 1.4289043341340375e-06, "epoch": 9.028520499108735, "percentage": 90.29, "elapsed_time": "0:20:49", "remaining_time": "0:02:14", "throughput": 2513.47, "total_tokens": 3141480} {"current_steps": 5070, "total_steps": 5610, "loss": 0.0035, "lr": 1.4031002360558849e-06, "epoch": 9.037433155080214, "percentage": 90.37, "elapsed_time": "0:20:51", "remaining_time": "0:02:13", "throughput": 2513.85, "total_tokens": 3144904} {"current_steps": 5075, "total_steps": 5610, "loss": 0.0448, "lr": 1.377524531409491e-06, "epoch": 9.046345811051694, "percentage": 90.46, "elapsed_time": "0:20:52", "remaining_time": "0:02:12", "throughput": 2514.58, "total_tokens": 3148968} {"current_steps": 5080, "total_steps": 5610, "loss": 0.0376, "lr": 1.3521774677410476e-06, "epoch": 9.055258467023172, "percentage": 90.55, "elapsed_time": "0:20:53", "remaining_time": "0:02:10", "throughput": 2514.74, "total_tokens": 3151912} {"current_steps": 5085, "total_steps": 5610, "loss": 0.0003, "lr": 1.3270592903837503e-06, "epoch": 9.064171122994653, "percentage": 90.64, "elapsed_time": "0:20:54", "remaining_time": "0:02:09", "throughput": 2515.01, "total_tokens": 3155080} {"current_steps": 5090, "total_steps": 5610, "loss": 0.0624, "lr": 1.3021702424554221e-06, "epoch": 9.073083778966131, "percentage": 90.73, "elapsed_time": "0:20:55", "remaining_time": "0:02:08", "throughput": 2515.04, "total_tokens": 3157768} {"current_steps": 5095, "total_steps": 5610, "loss": 0.0089, "lr": 1.2775105648561352e-06, "epoch": 9.081996434937611, "percentage": 90.82, "elapsed_time": "0:20:56", "remaining_time": "0:02:07", "throughput": 2515.45, "total_tokens": 3161224} {"current_steps": 5100, "total_steps": 5610, "loss": 0.0148, "lr": 1.2530804962659098e-06, "epoch": 9.090909090909092, "percentage": 90.91, "elapsed_time": "0:20:57", "remaining_time": "0:02:05", "throughput": 2515.54, "total_tokens": 3163944} {"current_steps": 5105, "total_steps": 5610, "loss": 0.0874, "lr": 1.2288802731423883e-06, "epoch": 9.09982174688057, "percentage": 91.0, "elapsed_time": "0:20:58", "remaining_time": "0:02:04", "throughput": 2515.63, "total_tokens": 3166728} {"current_steps": 5110, "total_steps": 5610, "loss": 0.0666, "lr": 1.2049101297185422e-06, "epoch": 9.10873440285205, "percentage": 91.09, "elapsed_time": "0:20:59", "remaining_time": "0:02:03", "throughput": 2516.0, "total_tokens": 3170120} {"current_steps": 5115, "total_steps": 5610, "loss": 0.0025, "lr": 1.1811702980004058e-06, "epoch": 9.117647058823529, "percentage": 91.18, "elapsed_time": "0:21:01", "remaining_time": "0:02:02", "throughput": 2516.17, "total_tokens": 3173000} {"current_steps": 5120, "total_steps": 5610, "loss": 0.0083, "lr": 1.1576610077648513e-06, "epoch": 9.12655971479501, "percentage": 91.27, "elapsed_time": "0:21:02", "remaining_time": "0:02:00", "throughput": 2516.62, "total_tokens": 3176520} {"current_steps": 5125, "total_steps": 5610, "loss": 0.0005, "lr": 1.134382486557342e-06, "epoch": 9.135472370766488, "percentage": 91.35, "elapsed_time": "0:21:03", "remaining_time": "0:01:59", "throughput": 2516.82, "total_tokens": 3179496} {"current_steps": 5130, "total_steps": 5610, "loss": 0.0083, "lr": 1.1113349596897331e-06, "epoch": 9.144385026737968, "percentage": 91.44, "elapsed_time": "0:21:04", "remaining_time": "0:01:58", "throughput": 2516.92, "total_tokens": 3182248} {"current_steps": 5135, "total_steps": 5610, "loss": 0.0882, "lr": 1.0885186502381017e-06, "epoch": 9.153297682709447, "percentage": 91.53, "elapsed_time": "0:21:05", "remaining_time": "0:01:57", "throughput": 2516.92, "total_tokens": 3184840} {"current_steps": 5140, "total_steps": 5610, "loss": 0.0108, "lr": 1.0659337790405704e-06, "epoch": 9.162210338680927, "percentage": 91.62, "elapsed_time": "0:21:06", "remaining_time": "0:01:55", "throughput": 2517.02, "total_tokens": 3187720} {"current_steps": 5145, "total_steps": 5610, "loss": 0.0015, "lr": 1.0435805646951958e-06, "epoch": 9.171122994652407, "percentage": 91.71, "elapsed_time": "0:21:07", "remaining_time": "0:01:54", "throughput": 2517.14, "total_tokens": 3190536} {"current_steps": 5150, "total_steps": 5610, "loss": 0.0013, "lr": 1.0214592235578274e-06, "epoch": 9.180035650623886, "percentage": 91.8, "elapsed_time": "0:21:08", "remaining_time": "0:01:53", "throughput": 2517.38, "total_tokens": 3193608} {"current_steps": 5155, "total_steps": 5610, "loss": 0.0014, "lr": 9.995699697400247e-07, "epoch": 9.188948306595366, "percentage": 91.89, "elapsed_time": "0:21:09", "remaining_time": "0:01:52", "throughput": 2517.71, "total_tokens": 3196936} {"current_steps": 5160, "total_steps": 5610, "loss": 0.0024, "lr": 9.77913015106982e-07, "epoch": 9.197860962566844, "percentage": 91.98, "elapsed_time": "0:21:10", "remaining_time": "0:01:50", "throughput": 2517.95, "total_tokens": 3200040} {"current_steps": 5165, "total_steps": 5610, "loss": 0.0527, "lr": 9.564885692754793e-07, "epoch": 9.206773618538325, "percentage": 92.07, "elapsed_time": "0:21:12", "remaining_time": "0:01:49", "throughput": 2518.26, "total_tokens": 3203240} {"current_steps": 5170, "total_steps": 5610, "loss": 0.0041, "lr": 9.352968396118628e-07, "epoch": 9.215686274509803, "percentage": 92.16, "elapsed_time": "0:21:13", "remaining_time": "0:01:48", "throughput": 2518.57, "total_tokens": 3206376} {"current_steps": 5175, "total_steps": 5610, "loss": 0.0784, "lr": 9.143380312300137e-07, "epoch": 9.224598930481283, "percentage": 92.25, "elapsed_time": "0:21:14", "remaining_time": "0:01:47", "throughput": 2518.79, "total_tokens": 3209480} {"current_steps": 5180, "total_steps": 5610, "loss": 0.0373, "lr": 8.936123469893892e-07, "epoch": 9.233511586452764, "percentage": 92.34, "elapsed_time": "0:21:15", "remaining_time": "0:01:45", "throughput": 2519.45, "total_tokens": 3213448} {"current_steps": 5185, "total_steps": 5610, "loss": 0.126, "lr": 8.731199874930374e-07, "epoch": 9.242424242424242, "percentage": 92.42, "elapsed_time": "0:21:16", "remaining_time": "0:01:44", "throughput": 2519.82, "total_tokens": 3216776} {"current_steps": 5190, "total_steps": 5610, "loss": 0.0006, "lr": 8.528611510856766e-07, "epoch": 9.251336898395722, "percentage": 92.51, "elapsed_time": "0:21:17", "remaining_time": "0:01:43", "throughput": 2520.03, "total_tokens": 3219752} {"current_steps": 5195, "total_steps": 5610, "loss": 0.1164, "lr": 8.328360338517583e-07, "epoch": 9.260249554367201, "percentage": 92.6, "elapsed_time": "0:21:18", "remaining_time": "0:01:42", "throughput": 2520.39, "total_tokens": 3223048} {"current_steps": 5200, "total_steps": 5610, "loss": 0.0017, "lr": 8.130448296135768e-07, "epoch": 9.269162210338681, "percentage": 92.69, "elapsed_time": "0:21:20", "remaining_time": "0:01:40", "throughput": 2521.01, "total_tokens": 3226984} {"current_steps": 5205, "total_steps": 5610, "loss": 0.035, "lr": 7.934877299293875e-07, "epoch": 9.27807486631016, "percentage": 92.78, "elapsed_time": "0:21:21", "remaining_time": "0:01:39", "throughput": 2521.27, "total_tokens": 3230088} {"current_steps": 5210, "total_steps": 5610, "loss": 0.0006, "lr": 7.741649240915666e-07, "epoch": 9.28698752228164, "percentage": 92.87, "elapsed_time": "0:21:22", "remaining_time": "0:01:38", "throughput": 2521.34, "total_tokens": 3232840} {"current_steps": 5215, "total_steps": 5610, "loss": 0.0003, "lr": 7.550765991247654e-07, "epoch": 9.29590017825312, "percentage": 92.96, "elapsed_time": "0:21:23", "remaining_time": "0:01:37", "throughput": 2521.58, "total_tokens": 3235944} {"current_steps": 5220, "total_steps": 5610, "loss": 0.0231, "lr": 7.362229397840981e-07, "epoch": 9.304812834224599, "percentage": 93.05, "elapsed_time": "0:21:24", "remaining_time": "0:01:35", "throughput": 2521.69, "total_tokens": 3238728} {"current_steps": 5225, "total_steps": 5610, "loss": 0.2057, "lr": 7.17604128553373e-07, "epoch": 9.313725490196079, "percentage": 93.14, "elapsed_time": "0:21:25", "remaining_time": "0:01:34", "throughput": 2521.66, "total_tokens": 3241256} {"current_steps": 5230, "total_steps": 5610, "loss": 0.0871, "lr": 6.992203456432977e-07, "epoch": 9.322638146167558, "percentage": 93.23, "elapsed_time": "0:21:26", "remaining_time": "0:01:33", "throughput": 2522.01, "total_tokens": 3244680} {"current_steps": 5235, "total_steps": 5610, "loss": 0.0041, "lr": 6.810717689897633e-07, "epoch": 9.331550802139038, "percentage": 93.32, "elapsed_time": "0:21:27", "remaining_time": "0:01:32", "throughput": 2522.15, "total_tokens": 3247560} {"current_steps": 5240, "total_steps": 5610, "loss": 0.0077, "lr": 6.631585742521068e-07, "epoch": 9.340463458110516, "percentage": 93.4, "elapsed_time": "0:21:28", "remaining_time": "0:01:31", "throughput": 2522.63, "total_tokens": 3251176} {"current_steps": 5245, "total_steps": 5610, "loss": 0.0529, "lr": 6.454809348114044e-07, "epoch": 9.349376114081997, "percentage": 93.49, "elapsed_time": "0:21:29", "remaining_time": "0:01:29", "throughput": 2522.8, "total_tokens": 3254152} {"current_steps": 5250, "total_steps": 5610, "loss": 0.0004, "lr": 6.280390217688114e-07, "epoch": 9.358288770053475, "percentage": 93.58, "elapsed_time": "0:21:30", "remaining_time": "0:01:28", "throughput": 2522.84, "total_tokens": 3256744} {"current_steps": 5255, "total_steps": 5610, "loss": 0.032, "lr": 6.108330039438892e-07, "epoch": 9.367201426024955, "percentage": 93.67, "elapsed_time": "0:21:31", "remaining_time": "0:01:27", "throughput": 2522.85, "total_tokens": 3259400} {"current_steps": 5260, "total_steps": 5610, "loss": 0.0469, "lr": 5.938630478729917e-07, "epoch": 9.376114081996436, "percentage": 93.76, "elapsed_time": "0:21:33", "remaining_time": "0:01:26", "throughput": 2523.18, "total_tokens": 3262728} {"current_steps": 5265, "total_steps": 5610, "loss": 0.0692, "lr": 5.771293178076286e-07, "epoch": 9.385026737967914, "percentage": 93.85, "elapsed_time": "0:21:34", "remaining_time": "0:01:24", "throughput": 2523.67, "total_tokens": 3266376} {"current_steps": 5270, "total_steps": 5610, "loss": 0.0088, "lr": 5.606319757128914e-07, "epoch": 9.393939393939394, "percentage": 93.94, "elapsed_time": "0:21:35", "remaining_time": "0:01:23", "throughput": 2523.61, "total_tokens": 3268808} {"current_steps": 5275, "total_steps": 5610, "loss": 0.0079, "lr": 5.443711812658792e-07, "epoch": 9.402852049910873, "percentage": 94.03, "elapsed_time": "0:21:36", "remaining_time": "0:01:22", "throughput": 2523.91, "total_tokens": 3272008} {"current_steps": 5280, "total_steps": 5610, "loss": 0.0696, "lr": 5.283470918541616e-07, "epoch": 9.411764705882353, "percentage": 94.12, "elapsed_time": "0:21:37", "remaining_time": "0:01:21", "throughput": 2524.05, "total_tokens": 3274920} {"current_steps": 5285, "total_steps": 5610, "loss": 0.0042, "lr": 5.125598625742523e-07, "epoch": 9.420677361853832, "percentage": 94.21, "elapsed_time": "0:21:38", "remaining_time": "0:01:19", "throughput": 2524.43, "total_tokens": 3278376} {"current_steps": 5290, "total_steps": 5610, "loss": 0.1221, "lr": 4.970096462300927e-07, "epoch": 9.429590017825312, "percentage": 94.3, "elapsed_time": "0:21:39", "remaining_time": "0:01:18", "throughput": 2524.75, "total_tokens": 3281704} {"current_steps": 5295, "total_steps": 5610, "loss": 0.0225, "lr": 4.816965933315987e-07, "epoch": 9.43850267379679, "percentage": 94.39, "elapsed_time": "0:21:41", "remaining_time": "0:01:17", "throughput": 2525.17, "total_tokens": 3285256} {"current_steps": 5300, "total_steps": 5610, "loss": 0.0314, "lr": 4.6662085209318305e-07, "epoch": 9.44741532976827, "percentage": 94.47, "elapsed_time": "0:21:42", "remaining_time": "0:01:16", "throughput": 2525.54, "total_tokens": 3288616} {"current_steps": 5305, "total_steps": 5610, "loss": 0.0592, "lr": 4.517825684323324e-07, "epoch": 9.456327985739751, "percentage": 94.56, "elapsed_time": "0:21:43", "remaining_time": "0:01:14", "throughput": 2525.78, "total_tokens": 3291752} {"current_steps": 5310, "total_steps": 5610, "loss": 0.1233, "lr": 4.3718188596819086e-07, "epoch": 9.46524064171123, "percentage": 94.65, "elapsed_time": "0:21:44", "remaining_time": "0:01:13", "throughput": 2525.83, "total_tokens": 3294344} {"current_steps": 5315, "total_steps": 5610, "loss": 0.0009, "lr": 4.228189460201676e-07, "epoch": 9.47415329768271, "percentage": 94.74, "elapsed_time": "0:21:45", "remaining_time": "0:01:12", "throughput": 2526.07, "total_tokens": 3297512} {"current_steps": 5320, "total_steps": 5610, "loss": 0.0773, "lr": 4.086938876065732e-07, "epoch": 9.483065953654188, "percentage": 94.83, "elapsed_time": "0:21:46", "remaining_time": "0:01:11", "throughput": 2526.17, "total_tokens": 3300296} {"current_steps": 5325, "total_steps": 5610, "loss": 0.006, "lr": 3.948068474432715e-07, "epoch": 9.491978609625669, "percentage": 94.92, "elapsed_time": "0:21:47", "remaining_time": "0:01:09", "throughput": 2526.81, "total_tokens": 3304360} {"current_steps": 5330, "total_steps": 5610, "loss": 0.0812, "lr": 3.8115795994236313e-07, "epoch": 9.500891265597147, "percentage": 95.01, "elapsed_time": "0:21:48", "remaining_time": "0:01:08", "throughput": 2526.96, "total_tokens": 3307304} {"current_steps": 5335, "total_steps": 5610, "loss": 0.0782, "lr": 3.6774735721087085e-07, "epoch": 9.509803921568627, "percentage": 95.1, "elapsed_time": "0:21:49", "remaining_time": "0:01:07", "throughput": 2527.24, "total_tokens": 3310536} {"current_steps": 5339, "total_steps": 5610, "eval_loss": 0.15913818776607513, "epoch": 9.516934046345812, "percentage": 95.17, "elapsed_time": "0:21:57", "remaining_time": "0:01:06", "throughput": 2515.21, "total_tokens": 3312648} {"current_steps": 5340, "total_steps": 5610, "loss": 0.0046, "lr": 3.5457516904947587e-07, "epoch": 9.518716577540108, "percentage": 95.19, "elapsed_time": "0:21:58", "remaining_time": "0:01:06", "throughput": 2513.92, "total_tokens": 3313672} {"current_steps": 5345, "total_steps": 5610, "loss": 0.1425, "lr": 3.416415229512443e-07, "epoch": 9.527629233511586, "percentage": 95.28, "elapsed_time": "0:21:59", "remaining_time": "0:01:05", "throughput": 2514.4, "total_tokens": 3317224} {"current_steps": 5350, "total_steps": 5610, "loss": 0.0392, "lr": 3.2894654410041417e-07, "epoch": 9.536541889483066, "percentage": 95.37, "elapsed_time": "0:22:00", "remaining_time": "0:01:04", "throughput": 2514.41, "total_tokens": 3319848} {"current_steps": 5355, "total_steps": 5610, "loss": 0.0162, "lr": 3.1649035537117123e-07, "epoch": 9.545454545454545, "percentage": 95.45, "elapsed_time": "0:22:01", "remaining_time": "0:01:02", "throughput": 2514.55, "total_tokens": 3322664} {"current_steps": 5360, "total_steps": 5610, "loss": 0.0243, "lr": 3.042730773264557e-07, "epoch": 9.554367201426025, "percentage": 95.54, "elapsed_time": "0:22:02", "remaining_time": "0:01:01", "throughput": 2514.89, "total_tokens": 3325928} {"current_steps": 5365, "total_steps": 5610, "loss": 0.0205, "lr": 2.9229482821680197e-07, "epoch": 9.563279857397504, "percentage": 95.63, "elapsed_time": "0:22:03", "remaining_time": "0:01:00", "throughput": 2514.95, "total_tokens": 3328680} {"current_steps": 5370, "total_steps": 5610, "loss": 0.0256, "lr": 2.8055572397919784e-07, "epoch": 9.572192513368984, "percentage": 95.72, "elapsed_time": "0:22:04", "remaining_time": "0:00:59", "throughput": 2515.28, "total_tokens": 3331976} {"current_steps": 5375, "total_steps": 5610, "loss": 0.0021, "lr": 2.690558782359576e-07, "epoch": 9.581105169340464, "percentage": 95.81, "elapsed_time": "0:22:05", "remaining_time": "0:00:57", "throughput": 2515.44, "total_tokens": 3334888} {"current_steps": 5380, "total_steps": 5610, "loss": 0.0778, "lr": 2.5779540229361745e-07, "epoch": 9.590017825311943, "percentage": 95.9, "elapsed_time": "0:22:06", "remaining_time": "0:00:56", "throughput": 2515.66, "total_tokens": 3337960} {"current_steps": 5385, "total_steps": 5610, "loss": 0.0009, "lr": 2.467744051418641e-07, "epoch": 9.598930481283423, "percentage": 95.99, "elapsed_time": "0:22:07", "remaining_time": "0:00:55", "throughput": 2515.86, "total_tokens": 3340936} {"current_steps": 5390, "total_steps": 5610, "loss": 0.021, "lr": 2.3599299345248292e-07, "epoch": 9.607843137254902, "percentage": 96.08, "elapsed_time": "0:22:09", "remaining_time": "0:00:54", "throughput": 2515.98, "total_tokens": 3343784} {"current_steps": 5395, "total_steps": 5610, "loss": 0.0243, "lr": 2.2545127157831413e-07, "epoch": 9.616755793226382, "percentage": 96.17, "elapsed_time": "0:22:10", "remaining_time": "0:00:53", "throughput": 2516.31, "total_tokens": 3347016} {"current_steps": 5400, "total_steps": 5610, "loss": 0.0374, "lr": 2.1514934155226208e-07, "epoch": 9.62566844919786, "percentage": 96.26, "elapsed_time": "0:22:11", "remaining_time": "0:00:51", "throughput": 2516.43, "total_tokens": 3349800} {"current_steps": 5405, "total_steps": 5610, "loss": 0.0016, "lr": 2.0508730308627933e-07, "epoch": 9.63458110516934, "percentage": 96.35, "elapsed_time": "0:22:12", "remaining_time": "0:00:50", "throughput": 2516.99, "total_tokens": 3353640} {"current_steps": 5410, "total_steps": 5610, "loss": 0.0963, "lr": 1.9526525357043136e-07, "epoch": 9.643493761140821, "percentage": 96.43, "elapsed_time": "0:22:13", "remaining_time": "0:00:49", "throughput": 2517.3, "total_tokens": 3356904} {"current_steps": 5415, "total_steps": 5610, "loss": 0.1029, "lr": 1.8568328807193337e-07, "epoch": 9.6524064171123, "percentage": 96.52, "elapsed_time": "0:22:14", "remaining_time": "0:00:48", "throughput": 2517.65, "total_tokens": 3360232} {"current_steps": 5420, "total_steps": 5610, "loss": 0.1306, "lr": 1.7634149933423993e-07, "epoch": 9.66131907308378, "percentage": 96.61, "elapsed_time": "0:22:15", "remaining_time": "0:00:46", "throughput": 2517.7, "total_tokens": 3362824} {"current_steps": 5425, "total_steps": 5610, "loss": 0.1203, "lr": 1.6723997777614574e-07, "epoch": 9.670231729055258, "percentage": 96.7, "elapsed_time": "0:22:16", "remaining_time": "0:00:45", "throughput": 2518.03, "total_tokens": 3366152} {"current_steps": 5430, "total_steps": 5610, "loss": 0.0005, "lr": 1.5837881149090294e-07, "epoch": 9.679144385026738, "percentage": 96.79, "elapsed_time": "0:22:17", "remaining_time": "0:00:44", "throughput": 2518.24, "total_tokens": 3369192} {"current_steps": 5435, "total_steps": 5610, "loss": 0.2422, "lr": 1.497580862453829e-07, "epoch": 9.688057040998217, "percentage": 96.88, "elapsed_time": "0:22:19", "remaining_time": "0:00:43", "throughput": 2518.7, "total_tokens": 3372776} {"current_steps": 5440, "total_steps": 5610, "loss": 0.0752, "lr": 1.4137788547923246e-07, "epoch": 9.696969696969697, "percentage": 96.97, "elapsed_time": "0:22:20", "remaining_time": "0:00:41", "throughput": 2519.12, "total_tokens": 3376232} {"current_steps": 5445, "total_steps": 5610, "loss": 0.067, "lr": 1.3323829030407465e-07, "epoch": 9.705882352941176, "percentage": 97.06, "elapsed_time": "0:22:21", "remaining_time": "0:00:40", "throughput": 2519.6, "total_tokens": 3379912} {"current_steps": 5450, "total_steps": 5610, "loss": 0.1758, "lr": 1.2533937950272023e-07, "epoch": 9.714795008912656, "percentage": 97.15, "elapsed_time": "0:22:22", "remaining_time": "0:00:39", "throughput": 2519.83, "total_tokens": 3382824} {"current_steps": 5455, "total_steps": 5610, "loss": 0.0762, "lr": 1.176812295283991e-07, "epoch": 9.723707664884136, "percentage": 97.24, "elapsed_time": "0:22:23", "remaining_time": "0:00:38", "throughput": 2519.91, "total_tokens": 3385640} {"current_steps": 5460, "total_steps": 5610, "loss": 0.0657, "lr": 1.1026391450404128e-07, "epoch": 9.732620320855615, "percentage": 97.33, "elapsed_time": "0:22:24", "remaining_time": "0:00:36", "throughput": 2520.56, "total_tokens": 3389672} {"current_steps": 5465, "total_steps": 5610, "loss": 0.1683, "lr": 1.0308750622153307e-07, "epoch": 9.741532976827095, "percentage": 97.42, "elapsed_time": "0:22:25", "remaining_time": "0:00:35", "throughput": 2520.93, "total_tokens": 3393096} {"current_steps": 5470, "total_steps": 5610, "loss": 0.0005, "lr": 9.615207414103434e-08, "epoch": 9.750445632798574, "percentage": 97.5, "elapsed_time": "0:22:27", "remaining_time": "0:00:34", "throughput": 2521.12, "total_tokens": 3396136} {"current_steps": 5475, "total_steps": 5610, "loss": 0.147, "lr": 8.945768539031785e-08, "epoch": 9.759358288770054, "percentage": 97.59, "elapsed_time": "0:22:28", "remaining_time": "0:00:33", "throughput": 2521.37, "total_tokens": 3399304} {"current_steps": 5480, "total_steps": 5610, "loss": 0.0234, "lr": 8.30044047640921e-08, "epoch": 9.768270944741532, "percentage": 97.68, "elapsed_time": "0:22:29", "remaining_time": "0:00:32", "throughput": 2521.5, "total_tokens": 3402216} {"current_steps": 5485, "total_steps": 5610, "loss": 0.0406, "lr": 7.679229472340176e-08, "epoch": 9.777183600713013, "percentage": 97.77, "elapsed_time": "0:22:30", "remaining_time": "0:00:30", "throughput": 2521.6, "total_tokens": 3405096} {"current_steps": 5490, "total_steps": 5610, "loss": 0.1852, "lr": 7.082141539500597e-08, "epoch": 9.786096256684491, "percentage": 97.86, "elapsed_time": "0:22:31", "remaining_time": "0:00:29", "throughput": 2521.7, "total_tokens": 3407912} {"current_steps": 5495, "total_steps": 5610, "loss": 0.0003, "lr": 6.509182457080376e-08, "epoch": 9.795008912655971, "percentage": 97.95, "elapsed_time": "0:22:32", "remaining_time": "0:00:28", "throughput": 2521.83, "total_tokens": 3410856} {"current_steps": 5500, "total_steps": 5610, "loss": 0.0123, "lr": 5.9603577707267875e-08, "epoch": 9.803921568627452, "percentage": 98.04, "elapsed_time": "0:22:33", "remaining_time": "0:00:27", "throughput": 2522.05, "total_tokens": 3413928} {"current_steps": 5505, "total_steps": 5610, "loss": 0.0107, "lr": 5.435672792491742e-08, "epoch": 9.81283422459893, "percentage": 98.13, "elapsed_time": "0:22:34", "remaining_time": "0:00:25", "throughput": 2522.44, "total_tokens": 3417416} {"current_steps": 5510, "total_steps": 5610, "loss": 0.0133, "lr": 4.935132600780157e-08, "epoch": 9.82174688057041, "percentage": 98.22, "elapsed_time": "0:22:35", "remaining_time": "0:00:24", "throughput": 2522.5, "total_tokens": 3420136} {"current_steps": 5515, "total_steps": 5610, "loss": 0.0713, "lr": 4.4587420402997235e-08, "epoch": 9.830659536541889, "percentage": 98.31, "elapsed_time": "0:22:36", "remaining_time": "0:00:23", "throughput": 2522.75, "total_tokens": 3423272} {"current_steps": 5520, "total_steps": 5610, "loss": 0.0024, "lr": 4.006505722015386e-08, "epoch": 9.83957219251337, "percentage": 98.4, "elapsed_time": "0:22:38", "remaining_time": "0:00:22", "throughput": 2523.03, "total_tokens": 3426472} {"current_steps": 5525, "total_steps": 5610, "loss": 0.024, "lr": 3.578428023103819e-08, "epoch": 9.848484848484848, "percentage": 98.48, "elapsed_time": "0:22:39", "remaining_time": "0:00:20", "throughput": 2523.44, "total_tokens": 3429992} {"current_steps": 5530, "total_steps": 5610, "loss": 0.0727, "lr": 3.1745130869123566e-08, "epoch": 9.857397504456328, "percentage": 98.57, "elapsed_time": "0:22:40", "remaining_time": "0:00:19", "throughput": 2523.37, "total_tokens": 3432456} {"current_steps": 5535, "total_steps": 5610, "loss": 0.1221, "lr": 2.794764822916518e-08, "epoch": 9.866310160427808, "percentage": 98.66, "elapsed_time": "0:22:41", "remaining_time": "0:00:18", "throughput": 2523.33, "total_tokens": 3434888} {"current_steps": 5540, "total_steps": 5610, "loss": 0.1701, "lr": 2.4391869066844874e-08, "epoch": 9.875222816399287, "percentage": 98.75, "elapsed_time": "0:22:42", "remaining_time": "0:00:17", "throughput": 2523.52, "total_tokens": 3437832} {"current_steps": 5545, "total_steps": 5610, "loss": 0.0002, "lr": 2.1077827798404726e-08, "epoch": 9.884135472370767, "percentage": 98.84, "elapsed_time": "0:22:43", "remaining_time": "0:00:15", "throughput": 2523.73, "total_tokens": 3440872} {"current_steps": 5550, "total_steps": 5610, "loss": 0.0609, "lr": 1.8005556500313993e-08, "epoch": 9.893048128342246, "percentage": 98.93, "elapsed_time": "0:22:44", "remaining_time": "0:00:14", "throughput": 2523.87, "total_tokens": 3443784} {"current_steps": 5555, "total_steps": 5610, "loss": 0.095, "lr": 1.51750849089638e-08, "epoch": 9.901960784313726, "percentage": 99.02, "elapsed_time": "0:22:45", "remaining_time": "0:00:13", "throughput": 2524.4, "total_tokens": 3447592} {"current_steps": 5560, "total_steps": 5610, "loss": 0.0665, "lr": 1.2586440420372936e-08, "epoch": 9.910873440285204, "percentage": 99.11, "elapsed_time": "0:22:46", "remaining_time": "0:00:12", "throughput": 2524.79, "total_tokens": 3451048} {"current_steps": 5565, "total_steps": 5610, "loss": 0.1243, "lr": 1.023964808992417e-08, "epoch": 9.919786096256685, "percentage": 99.2, "elapsed_time": "0:22:47", "remaining_time": "0:00:11", "throughput": 2524.9, "total_tokens": 3453928} {"current_steps": 5570, "total_steps": 5610, "loss": 0.0082, "lr": 8.134730632125554e-09, "epoch": 9.928698752228165, "percentage": 99.29, "elapsed_time": "0:22:49", "remaining_time": "0:00:09", "throughput": 2525.1, "total_tokens": 3456968} {"current_steps": 5575, "total_steps": 5610, "loss": 0.1228, "lr": 6.271708420385603e-09, "epoch": 9.937611408199643, "percentage": 99.38, "elapsed_time": "0:22:50", "remaining_time": "0:00:08", "throughput": 2525.56, "total_tokens": 3460616} {"current_steps": 5580, "total_steps": 5610, "loss": 0.0615, "lr": 4.650599486827334e-09, "epoch": 9.946524064171124, "percentage": 99.47, "elapsed_time": "0:22:51", "remaining_time": "0:00:07", "throughput": 2525.75, "total_tokens": 3463592} {"current_steps": 5585, "total_steps": 5610, "loss": 0.0408, "lr": 3.2714195220912013e-09, "epoch": 9.955436720142602, "percentage": 99.55, "elapsed_time": "0:22:52", "remaining_time": "0:00:06", "throughput": 2526.08, "total_tokens": 3466888} {"current_steps": 5590, "total_steps": 5610, "loss": 0.0049, "lr": 2.134181875204644e-09, "epoch": 9.964349376114082, "percentage": 99.64, "elapsed_time": "0:22:53", "remaining_time": "0:00:04", "throughput": 2526.46, "total_tokens": 3470408} {"current_steps": 5595, "total_steps": 5610, "loss": 0.1008, "lr": 1.2388975534460834e-09, "epoch": 9.973262032085561, "percentage": 99.73, "elapsed_time": "0:22:54", "remaining_time": "0:00:03", "throughput": 2526.71, "total_tokens": 3473608} {"current_steps": 5600, "total_steps": 5610, "loss": 0.0005, "lr": 5.855752222366783e-10, "epoch": 9.982174688057041, "percentage": 99.82, "elapsed_time": "0:22:55", "remaining_time": "0:00:02", "throughput": 2526.86, "total_tokens": 3476616} {"current_steps": 5605, "total_steps": 5610, "loss": 0.0557, "lr": 1.7422120505705686e-10, "epoch": 9.99108734402852, "percentage": 99.91, "elapsed_time": "0:22:56", "remaining_time": "0:00:01", "throughput": 2527.04, "total_tokens": 3479624} {"current_steps": 5610, "total_steps": 5610, "loss": 0.0016, "lr": 4.839483383478616e-12, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:22:57", "remaining_time": "0:00:00", "throughput": 2526.74, "total_tokens": 3481336} {"current_steps": 5610, "total_steps": 5610, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:22:58", "remaining_time": "0:00:00", "throughput": 2525.37, "total_tokens": 3481336}