train_wsc_123_1760351015 / trainer_log.jsonl
rbelanec's picture
Training in progress, step 1250
510f807 verified
{"current_steps": 5, "total_steps": 1250, "loss": 0.3789, "lr": 0.00096, "epoch": 0.04, "percentage": 0.4, "elapsed_time": "0:00:01", "remaining_time": "0:05:06", "throughput": 1740.02, "total_tokens": 2144}
{"current_steps": 10, "total_steps": 1250, "loss": 0.4731, "lr": 0.0021599999999999996, "epoch": 0.08, "percentage": 0.8, "elapsed_time": "0:00:01", "remaining_time": "0:03:46", "throughput": 2264.1, "total_tokens": 4128}
{"current_steps": 15, "total_steps": 1250, "loss": 6.3056, "lr": 0.00336, "epoch": 0.12, "percentage": 1.2, "elapsed_time": "0:00:02", "remaining_time": "0:03:18", "throughput": 2587.23, "total_tokens": 6240}
{"current_steps": 20, "total_steps": 1250, "loss": 3.0486, "lr": 0.00456, "epoch": 0.16, "percentage": 1.6, "elapsed_time": "0:00:02", "remaining_time": "0:03:03", "throughput": 2717.41, "total_tokens": 8096}
{"current_steps": 25, "total_steps": 1250, "loss": 5.1407, "lr": 0.0057599999999999995, "epoch": 0.2, "percentage": 2.0, "elapsed_time": "0:00:03", "remaining_time": "0:02:53", "throughput": 2848.16, "total_tokens": 10112}
{"current_steps": 30, "total_steps": 1250, "loss": 7.8817, "lr": 0.00696, "epoch": 0.24, "percentage": 2.4, "elapsed_time": "0:00:04", "remaining_time": "0:02:47", "throughput": 2919.64, "total_tokens": 12032}
{"current_steps": 35, "total_steps": 1250, "loss": 7.0054, "lr": 0.00816, "epoch": 0.28, "percentage": 2.8, "elapsed_time": "0:00:04", "remaining_time": "0:02:42", "throughput": 2955.97, "total_tokens": 13824}
{"current_steps": 40, "total_steps": 1250, "loss": 5.8528, "lr": 0.00936, "epoch": 0.32, "percentage": 3.2, "elapsed_time": "0:00:05", "remaining_time": "0:02:39", "throughput": 3011.93, "total_tokens": 15840}
{"current_steps": 45, "total_steps": 1250, "loss": 4.8726, "lr": 0.010559999999999998, "epoch": 0.36, "percentage": 3.6, "elapsed_time": "0:00:05", "remaining_time": "0:02:36", "throughput": 3070.43, "total_tokens": 17920}
{"current_steps": 50, "total_steps": 1250, "loss": 3.5817, "lr": 0.01176, "epoch": 0.4, "percentage": 4.0, "elapsed_time": "0:00:06", "remaining_time": "0:02:33", "throughput": 3083.51, "total_tokens": 19712}
{"current_steps": 55, "total_steps": 1250, "loss": 2.9242, "lr": 0.01296, "epoch": 0.44, "percentage": 4.4, "elapsed_time": "0:00:06", "remaining_time": "0:02:31", "throughput": 3139.62, "total_tokens": 21952}
{"current_steps": 60, "total_steps": 1250, "loss": 2.8259, "lr": 0.014159999999999999, "epoch": 0.48, "percentage": 4.8, "elapsed_time": "0:00:07", "remaining_time": "0:02:30", "throughput": 3184.51, "total_tokens": 24160}
{"current_steps": 63, "total_steps": 1250, "eval_loss": 2.4233529567718506, "epoch": 0.504, "percentage": 5.04, "elapsed_time": "0:00:08", "remaining_time": "0:02:49", "throughput": 2837.06, "total_tokens": 25504}
{"current_steps": 65, "total_steps": 1250, "loss": 1.6953, "lr": 0.01536, "epoch": 0.52, "percentage": 5.2, "elapsed_time": "0:00:10", "remaining_time": "0:03:04", "throughput": 2575.77, "total_tokens": 26112}
{"current_steps": 70, "total_steps": 1250, "loss": 1.0396, "lr": 0.016560000000000002, "epoch": 0.56, "percentage": 5.6, "elapsed_time": "0:00:10", "remaining_time": "0:03:01", "throughput": 2603.72, "total_tokens": 28064}
{"current_steps": 75, "total_steps": 1250, "loss": 2.335, "lr": 0.017759999999999998, "epoch": 0.6, "percentage": 6.0, "elapsed_time": "0:00:11", "remaining_time": "0:02:57", "throughput": 2631.78, "total_tokens": 29824}
{"current_steps": 80, "total_steps": 1250, "loss": 0.5806, "lr": 0.01896, "epoch": 0.64, "percentage": 6.4, "elapsed_time": "0:00:11", "remaining_time": "0:02:54", "throughput": 2679.52, "total_tokens": 31904}
{"current_steps": 85, "total_steps": 1250, "loss": 0.608, "lr": 0.02016, "epoch": 0.68, "percentage": 6.8, "elapsed_time": "0:00:12", "remaining_time": "0:02:51", "throughput": 2722.26, "total_tokens": 33984}
{"current_steps": 90, "total_steps": 1250, "loss": 2.9865, "lr": 0.021359999999999997, "epoch": 0.72, "percentage": 7.2, "elapsed_time": "0:00:13", "remaining_time": "0:02:47", "throughput": 2745.0, "total_tokens": 35776}
{"current_steps": 95, "total_steps": 1250, "loss": 1.1828, "lr": 0.02256, "epoch": 0.76, "percentage": 7.6, "elapsed_time": "0:00:13", "remaining_time": "0:02:45", "throughput": 2759.78, "total_tokens": 37472}
{"current_steps": 100, "total_steps": 1250, "loss": 0.6819, "lr": 0.02376, "epoch": 0.8, "percentage": 8.0, "elapsed_time": "0:00:14", "remaining_time": "0:02:42", "throughput": 2781.59, "total_tokens": 39328}
{"current_steps": 105, "total_steps": 1250, "loss": 0.9793, "lr": 0.02496, "epoch": 0.84, "percentage": 8.4, "elapsed_time": "0:00:14", "remaining_time": "0:02:40", "throughput": 2807.08, "total_tokens": 41280}
{"current_steps": 110, "total_steps": 1250, "loss": 0.5861, "lr": 0.02616, "epoch": 0.88, "percentage": 8.8, "elapsed_time": "0:00:15", "remaining_time": "0:02:38", "throughput": 2844.7, "total_tokens": 43552}
{"current_steps": 115, "total_steps": 1250, "loss": 1.4858, "lr": 0.02736, "epoch": 0.92, "percentage": 9.2, "elapsed_time": "0:00:15", "remaining_time": "0:02:36", "throughput": 2850.08, "total_tokens": 45216}
{"current_steps": 120, "total_steps": 1250, "loss": 0.3556, "lr": 0.02856, "epoch": 0.96, "percentage": 9.6, "elapsed_time": "0:00:16", "remaining_time": "0:02:34", "throughput": 2877.38, "total_tokens": 47360}
{"current_steps": 125, "total_steps": 1250, "loss": 0.7315, "lr": 0.029759999999999998, "epoch": 1.0, "percentage": 10.0, "elapsed_time": "0:00:17", "remaining_time": "0:02:34", "throughput": 2877.85, "total_tokens": 49376}
{"current_steps": 126, "total_steps": 1250, "eval_loss": 0.5473280549049377, "epoch": 1.008, "percentage": 10.08, "elapsed_time": "0:00:18", "remaining_time": "0:02:44", "throughput": 2692.2, "total_tokens": 49696}
{"current_steps": 130, "total_steps": 1250, "loss": 0.5602, "lr": 0.029999064225016296, "epoch": 1.04, "percentage": 10.4, "elapsed_time": "0:00:19", "remaining_time": "0:02:51", "throughput": 2577.45, "total_tokens": 51200}
{"current_steps": 135, "total_steps": 1250, "loss": 0.6027, "lr": 0.029995262839249498, "epoch": 1.08, "percentage": 10.8, "elapsed_time": "0:00:20", "remaining_time": "0:02:48", "throughput": 2602.96, "total_tokens": 53216}
{"current_steps": 140, "total_steps": 1250, "loss": 0.7287, "lr": 0.0299885380972807, "epoch": 1.12, "percentage": 11.2, "elapsed_time": "0:00:21", "remaining_time": "0:02:46", "throughput": 2624.77, "total_tokens": 55168}
{"current_steps": 145, "total_steps": 1250, "loss": 0.6063, "lr": 0.02997889131011168, "epoch": 1.16, "percentage": 11.6, "elapsed_time": "0:00:21", "remaining_time": "0:02:44", "throughput": 2640.56, "total_tokens": 56960}
{"current_steps": 150, "total_steps": 1250, "loss": 0.6846, "lr": 0.0299663243584027, "epoch": 1.2, "percentage": 12.0, "elapsed_time": "0:00:22", "remaining_time": "0:02:42", "throughput": 2660.71, "total_tokens": 58880}
{"current_steps": 155, "total_steps": 1250, "loss": 0.7672, "lr": 0.029950839692105897, "epoch": 1.24, "percentage": 12.4, "elapsed_time": "0:00:22", "remaining_time": "0:02:40", "throughput": 2675.34, "total_tokens": 60672}
{"current_steps": 160, "total_steps": 1250, "loss": 1.9008, "lr": 0.029932440329987653, "epoch": 1.28, "percentage": 12.8, "elapsed_time": "0:00:23", "remaining_time": "0:02:38", "throughput": 2701.85, "total_tokens": 62848}
{"current_steps": 165, "total_steps": 1250, "loss": 1.5402, "lr": 0.02991112985904007, "epoch": 1.32, "percentage": 13.2, "elapsed_time": "0:00:23", "remaining_time": "0:02:36", "throughput": 2707.61, "total_tokens": 64448}
{"current_steps": 170, "total_steps": 1250, "loss": 1.0271, "lr": 0.029886912433781675, "epoch": 1.3599999999999999, "percentage": 13.6, "elapsed_time": "0:00:24", "remaining_time": "0:02:34", "throughput": 2724.78, "total_tokens": 66368}
{"current_steps": 175, "total_steps": 1250, "loss": 0.6777, "lr": 0.02985979277544751, "epoch": 1.4, "percentage": 14.0, "elapsed_time": "0:00:24", "remaining_time": "0:02:32", "throughput": 2735.66, "total_tokens": 68128}
{"current_steps": 180, "total_steps": 1250, "loss": 0.7326, "lr": 0.029829776171068707, "epoch": 1.44, "percentage": 14.4, "elapsed_time": "0:00:25", "remaining_time": "0:02:31", "throughput": 2753.16, "total_tokens": 70112}
{"current_steps": 185, "total_steps": 1250, "loss": 1.1616, "lr": 0.029796868472441763, "epoch": 1.48, "percentage": 14.8, "elapsed_time": "0:00:26", "remaining_time": "0:02:30", "throughput": 2775.22, "total_tokens": 72384}
{"current_steps": 189, "total_steps": 1250, "eval_loss": 0.532053530216217, "epoch": 1.512, "percentage": 15.12, "elapsed_time": "0:00:27", "remaining_time": "0:02:34", "throughput": 2684.23, "total_tokens": 74112}
{"current_steps": 190, "total_steps": 1250, "loss": 0.5949, "lr": 0.029761076094987723, "epoch": 1.52, "percentage": 15.2, "elapsed_time": "0:00:28", "remaining_time": "0:02:41", "throughput": 2588.01, "total_tokens": 74752}
{"current_steps": 195, "total_steps": 1250, "loss": 0.5306, "lr": 0.02972240601650149, "epoch": 1.56, "percentage": 15.6, "elapsed_time": "0:00:29", "remaining_time": "0:02:39", "throughput": 2602.78, "total_tokens": 76640}
{"current_steps": 200, "total_steps": 1250, "loss": 0.5088, "lr": 0.029680865775791494, "epoch": 1.6, "percentage": 16.0, "elapsed_time": "0:00:30", "remaining_time": "0:02:37", "throughput": 2623.88, "total_tokens": 78784}
{"current_steps": 205, "total_steps": 1250, "loss": 0.4879, "lr": 0.02963646347120996, "epoch": 1.6400000000000001, "percentage": 16.4, "elapsed_time": "0:00:30", "remaining_time": "0:02:36", "throughput": 2639.2, "total_tokens": 80768}
{"current_steps": 210, "total_steps": 1250, "loss": 0.5444, "lr": 0.029589207759074154, "epoch": 1.6800000000000002, "percentage": 16.8, "elapsed_time": "0:00:31", "remaining_time": "0:02:34", "throughput": 2656.41, "total_tokens": 82848}
{"current_steps": 215, "total_steps": 1250, "loss": 0.4531, "lr": 0.029539107851978778, "epoch": 1.72, "percentage": 17.2, "elapsed_time": "0:00:31", "remaining_time": "0:02:32", "throughput": 2669.26, "total_tokens": 84768}
{"current_steps": 220, "total_steps": 1250, "loss": 0.4487, "lr": 0.02948617351699999, "epoch": 1.76, "percentage": 17.6, "elapsed_time": "0:00:32", "remaining_time": "0:02:31", "throughput": 2685.2, "total_tokens": 86848}
{"current_steps": 225, "total_steps": 1250, "loss": 0.429, "lr": 0.029430415073791287, "epoch": 1.8, "percentage": 18.0, "elapsed_time": "0:00:32", "remaining_time": "0:02:29", "throughput": 2696.96, "total_tokens": 88736}
{"current_steps": 230, "total_steps": 1250, "loss": 0.4092, "lr": 0.029371843392571644, "epoch": 1.8399999999999999, "percentage": 18.4, "elapsed_time": "0:00:33", "remaining_time": "0:02:28", "throughput": 2713.46, "total_tokens": 90848}
{"current_steps": 235, "total_steps": 1250, "loss": 0.4367, "lr": 0.029310469892006367, "epoch": 1.88, "percentage": 18.8, "elapsed_time": "0:00:34", "remaining_time": "0:02:27", "throughput": 2730.36, "total_tokens": 92992}
{"current_steps": 240, "total_steps": 1250, "loss": 0.4234, "lr": 0.029246306536981, "epoch": 1.92, "percentage": 19.2, "elapsed_time": "0:00:34", "remaining_time": "0:02:25", "throughput": 2740.41, "total_tokens": 94880}
{"current_steps": 245, "total_steps": 1250, "loss": 0.3853, "lr": 0.02917936583626874, "epoch": 1.96, "percentage": 19.6, "elapsed_time": "0:00:35", "remaining_time": "0:02:24", "throughput": 2750.22, "total_tokens": 96704}
{"current_steps": 250, "total_steps": 1250, "loss": 0.4384, "lr": 0.029109660840091818, "epoch": 2.0, "percentage": 20.0, "elapsed_time": "0:00:35", "remaining_time": "0:02:23", "throughput": 2746.26, "total_tokens": 98240}
{"current_steps": 252, "total_steps": 1250, "eval_loss": 0.3677593171596527, "epoch": 2.016, "percentage": 20.16, "elapsed_time": "0:00:37", "remaining_time": "0:02:27", "throughput": 2663.62, "total_tokens": 99136}
{"current_steps": 255, "total_steps": 1250, "loss": 0.36, "lr": 0.029037205137577363, "epoch": 2.04, "percentage": 20.4, "elapsed_time": "0:00:38", "remaining_time": "0:02:30", "throughput": 2605.71, "total_tokens": 100224}
{"current_steps": 260, "total_steps": 1250, "loss": 0.4149, "lr": 0.02896201285410813, "epoch": 2.08, "percentage": 20.8, "elapsed_time": "0:00:39", "remaining_time": "0:02:28", "throughput": 2611.62, "total_tokens": 101920}
{"current_steps": 265, "total_steps": 1250, "loss": 0.3722, "lr": 0.028884098648568782, "epoch": 2.12, "percentage": 21.2, "elapsed_time": "0:00:39", "remaining_time": "0:02:27", "throughput": 2622.6, "total_tokens": 103808}
{"current_steps": 270, "total_steps": 1250, "loss": 0.4172, "lr": 0.028803477710488055, "epoch": 2.16, "percentage": 21.6, "elapsed_time": "0:00:40", "remaining_time": "0:02:25", "throughput": 2635.79, "total_tokens": 105920}
{"current_steps": 275, "total_steps": 1250, "loss": 0.38, "lr": 0.028720165757077573, "epoch": 2.2, "percentage": 22.0, "elapsed_time": "0:00:40", "remaining_time": "0:02:24", "throughput": 2652.6, "total_tokens": 108160}
{"current_steps": 280, "total_steps": 1250, "loss": 0.49, "lr": 0.02863417903016773, "epoch": 2.24, "percentage": 22.4, "elapsed_time": "0:00:41", "remaining_time": "0:02:23", "throughput": 2660.81, "total_tokens": 109920}
{"current_steps": 285, "total_steps": 1250, "loss": 0.7063, "lr": 0.02854553429304131, "epoch": 2.2800000000000002, "percentage": 22.8, "elapsed_time": "0:00:41", "remaining_time": "0:02:21", "throughput": 2672.36, "total_tokens": 111904}
{"current_steps": 290, "total_steps": 1250, "loss": 0.4212, "lr": 0.02845424882716545, "epoch": 2.32, "percentage": 23.2, "elapsed_time": "0:00:42", "remaining_time": "0:02:20", "throughput": 2678.32, "total_tokens": 113632}
{"current_steps": 295, "total_steps": 1250, "loss": 0.418, "lr": 0.028360340428822597, "epoch": 2.36, "percentage": 23.6, "elapsed_time": "0:00:42", "remaining_time": "0:02:19", "throughput": 2688.92, "total_tokens": 115616}
{"current_steps": 300, "total_steps": 1250, "loss": 0.4265, "lr": 0.028263827405641085, "epoch": 2.4, "percentage": 24.0, "elapsed_time": "0:00:43", "remaining_time": "0:02:17", "throughput": 2697.53, "total_tokens": 117472}
{"current_steps": 305, "total_steps": 1250, "loss": 0.3562, "lr": 0.028164728573026005, "epoch": 2.44, "percentage": 24.4, "elapsed_time": "0:00:44", "remaining_time": "0:02:16", "throughput": 2710.33, "total_tokens": 119616}
{"current_steps": 310, "total_steps": 1250, "loss": 0.4739, "lr": 0.02806306325049113, "epoch": 2.48, "percentage": 24.8, "elapsed_time": "0:00:44", "remaining_time": "0:02:15", "throughput": 2719.64, "total_tokens": 121568}
{"current_steps": 315, "total_steps": 1250, "loss": 0.384, "lr": 0.027958851257892527, "epoch": 2.52, "percentage": 25.2, "elapsed_time": "0:00:45", "remaining_time": "0:02:14", "throughput": 2734.73, "total_tokens": 123904}
{"current_steps": 315, "total_steps": 1250, "eval_loss": 0.3612143099308014, "epoch": 2.52, "percentage": 25.2, "elapsed_time": "0:00:46", "remaining_time": "0:02:17", "throughput": 2676.73, "total_tokens": 123904}
{"current_steps": 320, "total_steps": 1250, "loss": 0.4243, "lr": 0.02785211291156464, "epoch": 2.56, "percentage": 25.6, "elapsed_time": "0:00:47", "remaining_time": "0:02:18", "throughput": 2633.15, "total_tokens": 125696}
{"current_steps": 325, "total_steps": 1250, "loss": 0.3746, "lr": 0.027742869020359582, "epoch": 2.6, "percentage": 26.0, "elapsed_time": "0:00:48", "remaining_time": "0:02:17", "throughput": 2640.2, "total_tokens": 127488}
{"current_steps": 330, "total_steps": 1250, "loss": 0.3832, "lr": 0.027631140881590383, "epoch": 2.64, "percentage": 26.4, "elapsed_time": "0:00:48", "remaining_time": "0:02:16", "throughput": 2652.68, "total_tokens": 129632}
{"current_steps": 335, "total_steps": 1250, "loss": 0.5008, "lr": 0.027516950276879084, "epoch": 2.68, "percentage": 26.8, "elapsed_time": "0:00:49", "remaining_time": "0:02:15", "throughput": 2663.12, "total_tokens": 131680}
{"current_steps": 340, "total_steps": 1250, "loss": 0.375, "lr": 0.02740031946791033, "epoch": 2.7199999999999998, "percentage": 27.2, "elapsed_time": "0:00:49", "remaining_time": "0:02:13", "throughput": 2669.82, "total_tokens": 133472}
{"current_steps": 345, "total_steps": 1250, "loss": 0.5866, "lr": 0.027281271192091415, "epoch": 2.76, "percentage": 27.6, "elapsed_time": "0:00:50", "remaining_time": "0:02:12", "throughput": 2675.38, "total_tokens": 135200}
{"current_steps": 350, "total_steps": 1250, "loss": 0.4668, "lr": 0.027159828658119597, "epoch": 2.8, "percentage": 28.0, "elapsed_time": "0:00:51", "remaining_time": "0:02:11", "throughput": 2689.31, "total_tokens": 137536}
{"current_steps": 355, "total_steps": 1250, "loss": 0.5393, "lr": 0.0270360155414575, "epoch": 2.84, "percentage": 28.4, "elapsed_time": "0:00:51", "remaining_time": "0:02:10", "throughput": 2701.24, "total_tokens": 139744}
{"current_steps": 360, "total_steps": 1250, "loss": 0.5677, "lr": 0.02690985597971753, "epoch": 2.88, "percentage": 28.8, "elapsed_time": "0:00:52", "remaining_time": "0:02:09", "throughput": 2710.11, "total_tokens": 141760}
{"current_steps": 365, "total_steps": 1250, "loss": 0.355, "lr": 0.026781374567956224, "epoch": 2.92, "percentage": 29.2, "elapsed_time": "0:00:52", "remaining_time": "0:02:08", "throughput": 2720.35, "total_tokens": 143872}
{"current_steps": 370, "total_steps": 1250, "loss": 0.3541, "lr": 0.026650596353879386, "epoch": 2.96, "percentage": 29.6, "elapsed_time": "0:00:53", "remaining_time": "0:02:07", "throughput": 2729.01, "total_tokens": 145856}
{"current_steps": 375, "total_steps": 1250, "loss": 0.4165, "lr": 0.026517546832958965, "epoch": 3.0, "percentage": 30.0, "elapsed_time": "0:00:54", "remaining_time": "0:02:06", "throughput": 2729.02, "total_tokens": 147648}
{"current_steps": 378, "total_steps": 1250, "eval_loss": 0.4323018193244934, "epoch": 3.024, "percentage": 30.24, "elapsed_time": "0:00:55", "remaining_time": "0:02:08", "throughput": 2673.22, "total_tokens": 148736}
{"current_steps": 380, "total_steps": 1250, "loss": 0.3542, "lr": 0.026382251943462682, "epoch": 3.04, "percentage": 30.4, "elapsed_time": "0:00:56", "remaining_time": "0:02:10", "throughput": 2630.81, "total_tokens": 149472}
{"current_steps": 385, "total_steps": 1250, "loss": 0.4196, "lr": 0.026244738061397325, "epoch": 3.08, "percentage": 30.8, "elapsed_time": "0:00:57", "remaining_time": "0:02:08", "throughput": 2641.0, "total_tokens": 151552}
{"current_steps": 390, "total_steps": 1250, "loss": 0.4161, "lr": 0.026105031995366672, "epoch": 3.12, "percentage": 31.2, "elapsed_time": "0:00:57", "remaining_time": "0:02:07", "throughput": 2650.07, "total_tokens": 153568}
{"current_steps": 395, "total_steps": 1250, "loss": 0.4733, "lr": 0.025963160981345105, "epoch": 3.16, "percentage": 31.6, "elapsed_time": "0:00:58", "remaining_time": "0:02:06", "throughput": 2659.41, "total_tokens": 155616}
{"current_steps": 400, "total_steps": 1250, "loss": 0.408, "lr": 0.02581915267736791, "epoch": 3.2, "percentage": 32.0, "elapsed_time": "0:00:59", "remaining_time": "0:02:05", "throughput": 2666.02, "total_tokens": 157728}
{"current_steps": 405, "total_steps": 1250, "loss": 0.3754, "lr": 0.025673035158139283, "epoch": 3.24, "percentage": 32.4, "elapsed_time": "0:00:59", "remaining_time": "0:02:04", "throughput": 2670.88, "total_tokens": 159488}
{"current_steps": 410, "total_steps": 1250, "loss": 0.3989, "lr": 0.02552483690955911, "epoch": 3.2800000000000002, "percentage": 32.8, "elapsed_time": "0:01:00", "remaining_time": "0:02:03", "throughput": 2680.04, "total_tokens": 161600}
{"current_steps": 415, "total_steps": 1250, "loss": 0.3847, "lr": 0.0253745868231696, "epoch": 3.32, "percentage": 33.2, "elapsed_time": "0:01:00", "remaining_time": "0:02:02", "throughput": 2688.97, "total_tokens": 163712}
{"current_steps": 420, "total_steps": 1250, "loss": 0.3272, "lr": 0.025222314190522798, "epoch": 3.36, "percentage": 33.6, "elapsed_time": "0:01:01", "remaining_time": "0:02:01", "throughput": 2694.22, "total_tokens": 165536}
{"current_steps": 425, "total_steps": 1250, "loss": 0.4486, "lr": 0.02506804869747014, "epoch": 3.4, "percentage": 34.0, "elapsed_time": "0:01:02", "remaining_time": "0:02:00", "throughput": 2705.71, "total_tokens": 167872}
{"current_steps": 430, "total_steps": 1250, "loss": 0.4475, "lr": 0.024911820418375166, "epoch": 3.44, "percentage": 34.4, "elapsed_time": "0:01:02", "remaining_time": "0:01:59", "throughput": 2715.49, "total_tokens": 170112}
{"current_steps": 435, "total_steps": 1250, "loss": 0.428, "lr": 0.02475365981025043, "epoch": 3.48, "percentage": 34.8, "elapsed_time": "0:01:03", "remaining_time": "0:01:58", "throughput": 2721.48, "total_tokens": 172000}
{"current_steps": 440, "total_steps": 1250, "loss": 0.4076, "lr": 0.02459359770681987, "epoch": 3.52, "percentage": 35.2, "elapsed_time": "0:01:03", "remaining_time": "0:01:57", "throughput": 2728.69, "total_tokens": 174016}
{"current_steps": 441, "total_steps": 1250, "eval_loss": 0.42745479941368103, "epoch": 3.528, "percentage": 35.28, "elapsed_time": "0:01:04", "remaining_time": "0:01:58", "throughput": 2689.29, "total_tokens": 174432}
{"current_steps": 445, "total_steps": 1250, "loss": 0.3735, "lr": 0.02443166531250769, "epoch": 3.56, "percentage": 35.6, "elapsed_time": "0:01:06", "remaining_time": "0:01:59", "throughput": 2651.79, "total_tokens": 175776}
{"current_steps": 450, "total_steps": 1250, "loss": 0.3547, "lr": 0.024267894196355017, "epoch": 3.6, "percentage": 36.0, "elapsed_time": "0:01:06", "remaining_time": "0:01:58", "throughput": 2660.56, "total_tokens": 177952}
{"current_steps": 455, "total_steps": 1250, "loss": 0.3795, "lr": 0.024102316285865434, "epoch": 3.64, "percentage": 36.4, "elapsed_time": "0:01:07", "remaining_time": "0:01:57", "throughput": 2668.02, "total_tokens": 179968}
{"current_steps": 460, "total_steps": 1250, "loss": 0.4155, "lr": 0.02393496386078067, "epoch": 3.68, "percentage": 36.8, "elapsed_time": "0:01:08", "remaining_time": "0:01:56", "throughput": 2675.23, "total_tokens": 181952}
{"current_steps": 465, "total_steps": 1250, "loss": 0.4515, "lr": 0.02376586954678758, "epoch": 3.7199999999999998, "percentage": 37.2, "elapsed_time": "0:01:08", "remaining_time": "0:01:55", "throughput": 2679.4, "total_tokens": 183680}
{"current_steps": 470, "total_steps": 1250, "loss": 0.3645, "lr": 0.02359506630915773, "epoch": 3.76, "percentage": 37.6, "elapsed_time": "0:01:09", "remaining_time": "0:01:54", "throughput": 2685.86, "total_tokens": 185632}
{"current_steps": 475, "total_steps": 1250, "loss": 0.328, "lr": 0.023422587446320715, "epoch": 3.8, "percentage": 38.0, "elapsed_time": "0:01:09", "remaining_time": "0:01:53", "throughput": 2691.84, "total_tokens": 187552}
{"current_steps": 480, "total_steps": 1250, "loss": 0.3968, "lr": 0.0232484665833726, "epoch": 3.84, "percentage": 38.4, "elapsed_time": "0:01:10", "remaining_time": "0:01:52", "throughput": 2697.16, "total_tokens": 189408}
{"current_steps": 485, "total_steps": 1250, "loss": 0.3496, "lr": 0.023072737665520607, "epoch": 3.88, "percentage": 38.8, "elapsed_time": "0:01:10", "remaining_time": "0:01:51", "throughput": 2704.82, "total_tokens": 191488}
{"current_steps": 490, "total_steps": 1250, "loss": 0.3902, "lr": 0.022895434951465468, "epoch": 3.92, "percentage": 39.2, "elapsed_time": "0:01:11", "remaining_time": "0:01:50", "throughput": 2709.63, "total_tokens": 193312}
{"current_steps": 495, "total_steps": 1250, "loss": 0.3725, "lr": 0.022716593006722595, "epoch": 3.96, "percentage": 39.6, "elapsed_time": "0:01:11", "remaining_time": "0:01:49", "throughput": 2717.29, "total_tokens": 195424}
{"current_steps": 500, "total_steps": 1250, "loss": 0.3432, "lr": 0.02253624669688347, "epoch": 4.0, "percentage": 40.0, "elapsed_time": "0:01:12", "remaining_time": "0:01:48", "throughput": 2714.64, "total_tokens": 197024}
{"current_steps": 504, "total_steps": 1250, "eval_loss": 0.3646264672279358, "epoch": 4.032, "percentage": 40.32, "elapsed_time": "0:01:14", "remaining_time": "0:01:49", "throughput": 2676.05, "total_tokens": 198656}
{"current_steps": 505, "total_steps": 1250, "loss": 0.4676, "lr": 0.022354431180818528, "epoch": 4.04, "percentage": 40.4, "elapsed_time": "0:01:15", "remaining_time": "0:01:50", "throughput": 2645.47, "total_tokens": 199040}
{"current_steps": 510, "total_steps": 1250, "loss": 0.3955, "lr": 0.022171181903822883, "epoch": 4.08, "percentage": 40.8, "elapsed_time": "0:01:15", "remaining_time": "0:01:50", "throughput": 2648.84, "total_tokens": 200960}
{"current_steps": 515, "total_steps": 1250, "loss": 0.383, "lr": 0.021986534590706163, "epoch": 4.12, "percentage": 41.2, "elapsed_time": "0:01:16", "remaining_time": "0:01:49", "throughput": 2656.06, "total_tokens": 203008}
{"current_steps": 520, "total_steps": 1250, "loss": 0.3559, "lr": 0.021800525238827927, "epoch": 4.16, "percentage": 41.6, "elapsed_time": "0:01:16", "remaining_time": "0:01:48", "throughput": 2659.39, "total_tokens": 204672}
{"current_steps": 525, "total_steps": 1250, "loss": 0.3964, "lr": 0.02161319011107988, "epoch": 4.2, "percentage": 42.0, "elapsed_time": "0:01:17", "remaining_time": "0:01:47", "throughput": 2666.78, "total_tokens": 206784}
{"current_steps": 530, "total_steps": 1250, "loss": 0.3616, "lr": 0.021424565728816354, "epoch": 4.24, "percentage": 42.4, "elapsed_time": "0:01:18", "remaining_time": "0:01:46", "throughput": 2672.12, "total_tokens": 208672}
{"current_steps": 535, "total_steps": 1250, "loss": 0.3369, "lr": 0.021234688864734418, "epoch": 4.28, "percentage": 42.8, "elapsed_time": "0:01:18", "remaining_time": "0:01:45", "throughput": 2679.02, "total_tokens": 210752}
{"current_steps": 540, "total_steps": 1250, "loss": 0.3544, "lr": 0.02104359653570494, "epoch": 4.32, "percentage": 43.2, "elapsed_time": "0:01:19", "remaining_time": "0:01:44", "throughput": 2691.58, "total_tokens": 213472}
{"current_steps": 545, "total_steps": 1250, "loss": 0.3812, "lr": 0.020851325995556093, "epoch": 4.36, "percentage": 43.6, "elapsed_time": "0:01:19", "remaining_time": "0:01:43", "throughput": 2699.07, "total_tokens": 215616}
{"current_steps": 550, "total_steps": 1250, "loss": 0.4615, "lr": 0.020657914727810648, "epoch": 4.4, "percentage": 44.0, "elapsed_time": "0:01:20", "remaining_time": "0:01:42", "throughput": 2705.57, "total_tokens": 217664}
{"current_steps": 555, "total_steps": 1250, "loss": 0.4038, "lr": 0.020463400438378472, "epoch": 4.44, "percentage": 44.4, "elapsed_time": "0:01:21", "remaining_time": "0:01:41", "throughput": 2710.79, "total_tokens": 219584}
{"current_steps": 560, "total_steps": 1250, "loss": 0.3561, "lr": 0.020267821048205698, "epoch": 4.48, "percentage": 44.8, "elapsed_time": "0:01:21", "remaining_time": "0:01:40", "throughput": 2715.88, "total_tokens": 221504}
{"current_steps": 565, "total_steps": 1250, "loss": 0.355, "lr": 0.02007121468588196, "epoch": 4.52, "percentage": 45.2, "elapsed_time": "0:01:22", "remaining_time": "0:01:39", "throughput": 2720.9, "total_tokens": 223424}
{"current_steps": 567, "total_steps": 1250, "eval_loss": 0.3839019238948822, "epoch": 4.536, "percentage": 45.36, "elapsed_time": "0:01:23", "remaining_time": "0:01:40", "throughput": 2689.6, "total_tokens": 224032}
{"current_steps": 570, "total_steps": 1250, "loss": 0.344, "lr": 0.019873619680207146, "epoch": 4.5600000000000005, "percentage": 45.6, "elapsed_time": "0:01:24", "remaining_time": "0:01:40", "throughput": 2664.7, "total_tokens": 225216}
{"current_steps": 575, "total_steps": 1250, "loss": 0.356, "lr": 0.019675074552719125, "epoch": 4.6, "percentage": 46.0, "elapsed_time": "0:01:25", "remaining_time": "0:01:39", "throughput": 2668.69, "total_tokens": 227008}
{"current_steps": 580, "total_steps": 1250, "loss": 0.3653, "lr": 0.019475618010183906, "epoch": 4.64, "percentage": 46.4, "elapsed_time": "0:01:25", "remaining_time": "0:01:38", "throughput": 2671.98, "total_tokens": 228736}
{"current_steps": 585, "total_steps": 1250, "loss": 0.3883, "lr": 0.01927528893704964, "epoch": 4.68, "percentage": 46.8, "elapsed_time": "0:01:26", "remaining_time": "0:01:37", "throughput": 2673.16, "total_tokens": 230240}
{"current_steps": 590, "total_steps": 1250, "loss": 0.3613, "lr": 0.01907412638786608, "epoch": 4.72, "percentage": 47.2, "elapsed_time": "0:01:26", "remaining_time": "0:01:36", "throughput": 2678.51, "total_tokens": 232192}
{"current_steps": 595, "total_steps": 1250, "loss": 0.4182, "lr": 0.018872169579670764, "epoch": 4.76, "percentage": 47.6, "elapsed_time": "0:01:27", "remaining_time": "0:01:36", "throughput": 2685.41, "total_tokens": 234368}
{"current_steps": 600, "total_steps": 1250, "loss": 0.4025, "lr": 0.01866945788434361, "epoch": 4.8, "percentage": 48.0, "elapsed_time": "0:01:27", "remaining_time": "0:01:35", "throughput": 2688.2, "total_tokens": 236032}
{"current_steps": 605, "total_steps": 1250, "loss": 0.3321, "lr": 0.018466030820931272, "epoch": 4.84, "percentage": 48.4, "elapsed_time": "0:01:28", "remaining_time": "0:01:34", "throughput": 2692.81, "total_tokens": 237920}
{"current_steps": 610, "total_steps": 1250, "loss": 0.3808, "lr": 0.01826192804794282, "epoch": 4.88, "percentage": 48.8, "elapsed_time": "0:01:28", "remaining_time": "0:01:33", "throughput": 2696.11, "total_tokens": 239680}
{"current_steps": 615, "total_steps": 1250, "loss": 0.4191, "lr": 0.018057189355618276, "epoch": 4.92, "percentage": 49.2, "elapsed_time": "0:01:29", "remaining_time": "0:01:32", "throughput": 2699.98, "total_tokens": 241504}
{"current_steps": 620, "total_steps": 1250, "loss": 0.433, "lr": 0.01785185465817135, "epoch": 4.96, "percentage": 49.6, "elapsed_time": "0:01:30", "remaining_time": "0:01:31", "throughput": 2705.79, "total_tokens": 243584}
{"current_steps": 625, "total_steps": 1250, "loss": 0.3915, "lr": 0.017645963986008185, "epoch": 5.0, "percentage": 50.0, "elapsed_time": "0:01:30", "remaining_time": "0:01:30", "throughput": 2705.85, "total_tokens": 245472}
{"current_steps": 630, "total_steps": 1250, "loss": 0.355, "lr": 0.017439557477923254, "epoch": 5.04, "percentage": 50.4, "elapsed_time": "0:01:31", "remaining_time": "0:01:30", "throughput": 2704.46, "total_tokens": 247424}
{"current_steps": 630, "total_steps": 1250, "eval_loss": 0.3732368052005768, "epoch": 5.04, "percentage": 50.4, "elapsed_time": "0:01:32", "remaining_time": "0:01:31", "throughput": 2675.4, "total_tokens": 247424}
{"current_steps": 635, "total_steps": 1250, "loss": 0.3655, "lr": 0.017232675373274282, "epoch": 5.08, "percentage": 50.8, "elapsed_time": "0:01:34", "remaining_time": "0:01:31", "throughput": 2653.07, "total_tokens": 249472}
{"current_steps": 640, "total_steps": 1250, "loss": 0.2432, "lr": 0.017025358004137486, "epoch": 5.12, "percentage": 51.2, "elapsed_time": "0:01:34", "remaining_time": "0:01:30", "throughput": 2656.85, "total_tokens": 251296}
{"current_steps": 645, "total_steps": 1250, "loss": 0.5144, "lr": 0.016817645787444758, "epoch": 5.16, "percentage": 51.6, "elapsed_time": "0:01:35", "remaining_time": "0:01:29", "throughput": 2662.51, "total_tokens": 253344}
{"current_steps": 650, "total_steps": 1250, "loss": 0.4451, "lr": 0.0166095792171043, "epoch": 5.2, "percentage": 52.0, "elapsed_time": "0:01:35", "remaining_time": "0:01:28", "throughput": 2665.83, "total_tokens": 255104}
{"current_steps": 655, "total_steps": 1250, "loss": 0.4395, "lr": 0.01640119885610626, "epoch": 5.24, "percentage": 52.4, "elapsed_time": "0:01:36", "remaining_time": "0:01:27", "throughput": 2668.97, "total_tokens": 256832}
{"current_steps": 660, "total_steps": 1250, "loss": 0.4452, "lr": 0.016192545328614895, "epoch": 5.28, "percentage": 52.8, "elapsed_time": "0:01:36", "remaining_time": "0:01:26", "throughput": 2673.2, "total_tokens": 258720}
{"current_steps": 665, "total_steps": 1250, "loss": 0.3978, "lr": 0.015983659312048825, "epoch": 5.32, "percentage": 53.2, "elapsed_time": "0:01:37", "remaining_time": "0:01:25", "throughput": 2676.97, "total_tokens": 260576}
{"current_steps": 670, "total_steps": 1250, "loss": 0.3766, "lr": 0.015774581529150847, "epoch": 5.36, "percentage": 53.6, "elapsed_time": "0:01:37", "remaining_time": "0:01:24", "throughput": 2680.19, "total_tokens": 262368}
{"current_steps": 675, "total_steps": 1250, "loss": 0.35, "lr": 0.01556535274004902, "epoch": 5.4, "percentage": 54.0, "elapsed_time": "0:01:38", "remaining_time": "0:01:23", "throughput": 2684.69, "total_tokens": 264320}
{"current_steps": 680, "total_steps": 1250, "loss": 0.3589, "lr": 0.01535601373431033, "epoch": 5.44, "percentage": 54.4, "elapsed_time": "0:01:39", "remaining_time": "0:01:23", "throughput": 2689.38, "total_tokens": 266304}
{"current_steps": 685, "total_steps": 1250, "loss": 0.3521, "lr": 0.015146605322988737, "epoch": 5.48, "percentage": 54.8, "elapsed_time": "0:01:39", "remaining_time": "0:01:22", "throughput": 2694.63, "total_tokens": 268384}
{"current_steps": 690, "total_steps": 1250, "loss": 0.3847, "lr": 0.014937168330668944, "epoch": 5.52, "percentage": 55.2, "elapsed_time": "0:01:40", "remaining_time": "0:01:21", "throughput": 2697.81, "total_tokens": 270208}
{"current_steps": 693, "total_steps": 1250, "eval_loss": 0.3541419804096222, "epoch": 5.5440000000000005, "percentage": 55.44, "elapsed_time": "0:01:41", "remaining_time": "0:01:21", "throughput": 2671.7, "total_tokens": 271232}
{"current_steps": 695, "total_steps": 1250, "loss": 0.3409, "lr": 0.014727743587507579, "epoch": 5.5600000000000005, "percentage": 55.6, "elapsed_time": "0:01:42", "remaining_time": "0:01:22", "throughput": 2644.44, "total_tokens": 271840}
{"current_steps": 700, "total_steps": 1250, "loss": 0.4763, "lr": 0.014518371921273277, "epoch": 5.6, "percentage": 56.0, "elapsed_time": "0:01:43", "remaining_time": "0:01:21", "throughput": 2648.81, "total_tokens": 273888}
{"current_steps": 705, "total_steps": 1250, "loss": 0.3572, "lr": 0.014309094149387214, "epoch": 5.64, "percentage": 56.4, "elapsed_time": "0:01:43", "remaining_time": "0:01:20", "throughput": 2653.47, "total_tokens": 275904}
{"current_steps": 710, "total_steps": 1250, "loss": 0.3598, "lr": 0.014099951070965693, "epoch": 5.68, "percentage": 56.8, "elapsed_time": "0:01:44", "remaining_time": "0:01:19", "throughput": 2657.88, "total_tokens": 277888}
{"current_steps": 715, "total_steps": 1250, "loss": 0.351, "lr": 0.013890983458866225, "epoch": 5.72, "percentage": 57.2, "elapsed_time": "0:01:45", "remaining_time": "0:01:18", "throughput": 2662.38, "total_tokens": 279872}
{"current_steps": 720, "total_steps": 1250, "loss": 0.3676, "lr": 0.013682232051738852, "epoch": 5.76, "percentage": 57.6, "elapsed_time": "0:01:45", "remaining_time": "0:01:17", "throughput": 2665.63, "total_tokens": 281664}
{"current_steps": 725, "total_steps": 1250, "loss": 0.3742, "lr": 0.013473737546084006, "epoch": 5.8, "percentage": 58.0, "elapsed_time": "0:01:46", "remaining_time": "0:01:16", "throughput": 2670.76, "total_tokens": 283776}
{"current_steps": 730, "total_steps": 1250, "loss": 0.3707, "lr": 0.013265540588318678, "epoch": 5.84, "percentage": 58.4, "elapsed_time": "0:01:46", "remaining_time": "0:01:16", "throughput": 2673.82, "total_tokens": 285568}
{"current_steps": 735, "total_steps": 1250, "loss": 0.3345, "lr": 0.013057681766852297, "epoch": 5.88, "percentage": 58.8, "elapsed_time": "0:01:47", "remaining_time": "0:01:15", "throughput": 2676.93, "total_tokens": 287360}
{"current_steps": 740, "total_steps": 1250, "loss": 0.4732, "lr": 0.012850201604173958, "epoch": 5.92, "percentage": 59.2, "elapsed_time": "0:01:47", "remaining_time": "0:01:14", "throughput": 2683.18, "total_tokens": 289632}
{"current_steps": 745, "total_steps": 1250, "loss": 0.3843, "lr": 0.012643140548952488, "epoch": 5.96, "percentage": 59.6, "elapsed_time": "0:01:48", "remaining_time": "0:01:13", "throughput": 2686.93, "total_tokens": 291552}
{"current_steps": 750, "total_steps": 1250, "loss": 0.3671, "lr": 0.012436538968150852, "epoch": 6.0, "percentage": 60.0, "elapsed_time": "0:01:49", "remaining_time": "0:01:12", "throughput": 2689.11, "total_tokens": 293616}
{"current_steps": 755, "total_steps": 1250, "loss": 0.4328, "lr": 0.012230437139156598, "epoch": 6.04, "percentage": 60.4, "elapsed_time": "0:01:49", "remaining_time": "0:01:12", "throughput": 2687.0, "total_tokens": 295440}
{"current_steps": 756, "total_steps": 1250, "eval_loss": 0.3728449046611786, "epoch": 6.048, "percentage": 60.48, "elapsed_time": "0:01:51", "remaining_time": "0:01:12", "throughput": 2663.83, "total_tokens": 295728}
{"current_steps": 760, "total_steps": 1250, "loss": 0.386, "lr": 0.012024875241929653, "epoch": 6.08, "percentage": 60.8, "elapsed_time": "0:01:52", "remaining_time": "0:01:12", "throughput": 2643.75, "total_tokens": 297360}
{"current_steps": 765, "total_steps": 1250, "loss": 0.4223, "lr": 0.011819893351169184, "epoch": 6.12, "percentage": 61.2, "elapsed_time": "0:01:53", "remaining_time": "0:01:11", "throughput": 2646.88, "total_tokens": 299312}
{"current_steps": 770, "total_steps": 1250, "loss": 0.3884, "lr": 0.011615531428500938, "epoch": 6.16, "percentage": 61.6, "elapsed_time": "0:01:53", "remaining_time": "0:01:10", "throughput": 2652.05, "total_tokens": 301488}
{"current_steps": 775, "total_steps": 1250, "loss": 0.3521, "lr": 0.01141182931468666, "epoch": 6.2, "percentage": 62.0, "elapsed_time": "0:01:54", "remaining_time": "0:01:10", "throughput": 2657.61, "total_tokens": 303696}
{"current_steps": 780, "total_steps": 1250, "loss": 0.2873, "lr": 0.01120882672185706, "epoch": 6.24, "percentage": 62.4, "elapsed_time": "0:01:54", "remaining_time": "0:01:09", "throughput": 2659.29, "total_tokens": 305360}
{"current_steps": 785, "total_steps": 1250, "loss": 0.4382, "lr": 0.011006563225769832, "epoch": 6.28, "percentage": 62.8, "elapsed_time": "0:01:55", "remaining_time": "0:01:08", "throughput": 2663.83, "total_tokens": 307408}
{"current_steps": 790, "total_steps": 1250, "loss": 0.3574, "lr": 0.010805078258094304, "epoch": 6.32, "percentage": 63.2, "elapsed_time": "0:01:55", "remaining_time": "0:01:07", "throughput": 2666.84, "total_tokens": 309232}
{"current_steps": 795, "total_steps": 1250, "loss": 0.366, "lr": 0.01060441109872414, "epoch": 6.36, "percentage": 63.6, "elapsed_time": "0:01:56", "remaining_time": "0:01:06", "throughput": 2670.73, "total_tokens": 311184}
{"current_steps": 800, "total_steps": 1250, "loss": 0.3755, "lr": 0.01040460086811966, "epoch": 6.4, "percentage": 64.0, "elapsed_time": "0:01:57", "remaining_time": "0:01:05", "throughput": 2674.21, "total_tokens": 313072}
{"current_steps": 805, "total_steps": 1250, "loss": 0.4247, "lr": 0.010205686519681232, "epoch": 6.44, "percentage": 64.4, "elapsed_time": "0:01:57", "remaining_time": "0:01:05", "throughput": 2676.81, "total_tokens": 314832}
{"current_steps": 810, "total_steps": 1250, "loss": 0.3554, "lr": 0.0100077068321552, "epoch": 6.48, "percentage": 64.8, "elapsed_time": "0:01:58", "remaining_time": "0:01:04", "throughput": 2679.28, "total_tokens": 316560}
{"current_steps": 815, "total_steps": 1250, "loss": 0.3633, "lr": 0.009810700402073928, "epoch": 6.52, "percentage": 65.2, "elapsed_time": "0:01:58", "remaining_time": "0:01:03", "throughput": 2685.72, "total_tokens": 318960}
{"current_steps": 819, "total_steps": 1250, "eval_loss": 0.35502758622169495, "epoch": 6.552, "percentage": 65.52, "elapsed_time": "0:02:00", "remaining_time": "0:01:03", "throughput": 2666.18, "total_tokens": 320464}
{"current_steps": 820, "total_steps": 1250, "loss": 0.3535, "lr": 0.009614705636231307, "epoch": 6.5600000000000005, "percentage": 65.6, "elapsed_time": "0:02:01", "remaining_time": "0:01:03", "throughput": 2645.86, "total_tokens": 320880}
{"current_steps": 825, "total_steps": 1250, "loss": 0.3542, "lr": 0.009419760744195283, "epoch": 6.6, "percentage": 66.0, "elapsed_time": "0:02:01", "remaining_time": "0:01:02", "throughput": 2651.62, "total_tokens": 323184}
{"current_steps": 830, "total_steps": 1250, "loss": 0.3698, "lr": 0.00922590373085881, "epoch": 6.64, "percentage": 66.4, "elapsed_time": "0:02:02", "remaining_time": "0:01:01", "throughput": 2655.63, "total_tokens": 325168}
{"current_steps": 835, "total_steps": 1250, "loss": 0.393, "lr": 0.009033172389030755, "epoch": 6.68, "percentage": 66.8, "elapsed_time": "0:02:02", "remaining_time": "0:01:01", "throughput": 2658.55, "total_tokens": 326992}
{"current_steps": 840, "total_steps": 1250, "loss": 0.37, "lr": 0.00884160429206808, "epoch": 6.72, "percentage": 67.2, "elapsed_time": "0:02:03", "remaining_time": "0:01:00", "throughput": 2663.26, "total_tokens": 329104}
{"current_steps": 845, "total_steps": 1250, "loss": 0.4608, "lr": 0.008651236786550862, "epoch": 6.76, "percentage": 67.6, "elapsed_time": "0:02:04", "remaining_time": "0:00:59", "throughput": 2667.23, "total_tokens": 331088}
{"current_steps": 850, "total_steps": 1250, "loss": 0.3707, "lr": 0.00846210698500149, "epoch": 6.8, "percentage": 68.0, "elapsed_time": "0:02:04", "remaining_time": "0:00:58", "throughput": 2670.7, "total_tokens": 333008}
{"current_steps": 855, "total_steps": 1250, "loss": 0.2943, "lr": 0.008274251758649518, "epoch": 6.84, "percentage": 68.4, "elapsed_time": "0:02:05", "remaining_time": "0:00:57", "throughput": 2674.78, "total_tokens": 335024}
{"current_steps": 860, "total_steps": 1250, "loss": 0.4466, "lr": 0.008087707730243539, "epoch": 6.88, "percentage": 68.8, "elapsed_time": "0:02:05", "remaining_time": "0:00:57", "throughput": 2679.09, "total_tokens": 337104}
{"current_steps": 865, "total_steps": 1250, "loss": 0.3659, "lr": 0.007902511266911504, "epoch": 6.92, "percentage": 69.2, "elapsed_time": "0:02:06", "remaining_time": "0:00:56", "throughput": 2682.12, "total_tokens": 338960}
{"current_steps": 870, "total_steps": 1250, "loss": 0.4244, "lr": 0.00771869847307089, "epoch": 6.96, "percentage": 69.6, "elapsed_time": "0:02:06", "remaining_time": "0:00:55", "throughput": 2685.42, "total_tokens": 340848}
{"current_steps": 875, "total_steps": 1250, "loss": 0.3219, "lr": 0.007536305183390062, "epoch": 7.0, "percentage": 70.0, "elapsed_time": "0:02:07", "remaining_time": "0:00:54", "throughput": 2688.16, "total_tokens": 343040}
{"current_steps": 880, "total_steps": 1250, "loss": 0.2882, "lr": 0.007355366955802234, "epoch": 7.04, "percentage": 70.4, "elapsed_time": "0:02:08", "remaining_time": "0:00:53", "throughput": 2687.32, "total_tokens": 345056}
{"current_steps": 882, "total_steps": 1250, "eval_loss": 0.5431165099143982, "epoch": 7.056, "percentage": 70.56, "elapsed_time": "0:02:09", "remaining_time": "0:00:54", "throughput": 2668.99, "total_tokens": 345856}
{"current_steps": 885, "total_steps": 1250, "loss": 0.3407, "lr": 0.007175919064573383, "epoch": 7.08, "percentage": 70.8, "elapsed_time": "0:02:10", "remaining_time": "0:00:53", "throughput": 2653.97, "total_tokens": 347232}
{"current_steps": 890, "total_steps": 1250, "loss": 0.484, "lr": 0.006997996493425461, "epoch": 7.12, "percentage": 71.2, "elapsed_time": "0:02:11", "remaining_time": "0:00:53", "throughput": 2658.83, "total_tokens": 349504}
{"current_steps": 895, "total_steps": 1250, "loss": 0.3891, "lr": 0.0068216339287162486, "epoch": 7.16, "percentage": 71.6, "elapsed_time": "0:02:11", "remaining_time": "0:00:52", "throughput": 2661.47, "total_tokens": 351296}
{"current_steps": 900, "total_steps": 1250, "loss": 0.4381, "lr": 0.006646865752677185, "epoch": 7.2, "percentage": 72.0, "elapsed_time": "0:02:12", "remaining_time": "0:00:51", "throughput": 2664.72, "total_tokens": 353248}
{"current_steps": 905, "total_steps": 1250, "loss": 0.4013, "lr": 0.00647372603671046, "epoch": 7.24, "percentage": 72.4, "elapsed_time": "0:02:13", "remaining_time": "0:00:50", "throughput": 2668.44, "total_tokens": 355232}
{"current_steps": 910, "total_steps": 1250, "loss": 0.3963, "lr": 0.0063022485347467615, "epoch": 7.28, "percentage": 72.8, "elapsed_time": "0:02:13", "remaining_time": "0:00:49", "throughput": 2672.78, "total_tokens": 357376}
{"current_steps": 915, "total_steps": 1250, "loss": 0.303, "lr": 0.00613246667666487, "epoch": 7.32, "percentage": 73.2, "elapsed_time": "0:02:14", "remaining_time": "0:00:49", "throughput": 2675.67, "total_tokens": 359232}
{"current_steps": 920, "total_steps": 1250, "loss": 0.4389, "lr": 0.005964413561774424, "epoch": 7.36, "percentage": 73.6, "elapsed_time": "0:02:14", "remaining_time": "0:00:48", "throughput": 2677.79, "total_tokens": 361024}
{"current_steps": 925, "total_steps": 1250, "loss": 0.3223, "lr": 0.0057981219523631404, "epoch": 7.4, "percentage": 74.0, "elapsed_time": "0:02:15", "remaining_time": "0:00:47", "throughput": 2682.2, "total_tokens": 363200}
{"current_steps": 930, "total_steps": 1250, "loss": 0.4041, "lr": 0.005633624267309767, "epoch": 7.44, "percentage": 74.4, "elapsed_time": "0:02:15", "remaining_time": "0:00:46", "throughput": 2685.48, "total_tokens": 365152}
{"current_steps": 935, "total_steps": 1250, "loss": 0.3497, "lr": 0.005470952575763933, "epoch": 7.48, "percentage": 74.8, "elapsed_time": "0:02:16", "remaining_time": "0:00:46", "throughput": 2689.83, "total_tokens": 367328}
{"current_steps": 940, "total_steps": 1250, "loss": 0.3507, "lr": 0.0053101385908942405, "epoch": 7.52, "percentage": 75.2, "elapsed_time": "0:02:17", "remaining_time": "0:00:45", "throughput": 2692.9, "total_tokens": 369248}
{"current_steps": 945, "total_steps": 1250, "loss": 0.3561, "lr": 0.0051512136637056555, "epoch": 7.5600000000000005, "percentage": 75.6, "elapsed_time": "0:02:17", "remaining_time": "0:00:44", "throughput": 2695.16, "total_tokens": 371040}
{"current_steps": 945, "total_steps": 1250, "eval_loss": 0.35614675283432007, "epoch": 7.5600000000000005, "percentage": 75.6, "elapsed_time": "0:02:18", "remaining_time": "0:00:44", "throughput": 2676.43, "total_tokens": 371040}
{"current_steps": 950, "total_steps": 1250, "loss": 0.3589, "lr": 0.004994208776927635, "epoch": 7.6, "percentage": 76.0, "elapsed_time": "0:02:20", "remaining_time": "0:00:44", "throughput": 2662.14, "total_tokens": 372928}
{"current_steps": 955, "total_steps": 1250, "loss": 0.3537, "lr": 0.004839154538973943, "epoch": 7.64, "percentage": 76.4, "elapsed_time": "0:02:20", "remaining_time": "0:00:43", "throughput": 2665.09, "total_tokens": 374816}
{"current_steps": 960, "total_steps": 1250, "loss": 0.3578, "lr": 0.00468608117797549, "epoch": 7.68, "percentage": 76.8, "elapsed_time": "0:02:21", "remaining_time": "0:00:42", "throughput": 2667.96, "total_tokens": 376704}
{"current_steps": 965, "total_steps": 1250, "loss": 0.3526, "lr": 0.004535018535887305, "epoch": 7.72, "percentage": 77.2, "elapsed_time": "0:02:21", "remaining_time": "0:00:41", "throughput": 2670.94, "total_tokens": 378624}
{"current_steps": 970, "total_steps": 1250, "loss": 0.3456, "lr": 0.004385996062670774, "epoch": 7.76, "percentage": 77.6, "elapsed_time": "0:02:22", "remaining_time": "0:00:41", "throughput": 2673.46, "total_tokens": 380480}
{"current_steps": 975, "total_steps": 1250, "loss": 0.3575, "lr": 0.0042390428105523225, "epoch": 7.8, "percentage": 78.0, "elapsed_time": "0:02:22", "remaining_time": "0:00:40", "throughput": 2675.12, "total_tokens": 382144}
{"current_steps": 980, "total_steps": 1250, "loss": 0.3544, "lr": 0.004094187428359625, "epoch": 7.84, "percentage": 78.4, "elapsed_time": "0:02:23", "remaining_time": "0:00:39", "throughput": 2679.33, "total_tokens": 384320}
{"current_steps": 985, "total_steps": 1250, "loss": 0.3762, "lr": 0.003951458155936452, "epoch": 7.88, "percentage": 78.8, "elapsed_time": "0:02:24", "remaining_time": "0:00:38", "throughput": 2684.29, "total_tokens": 386656}
{"current_steps": 990, "total_steps": 1250, "loss": 0.3497, "lr": 0.0038108828186372685, "epoch": 7.92, "percentage": 79.2, "elapsed_time": "0:02:24", "remaining_time": "0:00:37", "throughput": 2687.75, "total_tokens": 388672}
{"current_steps": 995, "total_steps": 1250, "loss": 0.3919, "lr": 0.003672488821902614, "epoch": 7.96, "percentage": 79.6, "elapsed_time": "0:02:25", "remaining_time": "0:00:37", "throughput": 2689.8, "total_tokens": 390400}
{"current_steps": 1000, "total_steps": 1250, "loss": 0.3462, "lr": 0.0035363031459163647, "epoch": 8.0, "percentage": 80.0, "elapsed_time": "0:02:25", "remaining_time": "0:00:36", "throughput": 2689.25, "total_tokens": 392080}
{"current_steps": 1005, "total_steps": 1250, "loss": 0.3555, "lr": 0.0034023523403458908, "epoch": 8.04, "percentage": 80.4, "elapsed_time": "0:02:26", "remaining_time": "0:00:35", "throughput": 2688.24, "total_tokens": 394160}
{"current_steps": 1008, "total_steps": 1250, "eval_loss": 0.35737964510917664, "epoch": 8.064, "percentage": 80.64, "elapsed_time": "0:02:27", "remaining_time": "0:00:35", "throughput": 2672.08, "total_tokens": 395216}
{"current_steps": 1010, "total_steps": 1250, "loss": 0.3613, "lr": 0.003270662519166149, "epoch": 8.08, "percentage": 80.8, "elapsed_time": "0:02:29", "remaining_time": "0:00:35", "throughput": 2655.77, "total_tokens": 395888}
{"current_steps": 1015, "total_steps": 1250, "loss": 0.3469, "lr": 0.003141259355568705, "epoch": 8.12, "percentage": 81.2, "elapsed_time": "0:02:29", "remaining_time": "0:00:34", "throughput": 2659.52, "total_tokens": 398032}
{"current_steps": 1020, "total_steps": 1250, "loss": 0.3543, "lr": 0.003014168076956707, "epoch": 8.16, "percentage": 81.6, "elapsed_time": "0:02:30", "remaining_time": "0:00:33", "throughput": 2661.86, "total_tokens": 399856}
{"current_steps": 1025, "total_steps": 1250, "loss": 0.3636, "lr": 0.002889413460026724, "epoch": 8.2, "percentage": 82.0, "elapsed_time": "0:02:30", "remaining_time": "0:00:33", "throughput": 2663.87, "total_tokens": 401616}
{"current_steps": 1030, "total_steps": 1250, "loss": 0.3521, "lr": 0.0027670198259385275, "epoch": 8.24, "percentage": 82.4, "elapsed_time": "0:02:31", "remaining_time": "0:00:32", "throughput": 2666.86, "total_tokens": 403568}
{"current_steps": 1035, "total_steps": 1250, "loss": 0.3611, "lr": 0.0026470110355735882, "epoch": 8.28, "percentage": 82.8, "elapsed_time": "0:02:31", "remaining_time": "0:00:31", "throughput": 2670.85, "total_tokens": 405712}
{"current_steps": 1040, "total_steps": 1250, "loss": 0.3538, "lr": 0.0025294104848833754, "epoch": 8.32, "percentage": 83.2, "elapsed_time": "0:02:32", "remaining_time": "0:00:30", "throughput": 2674.96, "total_tokens": 407888}
{"current_steps": 1045, "total_steps": 1250, "loss": 0.3559, "lr": 0.002414241100328251, "epoch": 8.36, "percentage": 83.6, "elapsed_time": "0:02:33", "remaining_time": "0:00:30", "throughput": 2677.28, "total_tokens": 409712}
{"current_steps": 1050, "total_steps": 1250, "loss": 0.3549, "lr": 0.002301525334407931, "epoch": 8.4, "percentage": 84.0, "elapsed_time": "0:02:33", "remaining_time": "0:00:29", "throughput": 2679.41, "total_tokens": 411504}
{"current_steps": 1055, "total_steps": 1250, "loss": 0.3515, "lr": 0.0021912851612843243, "epoch": 8.44, "percentage": 84.4, "elapsed_time": "0:02:34", "remaining_time": "0:00:28", "throughput": 2680.98, "total_tokens": 413168}
{"current_steps": 1060, "total_steps": 1250, "loss": 0.3453, "lr": 0.002083542072497606, "epoch": 8.48, "percentage": 84.8, "elapsed_time": "0:02:34", "remaining_time": "0:00:27", "throughput": 2683.24, "total_tokens": 414960}
{"current_steps": 1065, "total_steps": 1250, "loss": 0.3562, "lr": 0.001978317072776413, "epoch": 8.52, "percentage": 85.2, "elapsed_time": "0:02:35", "remaining_time": "0:00:26", "throughput": 2685.95, "total_tokens": 416880}
{"current_steps": 1070, "total_steps": 1250, "loss": 0.3562, "lr": 0.0018756306759429363, "epoch": 8.56, "percentage": 85.6, "elapsed_time": "0:02:35", "remaining_time": "0:00:26", "throughput": 2688.49, "total_tokens": 418768}
{"current_steps": 1071, "total_steps": 1250, "eval_loss": 0.354716032743454, "epoch": 8.568, "percentage": 85.68, "elapsed_time": "0:02:36", "remaining_time": "0:00:26", "throughput": 2672.09, "total_tokens": 419184}
{"current_steps": 1075, "total_steps": 1250, "loss": 0.3512, "lr": 0.001775502900913697, "epoch": 8.6, "percentage": 86.0, "elapsed_time": "0:02:38", "remaining_time": "0:00:25", "throughput": 2655.84, "total_tokens": 421008}
{"current_steps": 1080, "total_steps": 1250, "loss": 0.352, "lr": 0.0016779532677968327, "epoch": 8.64, "percentage": 86.4, "elapsed_time": "0:02:39", "remaining_time": "0:00:25", "throughput": 2658.3, "total_tokens": 422864}
{"current_steps": 1085, "total_steps": 1250, "loss": 0.3618, "lr": 0.0015830007940866035, "epoch": 8.68, "percentage": 86.8, "elapsed_time": "0:02:39", "remaining_time": "0:00:24", "throughput": 2662.01, "total_tokens": 424976}
{"current_steps": 1090, "total_steps": 1250, "loss": 0.3384, "lr": 0.0014906639909558954, "epoch": 8.72, "percentage": 87.2, "elapsed_time": "0:02:40", "remaining_time": "0:00:23", "throughput": 2665.52, "total_tokens": 427120}
{"current_steps": 1095, "total_steps": 1250, "loss": 0.3384, "lr": 0.0014009608596474348, "epoch": 8.76, "percentage": 87.6, "elapsed_time": "0:02:40", "remaining_time": "0:00:22", "throughput": 2669.1, "total_tokens": 429264}
{"current_steps": 1100, "total_steps": 1250, "loss": 0.3566, "lr": 0.001313908887964409, "epoch": 8.8, "percentage": 88.0, "elapsed_time": "0:02:41", "remaining_time": "0:00:22", "throughput": 2671.09, "total_tokens": 431056}
{"current_steps": 1105, "total_steps": 1250, "loss": 0.3671, "lr": 0.0012295250468611779, "epoch": 8.84, "percentage": 88.4, "elapsed_time": "0:02:41", "remaining_time": "0:00:21", "throughput": 2674.58, "total_tokens": 433136}
{"current_steps": 1110, "total_steps": 1250, "loss": 0.3551, "lr": 0.0011478257871347663, "epoch": 8.88, "percentage": 88.8, "elapsed_time": "0:02:42", "remaining_time": "0:00:20", "throughput": 2677.96, "total_tokens": 435216}
{"current_steps": 1115, "total_steps": 1250, "loss": 0.3383, "lr": 0.0010688270362177355, "epoch": 8.92, "percentage": 89.2, "elapsed_time": "0:02:43", "remaining_time": "0:00:19", "throughput": 2680.2, "total_tokens": 437040}
{"current_steps": 1120, "total_steps": 1250, "loss": 0.3615, "lr": 0.0009925441950730985, "epoch": 8.96, "percentage": 89.6, "elapsed_time": "0:02:43", "remaining_time": "0:00:18", "throughput": 2683.46, "total_tokens": 439088}
{"current_steps": 1125, "total_steps": 1250, "loss": 0.3257, "lr": 0.0009189921351918889, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:02:44", "remaining_time": "0:00:18", "throughput": 2684.33, "total_tokens": 440848}
{"current_steps": 1130, "total_steps": 1250, "loss": 0.3731, "lr": 0.0008481851956939134, "epoch": 9.04, "percentage": 90.4, "elapsed_time": "0:02:45", "remaining_time": "0:00:17", "throughput": 2684.01, "total_tokens": 442864}
{"current_steps": 1134, "total_steps": 1250, "eval_loss": 0.3622165322303772, "epoch": 9.072, "percentage": 90.72, "elapsed_time": "0:02:46", "remaining_time": "0:00:17", "throughput": 2670.96, "total_tokens": 444560}
{"current_steps": 1135, "total_steps": 1250, "loss": 0.3666, "lr": 0.0007801371805323276, "epoch": 9.08, "percentage": 90.8, "elapsed_time": "0:02:47", "remaining_time": "0:00:16", "throughput": 2654.34, "total_tokens": 445040}
{"current_steps": 1140, "total_steps": 1250, "loss": 0.3579, "lr": 0.0007148613558025102, "epoch": 9.12, "percentage": 91.2, "elapsed_time": "0:02:48", "remaining_time": "0:00:16", "throughput": 2655.66, "total_tokens": 446864}
{"current_steps": 1145, "total_steps": 1250, "loss": 0.3587, "lr": 0.0006523704471558306, "epoch": 9.16, "percentage": 91.6, "elapsed_time": "0:02:48", "remaining_time": "0:00:15", "throughput": 2659.2, "total_tokens": 449008}
{"current_steps": 1150, "total_steps": 1250, "loss": 0.3458, "lr": 0.0005926766373187531, "epoch": 9.2, "percentage": 92.0, "elapsed_time": "0:02:49", "remaining_time": "0:00:14", "throughput": 2661.37, "total_tokens": 450832}
{"current_steps": 1155, "total_steps": 1250, "loss": 0.3488, "lr": 0.0005357915637177817, "epoch": 9.24, "percentage": 92.4, "elapsed_time": "0:02:49", "remaining_time": "0:00:13", "throughput": 2664.01, "total_tokens": 452784}
{"current_steps": 1160, "total_steps": 1250, "loss": 0.34, "lr": 0.00048172631621072045, "epoch": 9.28, "percentage": 92.8, "elapsed_time": "0:02:50", "remaining_time": "0:00:13", "throughput": 2667.78, "total_tokens": 454992}
{"current_steps": 1165, "total_steps": 1250, "loss": 0.3476, "lr": 0.00043049143492470017, "epoch": 9.32, "percentage": 93.2, "elapsed_time": "0:02:51", "remaining_time": "0:00:12", "throughput": 2668.66, "total_tokens": 456528}
{"current_steps": 1170, "total_steps": 1250, "loss": 0.3434, "lr": 0.00038209690820134145, "epoch": 9.36, "percentage": 93.6, "elapsed_time": "0:02:51", "remaining_time": "0:00:11", "throughput": 2672.4, "total_tokens": 458768}
{"current_steps": 1175, "total_steps": 1250, "loss": 0.3664, "lr": 0.0003365521706495234, "epoch": 9.4, "percentage": 94.0, "elapsed_time": "0:02:52", "remaining_time": "0:00:10", "throughput": 2675.44, "total_tokens": 460816}
{"current_steps": 1180, "total_steps": 1250, "loss": 0.3411, "lr": 0.00029386610130606504, "epoch": 9.44, "percentage": 94.4, "elapsed_time": "0:02:52", "remaining_time": "0:00:10", "throughput": 2676.17, "total_tokens": 462384}
{"current_steps": 1185, "total_steps": 1250, "loss": 0.3474, "lr": 0.00025404702190476856, "epoch": 9.48, "percentage": 94.8, "elapsed_time": "0:02:53", "remaining_time": "0:00:09", "throughput": 2678.71, "total_tokens": 464304}
{"current_steps": 1190, "total_steps": 1250, "loss": 0.3438, "lr": 0.00021710269525405834, "epoch": 9.52, "percentage": 95.2, "elapsed_time": "0:02:53", "remaining_time": "0:00:08", "throughput": 2681.88, "total_tokens": 466384}
{"current_steps": 1195, "total_steps": 1250, "loss": 0.3406, "lr": 0.00018304032372361666, "epoch": 9.56, "percentage": 95.6, "elapsed_time": "0:02:54", "remaining_time": "0:00:08", "throughput": 2684.67, "total_tokens": 468368}
{"current_steps": 1197, "total_steps": 1250, "eval_loss": 0.3528577983379364, "epoch": 9.576, "percentage": 95.76, "elapsed_time": "0:02:55", "remaining_time": "0:00:07", "throughput": 2670.33, "total_tokens": 469104}
{"current_steps": 1200, "total_steps": 1250, "loss": 0.3416, "lr": 0.00015186654784026365, "epoch": 9.6, "percentage": 96.0, "elapsed_time": "0:02:57", "remaining_time": "0:00:07", "throughput": 2655.46, "total_tokens": 470192}
{"current_steps": 1205, "total_steps": 1250, "loss": 0.3411, "lr": 0.00012358744499337603, "epoch": 9.64, "percentage": 96.4, "elapsed_time": "0:02:57", "remaining_time": "0:00:06", "throughput": 2658.4, "total_tokens": 472240}
{"current_steps": 1210, "total_steps": 1250, "loss": 0.3502, "lr": 9.820852825008664e-05, "epoch": 9.68, "percentage": 96.8, "elapsed_time": "0:02:58", "remaining_time": "0:00:05", "throughput": 2660.92, "total_tokens": 474160}
{"current_steps": 1215, "total_steps": 1250, "loss": 0.3552, "lr": 7.57347452804974e-05, "epoch": 9.72, "percentage": 97.2, "elapsed_time": "0:02:58", "remaining_time": "0:00:05", "throughput": 2666.24, "total_tokens": 476784}
{"current_steps": 1220, "total_steps": 1250, "loss": 0.3365, "lr": 5.6170477393130966e-05, "epoch": 9.76, "percentage": 97.6, "elapsed_time": "0:02:59", "remaining_time": "0:00:04", "throughput": 2669.27, "total_tokens": 478864}
{"current_steps": 1225, "total_steps": 1250, "loss": 0.3472, "lr": 3.951953868077229e-05, "epoch": 9.8, "percentage": 98.0, "elapsed_time": "0:02:59", "remaining_time": "0:00:03", "throughput": 2671.88, "total_tokens": 480816}
{"current_steps": 1230, "total_steps": 1250, "loss": 0.3398, "lr": 2.5785175276920034e-05, "epoch": 9.84, "percentage": 98.4, "elapsed_time": "0:03:00", "remaining_time": "0:00:02", "throughput": 2674.61, "total_tokens": 482832}
{"current_steps": 1235, "total_steps": 1250, "loss": 0.3538, "lr": 1.4970064722929499e-05, "epoch": 9.88, "percentage": 98.8, "elapsed_time": "0:03:01", "remaining_time": "0:00:02", "throughput": 2677.65, "total_tokens": 484912}
{"current_steps": 1240, "total_steps": 1250, "loss": 0.3555, "lr": 7.076315446033487e-06, "epoch": 9.92, "percentage": 99.2, "elapsed_time": "0:03:01", "remaining_time": "0:00:01", "throughput": 2679.24, "total_tokens": 486640}
{"current_steps": 1245, "total_steps": 1250, "loss": 0.3613, "lr": 2.105466348294449e-06, "epoch": 9.96, "percentage": 99.6, "elapsed_time": "0:03:02", "remaining_time": "0:00:00", "throughput": 2680.75, "total_tokens": 488336}
{"current_steps": 1250, "total_steps": 1250, "loss": 0.3414, "lr": 5.848650659112664e-08, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:03:02", "remaining_time": "0:00:00", "throughput": 2680.35, "total_tokens": 490000}
{"current_steps": 1250, "total_steps": 1250, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:03:03", "remaining_time": "0:00:00", "throughput": 2667.01, "total_tokens": 490000}