| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "global_step": 58268, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.957094803322579e-05, | |
| "loss": 2.4741, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 4.914189606645157e-05, | |
| "loss": 2.4661, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 4.871284409967736e-05, | |
| "loss": 2.4505, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 4.828379213290314e-05, | |
| "loss": 2.4463, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 4.785474016612893e-05, | |
| "loss": 2.4309, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 4.742568819935471e-05, | |
| "loss": 2.4286, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 4.699663623258049e-05, | |
| "loss": 2.4326, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 4.656758426580627e-05, | |
| "loss": 2.424, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 4.613853229903206e-05, | |
| "loss": 2.4179, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 4.570948033225785e-05, | |
| "loss": 2.4141, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 4.528042836548363e-05, | |
| "loss": 2.4121, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "learning_rate": 4.4851376398709416e-05, | |
| "loss": 2.407, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 4.44223244319352e-05, | |
| "loss": 2.399, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 4.3993272465160985e-05, | |
| "loss": 2.3935, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 4.3564220498386766e-05, | |
| "loss": 2.3822, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 4.3135168531612554e-05, | |
| "loss": 2.3822, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 4.2706116564838335e-05, | |
| "loss": 2.387, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 4.227706459806412e-05, | |
| "loss": 2.3601, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 4.1848012631289904e-05, | |
| "loss": 2.3582, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 4.1418960664515685e-05, | |
| "loss": 2.3706, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 4.098990869774147e-05, | |
| "loss": 2.3669, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 4.0560856730967254e-05, | |
| "loss": 2.3518, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 4.013180476419304e-05, | |
| "loss": 2.3392, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 3.970275279741882e-05, | |
| "loss": 2.3502, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 3.927370083064461e-05, | |
| "loss": 2.3437, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 3.884464886387039e-05, | |
| "loss": 2.3577, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 3.841559689709618e-05, | |
| "loss": 2.3435, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.798654493032196e-05, | |
| "loss": 2.3456, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 3.755749296354775e-05, | |
| "loss": 2.3461, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "learning_rate": 3.712844099677354e-05, | |
| "loss": 2.3472, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 3.669938902999932e-05, | |
| "loss": 2.3362, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 3.62703370632251e-05, | |
| "loss": 2.345, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 3.584128509645088e-05, | |
| "loss": 2.336, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 3.541223312967667e-05, | |
| "loss": 2.3365, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 3.498318116290245e-05, | |
| "loss": 2.3245, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 3.455412919612824e-05, | |
| "loss": 2.3086, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "learning_rate": 3.412507722935402e-05, | |
| "loss": 2.3251, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 3.3696025262579806e-05, | |
| "loss": 2.32, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 3.326697329580559e-05, | |
| "loss": 2.3084, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 3.2837921329031375e-05, | |
| "loss": 2.3164, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "learning_rate": 3.240886936225716e-05, | |
| "loss": 2.3054, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 3.1979817395482944e-05, | |
| "loss": 2.3172, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 3.1550765428708725e-05, | |
| "loss": 2.31, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 3.112171346193451e-05, | |
| "loss": 2.3141, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "learning_rate": 3.0692661495160294e-05, | |
| "loss": 2.3226, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "learning_rate": 3.026360952838608e-05, | |
| "loss": 2.3194, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 2.9834557561611863e-05, | |
| "loss": 2.3203, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 2.9405505594837644e-05, | |
| "loss": 2.3065, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 2.8976453628063432e-05, | |
| "loss": 2.3112, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 2.854740166128922e-05, | |
| "loss": 2.2985, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 2.8118349694515e-05, | |
| "loss": 2.3152, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 2.768929772774079e-05, | |
| "loss": 2.2912, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 2.726024576096657e-05, | |
| "loss": 2.2759, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 3.24, | |
| "learning_rate": 2.6831193794192354e-05, | |
| "loss": 2.2952, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "learning_rate": 2.6402141827418136e-05, | |
| "loss": 2.2971, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 2.5973089860643923e-05, | |
| "loss": 2.2987, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 2.5544037893869704e-05, | |
| "loss": 2.2859, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 2.5114985927095492e-05, | |
| "loss": 2.2779, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 3.54, | |
| "learning_rate": 2.4685933960321277e-05, | |
| "loss": 2.2878, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "learning_rate": 2.4256881993547058e-05, | |
| "loss": 2.2762, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 2.3827830026772842e-05, | |
| "loss": 2.2728, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 2.3398778059998627e-05, | |
| "loss": 2.2898, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 2.296972609322441e-05, | |
| "loss": 2.2836, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "learning_rate": 2.25406741264502e-05, | |
| "loss": 2.2882, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 3.9, | |
| "learning_rate": 2.211162215967598e-05, | |
| "loss": 2.2778, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 3.96, | |
| "learning_rate": 2.1682570192901765e-05, | |
| "loss": 2.2793, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 2.125351822612755e-05, | |
| "loss": 2.2699, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 2.0824466259353334e-05, | |
| "loss": 2.2778, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 4.14, | |
| "learning_rate": 2.0395414292579118e-05, | |
| "loss": 2.2668, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 1.9966362325804903e-05, | |
| "loss": 2.257, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 4.26, | |
| "learning_rate": 1.9537310359030687e-05, | |
| "loss": 2.2496, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 1.9108258392256472e-05, | |
| "loss": 2.2742, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 1.8679206425482253e-05, | |
| "loss": 2.2643, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 1.825015445870804e-05, | |
| "loss": 2.2517, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 1.7821102491933825e-05, | |
| "loss": 2.2677, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 1.739205052515961e-05, | |
| "loss": 2.2654, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 4.63, | |
| "learning_rate": 1.6962998558385394e-05, | |
| "loss": 2.2645, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 4.69, | |
| "learning_rate": 1.6533946591611175e-05, | |
| "loss": 2.2456, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 1.610489462483696e-05, | |
| "loss": 2.2686, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 1.5675842658062744e-05, | |
| "loss": 2.2478, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 1.5246790691288529e-05, | |
| "loss": 2.2558, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 1.4817738724514313e-05, | |
| "loss": 2.2541, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 1.4388686757740098e-05, | |
| "loss": 2.236, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 1.3959634790965884e-05, | |
| "loss": 2.2543, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 1.3530582824191668e-05, | |
| "loss": 2.2479, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 5.17, | |
| "learning_rate": 1.3101530857417451e-05, | |
| "loss": 2.2525, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 1.2672478890643236e-05, | |
| "loss": 2.2506, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 5.29, | |
| "learning_rate": 1.224342692386902e-05, | |
| "loss": 2.2282, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 1.1814374957094803e-05, | |
| "loss": 2.2406, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 5.41, | |
| "learning_rate": 1.1385322990320587e-05, | |
| "loss": 2.2357, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 1.0956271023546374e-05, | |
| "loss": 2.2457, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 5.53, | |
| "learning_rate": 1.0527219056772156e-05, | |
| "loss": 2.2262, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 1.0098167089997941e-05, | |
| "loss": 2.2423, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 9.669115123223725e-06, | |
| "loss": 2.2435, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 9.24006315644951e-06, | |
| "loss": 2.2277, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 5.77, | |
| "learning_rate": 8.811011189675294e-06, | |
| "loss": 2.2445, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "learning_rate": 8.381959222901079e-06, | |
| "loss": 2.2395, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 7.952907256126863e-06, | |
| "loss": 2.2506, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 5.95, | |
| "learning_rate": 7.523855289352647e-06, | |
| "loss": 2.234, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 7.0948033225784306e-06, | |
| "loss": 2.2488, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 6.665751355804216e-06, | |
| "loss": 2.2242, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 6.13, | |
| "learning_rate": 6.2366993890299995e-06, | |
| "loss": 2.2297, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 6.19, | |
| "learning_rate": 5.807647422255784e-06, | |
| "loss": 2.2259, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 6.25, | |
| "learning_rate": 5.378595455481568e-06, | |
| "loss": 2.2263, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 4.949543488707353e-06, | |
| "loss": 2.2214, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 4.520491521933137e-06, | |
| "loss": 2.2287, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 6.43, | |
| "learning_rate": 4.091439555158921e-06, | |
| "loss": 2.2384, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 6.49, | |
| "learning_rate": 3.662387588384705e-06, | |
| "loss": 2.2319, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 6.55, | |
| "learning_rate": 3.2333356216104892e-06, | |
| "loss": 2.2328, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "learning_rate": 2.804283654836274e-06, | |
| "loss": 2.2359, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 2.375231688062058e-06, | |
| "loss": 2.2372, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 1.9461797212878423e-06, | |
| "loss": 2.2402, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 6.79, | |
| "learning_rate": 1.5171277545136267e-06, | |
| "loss": 2.2243, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 1.088075787739411e-06, | |
| "loss": 2.2267, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 6.590238209651953e-07, | |
| "loss": 2.2254, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 6.97, | |
| "learning_rate": 2.2997185419097963e-07, | |
| "loss": 2.2278, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "step": 58268, | |
| "total_flos": 3.6314933518540524e+18, | |
| "train_runtime": 64951.4124, | |
| "train_samples_per_second": 0.897 | |
| } | |
| ], | |
| "max_steps": 58268, | |
| "num_train_epochs": 7, | |
| "total_flos": 3.6314933518540524e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |