| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 14.013452914798206, | |
| "global_step": 50000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 9.900000000000001e-05, | |
| "loss": 9.7624, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 9.8e-05, | |
| "loss": 8.2156, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 9.7e-05, | |
| "loss": 7.5205, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 9.6e-05, | |
| "loss": 7.3362, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 9.5e-05, | |
| "loss": 7.1343, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 9.4e-05, | |
| "loss": 6.9313, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 9.300000000000001e-05, | |
| "loss": 6.7408, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 9.200000000000001e-05, | |
| "loss": 6.5858, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 9.1e-05, | |
| "loss": 6.4674, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 9e-05, | |
| "loss": 6.3797, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 8.900000000000001e-05, | |
| "loss": 6.3089, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 8.800000000000001e-05, | |
| "loss": 6.2535, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 8.7e-05, | |
| "loss": 6.2054, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 8.6e-05, | |
| "loss": 6.1582, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "learning_rate": 8.5e-05, | |
| "loss": 6.1192, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 8.4e-05, | |
| "loss": 6.0872, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 8.3e-05, | |
| "loss": 6.0494, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 8.2e-05, | |
| "loss": 6.0233, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 8.1e-05, | |
| "loss": 5.993, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 8e-05, | |
| "loss": 5.9731, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "learning_rate": 7.900000000000001e-05, | |
| "loss": 5.9495, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 7.800000000000001e-05, | |
| "loss": 5.9287, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 3.22, | |
| "learning_rate": 7.7e-05, | |
| "loss": 5.9077, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 7.6e-05, | |
| "loss": 5.8884, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 7.500000000000001e-05, | |
| "loss": 5.8712, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 7.4e-05, | |
| "loss": 5.8562, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 7.3e-05, | |
| "loss": 5.836, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 3.92, | |
| "learning_rate": 7.2e-05, | |
| "loss": 5.8282, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 4.06, | |
| "learning_rate": 7.1e-05, | |
| "loss": 5.8108, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 7e-05, | |
| "loss": 5.7997, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 4.34, | |
| "learning_rate": 6.9e-05, | |
| "loss": 5.7869, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 6.800000000000001e-05, | |
| "loss": 5.7755, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 6.7e-05, | |
| "loss": 5.7686, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 6.6e-05, | |
| "loss": 5.7561, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 4.9, | |
| "learning_rate": 6.500000000000001e-05, | |
| "loss": 5.7481, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 5.04, | |
| "learning_rate": 6.400000000000001e-05, | |
| "loss": 5.7397, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 5.18, | |
| "learning_rate": 6.3e-05, | |
| "loss": 5.7285, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 6.2e-05, | |
| "loss": 5.7194, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 5.47, | |
| "learning_rate": 6.1e-05, | |
| "loss": 5.7117, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 6e-05, | |
| "loss": 5.7066, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 5.75, | |
| "learning_rate": 5.9e-05, | |
| "loss": 5.6968, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 5.8e-05, | |
| "loss": 5.6889, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 5.6999999999999996e-05, | |
| "loss": 5.681, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 5.6000000000000006e-05, | |
| "loss": 5.6788, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 6.31, | |
| "learning_rate": 5.500000000000001e-05, | |
| "loss": 5.6668, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 5.4000000000000005e-05, | |
| "loss": 5.6649, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 6.59, | |
| "learning_rate": 5.300000000000001e-05, | |
| "loss": 5.6574, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 5.2000000000000004e-05, | |
| "loss": 5.6499, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "learning_rate": 5.1000000000000006e-05, | |
| "loss": 5.6465, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "learning_rate": 5e-05, | |
| "loss": 5.6419, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 7.15, | |
| "learning_rate": 4.9e-05, | |
| "loss": 5.6358, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 4.8e-05, | |
| "loss": 5.6312, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 7.43, | |
| "learning_rate": 4.7e-05, | |
| "loss": 5.6293, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "learning_rate": 4.600000000000001e-05, | |
| "loss": 5.6218, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 4.5e-05, | |
| "loss": 5.618, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "learning_rate": 4.4000000000000006e-05, | |
| "loss": 5.613, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 7.99, | |
| "learning_rate": 4.3e-05, | |
| "loss": 5.6101, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "learning_rate": 4.2e-05, | |
| "loss": 5.6048, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "learning_rate": 4.1e-05, | |
| "loss": 5.6, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "learning_rate": 4e-05, | |
| "loss": 5.5976, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 8.55, | |
| "learning_rate": 3.9000000000000006e-05, | |
| "loss": 5.5936, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "learning_rate": 3.8e-05, | |
| "loss": 5.5931, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 8.83, | |
| "learning_rate": 3.7e-05, | |
| "loss": 5.5898, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "learning_rate": 3.6e-05, | |
| "loss": 5.5888, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 9.11, | |
| "learning_rate": 3.5e-05, | |
| "loss": 5.582, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "learning_rate": 3.4000000000000007e-05, | |
| "loss": 5.5817, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 9.39, | |
| "learning_rate": 3.3e-05, | |
| "loss": 5.5726, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 5.5778, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "learning_rate": 3.1e-05, | |
| "loss": 5.5718, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 9.81, | |
| "learning_rate": 3e-05, | |
| "loss": 5.569, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 9.95, | |
| "learning_rate": 2.9e-05, | |
| "loss": 5.5703, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 10.09, | |
| "learning_rate": 2.8000000000000003e-05, | |
| "loss": 5.5668, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 10.23, | |
| "learning_rate": 2.7000000000000002e-05, | |
| "loss": 5.561, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 10.37, | |
| "learning_rate": 2.6000000000000002e-05, | |
| "loss": 5.5554, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 10.51, | |
| "learning_rate": 2.5e-05, | |
| "loss": 5.5621, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 10.65, | |
| "learning_rate": 2.4e-05, | |
| "loss": 5.558, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 10.79, | |
| "learning_rate": 2.3000000000000003e-05, | |
| "loss": 5.5557, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 10.93, | |
| "learning_rate": 2.2000000000000003e-05, | |
| "loss": 5.5572, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 11.07, | |
| "learning_rate": 2.1e-05, | |
| "loss": 5.5519, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 11.21, | |
| "learning_rate": 2e-05, | |
| "loss": 5.5502, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 11.35, | |
| "learning_rate": 1.9e-05, | |
| "loss": 5.5436, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 11.49, | |
| "learning_rate": 1.8e-05, | |
| "loss": 5.5482, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 11.63, | |
| "learning_rate": 1.7000000000000003e-05, | |
| "loss": 5.5524, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 11.77, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 5.5449, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 11.91, | |
| "learning_rate": 1.5e-05, | |
| "loss": 5.5443, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 12.05, | |
| "learning_rate": 1.4000000000000001e-05, | |
| "loss": 5.5466, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 12.19, | |
| "learning_rate": 1.3000000000000001e-05, | |
| "loss": 5.5388, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 12.33, | |
| "learning_rate": 1.2e-05, | |
| "loss": 5.5397, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 12.47, | |
| "learning_rate": 1.1000000000000001e-05, | |
| "loss": 5.5385, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 12.61, | |
| "learning_rate": 1e-05, | |
| "loss": 5.5437, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 12.75, | |
| "learning_rate": 9e-06, | |
| "loss": 5.541, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 12.89, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 5.5395, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 13.03, | |
| "learning_rate": 7.000000000000001e-06, | |
| "loss": 5.5357, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 13.17, | |
| "learning_rate": 6e-06, | |
| "loss": 5.5351, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 13.31, | |
| "learning_rate": 5e-06, | |
| "loss": 5.5353, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 13.45, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 5.5371, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 13.59, | |
| "learning_rate": 3e-06, | |
| "loss": 5.5343, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 13.73, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 5.5371, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 13.87, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 5.538, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 14.01, | |
| "learning_rate": 0.0, | |
| "loss": 5.5386, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 14.01, | |
| "step": 50000, | |
| "total_flos": 336513375928320.0, | |
| "train_loss": 5.854185776367188, | |
| "train_runtime": 10672.3435, | |
| "train_samples_per_second": 149.92, | |
| "train_steps_per_second": 4.685 | |
| } | |
| ], | |
| "max_steps": 50000, | |
| "num_train_epochs": 15, | |
| "total_flos": 336513375928320.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |