| { | |
| "best_metric": 0.03605493903160095, | |
| "best_model_checkpoint": "/home/jupyter/pt-train/models/pt-ai-detector-sent/checkpoint-11250", | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 11250, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.017777777777777778, | |
| "grad_norm": 0.2309582382440567, | |
| "learning_rate": 9.825777777777779e-06, | |
| "loss": 1.1561, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.035555555555555556, | |
| "grad_norm": 0.12385065108537674, | |
| "learning_rate": 9.648000000000001e-06, | |
| "loss": 0.1037, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.05333333333333334, | |
| "grad_norm": 1.04470694065094, | |
| "learning_rate": 9.470222222222222e-06, | |
| "loss": 0.0954, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.07111111111111111, | |
| "grad_norm": 0.06991372257471085, | |
| "learning_rate": 9.292444444444445e-06, | |
| "loss": 0.0774, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.08888888888888889, | |
| "grad_norm": 0.7150733470916748, | |
| "learning_rate": 9.114666666666668e-06, | |
| "loss": 0.0644, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.10666666666666667, | |
| "grad_norm": 10.402215003967285, | |
| "learning_rate": 8.93688888888889e-06, | |
| "loss": 0.0759, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.12444444444444444, | |
| "grad_norm": 16.095781326293945, | |
| "learning_rate": 8.76e-06, | |
| "loss": 0.0708, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.14222222222222222, | |
| "grad_norm": 0.015104565769433975, | |
| "learning_rate": 8.582222222222223e-06, | |
| "loss": 0.072, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.05716638267040253, | |
| "learning_rate": 8.404444444444444e-06, | |
| "loss": 0.0583, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.17777777777777778, | |
| "grad_norm": 0.18205951154232025, | |
| "learning_rate": 8.226666666666667e-06, | |
| "loss": 0.0471, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.19555555555555557, | |
| "grad_norm": 0.010154439136385918, | |
| "learning_rate": 8.04888888888889e-06, | |
| "loss": 0.047, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.21333333333333335, | |
| "grad_norm": 0.016992947086691856, | |
| "learning_rate": 7.871111111111112e-06, | |
| "loss": 0.0563, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.2311111111111111, | |
| "grad_norm": 18.458301544189453, | |
| "learning_rate": 7.693333333333333e-06, | |
| "loss": 0.0491, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.24888888888888888, | |
| "grad_norm": 0.12355442345142365, | |
| "learning_rate": 7.515555555555556e-06, | |
| "loss": 0.0612, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.26666666666666666, | |
| "grad_norm": 0.027129609137773514, | |
| "learning_rate": 7.337777777777778e-06, | |
| "loss": 0.0545, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.28444444444444444, | |
| "grad_norm": 16.13323211669922, | |
| "learning_rate": 7.16e-06, | |
| "loss": 0.0608, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.3022222222222222, | |
| "grad_norm": 0.0006109599489718676, | |
| "learning_rate": 6.982222222222223e-06, | |
| "loss": 0.0336, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 5.837182998657227, | |
| "learning_rate": 6.8044444444444444e-06, | |
| "loss": 0.0485, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.3377777777777778, | |
| "grad_norm": 2.619032859802246, | |
| "learning_rate": 6.626666666666667e-06, | |
| "loss": 0.0454, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.35555555555555557, | |
| "grad_norm": 0.10736280679702759, | |
| "learning_rate": 6.448888888888889e-06, | |
| "loss": 0.0562, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.37333333333333335, | |
| "grad_norm": 0.010529163293540478, | |
| "learning_rate": 6.271111111111111e-06, | |
| "loss": 0.0397, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.39111111111111113, | |
| "grad_norm": 7.122026443481445, | |
| "learning_rate": 6.093333333333333e-06, | |
| "loss": 0.0455, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.4088888888888889, | |
| "grad_norm": 14.466997146606445, | |
| "learning_rate": 5.915555555555556e-06, | |
| "loss": 0.0437, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.4266666666666667, | |
| "grad_norm": 0.011481579393148422, | |
| "learning_rate": 5.737777777777778e-06, | |
| "loss": 0.0365, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.4444444444444444, | |
| "grad_norm": 0.09981393069028854, | |
| "learning_rate": 5.560000000000001e-06, | |
| "loss": 0.0373, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.4622222222222222, | |
| "grad_norm": 0.0657946765422821, | |
| "learning_rate": 5.382222222222223e-06, | |
| "loss": 0.0444, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.3812452554702759, | |
| "learning_rate": 5.204444444444445e-06, | |
| "loss": 0.0336, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.49777777777777776, | |
| "grad_norm": 0.032792165875434875, | |
| "learning_rate": 5.026666666666667e-06, | |
| "loss": 0.039, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.5155555555555555, | |
| "grad_norm": 1.433627963066101, | |
| "learning_rate": 4.848888888888889e-06, | |
| "loss": 0.0347, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.5333333333333333, | |
| "grad_norm": 18.49650001525879, | |
| "learning_rate": 4.6711111111111115e-06, | |
| "loss": 0.0421, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.5511111111111111, | |
| "grad_norm": 0.15248766541481018, | |
| "learning_rate": 4.493333333333333e-06, | |
| "loss": 0.0466, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.5688888888888889, | |
| "grad_norm": 1.2822659015655518, | |
| "learning_rate": 4.315555555555556e-06, | |
| "loss": 0.0397, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.5866666666666667, | |
| "grad_norm": 0.04721234738826752, | |
| "learning_rate": 4.1377777777777784e-06, | |
| "loss": 0.0419, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.6044444444444445, | |
| "grad_norm": 0.04303908720612526, | |
| "learning_rate": 3.96e-06, | |
| "loss": 0.0405, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.6222222222222222, | |
| "grad_norm": 0.009814753197133541, | |
| "learning_rate": 3.782222222222223e-06, | |
| "loss": 0.0304, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.25008726119995117, | |
| "learning_rate": 3.604444444444445e-06, | |
| "loss": 0.0397, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.6577777777777778, | |
| "grad_norm": 0.00500706909224391, | |
| "learning_rate": 3.426666666666667e-06, | |
| "loss": 0.0369, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.6755555555555556, | |
| "grad_norm": 0.0132444491609931, | |
| "learning_rate": 3.2488888888888894e-06, | |
| "loss": 0.0314, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.6933333333333334, | |
| "grad_norm": 5.168927192687988, | |
| "learning_rate": 3.072e-06, | |
| "loss": 0.0382, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.7111111111111111, | |
| "grad_norm": 7.176743507385254, | |
| "learning_rate": 2.8951111111111114e-06, | |
| "loss": 0.0476, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.7288888888888889, | |
| "grad_norm": 0.0021492040250450373, | |
| "learning_rate": 2.7173333333333336e-06, | |
| "loss": 0.0368, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.7466666666666667, | |
| "grad_norm": 0.006746188271790743, | |
| "learning_rate": 2.539555555555556e-06, | |
| "loss": 0.0382, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.7644444444444445, | |
| "grad_norm": 0.005719976499676704, | |
| "learning_rate": 2.361777777777778e-06, | |
| "loss": 0.0297, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.7822222222222223, | |
| "grad_norm": 0.04104682430624962, | |
| "learning_rate": 2.184e-06, | |
| "loss": 0.0379, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.009711818769574165, | |
| "learning_rate": 2.0062222222222224e-06, | |
| "loss": 0.0323, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.8177777777777778, | |
| "grad_norm": 0.03058827668428421, | |
| "learning_rate": 1.8284444444444445e-06, | |
| "loss": 0.0456, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.8355555555555556, | |
| "grad_norm": 0.009686224162578583, | |
| "learning_rate": 1.6506666666666667e-06, | |
| "loss": 0.0321, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.8533333333333334, | |
| "grad_norm": 0.03049510158598423, | |
| "learning_rate": 1.472888888888889e-06, | |
| "loss": 0.0308, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.8711111111111111, | |
| "grad_norm": 0.12437938898801804, | |
| "learning_rate": 1.295111111111111e-06, | |
| "loss": 0.0384, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.8888888888888888, | |
| "grad_norm": 0.012638283893465996, | |
| "learning_rate": 1.1173333333333335e-06, | |
| "loss": 0.0328, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.9066666666666666, | |
| "grad_norm": 0.061814695596694946, | |
| "learning_rate": 9.395555555555557e-07, | |
| "loss": 0.0415, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.9244444444444444, | |
| "grad_norm": 0.0710972398519516, | |
| "learning_rate": 7.617777777777779e-07, | |
| "loss": 0.025, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.9422222222222222, | |
| "grad_norm": 17.15258026123047, | |
| "learning_rate": 5.84e-07, | |
| "loss": 0.0346, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.21364516019821167, | |
| "learning_rate": 4.062222222222222e-07, | |
| "loss": 0.0291, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.9777777777777777, | |
| "grad_norm": 0.24612575769424438, | |
| "learning_rate": 2.2844444444444446e-07, | |
| "loss": 0.0304, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.9955555555555555, | |
| "grad_norm": 1.2707738876342773, | |
| "learning_rate": 5.066666666666667e-08, | |
| "loss": 0.0288, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.03605493903160095, | |
| "eval_runtime": 22.7203, | |
| "eval_samples_per_second": 880.272, | |
| "eval_steps_per_second": 55.017, | |
| "step": 11250 | |
| } | |
| ], | |
| "logging_steps": 200, | |
| "max_steps": 11250, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7865326777845120.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |