| { | |
| "best_metric": 0.9565217391304348, | |
| "best_model_checkpoint": "vit-msn-small-wbc-blur-detector/checkpoint-15", | |
| "epoch": 60.0, | |
| "eval_steps": 500, | |
| "global_step": 180, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.6304347826086957, | |
| "eval_loss": 0.6141545176506042, | |
| "eval_runtime": 0.3002, | |
| "eval_samples_per_second": 306.507, | |
| "eval_steps_per_second": 9.995, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.8695652173913043, | |
| "eval_loss": 0.3853473365306854, | |
| "eval_runtime": 0.2807, | |
| "eval_samples_per_second": 327.721, | |
| "eval_steps_per_second": 10.687, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8260869565217391, | |
| "eval_loss": 0.40699997544288635, | |
| "eval_runtime": 0.2954, | |
| "eval_samples_per_second": 311.393, | |
| "eval_steps_per_second": 10.154, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 6.74481725692749, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.494, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9347826086956522, | |
| "eval_loss": 0.1460711508989334, | |
| "eval_runtime": 0.2916, | |
| "eval_samples_per_second": 315.469, | |
| "eval_steps_per_second": 10.287, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9565217391304348, | |
| "eval_loss": 0.11890643835067749, | |
| "eval_runtime": 0.2956, | |
| "eval_samples_per_second": 311.209, | |
| "eval_steps_per_second": 10.148, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.15268591046333313, | |
| "eval_runtime": 0.2919, | |
| "eval_samples_per_second": 315.184, | |
| "eval_steps_per_second": 10.278, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 6.666666666666667, | |
| "grad_norm": 32.091392517089844, | |
| "learning_rate": 4.938271604938271e-05, | |
| "loss": 0.2024, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.9021739130434783, | |
| "eval_loss": 0.3323056697845459, | |
| "eval_runtime": 0.2758, | |
| "eval_samples_per_second": 333.524, | |
| "eval_steps_per_second": 10.876, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.15201511979103088, | |
| "eval_runtime": 0.2782, | |
| "eval_samples_per_second": 330.705, | |
| "eval_steps_per_second": 10.784, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.1572241336107254, | |
| "eval_runtime": 0.2775, | |
| "eval_samples_per_second": 331.58, | |
| "eval_steps_per_second": 10.812, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 10.290038108825684, | |
| "learning_rate": 4.62962962962963e-05, | |
| "loss": 0.1419, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.9347826086956522, | |
| "eval_loss": 0.18135777115821838, | |
| "eval_runtime": 0.2913, | |
| "eval_samples_per_second": 315.861, | |
| "eval_steps_per_second": 10.3, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.9347826086956522, | |
| "eval_loss": 0.17784903943538666, | |
| "eval_runtime": 0.2749, | |
| "eval_samples_per_second": 334.701, | |
| "eval_steps_per_second": 10.914, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.9347826086956522, | |
| "eval_loss": 0.15048673748970032, | |
| "eval_runtime": 0.2937, | |
| "eval_samples_per_second": 313.271, | |
| "eval_steps_per_second": 10.215, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.1890527755022049, | |
| "eval_runtime": 0.3016, | |
| "eval_samples_per_second": 305.089, | |
| "eval_steps_per_second": 9.949, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 13.333333333333334, | |
| "grad_norm": 6.0901970863342285, | |
| "learning_rate": 4.3209876543209875e-05, | |
| "loss": 0.1053, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7934782608695652, | |
| "eval_loss": 0.7274413108825684, | |
| "eval_runtime": 0.272, | |
| "eval_samples_per_second": 338.198, | |
| "eval_steps_per_second": 11.028, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.9347826086956522, | |
| "eval_loss": 0.2668982148170471, | |
| "eval_runtime": 0.2934, | |
| "eval_samples_per_second": 313.551, | |
| "eval_steps_per_second": 10.224, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.9347826086956522, | |
| "eval_loss": 0.22397232055664062, | |
| "eval_runtime": 0.3087, | |
| "eval_samples_per_second": 298.059, | |
| "eval_steps_per_second": 9.719, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 16.666666666666668, | |
| "grad_norm": 7.414670467376709, | |
| "learning_rate": 4.012345679012346e-05, | |
| "loss": 0.3044, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.8913043478260869, | |
| "eval_loss": 0.3497091829776764, | |
| "eval_runtime": 0.2758, | |
| "eval_samples_per_second": 333.612, | |
| "eval_steps_per_second": 10.879, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.9347826086956522, | |
| "eval_loss": 0.2208346724510193, | |
| "eval_runtime": 0.274, | |
| "eval_samples_per_second": 335.712, | |
| "eval_steps_per_second": 10.947, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.9565217391304348, | |
| "eval_loss": 0.17329861223697662, | |
| "eval_runtime": 0.285, | |
| "eval_samples_per_second": 322.844, | |
| "eval_steps_per_second": 10.528, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 5.56092643737793, | |
| "learning_rate": 3.7037037037037037e-05, | |
| "loss": 0.151, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.9239130434782609, | |
| "eval_loss": 0.20381057262420654, | |
| "eval_runtime": 0.2821, | |
| "eval_samples_per_second": 326.07, | |
| "eval_steps_per_second": 10.633, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.9565217391304348, | |
| "eval_loss": 0.12818782031536102, | |
| "eval_runtime": 0.2765, | |
| "eval_samples_per_second": 332.708, | |
| "eval_steps_per_second": 10.849, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.9239130434782609, | |
| "eval_loss": 0.3231411874294281, | |
| "eval_runtime": 0.3046, | |
| "eval_samples_per_second": 302.028, | |
| "eval_steps_per_second": 9.849, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.9565217391304348, | |
| "eval_loss": 0.15651515126228333, | |
| "eval_runtime": 0.296, | |
| "eval_samples_per_second": 310.773, | |
| "eval_steps_per_second": 10.134, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 23.333333333333332, | |
| "grad_norm": 6.024500370025635, | |
| "learning_rate": 3.395061728395062e-05, | |
| "loss": 0.0875, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.19810304045677185, | |
| "eval_runtime": 0.3099, | |
| "eval_samples_per_second": 296.864, | |
| "eval_steps_per_second": 9.68, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.19737455248832703, | |
| "eval_runtime": 0.2724, | |
| "eval_samples_per_second": 337.687, | |
| "eval_steps_per_second": 11.012, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.20447766780853271, | |
| "eval_runtime": 0.2918, | |
| "eval_samples_per_second": 315.301, | |
| "eval_steps_per_second": 10.282, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 26.666666666666668, | |
| "grad_norm": 4.34276008605957, | |
| "learning_rate": 3.08641975308642e-05, | |
| "loss": 0.0851, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.1840977817773819, | |
| "eval_runtime": 0.2713, | |
| "eval_samples_per_second": 339.118, | |
| "eval_steps_per_second": 11.058, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.9565217391304348, | |
| "eval_loss": 0.20613481104373932, | |
| "eval_runtime": 0.2718, | |
| "eval_samples_per_second": 338.491, | |
| "eval_steps_per_second": 11.038, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.20765484869480133, | |
| "eval_runtime": 0.2926, | |
| "eval_samples_per_second": 314.467, | |
| "eval_steps_per_second": 10.254, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "grad_norm": 2.4659526348114014, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.046, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.9565217391304348, | |
| "eval_loss": 0.21993966400623322, | |
| "eval_runtime": 0.2942, | |
| "eval_samples_per_second": 312.716, | |
| "eval_steps_per_second": 10.197, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_accuracy": 0.9565217391304348, | |
| "eval_loss": 0.2038496881723404, | |
| "eval_runtime": 0.3027, | |
| "eval_samples_per_second": 303.903, | |
| "eval_steps_per_second": 9.91, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.20770704746246338, | |
| "eval_runtime": 0.281, | |
| "eval_samples_per_second": 327.359, | |
| "eval_steps_per_second": 10.675, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_accuracy": 0.9565217391304348, | |
| "eval_loss": 0.18766026198863983, | |
| "eval_runtime": 0.2757, | |
| "eval_samples_per_second": 333.675, | |
| "eval_steps_per_second": 10.881, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 33.333333333333336, | |
| "grad_norm": 1.9909802675247192, | |
| "learning_rate": 2.4691358024691357e-05, | |
| "loss": 0.0533, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.9347826086956522, | |
| "eval_loss": 0.2383040189743042, | |
| "eval_runtime": 0.2759, | |
| "eval_samples_per_second": 333.465, | |
| "eval_steps_per_second": 10.874, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_accuracy": 0.9239130434782609, | |
| "eval_loss": 0.25708499550819397, | |
| "eval_runtime": 0.2913, | |
| "eval_samples_per_second": 315.812, | |
| "eval_steps_per_second": 10.298, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.9565217391304348, | |
| "eval_loss": 0.23297645151615143, | |
| "eval_runtime": 0.2763, | |
| "eval_samples_per_second": 332.955, | |
| "eval_steps_per_second": 10.857, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 36.666666666666664, | |
| "grad_norm": 9.732784271240234, | |
| "learning_rate": 2.1604938271604937e-05, | |
| "loss": 0.0451, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.24198591709136963, | |
| "eval_runtime": 0.2748, | |
| "eval_samples_per_second": 334.8, | |
| "eval_steps_per_second": 10.917, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.9239130434782609, | |
| "eval_loss": 0.2881980240345001, | |
| "eval_runtime": 0.291, | |
| "eval_samples_per_second": 316.1, | |
| "eval_steps_per_second": 10.308, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.23858585953712463, | |
| "eval_runtime": 0.2975, | |
| "eval_samples_per_second": 309.204, | |
| "eval_steps_per_second": 10.083, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "grad_norm": 1.4663212299346924, | |
| "learning_rate": 1.8518518518518518e-05, | |
| "loss": 0.0401, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.9347826086956522, | |
| "eval_loss": 0.2512564957141876, | |
| "eval_runtime": 0.2721, | |
| "eval_samples_per_second": 338.098, | |
| "eval_steps_per_second": 11.025, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_accuracy": 0.9347826086956522, | |
| "eval_loss": 0.2671690285205841, | |
| "eval_runtime": 0.2823, | |
| "eval_samples_per_second": 325.908, | |
| "eval_steps_per_second": 10.627, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.2950490117073059, | |
| "eval_runtime": 0.2748, | |
| "eval_samples_per_second": 334.771, | |
| "eval_steps_per_second": 10.916, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.32324346899986267, | |
| "eval_runtime": 0.2744, | |
| "eval_samples_per_second": 335.279, | |
| "eval_steps_per_second": 10.933, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 43.333333333333336, | |
| "grad_norm": 6.655643463134766, | |
| "learning_rate": 1.54320987654321e-05, | |
| "loss": 0.0329, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.9239130434782609, | |
| "eval_loss": 0.3711928427219391, | |
| "eval_runtime": 0.2829, | |
| "eval_samples_per_second": 325.258, | |
| "eval_steps_per_second": 10.606, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_accuracy": 0.9347826086956522, | |
| "eval_loss": 0.35285505652427673, | |
| "eval_runtime": 0.2761, | |
| "eval_samples_per_second": 333.224, | |
| "eval_steps_per_second": 10.866, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.29050472378730774, | |
| "eval_runtime": 0.292, | |
| "eval_samples_per_second": 315.053, | |
| "eval_steps_per_second": 10.273, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 46.666666666666664, | |
| "grad_norm": 5.307190895080566, | |
| "learning_rate": 1.2345679012345678e-05, | |
| "loss": 0.0519, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.26701638102531433, | |
| "eval_runtime": 0.3064, | |
| "eval_samples_per_second": 300.308, | |
| "eval_steps_per_second": 9.793, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.2628593444824219, | |
| "eval_runtime": 0.2788, | |
| "eval_samples_per_second": 330.007, | |
| "eval_steps_per_second": 10.761, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.2760757505893707, | |
| "eval_runtime": 0.2738, | |
| "eval_samples_per_second": 336.03, | |
| "eval_steps_per_second": 10.957, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "grad_norm": 4.582086563110352, | |
| "learning_rate": 9.259259259259259e-06, | |
| "loss": 0.0281, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.3040381669998169, | |
| "eval_runtime": 0.2766, | |
| "eval_samples_per_second": 332.655, | |
| "eval_steps_per_second": 10.847, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.31911370158195496, | |
| "eval_runtime": 0.2968, | |
| "eval_samples_per_second": 309.998, | |
| "eval_steps_per_second": 10.109, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.32143697142601013, | |
| "eval_runtime": 0.3072, | |
| "eval_samples_per_second": 299.499, | |
| "eval_steps_per_second": 9.766, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 53.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.31315502524375916, | |
| "eval_runtime": 0.3028, | |
| "eval_samples_per_second": 303.834, | |
| "eval_steps_per_second": 9.908, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 53.333333333333336, | |
| "grad_norm": 4.059518814086914, | |
| "learning_rate": 6.172839506172839e-06, | |
| "loss": 0.028, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.31154391169548035, | |
| "eval_runtime": 0.2938, | |
| "eval_samples_per_second": 313.128, | |
| "eval_steps_per_second": 10.211, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "eval_accuracy": 0.9565217391304348, | |
| "eval_loss": 0.31155669689178467, | |
| "eval_runtime": 0.2755, | |
| "eval_samples_per_second": 333.966, | |
| "eval_steps_per_second": 10.89, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.3224737346172333, | |
| "eval_runtime": 0.302, | |
| "eval_samples_per_second": 304.679, | |
| "eval_steps_per_second": 9.935, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 56.666666666666664, | |
| "grad_norm": 9.457426071166992, | |
| "learning_rate": 3.0864197530864196e-06, | |
| "loss": 0.0361, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 57.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.3235200047492981, | |
| "eval_runtime": 0.2841, | |
| "eval_samples_per_second": 323.798, | |
| "eval_steps_per_second": 10.559, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.3200394809246063, | |
| "eval_runtime": 0.2914, | |
| "eval_samples_per_second": 315.666, | |
| "eval_steps_per_second": 10.293, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 59.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.318322092294693, | |
| "eval_runtime": 0.2746, | |
| "eval_samples_per_second": 335.002, | |
| "eval_steps_per_second": 10.924, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "grad_norm": 9.799005508422852, | |
| "learning_rate": 0.0, | |
| "loss": 0.0312, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.9456521739130435, | |
| "eval_loss": 0.31814736127853394, | |
| "eval_runtime": 0.2764, | |
| "eval_samples_per_second": 332.886, | |
| "eval_steps_per_second": 10.855, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "step": 180, | |
| "total_flos": 4.3676735454019584e+17, | |
| "train_loss": 0.1091307305627399, | |
| "train_runtime": 203.4457, | |
| "train_samples_per_second": 109.71, | |
| "train_steps_per_second": 0.885 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 180, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 60, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.3676735454019584e+17, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |