| { | |
| "best_metric": 0.8284289836883545, | |
| "best_model_checkpoint": "/output/longformer-large-4096-scratch-mlm-zgt-radpat/checkpoint-31300", | |
| "epoch": 9.999175145683829, | |
| "global_step": 32200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 0.12266339212733007, | |
| "eval_loss": 6.975634574890137, | |
| "eval_runtime": 6116.0321, | |
| "eval_samples_per_second": 16.872, | |
| "eval_steps_per_second": 4.218, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.5527950310559007e-05, | |
| "loss": 7.8221, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.15375473317214908, | |
| "eval_loss": 6.221883773803711, | |
| "eval_runtime": 6119.4463, | |
| "eval_samples_per_second": 16.862, | |
| "eval_steps_per_second": 4.216, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.16615663705726413, | |
| "eval_loss": 6.070300102233887, | |
| "eval_runtime": 6123.864, | |
| "eval_samples_per_second": 16.85, | |
| "eval_steps_per_second": 4.213, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 3.1055900621118014e-05, | |
| "loss": 6.2078, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_accuracy": 0.17121056433657572, | |
| "eval_loss": 5.859891414642334, | |
| "eval_runtime": 6127.1504, | |
| "eval_samples_per_second": 16.841, | |
| "eval_steps_per_second": 4.21, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 4.658385093167702e-05, | |
| "loss": 5.8885, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.2018564191205144, | |
| "eval_loss": 5.480071544647217, | |
| "eval_runtime": 6125.2208, | |
| "eval_samples_per_second": 16.846, | |
| "eval_steps_per_second": 4.212, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.28117280447637577, | |
| "eval_loss": 4.865741729736328, | |
| "eval_runtime": 6125.2116, | |
| "eval_samples_per_second": 16.846, | |
| "eval_steps_per_second": 4.212, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 6.211180124223603e-05, | |
| "loss": 5.222, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_accuracy": 0.3429139079332977, | |
| "eval_loss": 4.355594158172607, | |
| "eval_runtime": 6130.4382, | |
| "eval_samples_per_second": 16.832, | |
| "eval_steps_per_second": 4.208, | |
| "step": 2191 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "learning_rate": 7.763975155279503e-05, | |
| "loss": 4.4722, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_accuracy": 0.40118303502111985, | |
| "eval_loss": 3.8668248653411865, | |
| "eval_runtime": 6127.9717, | |
| "eval_samples_per_second": 16.839, | |
| "eval_steps_per_second": 4.21, | |
| "step": 2504 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.5023100325479124, | |
| "eval_loss": 3.0883595943450928, | |
| "eval_runtime": 6125.5473, | |
| "eval_samples_per_second": 16.845, | |
| "eval_steps_per_second": 4.211, | |
| "step": 2817 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 9.316770186335404e-05, | |
| "loss": 3.4756, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_accuracy": 0.5704291572279797, | |
| "eval_loss": 2.500981569290161, | |
| "eval_runtime": 6130.9045, | |
| "eval_samples_per_second": 16.831, | |
| "eval_steps_per_second": 4.208, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_accuracy": 0.6179390492627002, | |
| "eval_loss": 2.098602771759033, | |
| "eval_runtime": 6130.0855, | |
| "eval_samples_per_second": 16.833, | |
| "eval_steps_per_second": 4.208, | |
| "step": 3443 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 9.903381642512077e-05, | |
| "loss": 2.473, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_accuracy": 0.6461125725768823, | |
| "eval_loss": 1.8769867420196533, | |
| "eval_runtime": 6125.6206, | |
| "eval_samples_per_second": 16.845, | |
| "eval_steps_per_second": 4.211, | |
| "step": 3756 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 9.730848861283644e-05, | |
| "loss": 1.9842, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_accuracy": 0.6658163018931873, | |
| "eval_loss": 1.7306807041168213, | |
| "eval_runtime": 6126.2778, | |
| "eval_samples_per_second": 16.843, | |
| "eval_steps_per_second": 4.211, | |
| "step": 4069 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "eval_accuracy": 0.6793036581603035, | |
| "eval_loss": 1.6312057971954346, | |
| "eval_runtime": 6129.7016, | |
| "eval_samples_per_second": 16.834, | |
| "eval_steps_per_second": 4.209, | |
| "step": 4382 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 9.558316080055211e-05, | |
| "loss": 1.7588, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "eval_accuracy": 0.6910826606245232, | |
| "eval_loss": 1.5486171245574951, | |
| "eval_runtime": 6126.9734, | |
| "eval_samples_per_second": 16.841, | |
| "eval_steps_per_second": 4.21, | |
| "step": 4695 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 9.385783298826778e-05, | |
| "loss": 1.6227, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "eval_accuracy": 0.7005288313548309, | |
| "eval_loss": 1.4852144718170166, | |
| "eval_runtime": 6125.5835, | |
| "eval_samples_per_second": 16.845, | |
| "eval_steps_per_second": 4.211, | |
| "step": 5008 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "eval_accuracy": 0.7085754746399128, | |
| "eval_loss": 1.4299465417861938, | |
| "eval_runtime": 6127.5749, | |
| "eval_samples_per_second": 16.84, | |
| "eval_steps_per_second": 4.21, | |
| "step": 5321 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 9.213250517598345e-05, | |
| "loss": 1.5262, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "eval_accuracy": 0.714780717522465, | |
| "eval_loss": 1.3879172801971436, | |
| "eval_runtime": 6127.0567, | |
| "eval_samples_per_second": 16.841, | |
| "eval_steps_per_second": 4.21, | |
| "step": 5634 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "eval_accuracy": 0.7206512153561178, | |
| "eval_loss": 1.3517948389053345, | |
| "eval_runtime": 6124.3738, | |
| "eval_samples_per_second": 16.849, | |
| "eval_steps_per_second": 4.212, | |
| "step": 5947 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "learning_rate": 9.04071773636991e-05, | |
| "loss": 1.4504, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "eval_accuracy": 0.7259066376655939, | |
| "eval_loss": 1.3164656162261963, | |
| "eval_runtime": 6118.789, | |
| "eval_samples_per_second": 16.864, | |
| "eval_steps_per_second": 4.216, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 8.868184955141477e-05, | |
| "loss": 1.3953, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_accuracy": 0.730818673639555, | |
| "eval_loss": 1.285917043685913, | |
| "eval_runtime": 6120.6477, | |
| "eval_samples_per_second": 16.859, | |
| "eval_steps_per_second": 4.215, | |
| "step": 6573 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "eval_accuracy": 0.734559869460255, | |
| "eval_loss": 1.2613903284072876, | |
| "eval_runtime": 6121.6933, | |
| "eval_samples_per_second": 16.856, | |
| "eval_steps_per_second": 4.214, | |
| "step": 6886 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 8.695652173913044e-05, | |
| "loss": 1.3444, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "eval_accuracy": 0.7384604131666711, | |
| "eval_loss": 1.236000895500183, | |
| "eval_runtime": 6122.4897, | |
| "eval_samples_per_second": 16.854, | |
| "eval_steps_per_second": 4.213, | |
| "step": 7199 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "learning_rate": 8.523119392684611e-05, | |
| "loss": 1.3047, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "eval_accuracy": 0.7415128788148121, | |
| "eval_loss": 1.2168104648590088, | |
| "eval_runtime": 6126.7013, | |
| "eval_samples_per_second": 16.842, | |
| "eval_steps_per_second": 4.211, | |
| "step": 7512 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "eval_accuracy": 0.7450029545595697, | |
| "eval_loss": 1.1964406967163086, | |
| "eval_runtime": 6123.0501, | |
| "eval_samples_per_second": 16.852, | |
| "eval_steps_per_second": 4.213, | |
| "step": 7825 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 8.350586611456177e-05, | |
| "loss": 1.2713, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "eval_accuracy": 0.7467751766581295, | |
| "eval_loss": 1.1841331720352173, | |
| "eval_runtime": 6122.3999, | |
| "eval_samples_per_second": 16.854, | |
| "eval_steps_per_second": 4.214, | |
| "step": 8138 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "eval_accuracy": 0.750416850580808, | |
| "eval_loss": 1.1633927822113037, | |
| "eval_runtime": 6127.4486, | |
| "eval_samples_per_second": 16.84, | |
| "eval_steps_per_second": 4.21, | |
| "step": 8451 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 8.178053830227743e-05, | |
| "loss": 1.2431, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "eval_accuracy": 0.7527193981891372, | |
| "eval_loss": 1.146986722946167, | |
| "eval_runtime": 6131.9044, | |
| "eval_samples_per_second": 16.828, | |
| "eval_steps_per_second": 4.207, | |
| "step": 8764 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 8.00552104899931e-05, | |
| "loss": 1.2164, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_accuracy": 0.7551538736391906, | |
| "eval_loss": 1.132608413696289, | |
| "eval_runtime": 6121.9035, | |
| "eval_samples_per_second": 16.855, | |
| "eval_steps_per_second": 4.214, | |
| "step": 9077 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "eval_accuracy": 0.7571211907517355, | |
| "eval_loss": 1.1203465461730957, | |
| "eval_runtime": 6121.527, | |
| "eval_samples_per_second": 16.856, | |
| "eval_steps_per_second": 4.214, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 7.832988267770877e-05, | |
| "loss": 1.1951, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 3.01, | |
| "eval_accuracy": 0.7589963980672606, | |
| "eval_loss": 1.1114239692687988, | |
| "eval_runtime": 6126.4612, | |
| "eval_samples_per_second": 16.843, | |
| "eval_steps_per_second": 4.211, | |
| "step": 9703 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 7.660455486542444e-05, | |
| "loss": 1.1705, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "eval_accuracy": 0.7612426818924412, | |
| "eval_loss": 1.0974253416061401, | |
| "eval_runtime": 6122.547, | |
| "eval_samples_per_second": 16.854, | |
| "eval_steps_per_second": 4.213, | |
| "step": 10016 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "eval_accuracy": 0.7631302412738202, | |
| "eval_loss": 1.0867012739181519, | |
| "eval_runtime": 6126.2709, | |
| "eval_samples_per_second": 16.843, | |
| "eval_steps_per_second": 4.211, | |
| "step": 10329 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 7.48792270531401e-05, | |
| "loss": 1.1516, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 3.3, | |
| "eval_accuracy": 0.7646139267496522, | |
| "eval_loss": 1.0770790576934814, | |
| "eval_runtime": 6130.429, | |
| "eval_samples_per_second": 16.832, | |
| "eval_steps_per_second": 4.208, | |
| "step": 10642 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "eval_accuracy": 0.7660438596581639, | |
| "eval_loss": 1.0668072700500488, | |
| "eval_runtime": 6129.5434, | |
| "eval_samples_per_second": 16.834, | |
| "eval_steps_per_second": 4.209, | |
| "step": 10955 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 7.315389924085577e-05, | |
| "loss": 1.1345, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "eval_accuracy": 0.7675726293257004, | |
| "eval_loss": 1.05952787399292, | |
| "eval_runtime": 6126.4998, | |
| "eval_samples_per_second": 16.843, | |
| "eval_steps_per_second": 4.211, | |
| "step": 11268 | |
| }, | |
| { | |
| "epoch": 3.57, | |
| "learning_rate": 7.142857142857143e-05, | |
| "loss": 1.1192, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 3.6, | |
| "eval_accuracy": 0.7694602055551931, | |
| "eval_loss": 1.0479472875595093, | |
| "eval_runtime": 6127.4827, | |
| "eval_samples_per_second": 16.84, | |
| "eval_steps_per_second": 4.21, | |
| "step": 11581 | |
| }, | |
| { | |
| "epoch": 3.69, | |
| "eval_accuracy": 0.7707531140981431, | |
| "eval_loss": 1.0423223972320557, | |
| "eval_runtime": 6131.6585, | |
| "eval_samples_per_second": 16.829, | |
| "eval_steps_per_second": 4.207, | |
| "step": 11894 | |
| }, | |
| { | |
| "epoch": 3.73, | |
| "learning_rate": 6.970324361628709e-05, | |
| "loss": 1.106, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 3.79, | |
| "eval_accuracy": 0.7719773558500885, | |
| "eval_loss": 1.0328373908996582, | |
| "eval_runtime": 6128.1273, | |
| "eval_samples_per_second": 16.838, | |
| "eval_steps_per_second": 4.21, | |
| "step": 12207 | |
| }, | |
| { | |
| "epoch": 3.88, | |
| "learning_rate": 6.797791580400277e-05, | |
| "loss": 1.0916, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "eval_accuracy": 0.7731614368018522, | |
| "eval_loss": 1.0272808074951172, | |
| "eval_runtime": 6120.3326, | |
| "eval_samples_per_second": 16.86, | |
| "eval_steps_per_second": 4.215, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "eval_accuracy": 0.7742511503011699, | |
| "eval_loss": 1.0189120769500732, | |
| "eval_runtime": 6131.1757, | |
| "eval_samples_per_second": 16.83, | |
| "eval_steps_per_second": 4.208, | |
| "step": 12833 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 6.625258799171843e-05, | |
| "loss": 1.0789, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "eval_accuracy": 0.7757384860054987, | |
| "eval_loss": 1.0113306045532227, | |
| "eval_runtime": 6133.3354, | |
| "eval_samples_per_second": 16.824, | |
| "eval_steps_per_second": 4.206, | |
| "step": 13146 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "eval_accuracy": 0.776816006797112, | |
| "eval_loss": 1.0058414936065674, | |
| "eval_runtime": 6130.4902, | |
| "eval_samples_per_second": 16.832, | |
| "eval_steps_per_second": 4.208, | |
| "step": 13459 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 6.45272601794341e-05, | |
| "loss": 1.0631, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 4.28, | |
| "eval_accuracy": 0.7777869709950421, | |
| "eval_loss": 1.000064730644226, | |
| "eval_runtime": 6129.8863, | |
| "eval_samples_per_second": 16.833, | |
| "eval_steps_per_second": 4.208, | |
| "step": 13772 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 6.280193236714976e-05, | |
| "loss": 1.0557, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 4.37, | |
| "eval_accuracy": 0.778843659908514, | |
| "eval_loss": 0.993532121181488, | |
| "eval_runtime": 6126.5895, | |
| "eval_samples_per_second": 16.842, | |
| "eval_steps_per_second": 4.211, | |
| "step": 14085 | |
| }, | |
| { | |
| "epoch": 4.47, | |
| "eval_accuracy": 0.7797456195039035, | |
| "eval_loss": 0.9887062311172485, | |
| "eval_runtime": 6127.2121, | |
| "eval_samples_per_second": 16.841, | |
| "eval_steps_per_second": 4.21, | |
| "step": 14398 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 6.107660455486542e-05, | |
| "loss": 1.0438, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "eval_accuracy": 0.7807731355140578, | |
| "eval_loss": 0.9825865030288696, | |
| "eval_runtime": 6126.4985, | |
| "eval_samples_per_second": 16.843, | |
| "eval_steps_per_second": 4.211, | |
| "step": 14711 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 5.9351276742581096e-05, | |
| "loss": 1.0361, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "eval_accuracy": 0.7818996676870377, | |
| "eval_loss": 0.9763655662536621, | |
| "eval_runtime": 6127.4496, | |
| "eval_samples_per_second": 16.84, | |
| "eval_steps_per_second": 4.21, | |
| "step": 15024 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "eval_accuracy": 0.782940768919716, | |
| "eval_loss": 0.9697893857955933, | |
| "eval_runtime": 6126.6806, | |
| "eval_samples_per_second": 16.842, | |
| "eval_steps_per_second": 4.211, | |
| "step": 15337 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 5.762594893029676e-05, | |
| "loss": 1.0264, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "eval_accuracy": 0.7841247808628483, | |
| "eval_loss": 0.9644368290901184, | |
| "eval_runtime": 6128.2176, | |
| "eval_samples_per_second": 16.838, | |
| "eval_steps_per_second": 4.21, | |
| "step": 15650 | |
| }, | |
| { | |
| "epoch": 4.96, | |
| "eval_accuracy": 0.7846301810721098, | |
| "eval_loss": 0.9614962339401245, | |
| "eval_runtime": 6132.4257, | |
| "eval_samples_per_second": 16.826, | |
| "eval_steps_per_second": 4.207, | |
| "step": 15963 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 5.590062111801242e-05, | |
| "loss": 1.0176, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "eval_accuracy": 0.7858738693048405, | |
| "eval_loss": 0.9536014795303345, | |
| "eval_runtime": 6134.6669, | |
| "eval_samples_per_second": 16.82, | |
| "eval_steps_per_second": 4.205, | |
| "step": 16276 | |
| }, | |
| { | |
| "epoch": 5.12, | |
| "learning_rate": 5.417529330572809e-05, | |
| "loss": 1.007, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 5.15, | |
| "eval_accuracy": 0.7867571419814423, | |
| "eval_loss": 0.9484899044036865, | |
| "eval_runtime": 6130.582, | |
| "eval_samples_per_second": 16.832, | |
| "eval_steps_per_second": 4.208, | |
| "step": 16589 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "eval_accuracy": 0.7867586112749555, | |
| "eval_loss": 0.9482876658439636, | |
| "eval_runtime": 6124.9513, | |
| "eval_samples_per_second": 16.847, | |
| "eval_steps_per_second": 4.212, | |
| "step": 16902 | |
| }, | |
| { | |
| "epoch": 5.28, | |
| "learning_rate": 5.244996549344375e-05, | |
| "loss": 0.9965, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "eval_accuracy": 0.7880718537015102, | |
| "eval_loss": 0.9402521848678589, | |
| "eval_runtime": 6133.8455, | |
| "eval_samples_per_second": 16.823, | |
| "eval_steps_per_second": 4.206, | |
| "step": 17215 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 5.072463768115943e-05, | |
| "loss": 0.9911, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "eval_accuracy": 0.7888353320614213, | |
| "eval_loss": 0.9360187649726868, | |
| "eval_runtime": 6131.2854, | |
| "eval_samples_per_second": 16.83, | |
| "eval_steps_per_second": 4.207, | |
| "step": 17528 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "eval_accuracy": 0.7896846598862644, | |
| "eval_loss": 0.9315310120582581, | |
| "eval_runtime": 6130.221, | |
| "eval_samples_per_second": 16.833, | |
| "eval_steps_per_second": 4.208, | |
| "step": 17841 | |
| }, | |
| { | |
| "epoch": 5.59, | |
| "learning_rate": 4.899930986887509e-05, | |
| "loss": 0.9861, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 5.64, | |
| "eval_accuracy": 0.7902251551194575, | |
| "eval_loss": 0.9286208152770996, | |
| "eval_runtime": 6135.9888, | |
| "eval_samples_per_second": 16.817, | |
| "eval_steps_per_second": 4.204, | |
| "step": 18154 | |
| }, | |
| { | |
| "epoch": 5.73, | |
| "eval_accuracy": 0.7910160835517881, | |
| "eval_loss": 0.9242651462554932, | |
| "eval_runtime": 6134.4232, | |
| "eval_samples_per_second": 16.821, | |
| "eval_steps_per_second": 4.205, | |
| "step": 18467 | |
| }, | |
| { | |
| "epoch": 5.74, | |
| "learning_rate": 4.727398205659075e-05, | |
| "loss": 0.9787, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 5.83, | |
| "eval_accuracy": 0.7916774902149969, | |
| "eval_loss": 0.9199575185775757, | |
| "eval_runtime": 6127.6258, | |
| "eval_samples_per_second": 16.84, | |
| "eval_steps_per_second": 4.21, | |
| "step": 18780 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "learning_rate": 4.554865424430642e-05, | |
| "loss": 0.972, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 5.93, | |
| "eval_accuracy": 0.7921690081239334, | |
| "eval_loss": 0.9167630076408386, | |
| "eval_runtime": 6121.9416, | |
| "eval_samples_per_second": 16.855, | |
| "eval_steps_per_second": 4.214, | |
| "step": 19093 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "eval_accuracy": 0.7929045391491827, | |
| "eval_loss": 0.9131466150283813, | |
| "eval_runtime": 6136.433, | |
| "eval_samples_per_second": 16.815, | |
| "eval_steps_per_second": 4.204, | |
| "step": 19406 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 4.382332643202209e-05, | |
| "loss": 0.9642, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "eval_accuracy": 0.7933599893983608, | |
| "eval_loss": 0.9112694263458252, | |
| "eval_runtime": 6128.5893, | |
| "eval_samples_per_second": 16.837, | |
| "eval_steps_per_second": 4.209, | |
| "step": 19719 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 4.209799861973775e-05, | |
| "loss": 0.9576, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 6.22, | |
| "eval_accuracy": 0.7940601199523715, | |
| "eval_loss": 0.9060889482498169, | |
| "eval_runtime": 6120.6148, | |
| "eval_samples_per_second": 16.859, | |
| "eval_steps_per_second": 4.215, | |
| "step": 20032 | |
| }, | |
| { | |
| "epoch": 6.32, | |
| "eval_accuracy": 0.7948685797545274, | |
| "eval_loss": 0.9030121564865112, | |
| "eval_runtime": 6124.1894, | |
| "eval_samples_per_second": 16.849, | |
| "eval_steps_per_second": 4.212, | |
| "step": 20345 | |
| }, | |
| { | |
| "epoch": 6.37, | |
| "learning_rate": 4.0372670807453414e-05, | |
| "loss": 0.9514, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "eval_accuracy": 0.7954765058682228, | |
| "eval_loss": 0.8997820615768433, | |
| "eval_runtime": 6126.3307, | |
| "eval_samples_per_second": 16.843, | |
| "eval_steps_per_second": 4.211, | |
| "step": 20658 | |
| }, | |
| { | |
| "epoch": 6.51, | |
| "eval_accuracy": 0.7961196847197146, | |
| "eval_loss": 0.8957119584083557, | |
| "eval_runtime": 6121.3143, | |
| "eval_samples_per_second": 16.857, | |
| "eval_steps_per_second": 4.214, | |
| "step": 20971 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 3.864734299516908e-05, | |
| "loss": 0.9457, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 6.61, | |
| "eval_accuracy": 0.7966353338873807, | |
| "eval_loss": 0.8925579190254211, | |
| "eval_runtime": 6121.7054, | |
| "eval_samples_per_second": 16.856, | |
| "eval_steps_per_second": 4.214, | |
| "step": 21284 | |
| }, | |
| { | |
| "epoch": 6.68, | |
| "learning_rate": 3.692201518288475e-05, | |
| "loss": 0.9411, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 6.71, | |
| "eval_accuracy": 0.7968278874690401, | |
| "eval_loss": 0.8926752805709839, | |
| "eval_runtime": 6123.2773, | |
| "eval_samples_per_second": 16.852, | |
| "eval_steps_per_second": 4.213, | |
| "step": 21597 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "eval_accuracy": 0.7974544355055755, | |
| "eval_loss": 0.8880347609519958, | |
| "eval_runtime": 6121.4872, | |
| "eval_samples_per_second": 16.857, | |
| "eval_steps_per_second": 4.214, | |
| "step": 21910 | |
| }, | |
| { | |
| "epoch": 6.83, | |
| "learning_rate": 3.519668737060042e-05, | |
| "loss": 0.9349, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 6.9, | |
| "eval_accuracy": 0.7982437294026129, | |
| "eval_loss": 0.8834199905395508, | |
| "eval_runtime": 6123.2699, | |
| "eval_samples_per_second": 16.852, | |
| "eval_steps_per_second": 4.213, | |
| "step": 22223 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "learning_rate": 3.347135955831608e-05, | |
| "loss": 0.9319, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.7990805845521158, | |
| "eval_loss": 0.8799129724502563, | |
| "eval_runtime": 6120.2145, | |
| "eval_samples_per_second": 16.86, | |
| "eval_steps_per_second": 4.215, | |
| "step": 22536 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "eval_accuracy": 0.7991272231482186, | |
| "eval_loss": 0.879518449306488, | |
| "eval_runtime": 6125.0222, | |
| "eval_samples_per_second": 16.847, | |
| "eval_steps_per_second": 4.212, | |
| "step": 22849 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 3.1746031746031745e-05, | |
| "loss": 0.9235, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 7.19, | |
| "eval_accuracy": 0.7999484030167242, | |
| "eval_loss": 0.8756560683250427, | |
| "eval_runtime": 6127.211, | |
| "eval_samples_per_second": 16.841, | |
| "eval_steps_per_second": 4.21, | |
| "step": 23162 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "eval_accuracy": 0.8001440250718516, | |
| "eval_loss": 0.8739376068115234, | |
| "eval_runtime": 6134.261, | |
| "eval_samples_per_second": 16.821, | |
| "eval_steps_per_second": 4.205, | |
| "step": 23475 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 3.0020703933747414e-05, | |
| "loss": 0.9198, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "eval_accuracy": 0.8010690824018636, | |
| "eval_loss": 0.8693613409996033, | |
| "eval_runtime": 6132.2846, | |
| "eval_samples_per_second": 16.827, | |
| "eval_steps_per_second": 4.207, | |
| "step": 23788 | |
| }, | |
| { | |
| "epoch": 7.45, | |
| "learning_rate": 2.829537612146308e-05, | |
| "loss": 0.9158, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "eval_accuracy": 0.8011952977602468, | |
| "eval_loss": 0.8689371943473816, | |
| "eval_runtime": 6129.2095, | |
| "eval_samples_per_second": 16.835, | |
| "eval_steps_per_second": 4.209, | |
| "step": 24101 | |
| }, | |
| { | |
| "epoch": 7.58, | |
| "eval_accuracy": 0.8017360324487328, | |
| "eval_loss": 0.8663704991340637, | |
| "eval_runtime": 6128.5565, | |
| "eval_samples_per_second": 16.837, | |
| "eval_steps_per_second": 4.209, | |
| "step": 24414 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "learning_rate": 2.6570048309178748e-05, | |
| "loss": 0.9125, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "eval_accuracy": 0.8020007406811046, | |
| "eval_loss": 0.8649431467056274, | |
| "eval_runtime": 6132.8666, | |
| "eval_samples_per_second": 16.825, | |
| "eval_steps_per_second": 4.206, | |
| "step": 24727 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "learning_rate": 2.484472049689441e-05, | |
| "loss": 0.9099, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 7.78, | |
| "eval_accuracy": 0.8026024276561983, | |
| "eval_loss": 0.8605436086654663, | |
| "eval_runtime": 6126.7586, | |
| "eval_samples_per_second": 16.842, | |
| "eval_steps_per_second": 4.211, | |
| "step": 25040 | |
| }, | |
| { | |
| "epoch": 7.87, | |
| "eval_accuracy": 0.80301129412462, | |
| "eval_loss": 0.8582573533058167, | |
| "eval_runtime": 6127.3341, | |
| "eval_samples_per_second": 16.84, | |
| "eval_steps_per_second": 4.21, | |
| "step": 25353 | |
| }, | |
| { | |
| "epoch": 7.92, | |
| "learning_rate": 2.311939268461008e-05, | |
| "loss": 0.9054, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 7.97, | |
| "eval_accuracy": 0.8034071794966846, | |
| "eval_loss": 0.8573377132415771, | |
| "eval_runtime": 6131.9465, | |
| "eval_samples_per_second": 16.828, | |
| "eval_steps_per_second": 4.207, | |
| "step": 25666 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "eval_accuracy": 0.8038572222331624, | |
| "eval_loss": 0.8544816374778748, | |
| "eval_runtime": 6128.9922, | |
| "eval_samples_per_second": 16.836, | |
| "eval_steps_per_second": 4.209, | |
| "step": 25979 | |
| }, | |
| { | |
| "epoch": 8.07, | |
| "learning_rate": 2.139406487232574e-05, | |
| "loss": 0.8998, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "eval_accuracy": 0.8044058818938022, | |
| "eval_loss": 0.8519273400306702, | |
| "eval_runtime": 6124.6473, | |
| "eval_samples_per_second": 16.848, | |
| "eval_steps_per_second": 4.212, | |
| "step": 26292 | |
| }, | |
| { | |
| "epoch": 8.23, | |
| "learning_rate": 1.966873706004141e-05, | |
| "loss": 0.8939, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 8.26, | |
| "eval_accuracy": 0.8044216416179728, | |
| "eval_loss": 0.8512473702430725, | |
| "eval_runtime": 6126.8526, | |
| "eval_samples_per_second": 16.842, | |
| "eval_steps_per_second": 4.21, | |
| "step": 26605 | |
| }, | |
| { | |
| "epoch": 8.36, | |
| "eval_accuracy": 0.804752442721678, | |
| "eval_loss": 0.8492391705513, | |
| "eval_runtime": 6127.2647, | |
| "eval_samples_per_second": 16.841, | |
| "eval_steps_per_second": 4.21, | |
| "step": 26918 | |
| }, | |
| { | |
| "epoch": 8.38, | |
| "learning_rate": 1.7943409247757076e-05, | |
| "loss": 0.8942, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 8.46, | |
| "eval_accuracy": 0.8051816524786768, | |
| "eval_loss": 0.8468219637870789, | |
| "eval_runtime": 6124.9306, | |
| "eval_samples_per_second": 16.847, | |
| "eval_steps_per_second": 4.212, | |
| "step": 27231 | |
| }, | |
| { | |
| "epoch": 8.54, | |
| "learning_rate": 1.621808143547274e-05, | |
| "loss": 0.8904, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 8.55, | |
| "eval_accuracy": 0.8055019757141467, | |
| "eval_loss": 0.8458420634269714, | |
| "eval_runtime": 6124.8245, | |
| "eval_samples_per_second": 16.847, | |
| "eval_steps_per_second": 4.212, | |
| "step": 27544 | |
| }, | |
| { | |
| "epoch": 8.65, | |
| "eval_accuracy": 0.8057308816675628, | |
| "eval_loss": 0.8443206548690796, | |
| "eval_runtime": 6129.9291, | |
| "eval_samples_per_second": 16.833, | |
| "eval_steps_per_second": 4.208, | |
| "step": 27857 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "learning_rate": 1.4492753623188407e-05, | |
| "loss": 0.8862, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 8.75, | |
| "eval_accuracy": 0.805897348183967, | |
| "eval_loss": 0.843222439289093, | |
| "eval_runtime": 6128.1919, | |
| "eval_samples_per_second": 16.838, | |
| "eval_steps_per_second": 4.21, | |
| "step": 28170 | |
| }, | |
| { | |
| "epoch": 8.84, | |
| "eval_accuracy": 0.8064984673341041, | |
| "eval_loss": 0.84042888879776, | |
| "eval_runtime": 6116.6369, | |
| "eval_samples_per_second": 16.87, | |
| "eval_steps_per_second": 4.218, | |
| "step": 28483 | |
| }, | |
| { | |
| "epoch": 8.85, | |
| "learning_rate": 1.276742581090407e-05, | |
| "loss": 0.8842, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 8.94, | |
| "eval_accuracy": 0.806853518328651, | |
| "eval_loss": 0.8381487727165222, | |
| "eval_runtime": 6118.9718, | |
| "eval_samples_per_second": 16.863, | |
| "eval_steps_per_second": 4.216, | |
| "step": 28796 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "learning_rate": 1.1042097998619738e-05, | |
| "loss": 0.8812, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 9.04, | |
| "eval_accuracy": 0.8070338579198731, | |
| "eval_loss": 0.8374488353729248, | |
| "eval_runtime": 6118.7308, | |
| "eval_samples_per_second": 16.864, | |
| "eval_steps_per_second": 4.216, | |
| "step": 29109 | |
| }, | |
| { | |
| "epoch": 9.14, | |
| "eval_accuracy": 0.8068436046687713, | |
| "eval_loss": 0.8375363945960999, | |
| "eval_runtime": 6128.5918, | |
| "eval_samples_per_second": 16.837, | |
| "eval_steps_per_second": 4.209, | |
| "step": 29422 | |
| }, | |
| { | |
| "epoch": 9.16, | |
| "learning_rate": 9.316770186335403e-06, | |
| "loss": 0.8774, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 9.23, | |
| "eval_accuracy": 0.8077565106716271, | |
| "eval_loss": 0.8336867094039917, | |
| "eval_runtime": 6119.8095, | |
| "eval_samples_per_second": 16.861, | |
| "eval_steps_per_second": 4.215, | |
| "step": 29735 | |
| }, | |
| { | |
| "epoch": 9.32, | |
| "learning_rate": 7.591442374051071e-06, | |
| "loss": 0.8752, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "eval_accuracy": 0.8081288482769053, | |
| "eval_loss": 0.8320378661155701, | |
| "eval_runtime": 6119.7341, | |
| "eval_samples_per_second": 16.861, | |
| "eval_steps_per_second": 4.215, | |
| "step": 30048 | |
| }, | |
| { | |
| "epoch": 9.43, | |
| "eval_accuracy": 0.8082356261550239, | |
| "eval_loss": 0.8310965299606323, | |
| "eval_runtime": 6119.6431, | |
| "eval_samples_per_second": 16.862, | |
| "eval_steps_per_second": 4.215, | |
| "step": 30361 | |
| }, | |
| { | |
| "epoch": 9.47, | |
| "learning_rate": 5.866114561766736e-06, | |
| "loss": 0.8732, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "eval_accuracy": 0.8083999448820824, | |
| "eval_loss": 0.8303462266921997, | |
| "eval_runtime": 6118.4989, | |
| "eval_samples_per_second": 16.865, | |
| "eval_steps_per_second": 4.216, | |
| "step": 30674 | |
| }, | |
| { | |
| "epoch": 9.62, | |
| "eval_accuracy": 0.8084419046833061, | |
| "eval_loss": 0.8290849328041077, | |
| "eval_runtime": 6127.7892, | |
| "eval_samples_per_second": 16.839, | |
| "eval_steps_per_second": 4.21, | |
| "step": 30987 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "learning_rate": 4.140786749482402e-06, | |
| "loss": 0.8715, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "eval_accuracy": 0.8088197529604327, | |
| "eval_loss": 0.8284289836883545, | |
| "eval_runtime": 6124.7156, | |
| "eval_samples_per_second": 16.848, | |
| "eval_steps_per_second": 4.212, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "learning_rate": 2.4154589371980677e-06, | |
| "loss": 0.8705, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 9.82, | |
| "eval_accuracy": 0.8085015827448934, | |
| "eval_loss": 0.8298270106315613, | |
| "eval_runtime": 6120.6207, | |
| "eval_samples_per_second": 16.859, | |
| "eval_steps_per_second": 4.215, | |
| "step": 31613 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "eval_accuracy": 0.8086080278025564, | |
| "eval_loss": 0.8285703659057617, | |
| "eval_runtime": 6122.3492, | |
| "eval_samples_per_second": 16.854, | |
| "eval_steps_per_second": 4.214, | |
| "step": 31926 | |
| }, | |
| { | |
| "epoch": 9.94, | |
| "learning_rate": 6.901311249137336e-07, | |
| "loss": 0.8676, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "step": 32200, | |
| "total_flos": 9.597056792179405e+18, | |
| "train_loss": 1.5035098029663845, | |
| "train_runtime": 2595975.6547, | |
| "train_samples_per_second": 3.176, | |
| "train_steps_per_second": 0.012 | |
| } | |
| ], | |
| "max_steps": 32200, | |
| "num_train_epochs": 10, | |
| "total_flos": 9.597056792179405e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |