{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 1090,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1.8348623853211012e-07,
      "loss": 1.6972,
      "step": 1
    },
    {
      "epoch": 0.0,
      "learning_rate": 9.174311926605506e-07,
      "loss": 1.6464,
      "step": 5
    },
    {
      "epoch": 0.01,
      "learning_rate": 1.8348623853211011e-06,
      "loss": 1.3919,
      "step": 10
    },
    {
      "epoch": 0.01,
      "learning_rate": 2.7522935779816517e-06,
      "loss": 1.2141,
      "step": 15
    },
    {
      "epoch": 0.02,
      "learning_rate": 3.6697247706422022e-06,
      "loss": 1.17,
      "step": 20
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.587155963302753e-06,
      "loss": 1.1172,
      "step": 25
    },
    {
      "epoch": 0.03,
      "learning_rate": 5.504587155963303e-06,
      "loss": 1.0704,
      "step": 30
    },
    {
      "epoch": 0.03,
      "learning_rate": 6.422018348623854e-06,
      "loss": 1.0576,
      "step": 35
    },
    {
      "epoch": 0.04,
      "learning_rate": 7.3394495412844045e-06,
      "loss": 1.052,
      "step": 40
    },
    {
      "epoch": 0.04,
      "learning_rate": 8.256880733944956e-06,
      "loss": 1.0469,
      "step": 45
    },
    {
      "epoch": 0.05,
      "learning_rate": 9.174311926605506e-06,
      "loss": 1.0308,
      "step": 50
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.0091743119266055e-05,
      "loss": 1.0091,
      "step": 55
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.1009174311926607e-05,
      "loss": 1.0356,
      "step": 60
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.1926605504587156e-05,
      "loss": 1.0079,
      "step": 65
    },
    {
      "epoch": 0.06,
      "learning_rate": 1.2844036697247708e-05,
      "loss": 1.0001,
      "step": 70
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.3761467889908258e-05,
      "loss": 1.0001,
      "step": 75
    },
    {
      "epoch": 0.07,
      "learning_rate": 1.4678899082568809e-05,
      "loss": 0.9918,
      "step": 80
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.559633027522936e-05,
      "loss": 1.0148,
      "step": 85
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.6513761467889912e-05,
      "loss": 0.9973,
      "step": 90
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.743119266055046e-05,
      "loss": 1.0154,
      "step": 95
    },
    {
      "epoch": 0.09,
      "learning_rate": 1.834862385321101e-05,
      "loss": 1.0163,
      "step": 100
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.9266055045871563e-05,
      "loss": 0.9872,
      "step": 105
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.999994872196626e-05,
      "loss": 1.0056,
      "step": 110
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.9998154046002822e-05,
      "loss": 1.0141,
      "step": 115
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.999379599421534e-05,
      "loss": 1.0058,
      "step": 120
    },
    {
      "epoch": 0.11,
      "learning_rate": 1.9986875683942535e-05,
      "loss": 1.02,
      "step": 125
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9977394889447526e-05,
      "loss": 1.0237,
      "step": 130
    },
    {
      "epoch": 0.12,
      "learning_rate": 1.9965356041462954e-05,
      "loss": 1.0304,
      "step": 135
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9950762226567783e-05,
      "loss": 1.0284,
      "step": 140
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9933617186395917e-05,
      "loss": 1.0046,
      "step": 145
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.9913925316676946e-05,
      "loss": 1.0026,
      "step": 150
    },
    {
      "epoch": 0.14,
      "learning_rate": 1.9891691666109112e-05,
      "loss": 1.0226,
      "step": 155
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9866921935064907e-05,
      "loss": 1.0136,
      "step": 160
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9839622474129595e-05,
      "loss": 1.016,
      "step": 165
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.9809800282473014e-05,
      "loss": 0.9979,
      "step": 170
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.977746300605507e-05,
      "loss": 1.0219,
      "step": 175
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9742618935665478e-05,
      "loss": 1.0086,
      "step": 180
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9705277004798072e-05,
      "loss": 0.9983,
      "step": 185
    },
    {
      "epoch": 0.17,
      "learning_rate": 1.9665446787360444e-05,
      "loss": 1.0106,
      "step": 190
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.9623138495219292e-05,
      "loss": 1.0084,
      "step": 195
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.957836297558229e-05,
      "loss": 1.0272,
      "step": 200
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.9531131708217005e-05,
      "loss": 0.989,
      "step": 205
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.948145680250766e-05,
      "loss": 1.0114,
      "step": 210
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.9429350994350483e-05,
      "loss": 0.993,
      "step": 215
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.93748276428884e-05,
      "loss": 1.0229,
      "step": 220
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.931790072708596e-05,
      "loss": 1.0191,
      "step": 225
    },
    {
      "epoch": 0.21,
      "learning_rate": 1.9258584842145342e-05,
      "loss": 1.0016,
      "step": 230
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.9196895195764363e-05,
      "loss": 0.9955,
      "step": 235
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.913284760423745e-05,
      "loss": 1.0212,
      "step": 240
    },
    {
      "epoch": 0.22,
      "learning_rate": 1.9066458488400586e-05,
      "loss": 1.0258,
      "step": 245
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.8997744869421248e-05,
      "loss": 1.0036,
      "step": 250
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.8926724364434447e-05,
      "loss": 0.9951,
      "step": 255
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.8853415182025953e-05,
      "loss": 1.0088,
      "step": 260
    },
    {
      "epoch": 0.24,
      "learning_rate": 1.8777836117563894e-05,
      "loss": 1.0197,
      "step": 265
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.8700006548379898e-05,
      "loss": 0.9919,
      "step": 270
    },
    {
      "epoch": 0.25,
      "learning_rate": 1.861994642880105e-05,
      "loss": 1.0036,
      "step": 275
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.8537676285033886e-05,
      "loss": 1.0064,
      "step": 280
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.845321720990181e-05,
      "loss": 1.0098,
      "step": 285
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.8366590857437182e-05,
      "loss": 1.0154,
      "step": 290
    },
    {
      "epoch": 0.27,
      "learning_rate": 1.8277819437329577e-05,
      "loss": 0.993,
      "step": 295
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.8186925709231534e-05,
      "loss": 0.9877,
      "step": 300
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.809393297692334e-05,
      "loss": 0.9892,
      "step": 305
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.799886508233829e-05,
      "loss": 0.9869,
      "step": 310
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.790174639944997e-05,
      "loss": 0.9894,
      "step": 315
    },
    {
      "epoch": 0.29,
      "learning_rate": 1.780260182802314e-05,
      "loss": 0.9984,
      "step": 320
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.7701456787229805e-05,
      "loss": 0.9878,
      "step": 325
    },
    {
      "epoch": 0.3,
      "learning_rate": 1.7598337209132142e-05,
      "loss": 1.0103,
      "step": 330
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.7493269532033882e-05,
      "loss": 0.994,
      "step": 335
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.738628069370195e-05,
      "loss": 0.9978,
      "step": 340
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.7277398124460022e-05,
      "loss": 1.0071,
      "step": 345
    },
    {
      "epoch": 0.32,
      "learning_rate": 1.71666497401558e-05,
      "loss": 0.9925,
      "step": 350
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.7054063935003813e-05,
      "loss": 0.9971,
      "step": 355
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.6939669574305565e-05,
      "loss": 1.0069,
      "step": 360
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.6823495987048922e-05,
      "loss": 0.9703,
      "step": 365
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.6705572958388576e-05,
      "loss": 0.9989,
      "step": 370
    },
    {
      "epoch": 0.34,
      "learning_rate": 1.6585930722009602e-05,
      "loss": 0.9951,
      "step": 375
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.6464599952375998e-05,
      "loss": 0.9971,
      "step": 380
    },
    {
      "epoch": 0.35,
      "learning_rate": 1.63416117568662e-05,
      "loss": 0.9811,
      "step": 385
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.621699766779763e-05,
      "loss": 1.009,
      "step": 390
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.6090789634342278e-05,
      "loss": 1.0029,
      "step": 395
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.5963020014335437e-05,
      "loss": 0.9934,
      "step": 400
    },
    {
      "epoch": 0.37,
      "learning_rate": 1.583372156597961e-05,
      "loss": 0.9753,
      "step": 405
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.570292743944583e-05,
      "loss": 1.009,
      "step": 410
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.557067116837444e-05,
      "loss": 0.968,
      "step": 415
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.5436986661277578e-05,
      "loss": 0.9961,
      "step": 420
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.530190819284555e-05,
      "loss": 1.0131,
      "step": 425
    },
    {
      "epoch": 0.39,
      "learning_rate": 1.5165470395159314e-05,
      "loss": 0.9881,
      "step": 430
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.5027708248811331e-05,
      "loss": 0.9887,
      "step": 435
    },
    {
      "epoch": 0.4,
      "learning_rate": 1.4888657073937077e-05,
      "loss": 0.9793,
      "step": 440
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.4748352521159492e-05,
      "loss": 0.9766,
      "step": 445
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.4606830562448692e-05,
      "loss": 0.9877,
      "step": 450
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.4464127481899312e-05,
      "loss": 0.9998,
      "step": 455
    },
    {
      "epoch": 0.42,
      "learning_rate": 1.4320279866427798e-05,
      "loss": 0.9789,
      "step": 460
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.4175324596392075e-05,
      "loss": 0.9832,
      "step": 465
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.402929883613599e-05,
      "loss": 0.9766,
      "step": 470
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.3882240024460928e-05,
      "loss": 0.986,
      "step": 475
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.3734185865027061e-05,
      "loss": 0.9834,
      "step": 480
    },
    {
      "epoch": 0.44,
      "learning_rate": 1.358517431668672e-05,
      "loss": 0.9832,
      "step": 485
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.3435243583752294e-05,
      "loss": 0.9749,
      "step": 490
    },
    {
      "epoch": 0.45,
      "learning_rate": 1.3284432106201233e-05,
      "loss": 0.9799,
      "step": 495
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.313277854982062e-05,
      "loss": 0.9585,
      "step": 500
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.2980321796293838e-05,
      "loss": 0.9886,
      "step": 505
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.2827100933231904e-05,
      "loss": 0.9694,
      "step": 510
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.2673155244151985e-05,
      "loss": 0.9797,
      "step": 515
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.2518524198405699e-05,
      "loss": 0.9662,
      "step": 520
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.2363247441059775e-05,
      "loss": 0.9711,
      "step": 525
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.2207364782731657e-05,
      "loss": 0.9798,
      "step": 530
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.2050916189382646e-05,
      "loss": 0.9638,
      "step": 535
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.189394177207125e-05,
      "loss": 0.9826,
      "step": 540
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.1736481776669307e-05,
      "loss": 0.9816,
      "step": 545
    },
    {
      "epoch": 0.5,
      "learning_rate": 1.1578576573543541e-05,
      "loss": 0.9614,
      "step": 550
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.1420266647205232e-05,
      "loss": 0.9689,
      "step": 555
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.1261592585930576e-05,
      "loss": 0.9876,
      "step": 560
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.1102595071354471e-05,
      "loss": 0.9447,
      "step": 565
    },
    {
      "epoch": 0.52,
      "learning_rate": 1.0943314868040365e-05,
      "loss": 0.9584,
      "step": 570
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.0783792813028828e-05,
      "loss": 0.9776,
      "step": 575
    },
    {
      "epoch": 0.53,
      "learning_rate": 1.0624069805367558e-05,
      "loss": 0.9402,
      "step": 580
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.0464186795625481e-05,
      "loss": 0.9637,
      "step": 585
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.0304184775393642e-05,
      "loss": 0.9573,
      "step": 590
    },
    {
      "epoch": 0.55,
      "learning_rate": 1.0144104766775574e-05,
      "loss": 0.9694,
      "step": 595
    },
    {
      "epoch": 0.55,
      "learning_rate": 9.983987811869863e-06,
      "loss": 0.967,
      "step": 600
    },
    {
      "epoch": 0.56,
      "learning_rate": 9.823874962247565e-06,
      "loss": 0.9652,
      "step": 605
    },
    {
      "epoch": 0.56,
      "learning_rate": 9.663807268427197e-06,
      "loss": 0.951,
      "step": 610
    },
    {
      "epoch": 0.56,
      "learning_rate": 9.503825769350016e-06,
      "loss": 0.9735,
      "step": 615
    },
    {
      "epoch": 0.57,
      "learning_rate": 9.343971481858246e-06,
      "loss": 0.9491,
      "step": 620
    },
    {
      "epoch": 0.57,
      "learning_rate": 9.184285390178978e-06,
      "loss": 0.9637,
      "step": 625
    },
    {
      "epoch": 0.58,
      "learning_rate": 9.024808435416435e-06,
      "loss": 0.9434,
      "step": 630
    },
    {
      "epoch": 0.58,
      "learning_rate": 8.865581505055292e-06,
      "loss": 0.9501,
      "step": 635
    },
    {
      "epoch": 0.59,
      "learning_rate": 8.706645422477739e-06,
      "loss": 0.9619,
      "step": 640
    },
    {
      "epoch": 0.59,
      "learning_rate": 8.548040936496989e-06,
      "loss": 0.9455,
      "step": 645
    },
    {
      "epoch": 0.6,
      "learning_rate": 8.389808710909881e-06,
      "loss": 0.9741,
      "step": 650
    },
    {
      "epoch": 0.6,
      "learning_rate": 8.231989314071318e-06,
      "loss": 0.9608,
      "step": 655
    },
    {
      "epoch": 0.61,
      "learning_rate": 8.07462320849313e-06,
      "loss": 0.9602,
      "step": 660
    },
    {
      "epoch": 0.61,
      "learning_rate": 7.917750740470116e-06,
      "loss": 0.9696,
      "step": 665
    },
    {
      "epoch": 0.61,
      "learning_rate": 7.761412129735853e-06,
      "loss": 0.9628,
      "step": 670
    },
    {
      "epoch": 0.62,
      "learning_rate": 7.605647459150961e-06,
      "loss": 0.9485,
      "step": 675
    },
    {
      "epoch": 0.62,
      "learning_rate": 7.4504966644264775e-06,
      "loss": 0.9485,
      "step": 680
    },
    {
      "epoch": 0.63,
      "learning_rate": 7.295999523884921e-06,
      "loss": 0.9424,
      "step": 685
    },
    {
      "epoch": 0.63,
      "learning_rate": 7.142195648261747e-06,
      "loss": 0.9648,
      "step": 690
    },
    {
      "epoch": 0.64,
      "learning_rate": 6.989124470549746e-06,
      "loss": 0.9602,
      "step": 695
    },
    {
      "epoch": 0.64,
      "learning_rate": 6.83682523588902e-06,
      "loss": 0.944,
      "step": 700
    },
    {
      "epoch": 0.65,
      "learning_rate": 6.685336991505122e-06,
      "loss": 0.9575,
      "step": 705
    },
    {
      "epoch": 0.65,
      "learning_rate": 6.5346985766979384e-06,
      "loss": 0.9498,
      "step": 710
    },
    {
      "epoch": 0.66,
      "learning_rate": 6.384948612883872e-06,
      "loss": 0.9475,
      "step": 715
    },
    {
      "epoch": 0.66,
      "learning_rate": 6.2361254936939e-06,
      "loss": 0.9385,
      "step": 720
    },
    {
      "epoch": 0.67,
      "learning_rate": 6.0882673751300235e-06,
      "loss": 0.9501,
      "step": 725
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.941412165782645e-06,
      "loss": 0.9485,
      "step": 730
    },
    {
      "epoch": 0.67,
      "learning_rate": 5.79559751711138e-06,
      "loss": 0.9352,
      "step": 735
    },
    {
      "epoch": 0.68,
      "learning_rate": 5.650860813791786e-06,
      "loss": 0.9446,
      "step": 740
    },
    {
      "epoch": 0.68,
      "learning_rate": 5.507239164130501e-06,
      "loss": 0.9523,
      "step": 745
    },
    {
      "epoch": 0.69,
      "learning_rate": 5.364769390551225e-06,
      "loss": 0.9508,
      "step": 750
    },
    {
      "epoch": 0.69,
      "learning_rate": 5.223488020154028e-06,
      "loss": 0.946,
      "step": 755
    },
    {
      "epoch": 0.7,
      "learning_rate": 5.083431275350312e-06,
      "loss": 0.9396,
      "step": 760
    },
    {
      "epoch": 0.7,
      "learning_rate": 4.9446350645759885e-06,
      "loss": 0.9467,
      "step": 765
    },
    {
      "epoch": 0.71,
      "learning_rate": 4.807134973085036e-06,
      "loss": 0.9438,
      "step": 770
    },
    {
      "epoch": 0.71,
      "learning_rate": 4.670966253826027e-06,
      "loss": 0.9502,
      "step": 775
    },
    {
      "epoch": 0.72,
      "learning_rate": 4.53616381840377e-06,
      "loss": 0.9336,
      "step": 780
    },
    {
      "epoch": 0.72,
      "learning_rate": 4.402762228128531e-06,
      "loss": 0.9408,
      "step": 785
    },
    {
      "epoch": 0.72,
      "learning_rate": 4.270795685155001e-06,
      "loss": 0.9482,
      "step": 790
    },
    {
      "epoch": 0.73,
      "learning_rate": 4.140298023713416e-06,
      "loss": 0.9288,
      "step": 795
    },
    {
      "epoch": 0.73,
      "learning_rate": 4.0113027014349374e-06,
      "loss": 0.9438,
      "step": 800
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.883842790773647e-06,
      "loss": 0.9325,
      "step": 805
    },
    {
      "epoch": 0.74,
      "learning_rate": 3.757950970527249e-06,
      "loss": 0.9366,
      "step": 810
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.633659517458736e-06,
      "loss": 0.931,
      "step": 815
    },
    {
      "epoch": 0.75,
      "learning_rate": 3.511000298021098e-06,
      "loss": 0.9497,
      "step": 820
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.39000476018726e-06,
      "loss": 0.9438,
      "step": 825
    },
    {
      "epoch": 0.76,
      "learning_rate": 3.2707039253872796e-06,
      "loss": 0.9353,
      "step": 830
    },
    {
      "epoch": 0.77,
      "learning_rate": 3.153128380554941e-06,
      "loss": 0.9419,
      "step": 835
    },
    {
      "epoch": 0.77,
      "learning_rate": 3.037308270285709e-06,
      "loss": 0.9201,
      "step": 840
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.923273289108115e-06,
      "loss": 0.9461,
      "step": 845
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.8110526738705345e-06,
      "loss": 0.937,
      "step": 850
    },
    {
      "epoch": 0.78,
      "learning_rate": 2.700675196245288e-06,
      "loss": 0.9465,
      "step": 855
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.592169155352031e-06,
      "loss": 0.9157,
      "step": 860
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.485562370502279e-06,
      "loss": 0.9282,
      "step": 865
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.3808821740669608e-06,
      "loss": 0.9293,
      "step": 870
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.2781554044688015e-06,
      "loss": 0.9402,
      "step": 875
    },
    {
      "epoch": 0.81,
      "learning_rate": 2.1774083993013715e-06,
      "loss": 0.9362,
      "step": 880
    },
    {
      "epoch": 0.81,
      "learning_rate": 2.0786669885765044e-06,
      "loss": 0.9342,
      "step": 885
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.981956488101898e-06,
      "loss": 0.9245,
      "step": 890
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.8873016929904942e-06,
      "loss": 0.9338,
      "step": 895
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.7947268713034128e-06,
      "loss": 0.9483,
      "step": 900
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.704255757827963e-06,
      "loss": 0.9351,
      "step": 905
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.6159115479924259e-06,
      "loss": 0.9239,
      "step": 910
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.529716891919074e-06,
      "loss": 0.9212,
      "step": 915
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.4456938886170413e-06,
      "loss": 0.9379,
      "step": 920
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.3638640803164516e-06,
      "loss": 0.9246,
      "step": 925
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.2842484469453365e-06,
      "loss": 0.9156,
      "step": 930
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.2068674007506787e-06,
      "loss": 0.9285,
      "step": 935
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.1317407810650372e-06,
      "loss": 0.9214,
      "step": 940
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.0588878492200261e-06,
      "loss": 0.9344,
      "step": 945
    },
    {
      "epoch": 0.87,
      "learning_rate": 9.883272836080116e-07,
      "loss": 0.9293,
      "step": 950
    },
    {
      "epoch": 0.88,
      "learning_rate": 9.200771748932513e-07,
      "loss": 0.9416,
      "step": 955
    },
    {
      "epoch": 0.88,
      "learning_rate": 8.541550213737171e-07,
      "loss": 0.9241,
      "step": 960
    },
    {
      "epoch": 0.89,
      "learning_rate": 7.905777244947954e-07,
      "loss": 0.9194,
      "step": 965
    },
    {
      "epoch": 0.89,
      "learning_rate": 7.293615845160196e-07,
      "loss": 0.9124,
      "step": 970
    },
    {
      "epoch": 0.89,
      "learning_rate": 6.705222963319191e-07,
      "loss": 0.9281,
      "step": 975
    },
    {
      "epoch": 0.9,
      "learning_rate": 6.140749454480932e-07,
      "loss": 0.9382,
      "step": 980
    },
    {
      "epoch": 0.9,
      "learning_rate": 5.600340041135133e-07,
      "loss": 0.9257,
      "step": 985
    },
    {
      "epoch": 0.91,
      "learning_rate": 5.0841332761005e-07,
      "loss": 0.9277,
      "step": 990
    },
    {
      "epoch": 0.91,
      "learning_rate": 4.592261507001994e-07,
      "loss": 0.9249,
      "step": 995
    },
    {
      "epoch": 0.92,
      "learning_rate": 4.124850842338779e-07,
      "loss": 0.9249,
      "step": 1000
    },
    {
      "epoch": 0.92,
      "learning_rate": 3.6820211191520127e-07,
      "loss": 0.9233,
      "step": 1005
    },
    {
      "epoch": 0.93,
      "learning_rate": 3.263885872300343e-07,
      "loss": 0.9086,
      "step": 1010
    },
    {
      "epoch": 0.93,
      "learning_rate": 2.870552305351382e-07,
      "loss": 0.929,
      "step": 1015
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.5021212630962246e-07,
      "loss": 0.9328,
      "step": 1020
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.158687205694443e-07,
      "loss": 0.9213,
      "step": 1025
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.840338184455881e-07,
      "loss": 0.9144,
      "step": 1030
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.5471558192656776e-07,
      "loss": 0.923,
      "step": 1035
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.279215277658097e-07,
      "loss": 0.9124,
      "step": 1040
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.0365852555447642e-07,
      "loss": 0.9406,
      "step": 1045
    },
    {
      "epoch": 0.96,
      "learning_rate": 8.19327959602012e-08,
      "loss": 0.9343,
      "step": 1050
    },
    {
      "epoch": 0.97,
      "learning_rate": 6.274990913221035e-08,
      "loss": 0.92,
      "step": 1055
    },
    {
      "epoch": 0.97,
      "learning_rate": 4.6114783273213395e-08,
      "loss": 0.9212,
      "step": 1060
    },
    {
      "epoch": 0.98,
      "learning_rate": 3.203168337845508e-08,
      "loss": 0.9127,
      "step": 1065
    },
    {
      "epoch": 0.98,
      "learning_rate": 2.05042201422323e-08,
      "loss": 0.9267,
      "step": 1070
    },
    {
      "epoch": 0.99,
      "learning_rate": 1.1535349032167908e-08,
      "loss": 0.9248,
      "step": 1075
    },
    {
      "epoch": 0.99,
      "learning_rate": 5.127369531473525e-09,
      "loss": 0.9453,
      "step": 1080
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.2819245493955746e-09,
      "loss": 0.9159,
      "step": 1085
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.0,
      "loss": 0.9075,
      "step": 1090
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.9353461265563965,
      "eval_runtime": 142.6393,
      "eval_samples_per_second": 108.182,
      "eval_steps_per_second": 1.697,
      "step": 1090
    },
    {
      "epoch": 1.0,
      "step": 1090,
      "total_flos": 456447649382400.0,
      "train_loss": 0.9776473476252424,
      "train_runtime": 5608.2856,
      "train_samples_per_second": 24.861,
      "train_steps_per_second": 0.194
    }
  ],
  "logging_steps": 5,
  "max_steps": 1090,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 456447649382400.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}