samtuckervegan's picture
Upload folder using huggingface_hub
7d55f8a verified
{
"best_metric": 0.03257535398006439,
"best_model_checkpoint": "rationality/checkpoint-7980",
"epoch": 3.0,
"eval_steps": 500,
"global_step": 7980,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.009398496240601503,
"grad_norm": 7.586980819702148,
"learning_rate": 1.5037593984962406e-06,
"loss": 0.5008,
"step": 25
},
{
"epoch": 0.018796992481203006,
"grad_norm": 2.3110930919647217,
"learning_rate": 3.070175438596491e-06,
"loss": 0.2727,
"step": 50
},
{
"epoch": 0.02819548872180451,
"grad_norm": 1.191091775894165,
"learning_rate": 4.636591478696742e-06,
"loss": 0.0819,
"step": 75
},
{
"epoch": 0.03759398496240601,
"grad_norm": 0.787821888923645,
"learning_rate": 6.203007518796992e-06,
"loss": 0.0536,
"step": 100
},
{
"epoch": 0.046992481203007516,
"grad_norm": 0.840717077255249,
"learning_rate": 7.769423558897243e-06,
"loss": 0.0584,
"step": 125
},
{
"epoch": 0.05639097744360902,
"grad_norm": 0.927150309085846,
"learning_rate": 9.335839598997493e-06,
"loss": 0.0549,
"step": 150
},
{
"epoch": 0.06578947368421052,
"grad_norm": 0.40393584966659546,
"learning_rate": 1.0902255639097744e-05,
"loss": 0.0567,
"step": 175
},
{
"epoch": 0.07518796992481203,
"grad_norm": 0.8180475234985352,
"learning_rate": 1.2468671679197996e-05,
"loss": 0.0517,
"step": 200
},
{
"epoch": 0.08458646616541353,
"grad_norm": 1.3004846572875977,
"learning_rate": 1.4035087719298246e-05,
"loss": 0.0549,
"step": 225
},
{
"epoch": 0.09398496240601503,
"grad_norm": 0.9580408930778503,
"learning_rate": 1.5601503759398496e-05,
"loss": 0.0537,
"step": 250
},
{
"epoch": 0.10338345864661654,
"grad_norm": 0.7632073760032654,
"learning_rate": 1.7167919799498746e-05,
"loss": 0.0532,
"step": 275
},
{
"epoch": 0.11278195488721804,
"grad_norm": 1.0923666954040527,
"learning_rate": 1.8734335839599e-05,
"loss": 0.0461,
"step": 300
},
{
"epoch": 0.12218045112781954,
"grad_norm": 0.6970840096473694,
"learning_rate": 2.0300751879699247e-05,
"loss": 0.0491,
"step": 325
},
{
"epoch": 0.13157894736842105,
"grad_norm": 0.4116126596927643,
"learning_rate": 2.18671679197995e-05,
"loss": 0.0479,
"step": 350
},
{
"epoch": 0.14097744360902256,
"grad_norm": 0.3232148289680481,
"learning_rate": 2.3433583959899748e-05,
"loss": 0.0498,
"step": 375
},
{
"epoch": 0.15037593984962405,
"grad_norm": 0.5330539345741272,
"learning_rate": 2.5e-05,
"loss": 0.052,
"step": 400
},
{
"epoch": 0.15977443609022557,
"grad_norm": 0.579289436340332,
"learning_rate": 2.656641604010025e-05,
"loss": 0.0451,
"step": 425
},
{
"epoch": 0.16917293233082706,
"grad_norm": 1.0427942276000977,
"learning_rate": 2.8132832080200505e-05,
"loss": 0.0486,
"step": 450
},
{
"epoch": 0.17857142857142858,
"grad_norm": 0.8240389823913574,
"learning_rate": 2.9699248120300755e-05,
"loss": 0.0427,
"step": 475
},
{
"epoch": 0.18796992481203006,
"grad_norm": 0.7741993069648743,
"learning_rate": 3.1265664160401006e-05,
"loss": 0.0422,
"step": 500
},
{
"epoch": 0.19736842105263158,
"grad_norm": 0.29159414768218994,
"learning_rate": 3.2832080200501256e-05,
"loss": 0.0414,
"step": 525
},
{
"epoch": 0.20676691729323307,
"grad_norm": 1.3077598810195923,
"learning_rate": 3.4398496240601506e-05,
"loss": 0.0476,
"step": 550
},
{
"epoch": 0.2161654135338346,
"grad_norm": 1.3951746225357056,
"learning_rate": 3.5964912280701756e-05,
"loss": 0.0477,
"step": 575
},
{
"epoch": 0.22556390977443608,
"grad_norm": 0.3251698613166809,
"learning_rate": 3.753132832080201e-05,
"loss": 0.0482,
"step": 600
},
{
"epoch": 0.2349624060150376,
"grad_norm": 0.5529103875160217,
"learning_rate": 3.909774436090226e-05,
"loss": 0.0448,
"step": 625
},
{
"epoch": 0.24436090225563908,
"grad_norm": 0.3678141236305237,
"learning_rate": 4.066416040100251e-05,
"loss": 0.0449,
"step": 650
},
{
"epoch": 0.25375939849624063,
"grad_norm": 0.8352123498916626,
"learning_rate": 4.223057644110276e-05,
"loss": 0.0398,
"step": 675
},
{
"epoch": 0.2631578947368421,
"grad_norm": 0.3434634804725647,
"learning_rate": 4.379699248120301e-05,
"loss": 0.0486,
"step": 700
},
{
"epoch": 0.2725563909774436,
"grad_norm": 0.29232466220855713,
"learning_rate": 4.536340852130326e-05,
"loss": 0.0454,
"step": 725
},
{
"epoch": 0.2819548872180451,
"grad_norm": 0.4716828167438507,
"learning_rate": 4.6929824561403515e-05,
"loss": 0.0453,
"step": 750
},
{
"epoch": 0.29135338345864664,
"grad_norm": 0.6824334859848022,
"learning_rate": 4.849624060150376e-05,
"loss": 0.044,
"step": 775
},
{
"epoch": 0.3007518796992481,
"grad_norm": 0.4220779538154602,
"learning_rate": 4.9993038150932894e-05,
"loss": 0.0473,
"step": 800
},
{
"epoch": 0.3101503759398496,
"grad_norm": 0.36610105633735657,
"learning_rate": 4.9818991924255085e-05,
"loss": 0.0407,
"step": 825
},
{
"epoch": 0.31954887218045114,
"grad_norm": 0.4304879903793335,
"learning_rate": 4.9644945697577276e-05,
"loss": 0.0408,
"step": 850
},
{
"epoch": 0.32894736842105265,
"grad_norm": 0.4744877815246582,
"learning_rate": 4.9470899470899475e-05,
"loss": 0.0415,
"step": 875
},
{
"epoch": 0.3383458646616541,
"grad_norm": 0.4376464784145355,
"learning_rate": 4.9296853244221666e-05,
"loss": 0.0417,
"step": 900
},
{
"epoch": 0.34774436090225563,
"grad_norm": 0.32754096388816833,
"learning_rate": 4.912280701754386e-05,
"loss": 0.0441,
"step": 925
},
{
"epoch": 0.35714285714285715,
"grad_norm": 0.7860820293426514,
"learning_rate": 4.8948760790866056e-05,
"loss": 0.0447,
"step": 950
},
{
"epoch": 0.36654135338345867,
"grad_norm": 0.2285061627626419,
"learning_rate": 4.8774714564188254e-05,
"loss": 0.0446,
"step": 975
},
{
"epoch": 0.37593984962406013,
"grad_norm": 0.4588755965232849,
"learning_rate": 4.8600668337510445e-05,
"loss": 0.0403,
"step": 1000
},
{
"epoch": 0.38533834586466165,
"grad_norm": 0.36558273434638977,
"learning_rate": 4.8426622110832637e-05,
"loss": 0.0394,
"step": 1025
},
{
"epoch": 0.39473684210526316,
"grad_norm": 1.0970860719680786,
"learning_rate": 4.8252575884154835e-05,
"loss": 0.041,
"step": 1050
},
{
"epoch": 0.4041353383458647,
"grad_norm": 1.1496220827102661,
"learning_rate": 4.8078529657477026e-05,
"loss": 0.0504,
"step": 1075
},
{
"epoch": 0.41353383458646614,
"grad_norm": 0.31684789061546326,
"learning_rate": 4.7904483430799224e-05,
"loss": 0.0392,
"step": 1100
},
{
"epoch": 0.42293233082706766,
"grad_norm": 0.28997454047203064,
"learning_rate": 4.7730437204121416e-05,
"loss": 0.0397,
"step": 1125
},
{
"epoch": 0.4323308270676692,
"grad_norm": 0.3564881980419159,
"learning_rate": 4.755639097744361e-05,
"loss": 0.0407,
"step": 1150
},
{
"epoch": 0.4417293233082707,
"grad_norm": 0.9638842344284058,
"learning_rate": 4.7382344750765805e-05,
"loss": 0.0403,
"step": 1175
},
{
"epoch": 0.45112781954887216,
"grad_norm": 0.33509504795074463,
"learning_rate": 4.7208298524088003e-05,
"loss": 0.0385,
"step": 1200
},
{
"epoch": 0.4605263157894737,
"grad_norm": 0.6345932483673096,
"learning_rate": 4.7034252297410195e-05,
"loss": 0.0392,
"step": 1225
},
{
"epoch": 0.4699248120300752,
"grad_norm": 0.17547644674777985,
"learning_rate": 4.6860206070732386e-05,
"loss": 0.0391,
"step": 1250
},
{
"epoch": 0.4793233082706767,
"grad_norm": 0.2999178469181061,
"learning_rate": 4.6686159844054584e-05,
"loss": 0.0383,
"step": 1275
},
{
"epoch": 0.48872180451127817,
"grad_norm": 0.4045429229736328,
"learning_rate": 4.6512113617376776e-05,
"loss": 0.0418,
"step": 1300
},
{
"epoch": 0.4981203007518797,
"grad_norm": 0.3870411813259125,
"learning_rate": 4.633806739069897e-05,
"loss": 0.0409,
"step": 1325
},
{
"epoch": 0.5075187969924813,
"grad_norm": 0.5039701461791992,
"learning_rate": 4.6164021164021165e-05,
"loss": 0.0393,
"step": 1350
},
{
"epoch": 0.5169172932330827,
"grad_norm": 0.5128330588340759,
"learning_rate": 4.5989974937343364e-05,
"loss": 0.0391,
"step": 1375
},
{
"epoch": 0.5263157894736842,
"grad_norm": 0.40783753991127014,
"learning_rate": 4.5815928710665555e-05,
"loss": 0.0399,
"step": 1400
},
{
"epoch": 0.5357142857142857,
"grad_norm": 0.4718676209449768,
"learning_rate": 4.5641882483987746e-05,
"loss": 0.0391,
"step": 1425
},
{
"epoch": 0.5451127819548872,
"grad_norm": 0.30212482810020447,
"learning_rate": 4.5467836257309945e-05,
"loss": 0.0392,
"step": 1450
},
{
"epoch": 0.5545112781954887,
"grad_norm": 0.4065203368663788,
"learning_rate": 4.5293790030632136e-05,
"loss": 0.0389,
"step": 1475
},
{
"epoch": 0.5639097744360902,
"grad_norm": 0.2927418649196625,
"learning_rate": 4.5119743803954334e-05,
"loss": 0.0371,
"step": 1500
},
{
"epoch": 0.5733082706766918,
"grad_norm": 0.540820300579071,
"learning_rate": 4.4945697577276526e-05,
"loss": 0.0373,
"step": 1525
},
{
"epoch": 0.5827067669172933,
"grad_norm": 0.3592277765274048,
"learning_rate": 4.477165135059872e-05,
"loss": 0.0379,
"step": 1550
},
{
"epoch": 0.5921052631578947,
"grad_norm": 0.33663302659988403,
"learning_rate": 4.4597605123920915e-05,
"loss": 0.0391,
"step": 1575
},
{
"epoch": 0.6015037593984962,
"grad_norm": 0.7124751210212708,
"learning_rate": 4.442355889724311e-05,
"loss": 0.0407,
"step": 1600
},
{
"epoch": 0.6109022556390977,
"grad_norm": 0.30568742752075195,
"learning_rate": 4.4249512670565305e-05,
"loss": 0.0353,
"step": 1625
},
{
"epoch": 0.6203007518796992,
"grad_norm": 0.4824027121067047,
"learning_rate": 4.4075466443887496e-05,
"loss": 0.0369,
"step": 1650
},
{
"epoch": 0.6296992481203008,
"grad_norm": 0.38219839334487915,
"learning_rate": 4.3901420217209694e-05,
"loss": 0.0407,
"step": 1675
},
{
"epoch": 0.6390977443609023,
"grad_norm": 0.43268170952796936,
"learning_rate": 4.3727373990531886e-05,
"loss": 0.0362,
"step": 1700
},
{
"epoch": 0.6484962406015038,
"grad_norm": 0.3550353944301605,
"learning_rate": 4.355332776385408e-05,
"loss": 0.0389,
"step": 1725
},
{
"epoch": 0.6578947368421053,
"grad_norm": 0.2517308294773102,
"learning_rate": 4.3379281537176275e-05,
"loss": 0.0397,
"step": 1750
},
{
"epoch": 0.6672932330827067,
"grad_norm": 0.5207804441452026,
"learning_rate": 4.3205235310498473e-05,
"loss": 0.041,
"step": 1775
},
{
"epoch": 0.6766917293233082,
"grad_norm": 0.5029985904693604,
"learning_rate": 4.3031189083820665e-05,
"loss": 0.034,
"step": 1800
},
{
"epoch": 0.6860902255639098,
"grad_norm": 0.2845001816749573,
"learning_rate": 4.2857142857142856e-05,
"loss": 0.0377,
"step": 1825
},
{
"epoch": 0.6954887218045113,
"grad_norm": 0.23876015841960907,
"learning_rate": 4.2683096630465054e-05,
"loss": 0.0388,
"step": 1850
},
{
"epoch": 0.7048872180451128,
"grad_norm": 1.0393779277801514,
"learning_rate": 4.2509050403787246e-05,
"loss": 0.0364,
"step": 1875
},
{
"epoch": 0.7142857142857143,
"grad_norm": 0.4601641297340393,
"learning_rate": 4.2335004177109444e-05,
"loss": 0.0384,
"step": 1900
},
{
"epoch": 0.7236842105263158,
"grad_norm": 0.34031376242637634,
"learning_rate": 4.2160957950431635e-05,
"loss": 0.0368,
"step": 1925
},
{
"epoch": 0.7330827067669173,
"grad_norm": 0.2456611692905426,
"learning_rate": 4.198691172375383e-05,
"loss": 0.0355,
"step": 1950
},
{
"epoch": 0.7424812030075187,
"grad_norm": 0.41420137882232666,
"learning_rate": 4.1812865497076025e-05,
"loss": 0.037,
"step": 1975
},
{
"epoch": 0.7518796992481203,
"grad_norm": 0.5716177225112915,
"learning_rate": 4.163881927039822e-05,
"loss": 0.0378,
"step": 2000
},
{
"epoch": 0.7612781954887218,
"grad_norm": 0.7877705693244934,
"learning_rate": 4.1464773043720415e-05,
"loss": 0.0375,
"step": 2025
},
{
"epoch": 0.7706766917293233,
"grad_norm": 0.521252453327179,
"learning_rate": 4.1290726817042606e-05,
"loss": 0.038,
"step": 2050
},
{
"epoch": 0.7800751879699248,
"grad_norm": 0.4914921224117279,
"learning_rate": 4.1116680590364804e-05,
"loss": 0.0367,
"step": 2075
},
{
"epoch": 0.7894736842105263,
"grad_norm": 0.3504331111907959,
"learning_rate": 4.0942634363687e-05,
"loss": 0.0354,
"step": 2100
},
{
"epoch": 0.7988721804511278,
"grad_norm": 0.42998361587524414,
"learning_rate": 4.076858813700919e-05,
"loss": 0.0406,
"step": 2125
},
{
"epoch": 0.8082706766917294,
"grad_norm": 0.3703167736530304,
"learning_rate": 4.0594541910331385e-05,
"loss": 0.0389,
"step": 2150
},
{
"epoch": 0.8176691729323309,
"grad_norm": 0.32070252299308777,
"learning_rate": 4.042049568365358e-05,
"loss": 0.0381,
"step": 2175
},
{
"epoch": 0.8270676691729323,
"grad_norm": 0.3124656081199646,
"learning_rate": 4.0246449456975775e-05,
"loss": 0.0397,
"step": 2200
},
{
"epoch": 0.8364661654135338,
"grad_norm": 0.45335420966148376,
"learning_rate": 4.0072403230297966e-05,
"loss": 0.0338,
"step": 2225
},
{
"epoch": 0.8458646616541353,
"grad_norm": 0.8689984083175659,
"learning_rate": 3.9898357003620164e-05,
"loss": 0.034,
"step": 2250
},
{
"epoch": 0.8552631578947368,
"grad_norm": 0.48539942502975464,
"learning_rate": 3.9724310776942356e-05,
"loss": 0.0371,
"step": 2275
},
{
"epoch": 0.8646616541353384,
"grad_norm": 0.24096523225307465,
"learning_rate": 3.9550264550264554e-05,
"loss": 0.0384,
"step": 2300
},
{
"epoch": 0.8740601503759399,
"grad_norm": 0.5698876976966858,
"learning_rate": 3.9376218323586745e-05,
"loss": 0.036,
"step": 2325
},
{
"epoch": 0.8834586466165414,
"grad_norm": 0.26149895787239075,
"learning_rate": 3.920217209690894e-05,
"loss": 0.0371,
"step": 2350
},
{
"epoch": 0.8928571428571429,
"grad_norm": 0.3663557171821594,
"learning_rate": 3.9028125870231135e-05,
"loss": 0.0408,
"step": 2375
},
{
"epoch": 0.9022556390977443,
"grad_norm": 0.5307694673538208,
"learning_rate": 3.885407964355333e-05,
"loss": 0.0376,
"step": 2400
},
{
"epoch": 0.9116541353383458,
"grad_norm": 0.33842626214027405,
"learning_rate": 3.8680033416875525e-05,
"loss": 0.0374,
"step": 2425
},
{
"epoch": 0.9210526315789473,
"grad_norm": 0.7476523518562317,
"learning_rate": 3.8505987190197716e-05,
"loss": 0.0371,
"step": 2450
},
{
"epoch": 0.9304511278195489,
"grad_norm": 0.39809858798980713,
"learning_rate": 3.8331940963519914e-05,
"loss": 0.0384,
"step": 2475
},
{
"epoch": 0.9398496240601504,
"grad_norm": 1.4509490728378296,
"learning_rate": 3.815789473684211e-05,
"loss": 0.0363,
"step": 2500
},
{
"epoch": 0.9492481203007519,
"grad_norm": 0.5149303674697876,
"learning_rate": 3.79838485101643e-05,
"loss": 0.0366,
"step": 2525
},
{
"epoch": 0.9586466165413534,
"grad_norm": 0.5460497736930847,
"learning_rate": 3.7809802283486495e-05,
"loss": 0.0404,
"step": 2550
},
{
"epoch": 0.9680451127819549,
"grad_norm": 0.5033419132232666,
"learning_rate": 3.763575605680869e-05,
"loss": 0.0383,
"step": 2575
},
{
"epoch": 0.9774436090225563,
"grad_norm": 0.6479871869087219,
"learning_rate": 3.7461709830130885e-05,
"loss": 0.0403,
"step": 2600
},
{
"epoch": 0.9868421052631579,
"grad_norm": 0.31846144795417786,
"learning_rate": 3.7287663603453076e-05,
"loss": 0.0369,
"step": 2625
},
{
"epoch": 0.9962406015037594,
"grad_norm": 0.7051171660423279,
"learning_rate": 3.7113617376775274e-05,
"loss": 0.0363,
"step": 2650
},
{
"epoch": 1.0,
"eval_explained_variance": 0.34664106369018555,
"eval_loss": 0.03611931949853897,
"eval_mae": 0.1509377658367157,
"eval_mse": 0.0361127145588398,
"eval_r2": 0.34323060512542725,
"eval_rmse": 0.1900334564197573,
"eval_runtime": 5.0512,
"eval_samples_per_second": 4212.238,
"eval_steps_per_second": 65.924,
"step": 2660
},
{
"epoch": 1.005639097744361,
"grad_norm": 0.20418846607208252,
"learning_rate": 3.6939571150097466e-05,
"loss": 0.0363,
"step": 2675
},
{
"epoch": 1.0150375939849625,
"grad_norm": 0.30139222741127014,
"learning_rate": 3.6765524923419664e-05,
"loss": 0.0337,
"step": 2700
},
{
"epoch": 1.0244360902255638,
"grad_norm": 0.5572872161865234,
"learning_rate": 3.6591478696741855e-05,
"loss": 0.0338,
"step": 2725
},
{
"epoch": 1.0338345864661653,
"grad_norm": 0.4931128919124603,
"learning_rate": 3.6417432470064053e-05,
"loss": 0.039,
"step": 2750
},
{
"epoch": 1.0432330827067668,
"grad_norm": 0.35547202825546265,
"learning_rate": 3.6243386243386245e-05,
"loss": 0.0391,
"step": 2775
},
{
"epoch": 1.0526315789473684,
"grad_norm": 0.23790113627910614,
"learning_rate": 3.606934001670844e-05,
"loss": 0.0346,
"step": 2800
},
{
"epoch": 1.0620300751879699,
"grad_norm": 0.7549180388450623,
"learning_rate": 3.5895293790030634e-05,
"loss": 0.0362,
"step": 2825
},
{
"epoch": 1.0714285714285714,
"grad_norm": 0.35359710454940796,
"learning_rate": 3.5721247563352826e-05,
"loss": 0.0353,
"step": 2850
},
{
"epoch": 1.080827067669173,
"grad_norm": 0.32629117369651794,
"learning_rate": 3.5547201336675024e-05,
"loss": 0.0353,
"step": 2875
},
{
"epoch": 1.0902255639097744,
"grad_norm": 0.5467492341995239,
"learning_rate": 3.537315510999722e-05,
"loss": 0.0401,
"step": 2900
},
{
"epoch": 1.099624060150376,
"grad_norm": 0.46580857038497925,
"learning_rate": 3.519910888331941e-05,
"loss": 0.0374,
"step": 2925
},
{
"epoch": 1.1090225563909775,
"grad_norm": 0.19464190304279327,
"learning_rate": 3.5025062656641605e-05,
"loss": 0.0315,
"step": 2950
},
{
"epoch": 1.118421052631579,
"grad_norm": 0.6511549949645996,
"learning_rate": 3.48510164299638e-05,
"loss": 0.0371,
"step": 2975
},
{
"epoch": 1.1278195488721805,
"grad_norm": 0.3231920599937439,
"learning_rate": 3.4676970203285995e-05,
"loss": 0.034,
"step": 3000
},
{
"epoch": 1.137218045112782,
"grad_norm": 0.3325081467628479,
"learning_rate": 3.4502923976608186e-05,
"loss": 0.0332,
"step": 3025
},
{
"epoch": 1.1466165413533835,
"grad_norm": 0.3780410885810852,
"learning_rate": 3.4328877749930384e-05,
"loss": 0.0365,
"step": 3050
},
{
"epoch": 1.156015037593985,
"grad_norm": 0.39700257778167725,
"learning_rate": 3.4154831523252576e-05,
"loss": 0.0356,
"step": 3075
},
{
"epoch": 1.1654135338345863,
"grad_norm": 0.41819268465042114,
"learning_rate": 3.3980785296574774e-05,
"loss": 0.0351,
"step": 3100
},
{
"epoch": 1.1748120300751879,
"grad_norm": 0.592147707939148,
"learning_rate": 3.3806739069896965e-05,
"loss": 0.0374,
"step": 3125
},
{
"epoch": 1.1842105263157894,
"grad_norm": 0.4350600838661194,
"learning_rate": 3.363269284321916e-05,
"loss": 0.0352,
"step": 3150
},
{
"epoch": 1.193609022556391,
"grad_norm": 0.3941713273525238,
"learning_rate": 3.3458646616541355e-05,
"loss": 0.0338,
"step": 3175
},
{
"epoch": 1.2030075187969924,
"grad_norm": 0.9201453328132629,
"learning_rate": 3.328460038986355e-05,
"loss": 0.0342,
"step": 3200
},
{
"epoch": 1.212406015037594,
"grad_norm": 0.6875268816947937,
"learning_rate": 3.3110554163185744e-05,
"loss": 0.0378,
"step": 3225
},
{
"epoch": 1.2218045112781954,
"grad_norm": 0.44067856669425964,
"learning_rate": 3.2936507936507936e-05,
"loss": 0.0323,
"step": 3250
},
{
"epoch": 1.231203007518797,
"grad_norm": 0.6714525818824768,
"learning_rate": 3.2762461709830134e-05,
"loss": 0.0356,
"step": 3275
},
{
"epoch": 1.2406015037593985,
"grad_norm": 0.2296379655599594,
"learning_rate": 3.258841548315233e-05,
"loss": 0.0356,
"step": 3300
},
{
"epoch": 1.25,
"grad_norm": 0.5694165825843811,
"learning_rate": 3.241436925647452e-05,
"loss": 0.0347,
"step": 3325
},
{
"epoch": 1.2593984962406015,
"grad_norm": 0.2145877480506897,
"learning_rate": 3.2240323029796715e-05,
"loss": 0.0345,
"step": 3350
},
{
"epoch": 1.268796992481203,
"grad_norm": 0.32587698101997375,
"learning_rate": 3.206627680311891e-05,
"loss": 0.036,
"step": 3375
},
{
"epoch": 1.2781954887218046,
"grad_norm": 0.34945639967918396,
"learning_rate": 3.1892230576441104e-05,
"loss": 0.0371,
"step": 3400
},
{
"epoch": 1.287593984962406,
"grad_norm": 1.0401064157485962,
"learning_rate": 3.1718184349763296e-05,
"loss": 0.0411,
"step": 3425
},
{
"epoch": 1.2969924812030076,
"grad_norm": 0.45730292797088623,
"learning_rate": 3.1544138123085494e-05,
"loss": 0.0408,
"step": 3450
},
{
"epoch": 1.306390977443609,
"grad_norm": 0.2677878439426422,
"learning_rate": 3.1370091896407685e-05,
"loss": 0.0372,
"step": 3475
},
{
"epoch": 1.3157894736842106,
"grad_norm": 0.6013411283493042,
"learning_rate": 3.1196045669729884e-05,
"loss": 0.0344,
"step": 3500
},
{
"epoch": 1.3251879699248121,
"grad_norm": 0.45004361867904663,
"learning_rate": 3.1021999443052075e-05,
"loss": 0.035,
"step": 3525
},
{
"epoch": 1.3345864661654137,
"grad_norm": 0.7524242997169495,
"learning_rate": 3.084795321637427e-05,
"loss": 0.0344,
"step": 3550
},
{
"epoch": 1.3439849624060152,
"grad_norm": 0.4096704125404358,
"learning_rate": 3.0673906989696465e-05,
"loss": 0.0362,
"step": 3575
},
{
"epoch": 1.3533834586466165,
"grad_norm": 0.6386268138885498,
"learning_rate": 3.0499860763018663e-05,
"loss": 0.0336,
"step": 3600
},
{
"epoch": 1.362781954887218,
"grad_norm": 0.2265842705965042,
"learning_rate": 3.032581453634085e-05,
"loss": 0.0324,
"step": 3625
},
{
"epoch": 1.3721804511278195,
"grad_norm": 0.4210365116596222,
"learning_rate": 3.015176830966305e-05,
"loss": 0.0362,
"step": 3650
},
{
"epoch": 1.381578947368421,
"grad_norm": 0.3626422882080078,
"learning_rate": 2.9977722082985244e-05,
"loss": 0.0323,
"step": 3675
},
{
"epoch": 1.3909774436090225,
"grad_norm": 0.37654510140419006,
"learning_rate": 2.980367585630744e-05,
"loss": 0.032,
"step": 3700
},
{
"epoch": 1.400375939849624,
"grad_norm": 0.6583954691886902,
"learning_rate": 2.962962962962963e-05,
"loss": 0.0376,
"step": 3725
},
{
"epoch": 1.4097744360902256,
"grad_norm": 0.5544891953468323,
"learning_rate": 2.9455583402951825e-05,
"loss": 0.0373,
"step": 3750
},
{
"epoch": 1.419172932330827,
"grad_norm": 0.3765174150466919,
"learning_rate": 2.928153717627402e-05,
"loss": 0.0343,
"step": 3775
},
{
"epoch": 1.4285714285714286,
"grad_norm": 0.74721360206604,
"learning_rate": 2.9107490949596218e-05,
"loss": 0.0323,
"step": 3800
},
{
"epoch": 1.4379699248120301,
"grad_norm": 0.287864625453949,
"learning_rate": 2.8933444722918406e-05,
"loss": 0.0348,
"step": 3825
},
{
"epoch": 1.4473684210526316,
"grad_norm": 0.7538726925849915,
"learning_rate": 2.8759398496240604e-05,
"loss": 0.0344,
"step": 3850
},
{
"epoch": 1.4567669172932332,
"grad_norm": 0.3344118595123291,
"learning_rate": 2.85853522695628e-05,
"loss": 0.0347,
"step": 3875
},
{
"epoch": 1.4661654135338344,
"grad_norm": 0.3745761215686798,
"learning_rate": 2.8411306042884993e-05,
"loss": 0.0345,
"step": 3900
},
{
"epoch": 1.475563909774436,
"grad_norm": 0.2883740961551666,
"learning_rate": 2.8237259816207185e-05,
"loss": 0.0344,
"step": 3925
},
{
"epoch": 1.4849624060150375,
"grad_norm": 0.4642302989959717,
"learning_rate": 2.806321358952938e-05,
"loss": 0.0348,
"step": 3950
},
{
"epoch": 1.494360902255639,
"grad_norm": 0.4955233037471771,
"learning_rate": 2.7889167362851574e-05,
"loss": 0.038,
"step": 3975
},
{
"epoch": 1.5037593984962405,
"grad_norm": 0.29833799600601196,
"learning_rate": 2.7715121136173773e-05,
"loss": 0.0337,
"step": 4000
},
{
"epoch": 1.513157894736842,
"grad_norm": 0.2732565402984619,
"learning_rate": 2.754107490949596e-05,
"loss": 0.0327,
"step": 4025
},
{
"epoch": 1.5225563909774436,
"grad_norm": 0.5221346020698547,
"learning_rate": 2.736702868281816e-05,
"loss": 0.0366,
"step": 4050
},
{
"epoch": 1.531954887218045,
"grad_norm": 0.26338204741477966,
"learning_rate": 2.7192982456140354e-05,
"loss": 0.0359,
"step": 4075
},
{
"epoch": 1.5413533834586466,
"grad_norm": 0.21089033782482147,
"learning_rate": 2.701893622946255e-05,
"loss": 0.0375,
"step": 4100
},
{
"epoch": 1.550751879699248,
"grad_norm": 0.38757413625717163,
"learning_rate": 2.684489000278474e-05,
"loss": 0.0329,
"step": 4125
},
{
"epoch": 1.5601503759398496,
"grad_norm": 0.36682936549186707,
"learning_rate": 2.6670843776106935e-05,
"loss": 0.0347,
"step": 4150
},
{
"epoch": 1.5695488721804511,
"grad_norm": 0.37840262055397034,
"learning_rate": 2.649679754942913e-05,
"loss": 0.0352,
"step": 4175
},
{
"epoch": 1.5789473684210527,
"grad_norm": 0.4169174134731293,
"learning_rate": 2.6322751322751328e-05,
"loss": 0.0348,
"step": 4200
},
{
"epoch": 1.5883458646616542,
"grad_norm": 0.3553796708583832,
"learning_rate": 2.6148705096073516e-05,
"loss": 0.0352,
"step": 4225
},
{
"epoch": 1.5977443609022557,
"grad_norm": 0.23502863943576813,
"learning_rate": 2.5974658869395714e-05,
"loss": 0.0349,
"step": 4250
},
{
"epoch": 1.6071428571428572,
"grad_norm": 0.3328050673007965,
"learning_rate": 2.580061264271791e-05,
"loss": 0.0317,
"step": 4275
},
{
"epoch": 1.6165413533834587,
"grad_norm": 0.3134351074695587,
"learning_rate": 2.5626566416040103e-05,
"loss": 0.0346,
"step": 4300
},
{
"epoch": 1.6259398496240602,
"grad_norm": 0.2432243674993515,
"learning_rate": 2.5452520189362295e-05,
"loss": 0.0338,
"step": 4325
},
{
"epoch": 1.6353383458646618,
"grad_norm": 0.25854891538619995,
"learning_rate": 2.527847396268449e-05,
"loss": 0.0321,
"step": 4350
},
{
"epoch": 1.6447368421052633,
"grad_norm": 0.7508202791213989,
"learning_rate": 2.5104427736006684e-05,
"loss": 0.0322,
"step": 4375
},
{
"epoch": 1.6541353383458648,
"grad_norm": 0.4984326958656311,
"learning_rate": 2.493038150932888e-05,
"loss": 0.0339,
"step": 4400
},
{
"epoch": 1.6635338345864663,
"grad_norm": 0.3120623230934143,
"learning_rate": 2.475633528265107e-05,
"loss": 0.0351,
"step": 4425
},
{
"epoch": 1.6729323308270678,
"grad_norm": 0.5180129408836365,
"learning_rate": 2.458228905597327e-05,
"loss": 0.0331,
"step": 4450
},
{
"epoch": 1.6823308270676691,
"grad_norm": 0.3674444556236267,
"learning_rate": 2.4415204678362576e-05,
"loss": 0.0339,
"step": 4475
},
{
"epoch": 1.6917293233082706,
"grad_norm": 0.2138279378414154,
"learning_rate": 2.4241158451684767e-05,
"loss": 0.0325,
"step": 4500
},
{
"epoch": 1.7011278195488722,
"grad_norm": 0.4587240219116211,
"learning_rate": 2.4067112225006962e-05,
"loss": 0.0333,
"step": 4525
},
{
"epoch": 1.7105263157894737,
"grad_norm": 0.3884350061416626,
"learning_rate": 2.3893065998329157e-05,
"loss": 0.0364,
"step": 4550
},
{
"epoch": 1.7199248120300752,
"grad_norm": 0.4955609440803528,
"learning_rate": 2.371901977165135e-05,
"loss": 0.0319,
"step": 4575
},
{
"epoch": 1.7293233082706767,
"grad_norm": 0.22079665958881378,
"learning_rate": 2.3544973544973546e-05,
"loss": 0.0306,
"step": 4600
},
{
"epoch": 1.7387218045112782,
"grad_norm": 0.45004066824913025,
"learning_rate": 2.337092731829574e-05,
"loss": 0.0323,
"step": 4625
},
{
"epoch": 1.7481203007518797,
"grad_norm": 0.35747626423835754,
"learning_rate": 2.3196881091617932e-05,
"loss": 0.033,
"step": 4650
},
{
"epoch": 1.7575187969924813,
"grad_norm": 0.6423829793930054,
"learning_rate": 2.302283486494013e-05,
"loss": 0.0325,
"step": 4675
},
{
"epoch": 1.7669172932330826,
"grad_norm": 0.4049668312072754,
"learning_rate": 2.2848788638262322e-05,
"loss": 0.0329,
"step": 4700
},
{
"epoch": 1.776315789473684,
"grad_norm": 0.4574697017669678,
"learning_rate": 2.2674742411584517e-05,
"loss": 0.037,
"step": 4725
},
{
"epoch": 1.7857142857142856,
"grad_norm": 0.2724074423313141,
"learning_rate": 2.250069618490671e-05,
"loss": 0.0347,
"step": 4750
},
{
"epoch": 1.795112781954887,
"grad_norm": 0.881424069404602,
"learning_rate": 2.2326649958228906e-05,
"loss": 0.0354,
"step": 4775
},
{
"epoch": 1.8045112781954886,
"grad_norm": 0.6708984971046448,
"learning_rate": 2.21526037315511e-05,
"loss": 0.0372,
"step": 4800
},
{
"epoch": 1.8139097744360901,
"grad_norm": 0.6843132376670837,
"learning_rate": 2.1978557504873296e-05,
"loss": 0.0321,
"step": 4825
},
{
"epoch": 1.8233082706766917,
"grad_norm": 0.32172465324401855,
"learning_rate": 2.1804511278195487e-05,
"loss": 0.0344,
"step": 4850
},
{
"epoch": 1.8327067669172932,
"grad_norm": 0.3380116820335388,
"learning_rate": 2.1630465051517686e-05,
"loss": 0.0329,
"step": 4875
},
{
"epoch": 1.8421052631578947,
"grad_norm": 0.5369280576705933,
"learning_rate": 2.1456418824839877e-05,
"loss": 0.0331,
"step": 4900
},
{
"epoch": 1.8515037593984962,
"grad_norm": 0.4699563682079315,
"learning_rate": 2.1282372598162072e-05,
"loss": 0.0342,
"step": 4925
},
{
"epoch": 1.8609022556390977,
"grad_norm": 1.1310768127441406,
"learning_rate": 2.1108326371484267e-05,
"loss": 0.0349,
"step": 4950
},
{
"epoch": 1.8703007518796992,
"grad_norm": 0.2779694199562073,
"learning_rate": 2.093428014480646e-05,
"loss": 0.0318,
"step": 4975
},
{
"epoch": 1.8796992481203008,
"grad_norm": 0.5871944427490234,
"learning_rate": 2.0760233918128656e-05,
"loss": 0.0344,
"step": 5000
},
{
"epoch": 1.8890977443609023,
"grad_norm": 0.48690375685691833,
"learning_rate": 2.058618769145085e-05,
"loss": 0.0351,
"step": 5025
},
{
"epoch": 1.8984962406015038,
"grad_norm": 0.29975295066833496,
"learning_rate": 2.0412141464773042e-05,
"loss": 0.0354,
"step": 5050
},
{
"epoch": 1.9078947368421053,
"grad_norm": 0.6784300208091736,
"learning_rate": 2.023809523809524e-05,
"loss": 0.0313,
"step": 5075
},
{
"epoch": 1.9172932330827068,
"grad_norm": 0.7290612459182739,
"learning_rate": 2.0064049011417432e-05,
"loss": 0.0338,
"step": 5100
},
{
"epoch": 1.9266917293233083,
"grad_norm": 0.40152326226234436,
"learning_rate": 1.9890002784739627e-05,
"loss": 0.0335,
"step": 5125
},
{
"epoch": 1.9360902255639099,
"grad_norm": 0.36136892437934875,
"learning_rate": 1.971595655806182e-05,
"loss": 0.0325,
"step": 5150
},
{
"epoch": 1.9454887218045114,
"grad_norm": 0.8763560652732849,
"learning_rate": 1.9541910331384016e-05,
"loss": 0.0344,
"step": 5175
},
{
"epoch": 1.954887218045113,
"grad_norm": 0.25709742307662964,
"learning_rate": 1.936786410470621e-05,
"loss": 0.0337,
"step": 5200
},
{
"epoch": 1.9642857142857144,
"grad_norm": 0.31332555413246155,
"learning_rate": 1.9193817878028406e-05,
"loss": 0.0315,
"step": 5225
},
{
"epoch": 1.973684210526316,
"grad_norm": 0.41366493701934814,
"learning_rate": 1.9019771651350597e-05,
"loss": 0.0362,
"step": 5250
},
{
"epoch": 1.9830827067669174,
"grad_norm": 0.48200878500938416,
"learning_rate": 1.8845725424672795e-05,
"loss": 0.0348,
"step": 5275
},
{
"epoch": 1.9924812030075187,
"grad_norm": 0.22511008381843567,
"learning_rate": 1.8671679197994987e-05,
"loss": 0.0357,
"step": 5300
},
{
"epoch": 2.0,
"eval_explained_variance": 0.39413392543792725,
"eval_loss": 0.033320341259241104,
"eval_mae": 0.14226087927818298,
"eval_mse": 0.03331853449344635,
"eval_r2": 0.3940473794937134,
"eval_rmse": 0.18253365304361371,
"eval_runtime": 5.0333,
"eval_samples_per_second": 4227.223,
"eval_steps_per_second": 66.159,
"step": 5320
},
{
"epoch": 2.0018796992481205,
"grad_norm": 0.3990646004676819,
"learning_rate": 1.849763297131718e-05,
"loss": 0.0323,
"step": 5325
},
{
"epoch": 2.011278195488722,
"grad_norm": 0.3467103838920593,
"learning_rate": 1.8323586744639376e-05,
"loss": 0.0319,
"step": 5350
},
{
"epoch": 2.0206766917293235,
"grad_norm": 0.4198233485221863,
"learning_rate": 1.814954051796157e-05,
"loss": 0.0348,
"step": 5375
},
{
"epoch": 2.030075187969925,
"grad_norm": 0.28035154938697815,
"learning_rate": 1.7975494291283766e-05,
"loss": 0.0309,
"step": 5400
},
{
"epoch": 2.039473684210526,
"grad_norm": 0.5984301567077637,
"learning_rate": 1.780144806460596e-05,
"loss": 0.0301,
"step": 5425
},
{
"epoch": 2.0488721804511276,
"grad_norm": 0.2896101772785187,
"learning_rate": 1.7627401837928152e-05,
"loss": 0.0278,
"step": 5450
},
{
"epoch": 2.058270676691729,
"grad_norm": 0.36555686593055725,
"learning_rate": 1.745335561125035e-05,
"loss": 0.0339,
"step": 5475
},
{
"epoch": 2.0676691729323307,
"grad_norm": 0.2957359254360199,
"learning_rate": 1.7279309384572542e-05,
"loss": 0.0302,
"step": 5500
},
{
"epoch": 2.077067669172932,
"grad_norm": 0.27549412846565247,
"learning_rate": 1.7105263157894737e-05,
"loss": 0.0318,
"step": 5525
},
{
"epoch": 2.0864661654135337,
"grad_norm": 0.4103783965110779,
"learning_rate": 1.693121693121693e-05,
"loss": 0.0326,
"step": 5550
},
{
"epoch": 2.095864661654135,
"grad_norm": 0.288662850856781,
"learning_rate": 1.6757170704539126e-05,
"loss": 0.0296,
"step": 5575
},
{
"epoch": 2.1052631578947367,
"grad_norm": 0.6283071041107178,
"learning_rate": 1.658312447786132e-05,
"loss": 0.0279,
"step": 5600
},
{
"epoch": 2.1146616541353382,
"grad_norm": 0.3841685354709625,
"learning_rate": 1.6409078251183516e-05,
"loss": 0.0301,
"step": 5625
},
{
"epoch": 2.1240601503759398,
"grad_norm": 0.4866536557674408,
"learning_rate": 1.6235032024505707e-05,
"loss": 0.0305,
"step": 5650
},
{
"epoch": 2.1334586466165413,
"grad_norm": 0.3846096098423004,
"learning_rate": 1.6060985797827905e-05,
"loss": 0.0293,
"step": 5675
},
{
"epoch": 2.142857142857143,
"grad_norm": 0.32383719086647034,
"learning_rate": 1.5886939571150097e-05,
"loss": 0.0278,
"step": 5700
},
{
"epoch": 2.1522556390977443,
"grad_norm": 0.45821473002433777,
"learning_rate": 1.571289334447229e-05,
"loss": 0.0291,
"step": 5725
},
{
"epoch": 2.161654135338346,
"grad_norm": 0.5681780576705933,
"learning_rate": 1.5538847117794486e-05,
"loss": 0.0298,
"step": 5750
},
{
"epoch": 2.1710526315789473,
"grad_norm": 0.3051597774028778,
"learning_rate": 1.536480089111668e-05,
"loss": 0.0275,
"step": 5775
},
{
"epoch": 2.180451127819549,
"grad_norm": 0.49849197268486023,
"learning_rate": 1.5190754664438874e-05,
"loss": 0.0306,
"step": 5800
},
{
"epoch": 2.1898496240601504,
"grad_norm": 0.3272913098335266,
"learning_rate": 1.501670843776107e-05,
"loss": 0.0309,
"step": 5825
},
{
"epoch": 2.199248120300752,
"grad_norm": 0.30019038915634155,
"learning_rate": 1.4842662211083264e-05,
"loss": 0.0311,
"step": 5850
},
{
"epoch": 2.2086466165413534,
"grad_norm": 0.24176378548145294,
"learning_rate": 1.4668615984405459e-05,
"loss": 0.0317,
"step": 5875
},
{
"epoch": 2.218045112781955,
"grad_norm": 0.4282451570034027,
"learning_rate": 1.4494569757727652e-05,
"loss": 0.0306,
"step": 5900
},
{
"epoch": 2.2274436090225564,
"grad_norm": 0.24212364852428436,
"learning_rate": 1.4320523531049848e-05,
"loss": 0.0329,
"step": 5925
},
{
"epoch": 2.236842105263158,
"grad_norm": 0.3030043840408325,
"learning_rate": 1.4146477304372041e-05,
"loss": 0.0292,
"step": 5950
},
{
"epoch": 2.2462406015037595,
"grad_norm": 0.6198856830596924,
"learning_rate": 1.3972431077694236e-05,
"loss": 0.0308,
"step": 5975
},
{
"epoch": 2.255639097744361,
"grad_norm": 0.36712250113487244,
"learning_rate": 1.3798384851016429e-05,
"loss": 0.0307,
"step": 6000
},
{
"epoch": 2.2650375939849625,
"grad_norm": 0.4682950973510742,
"learning_rate": 1.3624338624338626e-05,
"loss": 0.0321,
"step": 6025
},
{
"epoch": 2.274436090225564,
"grad_norm": 0.4325818419456482,
"learning_rate": 1.3450292397660819e-05,
"loss": 0.0328,
"step": 6050
},
{
"epoch": 2.2838345864661656,
"grad_norm": 0.48816877603530884,
"learning_rate": 1.3276246170983015e-05,
"loss": 0.0327,
"step": 6075
},
{
"epoch": 2.293233082706767,
"grad_norm": 0.34565097093582153,
"learning_rate": 1.3102199944305207e-05,
"loss": 0.0315,
"step": 6100
},
{
"epoch": 2.3026315789473686,
"grad_norm": 0.40750962495803833,
"learning_rate": 1.2928153717627403e-05,
"loss": 0.0283,
"step": 6125
},
{
"epoch": 2.31203007518797,
"grad_norm": 0.32673898339271545,
"learning_rate": 1.2754107490949596e-05,
"loss": 0.032,
"step": 6150
},
{
"epoch": 2.3214285714285716,
"grad_norm": 0.41216543316841125,
"learning_rate": 1.2580061264271793e-05,
"loss": 0.0298,
"step": 6175
},
{
"epoch": 2.3308270676691727,
"grad_norm": 0.3217643201351166,
"learning_rate": 1.2406015037593984e-05,
"loss": 0.0268,
"step": 6200
},
{
"epoch": 2.340225563909774,
"grad_norm": 0.4416458010673523,
"learning_rate": 1.2231968810916179e-05,
"loss": 0.0307,
"step": 6225
},
{
"epoch": 2.3496240601503757,
"grad_norm": 0.468988299369812,
"learning_rate": 1.2057922584238374e-05,
"loss": 0.0335,
"step": 6250
},
{
"epoch": 2.3590225563909772,
"grad_norm": 0.3771836757659912,
"learning_rate": 1.1883876357560568e-05,
"loss": 0.0318,
"step": 6275
},
{
"epoch": 2.3684210526315788,
"grad_norm": 0.8457861542701721,
"learning_rate": 1.1709830130882762e-05,
"loss": 0.0303,
"step": 6300
},
{
"epoch": 2.3778195488721803,
"grad_norm": 0.38719773292541504,
"learning_rate": 1.1535783904204956e-05,
"loss": 0.0304,
"step": 6325
},
{
"epoch": 2.387218045112782,
"grad_norm": 0.5245744585990906,
"learning_rate": 1.1361737677527151e-05,
"loss": 0.0306,
"step": 6350
},
{
"epoch": 2.3966165413533833,
"grad_norm": 0.392035573720932,
"learning_rate": 1.1187691450849346e-05,
"loss": 0.0331,
"step": 6375
},
{
"epoch": 2.406015037593985,
"grad_norm": 0.7871630787849426,
"learning_rate": 1.1013645224171539e-05,
"loss": 0.0293,
"step": 6400
},
{
"epoch": 2.4154135338345863,
"grad_norm": 0.5946124196052551,
"learning_rate": 1.0839598997493734e-05,
"loss": 0.0315,
"step": 6425
},
{
"epoch": 2.424812030075188,
"grad_norm": 0.44132673740386963,
"learning_rate": 1.0665552770815929e-05,
"loss": 0.0288,
"step": 6450
},
{
"epoch": 2.4342105263157894,
"grad_norm": 0.3801218569278717,
"learning_rate": 1.0491506544138123e-05,
"loss": 0.0324,
"step": 6475
},
{
"epoch": 2.443609022556391,
"grad_norm": 0.18658651411533356,
"learning_rate": 1.0317460317460318e-05,
"loss": 0.0323,
"step": 6500
},
{
"epoch": 2.4530075187969924,
"grad_norm": 0.2590925991535187,
"learning_rate": 1.0143414090782511e-05,
"loss": 0.0328,
"step": 6525
},
{
"epoch": 2.462406015037594,
"grad_norm": 0.49319398403167725,
"learning_rate": 9.969367864104706e-06,
"loss": 0.0294,
"step": 6550
},
{
"epoch": 2.4718045112781954,
"grad_norm": 0.37835025787353516,
"learning_rate": 9.795321637426901e-06,
"loss": 0.0303,
"step": 6575
},
{
"epoch": 2.481203007518797,
"grad_norm": 0.6518813371658325,
"learning_rate": 9.621275410749096e-06,
"loss": 0.0316,
"step": 6600
},
{
"epoch": 2.4906015037593985,
"grad_norm": 0.4249250292778015,
"learning_rate": 9.447229184071289e-06,
"loss": 0.0289,
"step": 6625
},
{
"epoch": 2.5,
"grad_norm": 0.45102307200431824,
"learning_rate": 9.273182957393484e-06,
"loss": 0.0307,
"step": 6650
},
{
"epoch": 2.5093984962406015,
"grad_norm": 0.3769674003124237,
"learning_rate": 9.099136730715678e-06,
"loss": 0.0273,
"step": 6675
},
{
"epoch": 2.518796992481203,
"grad_norm": 0.3190540075302124,
"learning_rate": 8.925090504037873e-06,
"loss": 0.0296,
"step": 6700
},
{
"epoch": 2.5281954887218046,
"grad_norm": 0.2434764802455902,
"learning_rate": 8.751044277360066e-06,
"loss": 0.033,
"step": 6725
},
{
"epoch": 2.537593984962406,
"grad_norm": 0.3015526533126831,
"learning_rate": 8.576998050682261e-06,
"loss": 0.0333,
"step": 6750
},
{
"epoch": 2.5469924812030076,
"grad_norm": 0.2905600368976593,
"learning_rate": 8.402951824004456e-06,
"loss": 0.0319,
"step": 6775
},
{
"epoch": 2.556390977443609,
"grad_norm": 0.24334688484668732,
"learning_rate": 8.22890559732665e-06,
"loss": 0.03,
"step": 6800
},
{
"epoch": 2.5657894736842106,
"grad_norm": 0.8693634867668152,
"learning_rate": 8.054859370648844e-06,
"loss": 0.0284,
"step": 6825
},
{
"epoch": 2.575187969924812,
"grad_norm": 0.2691483199596405,
"learning_rate": 7.880813143971038e-06,
"loss": 0.0272,
"step": 6850
},
{
"epoch": 2.5845864661654137,
"grad_norm": 0.5078217387199402,
"learning_rate": 7.706766917293233e-06,
"loss": 0.0289,
"step": 6875
},
{
"epoch": 2.593984962406015,
"grad_norm": 0.6707290410995483,
"learning_rate": 7.532720690615427e-06,
"loss": 0.0293,
"step": 6900
},
{
"epoch": 2.6033834586466167,
"grad_norm": 0.4188375771045685,
"learning_rate": 7.358674463937622e-06,
"loss": 0.0287,
"step": 6925
},
{
"epoch": 2.612781954887218,
"grad_norm": 0.5552793145179749,
"learning_rate": 7.184628237259816e-06,
"loss": 0.0308,
"step": 6950
},
{
"epoch": 2.6221804511278197,
"grad_norm": 0.3137630820274353,
"learning_rate": 7.010582010582011e-06,
"loss": 0.0325,
"step": 6975
},
{
"epoch": 2.6315789473684212,
"grad_norm": 0.34588831663131714,
"learning_rate": 6.836535783904205e-06,
"loss": 0.0273,
"step": 7000
},
{
"epoch": 2.6409774436090228,
"grad_norm": 0.5899882316589355,
"learning_rate": 6.6624895572263995e-06,
"loss": 0.0299,
"step": 7025
},
{
"epoch": 2.6503759398496243,
"grad_norm": 0.2558177709579468,
"learning_rate": 6.4884433305485934e-06,
"loss": 0.028,
"step": 7050
},
{
"epoch": 2.659774436090226,
"grad_norm": 0.366569846868515,
"learning_rate": 6.314397103870788e-06,
"loss": 0.0297,
"step": 7075
},
{
"epoch": 2.6691729323308273,
"grad_norm": 0.3304097354412079,
"learning_rate": 6.140350877192982e-06,
"loss": 0.0306,
"step": 7100
},
{
"epoch": 2.678571428571429,
"grad_norm": 0.3274601995944977,
"learning_rate": 5.966304650515177e-06,
"loss": 0.0275,
"step": 7125
},
{
"epoch": 2.6879699248120303,
"grad_norm": 0.30296674370765686,
"learning_rate": 5.792258423837371e-06,
"loss": 0.0288,
"step": 7150
},
{
"epoch": 2.6973684210526314,
"grad_norm": 0.2754576504230499,
"learning_rate": 5.618212197159566e-06,
"loss": 0.0309,
"step": 7175
},
{
"epoch": 2.706766917293233,
"grad_norm": 0.8207494616508484,
"learning_rate": 5.44416597048176e-06,
"loss": 0.0287,
"step": 7200
},
{
"epoch": 2.7161654135338344,
"grad_norm": 0.3519987165927887,
"learning_rate": 5.2701197438039544e-06,
"loss": 0.0311,
"step": 7225
},
{
"epoch": 2.725563909774436,
"grad_norm": 0.6962947845458984,
"learning_rate": 5.096073517126148e-06,
"loss": 0.028,
"step": 7250
},
{
"epoch": 2.7349624060150375,
"grad_norm": 0.3754965662956238,
"learning_rate": 4.922027290448343e-06,
"loss": 0.0293,
"step": 7275
},
{
"epoch": 2.744360902255639,
"grad_norm": 0.3603477478027344,
"learning_rate": 4.747981063770537e-06,
"loss": 0.0284,
"step": 7300
},
{
"epoch": 2.7537593984962405,
"grad_norm": 0.207660973072052,
"learning_rate": 4.573934837092732e-06,
"loss": 0.0295,
"step": 7325
},
{
"epoch": 2.763157894736842,
"grad_norm": 0.3796231746673584,
"learning_rate": 4.399888610414926e-06,
"loss": 0.0269,
"step": 7350
},
{
"epoch": 2.7725563909774436,
"grad_norm": 0.30700182914733887,
"learning_rate": 4.225842383737121e-06,
"loss": 0.0274,
"step": 7375
},
{
"epoch": 2.781954887218045,
"grad_norm": 0.27875229716300964,
"learning_rate": 4.051796157059315e-06,
"loss": 0.0291,
"step": 7400
},
{
"epoch": 2.7913533834586466,
"grad_norm": 0.35511383414268494,
"learning_rate": 3.877749930381509e-06,
"loss": 0.025,
"step": 7425
},
{
"epoch": 2.800751879699248,
"grad_norm": 0.566415011882782,
"learning_rate": 3.7037037037037037e-06,
"loss": 0.0271,
"step": 7450
},
{
"epoch": 2.8101503759398496,
"grad_norm": 0.25591719150543213,
"learning_rate": 3.529657477025898e-06,
"loss": 0.0279,
"step": 7475
},
{
"epoch": 2.819548872180451,
"grad_norm": 0.2331235408782959,
"learning_rate": 3.3556112503480925e-06,
"loss": 0.0284,
"step": 7500
},
{
"epoch": 2.8289473684210527,
"grad_norm": 0.3275633156299591,
"learning_rate": 3.181565023670287e-06,
"loss": 0.0305,
"step": 7525
},
{
"epoch": 2.838345864661654,
"grad_norm": 0.54421466588974,
"learning_rate": 3.007518796992481e-06,
"loss": 0.0292,
"step": 7550
},
{
"epoch": 2.8477443609022557,
"grad_norm": 0.45261576771736145,
"learning_rate": 2.8334725703146756e-06,
"loss": 0.0285,
"step": 7575
},
{
"epoch": 2.857142857142857,
"grad_norm": 0.42874467372894287,
"learning_rate": 2.65942634363687e-06,
"loss": 0.0268,
"step": 7600
},
{
"epoch": 2.8665413533834587,
"grad_norm": 0.30592483282089233,
"learning_rate": 2.4853801169590643e-06,
"loss": 0.0302,
"step": 7625
},
{
"epoch": 2.8759398496240602,
"grad_norm": 0.20582637190818787,
"learning_rate": 2.3113338902812587e-06,
"loss": 0.0288,
"step": 7650
},
{
"epoch": 2.8853383458646618,
"grad_norm": 0.311002254486084,
"learning_rate": 2.137287663603453e-06,
"loss": 0.0271,
"step": 7675
},
{
"epoch": 2.8947368421052633,
"grad_norm": 0.533819317817688,
"learning_rate": 1.9632414369256474e-06,
"loss": 0.0298,
"step": 7700
},
{
"epoch": 2.904135338345865,
"grad_norm": 0.2923714518547058,
"learning_rate": 1.7891952102478418e-06,
"loss": 0.0307,
"step": 7725
},
{
"epoch": 2.9135338345864663,
"grad_norm": 0.695756196975708,
"learning_rate": 1.6151489835700362e-06,
"loss": 0.0301,
"step": 7750
},
{
"epoch": 2.922932330827068,
"grad_norm": 0.39920884370803833,
"learning_rate": 1.4411027568922305e-06,
"loss": 0.028,
"step": 7775
},
{
"epoch": 2.932330827067669,
"grad_norm": 0.4026110768318176,
"learning_rate": 1.2670565302144249e-06,
"loss": 0.0287,
"step": 7800
},
{
"epoch": 2.9417293233082704,
"grad_norm": 0.2511468827724457,
"learning_rate": 1.0930103035366193e-06,
"loss": 0.0329,
"step": 7825
},
{
"epoch": 2.951127819548872,
"grad_norm": 0.2902221977710724,
"learning_rate": 9.189640768588137e-07,
"loss": 0.0287,
"step": 7850
},
{
"epoch": 2.9605263157894735,
"grad_norm": 0.266245037317276,
"learning_rate": 7.449178501810081e-07,
"loss": 0.0302,
"step": 7875
},
{
"epoch": 2.969924812030075,
"grad_norm": 0.5273639559745789,
"learning_rate": 5.708716235032025e-07,
"loss": 0.0305,
"step": 7900
},
{
"epoch": 2.9793233082706765,
"grad_norm": 0.24314239621162415,
"learning_rate": 3.9682539682539683e-07,
"loss": 0.0286,
"step": 7925
},
{
"epoch": 2.988721804511278,
"grad_norm": 0.754489004611969,
"learning_rate": 2.227791701475912e-07,
"loss": 0.0287,
"step": 7950
},
{
"epoch": 2.9981203007518795,
"grad_norm": 0.3576299846172333,
"learning_rate": 4.873294346978558e-08,
"loss": 0.0302,
"step": 7975
},
{
"epoch": 3.0,
"eval_explained_variance": 0.4084639549255371,
"eval_loss": 0.03257535398006439,
"eval_mae": 0.1405062973499298,
"eval_mse": 0.03257712349295616,
"eval_r2": 0.40753114223480225,
"eval_rmse": 0.18049133910788118,
"eval_runtime": 5.0396,
"eval_samples_per_second": 4221.965,
"eval_steps_per_second": 66.077,
"step": 7980
}
],
"logging_steps": 25,
"max_steps": 7980,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 5,
"early_stopping_threshold": 0.01
},
"attributes": {
"early_stopping_patience_counter": 2
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 8456567720509440.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}