{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 8.0,
  "eval_steps": 500,
  "global_step": 1960,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "learning_rate": 0.0004999678864499828,
      "loss": 1.9845,
      "step": 10
    },
    {
      "epoch": 0.08,
      "learning_rate": 0.000499871554050172,
      "loss": 1.8444,
      "step": 20
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.0004997110275491702,
      "loss": 1.721,
      "step": 30
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.0004994863481875841,
      "loss": 1.6755,
      "step": 40
    },
    {
      "epoch": 0.2,
      "learning_rate": 0.0004991975736874289,
      "loss": 1.7248,
      "step": 50
    },
    {
      "epoch": 0.24,
      "learning_rate": 0.0004988447782372996,
      "loss": 1.6803,
      "step": 60
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0004984280524733107,
      "loss": 1.7063,
      "step": 70
    },
    {
      "epoch": 0.33,
      "learning_rate": 0.0004979475034558115,
      "loss": 1.6724,
      "step": 80
    },
    {
      "epoch": 0.37,
      "learning_rate": 0.0004974032546418815,
      "loss": 1.6933,
      "step": 90
    },
    {
      "epoch": 0.41,
      "learning_rate": 0.0004967954458536126,
      "loss": 1.6243,
      "step": 100
    },
    {
      "epoch": 0.45,
      "learning_rate": 0.0004961242332421882,
      "loss": 1.639,
      "step": 110
    },
    {
      "epoch": 0.49,
      "learning_rate": 0.0004953897892477664,
      "loss": 1.6545,
      "step": 120
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.0004945923025551788,
      "loss": 1.6017,
      "step": 130
    },
    {
      "epoch": 0.57,
      "learning_rate": 0.0004937319780454559,
      "loss": 1.6522,
      "step": 140
    },
    {
      "epoch": 0.61,
      "learning_rate": 0.000492809036743191,
      "loss": 1.5948,
      "step": 150
    },
    {
      "epoch": 0.65,
      "learning_rate": 0.0004918237157597574,
      "loss": 1.5385,
      "step": 160
    },
    {
      "epoch": 0.69,
      "learning_rate": 0.0004907762682323926,
      "loss": 1.6132,
      "step": 170
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.0004896669632591652,
      "loss": 1.5396,
      "step": 180
    },
    {
      "epoch": 0.78,
      "learning_rate": 0.000488496085829841,
      "loss": 1.5023,
      "step": 190
    },
    {
      "epoch": 0.82,
      "learning_rate": 0.0004872639367526672,
      "loss": 1.4937,
      "step": 200
    },
    {
      "epoch": 0.86,
      "learning_rate": 0.0004859708325770919,
      "loss": 1.4824,
      "step": 210
    },
    {
      "epoch": 0.9,
      "learning_rate": 0.0004846171055124401,
      "loss": 1.5506,
      "step": 220
    },
    {
      "epoch": 0.94,
      "learning_rate": 0.00048320310334256625,
      "loss": 1.5709,
      "step": 230
    },
    {
      "epoch": 0.98,
      "learning_rate": 0.0004817291893365054,
      "loss": 1.5531,
      "step": 240
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.00048019574215514706,
      "loss": 1.4546,
      "step": 250
    },
    {
      "epoch": 1.06,
      "learning_rate": 0.0004786031557539531,
      "loss": 1.4281,
      "step": 260
    },
    {
      "epoch": 1.1,
      "learning_rate": 0.00047695183928174803,
      "loss": 1.4339,
      "step": 270
    },
    {
      "epoch": 1.14,
      "learning_rate": 0.00047524221697560476,
      "loss": 1.3993,
      "step": 280
    },
    {
      "epoch": 1.18,
      "learning_rate": 0.0004734747280518549,
      "loss": 1.3786,
      "step": 290
    },
    {
      "epoch": 1.22,
      "learning_rate": 0.00047164982659325005,
      "loss": 1.458,
      "step": 300
    },
    {
      "epoch": 1.27,
      "learning_rate": 0.0004697679814323043,
      "loss": 1.3692,
      "step": 310
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.00046782967603084736,
      "loss": 1.491,
      "step": 320
    },
    {
      "epoch": 1.35,
      "learning_rate": 0.00046583540835581883,
      "loss": 1.3637,
      "step": 330
    },
    {
      "epoch": 1.39,
      "learning_rate": 0.0004637856907513366,
      "loss": 1.4276,
      "step": 340
    },
    {
      "epoch": 1.43,
      "learning_rate": 0.00046168104980707104,
      "loss": 1.3943,
      "step": 350
    },
    {
      "epoch": 1.47,
      "learning_rate": 0.00045952202622296013,
      "loss": 1.4274,
      "step": 360
    },
    {
      "epoch": 1.51,
      "learning_rate": 0.00045730917467029877,
      "loss": 1.4829,
      "step": 370
    },
    {
      "epoch": 1.55,
      "learning_rate": 0.00045504306364923896,
      "loss": 1.3585,
      "step": 380
    },
    {
      "epoch": 1.59,
      "learning_rate": 0.00045272427534273776,
      "loss": 1.4194,
      "step": 390
    },
    {
      "epoch": 1.63,
      "learning_rate": 0.00045035340546698916,
      "loss": 1.3924,
      "step": 400
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.0004479310631183799,
      "loss": 1.4117,
      "step": 410
    },
    {
      "epoch": 1.71,
      "learning_rate": 0.0004454578706170075,
      "loss": 1.3849,
      "step": 420
    },
    {
      "epoch": 1.76,
      "learning_rate": 0.0004429344633468004,
      "loss": 1.3857,
      "step": 430
    },
    {
      "epoch": 1.8,
      "learning_rate": 0.0004403614895922836,
      "loss": 1.4772,
      "step": 440
    },
    {
      "epoch": 1.84,
      "learning_rate": 0.0004377396103720278,
      "loss": 1.4021,
      "step": 450
    },
    {
      "epoch": 1.88,
      "learning_rate": 0.00043506949926882887,
      "loss": 1.3589,
      "step": 460
    },
    {
      "epoch": 1.92,
      "learning_rate": 0.0004323518422566586,
      "loss": 1.45,
      "step": 470
    },
    {
      "epoch": 1.96,
      "learning_rate": 0.0004295873375244319,
      "loss": 1.2784,
      "step": 480
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.00042677669529663686,
      "loss": 1.4127,
      "step": 490
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.0004239206376508716,
      "loss": 1.2777,
      "step": 500
    },
    {
      "epoch": 2.08,
      "learning_rate": 0.00042101989833233654,
      "loss": 1.1873,
      "step": 510
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.0004180752225653292,
      "loss": 1.2746,
      "step": 520
    },
    {
      "epoch": 2.16,
      "learning_rate": 0.0004150873668617898,
      "loss": 1.308,
      "step": 530
    },
    {
      "epoch": 2.2,
      "learning_rate": 0.00041205709882694713,
      "loss": 1.2431,
      "step": 540
    },
    {
      "epoch": 2.24,
      "learning_rate": 0.0004089851969621138,
      "loss": 1.3411,
      "step": 550
    },
    {
      "epoch": 2.29,
      "learning_rate": 0.0004058724504646834,
      "loss": 1.2369,
      "step": 560
    },
    {
      "epoch": 2.33,
      "learning_rate": 0.0004027196590253786,
      "loss": 1.303,
      "step": 570
    },
    {
      "epoch": 2.37,
      "learning_rate": 0.000399527632622804,
      "loss": 1.2011,
      "step": 580
    },
    {
      "epoch": 2.41,
      "learning_rate": 0.0003962971913153559,
      "loss": 1.2239,
      "step": 590
    },
    {
      "epoch": 2.45,
      "learning_rate": 0.00039302916503054243,
      "loss": 1.2133,
      "step": 600
    },
    {
      "epoch": 2.49,
      "learning_rate": 0.0003897243933517679,
      "loss": 1.2108,
      "step": 610
    },
    {
      "epoch": 2.53,
      "learning_rate": 0.00038638372530263714,
      "loss": 1.3095,
      "step": 620
    },
    {
      "epoch": 2.57,
      "learning_rate": 0.00038300801912883415,
      "loss": 1.2392,
      "step": 630
    },
    {
      "epoch": 2.61,
      "learning_rate": 0.0003795981420776313,
      "loss": 1.1789,
      "step": 640
    },
    {
      "epoch": 2.65,
      "learning_rate": 0.0003761549701750865,
      "loss": 1.2285,
      "step": 650
    },
    {
      "epoch": 2.69,
      "learning_rate": 0.0003726793880009845,
      "loss": 1.2101,
      "step": 660
    },
    {
      "epoch": 2.73,
      "learning_rate": 0.00036917228846158136,
      "loss": 1.1973,
      "step": 670
    },
    {
      "epoch": 2.78,
      "learning_rate": 0.00036563457256020887,
      "loss": 1.2238,
      "step": 680
    },
    {
      "epoch": 2.82,
      "learning_rate": 0.0003620671491657992,
      "loss": 1.2249,
      "step": 690
    },
    {
      "epoch": 2.86,
      "learning_rate": 0.00035847093477938953,
      "loss": 1.2314,
      "step": 700
    },
    {
      "epoch": 2.9,
      "learning_rate": 0.00035484685329866423,
      "loss": 1.2987,
      "step": 710
    },
    {
      "epoch": 2.94,
      "learning_rate": 0.00035119583578059843,
      "loss": 1.2416,
      "step": 720
    },
    {
      "epoch": 2.98,
      "learning_rate": 0.00034751882020226174,
      "loss": 1.175,
      "step": 730
    },
    {
      "epoch": 3.02,
      "learning_rate": 0.0003438167512198436,
      "loss": 1.2123,
      "step": 740
    },
    {
      "epoch": 3.06,
      "learning_rate": 0.00034009057992596335,
      "loss": 1.0257,
      "step": 750
    },
    {
      "epoch": 3.1,
      "learning_rate": 0.0003363412636053269,
      "loss": 1.118,
      "step": 760
    },
    {
      "epoch": 3.14,
      "learning_rate": 0.00033256976548879183,
      "loss": 1.1586,
      "step": 770
    },
    {
      "epoch": 3.18,
      "learning_rate": 0.0003287770545059052,
      "loss": 1.0559,
      "step": 780
    },
    {
      "epoch": 3.22,
      "learning_rate": 0.0003249641050359779,
      "loss": 1.0775,
      "step": 790
    },
    {
      "epoch": 3.27,
      "learning_rate": 0.0003211318966577581,
      "loss": 1.0905,
      "step": 800
    },
    {
      "epoch": 3.31,
      "learning_rate": 0.00031728141389776923,
      "loss": 1.088,
      "step": 810
    },
    {
      "epoch": 3.35,
      "learning_rate": 0.0003134136459773768,
      "loss": 1.0395,
      "step": 820
    },
    {
      "epoch": 3.39,
      "learning_rate": 0.00030952958655864954,
      "loss": 1.1064,
      "step": 830
    },
    {
      "epoch": 3.43,
      "learning_rate": 0.0003056302334890786,
      "loss": 1.0662,
      "step": 840
    },
    {
      "epoch": 3.47,
      "learning_rate": 0.0003017165885452227,
      "loss": 1.0627,
      "step": 850
    },
    {
      "epoch": 3.51,
      "learning_rate": 0.00029778965717534313,
      "loss": 0.9968,
      "step": 860
    },
    {
      "epoch": 3.55,
      "learning_rate": 0.0002938504482410954,
      "loss": 1.1237,
      "step": 870
    },
    {
      "epoch": 3.59,
      "learning_rate": 0.00028989997375834483,
      "loss": 1.0432,
      "step": 880
    },
    {
      "epoch": 3.63,
      "learning_rate": 0.00028593924863717045,
      "loss": 1.1197,
      "step": 890
    },
    {
      "epoch": 3.67,
      "learning_rate": 0.0002819692904211265,
      "loss": 1.0016,
      "step": 900
    },
    {
      "epoch": 3.71,
      "learning_rate": 0.00027799111902582696,
      "loss": 1.0956,
      "step": 910
    },
    {
      "epoch": 3.76,
      "learning_rate": 0.00027400575647692046,
      "loss": 1.0878,
      "step": 920
    },
    {
      "epoch": 3.8,
      "learning_rate": 0.00027001422664752335,
      "loss": 1.0838,
      "step": 930
    },
    {
      "epoch": 3.84,
      "learning_rate": 0.00026601755499517824,
      "loss": 1.0587,
      "step": 940
    },
    {
      "epoch": 3.88,
      "learning_rate": 0.0002620167682984052,
      "loss": 1.0857,
      "step": 950
    },
    {
      "epoch": 3.92,
      "learning_rate": 0.00025801289439291385,
      "loss": 1.0107,
      "step": 960
    },
    {
      "epoch": 3.96,
      "learning_rate": 0.00025400696190754345,
      "loss": 1.1491,
      "step": 970
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.00025,
      "loss": 1.0726,
      "step": 980
    },
    {
      "epoch": 4.04,
      "learning_rate": 0.0002459930380924566,
      "loss": 0.9856,
      "step": 990
    },
    {
      "epoch": 4.08,
      "learning_rate": 0.0002419871056070862,
      "loss": 0.9726,
      "step": 1000
    },
    {
      "epoch": 4.12,
      "learning_rate": 0.00023798323170159486,
      "loss": 0.9675,
      "step": 1010
    },
    {
      "epoch": 4.16,
      "learning_rate": 0.0002339824450048218,
      "loss": 0.9831,
      "step": 1020
    },
    {
      "epoch": 4.2,
      "learning_rate": 0.0002299857733524767,
      "loss": 0.9082,
      "step": 1030
    },
    {
      "epoch": 4.24,
      "learning_rate": 0.00022599424352307955,
      "loss": 1.04,
      "step": 1040
    },
    {
      "epoch": 4.29,
      "learning_rate": 0.00022200888097417305,
      "loss": 0.9608,
      "step": 1050
    },
    {
      "epoch": 4.33,
      "learning_rate": 0.00021803070957887347,
      "loss": 1.0405,
      "step": 1060
    },
    {
      "epoch": 4.37,
      "learning_rate": 0.0002140607513628296,
      "loss": 0.9635,
      "step": 1070
    },
    {
      "epoch": 4.41,
      "learning_rate": 0.00021010002624165526,
      "loss": 0.8787,
      "step": 1080
    },
    {
      "epoch": 4.45,
      "learning_rate": 0.00020614955175890463,
      "loss": 0.9367,
      "step": 1090
    },
    {
      "epoch": 4.49,
      "learning_rate": 0.00020221034282465699,
      "loss": 1.0206,
      "step": 1100
    },
    {
      "epoch": 4.53,
      "learning_rate": 0.00019828341145477728,
      "loss": 0.962,
      "step": 1110
    },
    {
      "epoch": 4.57,
      "learning_rate": 0.00019436976651092142,
      "loss": 0.8907,
      "step": 1120
    },
    {
      "epoch": 4.61,
      "learning_rate": 0.00019047041344135045,
      "loss": 0.932,
      "step": 1130
    },
    {
      "epoch": 4.65,
      "learning_rate": 0.0001865863540226232,
      "loss": 0.957,
      "step": 1140
    },
    {
      "epoch": 4.69,
      "learning_rate": 0.0001827185861022308,
      "loss": 0.9121,
      "step": 1150
    },
    {
      "epoch": 4.73,
      "learning_rate": 0.0001788681033422419,
      "loss": 0.9817,
      "step": 1160
    },
    {
      "epoch": 4.78,
      "learning_rate": 0.0001750358949640221,
      "loss": 0.9116,
      "step": 1170
    },
    {
      "epoch": 4.82,
      "learning_rate": 0.00017122294549409484,
      "loss": 0.8627,
      "step": 1180
    },
    {
      "epoch": 4.86,
      "learning_rate": 0.00016743023451120832,
      "loss": 0.8922,
      "step": 1190
    },
    {
      "epoch": 4.9,
      "learning_rate": 0.00016365873639467314,
      "loss": 0.9926,
      "step": 1200
    },
    {
      "epoch": 4.94,
      "learning_rate": 0.0001599094200740367,
      "loss": 1.0046,
      "step": 1210
    },
    {
      "epoch": 4.98,
      "learning_rate": 0.0001561832487801565,
      "loss": 0.927,
      "step": 1220
    },
    {
      "epoch": 5.02,
      "learning_rate": 0.0001524811797977383,
      "loss": 0.8946,
      "step": 1230
    },
    {
      "epoch": 5.06,
      "learning_rate": 0.00014880416421940155,
      "loss": 0.9484,
      "step": 1240
    },
    {
      "epoch": 5.1,
      "learning_rate": 0.0001451531467013358,
      "loss": 0.7806,
      "step": 1250
    },
    {
      "epoch": 5.14,
      "learning_rate": 0.00014152906522061048,
      "loss": 0.8487,
      "step": 1260
    },
    {
      "epoch": 5.18,
      "learning_rate": 0.00013793285083420076,
      "loss": 0.7939,
      "step": 1270
    },
    {
      "epoch": 5.22,
      "learning_rate": 0.00013436542743979125,
      "loss": 0.9054,
      "step": 1280
    },
    {
      "epoch": 5.27,
      "learning_rate": 0.0001308277115384187,
      "loss": 0.8808,
      "step": 1290
    },
    {
      "epoch": 5.31,
      "learning_rate": 0.00012732061199901561,
      "loss": 0.8748,
      "step": 1300
    },
    {
      "epoch": 5.35,
      "learning_rate": 0.00012384502982491357,
      "loss": 0.9054,
      "step": 1310
    },
    {
      "epoch": 5.39,
      "learning_rate": 0.00012040185792236874,
      "loss": 0.8023,
      "step": 1320
    },
    {
      "epoch": 5.43,
      "learning_rate": 0.00011699198087116588,
      "loss": 0.9195,
      "step": 1330
    },
    {
      "epoch": 5.47,
      "learning_rate": 0.00011361627469736286,
      "loss": 0.8406,
      "step": 1340
    },
    {
      "epoch": 5.51,
      "learning_rate": 0.00011027560664823208,
      "loss": 0.8873,
      "step": 1350
    },
    {
      "epoch": 5.55,
      "learning_rate": 0.00010697083496945764,
      "loss": 0.8811,
      "step": 1360
    },
    {
      "epoch": 5.59,
      "learning_rate": 0.00010370280868464405,
      "loss": 0.9164,
      "step": 1370
    },
    {
      "epoch": 5.63,
      "learning_rate": 0.000100472367377196,
      "loss": 0.9604,
      "step": 1380
    },
    {
      "epoch": 5.67,
      "learning_rate": 9.728034097462144e-05,
      "loss": 0.9027,
      "step": 1390
    },
    {
      "epoch": 5.71,
      "learning_rate": 9.412754953531663e-05,
      "loss": 0.857,
      "step": 1400
    },
    {
      "epoch": 5.76,
      "learning_rate": 9.101480303788623e-05,
      "loss": 0.8198,
      "step": 1410
    },
    {
      "epoch": 5.8,
      "learning_rate": 8.794290117305295e-05,
      "loss": 0.866,
      "step": 1420
    },
    {
      "epoch": 5.84,
      "learning_rate": 8.491263313821021e-05,
      "loss": 0.8422,
      "step": 1430
    },
    {
      "epoch": 5.88,
      "learning_rate": 8.192477743467078e-05,
      "loss": 0.8921,
      "step": 1440
    },
    {
      "epoch": 5.92,
      "learning_rate": 7.898010166766348e-05,
      "loss": 0.8429,
      "step": 1450
    },
    {
      "epoch": 5.96,
      "learning_rate": 7.60793623491284e-05,
      "loss": 0.8728,
      "step": 1460
    },
    {
      "epoch": 6.0,
      "learning_rate": 7.322330470336314e-05,
      "loss": 0.8335,
      "step": 1470
    },
    {
      "epoch": 6.04,
      "learning_rate": 7.041266247556813e-05,
      "loss": 0.8213,
      "step": 1480
    },
    {
      "epoch": 6.08,
      "learning_rate": 6.764815774334149e-05,
      "loss": 0.8509,
      "step": 1490
    },
    {
      "epoch": 6.12,
      "learning_rate": 6.493050073117116e-05,
      "loss": 0.7348,
      "step": 1500
    },
    {
      "epoch": 6.16,
      "learning_rate": 6.226038962797217e-05,
      "loss": 0.8422,
      "step": 1510
    },
    {
      "epoch": 6.2,
      "learning_rate": 5.96385104077164e-05,
      "loss": 0.896,
      "step": 1520
    },
    {
      "epoch": 6.24,
      "learning_rate": 5.706553665319955e-05,
      "loss": 0.7791,
      "step": 1530
    },
    {
      "epoch": 6.29,
      "learning_rate": 5.454212938299255e-05,
      "loss": 0.8045,
      "step": 1540
    },
    {
      "epoch": 6.33,
      "learning_rate": 5.206893688162009e-05,
      "loss": 0.8531,
      "step": 1550
    },
    {
      "epoch": 6.37,
      "learning_rate": 4.9646594533010875e-05,
      "loss": 0.8444,
      "step": 1560
    },
    {
      "epoch": 6.41,
      "learning_rate": 4.7275724657262294e-05,
      "loss": 0.7966,
      "step": 1570
    },
    {
      "epoch": 6.45,
      "learning_rate": 4.495693635076101e-05,
      "loss": 0.7895,
      "step": 1580
    },
    {
      "epoch": 6.49,
      "learning_rate": 4.269082532970131e-05,
      "loss": 0.7409,
      "step": 1590
    },
    {
      "epoch": 6.53,
      "learning_rate": 4.047797377703985e-05,
      "loss": 0.875,
      "step": 1600
    },
    {
      "epoch": 6.57,
      "learning_rate": 3.831895019292897e-05,
      "loss": 0.8869,
      "step": 1610
    },
    {
      "epoch": 6.61,
      "learning_rate": 3.621430924866348e-05,
      "loss": 0.8159,
      "step": 1620
    },
    {
      "epoch": 6.65,
      "learning_rate": 3.416459164418123e-05,
      "loss": 0.7707,
      "step": 1630
    },
    {
      "epoch": 6.69,
      "learning_rate": 3.217032396915265e-05,
      "loss": 0.8724,
      "step": 1640
    },
    {
      "epoch": 6.73,
      "learning_rate": 3.0232018567695695e-05,
      "loss": 0.7733,
      "step": 1650
    },
    {
      "epoch": 6.78,
      "learning_rate": 2.8350173406749973e-05,
      "loss": 0.8311,
      "step": 1660
    },
    {
      "epoch": 6.82,
      "learning_rate": 2.652527194814511e-05,
      "loss": 0.8356,
      "step": 1670
    },
    {
      "epoch": 6.86,
      "learning_rate": 2.4757783024395242e-05,
      "loss": 0.8144,
      "step": 1680
    },
    {
      "epoch": 6.9,
      "learning_rate": 2.3048160718251997e-05,
      "loss": 0.7767,
      "step": 1690
    },
    {
      "epoch": 6.94,
      "learning_rate": 2.1396844246046905e-05,
      "loss": 0.8402,
      "step": 1700
    },
    {
      "epoch": 6.98,
      "learning_rate": 1.980425784485293e-05,
      "loss": 0.8576,
      "step": 1710
    },
    {
      "epoch": 7.02,
      "learning_rate": 1.827081066349459e-05,
      "loss": 0.7797,
      "step": 1720
    },
    {
      "epoch": 7.06,
      "learning_rate": 1.6796896657433808e-05,
      "loss": 0.8409,
      "step": 1730
    },
    {
      "epoch": 7.1,
      "learning_rate": 1.538289448755989e-05,
      "loss": 0.7642,
      "step": 1740
    },
    {
      "epoch": 7.14,
      "learning_rate": 1.4029167422908107e-05,
      "loss": 0.8464,
      "step": 1750
    },
    {
      "epoch": 7.18,
      "learning_rate": 1.273606324733284e-05,
      "loss": 0.7975,
      "step": 1760
    },
    {
      "epoch": 7.22,
      "learning_rate": 1.1503914170159058e-05,
      "loss": 0.7577,
      "step": 1770
    },
    {
      "epoch": 7.27,
      "learning_rate": 1.0333036740834856e-05,
      "loss": 0.8085,
      "step": 1780
    },
    {
      "epoch": 7.31,
      "learning_rate": 9.223731767607434e-06,
      "loss": 0.7511,
      "step": 1790
    },
    {
      "epoch": 7.35,
      "learning_rate": 8.176284240242638e-06,
      "loss": 0.845,
      "step": 1800
    },
    {
      "epoch": 7.39,
      "learning_rate": 7.190963256809069e-06,
      "loss": 0.865,
      "step": 1810
    },
    {
      "epoch": 7.43,
      "learning_rate": 6.268021954544096e-06,
      "loss": 0.8486,
      "step": 1820
    },
    {
      "epoch": 7.47,
      "learning_rate": 5.407697444821169e-06,
      "loss": 0.7099,
      "step": 1830
    },
    {
      "epoch": 7.51,
      "learning_rate": 4.61021075223364e-06,
      "loss": 0.8223,
      "step": 1840
    },
    {
      "epoch": 7.55,
      "learning_rate": 3.8757667578118995e-06,
      "loss": 0.8753,
      "step": 1850
    },
    {
      "epoch": 7.59,
      "learning_rate": 3.2045541463874563e-06,
      "loss": 0.8835,
      "step": 1860
    },
    {
      "epoch": 7.63,
      "learning_rate": 2.5967453581185186e-06,
      "loss": 0.7877,
      "step": 1870
    },
    {
      "epoch": 7.67,
      "learning_rate": 2.052496544188487e-06,
      "loss": 0.8828,
      "step": 1880
    },
    {
      "epoch": 7.71,
      "learning_rate": 1.571947526689349e-06,
      "loss": 0.7658,
      "step": 1890
    },
    {
      "epoch": 7.76,
      "learning_rate": 1.1552217627004424e-06,
      "loss": 0.8952,
      "step": 1900
    },
    {
      "epoch": 7.8,
      "learning_rate": 8.024263125710751e-07,
      "loss": 0.8118,
      "step": 1910
    },
    {
      "epoch": 7.84,
      "learning_rate": 5.136518124159162e-07,
      "loss": 0.8503,
      "step": 1920
    },
    {
      "epoch": 7.88,
      "learning_rate": 2.8897245082978865e-07,
      "loss": 0.8248,
      "step": 1930
    },
    {
      "epoch": 7.92,
      "learning_rate": 1.284459498280266e-07,
      "loss": 0.8515,
      "step": 1940
    },
    {
      "epoch": 7.96,
      "learning_rate": 3.2113550017198734e-08,
      "loss": 0.8274,
      "step": 1950
    },
    {
      "epoch": 8.0,
      "learning_rate": 0.0,
      "loss": 0.7359,
      "step": 1960
    },
    {
      "epoch": 8.0,
      "step": 1960,
      "total_flos": 7.457847416862474e+17,
      "train_loss": 1.1027992168251348,
      "train_runtime": 31447.7257,
      "train_samples_per_second": 0.997,
      "train_steps_per_second": 0.062
    }
  ],
  "logging_steps": 10,
  "max_steps": 1960,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 8,
  "save_steps": 100,
  "total_flos": 7.457847416862474e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}