{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9987515605493134,
  "eval_steps": 2000,
  "global_step": 600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "grad_norm": 4.760079834365155,
      "learning_rate": 8.333333333333334e-08,
      "logits/chosen": -1.1630980968475342,
      "logits/rejected": -1.0201224088668823,
      "logps/chosen": -350.4145812988281,
      "logps/rejected": -220.30422973632812,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.02,
      "grad_norm": 3.6284772366587372,
      "learning_rate": 8.333333333333333e-07,
      "logits/chosen": -1.0121445655822754,
      "logits/rejected": -0.9547010064125061,
      "logps/chosen": -320.40802001953125,
      "logps/rejected": -186.48419189453125,
      "loss": 0.6924,
      "rewards/accuracies": 0.4982638955116272,
      "rewards/chosen": 0.0024558762088418007,
      "rewards/margins": 0.0015082670142874122,
      "rewards/rejected": 0.0009476091363467276,
      "step": 10
    },
    {
      "epoch": 0.03,
      "grad_norm": 4.24809146375121,
      "learning_rate": 1.6666666666666667e-06,
      "logits/chosen": -0.9381664395332336,
      "logits/rejected": -0.8647511601448059,
      "logps/chosen": -328.6592102050781,
      "logps/rejected": -212.4827880859375,
      "loss": 0.6822,
      "rewards/accuracies": 0.7953125238418579,
      "rewards/chosen": 0.034552641212940216,
      "rewards/margins": 0.022216904908418655,
      "rewards/rejected": 0.012335737235844135,
      "step": 20
    },
    {
      "epoch": 0.05,
      "grad_norm": 3.9797438421104276,
      "learning_rate": 2.5e-06,
      "logits/chosen": -1.020849585533142,
      "logits/rejected": -0.9427526593208313,
      "logps/chosen": -329.3730163574219,
      "logps/rejected": -215.0742645263672,
      "loss": 0.6458,
      "rewards/accuracies": 0.854687511920929,
      "rewards/chosen": 0.1433950513601303,
      "rewards/margins": 0.10005545616149902,
      "rewards/rejected": 0.04333961382508278,
      "step": 30
    },
    {
      "epoch": 0.07,
      "grad_norm": 2.971987358375742,
      "learning_rate": 3.3333333333333333e-06,
      "logits/chosen": -1.1010403633117676,
      "logits/rejected": -1.0402967929840088,
      "logps/chosen": -310.91778564453125,
      "logps/rejected": -205.1094512939453,
      "loss": 0.5801,
      "rewards/accuracies": 0.885937511920929,
      "rewards/chosen": 0.3175004720687866,
      "rewards/margins": 0.25547632575035095,
      "rewards/rejected": 0.06202414631843567,
      "step": 40
    },
    {
      "epoch": 0.08,
      "grad_norm": 2.284422715303357,
      "learning_rate": 4.166666666666667e-06,
      "logits/chosen": -1.176831841468811,
      "logits/rejected": -1.1168550252914429,
      "logps/chosen": -289.9400634765625,
      "logps/rejected": -193.466552734375,
      "loss": 0.5192,
      "rewards/accuracies": 0.8765624761581421,
      "rewards/chosen": 0.4821470379829407,
      "rewards/margins": 0.42469802498817444,
      "rewards/rejected": 0.057449012994766235,
      "step": 50
    },
    {
      "epoch": 0.1,
      "grad_norm": 2.693731441358458,
      "learning_rate": 5e-06,
      "logits/chosen": -1.1333694458007812,
      "logits/rejected": -1.0512011051177979,
      "logps/chosen": -268.7071838378906,
      "logps/rejected": -216.21939086914062,
      "loss": 0.4625,
      "rewards/accuracies": 0.890625,
      "rewards/chosen": 0.570508599281311,
      "rewards/margins": 0.6040245890617371,
      "rewards/rejected": -0.03351598605513573,
      "step": 60
    },
    {
      "epoch": 0.12,
      "grad_norm": 2.982413579425004,
      "learning_rate": 4.995770395678171e-06,
      "logits/chosen": -0.9235696792602539,
      "logits/rejected": -0.8171814680099487,
      "logps/chosen": -288.454833984375,
      "logps/rejected": -243.9117431640625,
      "loss": 0.3919,
      "rewards/accuracies": 0.9359375238418579,
      "rewards/chosen": 0.4965239465236664,
      "rewards/margins": 0.8616136312484741,
      "rewards/rejected": -0.36508968472480774,
      "step": 70
    },
    {
      "epoch": 0.13,
      "grad_norm": 3.0591291304636767,
      "learning_rate": 4.983095894354858e-06,
      "logits/chosen": -0.5841406583786011,
      "logits/rejected": -0.5758659243583679,
      "logps/chosen": -282.39080810546875,
      "logps/rejected": -267.4037170410156,
      "loss": 0.3708,
      "rewards/accuracies": 0.9234374761581421,
      "rewards/chosen": 0.3620058298110962,
      "rewards/margins": 1.001431941986084,
      "rewards/rejected": -0.639426052570343,
      "step": 80
    },
    {
      "epoch": 0.15,
      "grad_norm": 2.7852348655427495,
      "learning_rate": 4.962019382530521e-06,
      "logits/chosen": -0.3057808578014374,
      "logits/rejected": -0.08402713388204575,
      "logps/chosen": -304.71258544921875,
      "logps/rejected": -306.744873046875,
      "loss": 0.3332,
      "rewards/accuracies": 0.934374988079071,
      "rewards/chosen": 0.34849730134010315,
      "rewards/margins": 1.234140157699585,
      "rewards/rejected": -0.8856428861618042,
      "step": 90
    },
    {
      "epoch": 0.17,
      "grad_norm": 3.904931302273915,
      "learning_rate": 4.93261217644956e-06,
      "logits/chosen": -0.3401206433773041,
      "logits/rejected": -0.006557087413966656,
      "logps/chosen": -306.9206848144531,
      "logps/rejected": -352.503173828125,
      "loss": 0.284,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": 0.13179777562618256,
      "rewards/margins": 1.5720919370651245,
      "rewards/rejected": -1.4402940273284912,
      "step": 100
    },
    {
      "epoch": 0.18,
      "grad_norm": 4.332999329515302,
      "learning_rate": 4.894973780788722e-06,
      "logits/chosen": -0.3165335953235626,
      "logits/rejected": 0.0790834054350853,
      "logps/chosen": -350.21405029296875,
      "logps/rejected": -433.90643310546875,
      "loss": 0.2218,
      "rewards/accuracies": 0.9609375,
      "rewards/chosen": -0.08845386654138565,
      "rewards/margins": 2.1888351440429688,
      "rewards/rejected": -2.2772889137268066,
      "step": 110
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.8287946526526464,
      "learning_rate": 4.849231551964771e-06,
      "logits/chosen": -0.34387272596359253,
      "logits/rejected": -0.02132757380604744,
      "logps/chosen": -342.6011047363281,
      "logps/rejected": -551.7183837890625,
      "loss": 0.1787,
      "rewards/accuracies": 0.9468749761581421,
      "rewards/chosen": -0.15126076340675354,
      "rewards/margins": 3.2273590564727783,
      "rewards/rejected": -3.378619432449341,
      "step": 120
    },
    {
      "epoch": 0.22,
      "grad_norm": 3.7220399456471203,
      "learning_rate": 4.7955402672006855e-06,
      "logits/chosen": -0.4739972949028015,
      "logits/rejected": -0.22087886929512024,
      "logps/chosen": -355.5228271484375,
      "logps/rejected": -637.9622192382812,
      "loss": 0.1651,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -0.20811741054058075,
      "rewards/margins": 4.130118370056152,
      "rewards/rejected": -4.338235378265381,
      "step": 130
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.216070201202239,
      "learning_rate": 4.734081600808531e-06,
      "logits/chosen": -0.5762065649032593,
      "logits/rejected": -0.37428033351898193,
      "logps/chosen": -342.99853515625,
      "logps/rejected": -746.65234375,
      "loss": 0.1198,
      "rewards/accuracies": 0.973437488079071,
      "rewards/chosen": -0.11533623933792114,
      "rewards/margins": 5.1436309814453125,
      "rewards/rejected": -5.258967399597168,
      "step": 140
    },
    {
      "epoch": 0.25,
      "grad_norm": 4.452614644496966,
      "learning_rate": 4.665063509461098e-06,
      "logits/chosen": -0.43740949034690857,
      "logits/rejected": -0.276010662317276,
      "logps/chosen": -353.4092712402344,
      "logps/rejected": -838.3703002929688,
      "loss": 0.145,
      "rewards/accuracies": 0.9671875238418579,
      "rewards/chosen": -0.23360753059387207,
      "rewards/margins": 5.936570167541504,
      "rewards/rejected": -6.170177459716797,
      "step": 150
    },
    {
      "epoch": 0.27,
      "grad_norm": 6.909783574695525,
      "learning_rate": 4.588719528532342e-06,
      "logits/chosen": 0.02835695818066597,
      "logits/rejected": 0.3119501769542694,
      "logps/chosen": -353.52203369140625,
      "logps/rejected": -654.4629516601562,
      "loss": 0.1368,
      "rewards/accuracies": 0.973437488079071,
      "rewards/chosen": -0.25593429803848267,
      "rewards/margins": 4.225825786590576,
      "rewards/rejected": -4.481760501861572,
      "step": 160
    },
    {
      "epoch": 0.28,
      "grad_norm": 1.7107339593657505,
      "learning_rate": 4.50530798188761e-06,
      "logits/chosen": -0.15743690729141235,
      "logits/rejected": 0.23819151520729065,
      "logps/chosen": -333.4959716796875,
      "logps/rejected": -659.2352294921875,
      "loss": 0.1267,
      "rewards/accuracies": 0.965624988079071,
      "rewards/chosen": -0.00615291204303503,
      "rewards/margins": 4.596193790435791,
      "rewards/rejected": -4.602346420288086,
      "step": 170
    },
    {
      "epoch": 0.3,
      "grad_norm": 3.623547042771,
      "learning_rate": 4.415111107797445e-06,
      "logits/chosen": -0.31918713450431824,
      "logits/rejected": 0.03602874279022217,
      "logps/chosen": -330.2435302734375,
      "logps/rejected": -757.0585327148438,
      "loss": 0.1056,
      "rewards/accuracies": 0.979687511920929,
      "rewards/chosen": 0.007477378938347101,
      "rewards/margins": 5.585347652435303,
      "rewards/rejected": -5.577870845794678,
      "step": 180
    },
    {
      "epoch": 0.32,
      "grad_norm": 2.1551725469519507,
      "learning_rate": 4.318434103932622e-06,
      "logits/chosen": -0.3329532742500305,
      "logits/rejected": -0.041298139840364456,
      "logps/chosen": -382.2855529785156,
      "logps/rejected": -988.2845458984375,
      "loss": 0.1055,
      "rewards/accuracies": 0.9703124761581421,
      "rewards/chosen": -0.40997394919395447,
      "rewards/margins": 7.356125831604004,
      "rewards/rejected": -7.766099452972412,
      "step": 190
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.5178815949434505,
      "learning_rate": 4.215604094671835e-06,
      "logits/chosen": -0.4541547894477844,
      "logits/rejected": -0.1613123118877411,
      "logps/chosen": -347.7196350097656,
      "logps/rejected": -880.2561645507812,
      "loss": 0.1013,
      "rewards/accuracies": 0.9781249761581421,
      "rewards/chosen": -0.13055315613746643,
      "rewards/margins": 6.6189093589782715,
      "rewards/rejected": -6.749462127685547,
      "step": 200
    },
    {
      "epoch": 0.35,
      "grad_norm": 2.0381257493358285,
      "learning_rate": 4.106969024216348e-06,
      "logits/chosen": -0.5205026865005493,
      "logits/rejected": -0.25759488344192505,
      "logps/chosen": -393.9586486816406,
      "logps/rejected": -1019.0203247070312,
      "loss": 0.0999,
      "rewards/accuracies": 0.9781249761581421,
      "rewards/chosen": -0.4994255006313324,
      "rewards/margins": 7.512589931488037,
      "rewards/rejected": -8.012015342712402,
      "step": 210
    },
    {
      "epoch": 0.37,
      "grad_norm": 6.3674783354384115,
      "learning_rate": 3.992896479256966e-06,
      "logits/chosen": -0.6750475168228149,
      "logits/rejected": -0.3842785954475403,
      "logps/chosen": -336.7120666503906,
      "logps/rejected": -858.0791015625,
      "loss": 0.1031,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": 0.017676908522844315,
      "rewards/margins": 6.612088680267334,
      "rewards/rejected": -6.594411373138428,
      "step": 220
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.419780694811269,
      "learning_rate": 3.8737724451770155e-06,
      "logits/chosen": -0.6326015591621399,
      "logits/rejected": -0.40925782918930054,
      "logps/chosen": -375.2301025390625,
      "logps/rejected": -1089.742919921875,
      "loss": 0.0811,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": -0.42722994089126587,
      "rewards/margins": 8.390230178833008,
      "rewards/rejected": -8.817461013793945,
      "step": 230
    },
    {
      "epoch": 0.4,
      "grad_norm": 17.310687719433773,
      "learning_rate": 3.7500000000000005e-06,
      "logits/chosen": -0.5065186023712158,
      "logits/rejected": -0.2878126800060272,
      "logps/chosen": -386.92266845703125,
      "logps/rejected": -1173.471435546875,
      "loss": 0.0646,
      "rewards/accuracies": 0.9921875,
      "rewards/chosen": -0.5754821300506592,
      "rewards/margins": 9.043745040893555,
      "rewards/rejected": -9.619227409362793,
      "step": 240
    },
    {
      "epoch": 0.42,
      "grad_norm": 1.566552488550366,
      "learning_rate": 3.621997950501156e-06,
      "logits/chosen": -0.23604285717010498,
      "logits/rejected": 0.03810877352952957,
      "logps/chosen": -380.695068359375,
      "logps/rejected": -1131.797607421875,
      "loss": 0.0778,
      "rewards/accuracies": 0.9859374761581421,
      "rewards/chosen": -0.5175285935401917,
      "rewards/margins": 8.581127166748047,
      "rewards/rejected": -9.098657608032227,
      "step": 250
    },
    {
      "epoch": 0.43,
      "grad_norm": 2.2662266467083074,
      "learning_rate": 3.4901994150978926e-06,
      "logits/chosen": -0.14488890767097473,
      "logits/rejected": 0.2611751854419708,
      "logps/chosen": -305.7989196777344,
      "logps/rejected": -813.2493896484375,
      "loss": 0.1017,
      "rewards/accuracies": 0.9765625,
      "rewards/chosen": 0.1130492091178894,
      "rewards/margins": 6.16524600982666,
      "rewards/rejected": -6.052196502685547,
      "step": 260
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.023161637845827,
      "learning_rate": 3.3550503583141726e-06,
      "logits/chosen": -0.14377684891223907,
      "logits/rejected": 0.22304537892341614,
      "logps/chosen": -372.13165283203125,
      "logps/rejected": -896.8603515625,
      "loss": 0.0815,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": -0.10399510711431503,
      "rewards/margins": 6.930342197418213,
      "rewards/rejected": -7.034337520599365,
      "step": 270
    },
    {
      "epoch": 0.47,
      "grad_norm": 2.8073955958066197,
      "learning_rate": 3.217008081777726e-06,
      "logits/chosen": -0.10699782520532608,
      "logits/rejected": 0.14858277142047882,
      "logps/chosen": -438.87823486328125,
      "logps/rejected": -1203.0933837890625,
      "loss": 0.0873,
      "rewards/accuracies": 0.9703124761581421,
      "rewards/chosen": -1.0280810594558716,
      "rewards/margins": 8.752517700195312,
      "rewards/rejected": -9.780599594116211,
      "step": 280
    },
    {
      "epoch": 0.48,
      "grad_norm": 4.347308476957998,
      "learning_rate": 3.0765396768561005e-06,
      "logits/chosen": -0.4454914927482605,
      "logits/rejected": -0.15811693668365479,
      "logps/chosen": -349.51531982421875,
      "logps/rejected": -1020.2342529296875,
      "loss": 0.0657,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -0.13168838620185852,
      "rewards/margins": 7.97658634185791,
      "rewards/rejected": -8.108274459838867,
      "step": 290
    },
    {
      "epoch": 0.5,
      "grad_norm": 4.923260234351252,
      "learning_rate": 2.9341204441673267e-06,
      "logits/chosen": -0.46575579047203064,
      "logits/rejected": -0.18623068928718567,
      "logps/chosen": -382.58807373046875,
      "logps/rejected": -1112.0509033203125,
      "loss": 0.0805,
      "rewards/accuracies": 0.979687511920929,
      "rewards/chosen": -0.5270034670829773,
      "rewards/margins": 8.469846725463867,
      "rewards/rejected": -8.996851921081543,
      "step": 300
    },
    {
      "epoch": 0.52,
      "grad_norm": 4.035586599115476,
      "learning_rate": 2.7902322853130758e-06,
      "logits/chosen": -0.47277918457984924,
      "logits/rejected": -0.22883549332618713,
      "logps/chosen": -396.8818054199219,
      "logps/rejected": -1298.666748046875,
      "loss": 0.0646,
      "rewards/accuracies": 0.9906250238418579,
      "rewards/chosen": -0.6961787343025208,
      "rewards/margins": 10.189523696899414,
      "rewards/rejected": -10.885702133178711,
      "step": 310
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.7894983493848236,
      "learning_rate": 2.6453620722761897e-06,
      "logits/chosen": -0.5364641547203064,
      "logits/rejected": -0.28673312067985535,
      "logps/chosen": -379.64337158203125,
      "logps/rejected": -1212.439697265625,
      "loss": 0.0705,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": -0.35370904207229614,
      "rewards/margins": 9.660139083862305,
      "rewards/rejected": -10.013849258422852,
      "step": 320
    },
    {
      "epoch": 0.55,
      "grad_norm": 3.3096114791299955,
      "learning_rate": 2.5e-06,
      "logits/chosen": -0.5816466212272644,
      "logits/rejected": -0.35151442885398865,
      "logps/chosen": -379.9997863769531,
      "logps/rejected": -1187.98876953125,
      "loss": 0.0533,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -0.41831880807876587,
      "rewards/margins": 9.45728874206543,
      "rewards/rejected": -9.875606536865234,
      "step": 330
    },
    {
      "epoch": 0.57,
      "grad_norm": 3.165697815332725,
      "learning_rate": 2.3546379277238107e-06,
      "logits/chosen": -0.47588858008384705,
      "logits/rejected": -0.26532530784606934,
      "logps/chosen": -454.8421325683594,
      "logps/rejected": -1527.9405517578125,
      "loss": 0.0613,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -1.2038371562957764,
      "rewards/margins": 11.936580657958984,
      "rewards/rejected": -13.140419006347656,
      "step": 340
    },
    {
      "epoch": 0.58,
      "grad_norm": 2.150182775021995,
      "learning_rate": 2.2097677146869242e-06,
      "logits/chosen": -0.5888150930404663,
      "logits/rejected": -0.2789239287376404,
      "logps/chosen": -349.5444030761719,
      "logps/rejected": -1127.449951171875,
      "loss": 0.0722,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -0.23732244968414307,
      "rewards/margins": 8.962444305419922,
      "rewards/rejected": -9.199767112731934,
      "step": 350
    },
    {
      "epoch": 0.6,
      "grad_norm": 2.370472414334315,
      "learning_rate": 2.0658795558326745e-06,
      "logits/chosen": -0.4766604006290436,
      "logits/rejected": -0.22312171757221222,
      "logps/chosen": -409.70135498046875,
      "logps/rejected": -1264.747314453125,
      "loss": 0.0719,
      "rewards/accuracies": 0.979687511920929,
      "rewards/chosen": -0.5473332405090332,
      "rewards/margins": 9.985517501831055,
      "rewards/rejected": -10.532853126525879,
      "step": 360
    },
    {
      "epoch": 0.62,
      "grad_norm": 4.485106240623341,
      "learning_rate": 1.9234603231439e-06,
      "logits/chosen": -0.40441417694091797,
      "logits/rejected": -0.13194730877876282,
      "logps/chosen": -391.0859069824219,
      "logps/rejected": -1242.066162109375,
      "loss": 0.0632,
      "rewards/accuracies": 0.9859374761581421,
      "rewards/chosen": -0.6057429909706116,
      "rewards/margins": 9.785425186157227,
      "rewards/rejected": -10.391169548034668,
      "step": 370
    },
    {
      "epoch": 0.63,
      "grad_norm": 1.3547912002584528,
      "learning_rate": 1.7829919182222752e-06,
      "logits/chosen": -0.3805684447288513,
      "logits/rejected": -0.11748667806386948,
      "logps/chosen": -420.901611328125,
      "logps/rejected": -1328.623291015625,
      "loss": 0.0609,
      "rewards/accuracies": 0.9859374761581421,
      "rewards/chosen": -0.752387523651123,
      "rewards/margins": 10.4163236618042,
      "rewards/rejected": -11.16871166229248,
      "step": 380
    },
    {
      "epoch": 0.65,
      "grad_norm": 7.41839867309329,
      "learning_rate": 1.6449496416858285e-06,
      "logits/chosen": -0.439796507358551,
      "logits/rejected": -0.18884414434432983,
      "logps/chosen": -361.68634033203125,
      "logps/rejected": -1187.6492919921875,
      "loss": 0.093,
      "rewards/accuracies": 0.9859374761581421,
      "rewards/chosen": -0.2755175232887268,
      "rewards/margins": 9.546114921569824,
      "rewards/rejected": -9.82163143157959,
      "step": 390
    },
    {
      "epoch": 0.67,
      "grad_norm": 1.9341085433225174,
      "learning_rate": 1.509800584902108e-06,
      "logits/chosen": -0.4470156133174896,
      "logits/rejected": -0.15826158225536346,
      "logps/chosen": -325.4810791015625,
      "logps/rejected": -963.2032470703125,
      "loss": 0.0697,
      "rewards/accuracies": 0.9921875,
      "rewards/chosen": 0.05155152827501297,
      "rewards/margins": 7.570789337158203,
      "rewards/rejected": -7.519238471984863,
      "step": 400
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.2887818701903795,
      "learning_rate": 1.3780020494988447e-06,
      "logits/chosen": -0.38168513774871826,
      "logits/rejected": -0.13108135759830475,
      "logps/chosen": -338.5289001464844,
      "logps/rejected": -994.54345703125,
      "loss": 0.0767,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -0.13217389583587646,
      "rewards/margins": 7.651003360748291,
      "rewards/rejected": -7.783177375793457,
      "step": 410
    },
    {
      "epoch": 0.7,
      "grad_norm": 10.334424423093157,
      "learning_rate": 1.2500000000000007e-06,
      "logits/chosen": -0.32895341515541077,
      "logits/rejected": -0.11540427058935165,
      "logps/chosen": -408.0703125,
      "logps/rejected": -1260.2664794921875,
      "loss": 0.0555,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": -0.7070173025131226,
      "rewards/margins": 9.677043914794922,
      "rewards/rejected": -10.384060859680176,
      "step": 420
    },
    {
      "epoch": 0.72,
      "grad_norm": 3.446631906756751,
      "learning_rate": 1.1262275548229852e-06,
      "logits/chosen": -0.45178350806236267,
      "logits/rejected": -0.1636919528245926,
      "logps/chosen": -414.2293395996094,
      "logps/rejected": -1283.5029296875,
      "loss": 0.0678,
      "rewards/accuracies": 0.984375,
      "rewards/chosen": -0.8513700366020203,
      "rewards/margins": 9.903493881225586,
      "rewards/rejected": -10.754863739013672,
      "step": 430
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.1936523282167912,
      "learning_rate": 1.0071035207430352e-06,
      "logits/chosen": -0.44514569640159607,
      "logits/rejected": -0.20628270506858826,
      "logps/chosen": -388.30224609375,
      "logps/rejected": -1252.75146484375,
      "loss": 0.048,
      "rewards/accuracies": 0.9921875,
      "rewards/chosen": -0.6398779153823853,
      "rewards/margins": 9.881658554077148,
      "rewards/rejected": -10.521535873413086,
      "step": 440
    },
    {
      "epoch": 0.75,
      "grad_norm": 2.7973345573120514,
      "learning_rate": 8.930309757836517e-07,
      "logits/chosen": -0.5846482515335083,
      "logits/rejected": -0.3184075355529785,
      "logps/chosen": -380.7689514160156,
      "logps/rejected": -1142.24365234375,
      "loss": 0.0603,
      "rewards/accuracies": 0.995312511920929,
      "rewards/chosen": -0.4020051956176758,
      "rewards/margins": 8.967304229736328,
      "rewards/rejected": -9.36931037902832,
      "step": 450
    },
    {
      "epoch": 0.77,
      "grad_norm": 1.7683111661015913,
      "learning_rate": 7.843959053281663e-07,
      "logits/chosen": -0.4719081521034241,
      "logits/rejected": -0.248517706990242,
      "logps/chosen": -379.7674560546875,
      "logps/rejected": -1191.5079345703125,
      "loss": 0.0567,
      "rewards/accuracies": 0.9859374761581421,
      "rewards/chosen": -0.3425825238227844,
      "rewards/margins": 9.473920822143555,
      "rewards/rejected": -9.816503524780273,
      "step": 460
    },
    {
      "epoch": 0.78,
      "grad_norm": 2.619770880026216,
      "learning_rate": 6.815658960673782e-07,
      "logits/chosen": -0.5005173683166504,
      "logits/rejected": -0.22156552970409393,
      "logps/chosen": -389.5422668457031,
      "logps/rejected": -1232.5107421875,
      "loss": 0.0507,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -0.6200243830680847,
      "rewards/margins": 9.514973640441895,
      "rewards/rejected": -10.134997367858887,
      "step": 470
    },
    {
      "epoch": 0.8,
      "grad_norm": 2.206190169965742,
      "learning_rate": 5.848888922025553e-07,
      "logits/chosen": -0.46861904859542847,
      "logits/rejected": -0.18907694518566132,
      "logps/chosen": -388.5583801269531,
      "logps/rejected": -1196.972900390625,
      "loss": 0.0648,
      "rewards/accuracies": 0.9921875,
      "rewards/chosen": -0.40207749605178833,
      "rewards/margins": 9.50928783416748,
      "rewards/rejected": -9.911364555358887,
      "step": 480
    },
    {
      "epoch": 0.82,
      "grad_norm": 5.22635215684865,
      "learning_rate": 4.946920181123904e-07,
      "logits/chosen": -0.49871087074279785,
      "logits/rejected": -0.23706772923469543,
      "logps/chosen": -378.83209228515625,
      "logps/rejected": -1230.994140625,
      "loss": 0.0594,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -0.36057573556900024,
      "rewards/margins": 9.88486099243164,
      "rewards/rejected": -10.245436668395996,
      "step": 490
    },
    {
      "epoch": 0.83,
      "grad_norm": 4.399177165103731,
      "learning_rate": 4.1128047146765936e-07,
      "logits/chosen": -0.4957138001918793,
      "logits/rejected": -0.22013764083385468,
      "logps/chosen": -358.6220703125,
      "logps/rejected": -1129.539794921875,
      "loss": 0.0699,
      "rewards/accuracies": 0.9906250238418579,
      "rewards/chosen": -0.2314276248216629,
      "rewards/margins": 9.046243667602539,
      "rewards/rejected": -9.277670860290527,
      "step": 500
    },
    {
      "epoch": 0.85,
      "grad_norm": 2.4578322691260586,
      "learning_rate": 3.3493649053890325e-07,
      "logits/chosen": -0.3916376233100891,
      "logits/rejected": -0.13147909939289093,
      "logps/chosen": -376.29583740234375,
      "logps/rejected": -1182.824462890625,
      "loss": 0.0487,
      "rewards/accuracies": 0.995312511920929,
      "rewards/chosen": -0.43332648277282715,
      "rewards/margins": 9.195769309997559,
      "rewards/rejected": -9.629096031188965,
      "step": 510
    },
    {
      "epoch": 0.87,
      "grad_norm": 2.130490613601597,
      "learning_rate": 2.6591839919146963e-07,
      "logits/chosen": -0.36289340257644653,
      "logits/rejected": -0.13988874852657318,
      "logps/chosen": -386.994873046875,
      "logps/rejected": -1234.4404296875,
      "loss": 0.0544,
      "rewards/accuracies": 0.9859374761581421,
      "rewards/chosen": -0.5290244221687317,
      "rewards/margins": 9.592443466186523,
      "rewards/rejected": -10.121468544006348,
      "step": 520
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.5265891239165703,
      "learning_rate": 2.044597327993153e-07,
      "logits/chosen": -0.44066888093948364,
      "logits/rejected": -0.1672184318304062,
      "logps/chosen": -404.02349853515625,
      "logps/rejected": -1213.29638671875,
      "loss": 0.0571,
      "rewards/accuracies": 0.9906250238418579,
      "rewards/chosen": -0.50049889087677,
      "rewards/margins": 9.578344345092773,
      "rewards/rejected": -10.07884407043457,
      "step": 530
    },
    {
      "epoch": 0.9,
      "grad_norm": 1.3929764183361966,
      "learning_rate": 1.507684480352292e-07,
      "logits/chosen": -0.4457983374595642,
      "logits/rejected": -0.20653533935546875,
      "logps/chosen": -387.2595520019531,
      "logps/rejected": -1257.850341796875,
      "loss": 0.0582,
      "rewards/accuracies": 0.9906250238418579,
      "rewards/chosen": -0.5553634762763977,
      "rewards/margins": 9.88083267211914,
      "rewards/rejected": -10.436195373535156,
      "step": 540
    },
    {
      "epoch": 0.92,
      "grad_norm": 1.8197547536284149,
      "learning_rate": 1.0502621921127776e-07,
      "logits/chosen": -0.4422330856323242,
      "logits/rejected": -0.1755351722240448,
      "logps/chosen": -414.64715576171875,
      "logps/rejected": -1265.412353515625,
      "loss": 0.052,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -0.6048385500907898,
      "rewards/margins": 9.867313385009766,
      "rewards/rejected": -10.472152709960938,
      "step": 550
    },
    {
      "epoch": 0.93,
      "grad_norm": 3.101887331185102,
      "learning_rate": 6.738782355044048e-08,
      "logits/chosen": -0.4752727448940277,
      "logits/rejected": -0.20606884360313416,
      "logps/chosen": -390.14984130859375,
      "logps/rejected": -1334.9105224609375,
      "loss": 0.0647,
      "rewards/accuracies": 0.979687511920929,
      "rewards/chosen": -0.6308324933052063,
      "rewards/margins": 10.629243850708008,
      "rewards/rejected": -11.260076522827148,
      "step": 560
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.441898096294271,
      "learning_rate": 3.798061746947995e-08,
      "logits/chosen": -0.480734646320343,
      "logits/rejected": -0.21511860191822052,
      "logps/chosen": -387.4568786621094,
      "logps/rejected": -1270.74853515625,
      "loss": 0.0527,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -0.7548898458480835,
      "rewards/margins": 9.84511947631836,
      "rewards/rejected": -10.600008964538574,
      "step": 570
    },
    {
      "epoch": 0.97,
      "grad_norm": 2.2741744504193826,
      "learning_rate": 1.6904105645142443e-08,
      "logits/chosen": -0.44097834825515747,
      "logits/rejected": -0.2139013707637787,
      "logps/chosen": -391.39453125,
      "logps/rejected": -1291.123779296875,
      "loss": 0.0469,
      "rewards/accuracies": 0.9906250238418579,
      "rewards/chosen": -0.5810464024543762,
      "rewards/margins": 10.310724258422852,
      "rewards/rejected": -10.891772270202637,
      "step": 580
    },
    {
      "epoch": 0.98,
      "grad_norm": 2.909048473319547,
      "learning_rate": 4.229604321829561e-09,
      "logits/chosen": -0.48271116614341736,
      "logits/rejected": -0.22559651732444763,
      "logps/chosen": -406.5685119628906,
      "logps/rejected": -1254.4326171875,
      "loss": 0.0513,
      "rewards/accuracies": 0.9937499761581421,
      "rewards/chosen": -0.6645079851150513,
      "rewards/margins": 9.709487915039062,
      "rewards/rejected": -10.373995780944824,
      "step": 590
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.7152944727872392,
      "learning_rate": 0.0,
      "logits/chosen": -0.49268728494644165,
      "logits/rejected": -0.23753933608531952,
      "logps/chosen": -411.12884521484375,
      "logps/rejected": -1246.8544921875,
      "loss": 0.0566,
      "rewards/accuracies": 0.9921875,
      "rewards/chosen": -0.7164795994758606,
      "rewards/margins": 9.6405668258667,
      "rewards/rejected": -10.357046127319336,
      "step": 600
    },
    {
      "epoch": 1.0,
      "step": 600,
      "total_flos": 0.0,
      "train_loss": 0.150745850255092,
      "train_runtime": 48820.8334,
      "train_samples_per_second": 0.788,
      "train_steps_per_second": 0.012
    }
  ],
  "logging_steps": 10,
  "max_steps": 600,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}