| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.11193194537721066, | |
| "eval_steps": 500, | |
| "global_step": 500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1655.296875, | |
| "epoch": 0.00022386389075442132, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.3333333333333335e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1472.515625, | |
| "epoch": 0.00044772778150884264, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.666666666666667e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1531.5, | |
| "epoch": 0.000671591672263264, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 3 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1373.15625, | |
| "epoch": 0.0008954555630176853, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.3333333333333334e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 4 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1413.40625, | |
| "epoch": 0.0011193194537721066, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 5 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1326.8125, | |
| "epoch": 0.001343183344526528, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 6 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1552.796875, | |
| "epoch": 0.0015670472352809493, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.3333333333333336e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 7 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1336.5546875, | |
| "epoch": 0.0017909111260353706, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.666666666666667e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 8 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1413.3671875, | |
| "epoch": 0.0020147750167897917, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 9 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1703.4140625, | |
| "epoch": 0.0022386389075442132, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 10 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1376.2109375, | |
| "epoch": 0.0024625027982986343, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.6666666666666666e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 11 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1418.2421875, | |
| "epoch": 0.002686366689053056, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 12 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1530.109375, | |
| "epoch": 0.002910230579807477, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.333333333333334e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 13 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1515.1953125, | |
| "epoch": 0.0031340944705618985, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.666666666666667e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 14 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1539.0703125, | |
| "epoch": 0.0033579583613163196, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 15 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1492.453125, | |
| "epoch": 0.003581822252070741, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.999952797253148e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 16 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1330.4921875, | |
| "epoch": 0.0038056861428251623, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.9998111909931225e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 17 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1294.21875, | |
| "epoch": 0.004029550033579583, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.999575187161439e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 18 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1471.2578125, | |
| "epoch": 0.004253413924334005, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.9992447956603455e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 19 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1558.03125, | |
| "epoch": 0.0044772778150884264, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.998820030352409e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 20 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1480.296875, | |
| "epoch": 0.0047011417058428475, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.998300909059929e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 21 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1680.7265625, | |
| "epoch": 0.004925005596597269, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.997687453564198e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 22 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1656.953125, | |
| "epoch": 0.00514886948735169, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.9969796896045775e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 23 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1574.109375, | |
| "epoch": 0.005372733378106112, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.996177646877426e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 24 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1613.09375, | |
| "epoch": 0.005596597268860533, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.995281359034851e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 25 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1466.1171875, | |
| "epoch": 0.005820461159614954, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.994290863683296e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 26 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1433.765625, | |
| "epoch": 0.006044325050369375, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.99320620238196e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 27 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1406.484375, | |
| "epoch": 0.006268188941123797, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.99202742064106e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 28 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1569.953125, | |
| "epoch": 0.006492052831878218, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.990754567919917e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 29 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1444.7265625, | |
| "epoch": 0.006715916722632639, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.989387697624881e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 30 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1343.515625, | |
| "epoch": 0.00693978061338706, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.987926867107095e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 31 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1213.265625, | |
| "epoch": 0.007163644504141482, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.986372137660078e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 32 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1399.734375, | |
| "epoch": 0.007387508394895903, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.984723574517165e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 33 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1394.203125, | |
| "epoch": 0.0076113722856503245, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.9829812468487655e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 34 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1479.8671875, | |
| "epoch": 0.007835236176404746, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.981145227759457e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 35 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1360.265625, | |
| "epoch": 0.008059100067159167, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.979215594284924e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 36 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1391.1484375, | |
| "epoch": 0.008282963957913588, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.977192427388722e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 37 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1436.90625, | |
| "epoch": 0.00850682784866801, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.9750758119588824e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 38 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1430.6953125, | |
| "epoch": 0.008730691739422432, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.972865836804349e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 39 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1507.3359375, | |
| "epoch": 0.008954555630176853, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.970562594651254e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 40 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1441.7890625, | |
| "epoch": 0.009178419520931274, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.968166182139026e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 41 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1425.890625, | |
| "epoch": 0.009402283411685695, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.9656766998163306e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 42 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1454.359375, | |
| "epoch": 0.009626147302440116, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.963094252136865e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 43 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1311.328125, | |
| "epoch": 0.009850011193194537, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.960418947454958e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 44 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1199.078125, | |
| "epoch": 0.010073875083948958, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.957650898021038e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 45 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1283.125, | |
| "epoch": 0.01029773897470338, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.954790219976915e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 46 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1484.90625, | |
| "epoch": 0.010521602865457802, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.95183703335091e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 47 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1561.3671875, | |
| "epoch": 0.010745466756212223, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.948791462052819e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 48 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1448.921875, | |
| "epoch": 0.010969330646966645, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.945653633868716e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 49 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1646.078125, | |
| "epoch": 0.011193194537721066, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.942423680455584e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 50 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1225.1328125, | |
| "epoch": 0.011417058428475487, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.939101737335802e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 51 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1421.21875, | |
| "epoch": 0.011640922319229908, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.935687943891447e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 52 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1568.203125, | |
| "epoch": 0.011864786209984329, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.932182443358458e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 53 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1404.1484375, | |
| "epoch": 0.01208865010073875, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.928585382820616e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 54 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1405.2734375, | |
| "epoch": 0.012312513991493173, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.924896913203376e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 55 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1475.7265625, | |
| "epoch": 0.012536377882247594, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.921117189267535e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 56 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1386.4375, | |
| "epoch": 0.012760241773002015, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.917246369602742e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 57 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1531.7578125, | |
| "epoch": 0.012984105663756436, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.9132846166208355e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 58 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1518.1328125, | |
| "epoch": 0.013207969554510857, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.9092320965490365e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 59 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1547.7421875, | |
| "epoch": 0.013431833445265278, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.905088979422971e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 60 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1492.53125, | |
| "epoch": 0.0136556973360197, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.900855439079536e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 61 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1513.7890625, | |
| "epoch": 0.01387956122677412, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.8965316531496055e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 62 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1364.2109375, | |
| "epoch": 0.014103425117528543, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.892117803050578e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 63 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1276.1640625, | |
| "epoch": 0.014327289008282965, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.887614073978761e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 64 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1471.140625, | |
| "epoch": 0.014551152899037386, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.883020654901609e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 65 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1466.140625, | |
| "epoch": 0.014775016789791807, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.878337738549785e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 66 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1515.09375, | |
| "epoch": 0.014998880680546228, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.873565521409082e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 67 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1340.1171875, | |
| "epoch": 0.015222744571300649, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.868704203712173e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 68 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1311.34375, | |
| "epoch": 0.01544660846205507, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.86375398943021e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 69 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1511.328125, | |
| "epoch": 0.01567047235280949, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.858715086264274e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 70 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1520.1328125, | |
| "epoch": 0.015894336243563914, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.853587705636646e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 71 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1335.328125, | |
| "epoch": 0.016118200134318333, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.84837206268195e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 72 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1419.09375, | |
| "epoch": 0.016342064025072756, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.8430683762381195e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 73 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1327.6171875, | |
| "epoch": 0.016565927915827176, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.837676868837213e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 74 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1501.1953125, | |
| "epoch": 0.0167897918065816, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.832197766696085e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 75 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1382.734375, | |
| "epoch": 0.01701365569733602, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.826631299706887e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 76 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1493.1015625, | |
| "epoch": 0.01723751958809044, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.820977701427424e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 77 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1346.6640625, | |
| "epoch": 0.017461383478844864, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.81523720907136e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 78 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1282.984375, | |
| "epoch": 0.017685247369599283, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.809410063498254e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 79 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1411.0234375, | |
| "epoch": 0.017909111260353706, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.8034965092034656e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 80 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1567.171875, | |
| "epoch": 0.018132975151108125, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.797496794307889e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 81 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1468.453125, | |
| "epoch": 0.018356839041862548, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.791411170547545e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 82 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1549.4765625, | |
| "epoch": 0.018580702932616967, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.785239893263017e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 83 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1455.671875, | |
| "epoch": 0.01880456682337139, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.778983221388742e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 84 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1346.6015625, | |
| "epoch": 0.019028430714125813, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.77264141744214e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 85 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1385.125, | |
| "epoch": 0.019252294604880232, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.766214747512603e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 86 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1342.3046875, | |
| "epoch": 0.019476158495634655, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.759703481250331e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 87 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1342.1953125, | |
| "epoch": 0.019700022386389075, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.753107891855015e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 88 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1262.2578125, | |
| "epoch": 0.019923886277143497, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.746428256064375e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 89 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1415.8359375, | |
| "epoch": 0.020147750167897917, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.7396648541425534e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 90 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1452.890625, | |
| "epoch": 0.02037161405865234, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.732817969868348e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 91 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1325.6328125, | |
| "epoch": 0.02059547794940676, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.7258878905233095e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 92 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1520.609375, | |
| "epoch": 0.020819341840161182, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.718874906879688e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 93 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1448.4453125, | |
| "epoch": 0.021043205730915605, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.711779313188231e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 94 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1488.46875, | |
| "epoch": 0.021267069621670024, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.70460140716584e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 95 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1200.421875, | |
| "epoch": 0.021490933512424447, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.697341489983076e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 96 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1338.4765625, | |
| "epoch": 0.021714797403178866, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.6899998662515215e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 97 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1540.8515625, | |
| "epoch": 0.02193866129393329, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.682576844011007e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 98 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1411.515625, | |
| "epoch": 0.02216252518468771, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.675072734716678e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 99 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1551.9296875, | |
| "epoch": 0.02238638907544213, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.667487853225931e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 100 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1638.984375, | |
| "epoch": 0.022610252966196554, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.659822517785203e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 101 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1587.796875, | |
| "epoch": 0.022834116856950974, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.6520770500166165e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 102 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1385.234375, | |
| "epoch": 0.023057980747705396, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.644251774904487e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 103 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1506.09375, | |
| "epoch": 0.023281844638459816, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.636347020781684e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 104 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1262.0546875, | |
| "epoch": 0.02350570852921424, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.6283631193158605e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 105 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1430.171875, | |
| "epoch": 0.023729572419968658, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.620300405495532e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 106 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1396.125, | |
| "epoch": 0.02395343631072308, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.612159217616022e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 107 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1554.3671875, | |
| "epoch": 0.0241773002014775, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.603939897265268e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 108 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1577.40625, | |
| "epoch": 0.024401164092231923, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.595642789309492e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 109 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1502.78125, | |
| "epoch": 0.024625027982986346, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.587268241878724e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 110 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1501.1875, | |
| "epoch": 0.024848891873740765, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.578816606352205e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 111 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1301.2109375, | |
| "epoch": 0.025072755764495188, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.570288237343632e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 112 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1681.3828125, | |
| "epoch": 0.025296619655249607, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.561683492686289e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 113 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1349.203125, | |
| "epoch": 0.02552048354600403, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.5530027334180285e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 114 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1492.296875, | |
| "epoch": 0.02574434743675845, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.544246323766122e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 115 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1513.1640625, | |
| "epoch": 0.025968211327512872, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.535414631131983e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 116 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1376.2890625, | |
| "epoch": 0.026192075218267292, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.526508026075746e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 117 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1359.21875, | |
| "epoch": 0.026415939109021715, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.517526882300721e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 118 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1456.6796875, | |
| "epoch": 0.026639802999776137, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.508471576637713e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 119 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1253.671875, | |
| "epoch": 0.026863666890530557, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.499342489029211e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 120 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1372.2578125, | |
| "epoch": 0.02708753078128498, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.490140002513449e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 121 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1431.2265625, | |
| "epoch": 0.0273113946720394, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.48086450320833e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 122 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1420.8515625, | |
| "epoch": 0.027535258562793822, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.4715163802952266e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 123 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1577.1328125, | |
| "epoch": 0.02775912245354824, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.462096026002655e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 124 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1618.8671875, | |
| "epoch": 0.027982986344302664, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.4526038355898144e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 125 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1437.9140625, | |
| "epoch": 0.028206850235057087, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.4430402073300035e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 126 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1653.28125, | |
| "epoch": 0.028430714125811506, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.433405542493909e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 127 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1571.09375, | |
| "epoch": 0.02865457801656593, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.4237002453327734e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 128 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1553.875, | |
| "epoch": 0.02887844190732035, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.4139247230614245e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 129 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1370.7265625, | |
| "epoch": 0.02910230579807477, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.404079385841201e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 130 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1252.796875, | |
| "epoch": 0.02932616968882919, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.394164646762734e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 131 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1470.53125, | |
| "epoch": 0.029550033579583614, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.384180921828618e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 132 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1385.6875, | |
| "epoch": 0.029773897470338033, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.374128629935955e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 133 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1506.8125, | |
| "epoch": 0.029997761361092456, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.364008192858781e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 134 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1270.125, | |
| "epoch": 0.03022162525184688, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.353820035230366e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 135 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1430.71875, | |
| "epoch": 0.030445489142601298, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.3435645845254e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 136 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1276.1953125, | |
| "epoch": 0.03066935303335572, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.333242271042054e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 137 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1428.0234375, | |
| "epoch": 0.03089321692411014, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.32285352788393e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 138 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1613.7734375, | |
| "epoch": 0.031117080814864563, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.312398790941882e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 139 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1413.2421875, | |
| "epoch": 0.03134094470561898, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.301878498875735e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 140 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1433.546875, | |
| "epoch": 0.0315648085963734, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.291293093095873e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 141 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1392.9375, | |
| "epoch": 0.03178867248712783, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.280643017744723e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 142 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1436.703125, | |
| "epoch": 0.03201253637788225, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.269928719678117e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 143 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1377.3671875, | |
| "epoch": 0.03223640026863667, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.2591506484465426e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 144 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1543.734375, | |
| "epoch": 0.03246026415939109, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.248309256276283e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 145 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1514.2578125, | |
| "epoch": 0.03268412805014551, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.23740499805044e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 146 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1440.0703125, | |
| "epoch": 0.03290799194089993, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.22643833128985e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 147 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1525.921875, | |
| "epoch": 0.03313185583165435, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.215409716133885e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 148 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1506.9140625, | |
| "epoch": 0.03335571972240878, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.204319615321151e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 149 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1276.328125, | |
| "epoch": 0.0335795836131632, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.193168494170065e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 150 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1629.078125, | |
| "epoch": 0.033803447503917616, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.181956820559339e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 151 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1391.03125, | |
| "epoch": 0.03402731139467204, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.170685064908342e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 152 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1526.6328125, | |
| "epoch": 0.03425117528542646, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.159353700157365e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 153 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1522.6171875, | |
| "epoch": 0.03447503917618088, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.14796320174778e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 154 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1647.359375, | |
| "epoch": 0.0346989030669353, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.136514047602087e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 155 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1414.6640625, | |
| "epoch": 0.03492276695768973, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.1250067181038635e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 156 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1453.640625, | |
| "epoch": 0.035146630848444146, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.113441696077608e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 157 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1346.765625, | |
| "epoch": 0.035370494739198566, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.101819466768484e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 158 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1451.0625, | |
| "epoch": 0.035594358629952985, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.0901405178219535e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 159 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1380.1171875, | |
| "epoch": 0.03581822252070741, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.078405339263326e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 160 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1542.1015625, | |
| "epoch": 0.03604208641146183, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.06661442347719e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 161 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1697.640625, | |
| "epoch": 0.03626595030221625, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.054768265186758e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 162 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1322.03125, | |
| "epoch": 0.036489814192970677, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.0428673614331036e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 163 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1337.0703125, | |
| "epoch": 0.036713678083725096, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.030912211554316e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 164 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1601.0859375, | |
| "epoch": 0.036937541974479515, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.018903317164539e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 165 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1404.96875, | |
| "epoch": 0.037161405865233935, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 4.006841182132932e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 166 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1469.59375, | |
| "epoch": 0.03738526975598836, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.9947263125625195e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 167 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1533.4140625, | |
| "epoch": 0.03760913364674278, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.982559216768967e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 168 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1460.34375, | |
| "epoch": 0.0378329975374972, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.970340405259245e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 169 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1378.4375, | |
| "epoch": 0.038056861428251626, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.958070390710214e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 170 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1522.8984375, | |
| "epoch": 0.038280725319006045, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.945749687947109e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 171 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1606.2265625, | |
| "epoch": 0.038504589209760465, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.933378813921942e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 172 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1675.6640625, | |
| "epoch": 0.038728453100514884, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.920958287691811e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 173 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1531.328125, | |
| "epoch": 0.03895231699126931, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.908488630397121e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 174 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1332.2734375, | |
| "epoch": 0.03917618088202373, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.8959703652397175e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 175 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1413.65625, | |
| "epoch": 0.03940004477277815, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.883404017460935e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 176 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1529.328125, | |
| "epoch": 0.039623908663532575, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.870790114319559e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 177 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1411.3671875, | |
| "epoch": 0.039847772554286995, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.858129185069701e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 178 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1562.75, | |
| "epoch": 0.040071636445041414, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.845421760938597e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 179 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1550.078125, | |
| "epoch": 0.040295500335795834, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.832668375104312e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 180 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1467.390625, | |
| "epoch": 0.04051936422655026, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.8198695626733725e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 181 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1349.46875, | |
| "epoch": 0.04074322811730468, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.8070258606583156e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 182 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1501.734375, | |
| "epoch": 0.0409670920080591, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.7941378079551544e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 183 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1670.75, | |
| "epoch": 0.04119095589881352, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.7812059453207677e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 184 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1625.234375, | |
| "epoch": 0.041414819789567944, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.768230815350213e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 185 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1620.7890625, | |
| "epoch": 0.041638683680322364, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.7552129624539557e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 186 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1274.6484375, | |
| "epoch": 0.04186254757107678, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.7421529328350316e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 187 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1626.6953125, | |
| "epoch": 0.04208641146183121, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.7290512744661274e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 188 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1592.765625, | |
| "epoch": 0.04231027535258563, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.715908537066589e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 189 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1371.4296875, | |
| "epoch": 0.04253413924334005, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.7027252720793538e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 190 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1485.9921875, | |
| "epoch": 0.04275800313409447, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.689502032647817e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 191 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1573.65625, | |
| "epoch": 0.042981867024848894, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.6762393735926245e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 192 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1593.4921875, | |
| "epoch": 0.04320573091560331, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.6629378513883852e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 193 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1447.59375, | |
| "epoch": 0.04342959480635773, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.6495980241403307e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 194 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1442.171875, | |
| "epoch": 0.04365345869711216, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.636220451560896e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 195 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1492.578125, | |
| "epoch": 0.04387732258786658, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.622805694946235e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 196 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1453.671875, | |
| "epoch": 0.044101186478621, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.609354317152667e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 197 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1406.3359375, | |
| "epoch": 0.04432505036937542, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.595866882573063e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 198 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1382.8125, | |
| "epoch": 0.04454891426012984, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.5823439571131675e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 199 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1143.0390625, | |
| "epoch": 0.04477277815088426, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.5687861081678477e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 200 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1513.890625, | |
| "epoch": 0.04499664204163868, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.555193904597291e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 201 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1355.09375, | |
| "epoch": 0.04522050593239311, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.541567916703138e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 202 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1350.5234375, | |
| "epoch": 0.04544436982314753, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.5279087162045517e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 203 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1582.9765625, | |
| "epoch": 0.04566823371390195, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.5142168762142265e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 204 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1417.4765625, | |
| "epoch": 0.045892097604656366, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.500492971214347e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 205 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1668.515625, | |
| "epoch": 0.04611596149541079, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.48673757703248e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 206 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1426.3203125, | |
| "epoch": 0.04633982538616521, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.472951270817418e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 207 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1679.1171875, | |
| "epoch": 0.04656368927691963, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.4591346310149578e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 208 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1321.53125, | |
| "epoch": 0.04678755316767405, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.445288237343632e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 209 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1421.25, | |
| "epoch": 0.04701141705842848, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.4314126707703895e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 210 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1598.328125, | |
| "epoch": 0.047235280949182896, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.4175085134862128e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 211 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1452.09375, | |
| "epoch": 0.047459144839937316, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.4035763488816953e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 212 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1622.578125, | |
| "epoch": 0.04768300873069174, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.3896167615225594e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 213 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1509.5078125, | |
| "epoch": 0.04790687262144616, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.375630337125133e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 214 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1313.828125, | |
| "epoch": 0.04813073651220058, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.361617662531772e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 215 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1342.265625, | |
| "epoch": 0.048354600402955, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.347579325686237e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 216 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1338.90625, | |
| "epoch": 0.04857846429370943, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.333515915609027e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 217 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1424.359375, | |
| "epoch": 0.048802328184463846, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.3194280223726616e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 218 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1512.2734375, | |
| "epoch": 0.049026192075218265, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.305316237076927e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 219 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1394.265625, | |
| "epoch": 0.04925005596597269, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.291181151824071e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 220 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1480.78125, | |
| "epoch": 0.04947391985672711, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.27702335969396e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 221 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1458.0859375, | |
| "epoch": 0.04969778374748153, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.2628434547191985e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 222 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1398.53125, | |
| "epoch": 0.04992164763823595, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.2486420318601973e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 223 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1486.484375, | |
| "epoch": 0.050145511528990376, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.2344196869802187e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 224 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1520.59375, | |
| "epoch": 0.050369375419744795, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.2201770168203694e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 225 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1339.84375, | |
| "epoch": 0.050593239310499215, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.205914618974563e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 226 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1396.390625, | |
| "epoch": 0.05081710320125364, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.1916330918644496e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 227 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1500.984375, | |
| "epoch": 0.05104096709200806, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.177333034714303e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 228 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1577.0078125, | |
| "epoch": 0.05126483098276248, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.1630150475258813e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 229 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1447.8984375, | |
| "epoch": 0.0514886948735169, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.148679731053252e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 230 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1379.21875, | |
| "epoch": 0.051712558764271326, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.1343276867775805e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 231 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1550.953125, | |
| "epoch": 0.051936422655025745, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.1199595168819043e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 232 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1334.3046875, | |
| "epoch": 0.052160286545780164, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.105575824225852e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 233 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1439.53125, | |
| "epoch": 0.052384150436534584, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.091177212320363e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 234 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1200.859375, | |
| "epoch": 0.05260801432728901, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.0767642853023538e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 235 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1532.390625, | |
| "epoch": 0.05283187821804343, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.062337647909376e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 236 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1206.203125, | |
| "epoch": 0.05305574210879785, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.04789790545424e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 237 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1334.2109375, | |
| "epoch": 0.053279605999552275, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.033445663799621e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 238 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1636.328125, | |
| "epoch": 0.053503469890306694, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.018981529332633e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 239 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1461.765625, | |
| "epoch": 0.053727333781061114, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 3.00450610893939e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 240 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1383.0078125, | |
| "epoch": 0.05395119767181553, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.9900200099795396e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 241 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1368.546875, | |
| "epoch": 0.05417506156256996, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.9755238402607826e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 242 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1509.5546875, | |
| "epoch": 0.05439892545332438, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.961018208013367e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 243 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1199.5234375, | |
| "epoch": 0.0546227893440788, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.9465037218645694e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 244 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1420.0078125, | |
| "epoch": 0.054846653234833224, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.9319809908131604e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 245 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1104.4921875, | |
| "epoch": 0.055070517125587644, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.917450624203847e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 246 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1537.046875, | |
| "epoch": 0.05529438101634206, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.9029132317017118e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 247 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1604.6953125, | |
| "epoch": 0.05551824490709648, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.888369423266629e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 248 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1496.03125, | |
| "epoch": 0.05574210879785091, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.8738198091276712e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 249 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1330.9765625, | |
| "epoch": 0.05596597268860533, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.859264999757509e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 250 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1429.515625, | |
| "epoch": 0.05618983657935975, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.8447056058467928e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 251 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1445.1953125, | |
| "epoch": 0.056413700470114174, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.830142238278531e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 252 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1581.8125, | |
| "epoch": 0.05663756436086859, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.81557550810246e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 253 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1187.6015625, | |
| "epoch": 0.05686142825162301, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.8010060265094026e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 254 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1456.046875, | |
| "epoch": 0.05708529214237743, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.786434404805629e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 255 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1392.8125, | |
| "epoch": 0.05730915603313186, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.771861254387199e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 256 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1638.6640625, | |
| "epoch": 0.05753301992388628, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.7572871867143204e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 257 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1276.59375, | |
| "epoch": 0.0577568838146407, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.742712813285681e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 258 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1369.6328125, | |
| "epoch": 0.057980747705395116, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.7281387456128017e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 259 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1550.6171875, | |
| "epoch": 0.05820461159614954, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.7135655951943716e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 260 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1666.109375, | |
| "epoch": 0.05842847548690396, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.698993973490598e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 261 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1419.6953125, | |
| "epoch": 0.05865233937765838, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.6844244918975416e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 262 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1420.859375, | |
| "epoch": 0.05887620326841281, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.66985776172147e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 263 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1583.546875, | |
| "epoch": 0.05910006715916723, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.6552943941532088e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 264 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1304.328125, | |
| "epoch": 0.05932393104992165, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.6407350002424927e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 265 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1455.9765625, | |
| "epoch": 0.059547794940676066, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.626180190872329e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 266 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1476.6328125, | |
| "epoch": 0.05977165883143049, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.611630576733372e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 267 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1479.375, | |
| "epoch": 0.05999552272218491, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.5970867682982885e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 268 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1421.4296875, | |
| "epoch": 0.06021938661293933, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.582549375796154e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 269 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1453.5859375, | |
| "epoch": 0.06044325050369376, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.568019009186841e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 270 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1624.34375, | |
| "epoch": 0.06066711439444818, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.5534962781354317e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 271 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1538.3828125, | |
| "epoch": 0.060890978285202596, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.538981791986634e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 272 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1196.140625, | |
| "epoch": 0.061114842175957015, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.524476159739218e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 273 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1578.375, | |
| "epoch": 0.06133870606671144, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.5099799900204607e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 274 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1680.6484375, | |
| "epoch": 0.06156256995746586, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.4954938910606108e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 275 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1419.1328125, | |
| "epoch": 0.06178643384822028, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.481018470667368e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 276 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1616.4921875, | |
| "epoch": 0.06201029773897471, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.4665543362003802e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 277 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1446.71875, | |
| "epoch": 0.062234161629729126, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.4521020945457615e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 278 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1533.21875, | |
| "epoch": 0.062458025520483546, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.4376623520906255e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 279 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1385.2421875, | |
| "epoch": 0.06268188941123796, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.4232357146976478e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 280 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1737.515625, | |
| "epoch": 0.06290575330199238, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.408822787679637e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 281 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1574.046875, | |
| "epoch": 0.0631296171927468, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.3944241757741475e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 282 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1595.078125, | |
| "epoch": 0.06335348108350124, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.380040483118097e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 283 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1381.3359375, | |
| "epoch": 0.06357734497425566, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.365672313222419e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 284 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1536.546875, | |
| "epoch": 0.06380120886501008, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.351320268946749e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 285 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1512.8828125, | |
| "epoch": 0.0640250727557645, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.336984952474119e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 286 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1456.5546875, | |
| "epoch": 0.06424893664651891, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.322666965285697e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 287 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1447.5234375, | |
| "epoch": 0.06447280053727333, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.3083669081355507e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 288 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1554.15625, | |
| "epoch": 0.06469666442802775, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.2940853810254377e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 289 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1519.109375, | |
| "epoch": 0.06492052831878219, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.2798229831796313e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 290 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1438.859375, | |
| "epoch": 0.0651443922095366, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.2655803130197816e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 291 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1451.375, | |
| "epoch": 0.06536825610029103, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.2513579681398034e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 292 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1411.6875, | |
| "epoch": 0.06559211999104544, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.237156545280803e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 293 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1201.9140625, | |
| "epoch": 0.06581598388179986, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.2229766403060403e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 294 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1589.5625, | |
| "epoch": 0.06603984777255428, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.2088188481759305e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 295 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1309.203125, | |
| "epoch": 0.0662637116633087, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.194683762923073e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 296 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1641.0078125, | |
| "epoch": 0.06648757555406314, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.1805719776273387e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 297 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1508.078125, | |
| "epoch": 0.06671143944481756, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.166484084390974e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 298 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1214.8125, | |
| "epoch": 0.06693530333557197, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.1524206743137636e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 299 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1619.9609375, | |
| "epoch": 0.0671591672263264, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.1383823374682287e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 300 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1463.71875, | |
| "epoch": 0.06738303111708081, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.124369662874868e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 301 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1666.4609375, | |
| "epoch": 0.06760689500783523, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.110383238477441e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 302 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1336.5703125, | |
| "epoch": 0.06783075889858965, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.096423651118305e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 303 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1333.3671875, | |
| "epoch": 0.06805462278934409, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.082491486513788e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 304 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1485.6875, | |
| "epoch": 0.0682784866800985, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.0685873292296116e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 305 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1450.7890625, | |
| "epoch": 0.06850235057085292, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.054711762656369e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 306 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1428.734375, | |
| "epoch": 0.06872621446160734, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.040865368985044e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 307 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1494.046875, | |
| "epoch": 0.06895007835236176, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.027048729182583e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 308 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1544.8359375, | |
| "epoch": 0.06917394224311618, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 2.0132624229675205e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 309 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1643.40625, | |
| "epoch": 0.0693978061338706, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.9995070287856546e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 310 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1415.703125, | |
| "epoch": 0.06962167002462503, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.985783123785774e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 311 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1426.6953125, | |
| "epoch": 0.06984553391537945, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.9720912837954486e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 312 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1361.1953125, | |
| "epoch": 0.07006939780613387, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.958432083296862e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 313 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1577.53125, | |
| "epoch": 0.07029326169688829, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.9448060954027093e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 314 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1325.90625, | |
| "epoch": 0.07051712558764271, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.931213891832153e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 315 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1618.9296875, | |
| "epoch": 0.07074098947839713, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.9176560428868336e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 316 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1358.9453125, | |
| "epoch": 0.07096485336915155, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.9041331174269373e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 317 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1430.90625, | |
| "epoch": 0.07118871725990597, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.8906456828473341e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 318 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1514.84375, | |
| "epoch": 0.0714125811506604, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.8771943050537656e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 319 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1505.9140625, | |
| "epoch": 0.07163644504141482, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.8637795484391046e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 320 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1491.15625, | |
| "epoch": 0.07186030893216924, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.8504019758596698e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 321 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1327.7578125, | |
| "epoch": 0.07208417282292366, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.8370621486116163e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 322 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1466.53125, | |
| "epoch": 0.07230803671367808, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.823760626407377e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 323 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1357.6796875, | |
| "epoch": 0.0725319006044325, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.8104979673521838e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 324 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1455.0, | |
| "epoch": 0.07275576449518692, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.7972747279206482e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 325 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1434.8203125, | |
| "epoch": 0.07297962838594135, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.7840914629334122e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 326 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1322.0703125, | |
| "epoch": 0.07320349227669577, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.7709487255338731e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 327 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1245.1484375, | |
| "epoch": 0.07342735616745019, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.7578470671649684e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 328 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1411.828125, | |
| "epoch": 0.07365122005820461, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.744787037546045e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 329 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1606.09375, | |
| "epoch": 0.07387508394895903, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.731769184649788e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 330 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1481.09375, | |
| "epoch": 0.07409894783971345, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.7187940546792325e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 331 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1494.6171875, | |
| "epoch": 0.07432281173046787, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.7058621920448465e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 332 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1517.8046875, | |
| "epoch": 0.0745466756212223, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.6929741393416855e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 333 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1455.8984375, | |
| "epoch": 0.07477053951197672, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.6801304373266286e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 334 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1306.453125, | |
| "epoch": 0.07499440340273114, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.667331624895689e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 335 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1468.578125, | |
| "epoch": 0.07521826729348556, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.6545782390614037e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 336 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1329.8203125, | |
| "epoch": 0.07544213118423998, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.6418708149302992e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 337 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1359.84375, | |
| "epoch": 0.0756659950749944, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.6292098856804423e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 338 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1402.5625, | |
| "epoch": 0.07588985896574882, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.6165959825390661e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 339 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1364.6484375, | |
| "epoch": 0.07611372285650325, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.604029634760284e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 340 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1468.4609375, | |
| "epoch": 0.07633758674725767, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.59151136960288e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 341 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1500.7734375, | |
| "epoch": 0.07656145063801209, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.5790417123081903e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 342 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1548.4921875, | |
| "epoch": 0.07678531452876651, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.5666211860780583e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 343 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1538.8984375, | |
| "epoch": 0.07700917841952093, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.5542503120528918e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 344 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1399.3671875, | |
| "epoch": 0.07723304231027535, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.5419296092897866e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 345 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1465.7578125, | |
| "epoch": 0.07745690620102977, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.529659594740755e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 346 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1602.140625, | |
| "epoch": 0.0776807700917842, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.5174407832310338e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 347 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1442.0234375, | |
| "epoch": 0.07790463398253862, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.5052736874374815e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 348 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1425.34375, | |
| "epoch": 0.07812849787329304, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.4931588178670695e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 349 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1572.390625, | |
| "epoch": 0.07835236176404746, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.4810966828354605e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 350 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1454.6640625, | |
| "epoch": 0.07857622565480188, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.469087788445684e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 351 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1519.0546875, | |
| "epoch": 0.0788000895455563, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.4571326385668965e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 352 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1459.09375, | |
| "epoch": 0.07902395343631072, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.4452317348132434e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 353 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1474.5859375, | |
| "epoch": 0.07924781732706515, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.4333855765228104e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 354 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1464.34375, | |
| "epoch": 0.07947168121781957, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.421594660736675e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 355 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1269.4140625, | |
| "epoch": 0.07969554510857399, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.4098594821780476e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 356 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1528.3984375, | |
| "epoch": 0.07991940899932841, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.3981805332315174e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 357 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1263.546875, | |
| "epoch": 0.08014327289008283, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.3865583039223929e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 358 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1560.5, | |
| "epoch": 0.08036713678083725, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.374993281896137e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 359 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1522.3203125, | |
| "epoch": 0.08059100067159167, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.3634859523979134e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 360 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1240.7578125, | |
| "epoch": 0.0808148645623461, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.3520367982522208e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 361 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1401.5859375, | |
| "epoch": 0.08103872845310052, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.3406462998426358e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 362 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1452.921875, | |
| "epoch": 0.08126259234385494, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.3293149350916595e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 363 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1346.984375, | |
| "epoch": 0.08148645623460936, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.3180431794406623e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 364 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1390.0546875, | |
| "epoch": 0.08171032012536378, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.3068315058299358e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 365 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1426.1875, | |
| "epoch": 0.0819341840161182, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.2956803846788503e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 366 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1468.3828125, | |
| "epoch": 0.08215804790687262, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.284590283866116e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 367 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1470.9609375, | |
| "epoch": 0.08238191179762704, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.2735616687101518e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 368 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1554.578125, | |
| "epoch": 0.08260577568838147, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.2625950019495614e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 369 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1267.1328125, | |
| "epoch": 0.08282963957913589, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.251690743723718e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 370 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1234.9453125, | |
| "epoch": 0.08305350346989031, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.2408493515534581e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 371 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1346.1015625, | |
| "epoch": 0.08327736736064473, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.2300712803218834e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 372 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1513.484375, | |
| "epoch": 0.08350123125139915, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.2193569822552772e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 373 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1473.25, | |
| "epoch": 0.08372509514215357, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.2087069069041268e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 374 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1517.140625, | |
| "epoch": 0.08394895903290799, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.1981215011242654e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 375 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1289.6484375, | |
| "epoch": 0.08417282292366242, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.1876012090581184e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 376 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1416.7734375, | |
| "epoch": 0.08439668681441684, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.177146472116071e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 377 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1420.9140625, | |
| "epoch": 0.08462055070517126, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.1667577289579462e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 378 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1370.1171875, | |
| "epoch": 0.08484441459592568, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.1564354154746007e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 379 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1233.609375, | |
| "epoch": 0.0850682784866801, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.146179964769635e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 380 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1499.0859375, | |
| "epoch": 0.08529214237743452, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.1359918071412195e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 381 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1437.0078125, | |
| "epoch": 0.08551600626818893, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.1258713700640456e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 382 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1411.328125, | |
| "epoch": 0.08573987015894337, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.115819078171383e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 383 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1570.640625, | |
| "epoch": 0.08596373404969779, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.1058353532372667e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 384 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1112.671875, | |
| "epoch": 0.0861875979404522, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.0959206141587998e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 385 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1472.609375, | |
| "epoch": 0.08641146183120663, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.0860752769385766e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 386 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 945.75, | |
| "epoch": 0.08663532572196105, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.0762997546672279e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 387 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1362.640625, | |
| "epoch": 0.08685918961271547, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.0665944575060914e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 388 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1332.109375, | |
| "epoch": 0.08708305350346988, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.056959792669997e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 389 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1375.59375, | |
| "epoch": 0.08730691739422432, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.0473961644101856e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 390 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1387.0390625, | |
| "epoch": 0.08753078128497874, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.037903973997345e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 391 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1490.0234375, | |
| "epoch": 0.08775464517573316, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.0284836197047737e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 392 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1583.2421875, | |
| "epoch": 0.08797850906648758, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.0191354967916712e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 393 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1380.75, | |
| "epoch": 0.088202372957242, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.0098599974865515e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 394 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1510.3671875, | |
| "epoch": 0.08842623684799641, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 1.0006575109707898e-06, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 395 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1108.640625, | |
| "epoch": 0.08865010073875083, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 9.915284233622877e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 396 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1488.6640625, | |
| "epoch": 0.08887396462950527, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 9.824731176992796e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 397 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1448.71875, | |
| "epoch": 0.08909782852025969, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 9.734919739242543e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 398 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1476.46875, | |
| "epoch": 0.0893216924110141, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 9.645853688680177e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 399 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1459.3671875, | |
| "epoch": 0.08954555630176853, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 9.557536762338786e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 400 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1398.9296875, | |
| "epoch": 0.08976942019252294, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 9.46997266581973e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 401 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1231.3671875, | |
| "epoch": 0.08999328408327736, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 9.383165073137115e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 402 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1453.921875, | |
| "epoch": 0.09021714797403178, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 9.297117626563687e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 403 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1582.4453125, | |
| "epoch": 0.09044101186478622, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 9.211833936477957e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 404 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1592.3828125, | |
| "epoch": 0.09066487575554064, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 9.127317581212753e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 405 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1571.1640625, | |
| "epoch": 0.09088873964629506, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 9.043572106905084e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 406 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1231.3359375, | |
| "epoch": 0.09111260353704947, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 8.960601027347321e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 407 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1090.6796875, | |
| "epoch": 0.0913364674278039, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 8.878407823839788e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 408 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1351.0, | |
| "epoch": 0.09156033131855831, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 8.796995945044689e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 409 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1567.1796875, | |
| "epoch": 0.09178419520931273, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 8.716368806841405e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 410 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1507.6015625, | |
| "epoch": 0.09200805910006717, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 8.636529792183171e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 411 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1486.1015625, | |
| "epoch": 0.09223192299082159, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 8.557482250955144e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 412 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1281.234375, | |
| "epoch": 0.092455786881576, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 8.479229499833844e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 413 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1405.96875, | |
| "epoch": 0.09267965077233042, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 8.401774822147976e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 414 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1729.21875, | |
| "epoch": 0.09290351466308484, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 8.325121467740695e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 415 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1411.265625, | |
| "epoch": 0.09312737855383926, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 8.249272652833226e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 416 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1568.90625, | |
| "epoch": 0.09335124244459368, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 8.174231559889931e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 417 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1293.1015625, | |
| "epoch": 0.0935751063353481, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 8.100001337484787e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 418 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1516.359375, | |
| "epoch": 0.09379897022610253, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 8.026585100169251e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 419 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1420.5703125, | |
| "epoch": 0.09402283411685695, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 7.953985928341601e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 420 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1404.53125, | |
| "epoch": 0.09424669800761137, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 7.882206868117693e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 421 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1470.5703125, | |
| "epoch": 0.09447056189836579, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 7.81125093120313e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 422 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1381.515625, | |
| "epoch": 0.09469442578912021, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 7.741121094766916e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 423 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1545.0703125, | |
| "epoch": 0.09491828967987463, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 7.671820301316532e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 424 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1384.6328125, | |
| "epoch": 0.09514215357062905, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 7.603351458574474e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 425 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1461.125, | |
| "epoch": 0.09536601746138348, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 7.535717439356255e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 426 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1380.734375, | |
| "epoch": 0.0955898813521379, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 7.46892108144986e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 427 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1254.3984375, | |
| "epoch": 0.09581374524289232, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 7.402965187496697e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 428 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1474.8359375, | |
| "epoch": 0.09603760913364674, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 7.337852524873974e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 429 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1384.65625, | |
| "epoch": 0.09626147302440116, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 7.273585825578608e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 430 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1230.328125, | |
| "epoch": 0.09648533691515558, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 7.21016778611259e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 431 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1351.9921875, | |
| "epoch": 0.09670920080591, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 7.147601067369835e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 432 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1351.2109375, | |
| "epoch": 0.09693306469666443, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 7.085888294524561e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 433 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1345.40625, | |
| "epoch": 0.09715692858741885, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 7.025032056921117e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 434 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1295.9375, | |
| "epoch": 0.09738079247817327, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.965034907965349e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 435 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1495.1953125, | |
| "epoch": 0.09760465636892769, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.905899365017462e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 436 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1297.8515625, | |
| "epoch": 0.09782852025968211, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.847627909286409e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 437 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1510.5859375, | |
| "epoch": 0.09805238415043653, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.790222985725761e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 438 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1452.78125, | |
| "epoch": 0.09827624804119095, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.733687002931141e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 439 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1150.546875, | |
| "epoch": 0.09850011193194538, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.678022333039158e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 440 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1543.7421875, | |
| "epoch": 0.0987239758226998, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.623231311627876e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 441 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1264.8984375, | |
| "epoch": 0.09894783971345422, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.569316237618811e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 442 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1556.9296875, | |
| "epoch": 0.09917170360420864, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.516279373180499e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 443 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1411.3359375, | |
| "epoch": 0.09939556749496306, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.464122943633543e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 444 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1447.71875, | |
| "epoch": 0.09961943138571748, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.412849137357271e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 445 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1545.7421875, | |
| "epoch": 0.0998432952764719, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.3624601056979e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 446 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1590.8125, | |
| "epoch": 0.10006715916722633, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.312957962878278e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 447 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1411.96875, | |
| "epoch": 0.10029102305798075, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.264344785909181e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 448 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1288.7109375, | |
| "epoch": 0.10051488694873517, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.216622614502149e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 449 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1496.296875, | |
| "epoch": 0.10073875083948959, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.169793450983916e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 450 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1410.15625, | |
| "epoch": 0.10096261473024401, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.123859260212393e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 451 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1453.546875, | |
| "epoch": 0.10118647862099843, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.07882196949423e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 452 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1545.1015625, | |
| "epoch": 0.10141034251175285, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 6.034683468503948e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 453 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1455.1484375, | |
| "epoch": 0.10163420640250728, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.991445609204641e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 454 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1451.4921875, | |
| "epoch": 0.1018580702932617, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.949110205770292e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 455 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1541.390625, | |
| "epoch": 0.10208193418401612, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.90767903450964e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 456 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1585.8125, | |
| "epoch": 0.10230579807477054, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.867153833791652e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 457 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1348.7421875, | |
| "epoch": 0.10252966196552496, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.827536303972587e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 458 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1373.125, | |
| "epoch": 0.10275352585627938, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.78882810732465e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 459 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1574.8984375, | |
| "epoch": 0.1029773897470338, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.75103086796625e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 460 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1429.90625, | |
| "epoch": 0.10320125363778823, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.714146171793846e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 461 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1407.2421875, | |
| "epoch": 0.10342511752854265, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.678175566415422e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 462 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1446.109375, | |
| "epoch": 0.10364898141929707, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.643120561085528e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 463 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1593.7265625, | |
| "epoch": 0.10387284531005149, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.608982626641991e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 464 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1531.2734375, | |
| "epoch": 0.10409670920080591, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.575763195444166e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 465 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1387.28125, | |
| "epoch": 0.10432057309156033, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.543463661312847e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 466 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1382.375, | |
| "epoch": 0.10454443698231475, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.512085379471808e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 467 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1527.578125, | |
| "epoch": 0.10476830087306917, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.481629666490903e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 468 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1333.6640625, | |
| "epoch": 0.1049921647638236, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.452097800230853e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 469 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1274.53125, | |
| "epoch": 0.10521602865457802, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.423491019789623e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 470 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1305.0859375, | |
| "epoch": 0.10543989254533244, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.395810525450425e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 471 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1565.7734375, | |
| "epoch": 0.10566375643608686, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.369057478631359e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 472 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1579.1640625, | |
| "epoch": 0.10588762032684128, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.343233001836694e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 473 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1391.8359375, | |
| "epoch": 0.1061114842175957, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.318338178609754e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 474 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1531.5703125, | |
| "epoch": 0.10633534810835012, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.294374053487459e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 475 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1525.25, | |
| "epoch": 0.10655921199910455, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.271341631956511e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 476 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1503.3203125, | |
| "epoch": 0.10678307588985897, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.249241880411181e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 477 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1622.6953125, | |
| "epoch": 0.10700693978061339, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.228075726112785e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 478 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1439.09375, | |
| "epoch": 0.10723080367136781, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.207844057150768e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 479 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1590.5703125, | |
| "epoch": 0.10745466756212223, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.188547722405437e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 480 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1399.9765625, | |
| "epoch": 0.10767853145287665, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.170187531512351e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 481 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1549.7109375, | |
| "epoch": 0.10790239534363107, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.152764254828348e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 482 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1590.671875, | |
| "epoch": 0.1081262592343855, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.136278623399225e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 483 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1375.3125, | |
| "epoch": 0.10835012312513992, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.120731328929058e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 484 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1490.1796875, | |
| "epoch": 0.10857398701589434, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.106123023751187e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 485 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1458.296875, | |
| "epoch": 0.10879785090664876, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.092454320800833e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 486 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1546.21875, | |
| "epoch": 0.10902171479740318, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.079725793589405e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 487 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1451.734375, | |
| "epoch": 0.1092455786881576, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.067937976180407e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 488 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1481.3125, | |
| "epoch": 0.10946944257891202, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.057091363167046e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 489 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1301.890625, | |
| "epoch": 0.10969330646966645, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.047186409651489e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 490 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1402.359375, | |
| "epoch": 0.10991717036042087, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.038223531225742e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 491 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1407.75, | |
| "epoch": 0.11014103425117529, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.030203103954232e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 492 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1529.09375, | |
| "epoch": 0.11036489814192971, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.023125464358026e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 493 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1500.359375, | |
| "epoch": 0.11058876203268413, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.016990909400709e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 494 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1246.203125, | |
| "epoch": 0.11081262592343855, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.011799696475915e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 495 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1267.1953125, | |
| "epoch": 0.11103648981419297, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.007552043396547e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 496 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1458.609375, | |
| "epoch": 0.1112603537049474, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.004248128385618e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 497 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1453.4609375, | |
| "epoch": 0.11148421759570182, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.001888090068784e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 498 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1439.4453125, | |
| "epoch": 0.11170808148645624, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.000472027468528e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 499 | |
| }, | |
| { | |
| "clip_ratio": 0.0, | |
| "completion_length": 1444.859375, | |
| "epoch": 0.11193194537721066, | |
| "grad_norm": 0.0, | |
| "kl": 0.0, | |
| "learning_rate": 5.000000000000001e-07, | |
| "loss": 0.0, | |
| "reward": 0.0, | |
| "reward_std": 0.0, | |
| "rewards/code_reward": 0.0, | |
| "rewards/format_reward": 0.0, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.11193194537721066, | |
| "step": 500, | |
| "total_flos": 0.0, | |
| "train_loss": 0.0, | |
| "train_runtime": 34238.863, | |
| "train_samples_per_second": 1.869, | |
| "train_steps_per_second": 0.015 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |