| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.2222222222222222, | |
| "eval_steps": 500, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "completion_length": 309.04, | |
| "epoch": 0.011111111111111112, | |
| "grad_norm": NaN, | |
| "kl": 222.66988746643065, | |
| "learning_rate": 5.444444444444444e-07, | |
| "loss": 8.9068, | |
| "reward": -18.06266725540161, | |
| "reward_std": 6.391496688127518, | |
| "rewards/check_first_pass": -9.93666666984558, | |
| "rewards/check_solution": -7.600000243186951, | |
| "rewards/check_solution_words": -6.068000079095364, | |
| "rewards/check_word_guesses": 5.54200014591217, | |
| "step": 50 | |
| }, | |
| { | |
| "completion_length": 368.64, | |
| "epoch": 0.022222222222222223, | |
| "grad_norm": NaN, | |
| "kl": 557.3866543316841, | |
| "learning_rate": 1.1e-06, | |
| "loss": 22.2955, | |
| "reward": -17.431167125701904, | |
| "reward_std": 5.4497878611087796, | |
| "rewards/check_first_pass": -9.859833374023438, | |
| "rewards/check_solution": -7.2583335638046265, | |
| "rewards/check_solution_words": -5.878333521187305, | |
| "rewards/check_word_guesses": 5.565333509445191, | |
| "step": 100 | |
| }, | |
| { | |
| "completion_length": 346.92, | |
| "epoch": 0.03333333333333333, | |
| "grad_norm": NaN, | |
| "kl": 4737.8455329227445, | |
| "learning_rate": 1.6555555555555559e-06, | |
| "loss": 189.5138, | |
| "reward": -18.070500688552855, | |
| "reward_std": 7.8515861177444455, | |
| "rewards/check_first_pass": -9.786166725158692, | |
| "rewards/check_solution": -7.325000324249268, | |
| "rewards/check_solution_words": -7.050333592891693, | |
| "rewards/check_word_guesses": 6.091000156402588, | |
| "step": 150 | |
| }, | |
| { | |
| "completion_length": 322.2, | |
| "epoch": 0.044444444444444446, | |
| "grad_norm": NaN, | |
| "kl": 32057.38775477886, | |
| "learning_rate": 2.2111111111111113e-06, | |
| "loss": 1282.2956, | |
| "reward": -15.816333751678467, | |
| "reward_std": 6.191992573738098, | |
| "rewards/check_first_pass": -9.895000038146973, | |
| "rewards/check_solution": -7.100000200271606, | |
| "rewards/check_solution_words": -4.8800000631809235, | |
| "rewards/check_word_guesses": 6.058666839599609, | |
| "step": 200 | |
| }, | |
| { | |
| "completion_length": 349.9, | |
| "epoch": 0.05555555555555555, | |
| "grad_norm": NaN, | |
| "kl": 5074.338300862312, | |
| "learning_rate": 2.766666666666667e-06, | |
| "loss": 202.9736, | |
| "reward": -17.724167308807374, | |
| "reward_std": 6.207637655735016, | |
| "rewards/check_first_pass": -9.912833366394043, | |
| "rewards/check_solution": -7.358333556652069, | |
| "rewards/check_solution_words": -6.180666843354702, | |
| "rewards/check_word_guesses": 5.727666816711426, | |
| "step": 250 | |
| }, | |
| { | |
| "completion_length": 336.42, | |
| "epoch": 0.06666666666666667, | |
| "grad_norm": NaN, | |
| "kl": 315.6221669435501, | |
| "learning_rate": 3.322222222222222e-06, | |
| "loss": 12.6249, | |
| "reward": -16.775000438690185, | |
| "reward_std": 5.353409328460693, | |
| "rewards/check_first_pass": -9.81633337020874, | |
| "rewards/check_solution": -7.341666927337647, | |
| "rewards/check_solution_words": -5.623000101844471, | |
| "rewards/check_word_guesses": 6.006000165939331, | |
| "step": 300 | |
| }, | |
| { | |
| "completion_length": 307.04, | |
| "epoch": 0.07777777777777778, | |
| "grad_norm": NaN, | |
| "kl": 6570.5719665384295, | |
| "learning_rate": 3.877777777777778e-06, | |
| "loss": 262.8229, | |
| "reward": -17.077000389099123, | |
| "reward_std": 5.669408960938454, | |
| "rewards/check_first_pass": -9.886666717529296, | |
| "rewards/check_solution": -7.250000200271606, | |
| "rewards/check_solution_words": -5.695666807889938, | |
| "rewards/check_word_guesses": 5.755333452224732, | |
| "step": 350 | |
| }, | |
| { | |
| "completion_length": 313.08, | |
| "epoch": 0.08888888888888889, | |
| "grad_norm": NaN, | |
| "kl": 1532.4928638124466, | |
| "learning_rate": 4.433333333333334e-06, | |
| "loss": 61.2997, | |
| "reward": -17.507167091369627, | |
| "reward_std": 5.527194731235504, | |
| "rewards/check_first_pass": -9.908166694641114, | |
| "rewards/check_solution": -7.30833353638649, | |
| "rewards/check_solution_words": -6.251000165343284, | |
| "rewards/check_word_guesses": 5.9603334903717045, | |
| "step": 400 | |
| }, | |
| { | |
| "completion_length": 329.37666687011716, | |
| "epoch": 0.1, | |
| "grad_norm": NaN, | |
| "kl": 1601.70994805336, | |
| "learning_rate": 4.988888888888889e-06, | |
| "loss": 64.0684, | |
| "reward": -17.980167026519776, | |
| "reward_std": 6.458992264270782, | |
| "rewards/check_first_pass": -9.801500053405762, | |
| "rewards/check_solution": -7.2666668963432315, | |
| "rewards/check_solution_words": -6.554666934013366, | |
| "rewards/check_word_guesses": 5.64266683101654, | |
| "step": 450 | |
| }, | |
| { | |
| "completion_length": 307.52, | |
| "epoch": 0.1111111111111111, | |
| "grad_norm": NaN, | |
| "kl": 702.5347912788391, | |
| "learning_rate": 4.998194324998843e-06, | |
| "loss": 28.1014, | |
| "reward": -16.74250042915344, | |
| "reward_std": 6.445133271217347, | |
| "rewards/check_first_pass": -9.824500045776368, | |
| "rewards/check_solution": -7.308333573341369, | |
| "rewards/check_solution_words": -5.524333542585373, | |
| "rewards/check_word_guesses": 5.914666795730591, | |
| "step": 500 | |
| }, | |
| { | |
| "completion_length": 335.9, | |
| "epoch": 0.12222222222222222, | |
| "grad_norm": NaN, | |
| "kl": 19601.83191286087, | |
| "learning_rate": 4.992631880567301e-06, | |
| "loss": 784.0733, | |
| "reward": -17.86000030517578, | |
| "reward_std": 7.05341215133667, | |
| "rewards/check_first_pass": -9.785000047683717, | |
| "rewards/check_solution": -7.49166690826416, | |
| "rewards/check_solution_words": -6.301333554983139, | |
| "rewards/check_word_guesses": 5.71800015449524, | |
| "step": 550 | |
| }, | |
| { | |
| "completion_length": 298.2, | |
| "epoch": 0.13333333333333333, | |
| "grad_norm": NaN, | |
| "kl": 1115.117756202221, | |
| "learning_rate": 4.983320281008445e-06, | |
| "loss": 44.6047, | |
| "reward": -16.99700037956238, | |
| "reward_std": 5.631768324375153, | |
| "rewards/check_first_pass": -9.813000040054321, | |
| "rewards/check_solution": -7.041666898727417, | |
| "rewards/check_solution_words": -6.250666889995337, | |
| "rewards/check_word_guesses": 6.108333473205566, | |
| "step": 600 | |
| }, | |
| { | |
| "completion_length": 318.48, | |
| "epoch": 0.14444444444444443, | |
| "grad_norm": NaN, | |
| "kl": 3946.661036362648, | |
| "learning_rate": 4.970273531852536e-06, | |
| "loss": 157.8665, | |
| "reward": -17.999333934783934, | |
| "reward_std": 6.210418889522552, | |
| "rewards/check_first_pass": -9.89133337020874, | |
| "rewards/check_solution": -7.458333578109741, | |
| "rewards/check_solution_words": -6.459333531856537, | |
| "rewards/check_word_guesses": 5.809666805267334, | |
| "step": 650 | |
| }, | |
| { | |
| "completion_length": 351.9, | |
| "epoch": 0.15555555555555556, | |
| "grad_norm": NaN, | |
| "kl": 2870.44579018116, | |
| "learning_rate": 4.953511256649632e-06, | |
| "loss": 114.8178, | |
| "reward": -17.553834075927735, | |
| "reward_std": 5.835132333040238, | |
| "rewards/check_first_pass": -9.929833374023438, | |
| "rewards/check_solution": -7.383333520889282, | |
| "rewards/check_solution_words": -6.055666868388653, | |
| "rewards/check_word_guesses": 5.815000147819519, | |
| "step": 700 | |
| }, | |
| { | |
| "completion_length": 308.34, | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": NaN, | |
| "kl": 164.13174985408784, | |
| "learning_rate": 4.933058667453916e-06, | |
| "loss": 6.5653, | |
| "reward": -16.56966731071472, | |
| "reward_std": 6.621588716208935, | |
| "rewards/check_first_pass": -9.908333358764649, | |
| "rewards/check_solution": -7.291666874885559, | |
| "rewards/check_solution_words": -5.485666743516922, | |
| "rewards/check_word_guesses": 6.116000127792359, | |
| "step": 750 | |
| }, | |
| { | |
| "completion_length": 342.34, | |
| "epoch": 0.17777777777777778, | |
| "grad_norm": NaN, | |
| "kl": 1447.0631847190857, | |
| "learning_rate": 4.9089465269023596e-06, | |
| "loss": 57.8825, | |
| "reward": -17.248333780765535, | |
| "reward_std": 6.114709348678589, | |
| "rewards/check_first_pass": -9.830000019073486, | |
| "rewards/check_solution": -7.2333335685729985, | |
| "rewards/check_solution_words": -6.300666825771332, | |
| "rewards/check_word_guesses": 6.115666842460632, | |
| "step": 800 | |
| }, | |
| { | |
| "completion_length": 354.18, | |
| "epoch": 0.18888888888888888, | |
| "grad_norm": NaN, | |
| "kl": 23526.59426044941, | |
| "learning_rate": 4.881211101944802e-06, | |
| "loss": 941.0638, | |
| "reward": -17.54183391571045, | |
| "reward_std": 6.4859533834457395, | |
| "rewards/check_first_pass": -9.808833379745483, | |
| "rewards/check_solution": -7.708333535194397, | |
| "rewards/check_solution_words": -5.9636668264865875, | |
| "rewards/check_word_guesses": 5.939000129699707, | |
| "step": 850 | |
| }, | |
| { | |
| "completion_length": 308.18, | |
| "epoch": 0.2, | |
| "grad_norm": NaN, | |
| "kl": 138.43031896591185, | |
| "learning_rate": 4.84989410929501e-06, | |
| "loss": 5.5372, | |
| "reward": -17.896833839416505, | |
| "reward_std": 5.668911509513855, | |
| "rewards/check_first_pass": -9.863166694641114, | |
| "rewards/check_solution": -7.233333587646484, | |
| "rewards/check_solution_words": -6.624666909873485, | |
| "rewards/check_word_guesses": 5.824333515167236, | |
| "step": 900 | |
| }, | |
| { | |
| "completion_length": 314.82, | |
| "epoch": 0.2111111111111111, | |
| "grad_norm": NaN, | |
| "kl": 1218.171366314888, | |
| "learning_rate": 4.815042652684779e-06, | |
| "loss": 48.7269, | |
| "reward": -16.533334035873413, | |
| "reward_std": 7.360376672744751, | |
| "rewards/check_first_pass": -9.612000093460082, | |
| "rewards/check_solution": -7.158333578109741, | |
| "rewards/check_solution_words": -5.995000202357769, | |
| "rewards/check_word_guesses": 6.232000198364258, | |
| "step": 950 | |
| }, | |
| { | |
| "completion_length": 339.66, | |
| "epoch": 0.2222222222222222, | |
| "grad_norm": NaN, | |
| "kl": 174.28998464107514, | |
| "learning_rate": 4.776709152015443e-06, | |
| "loss": 6.9716, | |
| "reward": -17.22483383178711, | |
| "reward_std": 6.013938563764095, | |
| "rewards/check_first_pass": -9.816166725158691, | |
| "rewards/check_solution": -7.008333616256714, | |
| "rewards/check_solution_words": -6.318000204563141, | |
| "rewards/check_word_guesses": 5.9176667785644534, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 4500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 12, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |