{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.03333333333333333, "eval_steps": 500, "global_step": 150, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "completion_length": 309.04, "epoch": 0.011111111111111112, "grad_norm": NaN, "kl": 222.66988746643065, "learning_rate": 5.444444444444444e-07, "loss": 8.9068, "reward": -18.06266725540161, "reward_std": 6.391496688127518, "rewards/check_first_pass": -9.93666666984558, "rewards/check_solution": -7.600000243186951, "rewards/check_solution_words": -6.068000079095364, "rewards/check_word_guesses": 5.54200014591217, "step": 50 }, { "completion_length": 368.64, "epoch": 0.022222222222222223, "grad_norm": NaN, "kl": 557.3866543316841, "learning_rate": 1.1e-06, "loss": 22.2955, "reward": -17.431167125701904, "reward_std": 5.4497878611087796, "rewards/check_first_pass": -9.859833374023438, "rewards/check_solution": -7.2583335638046265, "rewards/check_solution_words": -5.878333521187305, "rewards/check_word_guesses": 5.565333509445191, "step": 100 }, { "completion_length": 346.92, "epoch": 0.03333333333333333, "grad_norm": NaN, "kl": 4737.8455329227445, "learning_rate": 1.6555555555555559e-06, "loss": 189.5138, "reward": -18.070500688552855, "reward_std": 7.8515861177444455, "rewards/check_first_pass": -9.786166725158692, "rewards/check_solution": -7.325000324249268, "rewards/check_solution_words": -7.050333592891693, "rewards/check_word_guesses": 6.091000156402588, "step": 150 } ], "logging_steps": 50, "max_steps": 4500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 12, "trial_name": null, "trial_params": null }