gsarti's picture
Upload 1170 files
3cdd523 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.2222222222222222,
"eval_steps": 500,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"completion_length": 309.04,
"epoch": 0.011111111111111112,
"grad_norm": NaN,
"kl": 222.66988746643065,
"learning_rate": 5.444444444444444e-07,
"loss": 8.9068,
"reward": -18.06266725540161,
"reward_std": 6.391496688127518,
"rewards/check_first_pass": -9.93666666984558,
"rewards/check_solution": -7.600000243186951,
"rewards/check_solution_words": -6.068000079095364,
"rewards/check_word_guesses": 5.54200014591217,
"step": 50
},
{
"completion_length": 368.64,
"epoch": 0.022222222222222223,
"grad_norm": NaN,
"kl": 557.3866543316841,
"learning_rate": 1.1e-06,
"loss": 22.2955,
"reward": -17.431167125701904,
"reward_std": 5.4497878611087796,
"rewards/check_first_pass": -9.859833374023438,
"rewards/check_solution": -7.2583335638046265,
"rewards/check_solution_words": -5.878333521187305,
"rewards/check_word_guesses": 5.565333509445191,
"step": 100
},
{
"completion_length": 346.92,
"epoch": 0.03333333333333333,
"grad_norm": NaN,
"kl": 4737.8455329227445,
"learning_rate": 1.6555555555555559e-06,
"loss": 189.5138,
"reward": -18.070500688552855,
"reward_std": 7.8515861177444455,
"rewards/check_first_pass": -9.786166725158692,
"rewards/check_solution": -7.325000324249268,
"rewards/check_solution_words": -7.050333592891693,
"rewards/check_word_guesses": 6.091000156402588,
"step": 150
},
{
"completion_length": 322.2,
"epoch": 0.044444444444444446,
"grad_norm": NaN,
"kl": 32057.38775477886,
"learning_rate": 2.2111111111111113e-06,
"loss": 1282.2956,
"reward": -15.816333751678467,
"reward_std": 6.191992573738098,
"rewards/check_first_pass": -9.895000038146973,
"rewards/check_solution": -7.100000200271606,
"rewards/check_solution_words": -4.8800000631809235,
"rewards/check_word_guesses": 6.058666839599609,
"step": 200
},
{
"completion_length": 349.9,
"epoch": 0.05555555555555555,
"grad_norm": NaN,
"kl": 5074.338300862312,
"learning_rate": 2.766666666666667e-06,
"loss": 202.9736,
"reward": -17.724167308807374,
"reward_std": 6.207637655735016,
"rewards/check_first_pass": -9.912833366394043,
"rewards/check_solution": -7.358333556652069,
"rewards/check_solution_words": -6.180666843354702,
"rewards/check_word_guesses": 5.727666816711426,
"step": 250
},
{
"completion_length": 336.42,
"epoch": 0.06666666666666667,
"grad_norm": NaN,
"kl": 315.6221669435501,
"learning_rate": 3.322222222222222e-06,
"loss": 12.6249,
"reward": -16.775000438690185,
"reward_std": 5.353409328460693,
"rewards/check_first_pass": -9.81633337020874,
"rewards/check_solution": -7.341666927337647,
"rewards/check_solution_words": -5.623000101844471,
"rewards/check_word_guesses": 6.006000165939331,
"step": 300
},
{
"completion_length": 307.04,
"epoch": 0.07777777777777778,
"grad_norm": NaN,
"kl": 6570.5719665384295,
"learning_rate": 3.877777777777778e-06,
"loss": 262.8229,
"reward": -17.077000389099123,
"reward_std": 5.669408960938454,
"rewards/check_first_pass": -9.886666717529296,
"rewards/check_solution": -7.250000200271606,
"rewards/check_solution_words": -5.695666807889938,
"rewards/check_word_guesses": 5.755333452224732,
"step": 350
},
{
"completion_length": 313.08,
"epoch": 0.08888888888888889,
"grad_norm": NaN,
"kl": 1532.4928638124466,
"learning_rate": 4.433333333333334e-06,
"loss": 61.2997,
"reward": -17.507167091369627,
"reward_std": 5.527194731235504,
"rewards/check_first_pass": -9.908166694641114,
"rewards/check_solution": -7.30833353638649,
"rewards/check_solution_words": -6.251000165343284,
"rewards/check_word_guesses": 5.9603334903717045,
"step": 400
},
{
"completion_length": 329.37666687011716,
"epoch": 0.1,
"grad_norm": NaN,
"kl": 1601.70994805336,
"learning_rate": 4.988888888888889e-06,
"loss": 64.0684,
"reward": -17.980167026519776,
"reward_std": 6.458992264270782,
"rewards/check_first_pass": -9.801500053405762,
"rewards/check_solution": -7.2666668963432315,
"rewards/check_solution_words": -6.554666934013366,
"rewards/check_word_guesses": 5.64266683101654,
"step": 450
},
{
"completion_length": 307.52,
"epoch": 0.1111111111111111,
"grad_norm": NaN,
"kl": 702.5347912788391,
"learning_rate": 4.998194324998843e-06,
"loss": 28.1014,
"reward": -16.74250042915344,
"reward_std": 6.445133271217347,
"rewards/check_first_pass": -9.824500045776368,
"rewards/check_solution": -7.308333573341369,
"rewards/check_solution_words": -5.524333542585373,
"rewards/check_word_guesses": 5.914666795730591,
"step": 500
},
{
"completion_length": 335.9,
"epoch": 0.12222222222222222,
"grad_norm": NaN,
"kl": 19601.83191286087,
"learning_rate": 4.992631880567301e-06,
"loss": 784.0733,
"reward": -17.86000030517578,
"reward_std": 7.05341215133667,
"rewards/check_first_pass": -9.785000047683717,
"rewards/check_solution": -7.49166690826416,
"rewards/check_solution_words": -6.301333554983139,
"rewards/check_word_guesses": 5.71800015449524,
"step": 550
},
{
"completion_length": 298.2,
"epoch": 0.13333333333333333,
"grad_norm": NaN,
"kl": 1115.117756202221,
"learning_rate": 4.983320281008445e-06,
"loss": 44.6047,
"reward": -16.99700037956238,
"reward_std": 5.631768324375153,
"rewards/check_first_pass": -9.813000040054321,
"rewards/check_solution": -7.041666898727417,
"rewards/check_solution_words": -6.250666889995337,
"rewards/check_word_guesses": 6.108333473205566,
"step": 600
},
{
"completion_length": 318.48,
"epoch": 0.14444444444444443,
"grad_norm": NaN,
"kl": 3946.661036362648,
"learning_rate": 4.970273531852536e-06,
"loss": 157.8665,
"reward": -17.999333934783934,
"reward_std": 6.210418889522552,
"rewards/check_first_pass": -9.89133337020874,
"rewards/check_solution": -7.458333578109741,
"rewards/check_solution_words": -6.459333531856537,
"rewards/check_word_guesses": 5.809666805267334,
"step": 650
},
{
"completion_length": 351.9,
"epoch": 0.15555555555555556,
"grad_norm": NaN,
"kl": 2870.44579018116,
"learning_rate": 4.953511256649632e-06,
"loss": 114.8178,
"reward": -17.553834075927735,
"reward_std": 5.835132333040238,
"rewards/check_first_pass": -9.929833374023438,
"rewards/check_solution": -7.383333520889282,
"rewards/check_solution_words": -6.055666868388653,
"rewards/check_word_guesses": 5.815000147819519,
"step": 700
},
{
"completion_length": 308.34,
"epoch": 0.16666666666666666,
"grad_norm": NaN,
"kl": 164.13174985408784,
"learning_rate": 4.933058667453916e-06,
"loss": 6.5653,
"reward": -16.56966731071472,
"reward_std": 6.621588716208935,
"rewards/check_first_pass": -9.908333358764649,
"rewards/check_solution": -7.291666874885559,
"rewards/check_solution_words": -5.485666743516922,
"rewards/check_word_guesses": 6.116000127792359,
"step": 750
},
{
"completion_length": 342.34,
"epoch": 0.17777777777777778,
"grad_norm": NaN,
"kl": 1447.0631847190857,
"learning_rate": 4.9089465269023596e-06,
"loss": 57.8825,
"reward": -17.248333780765535,
"reward_std": 6.114709348678589,
"rewards/check_first_pass": -9.830000019073486,
"rewards/check_solution": -7.2333335685729985,
"rewards/check_solution_words": -6.300666825771332,
"rewards/check_word_guesses": 6.115666842460632,
"step": 800
},
{
"completion_length": 354.18,
"epoch": 0.18888888888888888,
"grad_norm": NaN,
"kl": 23526.59426044941,
"learning_rate": 4.881211101944802e-06,
"loss": 941.0638,
"reward": -17.54183391571045,
"reward_std": 6.4859533834457395,
"rewards/check_first_pass": -9.808833379745483,
"rewards/check_solution": -7.708333535194397,
"rewards/check_solution_words": -5.9636668264865875,
"rewards/check_word_guesses": 5.939000129699707,
"step": 850
},
{
"completion_length": 308.18,
"epoch": 0.2,
"grad_norm": NaN,
"kl": 138.43031896591185,
"learning_rate": 4.84989410929501e-06,
"loss": 5.5372,
"reward": -17.896833839416505,
"reward_std": 5.668911509513855,
"rewards/check_first_pass": -9.863166694641114,
"rewards/check_solution": -7.233333587646484,
"rewards/check_solution_words": -6.624666909873485,
"rewards/check_word_guesses": 5.824333515167236,
"step": 900
},
{
"completion_length": 314.82,
"epoch": 0.2111111111111111,
"grad_norm": NaN,
"kl": 1218.171366314888,
"learning_rate": 4.815042652684779e-06,
"loss": 48.7269,
"reward": -16.533334035873413,
"reward_std": 7.360376672744751,
"rewards/check_first_pass": -9.612000093460082,
"rewards/check_solution": -7.158333578109741,
"rewards/check_solution_words": -5.995000202357769,
"rewards/check_word_guesses": 6.232000198364258,
"step": 950
},
{
"completion_length": 339.66,
"epoch": 0.2222222222222222,
"grad_norm": NaN,
"kl": 174.28998464107514,
"learning_rate": 4.776709152015443e-06,
"loss": 6.9716,
"reward": -17.22483383178711,
"reward_std": 6.013938563764095,
"rewards/check_first_pass": -9.816166725158691,
"rewards/check_solution": -7.008333616256714,
"rewards/check_solution_words": -6.318000204563141,
"rewards/check_word_guesses": 5.9176667785644534,
"step": 1000
}
],
"logging_steps": 50,
"max_steps": 4500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 50,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 12,
"trial_name": null,
"trial_params": null
}