{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9932523616734144,
  "eval_steps": 400,
  "global_step": 115,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00863697705802969,
      "grad_norm": 4.147310027334897,
      "learning_rate": 4.166666666666666e-08,
      "logits/chosen": -1.0488052368164062,
      "logits/rejected": -0.5750762224197388,
      "logps/chosen": -273.85137939453125,
      "logps/rejected": -288.77301025390625,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.043184885290148446,
      "grad_norm": 4.588427612553101,
      "learning_rate": 2.0833333333333333e-07,
      "logits/chosen": -1.007105827331543,
      "logits/rejected": -0.7435885667800903,
      "logps/chosen": -253.44888305664062,
      "logps/rejected": -266.1280517578125,
      "loss": 0.6932,
      "rewards/accuracies": 0.4140625,
      "rewards/chosen": 0.0007548874709755182,
      "rewards/margins": 0.0008569365600124002,
      "rewards/rejected": -0.0001020491763483733,
      "step": 5
    },
    {
      "epoch": 0.08636977058029689,
      "grad_norm": 4.548746866725734,
      "learning_rate": 4.1666666666666667e-07,
      "logits/chosen": -0.9015377759933472,
      "logits/rejected": -0.7045949101448059,
      "logps/chosen": -256.82440185546875,
      "logps/rejected": -265.10888671875,
      "loss": 0.693,
      "rewards/accuracies": 0.48124998807907104,
      "rewards/chosen": -0.0008276132866740227,
      "rewards/margins": 5.403275281423703e-05,
      "rewards/rejected": -0.0008816460031084716,
      "step": 10
    },
    {
      "epoch": 0.12955465587044535,
      "grad_norm": 4.335294831627272,
      "learning_rate": 4.989541370516523e-07,
      "logits/chosen": -0.9198816418647766,
      "logits/rejected": -0.6794015169143677,
      "logps/chosen": -249.69180297851562,
      "logps/rejected": -263.43963623046875,
      "loss": 0.6917,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -0.006573961116373539,
      "rewards/margins": 0.003779920982196927,
      "rewards/rejected": -0.010353881865739822,
      "step": 15
    },
    {
      "epoch": 0.17273954116059378,
      "grad_norm": 4.361096350569026,
      "learning_rate": 4.925944144036026e-07,
      "logits/chosen": -0.966783344745636,
      "logits/rejected": -0.6541803479194641,
      "logps/chosen": -256.15704345703125,
      "logps/rejected": -271.27569580078125,
      "loss": 0.6873,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -0.01756737381219864,
      "rewards/margins": 0.011632733047008514,
      "rewards/rejected": -0.029200103133916855,
      "step": 20
    },
    {
      "epoch": 0.21592442645074225,
      "grad_norm": 7.667484004542981,
      "learning_rate": 4.806034494187948e-07,
      "logits/chosen": -0.9687163233757019,
      "logits/rejected": -0.6905485391616821,
      "logps/chosen": -263.11126708984375,
      "logps/rejected": -281.65869140625,
      "loss": 0.6805,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -0.046140603721141815,
      "rewards/margins": 0.02697494998574257,
      "rewards/rejected": -0.07311554253101349,
      "step": 25
    },
    {
      "epoch": 0.2591093117408907,
      "grad_norm": 4.383791095797972,
      "learning_rate": 4.632595833354104e-07,
      "logits/chosen": -0.9939810633659363,
      "logits/rejected": -0.7504791021347046,
      "logps/chosen": -259.5863952636719,
      "logps/rejected": -277.5854187011719,
      "loss": 0.6719,
      "rewards/accuracies": 0.6812499761581421,
      "rewards/chosen": -0.09566532075405121,
      "rewards/margins": 0.046156905591487885,
      "rewards/rejected": -0.1418222188949585,
      "step": 30
    },
    {
      "epoch": 0.30229419703103916,
      "grad_norm": 7.675641265735793,
      "learning_rate": 4.409654120384862e-07,
      "logits/chosen": -1.0092413425445557,
      "logits/rejected": -0.7530118227005005,
      "logps/chosen": -287.18853759765625,
      "logps/rejected": -316.9033203125,
      "loss": 0.6416,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.2571231424808502,
      "rewards/margins": 0.14525508880615234,
      "rewards/rejected": -0.4023781716823578,
      "step": 35
    },
    {
      "epoch": 0.34547908232118757,
      "grad_norm": 8.57248295436779,
      "learning_rate": 4.1423844077058456e-07,
      "logits/chosen": -1.3185703754425049,
      "logits/rejected": -0.9748894572257996,
      "logps/chosen": -328.22552490234375,
      "logps/rejected": -359.8807678222656,
      "loss": 0.6414,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -0.6841151714324951,
      "rewards/margins": 0.19913408160209656,
      "rewards/rejected": -0.8832491636276245,
      "step": 40
    },
    {
      "epoch": 0.38866396761133604,
      "grad_norm": 12.061629079887432,
      "learning_rate": 3.8369907149976854e-07,
      "logits/chosen": -1.420593500137329,
      "logits/rejected": -1.1807670593261719,
      "logps/chosen": -342.5406188964844,
      "logps/rejected": -385.1309509277344,
      "loss": 0.5983,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -0.9875448346138,
      "rewards/margins": 0.29662927985191345,
      "rewards/rejected": -1.2841740846633911,
      "step": 45
    },
    {
      "epoch": 0.4318488529014845,
      "grad_norm": 14.093087795015201,
      "learning_rate": 3.5005620178906946e-07,
      "logits/chosen": -1.455536127090454,
      "logits/rejected": -1.192246675491333,
      "logps/chosen": -368.83203125,
      "logps/rejected": -424.71746826171875,
      "loss": 0.6055,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -1.1400392055511475,
      "rewards/margins": 0.5143567323684692,
      "rewards/rejected": -1.6543958187103271,
      "step": 50
    },
    {
      "epoch": 0.4750337381916329,
      "grad_norm": 15.447721210624003,
      "learning_rate": 3.1409076945484506e-07,
      "logits/chosen": -1.4618675708770752,
      "logits/rejected": -1.0830767154693604,
      "logps/chosen": -355.78460693359375,
      "logps/rejected": -439.4186096191406,
      "loss": 0.5889,
      "rewards/accuracies": 0.737500011920929,
      "rewards/chosen": -1.087812900543213,
      "rewards/margins": 0.5787354707717896,
      "rewards/rejected": -1.666548490524292,
      "step": 55
    },
    {
      "epoch": 0.5182186234817814,
      "grad_norm": 81.08313417012548,
      "learning_rate": 2.76637624984969e-07,
      "logits/chosen": -1.527682900428772,
      "logits/rejected": -1.2854654788970947,
      "logps/chosen": -379.35748291015625,
      "logps/rejected": -489.2784118652344,
      "loss": 0.6214,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -1.3896359205245972,
      "rewards/margins": 0.8340662121772766,
      "rewards/rejected": -2.2237021923065186,
      "step": 60
    },
    {
      "epoch": 0.5614035087719298,
      "grad_norm": 17.702377673251867,
      "learning_rate": 2.3856615250480137e-07,
      "logits/chosen": -1.4518334865570068,
      "logits/rejected": -1.2453023195266724,
      "logps/chosen": -411.2533264160156,
      "logps/rejected": -539.5147705078125,
      "loss": 0.5552,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -1.6073110103607178,
      "rewards/margins": 1.1000343561172485,
      "rewards/rejected": -2.707345485687256,
      "step": 65
    },
    {
      "epoch": 0.6045883940620783,
      "grad_norm": 29.87177949584331,
      "learning_rate": 2.0076008912832354e-07,
      "logits/chosen": -1.4434335231781006,
      "logits/rejected": -1.3378995656967163,
      "logps/chosen": -387.21954345703125,
      "logps/rejected": -468.7499084472656,
      "loss": 0.5621,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -1.3989627361297607,
      "rewards/margins": 0.7221517562866211,
      "rewards/rejected": -2.121114492416382,
      "step": 70
    },
    {
      "epoch": 0.6477732793522267,
      "grad_norm": 12.350143755505647,
      "learning_rate": 1.640970111393718e-07,
      "logits/chosen": -1.4322903156280518,
      "logits/rejected": -1.3162221908569336,
      "logps/chosen": -376.51715087890625,
      "logps/rejected": -457.11688232421875,
      "loss": 0.5743,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -1.2510515451431274,
      "rewards/margins": 0.697724461555481,
      "rewards/rejected": -1.9487760066986084,
      "step": 75
    },
    {
      "epoch": 0.6909581646423751,
      "grad_norm": 13.161201224229147,
      "learning_rate": 1.2942796318163593e-07,
      "logits/chosen": -1.448541283607483,
      "logits/rejected": -1.256659984588623,
      "logps/chosen": -365.4916076660156,
      "logps/rejected": -444.9703674316406,
      "loss": 0.552,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -1.1402069330215454,
      "rewards/margins": 0.6963046789169312,
      "rewards/rejected": -1.8365116119384766,
      "step": 80
    },
    {
      "epoch": 0.7341430499325237,
      "grad_norm": 16.80049441603505,
      "learning_rate": 9.755770331648641e-08,
      "logits/chosen": -1.5232871770858765,
      "logits/rejected": -1.2771618366241455,
      "logps/chosen": -405.6431884765625,
      "logps/rejected": -508.34942626953125,
      "loss": 0.5654,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": -1.496346116065979,
      "rewards/margins": 0.8162988424301147,
      "rewards/rejected": -2.3126449584960938,
      "step": 85
    },
    {
      "epoch": 0.7773279352226721,
      "grad_norm": 14.621658954691451,
      "learning_rate": 6.92260225118122e-08,
      "logits/chosen": -1.5320792198181152,
      "logits/rejected": -1.258817195892334,
      "logps/chosen": -430.893310546875,
      "logps/rejected": -581.5165405273438,
      "loss": 0.5326,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -1.643710732460022,
      "rewards/margins": 1.3027892112731934,
      "rewards/rejected": -2.946500062942505,
      "step": 90
    },
    {
      "epoch": 0.8205128205128205,
      "grad_norm": 14.642434820372547,
      "learning_rate": 4.509057218473686e-08,
      "logits/chosen": -1.4839651584625244,
      "logits/rejected": -1.4334385395050049,
      "logps/chosen": -416.0023498535156,
      "logps/rejected": -521.580810546875,
      "loss": 0.5546,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": -1.589343786239624,
      "rewards/margins": 0.9084415435791016,
      "rewards/rejected": -2.4977850914001465,
      "step": 95
    },
    {
      "epoch": 0.863697705802969,
      "grad_norm": 19.36387744586808,
      "learning_rate": 2.5711598415256496e-08,
      "logits/chosen": -1.5397363901138306,
      "logits/rejected": -1.4642789363861084,
      "logps/chosen": -442.88238525390625,
      "logps/rejected": -596.0775146484375,
      "loss": 0.5146,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -1.7794828414916992,
      "rewards/margins": 1.3754386901855469,
      "rewards/rejected": -3.154921770095825,
      "step": 100
    },
    {
      "epoch": 0.9068825910931174,
      "grad_norm": 15.226538097583589,
      "learning_rate": 1.1538937189091823e-08,
      "logits/chosen": -1.541649580001831,
      "logits/rejected": -1.3221733570098877,
      "logps/chosen": -427.8173828125,
      "logps/rejected": -551.5006103515625,
      "loss": 0.527,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -1.7289737462997437,
      "rewards/margins": 1.1109263896942139,
      "rewards/rejected": -2.839900255203247,
      "step": 105
    },
    {
      "epoch": 0.9500674763832658,
      "grad_norm": 19.202027293438395,
      "learning_rate": 2.901572543725972e-09,
      "logits/chosen": -1.5520999431610107,
      "logits/rejected": -1.2729403972625732,
      "logps/chosen": -413.5086364746094,
      "logps/rejected": -557.3928833007812,
      "loss": 0.5247,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -1.6233694553375244,
      "rewards/margins": 1.2928354740142822,
      "rewards/rejected": -2.9162049293518066,
      "step": 110
    },
    {
      "epoch": 0.9932523616734144,
      "grad_norm": 12.814179367231938,
      "learning_rate": 0.0,
      "logits/chosen": -1.5744760036468506,
      "logits/rejected": -1.363406777381897,
      "logps/chosen": -421.62530517578125,
      "logps/rejected": -535.97216796875,
      "loss": 0.5234,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -1.6327297687530518,
      "rewards/margins": 1.0214827060699463,
      "rewards/rejected": -2.654212236404419,
      "step": 115
    },
    {
      "epoch": 0.9932523616734144,
      "step": 115,
      "total_flos": 0.0,
      "train_loss": 0.6000239335972329,
      "train_runtime": 5043.4745,
      "train_samples_per_second": 2.938,
      "train_steps_per_second": 0.023
    }
  ],
  "logging_steps": 5,
  "max_steps": 115,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}