{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9966024915062288,
  "eval_steps": 1000,
  "global_step": 110,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.009060022650056626,
      "grad_norm": 2.142748189776569,
      "learning_rate": 4.545454545454545e-08,
      "logits/chosen": -2.2157700061798096,
      "logits/rejected": -2.1868345737457275,
      "logps/chosen": -314.38787841796875,
      "logps/rejected": -291.1216735839844,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.09060022650056625,
      "grad_norm": 2.150777513425362,
      "learning_rate": 4.545454545454545e-07,
      "logits/chosen": -2.2591588497161865,
      "logits/rejected": -2.233074188232422,
      "logps/chosen": -323.4332275390625,
      "logps/rejected": -301.56719970703125,
      "loss": 0.6929,
      "rewards/accuracies": 0.4713541567325592,
      "rewards/chosen": 0.0009152439888566732,
      "rewards/margins": 0.000497353496029973,
      "rewards/rejected": 0.00041789052193053067,
      "step": 10
    },
    {
      "epoch": 0.1812004530011325,
      "grad_norm": 2.066434141751538,
      "learning_rate": 4.898732434036243e-07,
      "logits/chosen": -2.2525153160095215,
      "logits/rejected": -2.2378909587860107,
      "logps/chosen": -325.8227233886719,
      "logps/rejected": -305.6023254394531,
      "loss": 0.6861,
      "rewards/accuracies": 0.7789062261581421,
      "rewards/chosen": 0.0187881700694561,
      "rewards/margins": 0.014950500801205635,
      "rewards/rejected": 0.0038376704324036837,
      "step": 20
    },
    {
      "epoch": 0.2718006795016987,
      "grad_norm": 1.935984789640625,
      "learning_rate": 4.5591914535745817e-07,
      "logits/chosen": -2.1835825443267822,
      "logits/rejected": -2.173578977584839,
      "logps/chosen": -313.19586181640625,
      "logps/rejected": -300.5938720703125,
      "loss": 0.6661,
      "rewards/accuracies": 0.8046875,
      "rewards/chosen": 0.04926218464970589,
      "rewards/margins": 0.05489668250083923,
      "rewards/rejected": -0.005634505767375231,
      "step": 30
    },
    {
      "epoch": 0.362400906002265,
      "grad_norm": 2.2867184944223293,
      "learning_rate": 4.0140242178441665e-07,
      "logits/chosen": -2.1189799308776855,
      "logits/rejected": -2.114716053009033,
      "logps/chosen": -317.4134826660156,
      "logps/rejected": -306.8409423828125,
      "loss": 0.6458,
      "rewards/accuracies": 0.8046875,
      "rewards/chosen": 0.03598688170313835,
      "rewards/margins": 0.09652377665042877,
      "rewards/rejected": -0.06053689122200012,
      "step": 40
    },
    {
      "epoch": 0.45300113250283125,
      "grad_norm": 2.40453710231171,
      "learning_rate": 3.317669908293554e-07,
      "logits/chosen": -1.9152988195419312,
      "logits/rejected": -1.9253225326538086,
      "logps/chosen": -321.01800537109375,
      "logps/rejected": -334.33349609375,
      "loss": 0.5959,
      "rewards/accuracies": 0.8109375238418579,
      "rewards/chosen": -0.13218708336353302,
      "rewards/margins": 0.22598442435264587,
      "rewards/rejected": -0.3581715524196625,
      "step": 50
    },
    {
      "epoch": 0.5436013590033975,
      "grad_norm": 2.548257476263302,
      "learning_rate": 2.53966490958702e-07,
      "logits/chosen": -1.8517974615097046,
      "logits/rejected": -1.851008653640747,
      "logps/chosen": -349.90252685546875,
      "logps/rejected": -362.4208984375,
      "loss": 0.5612,
      "rewards/accuracies": 0.815625011920929,
      "rewards/chosen": -0.2689761817455292,
      "rewards/margins": 0.33339887857437134,
      "rewards/rejected": -0.6023750305175781,
      "step": 60
    },
    {
      "epoch": 0.6342015855039638,
      "grad_norm": 2.564454636474787,
      "learning_rate": 1.7576990616793137e-07,
      "logits/chosen": -1.8310279846191406,
      "logits/rejected": -1.8546864986419678,
      "logps/chosen": -362.08538818359375,
      "logps/rejected": -394.1742858886719,
      "loss": 0.5353,
      "rewards/accuracies": 0.803906261920929,
      "rewards/chosen": -0.4547205865383148,
      "rewards/margins": 0.4052005410194397,
      "rewards/rejected": -0.8599211573600769,
      "step": 70
    },
    {
      "epoch": 0.72480181200453,
      "grad_norm": 2.5983243283209183,
      "learning_rate": 1.0498577260720048e-07,
      "logits/chosen": -1.8076627254486084,
      "logits/rejected": -1.8311001062393188,
      "logps/chosen": -375.9088134765625,
      "logps/rejected": -410.9624938964844,
      "loss": 0.5223,
      "rewards/accuracies": 0.782031238079071,
      "rewards/chosen": -0.5610671639442444,
      "rewards/margins": 0.4681544303894043,
      "rewards/rejected": -1.029221534729004,
      "step": 80
    },
    {
      "epoch": 0.8154020385050963,
      "grad_norm": 2.742604490623868,
      "learning_rate": 4.868243561723534e-08,
      "logits/chosen": -1.809565544128418,
      "logits/rejected": -1.8266630172729492,
      "logps/chosen": -381.6236267089844,
      "logps/rejected": -415.415771484375,
      "loss": 0.513,
      "rewards/accuracies": 0.792187511920929,
      "rewards/chosen": -0.6585050225257874,
      "rewards/margins": 0.4940672814846039,
      "rewards/rejected": -1.1525723934173584,
      "step": 90
    },
    {
      "epoch": 0.9060022650056625,
      "grad_norm": 2.4312610291944887,
      "learning_rate": 1.2482220564763667e-08,
      "logits/chosen": -1.8194091320037842,
      "logits/rejected": -1.811342477798462,
      "logps/chosen": -386.8002624511719,
      "logps/rejected": -417.9185485839844,
      "loss": 0.508,
      "rewards/accuracies": 0.817187488079071,
      "rewards/chosen": -0.6888748407363892,
      "rewards/margins": 0.544217050075531,
      "rewards/rejected": -1.2330917119979858,
      "step": 100
    },
    {
      "epoch": 0.9966024915062288,
      "grad_norm": 2.609201337420324,
      "learning_rate": 0.0,
      "logits/chosen": -1.8002452850341797,
      "logits/rejected": -1.796565294265747,
      "logps/chosen": -391.4776611328125,
      "logps/rejected": -427.072265625,
      "loss": 0.506,
      "rewards/accuracies": 0.8046875,
      "rewards/chosen": -0.7028344869613647,
      "rewards/margins": 0.5477779507637024,
      "rewards/rejected": -1.250612497329712,
      "step": 110
    },
    {
      "epoch": 0.9966024915062288,
      "step": 110,
      "total_flos": 0.0,
      "train_loss": 0.5847668994556774,
      "train_runtime": 2901.4735,
      "train_samples_per_second": 38.945,
      "train_steps_per_second": 0.038
    }
  ],
  "logging_steps": 10,
  "max_steps": 110,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}