{ "best_metric": null, "best_model_checkpoint": null, "epoch": 8.0, "eval_steps": 500, "global_step": 648, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_B-Claim": { "f1-score": 0.0, "precision": 0.0, "recall": 0.0, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.0, "precision": 0.0, "recall": 0.0, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.6746310611384398, "precision": 0.6075949367088608, "recall": 0.7582938388625592, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.4376506024096386, "precision": 0.550587343690792, "recall": 0.3631592101974506, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.620034542314335, "precision": 0.5483008781977854, "recall": 0.7133631395926477, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8620734063103671, "precision": 0.8396186972154862, "recall": 0.8857621736062103, "support": 11336.0 }, "eval_O": { "f1-score": 0.8807729058286443, "precision": 0.8630878069080317, "recall": 0.8991979189247779, "support": 9226.0 }, "eval_accuracy": 0.785908251565951, "eval_loss": 0.5655931830406189, "eval_macro avg": { "f1-score": 0.49645178828591785, "precision": 0.48702709467442235, "recall": 0.5171108973119495, "support": 27619.0 }, "eval_runtime": 4.8252, "eval_samples_per_second": 16.58, "eval_steps_per_second": 2.072, "eval_weighted avg": { "f1-score": 0.7721020318885458, "precision": 0.766573839857488, "recall": 0.785908251565951, "support": 27619.0 }, "step": 81 }, { "epoch": 2.0, "eval_B-Claim": { "f1-score": 0.36036036036036034, "precision": 0.4624277456647399, "recall": 0.2952029520295203, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.5836909871244635, "precision": 0.723404255319149, "recall": 0.4892086330935252, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7396491228070177, "precision": 0.6654040404040404, "recall": 0.8325434439178515, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.46704216775764956, "precision": 0.5973520249221184, "recall": 0.3834041489627593, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7326456649010316, "precision": 0.8348157560355781, "recall": 0.6527570789865872, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8679505932065659, "precision": 0.8045344983428744, "recall": 0.9422194777699365, "support": 11336.0 }, "eval_O": { "f1-score": 0.9101698606271778, "precision": 0.9143513454386348, "recall": 0.9060264469976155, "support": 9226.0 }, "eval_accuracy": 0.8169376154096818, "eval_loss": 0.4831336438655853, "eval_macro avg": { "f1-score": 0.6659298223977524, "precision": 0.7146128094467336, "recall": 0.6430517402511137, "support": 27619.0 }, "eval_runtime": 4.8125, "eval_samples_per_second": 16.623, "eval_steps_per_second": 2.078, "eval_weighted avg": { "f1-score": 0.8047632099274016, "precision": 0.8064582361050344, "recall": 0.8169376154096818, "support": 27619.0 }, "step": 162 }, { "epoch": 3.0, "eval_B-Claim": { "f1-score": 0.5806451612903225, "precision": 0.59765625, "recall": 0.5645756457564576, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7138047138047138, "precision": 0.6708860759493671, "recall": 0.762589928057554, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7739984882842026, "precision": 0.7420289855072464, "recall": 0.8088467614533965, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6128912532475566, "precision": 0.6068103870651641, "recall": 0.6190952261934516, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8072942336126171, "precision": 0.8009779951100244, "recall": 0.8137108792846498, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8797559224694903, "precision": 0.8951789627465303, "recall": 0.8648553281580804, "support": 11336.0 }, "eval_O": { "f1-score": 0.9226235130211126, "precision": 0.912356930902925, "recall": 0.933123780619987, "support": 9226.0 }, "eval_accuracy": 0.8435859372171332, "eval_loss": 0.4283505082130432, "eval_macro avg": { "f1-score": 0.7558590408185736, "precision": 0.7465565124687511, "recall": 0.7666853642176539, "support": 27619.0 }, "eval_runtime": 4.8554, "eval_samples_per_second": 16.476, "eval_steps_per_second": 2.06, "eval_weighted avg": { "f1-score": 0.8439412578936614, "precision": 0.8447189682878564, "recall": 0.8435859372171332, "support": 27619.0 }, "step": 243 }, { "epoch": 4.0, "eval_B-Claim": { "f1-score": 0.6033333333333333, "precision": 0.5501519756838906, "recall": 0.6678966789667896, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.6847457627118644, "precision": 0.6474358974358975, "recall": 0.7266187050359713, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7818471337579618, "precision": 0.7881219903691814, "recall": 0.7756714060031595, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6216657593903103, "precision": 0.550733024691358, "recall": 0.7135716070982254, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7652218782249742, "precision": 0.7960279119699409, "recall": 0.7367113760556383, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8739165400768475, "precision": 0.8853883758826725, "recall": 0.8627381792519407, "support": 11336.0 }, "eval_O": { "f1-score": 0.9062570845613239, "precision": 0.9497505345687812, "recall": 0.8665727292434424, "support": 9226.0 }, "eval_accuracy": 0.8286324631594192, "eval_loss": 0.4826153814792633, "eval_macro avg": { "f1-score": 0.7481410702938023, "precision": 0.7382299586573888, "recall": 0.7642543830935953, "support": 27619.0 }, "eval_runtime": 4.8516, "eval_samples_per_second": 16.49, "eval_steps_per_second": 2.061, "eval_weighted avg": { "f1-score": 0.8345383371838407, "precision": 0.8451795530099128, "recall": 0.8286324631594192, "support": 27619.0 }, "step": 324 }, { "epoch": 5.0, "eval_B-Claim": { "f1-score": 0.5825242718446603, "precision": 0.6147540983606558, "recall": 0.5535055350553506, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7317073170731707, "precision": 0.7094594594594594, "recall": 0.7553956834532374, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7840735068912711, "precision": 0.7607726597325408, "recall": 0.8088467614533965, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5830055074744296, "precision": 0.6132413793103448, "recall": 0.5556110972256936, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.803728638011393, "precision": 0.8393726338561385, "recall": 0.7709885742672627, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8823829787234042, "precision": 0.8523512002630713, "recall": 0.9146083274523642, "support": 11336.0 }, "eval_O": { "f1-score": 0.9254767941792524, "precision": 0.9415657245401525, "recall": 0.9099284630392369, "support": 9226.0 }, "eval_accuracy": 0.8438031789709982, "eval_loss": 0.513404905796051, "eval_macro avg": { "f1-score": 0.7561284305996544, "precision": 0.7616453079317661, "recall": 0.7526977774209346, "support": 27619.0 }, "eval_runtime": 4.8184, "eval_samples_per_second": 16.603, "eval_steps_per_second": 2.075, "eval_weighted avg": { "f1-score": 0.8417228378374912, "precision": 0.8414191958613282, "recall": 0.8438031789709982, "support": 27619.0 }, "step": 405 }, { "epoch": 6.0, "eval_B-Claim": { "f1-score": 0.5851851851851851, "precision": 0.587360594795539, "recall": 0.5830258302583026, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7460815047021944, "precision": 0.6611111111111111, "recall": 0.8561151079136691, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7778631821675633, "precision": 0.7574850299401198, "recall": 0.7993680884676145, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5999495204442201, "precision": 0.6059138414478715, "recall": 0.5941014746313422, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.800587227795449, "precision": 0.7888138862102217, "recall": 0.8127173373075013, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8798354629791703, "precision": 0.8729593608891977, "recall": 0.8868207480592801, "support": 11336.0 }, "eval_O": { "f1-score": 0.9269283557507548, "precision": 0.9391478473690066, "recall": 0.9150227617602428, "support": 9226.0 }, "eval_accuracy": 0.843296281545313, "eval_loss": 0.5017187595367432, "eval_macro avg": { "f1-score": 0.7594900627177908, "precision": 0.744684524537581, "recall": 0.7781673354854218, "support": 27619.0 }, "eval_runtime": 4.8298, "eval_samples_per_second": 16.564, "eval_steps_per_second": 2.07, "eval_weighted avg": { "f1-score": 0.8433438519855446, "precision": 0.8437360576786502, "recall": 0.843296281545313, "support": 27619.0 }, "step": 486 }, { "epoch": 6.17, "grad_norm": 7.375185489654541, "learning_rate": 1.7530864197530865e-05, "loss": 0.4085, "step": 500 }, { "epoch": 7.0, "eval_B-Claim": { "f1-score": 0.5725338491295938, "precision": 0.6016260162601627, "recall": 0.5461254612546126, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.745644599303136, "precision": 0.722972972972973, "recall": 0.7697841726618705, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7915087187263078, "precision": 0.760932944606414, "recall": 0.8246445497630331, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5842240562846706, "precision": 0.6368731563421829, "recall": 0.5396150962259435, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7921810699588476, "precision": 0.8213333333333334, "recall": 0.7650273224043715, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8817056396148556, "precision": 0.8598256203890007, "recall": 0.9047282992237121, "support": 11336.0 }, "eval_O": { "f1-score": 0.9274176179194485, "precision": 0.9214637277979885, "recall": 0.9334489486234554, "support": 9226.0 }, "eval_accuracy": 0.8452152503711213, "eval_loss": 0.6029371023178101, "eval_macro avg": { "f1-score": 0.7564593644195513, "precision": 0.7607182531002935, "recall": 0.7547676928795714, "support": 27619.0 }, "eval_runtime": 4.8043, "eval_samples_per_second": 16.652, "eval_steps_per_second": 2.081, "eval_weighted avg": { "f1-score": 0.8415705604110203, "precision": 0.8398235103191511, "recall": 0.8452152503711213, "support": 27619.0 }, "step": 567 }, { "epoch": 8.0, "eval_B-Claim": { "f1-score": 0.5641891891891891, "precision": 0.5202492211838006, "recall": 0.6162361623616236, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7318840579710144, "precision": 0.7372262773722628, "recall": 0.7266187050359713, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7673716012084594, "precision": 0.7351664254703328, "recall": 0.8025276461295419, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5930426495115559, "precision": 0.5665832005463237, "recall": 0.6220944763809048, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7816030123722432, "precision": 0.852199413489736, "recall": 0.7218082463984103, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8746564286025915, "precision": 0.8652567975830816, "recall": 0.8842625264643613, "support": 11336.0 }, "eval_O": { "f1-score": 0.924221395658691, "precision": 0.9473085239558439, "recall": 0.9022328202904835, "support": 9226.0 }, "eval_accuracy": 0.835149715775372, "eval_loss": 0.6411188840866089, "eval_macro avg": { "f1-score": 0.7481383335019636, "precision": 0.7462842656573401, "recall": 0.7536829404373281, "support": 27619.0 }, "eval_runtime": 4.8296, "eval_samples_per_second": 16.564, "eval_steps_per_second": 2.071, "eval_weighted avg": { "f1-score": 0.8374117728187233, "precision": 0.8414359188593861, "recall": 0.835149715775372, "support": 27619.0 }, "step": 648 } ], "logging_steps": 500, "max_steps": 4050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1150326501744000.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }