{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.0, "eval_steps": 500, "global_step": 492, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_B-Claim": { "f1-score": 0.0, "precision": 0.0, "recall": 0.0, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.0, "precision": 0.0, "recall": 0.0, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.22222222222222218, "precision": 0.780952380952381, "recall": 0.12954186413902052, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.3239807781739265, "precision": 0.42653061224489797, "recall": 0.26118470382404396, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.43526510480887787, "precision": 0.5735174654752234, "recall": 0.35072031793343267, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8370338686471983, "precision": 0.7484180515958556, "recall": 0.9494530698659139, "support": 11336.0 }, "eval_O": { "f1-score": 0.8603704893457543, "precision": 0.8500846381718155, "recall": 0.8709083026230219, "support": 9226.0 }, "eval_accuracy": 0.7469857706651218, "eval_loss": 0.6934069395065308, "eval_macro avg": { "f1-score": 0.3826960661711399, "precision": 0.48278616406288194, "recall": 0.3659726083407761, "support": 27619.0 }, "eval_runtime": 1.3993, "eval_samples_per_second": 57.171, "eval_steps_per_second": 7.146, "eval_weighted avg": { "f1-score": 0.7147071394985115, "precision": 0.7126373293529855, "recall": 0.7469857706651218, "support": 27619.0 }, "step": 41 }, { "epoch": 2.0, "eval_B-Claim": { "f1-score": 0.038834951456310676, "precision": 0.15789473684210525, "recall": 0.02214022140221402, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.0, "precision": 0.0, "recall": 0.0, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.6716981132075472, "precision": 0.5579937304075235, "recall": 0.8436018957345972, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.4063064542617836, "precision": 0.5924329501915708, "recall": 0.3091727068232942, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7052432687765706, "precision": 0.6722197208464655, "recall": 0.741679085941381, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8602590320381731, "precision": 0.7885760494008675, "recall": 0.9462773465067043, "support": 11336.0 }, "eval_O": { "f1-score": 0.9017727728843795, "precision": 0.928374655647383, "recall": 0.8766529373509646, "support": 9226.0 }, "eval_accuracy": 0.7996306890184294, "eval_loss": 0.5073896646499634, "eval_macro avg": { "f1-score": 0.5120163703749664, "precision": 0.5282131204765593, "recall": 0.5342177419655937, "support": 27619.0 }, "eval_runtime": 1.4087, "eval_samples_per_second": 56.791, "eval_steps_per_second": 7.099, "eval_weighted avg": { "f1-score": 0.7803558416622501, "precision": 0.7829387271741762, "recall": 0.7996306890184294, "support": 27619.0 }, "step": 82 }, { "epoch": 3.0, "eval_B-Claim": { "f1-score": 0.5127334465195247, "precision": 0.4748427672955975, "recall": 0.5571955719557196, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.4622641509433962, "precision": 0.6712328767123288, "recall": 0.35251798561151076, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7555217060167556, "precision": 0.7294117647058823, "recall": 0.7835703001579779, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6194237918215614, "precision": 0.5786846103755156, "recall": 0.6663334166458386, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7576243980738363, "precision": 0.7035775127768313, "recall": 0.8206656731246895, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8711979995453512, "precision": 0.8988648090815273, "recall": 0.8451834862385321, "support": 11336.0 }, "eval_O": { "f1-score": 0.9194933920704845, "precision": 0.9345198119543318, "recall": 0.9049425536527206, "support": 9226.0 }, "eval_accuracy": 0.8307324667801151, "eval_loss": 0.44564658403396606, "eval_macro avg": { "f1-score": 0.6997512692844158, "precision": 0.7130191647002879, "recall": 0.704344141055284, "support": 27619.0 }, "eval_runtime": 1.4028, "eval_samples_per_second": 57.03, "eval_steps_per_second": 7.129, "eval_weighted avg": { "f1-score": 0.8343535169045084, "precision": 0.8409693083395489, "recall": 0.8307324667801151, "support": 27619.0 }, "step": 123 }, { "epoch": 4.0, "eval_B-Claim": { "f1-score": 0.5682242990654206, "precision": 0.5757575757575758, "recall": 0.5608856088560885, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.6807017543859649, "precision": 0.6643835616438356, "recall": 0.697841726618705, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7734375, "precision": 0.7650695517774343, "recall": 0.7819905213270142, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.5907725907725908, "precision": 0.650827067669173, "recall": 0.540864783804049, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7778304121269539, "precision": 0.7433227704843821, "recall": 0.8156979632389468, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8844762232942798, "precision": 0.8642255892255892, "recall": 0.9056986591390261, "support": 11336.0 }, "eval_O": { "f1-score": 0.921192990094699, "precision": 0.925120244862265, "recall": 0.917298937784522, "support": 9226.0 }, "eval_accuracy": 0.8428980049965603, "eval_loss": 0.43127578496932983, "eval_macro avg": { "f1-score": 0.7423765385342728, "precision": 0.7412437659171793, "recall": 0.7457540286811932, "support": 27619.0 }, "eval_runtime": 1.4015, "eval_samples_per_second": 57.081, "eval_steps_per_second": 7.135, "eval_weighted avg": { "f1-score": 0.8397478190947073, "precision": 0.8387326527993001, "recall": 0.8428980049965603, "support": 27619.0 }, "step": 164 }, { "epoch": 5.0, "eval_B-Claim": { "f1-score": 0.5912408759124088, "precision": 0.5848375451263538, "recall": 0.5977859778597786, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7043189368770765, "precision": 0.654320987654321, "recall": 0.762589928057554, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7843450479233227, "precision": 0.7932148626817448, "recall": 0.7756714060031595, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6389629270656652, "precision": 0.6200329179402775, "recall": 0.6590852286928268, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7836119613845066, "precision": 0.7448522829006267, "recall": 0.8266269249875807, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8858710076970716, "precision": 0.8884649511978705, "recall": 0.8832921665490473, "support": 11336.0 }, "eval_O": { "f1-score": 0.922573488630061, "precision": 0.9446842344388914, "recall": 0.9014740949490571, "support": 9226.0 }, "eval_accuracy": 0.846880770484087, "eval_loss": 0.4379854202270508, "eval_macro avg": { "f1-score": 0.7587034636414446, "precision": 0.7472011117057266, "recall": 0.7723608181570006, "support": 27619.0 }, "eval_runtime": 1.3979, "eval_samples_per_second": 57.229, "eval_steps_per_second": 7.154, "eval_weighted avg": { "f1-score": 0.8487785699607272, "precision": 0.851550794162027, "recall": 0.846880770484087, "support": 27619.0 }, "step": 205 }, { "epoch": 6.0, "eval_B-Claim": { "f1-score": 0.6046511627906976, "precision": 0.5868055555555556, "recall": 0.6236162361623616, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.698961937716263, "precision": 0.6733333333333333, "recall": 0.7266187050359713, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7665130568356375, "precision": 0.7458893871449925, "recall": 0.7883096366508688, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.640272042749575, "precision": 0.6227261989133003, "recall": 0.6588352911772057, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7846272891410886, "precision": 0.8159871244635193, "recall": 0.7555886736214605, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8826475849731664, "precision": 0.8951378809869376, "recall": 0.870501058574453, "support": 11336.0 }, "eval_O": { "f1-score": 0.923027340602675, "precision": 0.9149185390267277, "recall": 0.9312811619336657, "support": 9226.0 }, "eval_accuracy": 0.846735942648177, "eval_loss": 0.45857080817222595, "eval_macro avg": { "f1-score": 0.7572429164013005, "precision": 0.750685431346338, "recall": 0.7649643947365695, "support": 27619.0 }, "eval_runtime": 1.4052, "eval_samples_per_second": 56.933, "eval_steps_per_second": 7.117, "eval_weighted avg": { "f1-score": 0.8475667251954796, "precision": 0.8489516884853674, "recall": 0.846735942648177, "support": 27619.0 }, "step": 246 }, { "epoch": 7.0, "eval_B-Claim": { "f1-score": 0.5553319919517101, "precision": 0.6106194690265486, "recall": 0.5092250922509225, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7094594594594595, "precision": 0.6687898089171974, "recall": 0.7553956834532374, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7738181818181818, "precision": 0.7169811320754716, "recall": 0.8404423380726699, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.58111927642736, "precision": 0.6686178861788618, "recall": 0.5138715321169708, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7968147957873105, "precision": 0.825, "recall": 0.7704918032786885, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8833186379476844, "precision": 0.8428010576075635, "recall": 0.9279287226534932, "support": 11336.0 }, "eval_O": { "f1-score": 0.9239772478669874, "precision": 0.9325458158533892, "recall": 0.9155647084326902, "support": 9226.0 }, "eval_accuracy": 0.8453600782070314, "eval_loss": 0.5199459791183472, "eval_macro avg": { "f1-score": 0.7462627987512419, "precision": 0.752193595665576, "recall": 0.747559982894096, "support": 27619.0 }, "eval_runtime": 1.4073, "eval_samples_per_second": 56.846, "eval_steps_per_second": 7.106, "eval_weighted avg": { "f1-score": 0.8402149723478831, "precision": 0.8402119687481008, "recall": 0.8453600782070314, "support": 27619.0 }, "step": 287 }, { "epoch": 8.0, "eval_B-Claim": { "f1-score": 0.5830258302583026, "precision": 0.5830258302583026, "recall": 0.5830258302583026, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.6918238993710691, "precision": 0.6145251396648045, "recall": 0.7913669064748201, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7906626506024096, "precision": 0.7553956834532374, "recall": 0.8293838862559242, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.604899098870415, "precision": 0.6144920061887571, "recall": 0.5956010997250687, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7920600338900993, "precision": 0.7724268177525968, "recall": 0.8127173373075013, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8806889128094725, "precision": 0.8602069139540752, "recall": 0.9021700776287932, "support": 11336.0 }, "eval_O": { "f1-score": 0.9176536626438395, "precision": 0.9516823844452207, "recall": 0.8859744201170605, "support": 9226.0 }, "eval_accuracy": 0.8404721387450668, "eval_loss": 0.5408686399459839, "eval_macro avg": { "f1-score": 0.751544869777944, "precision": 0.7359649679595704, "recall": 0.7714627939667815, "support": 27619.0 }, "eval_runtime": 1.4019, "eval_samples_per_second": 57.067, "eval_steps_per_second": 7.133, "eval_weighted avg": { "f1-score": 0.8406905872698305, "precision": 0.842412448619121, "recall": 0.8404721387450668, "support": 27619.0 }, "step": 328 }, { "epoch": 9.0, "eval_B-Claim": { "f1-score": 0.5989304812834225, "precision": 0.5793103448275863, "recall": 0.6199261992619927, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7019867549668876, "precision": 0.6503067484662577, "recall": 0.762589928057554, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7803379416282642, "precision": 0.7593423019431988, "recall": 0.8025276461295419, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6273838630806847, "precision": 0.6140224934194783, "recall": 0.6413396650837291, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7934621099554235, "precision": 0.7911111111111111, "recall": 0.7958271236959762, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8815708373690234, "precision": 0.8799437510986113, "recall": 0.8832039520112914, "support": 11336.0 }, "eval_O": { "f1-score": 0.9239843448541977, "precision": 0.9401009534492428, "recall": 0.9084110123563841, "support": 9226.0 }, "eval_accuracy": 0.8451790434121438, "eval_loss": 0.5502071976661682, "eval_macro avg": { "f1-score": 0.7582366190197004, "precision": 0.7448768149022124, "recall": 0.7734036466566385, "support": 27619.0 }, "eval_runtime": 1.4066, "eval_samples_per_second": 56.875, "eval_steps_per_second": 7.109, "eval_weighted avg": { "f1-score": 0.8464972981637662, "precision": 0.8481724117610957, "recall": 0.8451790434121438, "support": 27619.0 }, "step": 369 }, { "epoch": 10.0, "eval_B-Claim": { "f1-score": 0.5747126436781611, "precision": 0.5976095617529881, "recall": 0.5535055350553506, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7220447284345047, "precision": 0.6494252873563219, "recall": 0.8129496402877698, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7804499612102405, "precision": 0.7667682926829268, "recall": 0.7946287519747235, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6141898117678031, "precision": 0.6487764182424917, "recall": 0.583104223944014, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.8033694344163658, "precision": 0.7791783380018674, "recall": 0.829110779930452, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8862162278252652, "precision": 0.8636249476768523, "recall": 0.9100211714890614, "support": 11336.0 }, "eval_O": { "f1-score": 0.919528787124606, "precision": 0.9387916431394693, "recall": 0.901040537611099, "support": 9226.0 }, "eval_accuracy": 0.8471342191969297, "eval_loss": 0.6130572557449341, "eval_macro avg": { "f1-score": 0.7572159420652781, "precision": 0.7491677841218453, "recall": 0.7691943771846387, "support": 27619.0 }, "eval_runtime": 1.3989, "eval_samples_per_second": 57.187, "eval_steps_per_second": 7.148, "eval_weighted avg": { "f1-score": 0.845592238174344, "precision": 0.8455473111155869, "recall": 0.8471342191969297, "support": 27619.0 }, "step": 410 }, { "epoch": 11.0, "eval_B-Claim": { "f1-score": 0.6046511627906976, "precision": 0.5868055555555556, "recall": 0.6236162361623616, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7225806451612903, "precision": 0.6549707602339181, "recall": 0.8057553956834532, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.7802621434078643, "precision": 0.7620481927710844, "recall": 0.7993680884676145, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6082984392843548, "precision": 0.6177835051546392, "recall": 0.599100224943764, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7908689248895435, "precision": 0.7816593886462883, "recall": 0.8002980625931445, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8806563039723663, "precision": 0.8624830852503383, "recall": 0.8996118560338744, "support": 11336.0 }, "eval_O": { "f1-score": 0.9230940580275101, "precision": 0.9492612530065284, "recall": 0.8983308042488619, "support": 9226.0 }, "eval_accuracy": 0.8429342119555379, "eval_loss": 0.6233356595039368, "eval_macro avg": { "f1-score": 0.758630239647661, "precision": 0.7450016772311931, "recall": 0.7751543811618677, "support": 27619.0 }, "eval_runtime": 1.4039, "eval_samples_per_second": 56.982, "eval_steps_per_second": 7.123, "eval_weighted avg": { "f1-score": 0.8430287828720368, "precision": 0.8440807587297484, "recall": 0.8429342119555379, "support": 27619.0 }, "step": 451 }, { "epoch": 12.0, "eval_B-Claim": { "f1-score": 0.5878003696857672, "precision": 0.5888888888888889, "recall": 0.5867158671586716, "support": 271.0 }, "eval_B-MajorClaim": { "f1-score": 0.7242524916943521, "precision": 0.6728395061728395, "recall": 0.7841726618705036, "support": 139.0 }, "eval_B-Premise": { "f1-score": 0.782477341389728, "precision": 0.7496382054992764, "recall": 0.8183254344391785, "support": 633.0 }, "eval_I-Claim": { "f1-score": 0.6088871615494235, "precision": 0.6320602474448628, "recall": 0.5873531617095726, "support": 4001.0 }, "eval_I-MajorClaim": { "f1-score": 0.7941320293398534, "precision": 0.7818969667790082, "recall": 0.8067560854446101, "support": 2013.0 }, "eval_I-Premise": { "f1-score": 0.8839002656156285, "precision": 0.8592370481425954, "recall": 0.9100211714890614, "support": 11336.0 }, "eval_O": { "f1-score": 0.9207075498019084, "precision": 0.948821161587119, "recall": 0.8942120095382614, "support": 9226.0 }, "eval_accuracy": 0.844563525109526, "eval_loss": 0.6488178968429565, "eval_macro avg": { "f1-score": 0.7574510298680944, "precision": 0.7476260035020843, "recall": 0.7696509130928371, "support": 27619.0 }, "eval_runtime": 1.3992, "eval_samples_per_second": 57.177, "eval_steps_per_second": 7.147, "eval_weighted avg": { "f1-score": 0.8437799966523624, "precision": 0.8445128868904818, "recall": 0.844563525109526, "support": 27619.0 }, "step": 492 } ], "logging_steps": 500, "max_steps": 656, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 500, "total_flos": 1725489752616000.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }