{ "best_metric": null, "best_model_checkpoint": null, "epoch": 100.0, "eval_steps": 500, "global_step": 9600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.4137675762176514, "learning_rate": 4.9500000000000004e-05, "loss": 1.1489, "step": 96 }, { "epoch": 1.0, "eval_LOCATION_f1": 0.0, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.0, "eval_LOCATION_recall": 0.0, "eval_ORGANIZATION_f1": 0.0, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.0, "eval_ORGANIZATION_recall": 0.0, "eval_PERSON_f1": 0.0, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.0, "eval_PERSON_recall": 0.0, "eval_loss": 0.6808387637138367, "eval_overall_accuracy": 0.8434547908232118, "eval_overall_f1": 0.0, "eval_overall_precision": 0.0, "eval_overall_recall": 0.0, "eval_runtime": 0.278, "eval_samples_per_second": 611.441, "eval_steps_per_second": 10.79, "step": 96 }, { "epoch": 2.0, "grad_norm": 0.9978224039077759, "learning_rate": 4.9e-05, "loss": 0.6648, "step": 192 }, { "epoch": 2.0, "eval_LOCATION_f1": 0.0, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.0, "eval_LOCATION_recall": 0.0, "eval_ORGANIZATION_f1": 0.01111111111111111, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.5, "eval_ORGANIZATION_recall": 0.0056179775280898875, "eval_PERSON_f1": 0.0, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.0, "eval_PERSON_recall": 0.0, "eval_loss": 0.5507592558860779, "eval_overall_accuracy": 0.8437246963562753, "eval_overall_f1": 0.005063291139240506, "eval_overall_precision": 0.3333333333333333, "eval_overall_recall": 0.002551020408163265, "eval_runtime": 0.2793, "eval_samples_per_second": 608.59, "eval_steps_per_second": 10.74, "step": 192 }, { "epoch": 3.0, "grad_norm": 1.2066327333450317, "learning_rate": 4.85e-05, "loss": 0.5545, "step": 288 }, { "epoch": 3.0, "eval_LOCATION_f1": 0.0, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.0, "eval_LOCATION_recall": 0.0, "eval_ORGANIZATION_f1": 0.10628019323671496, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.3793103448275862, "eval_ORGANIZATION_recall": 0.06179775280898876, "eval_PERSON_f1": 0.15950920245398775, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.37142857142857144, "eval_PERSON_recall": 0.1015625, "eval_loss": 0.4323981702327728, "eval_overall_accuracy": 0.854251012145749, "eval_overall_f1": 0.10480349344978165, "eval_overall_precision": 0.36363636363636365, "eval_overall_recall": 0.061224489795918366, "eval_runtime": 0.2802, "eval_samples_per_second": 606.765, "eval_steps_per_second": 10.708, "step": 288 }, { "epoch": 4.0, "grad_norm": 0.8178198933601379, "learning_rate": 4.8e-05, "loss": 0.4347, "step": 384 }, { "epoch": 4.0, "eval_LOCATION_f1": 0.08080808080808081, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.3076923076923077, "eval_LOCATION_recall": 0.046511627906976744, "eval_ORGANIZATION_f1": 0.32573289902280134, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.3875968992248062, "eval_ORGANIZATION_recall": 0.2808988764044944, "eval_PERSON_f1": 0.45774647887323944, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.4166666666666667, "eval_PERSON_recall": 0.5078125, "eval_loss": 0.31846487522125244, "eval_overall_accuracy": 0.8909581646423752, "eval_overall_f1": 0.34492753623188405, "eval_overall_precision": 0.39932885906040266, "eval_overall_recall": 0.30357142857142855, "eval_runtime": 0.2827, "eval_samples_per_second": 601.322, "eval_steps_per_second": 10.612, "step": 384 }, { "epoch": 5.0, "grad_norm": 0.9346233606338501, "learning_rate": 4.75e-05, "loss": 0.3178, "step": 480 }, { "epoch": 5.0, "eval_LOCATION_f1": 0.45070422535211263, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.5714285714285714, "eval_LOCATION_recall": 0.37209302325581395, "eval_ORGANIZATION_f1": 0.5927835051546392, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.5476190476190477, "eval_ORGANIZATION_recall": 0.6460674157303371, "eval_PERSON_f1": 0.6597938144329896, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.588957055214724, "eval_PERSON_recall": 0.75, "eval_loss": 0.23492121696472168, "eval_overall_accuracy": 0.9319838056680162, "eval_overall_f1": 0.5919610231425092, "eval_overall_precision": 0.5664335664335665, "eval_overall_recall": 0.6198979591836735, "eval_runtime": 0.2837, "eval_samples_per_second": 599.31, "eval_steps_per_second": 10.576, "step": 480 }, { "epoch": 6.0, "grad_norm": 1.3035882711410522, "learning_rate": 4.7e-05, "loss": 0.2406, "step": 576 }, { "epoch": 6.0, "eval_LOCATION_f1": 0.7185628742514969, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7407407407407407, "eval_LOCATION_recall": 0.6976744186046512, "eval_ORGANIZATION_f1": 0.712401055408971, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.6716417910447762, "eval_ORGANIZATION_recall": 0.7584269662921348, "eval_PERSON_f1": 0.8057553956834532, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.7466666666666667, "eval_PERSON_recall": 0.875, "eval_loss": 0.18347080051898956, "eval_overall_accuracy": 0.9535762483130904, "eval_overall_f1": 0.7451456310679612, "eval_overall_precision": 0.7106481481481481, "eval_overall_recall": 0.7831632653061225, "eval_runtime": 0.2844, "eval_samples_per_second": 597.709, "eval_steps_per_second": 10.548, "step": 576 }, { "epoch": 7.0, "grad_norm": 1.0643693208694458, "learning_rate": 4.6500000000000005e-05, "loss": 0.1942, "step": 672 }, { "epoch": 7.0, "eval_LOCATION_f1": 0.7745664739884394, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7701149425287356, "eval_LOCATION_recall": 0.7790697674418605, "eval_ORGANIZATION_f1": 0.7546174142480211, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.7114427860696517, "eval_ORGANIZATION_recall": 0.8033707865168539, "eval_PERSON_f1": 0.917910447761194, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.8785714285714286, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.15194177627563477, "eval_overall_accuracy": 0.9624831309041836, "eval_overall_f1": 0.8121951219512196, "eval_overall_precision": 0.7780373831775701, "eval_overall_recall": 0.8494897959183674, "eval_runtime": 0.284, "eval_samples_per_second": 598.684, "eval_steps_per_second": 10.565, "step": 672 }, { "epoch": 8.0, "grad_norm": 1.0617378950119019, "learning_rate": 4.600000000000001e-05, "loss": 0.1647, "step": 768 }, { "epoch": 8.0, "eval_LOCATION_f1": 0.7836257309941521, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.788235294117647, "eval_LOCATION_recall": 0.7790697674418605, "eval_ORGANIZATION_f1": 0.7750677506775068, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.7486910994764397, "eval_ORGANIZATION_recall": 0.8033707865168539, "eval_PERSON_f1": 0.9323308270676692, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.8985507246376812, "eval_PERSON_recall": 0.96875, "eval_loss": 0.127915620803833, "eval_overall_accuracy": 0.965991902834008, "eval_overall_f1": 0.8287841191066998, "eval_overall_precision": 0.8067632850241546, "eval_overall_recall": 0.8520408163265306, "eval_runtime": 0.2813, "eval_samples_per_second": 604.319, "eval_steps_per_second": 10.664, "step": 768 }, { "epoch": 9.0, "grad_norm": 0.8089593052864075, "learning_rate": 4.55e-05, "loss": 0.1479, "step": 864 }, { "epoch": 9.0, "eval_LOCATION_f1": 0.8114285714285714, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.797752808988764, "eval_LOCATION_recall": 0.8255813953488372, "eval_ORGANIZATION_f1": 0.7967914438502675, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.7602040816326531, "eval_ORGANIZATION_recall": 0.8370786516853933, "eval_PERSON_f1": 0.9393939393939394, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9117647058823529, "eval_PERSON_recall": 0.96875, "eval_loss": 0.11303041875362396, "eval_overall_accuracy": 0.9689608636977058, "eval_overall_f1": 0.8462484624846248, "eval_overall_precision": 0.8171021377672208, "eval_overall_recall": 0.8775510204081632, "eval_runtime": 0.2848, "eval_samples_per_second": 596.814, "eval_steps_per_second": 10.532, "step": 864 }, { "epoch": 10.0, "grad_norm": 3.2614407539367676, "learning_rate": 4.5e-05, "loss": 0.135, "step": 960 }, { "epoch": 10.0, "eval_LOCATION_f1": 0.8, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7659574468085106, "eval_LOCATION_recall": 0.8372093023255814, "eval_ORGANIZATION_f1": 0.8128342245989305, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.7755102040816326, "eval_ORGANIZATION_recall": 0.8539325842696629, "eval_PERSON_f1": 0.9389312977099238, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.917910447761194, "eval_PERSON_recall": 0.9609375, "eval_loss": 0.10374683886766434, "eval_overall_accuracy": 0.9681511470985156, "eval_overall_f1": 0.8504901960784313, "eval_overall_precision": 0.8183962264150944, "eval_overall_recall": 0.8852040816326531, "eval_runtime": 0.2808, "eval_samples_per_second": 605.48, "eval_steps_per_second": 10.685, "step": 960 }, { "epoch": 11.0, "grad_norm": 1.1339415311813354, "learning_rate": 4.4500000000000004e-05, "loss": 0.1317, "step": 1056 }, { "epoch": 11.0, "eval_LOCATION_f1": 0.8202247191011235, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7934782608695652, "eval_LOCATION_recall": 0.8488372093023255, "eval_ORGANIZATION_f1": 0.8383561643835616, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8181818181818182, "eval_ORGANIZATION_recall": 0.8595505617977528, "eval_PERSON_f1": 0.9575289575289575, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9465648854961832, "eval_PERSON_recall": 0.96875, "eval_loss": 0.09509219974279404, "eval_overall_accuracy": 0.9732793522267207, "eval_overall_f1": 0.8728179551122195, "eval_overall_precision": 0.8536585365853658, "eval_overall_recall": 0.8928571428571429, "eval_runtime": 0.2793, "eval_samples_per_second": 608.56, "eval_steps_per_second": 10.739, "step": 1056 }, { "epoch": 12.0, "grad_norm": 0.7799615263938904, "learning_rate": 4.4000000000000006e-05, "loss": 0.1196, "step": 1152 }, { "epoch": 12.0, "eval_LOCATION_f1": 0.8131868131868132, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7708333333333334, "eval_LOCATION_recall": 0.8604651162790697, "eval_ORGANIZATION_f1": 0.8633879781420766, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8404255319148937, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.9541984732824427, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9328358208955224, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.09041129052639008, "eval_overall_accuracy": 0.9748987854251012, "eval_overall_f1": 0.8814814814814814, "eval_overall_precision": 0.854066985645933, "eval_overall_recall": 0.9107142857142857, "eval_runtime": 0.2835, "eval_samples_per_second": 599.551, "eval_steps_per_second": 10.58, "step": 1152 }, { "epoch": 13.0, "grad_norm": 2.4215242862701416, "learning_rate": 4.35e-05, "loss": 0.1108, "step": 1248 }, { "epoch": 13.0, "eval_LOCATION_f1": 0.8333333333333334, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7978723404255319, "eval_LOCATION_recall": 0.872093023255814, "eval_ORGANIZATION_f1": 0.871934604904632, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8465608465608465, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9575289575289575, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9465648854961832, "eval_PERSON_recall": 0.96875, "eval_loss": 0.08236891031265259, "eval_overall_accuracy": 0.9767881241565453, "eval_overall_f1": 0.8908188585607941, "eval_overall_precision": 0.8671497584541062, "eval_overall_recall": 0.9158163265306123, "eval_runtime": 0.281, "eval_samples_per_second": 604.972, "eval_steps_per_second": 10.676, "step": 1248 }, { "epoch": 14.0, "grad_norm": 0.8359857797622681, "learning_rate": 4.3e-05, "loss": 0.107, "step": 1344 }, { "epoch": 14.0, "eval_LOCATION_f1": 0.839779005524862, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8, "eval_LOCATION_recall": 0.8837209302325582, "eval_ORGANIZATION_f1": 0.8802228412256268, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8729281767955801, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.9538461538461539, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9393939393939394, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07973363250494003, "eval_overall_accuracy": 0.9781376518218623, "eval_overall_f1": 0.895, "eval_overall_precision": 0.8774509803921569, "eval_overall_recall": 0.9132653061224489, "eval_runtime": 0.2799, "eval_samples_per_second": 607.303, "eval_steps_per_second": 10.717, "step": 1344 }, { "epoch": 15.0, "grad_norm": 1.4929579496383667, "learning_rate": 4.25e-05, "loss": 0.1063, "step": 1440 }, { "epoch": 15.0, "eval_LOCATION_f1": 0.8222222222222222, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7872340425531915, "eval_LOCATION_recall": 0.8604651162790697, "eval_ORGANIZATION_f1": 0.8821917808219177, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8609625668449198, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9538461538461539, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9393939393939394, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07602919638156891, "eval_overall_accuracy": 0.9775978407557355, "eval_overall_f1": 0.8919254658385093, "eval_overall_precision": 0.8692493946731235, "eval_overall_recall": 0.9158163265306123, "eval_runtime": 0.2828, "eval_samples_per_second": 601.037, "eval_steps_per_second": 10.607, "step": 1440 }, { "epoch": 16.0, "grad_norm": 1.2506853342056274, "learning_rate": 4.2e-05, "loss": 0.1, "step": 1536 }, { "epoch": 16.0, "eval_LOCATION_f1": 0.8700564971751413, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8461538461538461, "eval_LOCATION_recall": 0.8953488372093024, "eval_ORGANIZATION_f1": 0.8870523415977961, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8702702702702703, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9612403100775193, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9538461538461539, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07240239530801773, "eval_overall_accuracy": 0.9794871794871794, "eval_overall_f1": 0.9072681704260652, "eval_overall_precision": 0.8916256157635468, "eval_overall_recall": 0.923469387755102, "eval_runtime": 0.2817, "eval_samples_per_second": 603.532, "eval_steps_per_second": 10.651, "step": 1536 }, { "epoch": 17.0, "grad_norm": 0.9440656304359436, "learning_rate": 4.15e-05, "loss": 0.095, "step": 1632 }, { "epoch": 17.0, "eval_LOCATION_f1": 0.853932584269663, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8260869565217391, "eval_LOCATION_recall": 0.8837209302325582, "eval_ORGANIZATION_f1": 0.8901098901098902, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8709677419354839, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9575289575289575, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9465648854961832, "eval_PERSON_recall": 0.96875, "eval_loss": 0.07051108777523041, "eval_overall_accuracy": 0.9789473684210527, "eval_overall_f1": 0.9038701622971286, "eval_overall_precision": 0.8850855745721271, "eval_overall_recall": 0.923469387755102, "eval_runtime": 0.2795, "eval_samples_per_second": 608.264, "eval_steps_per_second": 10.734, "step": 1632 }, { "epoch": 18.0, "grad_norm": 0.6154165267944336, "learning_rate": 4.1e-05, "loss": 0.0932, "step": 1728 }, { "epoch": 18.0, "eval_LOCATION_f1": 0.8651685393258426, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8369565217391305, "eval_LOCATION_recall": 0.8953488372093024, "eval_ORGANIZATION_f1": 0.8994413407821229, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8944444444444445, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9575289575289575, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9465648854961832, "eval_PERSON_recall": 0.96875, "eval_loss": 0.0697602704167366, "eval_overall_accuracy": 0.9802968960863697, "eval_overall_f1": 0.9106918238993711, "eval_overall_precision": 0.8982630272952854, "eval_overall_recall": 0.923469387755102, "eval_runtime": 0.2838, "eval_samples_per_second": 598.992, "eval_steps_per_second": 10.57, "step": 1728 }, { "epoch": 19.0, "grad_norm": 1.5888150930404663, "learning_rate": 4.05e-05, "loss": 0.0871, "step": 1824 }, { "epoch": 19.0, "eval_LOCATION_f1": 0.8715083798882681, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8387096774193549, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.8994413407821229, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8944444444444445, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9575289575289575, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9465648854961832, "eval_PERSON_recall": 0.96875, "eval_loss": 0.0672474279999733, "eval_overall_accuracy": 0.9800269905533063, "eval_overall_f1": 0.9120603015075376, "eval_overall_precision": 0.8985148514851485, "eval_overall_recall": 0.9260204081632653, "eval_runtime": 0.2792, "eval_samples_per_second": 608.919, "eval_steps_per_second": 10.746, "step": 1824 }, { "epoch": 20.0, "grad_norm": 0.6971399784088135, "learning_rate": 4e-05, "loss": 0.0883, "step": 1920 }, { "epoch": 20.0, "eval_LOCATION_f1": 0.8666666666666666, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8297872340425532, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.8994413407821229, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8944444444444445, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9649805447470817, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9612403100775194, "eval_PERSON_recall": 0.96875, "eval_loss": 0.06501375138759613, "eval_overall_accuracy": 0.9802968960863697, "eval_overall_f1": 0.9132075471698112, "eval_overall_precision": 0.9007444168734491, "eval_overall_recall": 0.9260204081632653, "eval_runtime": 0.2806, "eval_samples_per_second": 605.941, "eval_steps_per_second": 10.693, "step": 1920 }, { "epoch": 21.0, "grad_norm": 0.7672743797302246, "learning_rate": 3.9500000000000005e-05, "loss": 0.0832, "step": 2016 }, { "epoch": 21.0, "eval_LOCATION_f1": 0.8666666666666666, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8297872340425532, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.9019607843137254, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8994413407821229, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9649805447470817, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9612403100775194, "eval_PERSON_recall": 0.96875, "eval_loss": 0.06505837291479111, "eval_overall_accuracy": 0.9811066126855601, "eval_overall_f1": 0.9143576826196474, "eval_overall_precision": 0.9029850746268657, "eval_overall_recall": 0.9260204081632653, "eval_runtime": 0.283, "eval_samples_per_second": 600.694, "eval_steps_per_second": 10.6, "step": 2016 }, { "epoch": 22.0, "grad_norm": 1.2235254049301147, "learning_rate": 3.9000000000000006e-05, "loss": 0.0829, "step": 2112 }, { "epoch": 22.0, "eval_LOCATION_f1": 0.8571428571428572, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8125, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.8876712328767122, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8663101604278075, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9575289575289575, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9465648854961832, "eval_PERSON_recall": 0.96875, "eval_loss": 0.06451266258955002, "eval_overall_accuracy": 0.9786774628879892, "eval_overall_f1": 0.9032258064516129, "eval_overall_precision": 0.8792270531400966, "eval_overall_recall": 0.9285714285714286, "eval_runtime": 0.2828, "eval_samples_per_second": 601.12, "eval_steps_per_second": 10.608, "step": 2112 }, { "epoch": 23.0, "grad_norm": 0.933464527130127, "learning_rate": 3.85e-05, "loss": 0.0789, "step": 2208 }, { "epoch": 23.0, "eval_LOCATION_f1": 0.861878453038674, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8210526315789474, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.9019607843137254, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8994413407821229, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.06014208123087883, "eval_overall_accuracy": 0.9819163292847504, "eval_overall_f1": 0.9168765743073048, "eval_overall_precision": 0.9054726368159204, "eval_overall_recall": 0.9285714285714286, "eval_runtime": 0.2819, "eval_samples_per_second": 603.138, "eval_steps_per_second": 10.644, "step": 2208 }, { "epoch": 24.0, "grad_norm": 0.9433938264846802, "learning_rate": 3.8e-05, "loss": 0.078, "step": 2304 }, { "epoch": 24.0, "eval_LOCATION_f1": 0.861878453038674, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8210526315789474, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.8901408450704225, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8926553672316384, "eval_ORGANIZATION_recall": 0.8876404494382022, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.061247795820236206, "eval_overall_accuracy": 0.9805668016194332, "eval_overall_f1": 0.9116161616161615, "eval_overall_precision": 0.9025, "eval_overall_recall": 0.9209183673469388, "eval_runtime": 0.2813, "eval_samples_per_second": 604.352, "eval_steps_per_second": 10.665, "step": 2304 }, { "epoch": 25.0, "grad_norm": 1.3498409986495972, "learning_rate": 3.7500000000000003e-05, "loss": 0.0756, "step": 2400 }, { "epoch": 25.0, "eval_LOCATION_f1": 0.8666666666666666, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8297872340425532, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.9044943820224719, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9044943820224719, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9689922480620154, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9615384615384616, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.059408556669950485, "eval_overall_accuracy": 0.9805668016194332, "eval_overall_f1": 0.9168765743073048, "eval_overall_precision": 0.9054726368159204, "eval_overall_recall": 0.9285714285714286, "eval_runtime": 0.2825, "eval_samples_per_second": 601.695, "eval_steps_per_second": 10.618, "step": 2400 }, { "epoch": 26.0, "grad_norm": 0.35765427350997925, "learning_rate": 3.7e-05, "loss": 0.0767, "step": 2496 }, { "epoch": 26.0, "eval_LOCATION_f1": 0.8449197860962566, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.7821782178217822, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.8831908831908832, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8959537572254336, "eval_ORGANIZATION_recall": 0.8707865168539326, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05880308896303177, "eval_overall_accuracy": 0.9800269905533063, "eval_overall_f1": 0.9042821158690176, "eval_overall_precision": 0.8930348258706468, "eval_overall_recall": 0.9158163265306123, "eval_runtime": 0.283, "eval_samples_per_second": 600.655, "eval_steps_per_second": 10.6, "step": 2496 }, { "epoch": 27.0, "grad_norm": 1.1899181604385376, "learning_rate": 3.65e-05, "loss": 0.0721, "step": 2592 }, { "epoch": 27.0, "eval_LOCATION_f1": 0.8571428571428572, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8125, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.8975069252077562, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.8852459016393442, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05609607696533203, "eval_overall_accuracy": 0.9813765182186235, "eval_overall_f1": 0.9136420525657072, "eval_overall_precision": 0.8968058968058968, "eval_overall_recall": 0.9311224489795918, "eval_runtime": 0.2825, "eval_samples_per_second": 601.838, "eval_steps_per_second": 10.621, "step": 2592 }, { "epoch": 28.0, "grad_norm": 1.2701897621154785, "learning_rate": 3.6e-05, "loss": 0.0719, "step": 2688 }, { "epoch": 28.0, "eval_LOCATION_f1": 0.8777777777777779, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8404255319148937, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9014084507042253, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.903954802259887, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05588143318891525, "eval_overall_accuracy": 0.9819163292847504, "eval_overall_f1": 0.9203539823008849, "eval_overall_precision": 0.9122807017543859, "eval_overall_recall": 0.9285714285714286, "eval_runtime": 0.2802, "eval_samples_per_second": 606.797, "eval_steps_per_second": 10.708, "step": 2688 }, { "epoch": 29.0, "grad_norm": 1.3545186519622803, "learning_rate": 3.55e-05, "loss": 0.0702, "step": 2784 }, { "epoch": 29.0, "eval_LOCATION_f1": 0.8764044943820224, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8478260869565217, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.9141274238227147, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9016393442622951, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05434948578476906, "eval_overall_accuracy": 0.9816464237516869, "eval_overall_f1": 0.9257861635220126, "eval_overall_precision": 0.913151364764268, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2819, "eval_samples_per_second": 603.148, "eval_steps_per_second": 10.644, "step": 2784 }, { "epoch": 30.0, "grad_norm": 0.36313942074775696, "learning_rate": 3.5e-05, "loss": 0.0711, "step": 2880 }, { "epoch": 30.0, "eval_LOCATION_f1": 0.8863636363636364, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8666666666666667, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.9166666666666666, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9065934065934066, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05385398864746094, "eval_overall_accuracy": 0.9819163292847504, "eval_overall_f1": 0.9281210592686003, "eval_overall_precision": 0.9177057356608479, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2813, "eval_samples_per_second": 604.362, "eval_steps_per_second": 10.665, "step": 2880 }, { "epoch": 31.0, "grad_norm": 1.1205620765686035, "learning_rate": 3.45e-05, "loss": 0.067, "step": 2976 }, { "epoch": 31.0, "eval_LOCATION_f1": 0.858695652173913, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8061224489795918, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9101123595505618, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9101123595505618, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05762900039553642, "eval_overall_accuracy": 0.9819163292847504, "eval_overall_f1": 0.9195979899497488, "eval_overall_precision": 0.905940594059406, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2839, "eval_samples_per_second": 598.882, "eval_steps_per_second": 10.569, "step": 2976 }, { "epoch": 32.0, "grad_norm": 0.6993715167045593, "learning_rate": 3.4000000000000007e-05, "loss": 0.0664, "step": 3072 }, { "epoch": 32.0, "eval_LOCATION_f1": 0.861878453038674, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8210526315789474, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.9111111111111112, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9010989010989011, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05667094141244888, "eval_overall_accuracy": 0.9813765182186235, "eval_overall_f1": 0.9197994987468671, "eval_overall_precision": 0.9039408866995073, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2807, "eval_samples_per_second": 605.552, "eval_steps_per_second": 10.686, "step": 3072 }, { "epoch": 33.0, "grad_norm": 3.8367385864257812, "learning_rate": 3.35e-05, "loss": 0.0642, "step": 3168 }, { "epoch": 33.0, "eval_LOCATION_f1": 0.8729281767955801, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8315789473684211, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9070422535211267, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9096045197740112, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.055755291134119034, "eval_overall_accuracy": 0.9824561403508771, "eval_overall_f1": 0.9217171717171717, "eval_overall_precision": 0.9125, "eval_overall_recall": 0.9311224489795918, "eval_runtime": 0.2821, "eval_samples_per_second": 602.531, "eval_steps_per_second": 10.633, "step": 3168 }, { "epoch": 34.0, "grad_norm": 0.8335817456245422, "learning_rate": 3.3e-05, "loss": 0.0642, "step": 3264 }, { "epoch": 34.0, "eval_LOCATION_f1": 0.8876404494382023, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8586956521739131, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9157303370786517, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9157303370786517, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05453602224588394, "eval_overall_accuracy": 0.9835357624831309, "eval_overall_f1": 0.929113924050633, "eval_overall_precision": 0.9221105527638191, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2794, "eval_samples_per_second": 608.461, "eval_steps_per_second": 10.738, "step": 3264 }, { "epoch": 35.0, "grad_norm": 1.6749961376190186, "learning_rate": 3.2500000000000004e-05, "loss": 0.0624, "step": 3360 }, { "epoch": 35.0, "eval_LOCATION_f1": 0.8926553672316384, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8681318681318682, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9162011173184358, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9111111111111111, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05416334420442581, "eval_overall_accuracy": 0.982995951417004, "eval_overall_f1": 0.9304677623261693, "eval_overall_precision": 0.9223057644110275, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2801, "eval_samples_per_second": 606.951, "eval_steps_per_second": 10.711, "step": 3360 }, { "epoch": 36.0, "grad_norm": 1.6614550352096558, "learning_rate": 3.2000000000000005e-05, "loss": 0.0651, "step": 3456 }, { "epoch": 36.0, "eval_LOCATION_f1": 0.8977272727272728, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8777777777777778, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9213483146067416, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9213483146067416, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9727626459143969, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9689922480620154, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.053541768342256546, "eval_overall_accuracy": 0.9832658569500675, "eval_overall_f1": 0.9328263624841572, "eval_overall_precision": 0.9269521410579346, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2829, "eval_samples_per_second": 600.914, "eval_steps_per_second": 10.604, "step": 3456 }, { "epoch": 37.0, "grad_norm": 3.9047365188598633, "learning_rate": 3.15e-05, "loss": 0.0635, "step": 3552 }, { "epoch": 37.0, "eval_LOCATION_f1": 0.896551724137931, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8863636363636364, "eval_LOCATION_recall": 0.9069767441860465, "eval_ORGANIZATION_f1": 0.9162011173184358, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9111111111111111, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.052303437143564224, "eval_overall_accuracy": 0.9832658569500675, "eval_overall_f1": 0.9314720812182741, "eval_overall_precision": 0.9267676767676768, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2809, "eval_samples_per_second": 605.215, "eval_steps_per_second": 10.68, "step": 3552 }, { "epoch": 38.0, "grad_norm": 0.4264446198940277, "learning_rate": 3.1e-05, "loss": 0.0617, "step": 3648 }, { "epoch": 38.0, "eval_LOCATION_f1": 0.8876404494382023, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8586956521739131, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9157303370786517, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9157303370786517, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.052760832011699677, "eval_overall_accuracy": 0.9838056680161943, "eval_overall_f1": 0.929113924050633, "eval_overall_precision": 0.9221105527638191, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2812, "eval_samples_per_second": 604.578, "eval_steps_per_second": 10.669, "step": 3648 }, { "epoch": 39.0, "grad_norm": 0.7656244039535522, "learning_rate": 3.05e-05, "loss": 0.0581, "step": 3744 }, { "epoch": 39.0, "eval_LOCATION_f1": 0.858695652173913, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8061224489795918, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9039548022598869, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9090909090909091, "eval_ORGANIZATION_recall": 0.898876404494382, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05475260689854622, "eval_overall_accuracy": 0.9827260458839406, "eval_overall_f1": 0.9168765743073048, "eval_overall_precision": 0.9054726368159204, "eval_overall_recall": 0.9285714285714286, "eval_runtime": 0.2823, "eval_samples_per_second": 602.161, "eval_steps_per_second": 10.626, "step": 3744 }, { "epoch": 40.0, "grad_norm": 0.5649272203445435, "learning_rate": 3e-05, "loss": 0.0597, "step": 3840 }, { "epoch": 40.0, "eval_LOCATION_f1": 0.8977272727272728, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8777777777777778, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9269662921348315, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9269662921348315, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.051006708294153214, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9365482233502538, "eval_overall_precision": 0.9318181818181818, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2793, "eval_samples_per_second": 608.712, "eval_steps_per_second": 10.742, "step": 3840 }, { "epoch": 41.0, "grad_norm": 0.19076836109161377, "learning_rate": 2.95e-05, "loss": 0.0569, "step": 3936 }, { "epoch": 41.0, "eval_LOCATION_f1": 0.8977272727272728, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8777777777777778, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9269662921348315, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9269662921348315, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0505344495177269, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9365482233502538, "eval_overall_precision": 0.9318181818181818, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2814, "eval_samples_per_second": 604.17, "eval_steps_per_second": 10.662, "step": 3936 }, { "epoch": 42.0, "grad_norm": 2.5419020652770996, "learning_rate": 2.9e-05, "loss": 0.0579, "step": 4032 }, { "epoch": 42.0, "eval_LOCATION_f1": 0.8977272727272728, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8777777777777778, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9269662921348315, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9269662921348315, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.050377897918224335, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.9365482233502538, "eval_overall_precision": 0.9318181818181818, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2812, "eval_samples_per_second": 604.567, "eval_steps_per_second": 10.669, "step": 4032 }, { "epoch": 43.0, "grad_norm": 0.1431387960910797, "learning_rate": 2.8499999999999998e-05, "loss": 0.0564, "step": 4128 }, { "epoch": 43.0, "eval_LOCATION_f1": 0.8926553672316384, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8681318681318682, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9131652661064427, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9106145251396648, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.050585947930812836, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.9302915082382762, "eval_overall_precision": 0.924433249370277, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2833, "eval_samples_per_second": 600.171, "eval_steps_per_second": 10.591, "step": 4128 }, { "epoch": 44.0, "grad_norm": 1.1703745126724243, "learning_rate": 2.8000000000000003e-05, "loss": 0.0572, "step": 4224 }, { "epoch": 44.0, "eval_LOCATION_f1": 0.8926553672316384, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8681318681318682, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9192200557103065, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9116022099447514, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04985123872756958, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9329962073324906, "eval_overall_precision": 0.924812030075188, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2812, "eval_samples_per_second": 604.526, "eval_steps_per_second": 10.668, "step": 4224 }, { "epoch": 45.0, "grad_norm": 1.3757704496383667, "learning_rate": 2.7500000000000004e-05, "loss": 0.0563, "step": 4320 }, { "epoch": 45.0, "eval_LOCATION_f1": 0.8926553672316384, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8681318681318682, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9213483146067416, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9213483146067416, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04884723946452141, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.934010152284264, "eval_overall_precision": 0.9292929292929293, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2837, "eval_samples_per_second": 599.302, "eval_steps_per_second": 10.576, "step": 4320 }, { "epoch": 46.0, "grad_norm": 2.0719878673553467, "learning_rate": 2.7000000000000002e-05, "loss": 0.0594, "step": 4416 }, { "epoch": 46.0, "eval_LOCATION_f1": 0.8926553672316384, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8681318681318682, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9217877094972067, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9166666666666666, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0507470928132534, "eval_overall_accuracy": 0.9840755735492578, "eval_overall_f1": 0.9329962073324906, "eval_overall_precision": 0.924812030075188, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2845, "eval_samples_per_second": 597.493, "eval_steps_per_second": 10.544, "step": 4416 }, { "epoch": 47.0, "grad_norm": 0.6790814399719238, "learning_rate": 2.6500000000000004e-05, "loss": 0.0545, "step": 4512 }, { "epoch": 47.0, "eval_LOCATION_f1": 0.8926553672316384, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8681318681318682, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9187675070028011, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9162011173184358, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04966941103339195, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9316455696202531, "eval_overall_precision": 0.9246231155778895, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2818, "eval_samples_per_second": 603.305, "eval_steps_per_second": 10.647, "step": 4512 }, { "epoch": 48.0, "grad_norm": 1.4126778841018677, "learning_rate": 2.6000000000000002e-05, "loss": 0.0536, "step": 4608 }, { "epoch": 48.0, "eval_LOCATION_f1": 0.8926553672316384, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8681318681318682, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9187675070028011, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9162011173184358, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04870031401515007, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9316455696202531, "eval_overall_precision": 0.9246231155778895, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2829, "eval_samples_per_second": 600.855, "eval_steps_per_second": 10.603, "step": 4608 }, { "epoch": 49.0, "grad_norm": 0.6721929907798767, "learning_rate": 2.5500000000000003e-05, "loss": 0.0556, "step": 4704 }, { "epoch": 49.0, "eval_LOCATION_f1": 0.8926553672316384, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8681318681318682, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9070422535211267, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9096045197740112, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0500752292573452, "eval_overall_accuracy": 0.9832658569500675, "eval_overall_f1": 0.9263959390862943, "eval_overall_precision": 0.9217171717171717, "eval_overall_recall": 0.9311224489795918, "eval_runtime": 0.2821, "eval_samples_per_second": 602.659, "eval_steps_per_second": 10.635, "step": 4704 }, { "epoch": 50.0, "grad_norm": 1.7530306577682495, "learning_rate": 2.5e-05, "loss": 0.0522, "step": 4800 }, { "epoch": 50.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9187675070028011, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9162011173184358, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.05056515708565712, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.9341772151898734, "eval_overall_precision": 0.9271356783919598, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2897, "eval_samples_per_second": 586.89, "eval_steps_per_second": 10.357, "step": 4800 }, { "epoch": 51.0, "grad_norm": 0.5993042588233948, "learning_rate": 2.45e-05, "loss": 0.0527, "step": 4896 }, { "epoch": 51.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9265536723163842, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9318181818181818, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.049558304250240326, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9377382465057178, "eval_overall_precision": 0.9341772151898734, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2824, "eval_samples_per_second": 601.901, "eval_steps_per_second": 10.622, "step": 4896 }, { "epoch": 52.0, "grad_norm": 0.8313167691230774, "learning_rate": 2.4e-05, "loss": 0.0529, "step": 4992 }, { "epoch": 52.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9239436619718311, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9265536723163842, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.049007900059223175, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.934010152284264, "eval_overall_precision": 0.9292929292929293, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2818, "eval_samples_per_second": 603.306, "eval_steps_per_second": 10.647, "step": 4992 }, { "epoch": 53.0, "grad_norm": 0.9506852626800537, "learning_rate": 2.35e-05, "loss": 0.0522, "step": 5088 }, { "epoch": 53.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9157303370786517, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9157303370786517, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04938499256968498, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9328263624841572, "eval_overall_precision": 0.9269521410579346, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.281, "eval_samples_per_second": 605.019, "eval_steps_per_second": 10.677, "step": 5088 }, { "epoch": 54.0, "grad_norm": 2.244389533996582, "learning_rate": 2.3000000000000003e-05, "loss": 0.0525, "step": 5184 }, { "epoch": 54.0, "eval_LOCATION_f1": 0.9090909090909092, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8888888888888888, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9269662921348315, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9269662921348315, "eval_ORGANIZATION_recall": 0.9269662921348315, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04819142818450928, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.9390862944162437, "eval_overall_precision": 0.9343434343434344, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2815, "eval_samples_per_second": 603.971, "eval_steps_per_second": 10.658, "step": 5184 }, { "epoch": 55.0, "grad_norm": 2.14715838432312, "learning_rate": 2.25e-05, "loss": 0.0512, "step": 5280 }, { "epoch": 55.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9265536723163842, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9318181818181818, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04878583922982216, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.9365482233502538, "eval_overall_precision": 0.9318181818181818, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2814, "eval_samples_per_second": 604.171, "eval_steps_per_second": 10.662, "step": 5280 }, { "epoch": 56.0, "grad_norm": 0.8252633810043335, "learning_rate": 2.2000000000000003e-05, "loss": 0.053, "step": 5376 }, { "epoch": 56.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9152542372881356, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9204545454545454, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.048739928752183914, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9326556543837357, "eval_overall_precision": 0.9291139240506329, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2812, "eval_samples_per_second": 604.511, "eval_steps_per_second": 10.668, "step": 5376 }, { "epoch": 57.0, "grad_norm": 0.5691685080528259, "learning_rate": 2.15e-05, "loss": 0.0498, "step": 5472 }, { "epoch": 57.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9183098591549295, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9209039548022598, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04863953962922096, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.934010152284264, "eval_overall_precision": 0.9292929292929293, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2835, "eval_samples_per_second": 599.591, "eval_steps_per_second": 10.581, "step": 5472 }, { "epoch": 58.0, "grad_norm": 0.9249998331069946, "learning_rate": 2.1e-05, "loss": 0.0504, "step": 5568 }, { "epoch": 58.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9265536723163842, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9318181818181818, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04887806251645088, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.9365482233502538, "eval_overall_precision": 0.9318181818181818, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2821, "eval_samples_per_second": 602.584, "eval_steps_per_second": 10.634, "step": 5568 }, { "epoch": 59.0, "grad_norm": 0.7708175778388977, "learning_rate": 2.05e-05, "loss": 0.0456, "step": 5664 }, { "epoch": 59.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9096045197740114, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9147727272727273, "eval_ORGANIZATION_recall": 0.9044943820224719, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04921862483024597, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9289340101522842, "eval_overall_precision": 0.9242424242424242, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2809, "eval_samples_per_second": 605.172, "eval_steps_per_second": 10.68, "step": 5664 }, { "epoch": 60.0, "grad_norm": 0.6520349383354187, "learning_rate": 2e-05, "loss": 0.0504, "step": 5760 }, { "epoch": 60.0, "eval_LOCATION_f1": 0.8926553672316384, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8681318681318682, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9126760563380282, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9152542372881356, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04753315821290016, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9289340101522842, "eval_overall_precision": 0.9242424242424242, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2828, "eval_samples_per_second": 601.158, "eval_steps_per_second": 10.609, "step": 5760 }, { "epoch": 61.0, "grad_norm": 0.622580349445343, "learning_rate": 1.9500000000000003e-05, "loss": 0.0494, "step": 5856 }, { "epoch": 61.0, "eval_LOCATION_f1": 0.8926553672316384, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8681318681318682, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9235127478753541, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9314285714285714, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04764934256672859, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9338422391857506, "eval_overall_precision": 0.9314720812182741, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2804, "eval_samples_per_second": 606.327, "eval_steps_per_second": 10.7, "step": 5856 }, { "epoch": 62.0, "grad_norm": 0.6881121397018433, "learning_rate": 1.9e-05, "loss": 0.046, "step": 5952 }, { "epoch": 62.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9265536723163842, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9318181818181818, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0477934405207634, "eval_overall_accuracy": 0.9859649122807017, "eval_overall_f1": 0.94147582697201, "eval_overall_precision": 0.9390862944162437, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2805, "eval_samples_per_second": 606.123, "eval_steps_per_second": 10.696, "step": 5952 }, { "epoch": 63.0, "grad_norm": 1.6825617551803589, "learning_rate": 1.85e-05, "loss": 0.0463, "step": 6048 }, { "epoch": 63.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9187675070028011, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9162011173184358, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.96875, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.96875, "eval_PERSON_recall": 0.96875, "eval_loss": 0.048465996980667114, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9304677623261693, "eval_overall_precision": 0.9223057644110275, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2808, "eval_samples_per_second": 605.457, "eval_steps_per_second": 10.685, "step": 6048 }, { "epoch": 64.0, "grad_norm": 0.2234792858362198, "learning_rate": 1.8e-05, "loss": 0.0452, "step": 6144 }, { "epoch": 64.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9213483146067416, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9213483146067416, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04815259948372841, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.935361216730038, "eval_overall_precision": 0.929471032745592, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2856, "eval_samples_per_second": 595.299, "eval_steps_per_second": 10.505, "step": 6144 }, { "epoch": 65.0, "grad_norm": 0.7405194044113159, "learning_rate": 1.75e-05, "loss": 0.0446, "step": 6240 }, { "epoch": 65.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9162011173184358, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9111111111111111, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04923372343182564, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.9341772151898734, "eval_overall_precision": 0.9271356783919598, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2832, "eval_samples_per_second": 600.188, "eval_steps_per_second": 10.592, "step": 6240 }, { "epoch": 66.0, "grad_norm": 1.1158796548843384, "learning_rate": 1.7000000000000003e-05, "loss": 0.0463, "step": 6336 }, { "epoch": 66.0, "eval_LOCATION_f1": 0.8876404494382023, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8586956521739131, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9101123595505618, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9101123595505618, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04949327930808067, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.9265822784810125, "eval_overall_precision": 0.9195979899497487, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2821, "eval_samples_per_second": 602.567, "eval_steps_per_second": 10.634, "step": 6336 }, { "epoch": 67.0, "grad_norm": 0.48084917664527893, "learning_rate": 1.65e-05, "loss": 0.0466, "step": 6432 }, { "epoch": 67.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9101123595505618, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9101123595505618, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04914508014917374, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9302915082382762, "eval_overall_precision": 0.924433249370277, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2844, "eval_samples_per_second": 597.737, "eval_steps_per_second": 10.548, "step": 6432 }, { "epoch": 68.0, "grad_norm": 1.0782126188278198, "learning_rate": 1.6000000000000003e-05, "loss": 0.0451, "step": 6528 }, { "epoch": 68.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9162011173184358, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9111111111111111, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04990832507610321, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9329962073324906, "eval_overall_precision": 0.924812030075188, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2832, "eval_samples_per_second": 600.234, "eval_steps_per_second": 10.592, "step": 6528 }, { "epoch": 69.0, "grad_norm": 1.2592836618423462, "learning_rate": 1.55e-05, "loss": 0.047, "step": 6624 }, { "epoch": 69.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9183098591549295, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9209039548022598, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04929433763027191, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9328263624841572, "eval_overall_precision": 0.9269521410579346, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2807, "eval_samples_per_second": 605.579, "eval_steps_per_second": 10.687, "step": 6624 }, { "epoch": 70.0, "grad_norm": 0.7667045593261719, "learning_rate": 1.5e-05, "loss": 0.0435, "step": 6720 }, { "epoch": 70.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9157303370786517, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9157303370786517, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.048545148223638535, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9328263624841572, "eval_overall_precision": 0.9269521410579346, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2848, "eval_samples_per_second": 596.917, "eval_steps_per_second": 10.534, "step": 6720 }, { "epoch": 71.0, "grad_norm": 1.8045178651809692, "learning_rate": 1.45e-05, "loss": 0.045, "step": 6816 }, { "epoch": 71.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9162011173184358, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9111111111111111, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04896879196166992, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9329962073324906, "eval_overall_precision": 0.924812030075188, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2818, "eval_samples_per_second": 603.174, "eval_steps_per_second": 10.644, "step": 6816 }, { "epoch": 72.0, "grad_norm": 0.6001482009887695, "learning_rate": 1.4000000000000001e-05, "loss": 0.0458, "step": 6912 }, { "epoch": 72.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9178470254957507, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9257142857142857, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04974735528230667, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9363867684478372, "eval_overall_precision": 0.934010152284264, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2833, "eval_samples_per_second": 600.101, "eval_steps_per_second": 10.59, "step": 6912 }, { "epoch": 73.0, "grad_norm": 1.2454522848129272, "learning_rate": 1.3500000000000001e-05, "loss": 0.0442, "step": 7008 }, { "epoch": 73.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9157303370786517, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9157303370786517, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0495486781001091, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.935361216730038, "eval_overall_precision": 0.929471032745592, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2816, "eval_samples_per_second": 603.673, "eval_steps_per_second": 10.653, "step": 7008 }, { "epoch": 74.0, "grad_norm": 2.512705087661743, "learning_rate": 1.3000000000000001e-05, "loss": 0.0442, "step": 7104 }, { "epoch": 74.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9126760563380282, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9152542372881356, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04903655871748924, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.934010152284264, "eval_overall_precision": 0.9292929292929293, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2798, "eval_samples_per_second": 607.495, "eval_steps_per_second": 10.72, "step": 7104 }, { "epoch": 75.0, "grad_norm": 0.993471086025238, "learning_rate": 1.25e-05, "loss": 0.0437, "step": 7200 }, { "epoch": 75.0, "eval_LOCATION_f1": 0.8926553672316384, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8681318681318682, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9183098591549295, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9209039548022598, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04867413640022278, "eval_overall_accuracy": 0.9840755735492578, "eval_overall_f1": 0.9314720812182741, "eval_overall_precision": 0.9267676767676768, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2828, "eval_samples_per_second": 601.141, "eval_steps_per_second": 10.608, "step": 7200 }, { "epoch": 76.0, "grad_norm": 0.7063285112380981, "learning_rate": 1.2e-05, "loss": 0.0458, "step": 7296 }, { "epoch": 76.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9183098591549295, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9209039548022598, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04928082972764969, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.9351969504447268, "eval_overall_precision": 0.9316455696202531, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2811, "eval_samples_per_second": 604.715, "eval_steps_per_second": 10.671, "step": 7296 }, { "epoch": 77.0, "grad_norm": 1.6234911680221558, "learning_rate": 1.1500000000000002e-05, "loss": 0.0448, "step": 7392 }, { "epoch": 77.0, "eval_LOCATION_f1": 0.8926553672316384, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8681318681318682, "eval_LOCATION_recall": 0.9186046511627907, "eval_ORGANIZATION_f1": 0.9126760563380282, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9152542372881356, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04873611778020859, "eval_overall_accuracy": 0.9838056680161943, "eval_overall_f1": 0.9301143583227446, "eval_overall_precision": 0.9265822784810127, "eval_overall_recall": 0.9336734693877551, "eval_runtime": 0.2824, "eval_samples_per_second": 602.033, "eval_steps_per_second": 10.624, "step": 7392 }, { "epoch": 78.0, "grad_norm": 0.894944965839386, "learning_rate": 1.1000000000000001e-05, "loss": 0.0451, "step": 7488 }, { "epoch": 78.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9183098591549295, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9209039548022598, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.049469735473394394, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.934010152284264, "eval_overall_precision": 0.9292929292929293, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2811, "eval_samples_per_second": 604.745, "eval_steps_per_second": 10.672, "step": 7488 }, { "epoch": 79.0, "grad_norm": 0.6182302236557007, "learning_rate": 1.05e-05, "loss": 0.0449, "step": 7584 }, { "epoch": 79.0, "eval_LOCATION_f1": 0.9039548022598871, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8791208791208791, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9213483146067416, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9213483146067416, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04977456107735634, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9365482233502538, "eval_overall_precision": 0.9318181818181818, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2824, "eval_samples_per_second": 601.942, "eval_steps_per_second": 10.622, "step": 7584 }, { "epoch": 80.0, "grad_norm": 0.9781434535980225, "learning_rate": 1e-05, "loss": 0.0436, "step": 7680 }, { "epoch": 80.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9152542372881356, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9204545454545454, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.049282483756542206, "eval_overall_accuracy": 0.9843454790823212, "eval_overall_f1": 0.9326556543837357, "eval_overall_precision": 0.9291139240506329, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2832, "eval_samples_per_second": 600.22, "eval_steps_per_second": 10.592, "step": 7680 }, { "epoch": 81.0, "grad_norm": 1.0374711751937866, "learning_rate": 9.5e-06, "loss": 0.044, "step": 7776 }, { "epoch": 81.0, "eval_LOCATION_f1": 0.9101123595505618, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8804347826086957, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9183098591549295, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9209039548022598, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.049396924674510956, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9365482233502538, "eval_overall_precision": 0.9318181818181818, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2821, "eval_samples_per_second": 602.607, "eval_steps_per_second": 10.634, "step": 7776 }, { "epoch": 82.0, "grad_norm": 1.2461556196212769, "learning_rate": 9e-06, "loss": 0.0438, "step": 7872 }, { "epoch": 82.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9178470254957507, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9257142857142857, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04848863556981087, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9326556543837357, "eval_overall_precision": 0.9291139240506329, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.2815, "eval_samples_per_second": 604.004, "eval_steps_per_second": 10.659, "step": 7872 }, { "epoch": 83.0, "grad_norm": 2.299999475479126, "learning_rate": 8.500000000000002e-06, "loss": 0.0434, "step": 7968 }, { "epoch": 83.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9187675070028011, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9162011173184358, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04818493500351906, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9367088607594937, "eval_overall_precision": 0.9296482412060302, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.283, "eval_samples_per_second": 600.663, "eval_steps_per_second": 10.6, "step": 7968 }, { "epoch": 84.0, "grad_norm": 2.182854413986206, "learning_rate": 8.000000000000001e-06, "loss": 0.0418, "step": 8064 }, { "epoch": 84.0, "eval_LOCATION_f1": 0.898876404494382, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8695652173913043, "eval_LOCATION_recall": 0.9302325581395349, "eval_ORGANIZATION_f1": 0.9101123595505618, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9101123595505618, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04847468063235283, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.929113924050633, "eval_overall_precision": 0.9221105527638191, "eval_overall_recall": 0.9362244897959183, "eval_runtime": 0.282, "eval_samples_per_second": 602.885, "eval_steps_per_second": 10.639, "step": 8064 }, { "epoch": 85.0, "grad_norm": 1.2896052598953247, "learning_rate": 7.5e-06, "loss": 0.0424, "step": 8160 }, { "epoch": 85.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9204545454545454, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9310344827586207, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9765625, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.9765625, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.048406198620796204, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.937579617834395, "eval_overall_precision": 0.9363867684478372, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2817, "eval_samples_per_second": 603.389, "eval_steps_per_second": 10.648, "step": 8160 }, { "epoch": 86.0, "grad_norm": 0.4105066657066345, "learning_rate": 7.000000000000001e-06, "loss": 0.042, "step": 8256 }, { "epoch": 86.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9239436619718311, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9265536723163842, "eval_ORGANIZATION_recall": 0.9213483146067416, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.048215754330158234, "eval_overall_accuracy": 0.9856950067476383, "eval_overall_f1": 0.9402795425667091, "eval_overall_precision": 0.9367088607594937, "eval_overall_recall": 0.9438775510204082, "eval_runtime": 0.2835, "eval_samples_per_second": 599.717, "eval_steps_per_second": 10.583, "step": 8256 }, { "epoch": 87.0, "grad_norm": 0.5451318025588989, "learning_rate": 6.5000000000000004e-06, "loss": 0.0431, "step": 8352 }, { "epoch": 87.0, "eval_LOCATION_f1": 0.9101123595505618, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8804347826086957, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9178470254957507, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9257142857142857, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04817402362823486, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9363867684478372, "eval_overall_precision": 0.934010152284264, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2829, "eval_samples_per_second": 600.903, "eval_steps_per_second": 10.604, "step": 8352 }, { "epoch": 88.0, "grad_norm": 0.7700179219245911, "learning_rate": 6e-06, "loss": 0.0417, "step": 8448 }, { "epoch": 88.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9178470254957507, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9257142857142857, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0481853149831295, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.937579617834395, "eval_overall_precision": 0.9363867684478372, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2822, "eval_samples_per_second": 602.399, "eval_steps_per_second": 10.631, "step": 8448 }, { "epoch": 89.0, "grad_norm": 0.48494595289230347, "learning_rate": 5.500000000000001e-06, "loss": 0.0421, "step": 8544 }, { "epoch": 89.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.92090395480226, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9261363636363636, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04823269695043564, "eval_overall_accuracy": 0.9854251012145749, "eval_overall_f1": 0.9389312977099236, "eval_overall_precision": 0.9365482233502538, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2832, "eval_samples_per_second": 600.189, "eval_steps_per_second": 10.592, "step": 8544 }, { "epoch": 90.0, "grad_norm": 1.2131330966949463, "learning_rate": 5e-06, "loss": 0.0412, "step": 8640 }, { "epoch": 90.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9178470254957507, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9257142857142857, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.048506613820791245, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.937579617834395, "eval_overall_precision": 0.9363867684478372, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2827, "eval_samples_per_second": 601.369, "eval_steps_per_second": 10.612, "step": 8640 }, { "epoch": 91.0, "grad_norm": 1.6268497705459595, "learning_rate": 4.5e-06, "loss": 0.0407, "step": 8736 }, { "epoch": 91.0, "eval_LOCATION_f1": 0.9152542372881357, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.8901098901098901, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9204545454545454, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9310344827586207, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.0484403595328331, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9387755102040817, "eval_overall_precision": 0.9387755102040817, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2823, "eval_samples_per_second": 602.132, "eval_steps_per_second": 10.626, "step": 8736 }, { "epoch": 92.0, "grad_norm": 0.282482773065567, "learning_rate": 4.000000000000001e-06, "loss": 0.0405, "step": 8832 }, { "epoch": 92.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9230769230769231, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9364161849710982, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04866935685276985, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9411764705882353, "eval_overall_precision": 0.9435897435897436, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2824, "eval_samples_per_second": 601.955, "eval_steps_per_second": 10.623, "step": 8832 }, { "epoch": 93.0, "grad_norm": 1.4597584009170532, "learning_rate": 3.5000000000000004e-06, "loss": 0.0447, "step": 8928 }, { "epoch": 93.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9230769230769231, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9364161849710982, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04870360717177391, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9411764705882353, "eval_overall_precision": 0.9435897435897436, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2828, "eval_samples_per_second": 601.209, "eval_steps_per_second": 10.61, "step": 8928 }, { "epoch": 94.0, "grad_norm": 0.4648018777370453, "learning_rate": 3e-06, "loss": 0.0402, "step": 9024 }, { "epoch": 94.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9204545454545454, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9310344827586207, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.048693280667066574, "eval_overall_accuracy": 0.9848852901484481, "eval_overall_f1": 0.9399744572158366, "eval_overall_precision": 0.9411764705882353, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2803, "eval_samples_per_second": 606.45, "eval_steps_per_second": 10.702, "step": 9024 }, { "epoch": 95.0, "grad_norm": 0.9228402376174927, "learning_rate": 2.5e-06, "loss": 0.0406, "step": 9120 }, { "epoch": 95.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9230769230769231, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9364161849710982, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.048481784760951996, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9411764705882353, "eval_overall_precision": 0.9435897435897436, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2827, "eval_samples_per_second": 601.417, "eval_steps_per_second": 10.613, "step": 9120 }, { "epoch": 96.0, "grad_norm": 0.3986660838127136, "learning_rate": 2.0000000000000003e-06, "loss": 0.0413, "step": 9216 }, { "epoch": 96.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9230769230769231, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9364161849710982, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04846182465553284, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9411764705882353, "eval_overall_precision": 0.9435897435897436, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2821, "eval_samples_per_second": 602.684, "eval_steps_per_second": 10.636, "step": 9216 }, { "epoch": 97.0, "grad_norm": 0.8940658569335938, "learning_rate": 1.5e-06, "loss": 0.0404, "step": 9312 }, { "epoch": 97.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9261363636363636, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9367816091954023, "eval_ORGANIZATION_recall": 0.9157303370786517, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04837748035788536, "eval_overall_accuracy": 0.9851551956815114, "eval_overall_f1": 0.9425287356321839, "eval_overall_precision": 0.9437340153452686, "eval_overall_recall": 0.9413265306122449, "eval_runtime": 0.2813, "eval_samples_per_second": 604.426, "eval_steps_per_second": 10.666, "step": 9312 }, { "epoch": 98.0, "grad_norm": 1.2582430839538574, "learning_rate": 1.0000000000000002e-06, "loss": 0.0403, "step": 9408 }, { "epoch": 98.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9230769230769231, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9364161849710982, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.04847470298409462, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9411764705882353, "eval_overall_precision": 0.9435897435897436, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2833, "eval_samples_per_second": 600.008, "eval_steps_per_second": 10.588, "step": 9408 }, { "epoch": 99.0, "grad_norm": 1.0658987760543823, "learning_rate": 5.000000000000001e-07, "loss": 0.0403, "step": 9504 }, { "epoch": 99.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9230769230769231, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9364161849710982, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.048433490097522736, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9411764705882353, "eval_overall_precision": 0.9435897435897436, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2829, "eval_samples_per_second": 600.968, "eval_steps_per_second": 10.605, "step": 9504 }, { "epoch": 100.0, "grad_norm": 1.2720928192138672, "learning_rate": 0.0, "loss": 0.0417, "step": 9600 }, { "epoch": 100.0, "eval_LOCATION_f1": 0.9204545454545455, "eval_LOCATION_number": 86, "eval_LOCATION_precision": 0.9, "eval_LOCATION_recall": 0.9418604651162791, "eval_ORGANIZATION_f1": 0.9230769230769231, "eval_ORGANIZATION_number": 178, "eval_ORGANIZATION_precision": 0.9364161849710982, "eval_ORGANIZATION_recall": 0.9101123595505618, "eval_PERSON_f1": 0.9803921568627452, "eval_PERSON_number": 128, "eval_PERSON_precision": 0.984251968503937, "eval_PERSON_recall": 0.9765625, "eval_loss": 0.048419199883937836, "eval_overall_accuracy": 0.9846153846153847, "eval_overall_f1": 0.9411764705882353, "eval_overall_precision": 0.9435897435897436, "eval_overall_recall": 0.9387755102040817, "eval_runtime": 0.2819, "eval_samples_per_second": 602.959, "eval_steps_per_second": 10.64, "step": 9600 }, { "epoch": 100.0, "step": 9600, "total_flos": 3881337493223460.0, "train_loss": 0.0924897667268912, "train_runtime": 512.434, "train_samples_per_second": 298.77, "train_steps_per_second": 18.734 } ], "logging_steps": 500, "max_steps": 9600, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 3881337493223460.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }