Upload folder using huggingface_hub
- README.md +27 -0
- checkpoint-6081/config.json +36 -0
- checkpoint-6081/model.safetensors +3 -0
- checkpoint-6081/optimizer.pt +3 -0
- checkpoint-6081/rng_state.pth +3 -0
- checkpoint-6081/scheduler.pt +3 -0
- checkpoint-6081/trainer_state.json +1782 -0
- checkpoint-6081/training_args.bin +3 -0
- config.json +36 -0
- model.safetensors +3 -0
- runs/May22_15-42-05_r-trentmkelly-autotrain-advanced-6oq4mr6b-da706-g95do/events.out.tfevents.1747928527.r-trentmkelly-autotrain-advanced-6oq4mr6b-da706-g95do.89.0 +2 -2
- runs/May22_15-42-05_r-trentmkelly-autotrain-advanced-6oq4mr6b-da706-g95do/events.out.tfevents.1747932270.r-trentmkelly-autotrain-advanced-6oq4mr6b-da706-g95do.89.1 +3 -0
- special_tokens_map.json +37 -0
- tokenizer.json +0 -0
- tokenizer_config.json +58 -0
- training_args.bin +3 -0
- training_params.json +30 -0
- vocab.txt +0 -0
README.md
ADDED
@@ -0,0 +1,27 @@

---
library_name: transformers
tags:
- autotrain
- text-classification
base_model: BAAI/bge-large-en-v1.5
widget:
- text: "I love AutoTrain"
---

# Model Trained Using AutoTrain

- Problem type: Text Classification

## Validation Metrics
loss: 0.29475775361061096

f1: 0.9548948513415518

precision: 0.9136830419095199

recall: 1.0

auc: 0.5282966990033326

accuracy: 0.9136830419095199
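For reference, a minimal inference sketch with 🤗 Transformers. The model's Hub repo id is not stated on this page, so the sketch assumes the files from this commit sit in the current directory; the `BAD_USER`/`GOOD_USER` labels come from the `config.json` below.

```python
# Minimal sketch: run the text classifier from this commit's files.
# "." is an assumption (local clone of this repo); substitute the Hub repo id.
from transformers import pipeline

classifier = pipeline("text-classification", model=".")
print(classifier("I love AutoTrain"))
# illustrative output: [{'label': 'GOOD_USER', 'score': 0.97}]
```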
checkpoint-6081/config.json
ADDED
@@ -0,0 +1,36 @@

{
  "_name_or_path": "BAAI/bge-large-en-v1.5",
  "_num_labels": 2,
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "BAD_USER",
    "1": "GOOD_USER"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "BAD_USER": 0,
    "GOOD_USER": 1
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.48.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}
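The `id2label`/`label2id` maps above are what turn the model's two output logits into class names. A minimal sketch of that mapping, assuming (per this commit's file list) the tokenizer files sit at the repo root and the weights plus config under `checkpoint-6081/`:

```python
# Sketch of the logits -> id2label mapping defined in the config above.
# Paths are assumptions from this commit's file list, not documented usage.
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

tok = AutoTokenizer.from_pretrained(".")  # tokenizer.json / vocab.txt at root
model = AutoModelForSequenceClassification.from_pretrained("checkpoint-6081")

inputs = tok("I love AutoTrain", return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits        # shape [1, 2]: BAD_USER, GOOD_USER
label_id = int(logits.argmax(dim=-1))
print(model.config.id2label[label_id])     # e.g. "GOOD_USER"
```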
checkpoint-6081/model.safetensors
ADDED
@@ -0,0 +1,3 @@

version https://git-lfs.github.com/spec/v1
oid sha256:210a11360287b8671eaf6f21b6f39996c84fd82642608b82cb277de916601c7f
size 1340622760
checkpoint-6081/optimizer.pt
ADDED
@@ -0,0 +1,3 @@

version https://git-lfs.github.com/spec/v1
oid sha256:b64b911aa802208d69bc6b20795704822e1161ff03f479a3625cb24ed015d270
size 2681480429
checkpoint-6081/rng_state.pth
ADDED
@@ -0,0 +1,3 @@

version https://git-lfs.github.com/spec/v1
oid sha256:4b7ff53905c2d8e1737614bddf91d49fc043a393982ff168f02ef3949e206658
size 14244
checkpoint-6081/scheduler.pt
ADDED
@@ -0,0 +1,3 @@

version https://git-lfs.github.com/spec/v1
oid sha256:51363f8374cf05742d50650339a31d6255912d5fc9be30731cc793b4383ae190
size 1064
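The `.safetensors`/`.pt`/`.pth` entries above are git-lfs pointer files (version line, sha256 oid, size); the binaries themselves resolve on download. A sketch using `huggingface_hub`, the library this commit was made with — the repo id is a placeholder, since it is not stated on this page:

```python
# Sketch: resolve one of the LFS-backed files above to a local path.
# "user/repo-id" is hypothetical; replace with this repo's actual Hub id.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="user/repo-id",
    filename="checkpoint-6081/model.safetensors",  # dereferences the pointer
)
print(path)  # local cache path to the ~1.34 GB weights file
```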
checkpoint-6081/trainer_state.json
ADDED
@@ -0,0 +1,1782 @@
{
  "best_metric": 0.29475775361061096,
  "best_model_checkpoint": "autotrain-roblox-12-with-bge-large/checkpoint-6081",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 6081,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0123334977799704,
      "grad_norm": 4.050952911376953,
      "learning_rate": 2.052545155993432e-06,
      "loss": 0.3312,
      "step": 25
    },
    {
      "epoch": 0.0246669955599408,
      "grad_norm": 2.688647985458374,
      "learning_rate": 4.105090311986864e-06,
      "loss": 0.3098,
      "step": 50
    },
    {
      "epoch": 0.0370004933399112,
      "grad_norm": 3.3651115894317627,
      "learning_rate": 6.157635467980296e-06,
      "loss": 0.2901,
      "step": 75
    },
    {
      "epoch": 0.0493339911198816,
      "grad_norm": 2.1107537746429443,
      "learning_rate": 8.12807881773399e-06,
      "loss": 0.3309,
      "step": 100
    },
    {
      "epoch": 0.061667488899852,
      "grad_norm": 3.7403604984283447,
      "learning_rate": 1.0180623973727423e-05,
      "loss": 0.2453,
      "step": 125
    },
    {
      "epoch": 0.0740009866798224,
      "grad_norm": 6.779322624206543,
      "learning_rate": 1.2233169129720855e-05,
      "loss": 0.3616,
      "step": 150
    },
    {
      "epoch": 0.0863344844597928,
      "grad_norm": 8.832545280456543,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 0.2956,
      "step": 175
    },
    {
      "epoch": 0.0986679822397632,
      "grad_norm": 10.033223152160645,
      "learning_rate": 1.633825944170772e-05,
      "loss": 0.2352,
      "step": 200
    },
    {
      "epoch": 0.1110014800197336,
      "grad_norm": 6.968496322631836,
      "learning_rate": 1.839080459770115e-05,
      "loss": 0.2301,
      "step": 225
    },
    {
      "epoch": 0.123334977799704,
      "grad_norm": 2.7263169288635254,
      "learning_rate": 2.0443349753694584e-05,
      "loss": 0.2709,
      "step": 250
    },
    {
      "epoch": 0.1356684755796744,
      "grad_norm": 0.9099070429801941,
      "learning_rate": 2.2495894909688014e-05,
      "loss": 0.1919,
      "step": 275
    },
    {
      "epoch": 0.1480019733596448,
      "grad_norm": 1.8240989446640015,
      "learning_rate": 2.4548440065681445e-05,
      "loss": 0.2988,
      "step": 300
    },
    {
      "epoch": 0.1603354711396152,
      "grad_norm": 4.9374895095825195,
      "learning_rate": 2.660098522167488e-05,
      "loss": 0.2536,
      "step": 325
    },
    {
      "epoch": 0.1726689689195856,
      "grad_norm": 1.09425950050354,
      "learning_rate": 2.865353037766831e-05,
      "loss": 0.2008,
      "step": 350
    },
    {
      "epoch": 0.185002466699556,
      "grad_norm": 2.3137404918670654,
      "learning_rate": 3.0706075533661744e-05,
      "loss": 0.2165,
      "step": 375
    },
    {
      "epoch": 0.1973359644795264,
      "grad_norm": 0.9012945294380188,
      "learning_rate": 3.275862068965517e-05,
      "loss": 0.1705,
      "step": 400
    },
    {
      "epoch": 0.20966946225949679,
      "grad_norm": 1.8131095170974731,
      "learning_rate": 3.4811165845648605e-05,
      "loss": 0.2665,
      "step": 425
    },
    {
      "epoch": 0.2220029600394672,
      "grad_norm": 1.6614292860031128,
      "learning_rate": 3.686371100164204e-05,
      "loss": 0.2429,
      "step": 450
    },
    {
      "epoch": 0.2343364578194376,
      "grad_norm": 2.317420482635498,
      "learning_rate": 3.891625615763547e-05,
      "loss": 0.1651,
      "step": 475
    },
    {
      "epoch": 0.246669955599408,
      "grad_norm": 3.639861583709717,
      "learning_rate": 4.09688013136289e-05,
      "loss": 0.318,
      "step": 500
    },
    {
      "epoch": 0.2590034533793784,
      "grad_norm": 2.372562885284424,
      "learning_rate": 4.3021346469622334e-05,
      "loss": 0.3119,
      "step": 525
    },
    {
      "epoch": 0.2713369511593488,
      "grad_norm": 1.7043285369873047,
      "learning_rate": 4.507389162561577e-05,
      "loss": 0.2579,
      "step": 550
    },
    {
      "epoch": 0.2836704489393192,
      "grad_norm": 7.564747333526611,
      "learning_rate": 4.7126436781609195e-05,
      "loss": 0.2786,
      "step": 575
    },
    {
      "epoch": 0.2960039467192896,
      "grad_norm": 12.761841773986816,
      "learning_rate": 4.917898193760263e-05,
      "loss": 0.2579,
      "step": 600
    },
    {
      "epoch": 0.30833744449926,
      "grad_norm": 2.404279947280884,
      "learning_rate": 4.986293859649123e-05,
      "loss": 0.2808,
      "step": 625
    },
    {
      "epoch": 0.3206709422792304,
      "grad_norm": 8.739703178405762,
      "learning_rate": 4.9643640350877194e-05,
      "loss": 0.2623,
      "step": 650
    },
    {
      "epoch": 0.33300444005920077,
      "grad_norm": 2.1071040630340576,
      "learning_rate": 4.941520467836258e-05,
      "loss": 0.231,
      "step": 675
    },
    {
      "epoch": 0.3453379378391712,
      "grad_norm": 5.691776275634766,
      "learning_rate": 4.918676900584795e-05,
      "loss": 0.2743,
      "step": 700
    },
    {
      "epoch": 0.3576714356191416,
      "grad_norm": 3.2197370529174805,
      "learning_rate": 4.8958333333333335e-05,
      "loss": 0.2375,
      "step": 725
    },
    {
      "epoch": 0.370004933399112,
      "grad_norm": 3.8034095764160156,
      "learning_rate": 4.872989766081872e-05,
      "loss": 0.2107,
      "step": 750
    },
    {
      "epoch": 0.3823384311790824,
      "grad_norm": 1.8795045614242554,
      "learning_rate": 4.85014619883041e-05,
      "loss": 0.2305,
      "step": 775
    },
    {
      "epoch": 0.3946719289590528,
      "grad_norm": 7.548186302185059,
      "learning_rate": 4.8273026315789476e-05,
      "loss": 0.2984,
      "step": 800
    },
    {
      "epoch": 0.4070054267390232,
      "grad_norm": 3.3918960094451904,
      "learning_rate": 4.804459064327486e-05,
      "loss": 0.2313,
      "step": 825
    },
    {
      "epoch": 0.41933892451899357,
      "grad_norm": 8.099982261657715,
      "learning_rate": 4.7816154970760235e-05,
      "loss": 0.3721,
      "step": 850
    },
    {
      "epoch": 0.43167242229896396,
      "grad_norm": 3.144808053970337,
      "learning_rate": 4.758771929824562e-05,
      "loss": 0.3335,
      "step": 875
    },
    {
      "epoch": 0.4440059200789344,
      "grad_norm": 2.1939008235931396,
      "learning_rate": 4.7359283625731e-05,
      "loss": 0.3207,
      "step": 900
    },
    {
      "epoch": 0.4563394178589048,
      "grad_norm": 1.2243921756744385,
      "learning_rate": 4.7130847953216375e-05,
      "loss": 0.3064,
      "step": 925
    },
    {
      "epoch": 0.4686729156388752,
      "grad_norm": 5.97895622253418,
      "learning_rate": 4.690241228070176e-05,
      "loss": 0.3317,
      "step": 950
    },
    {
      "epoch": 0.4810064134188456,
      "grad_norm": 4.540741443634033,
      "learning_rate": 4.6673976608187134e-05,
      "loss": 0.3653,
      "step": 975
    },
    {
      "epoch": 0.493339911198816,
      "grad_norm": 3.360098361968994,
      "learning_rate": 4.6445540935672516e-05,
      "loss": 0.2999,
      "step": 1000
    },
    {
      "epoch": 0.5056734089787864,
      "grad_norm": 2.302455186843872,
      "learning_rate": 4.621710526315789e-05,
      "loss": 0.2998,
      "step": 1025
    },
    {
      "epoch": 0.5180069067587568,
      "grad_norm": 4.192526817321777,
      "learning_rate": 4.598866959064328e-05,
      "loss": 0.2962,
      "step": 1050
    },
    {
      "epoch": 0.5303404045387272,
      "grad_norm": 2.745168924331665,
      "learning_rate": 4.576023391812866e-05,
      "loss": 0.3066,
      "step": 1075
    },
    {
      "epoch": 0.5426739023186976,
      "grad_norm": 2.6728932857513428,
      "learning_rate": 4.553179824561404e-05,
      "loss": 0.3107,
      "step": 1100
    },
    {
      "epoch": 0.555007400098668,
      "grad_norm": 1.7235779762268066,
      "learning_rate": 4.5303362573099416e-05,
      "loss": 0.3383,
      "step": 1125
    },
    {
      "epoch": 0.5673408978786384,
      "grad_norm": 4.529238224029541,
      "learning_rate": 4.50749269005848e-05,
      "loss": 0.3597,
      "step": 1150
    },
    {
      "epoch": 0.5796743956586088,
      "grad_norm": 9.228950500488281,
      "learning_rate": 4.4846491228070174e-05,
      "loss": 0.2667,
      "step": 1175
    },
    {
      "epoch": 0.5920078934385792,
      "grad_norm": 2.5120201110839844,
      "learning_rate": 4.4618055555555563e-05,
      "loss": 0.2917,
      "step": 1200
    },
    {
      "epoch": 0.6043413912185496,
      "grad_norm": 2.3404152393341064,
      "learning_rate": 4.438961988304094e-05,
      "loss": 0.3255,
      "step": 1225
    },
    {
      "epoch": 0.61667488899852,
      "grad_norm": 3.546064853668213,
      "learning_rate": 4.416118421052632e-05,
      "loss": 0.3204,
      "step": 1250
    },
    {
      "epoch": 0.6290083867784904,
      "grad_norm": 1.058401346206665,
      "learning_rate": 4.39327485380117e-05,
      "loss": 0.2542,
      "step": 1275
    },
    {
      "epoch": 0.6413418845584608,
      "grad_norm": 2.9787142276763916,
      "learning_rate": 4.370431286549708e-05,
      "loss": 0.3204,
      "step": 1300
    },
    {
      "epoch": 0.6536753823384311,
      "grad_norm": 1.3214211463928223,
      "learning_rate": 4.3475877192982456e-05,
      "loss": 0.3145,
      "step": 1325
    },
    {
      "epoch": 0.6660088801184015,
      "grad_norm": 8.694450378417969,
      "learning_rate": 4.324744152046784e-05,
      "loss": 0.3172,
      "step": 1350
    },
    {
      "epoch": 0.6783423778983719,
      "grad_norm": 1.451136827468872,
      "learning_rate": 4.301900584795322e-05,
      "loss": 0.3318,
      "step": 1375
    },
    {
      "epoch": 0.6906758756783424,
      "grad_norm": 1.4986424446105957,
      "learning_rate": 4.27905701754386e-05,
      "loss": 0.3323,
      "step": 1400
    },
    {
      "epoch": 0.7030093734583128,
      "grad_norm": 2.1326487064361572,
      "learning_rate": 4.256213450292398e-05,
      "loss": 0.3485,
      "step": 1425
    },
    {
      "epoch": 0.7153428712382832,
      "grad_norm": 2.9651310443878174,
      "learning_rate": 4.2333698830409355e-05,
      "loss": 0.3854,
      "step": 1450
    },
    {
      "epoch": 0.7276763690182536,
      "grad_norm": 5.734078884124756,
      "learning_rate": 4.210526315789474e-05,
      "loss": 0.2711,
      "step": 1475
    },
    {
      "epoch": 0.740009866798224,
      "grad_norm": 1.449973464012146,
      "learning_rate": 4.187682748538012e-05,
      "loss": 0.3442,
      "step": 1500
    },
    {
      "epoch": 0.7523433645781944,
      "grad_norm": 11.361034393310547,
      "learning_rate": 4.16483918128655e-05,
      "loss": 0.3168,
      "step": 1525
    },
    {
      "epoch": 0.7646768623581648,
      "grad_norm": 3.920637845993042,
      "learning_rate": 4.141995614035088e-05,
      "loss": 0.343,
      "step": 1550
    },
    {
      "epoch": 0.7770103601381352,
      "grad_norm": 4.118240833282471,
      "learning_rate": 4.119152046783626e-05,
      "loss": 0.3151,
      "step": 1575
    },
    {
      "epoch": 0.7893438579181056,
      "grad_norm": 6.998331069946289,
      "learning_rate": 4.096308479532164e-05,
      "loss": 0.3482,
      "step": 1600
    },
    {
      "epoch": 0.801677355698076,
      "grad_norm": 2.7872273921966553,
      "learning_rate": 4.073464912280702e-05,
      "loss": 0.246,
      "step": 1625
    },
    {
      "epoch": 0.8140108534780464,
      "grad_norm": 2.5942187309265137,
      "learning_rate": 4.05062134502924e-05,
      "loss": 0.3405,
      "step": 1650
    },
    {
      "epoch": 0.8263443512580168,
      "grad_norm": 0.8587321639060974,
      "learning_rate": 4.027777777777778e-05,
      "loss": 0.2433,
      "step": 1675
    },
    {
      "epoch": 0.8386778490379871,
      "grad_norm": 1.0923521518707275,
      "learning_rate": 4.004934210526316e-05,
      "loss": 0.3077,
      "step": 1700
    },
    {
      "epoch": 0.8510113468179575,
      "grad_norm": 5.740424156188965,
      "learning_rate": 3.9820906432748536e-05,
      "loss": 0.2671,
      "step": 1725
    },
    {
      "epoch": 0.8633448445979279,
      "grad_norm": 1.0194257497787476,
      "learning_rate": 3.959247076023392e-05,
      "loss": 0.3116,
      "step": 1750
    },
    {
      "epoch": 0.8756783423778983,
      "grad_norm": 3.4499142169952393,
      "learning_rate": 3.9364035087719295e-05,
      "loss": 0.2414,
      "step": 1775
    },
    {
      "epoch": 0.8880118401578688,
      "grad_norm": 1.5569651126861572,
      "learning_rate": 3.9135599415204684e-05,
      "loss": 0.2674,
      "step": 1800
    },
    {
      "epoch": 0.9003453379378392,
      "grad_norm": 2.4388108253479004,
      "learning_rate": 3.890716374269006e-05,
      "loss": 0.2521,
      "step": 1825
    },
    {
      "epoch": 0.9126788357178096,
      "grad_norm": 1.0378559827804565,
      "learning_rate": 3.867872807017544e-05,
      "loss": 0.3289,
      "step": 1850
    },
    {
      "epoch": 0.92501233349778,
      "grad_norm": 0.8580950498580933,
      "learning_rate": 3.845029239766082e-05,
      "loss": 0.2546,
      "step": 1875
    },
    {
      "epoch": 0.9373458312777504,
      "grad_norm": 2.7251040935516357,
      "learning_rate": 3.82218567251462e-05,
      "loss": 0.2795,
      "step": 1900
    },
    {
      "epoch": 0.9496793290577208,
      "grad_norm": 1.980296015739441,
      "learning_rate": 3.7993421052631577e-05,
      "loss": 0.3605,
      "step": 1925
    },
    {
      "epoch": 0.9620128268376912,
      "grad_norm": 3.8409457206726074,
      "learning_rate": 3.7764985380116966e-05,
      "loss": 0.3254,
      "step": 1950
    },
    {
      "epoch": 0.9743463246176616,
      "grad_norm": 0.9380149841308594,
      "learning_rate": 3.753654970760234e-05,
      "loss": 0.3083,
      "step": 1975
    },
    {
      "epoch": 0.986679822397632,
      "grad_norm": 4.170345783233643,
      "learning_rate": 3.7308114035087724e-05,
      "loss": 0.4037,
      "step": 2000
    },
    {
      "epoch": 0.9990133201776024,
      "grad_norm": 0.9810841083526611,
      "learning_rate": 3.70796783625731e-05,
      "loss": 0.2871,
      "step": 2025
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.9136830419095199,
      "eval_auc": 0.5081137943294276,
      "eval_f1": 0.9548948513415518,
      "eval_loss": 0.29672279953956604,
      "eval_precision": 0.9136830419095199,
      "eval_recall": 1.0,
      "eval_runtime": 33.752,
      "eval_samples_per_second": 106.749,
      "eval_steps_per_second": 3.348,
      "step": 2027
    },
    {
      "epoch": 1.0113468179575729,
      "grad_norm": 0.8372642993927002,
      "learning_rate": 3.685124269005848e-05,
      "loss": 0.269,
      "step": 2050
    },
    {
      "epoch": 1.0236803157375431,
      "grad_norm": 5.589089870452881,
      "learning_rate": 3.662280701754386e-05,
      "loss": 0.2937,
      "step": 2075
    },
    {
      "epoch": 1.0360138135175136,
      "grad_norm": 2.6755239963531494,
      "learning_rate": 3.639437134502924e-05,
      "loss": 0.3236,
      "step": 2100
    },
    {
      "epoch": 1.048347311297484,
      "grad_norm": 2.6148569583892822,
      "learning_rate": 3.6165935672514624e-05,
      "loss": 0.2979,
      "step": 2125
    },
    {
      "epoch": 1.0606808090774544,
      "grad_norm": 3.675825595855713,
      "learning_rate": 3.59375e-05,
      "loss": 0.2943,
      "step": 2150
    },
    {
      "epoch": 1.0730143068574247,
      "grad_norm": 0.9110400676727295,
      "learning_rate": 3.570906432748538e-05,
      "loss": 0.3048,
      "step": 2175
    },
    {
      "epoch": 1.0853478046373952,
      "grad_norm": 4.680244445800781,
      "learning_rate": 3.548062865497076e-05,
      "loss": 0.3244,
      "step": 2200
    },
    {
      "epoch": 1.0976813024173655,
      "grad_norm": 5.2665605545043945,
      "learning_rate": 3.525219298245615e-05,
      "loss": 0.3156,
      "step": 2225
    },
    {
      "epoch": 1.110014800197336,
      "grad_norm": 2.348223924636841,
      "learning_rate": 3.502375730994152e-05,
      "loss": 0.2805,
      "step": 2250
    },
    {
      "epoch": 1.1223482979773063,
      "grad_norm": 2.7092299461364746,
      "learning_rate": 3.4795321637426905e-05,
      "loss": 0.3142,
      "step": 2275
    },
    {
      "epoch": 1.1346817957572768,
      "grad_norm": 3.3490219116210938,
      "learning_rate": 3.456688596491228e-05,
      "loss": 0.2897,
      "step": 2300
    },
    {
      "epoch": 1.147015293537247,
      "grad_norm": 1.7662327289581299,
      "learning_rate": 3.4338450292397664e-05,
      "loss": 0.3327,
      "step": 2325
    },
    {
      "epoch": 1.1593487913172176,
      "grad_norm": 2.353829860687256,
      "learning_rate": 3.411001461988304e-05,
      "loss": 0.3108,
      "step": 2350
    },
    {
      "epoch": 1.171682289097188,
      "grad_norm": 2.7905948162078857,
      "learning_rate": 3.388157894736842e-05,
      "loss": 0.3048,
      "step": 2375
    },
    {
      "epoch": 1.1840157868771584,
      "grad_norm": 5.659600257873535,
      "learning_rate": 3.3653143274853805e-05,
      "loss": 0.298,
      "step": 2400
    },
    {
      "epoch": 1.1963492846571286,
      "grad_norm": 2.632979393005371,
      "learning_rate": 3.342470760233918e-05,
      "loss": 0.258,
      "step": 2425
    },
    {
      "epoch": 1.2086827824370991,
      "grad_norm": 1.1394457817077637,
      "learning_rate": 3.319627192982456e-05,
      "loss": 0.36,
      "step": 2450
    },
    {
      "epoch": 1.2210162802170696,
      "grad_norm": 2.0683844089508057,
      "learning_rate": 3.296783625730994e-05,
      "loss": 0.379,
      "step": 2475
    },
    {
      "epoch": 1.23334977799704,
      "grad_norm": 1.8254059553146362,
      "learning_rate": 3.273940058479532e-05,
      "loss": 0.3444,
      "step": 2500
    },
    {
      "epoch": 1.2456832757770104,
      "grad_norm": 9.155421257019043,
      "learning_rate": 3.2510964912280704e-05,
      "loss": 0.3649,
      "step": 2525
    },
    {
      "epoch": 1.2580167735569807,
      "grad_norm": 7.08811092376709,
      "learning_rate": 3.228252923976609e-05,
      "loss": 0.3281,
      "step": 2550
    },
    {
      "epoch": 1.2703502713369512,
      "grad_norm": 2.375296115875244,
      "learning_rate": 3.205409356725146e-05,
      "loss": 0.3021,
      "step": 2575
    },
    {
      "epoch": 1.2826837691169215,
      "grad_norm": 1.9335981607437134,
      "learning_rate": 3.1825657894736845e-05,
      "loss": 0.3525,
      "step": 2600
    },
    {
      "epoch": 1.295017266896892,
      "grad_norm": 1.156570315361023,
      "learning_rate": 3.159722222222222e-05,
      "loss": 0.2859,
      "step": 2625
    },
    {
      "epoch": 1.3073507646768623,
      "grad_norm": 3.328901529312134,
      "learning_rate": 3.13687865497076e-05,
      "loss": 0.3364,
      "step": 2650
    },
    {
      "epoch": 1.3196842624568328,
      "grad_norm": 2.6319520473480225,
      "learning_rate": 3.1140350877192986e-05,
      "loss": 0.3775,
      "step": 2675
    },
    {
      "epoch": 1.3320177602368033,
      "grad_norm": 1.337823510169983,
      "learning_rate": 3.091191520467837e-05,
      "loss": 0.2888,
      "step": 2700
    },
    {
      "epoch": 1.3443512580167736,
      "grad_norm": 1.1048977375030518,
      "learning_rate": 3.0683479532163744e-05,
      "loss": 0.3207,
      "step": 2725
    },
    {
      "epoch": 1.3566847557967439,
      "grad_norm": 2.2134850025177,
      "learning_rate": 3.0455043859649123e-05,
      "loss": 0.4205,
      "step": 2750
    },
    {
      "epoch": 1.3690182535767144,
      "grad_norm": 4.411900520324707,
      "learning_rate": 3.0226608187134503e-05,
      "loss": 0.2979,
      "step": 2775
    },
    {
      "epoch": 1.3813517513566849,
      "grad_norm": 2.7205402851104736,
      "learning_rate": 2.9998172514619882e-05,
      "loss": 0.2927,
      "step": 2800
    },
    {
      "epoch": 1.3936852491366551,
      "grad_norm": 1.4053397178649902,
      "learning_rate": 2.9769736842105268e-05,
      "loss": 0.3394,
      "step": 2825
    },
    {
      "epoch": 1.4060187469166254,
      "grad_norm": 1.3051424026489258,
      "learning_rate": 2.9541301169590647e-05,
      "loss": 0.3368,
      "step": 2850
    },
    {
      "epoch": 1.418352244696596,
      "grad_norm": 4.241896152496338,
      "learning_rate": 2.9312865497076026e-05,
      "loss": 0.3273,
      "step": 2875
    },
    {
      "epoch": 1.4306857424765664,
      "grad_norm": 1.2715412378311157,
      "learning_rate": 2.9084429824561405e-05,
      "loss": 0.2459,
      "step": 2900
    },
    {
      "epoch": 1.4430192402565367,
      "grad_norm": 2.4846601486206055,
      "learning_rate": 2.8855994152046784e-05,
      "loss": 0.3038,
      "step": 2925
    },
    {
      "epoch": 1.4553527380365072,
      "grad_norm": 0.9074434638023376,
      "learning_rate": 2.8627558479532164e-05,
      "loss": 0.2596,
      "step": 2950
    },
    {
      "epoch": 1.4676862358164775,
      "grad_norm": 6.2226481437683105,
      "learning_rate": 2.8399122807017546e-05,
      "loss": 0.3292,
      "step": 2975
    },
    {
      "epoch": 1.480019733596448,
      "grad_norm": 2.897814989089966,
      "learning_rate": 2.8170687134502925e-05,
      "loss": 0.2897,
      "step": 3000
    },
    {
      "epoch": 1.4923532313764183,
      "grad_norm": 1.7691318988800049,
      "learning_rate": 2.7942251461988305e-05,
      "loss": 0.3451,
      "step": 3025
    },
    {
      "epoch": 1.5046867291563888,
      "grad_norm": 7.944800853729248,
      "learning_rate": 2.7713815789473684e-05,
      "loss": 0.3526,
      "step": 3050
    },
    {
      "epoch": 1.517020226936359,
      "grad_norm": 5.50998067855835,
      "learning_rate": 2.7485380116959063e-05,
      "loss": 0.2618,
      "step": 3075
    },
    {
      "epoch": 1.5293537247163296,
      "grad_norm": 4.9781107902526855,
      "learning_rate": 2.7256944444444442e-05,
      "loss": 0.3387,
      "step": 3100
    },
    {
      "epoch": 1.5416872224963,
      "grad_norm": 2.1930160522460938,
      "learning_rate": 2.7028508771929828e-05,
      "loss": 0.3274,
      "step": 3125
    },
    {
      "epoch": 1.5540207202762704,
      "grad_norm": 3.252208948135376,
      "learning_rate": 2.6800073099415207e-05,
      "loss": 0.2491,
      "step": 3150
    },
    {
      "epoch": 1.5663542180562406,
      "grad_norm": 13.36633586883545,
      "learning_rate": 2.6571637426900586e-05,
      "loss": 0.359,
      "step": 3175
    },
    {
      "epoch": 1.5786877158362111,
      "grad_norm": 3.641357183456421,
      "learning_rate": 2.6343201754385966e-05,
      "loss": 0.3285,
      "step": 3200
    },
    {
      "epoch": 1.5910212136161817,
      "grad_norm": 2.2045693397521973,
      "learning_rate": 2.6114766081871345e-05,
      "loss": 0.3226,
      "step": 3225
    },
    {
      "epoch": 1.603354711396152,
      "grad_norm": 11.105202674865723,
      "learning_rate": 2.5886330409356724e-05,
      "loss": 0.2832,
      "step": 3250
    },
    {
      "epoch": 1.6156882091761222,
      "grad_norm": 4.253530025482178,
      "learning_rate": 2.565789473684211e-05,
      "loss": 0.3696,
      "step": 3275
    },
    {
      "epoch": 1.6280217069560927,
      "grad_norm": 0.993763267993927,
      "learning_rate": 2.542945906432749e-05,
      "loss": 0.2776,
      "step": 3300
    },
    {
      "epoch": 1.6403552047360632,
      "grad_norm": 2.813020944595337,
      "learning_rate": 2.520102339181287e-05,
      "loss": 0.2947,
      "step": 3325
    },
    {
      "epoch": 1.6526887025160335,
      "grad_norm": 1.1125609874725342,
      "learning_rate": 2.4972587719298248e-05,
      "loss": 0.293,
      "step": 3350
    },
    {
      "epoch": 1.6650222002960038,
      "grad_norm": 3.3155879974365234,
      "learning_rate": 2.4744152046783627e-05,
      "loss": 0.3083,
      "step": 3375
    },
    {
      "epoch": 1.6773556980759743,
      "grad_norm": 5.092229843139648,
      "learning_rate": 2.4515716374269006e-05,
      "loss": 0.2883,
      "step": 3400
    },
    {
      "epoch": 1.6896891958559448,
      "grad_norm": 0.8376036286354065,
      "learning_rate": 2.4287280701754385e-05,
      "loss": 0.266,
      "step": 3425
    },
    {
      "epoch": 1.7020226936359153,
      "grad_norm": 2.6180572509765625,
      "learning_rate": 2.4058845029239768e-05,
      "loss": 0.3319,
      "step": 3450
    },
    {
      "epoch": 1.7143561914158856,
      "grad_norm": 1.0887091159820557,
      "learning_rate": 2.3830409356725147e-05,
      "loss": 0.2878,
      "step": 3475
    },
    {
      "epoch": 1.7266896891958559,
      "grad_norm": 3.4597008228302,
      "learning_rate": 2.3601973684210526e-05,
      "loss": 0.3143,
      "step": 3500
    },
    {
      "epoch": 1.7390231869758264,
      "grad_norm": 1.2458105087280273,
      "learning_rate": 2.337353801169591e-05,
      "loss": 0.2772,
      "step": 3525
    },
    {
      "epoch": 1.7513566847557969,
      "grad_norm": 1.0016106367111206,
      "learning_rate": 2.3145102339181288e-05,
      "loss": 0.254,
      "step": 3550
    },
    {
      "epoch": 1.7636901825357671,
      "grad_norm": 0.9371845722198486,
      "learning_rate": 2.2916666666666667e-05,
      "loss": 0.2866,
      "step": 3575
    },
    {
      "epoch": 1.7760236803157374,
      "grad_norm": 2.7984836101531982,
      "learning_rate": 2.268823099415205e-05,
      "loss": 0.314,
      "step": 3600
    },
    {
      "epoch": 1.788357178095708,
      "grad_norm": 8.407980918884277,
      "learning_rate": 2.245979532163743e-05,
      "loss": 0.3354,
      "step": 3625
    },
    {
      "epoch": 1.8006906758756784,
      "grad_norm": 1.4253339767456055,
      "learning_rate": 2.2231359649122808e-05,
      "loss": 0.6666,
      "step": 3650
    },
    {
      "epoch": 1.8130241736556487,
      "grad_norm": 1.8990155458450317,
      "learning_rate": 2.200292397660819e-05,
      "loss": 0.332,
      "step": 3675
    },
    {
      "epoch": 1.825357671435619,
      "grad_norm": 3.489974021911621,
      "learning_rate": 2.177448830409357e-05,
      "loss": 0.2955,
      "step": 3700
    },
    {
      "epoch": 1.8376911692155895,
      "grad_norm": 4.009921550750732,
      "learning_rate": 2.154605263157895e-05,
      "loss": 0.2922,
      "step": 3725
    },
    {
      "epoch": 1.85002466699556,
      "grad_norm": 1.8342875242233276,
      "learning_rate": 2.1317616959064328e-05,
      "loss": 0.3508,
      "step": 3750
    },
    {
      "epoch": 1.8623581647755303,
      "grad_norm": 1.782589316368103,
      "learning_rate": 2.1089181286549707e-05,
      "loss": 0.3245,
      "step": 3775
    },
    {
      "epoch": 1.8746916625555008,
      "grad_norm": 2.984060049057007,
      "learning_rate": 2.0860745614035086e-05,
      "loss": 0.276,
      "step": 3800
    },
    {
      "epoch": 1.887025160335471,
      "grad_norm": 1.6987401247024536,
      "learning_rate": 2.063230994152047e-05,
      "loss": 0.3445,
      "step": 3825
    },
    {
      "epoch": 1.8993586581154416,
      "grad_norm": 1.4252331256866455,
      "learning_rate": 2.0403874269005848e-05,
      "loss": 0.316,
      "step": 3850
    },
    {
      "epoch": 1.911692155895412,
      "grad_norm": 1.4252597093582153,
      "learning_rate": 2.0175438596491227e-05,
      "loss": 0.3233,
      "step": 3875
    },
    {
      "epoch": 1.9240256536753824,
      "grad_norm": 6.7763752937316895,
      "learning_rate": 1.994700292397661e-05,
      "loss": 0.3568,
      "step": 3900
    },
    {
      "epoch": 1.9363591514553526,
      "grad_norm": 1.0406028032302856,
      "learning_rate": 1.971856725146199e-05,
      "loss": 0.256,
      "step": 3925
    },
    {
      "epoch": 1.9486926492353231,
      "grad_norm": 5.012691974639893,
      "learning_rate": 1.9490131578947368e-05,
      "loss": 0.3714,
      "step": 3950
    },
    {
      "epoch": 1.9610261470152937,
      "grad_norm": 4.675971031188965,
      "learning_rate": 1.926169590643275e-05,
      "loss": 0.3189,
      "step": 3975
    },
    {
      "epoch": 1.973359644795264,
      "grad_norm": 2.5267465114593506,
      "learning_rate": 1.903326023391813e-05,
      "loss": 0.2358,
      "step": 4000
    },
    {
      "epoch": 1.9856931425752342,
      "grad_norm": 0.8952421545982361,
      "learning_rate": 1.880482456140351e-05,
      "loss": 0.2809,
      "step": 4025
    },
    {
      "epoch": 1.9980266403552047,
      "grad_norm": 3.1482090950012207,
      "learning_rate": 1.857638888888889e-05,
      "loss": 0.2778,
      "step": 4050
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.9136830419095199,
      "eval_auc": 0.5388596734556735,
      "eval_f1": 0.9548948513415518,
      "eval_loss": 0.29811790585517883,
      "eval_precision": 0.9136830419095199,
      "eval_recall": 1.0,
      "eval_runtime": 33.6179,
      "eval_samples_per_second": 107.175,
      "eval_steps_per_second": 3.361,
      "step": 4054
    },
    {
      "epoch": 2.0103601381351752,
      "grad_norm": 8.418047904968262,
      "learning_rate": 1.834795321637427e-05,
      "loss": 0.3508,
      "step": 4075
    },
    {
      "epoch": 2.0226936359151457,
      "grad_norm": 0.8750646710395813,
      "learning_rate": 1.811951754385965e-05,
      "loss": 0.2411,
      "step": 4100
    },
    {
      "epoch": 2.035027133695116,
      "grad_norm": 0.9350325465202332,
      "learning_rate": 1.789108187134503e-05,
      "loss": 0.2911,
      "step": 4125
    },
    {
      "epoch": 2.0473606314750863,
      "grad_norm": 2.846468687057495,
      "learning_rate": 1.766264619883041e-05,
      "loss": 0.2719,
      "step": 4150
    },
    {
      "epoch": 2.059694129255057,
      "grad_norm": 1.3613282442092896,
      "learning_rate": 1.7434210526315788e-05,
      "loss": 0.3131,
      "step": 4175
    },
    {
      "epoch": 2.0720276270350273,
      "grad_norm": 1.0005241632461548,
      "learning_rate": 1.720577485380117e-05,
      "loss": 0.3139,
      "step": 4200
    },
    {
      "epoch": 2.0843611248149974,
      "grad_norm": 1.0721725225448608,
      "learning_rate": 1.697733918128655e-05,
      "loss": 0.2719,
      "step": 4225
    },
    {
      "epoch": 2.096694622594968,
      "grad_norm": 8.915780067443848,
      "learning_rate": 1.674890350877193e-05,
      "loss": 0.2804,
      "step": 4250
    },
    {
      "epoch": 2.1090281203749384,
      "grad_norm": 2.3782267570495605,
      "learning_rate": 1.652046783625731e-05,
      "loss": 0.3442,
      "step": 4275
    },
    {
      "epoch": 2.121361618154909,
      "grad_norm": 3.0817372798919678,
      "learning_rate": 1.629203216374269e-05,
      "loss": 0.289,
      "step": 4300
    },
    {
      "epoch": 2.133695115934879,
      "grad_norm": 2.1607894897460938,
      "learning_rate": 1.6063596491228073e-05,
      "loss": 0.3078,
      "step": 4325
    },
    {
      "epoch": 2.1460286137148494,
      "grad_norm": 2.4886415004730225,
      "learning_rate": 1.5835160818713452e-05,
      "loss": 0.3563,
      "step": 4350
    },
    {
      "epoch": 2.15836211149482,
      "grad_norm": 2.146252393722534,
      "learning_rate": 1.560672514619883e-05,
      "loss": 0.3134,
      "step": 4375
    },
    {
      "epoch": 2.1706956092747904,
      "grad_norm": 3.740633726119995,
      "learning_rate": 1.5378289473684214e-05,
      "loss": 0.3253,
      "step": 4400
    },
    {
      "epoch": 2.183029107054761,
      "grad_norm": 2.453158140182495,
      "learning_rate": 1.5149853801169591e-05,
      "loss": 0.2921,
      "step": 4425
    },
    {
      "epoch": 2.195362604834731,
      "grad_norm": 4.490250587463379,
      "learning_rate": 1.492141812865497e-05,
      "loss": 0.3505,
      "step": 4450
    },
    {
      "epoch": 2.2076961026147015,
      "grad_norm": 1.5068897008895874,
      "learning_rate": 1.4692982456140353e-05,
      "loss": 0.307,
      "step": 4475
    },
    {
      "epoch": 2.220029600394672,
      "grad_norm": 1.2755985260009766,
      "learning_rate": 1.4464546783625732e-05,
      "loss": 0.3071,
      "step": 4500
    },
    {
      "epoch": 2.2323630981746425,
      "grad_norm": 2.845574378967285,
      "learning_rate": 1.4236111111111111e-05,
      "loss": 0.2902,
      "step": 4525
    },
    {
      "epoch": 2.2446965959546126,
      "grad_norm": 0.8793368935585022,
      "learning_rate": 1.4007675438596494e-05,
      "loss": 0.3147,
      "step": 4550
    },
    {
      "epoch": 2.257030093734583,
      "grad_norm": 2.616255521774292,
      "learning_rate": 1.3779239766081873e-05,
      "loss": 0.3844,
      "step": 4575
    },
    {
      "epoch": 2.2693635915145536,
      "grad_norm": 1.2506290674209595,
      "learning_rate": 1.355080409356725e-05,
      "loss": 0.2602,
      "step": 4600
    },
    {
      "epoch": 2.281697089294524,
      "grad_norm": 5.174871444702148,
      "learning_rate": 1.3322368421052633e-05,
      "loss": 0.3425,
      "step": 4625
    },
    {
      "epoch": 2.294030587074494,
      "grad_norm": 2.352529764175415,
      "learning_rate": 1.3093932748538012e-05,
      "loss": 0.3513,
      "step": 4650
    },
    {
      "epoch": 2.3063640848544646,
      "grad_norm": 3.686067819595337,
      "learning_rate": 1.2865497076023392e-05,
      "loss": 0.3017,
      "step": 4675
    },
    {
      "epoch": 2.318697582634435,
      "grad_norm": 2.6614937782287598,
      "learning_rate": 1.2637061403508774e-05,
      "loss": 0.2407,
      "step": 4700
    },
    {
      "epoch": 2.3310310804144057,
      "grad_norm": 3.1493828296661377,
      "learning_rate": 1.2408625730994153e-05,
      "loss": 0.3013,
      "step": 4725
    },
    {
      "epoch": 2.343364578194376,
      "grad_norm": 2.7974276542663574,
      "learning_rate": 1.2180190058479534e-05,
      "loss": 0.3496,
      "step": 4750
    },
    {
      "epoch": 2.355698075974346,
      "grad_norm": 0.8237548470497131,
      "learning_rate": 1.1951754385964913e-05,
      "loss": 0.2785,
      "step": 4775
    },
    {
      "epoch": 2.3680315737543167,
      "grad_norm": 0.9614822864532471,
      "learning_rate": 1.1723318713450293e-05,
      "loss": 0.3653,
      "step": 4800
    },
    {
      "epoch": 2.3803650715342872,
      "grad_norm": 2.6732375621795654,
      "learning_rate": 1.1494883040935673e-05,
      "loss": 0.3071,
      "step": 4825
    },
    {
      "epoch": 2.3926985693142573,
      "grad_norm": 5.766613960266113,
      "learning_rate": 1.1266447368421053e-05,
      "loss": 0.3271,
      "step": 4850
    },
    {
      "epoch": 2.405032067094228,
      "grad_norm": 4.799734115600586,
      "learning_rate": 1.1038011695906433e-05,
      "loss": 0.3366,
      "step": 4875
    },
    {
      "epoch": 2.4173655648741983,
      "grad_norm": 4.730321407318115,
      "learning_rate": 1.0809576023391814e-05,
      "loss": 0.3906,
      "step": 4900
    },
    {
      "epoch": 2.429699062654169,
      "grad_norm": 1.9978655576705933,
      "learning_rate": 1.0581140350877194e-05,
      "loss": 0.2988,
      "step": 4925
    },
    {
      "epoch": 2.4420325604341393,
      "grad_norm": 7.501604080200195,
      "learning_rate": 1.0352704678362574e-05,
      "loss": 0.2983,
      "step": 4950
    },
    {
      "epoch": 2.4543660582141094,
      "grad_norm": 2.636504650115967,
      "learning_rate": 1.0124269005847954e-05,
      "loss": 0.241,
      "step": 4975
    },
    {
      "epoch": 2.46669955599408,
      "grad_norm": 0.9329339861869812,
      "learning_rate": 9.895833333333333e-06,
      "loss": 0.2748,
|
| 1435 |
+
"step": 5000
|
| 1436 |
+
},
|
| 1437 |
+
{
|
| 1438 |
+
"epoch": 2.4790330537740504,
|
| 1439 |
+
"grad_norm": 3.177311897277832,
|
| 1440 |
+
"learning_rate": 9.667397660818714e-06,
|
| 1441 |
+
"loss": 0.2675,
|
| 1442 |
+
"step": 5025
|
| 1443 |
+
},
|
| 1444 |
+
{
|
| 1445 |
+
"epoch": 2.491366551554021,
|
| 1446 |
+
"grad_norm": 2.898000717163086,
|
| 1447 |
+
"learning_rate": 9.438961988304094e-06,
|
| 1448 |
+
"loss": 0.3033,
|
| 1449 |
+
"step": 5050
|
| 1450 |
+
},
|
| 1451 |
+
{
|
| 1452 |
+
"epoch": 2.5037000493339914,
|
| 1453 |
+
"grad_norm": 2.6203970909118652,
|
| 1454 |
+
"learning_rate": 9.210526315789474e-06,
|
| 1455 |
+
"loss": 0.2777,
|
| 1456 |
+
"step": 5075
|
| 1457 |
+
},
|
| 1458 |
+
{
|
| 1459 |
+
"epoch": 2.5160335471139614,
|
| 1460 |
+
"grad_norm": 3.7342023849487305,
|
| 1461 |
+
"learning_rate": 8.982090643274855e-06,
|
| 1462 |
+
"loss": 0.3422,
|
| 1463 |
+
"step": 5100
|
| 1464 |
+
},
|
| 1465 |
+
{
|
| 1466 |
+
"epoch": 2.528367044893932,
|
| 1467 |
+
"grad_norm": 1.0027085542678833,
|
| 1468 |
+
"learning_rate": 8.753654970760235e-06,
|
| 1469 |
+
"loss": 0.3629,
|
| 1470 |
+
"step": 5125
|
| 1471 |
+
},
|
| 1472 |
+
{
|
| 1473 |
+
"epoch": 2.5407005426739024,
|
| 1474 |
+
"grad_norm": 4.914186477661133,
|
| 1475 |
+
"learning_rate": 8.525219298245615e-06,
|
| 1476 |
+
"loss": 0.2853,
|
| 1477 |
+
"step": 5150
|
| 1478 |
+
},
|
| 1479 |
+
{
|
| 1480 |
+
"epoch": 2.5530340404538725,
|
| 1481 |
+
"grad_norm": 1.0409221649169922,
|
| 1482 |
+
"learning_rate": 8.296783625730994e-06,
|
| 1483 |
+
"loss": 0.2965,
|
| 1484 |
+
"step": 5175
|
| 1485 |
+
},
|
| 1486 |
+
{
|
| 1487 |
+
"epoch": 2.565367538233843,
|
| 1488 |
+
"grad_norm": 5.284379482269287,
|
| 1489 |
+
"learning_rate": 8.068347953216375e-06,
|
| 1490 |
+
"loss": 0.3599,
|
| 1491 |
+
"step": 5200
|
| 1492 |
+
},
|
| 1493 |
+
{
|
| 1494 |
+
"epoch": 2.5777010360138135,
|
| 1495 |
+
"grad_norm": 4.738578796386719,
|
| 1496 |
+
"learning_rate": 7.839912280701754e-06,
|
| 1497 |
+
"loss": 0.3871,
|
| 1498 |
+
"step": 5225
|
| 1499 |
+
},
|
| 1500 |
+
{
|
| 1501 |
+
"epoch": 2.590034533793784,
|
| 1502 |
+
"grad_norm": 3.7173986434936523,
|
| 1503 |
+
"learning_rate": 7.611476608187135e-06,
|
| 1504 |
+
"loss": 0.3364,
|
| 1505 |
+
"step": 5250
|
| 1506 |
+
},
|
| 1507 |
+
{
|
| 1508 |
+
"epoch": 2.6023680315737545,
|
| 1509 |
+
"grad_norm": 1.101990818977356,
|
| 1510 |
+
"learning_rate": 7.383040935672516e-06,
|
| 1511 |
+
"loss": 0.2705,
|
| 1512 |
+
"step": 5275
|
| 1513 |
+
},
|
| 1514 |
+
{
|
| 1515 |
+
"epoch": 2.6147015293537246,
|
| 1516 |
+
"grad_norm": 0.8991968035697937,
|
| 1517 |
+
"learning_rate": 7.154605263157895e-06,
|
| 1518 |
+
"loss": 0.2464,
|
| 1519 |
+
"step": 5300
|
| 1520 |
+
},
|
| 1521 |
+
{
|
| 1522 |
+
"epoch": 2.627035027133695,
|
| 1523 |
+
"grad_norm": 1.0676743984222412,
|
| 1524 |
+
"learning_rate": 6.926169590643275e-06,
|
| 1525 |
+
"loss": 0.2874,
|
| 1526 |
+
"step": 5325
|
| 1527 |
+
},
|
| 1528 |
+
{
|
| 1529 |
+
"epoch": 2.6393685249136656,
|
| 1530 |
+
"grad_norm": 3.034912347793579,
|
| 1531 |
+
"learning_rate": 6.697733918128656e-06,
|
| 1532 |
+
"loss": 0.282,
|
| 1533 |
+
"step": 5350
|
| 1534 |
+
},
|
| 1535 |
+
{
|
| 1536 |
+
"epoch": 2.6517020226936356,
|
| 1537 |
+
"grad_norm": 0.8661335706710815,
|
| 1538 |
+
"learning_rate": 6.469298245614036e-06,
|
| 1539 |
+
"loss": 0.2463,
|
| 1540 |
+
"step": 5375
|
| 1541 |
+
},
|
| 1542 |
+
{
|
| 1543 |
+
"epoch": 2.6640355204736066,
|
| 1544 |
+
"grad_norm": 2.791443109512329,
|
| 1545 |
+
"learning_rate": 6.240862573099415e-06,
|
| 1546 |
+
"loss": 0.2501,
|
| 1547 |
+
"step": 5400
|
| 1548 |
+
},
|
| 1549 |
+
{
|
| 1550 |
+
"epoch": 2.6763690182535766,
|
| 1551 |
+
"grad_norm": 1.0052167177200317,
|
| 1552 |
+
"learning_rate": 6.012426900584796e-06,
|
| 1553 |
+
"loss": 0.3198,
|
| 1554 |
+
"step": 5425
|
| 1555 |
+
},
|
| 1556 |
+
{
|
| 1557 |
+
"epoch": 2.688702516033547,
|
| 1558 |
+
"grad_norm": 2.621087074279785,
|
| 1559 |
+
"learning_rate": 5.783991228070176e-06,
|
| 1560 |
+
"loss": 0.3273,
|
| 1561 |
+
"step": 5450
|
| 1562 |
+
},
|
| 1563 |
+
{
|
| 1564 |
+
"epoch": 2.7010360138135177,
|
| 1565 |
+
"grad_norm": 1.0234779119491577,
|
| 1566 |
+
"learning_rate": 5.555555555555556e-06,
|
| 1567 |
+
"loss": 0.3213,
|
| 1568 |
+
"step": 5475
|
| 1569 |
+
},
|
| 1570 |
+
{
|
| 1571 |
+
"epoch": 2.7133695115934877,
|
| 1572 |
+
"grad_norm": 4.86189603805542,
|
| 1573 |
+
"learning_rate": 5.327119883040936e-06,
|
| 1574 |
+
"loss": 0.3636,
|
| 1575 |
+
"step": 5500
|
| 1576 |
+
},
|
| 1577 |
+
{
|
| 1578 |
+
"epoch": 2.725703009373458,
|
| 1579 |
+
"grad_norm": 1.8415815830230713,
|
| 1580 |
+
"learning_rate": 5.098684210526316e-06,
|
| 1581 |
+
"loss": 0.3645,
|
| 1582 |
+
"step": 5525
|
| 1583 |
+
},
|
| 1584 |
+
{
|
| 1585 |
+
"epoch": 2.7380365071534287,
|
| 1586 |
+
"grad_norm": 1.44539213180542,
|
| 1587 |
+
"learning_rate": 4.870248538011697e-06,
|
| 1588 |
+
"loss": 0.301,
|
| 1589 |
+
"step": 5550
|
| 1590 |
+
},
|
| 1591 |
+
{
|
| 1592 |
+
"epoch": 2.7503700049333992,
|
| 1593 |
+
"grad_norm": 4.458736896514893,
|
| 1594 |
+
"learning_rate": 4.641812865497076e-06,
|
| 1595 |
+
"loss": 0.3528,
|
| 1596 |
+
"step": 5575
|
| 1597 |
+
},
|
| 1598 |
+
{
|
| 1599 |
+
"epoch": 2.7627035027133697,
|
| 1600 |
+
"grad_norm": 2.022102117538452,
|
| 1601 |
+
"learning_rate": 4.413377192982456e-06,
|
| 1602 |
+
"loss": 0.2988,
|
| 1603 |
+
"step": 5600
|
| 1604 |
+
},
|
| 1605 |
+
{
|
| 1606 |
+
"epoch": 2.77503700049334,
|
| 1607 |
+
"grad_norm": 1.420719027519226,
|
| 1608 |
+
"learning_rate": 4.184941520467837e-06,
|
| 1609 |
+
"loss": 0.2752,
|
| 1610 |
+
"step": 5625
|
| 1611 |
+
},
|
| 1612 |
+
{
|
| 1613 |
+
"epoch": 2.7873704982733103,
|
| 1614 |
+
"grad_norm": 0.9011722803115845,
|
| 1615 |
+
"learning_rate": 3.956505847953217e-06,
|
| 1616 |
+
"loss": 0.2944,
|
| 1617 |
+
"step": 5650
|
| 1618 |
+
},
|
| 1619 |
+
{
|
| 1620 |
+
"epoch": 2.799703996053281,
|
| 1621 |
+
"grad_norm": 2.3301029205322266,
|
| 1622 |
+
"learning_rate": 3.7280701754385965e-06,
|
| 1623 |
+
"loss": 0.2935,
|
| 1624 |
+
"step": 5675
|
| 1625 |
+
},
|
| 1626 |
+
{
|
| 1627 |
+
"epoch": 2.812037493833251,
|
| 1628 |
+
"grad_norm": 0.8454089760780334,
|
| 1629 |
+
"learning_rate": 3.499634502923977e-06,
|
| 1630 |
+
"loss": 0.2912,
|
| 1631 |
+
"step": 5700
|
| 1632 |
+
},
|
| 1633 |
+
{
|
| 1634 |
+
"epoch": 2.8243709916132214,
|
| 1635 |
+
"grad_norm": 1.046055555343628,
|
| 1636 |
+
"learning_rate": 3.271198830409357e-06,
|
| 1637 |
+
"loss": 0.2726,
|
| 1638 |
+
"step": 5725
|
| 1639 |
+
},
|
| 1640 |
+
{
|
| 1641 |
+
"epoch": 2.836704489393192,
|
| 1642 |
+
"grad_norm": 5.1950201988220215,
|
| 1643 |
+
"learning_rate": 3.042763157894737e-06,
|
| 1644 |
+
"loss": 0.3481,
|
| 1645 |
+
"step": 5750
|
| 1646 |
+
},
|
| 1647 |
+
{
|
| 1648 |
+
"epoch": 2.8490379871731624,
|
| 1649 |
+
"grad_norm": 2.519756317138672,
|
| 1650 |
+
"learning_rate": 2.814327485380117e-06,
|
| 1651 |
+
"loss": 0.2713,
|
| 1652 |
+
"step": 5775
|
| 1653 |
+
},
|
| 1654 |
+
{
|
| 1655 |
+
"epoch": 2.861371484953133,
|
| 1656 |
+
"grad_norm": 0.9701582193374634,
|
| 1657 |
+
"learning_rate": 2.585891812865497e-06,
|
| 1658 |
+
"loss": 0.3381,
|
| 1659 |
+
"step": 5800
|
| 1660 |
+
},
|
| 1661 |
+
{
|
| 1662 |
+
"epoch": 2.873704982733103,
|
| 1663 |
+
"grad_norm": 2.884457588195801,
|
| 1664 |
+
"learning_rate": 2.3574561403508775e-06,
|
| 1665 |
+
"loss": 0.3101,
|
| 1666 |
+
"step": 5825
|
| 1667 |
+
},
|
| 1668 |
+
{
|
| 1669 |
+
"epoch": 2.8860384805130734,
|
| 1670 |
+
"grad_norm": 1.04812753200531,
|
| 1671 |
+
"learning_rate": 2.1290204678362575e-06,
|
| 1672 |
+
"loss": 0.3222,
|
| 1673 |
+
"step": 5850
|
| 1674 |
+
},
|
| 1675 |
+
{
|
| 1676 |
+
"epoch": 2.898371978293044,
|
| 1677 |
+
"grad_norm": 5.422378063201904,
|
| 1678 |
+
"learning_rate": 1.9005847953216373e-06,
|
| 1679 |
+
"loss": 0.3344,
|
| 1680 |
+
"step": 5875
|
| 1681 |
+
},
|
| 1682 |
+
{
|
| 1683 |
+
"epoch": 2.9107054760730144,
|
| 1684 |
+
"grad_norm": 2.4429121017456055,
|
| 1685 |
+
"learning_rate": 1.6721491228070176e-06,
|
| 1686 |
+
"loss": 0.3438,
|
| 1687 |
+
"step": 5900
|
| 1688 |
+
},
|
| 1689 |
+
{
|
| 1690 |
+
"epoch": 2.923038973852985,
|
| 1691 |
+
"grad_norm": 3.2756924629211426,
|
| 1692 |
+
"learning_rate": 1.4437134502923976e-06,
|
| 1693 |
+
"loss": 0.2633,
|
| 1694 |
+
"step": 5925
|
| 1695 |
+
},
|
| 1696 |
+
{
|
| 1697 |
+
"epoch": 2.935372471632955,
|
| 1698 |
+
"grad_norm": 3.166130781173706,
|
| 1699 |
+
"learning_rate": 1.2152777777777778e-06,
|
| 1700 |
+
"loss": 0.3795,
|
| 1701 |
+
"step": 5950
|
| 1702 |
+
},
|
| 1703 |
+
{
|
| 1704 |
+
"epoch": 2.9477059694129255,
|
| 1705 |
+
"grad_norm": 2.424431085586548,
|
| 1706 |
+
"learning_rate": 9.868421052631579e-07,
|
| 1707 |
+
"loss": 0.3337,
|
| 1708 |
+
"step": 5975
|
| 1709 |
+
},
|
| 1710 |
+
{
|
| 1711 |
+
"epoch": 2.960039467192896,
|
| 1712 |
+
"grad_norm": 5.236613750457764,
|
| 1713 |
+
"learning_rate": 7.58406432748538e-07,
|
| 1714 |
+
"loss": 0.3092,
|
| 1715 |
+
"step": 6000
|
| 1716 |
+
},
|
| 1717 |
+
{
|
| 1718 |
+
"epoch": 2.972372964972866,
|
| 1719 |
+
"grad_norm": 1.0767054557800293,
|
| 1720 |
+
"learning_rate": 5.299707602339181e-07,
|
| 1721 |
+
"loss": 0.3348,
|
| 1722 |
+
"step": 6025
|
| 1723 |
+
},
|
| 1724 |
+
{
|
| 1725 |
+
"epoch": 2.9847064627528366,
|
| 1726 |
+
"grad_norm": 4.74832010269165,
|
| 1727 |
+
"learning_rate": 3.0153508771929827e-07,
|
| 1728 |
+
"loss": 0.3349,
|
| 1729 |
+
"step": 6050
|
| 1730 |
+
},
|
| 1731 |
+
{
|
| 1732 |
+
"epoch": 2.997039960532807,
|
| 1733 |
+
"grad_norm": 2.1578445434570312,
|
| 1734 |
+
"learning_rate": 7.309941520467836e-08,
|
| 1735 |
+
"loss": 0.3508,
|
| 1736 |
+
"step": 6075
|
| 1737 |
+
},
|
| 1738 |
+
{
|
| 1739 |
+
"epoch": 3.0,
|
| 1740 |
+
"eval_accuracy": 0.9136830419095199,
|
| 1741 |
+
"eval_auc": 0.5282966990033326,
|
| 1742 |
+
"eval_f1": 0.9548948513415518,
|
| 1743 |
+
"eval_loss": 0.29475775361061096,
|
| 1744 |
+
"eval_precision": 0.9136830419095199,
|
| 1745 |
+
"eval_recall": 1.0,
|
| 1746 |
+
"eval_runtime": 33.2805,
|
| 1747 |
+
"eval_samples_per_second": 108.262,
|
| 1748 |
+
"eval_steps_per_second": 3.395,
|
| 1749 |
+
"step": 6081
|
| 1750 |
+
}
|
| 1751 |
+
],
|
| 1752 |
+
"logging_steps": 25,
|
| 1753 |
+
"max_steps": 6081,
|
| 1754 |
+
"num_input_tokens_seen": 0,
|
| 1755 |
+
"num_train_epochs": 3,
|
| 1756 |
+
"save_steps": 500,
|
| 1757 |
+
"stateful_callbacks": {
|
| 1758 |
+
"EarlyStoppingCallback": {
|
| 1759 |
+
"args": {
|
| 1760 |
+
"early_stopping_patience": 5,
|
| 1761 |
+
"early_stopping_threshold": 0.01
|
| 1762 |
+
},
|
| 1763 |
+
"attributes": {
|
| 1764 |
+
"early_stopping_patience_counter": 2
|
| 1765 |
+
}
|
| 1766 |
+
},
|
| 1767 |
+
"TrainerControl": {
|
| 1768 |
+
"args": {
|
| 1769 |
+
"should_epoch_stop": false,
|
| 1770 |
+
"should_evaluate": false,
|
| 1771 |
+
"should_log": false,
|
| 1772 |
+
"should_save": true,
|
| 1773 |
+
"should_training_stop": true
|
| 1774 |
+
},
|
| 1775 |
+
"attributes": {}
|
| 1776 |
+
}
|
| 1777 |
+
},
|
| 1778 |
+
"total_flos": 4.532261799273062e+16,
|
| 1779 |
+
"train_batch_size": 16,
|
| 1780 |
+
"trial_name": null,
|
| 1781 |
+
"trial_params": null
|
| 1782 |
+
}
|
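The trainer state above is plain JSON, so the training curve can be inspected without any Hugging Face tooling. A minimal sketch, assuming the checkpoint folder has been downloaded locally (the path below is illustrative) and the standard Trainer schema where the entries above live under a "log_history" key:

import json

# Read the trainer state shipped with the checkpoint.
with open("checkpoint-6081/trainer_state.json") as f:
    state = json.load(f)

# Per-step training logs carry "loss"; end-of-epoch eval logs carry "eval_loss".
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"{len(train_logs)} train log entries, one every {state['logging_steps']} steps")
for e in eval_logs:
    print(f"epoch {e['epoch']:.1f}: eval_loss={e['eval_loss']:.4f}, f1={e['eval_f1']:.4f}")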
checkpoint-6081/training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2462a44f8bfd2c5c132a34fe87a12bdf94977fa46efcc9df9186adca0030c51d
size 5432
config.json
ADDED
@@ -0,0 +1,36 @@
{
  "_name_or_path": "BAAI/bge-large-en-v1.5",
  "_num_labels": 2,
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "BAD_USER",
    "1": "GOOD_USER"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "BAD_USER": 0,
    "GOOD_USER": 1
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.48.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}
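Since this config declares a stock BertForSequenceClassification head with two labels, the repo loads with standard transformers APIs. A sketch, assuming the hub repo id trentmkelly/autotrain-roblox-12-with-bge-large (inferred from training_params.json further down; substitute the actual path):

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

repo = "trentmkelly/autotrain-roblox-12-with-bge-large"  # assumed repo id
tok = AutoTokenizer.from_pretrained(repo)
model = AutoModelForSequenceClassification.from_pretrained(repo)

inputs = tok("example comment text", truncation=True, max_length=256, return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits
# id2label comes straight from config.json: 0 -> BAD_USER, 1 -> GOOD_USER.
print(model.config.id2label[logits.argmax(-1).item()])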
model.safetensors
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:210a11360287b8671eaf6f21b6f39996c84fd82642608b82cb277de916601c7f
size 1340622760
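model.safetensors is committed as a Git LFS pointer: the ~1.34 GB weight file lives in LFS, so a plain clone without LFS yields only the three-line stub above. One way to fetch and cache the real file, assuming the same repo id as before:

from huggingface_hub import hf_hub_download

# Downloads the actual 1,340,622,760-byte weight file, resolving
# the LFS pointer transparently, and returns the local cache path.
path = hf_hub_download(
    repo_id="trentmkelly/autotrain-roblox-12-with-bge-large",  # assumed repo id
    filename="model.safetensors",
)
print(path)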
runs/May22_15-42-05_r-trentmkelly-autotrain-advanced-6oq4mr6b-da706-g95do/events.out.tfevents.1747928527.r-trentmkelly-autotrain-advanced-6oq4mr6b-da706-g95do.89.0
CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:258d6c73dc7731b83ce624cd23239cfc19675a90a2bcdb60550d1a46c26de99d
+size 58460
runs/May22_15-42-05_r-trentmkelly-autotrain-advanced-6oq4mr6b-da706-g95do/events.out.tfevents.1747932270.r-trentmkelly-autotrain-advanced-6oq4mr6b-da706-g95do.89.1
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:24e68ac3f41cb98afd769ea3673308335c81632dddbb09230d1c369b75ead20a
size 607
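These two events.out.tfevents files are TensorBoard logs (the job was launched with log="tensorboard"; see training_params.json below). They can also be read programmatically; a sketch using TensorBoard's event accumulator, assuming the run directory has been downloaded locally and that the Trainer wrote scalars under the usual train/loss tag (check Tags() first):

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Point at the downloaded run directory; Reload() parses every event file in it.
acc = EventAccumulator("runs/May22_15-42-05_r-trentmkelly-autotrain-advanced-6oq4mr6b-da706-g95do")
acc.Reload()

print(acc.Tags()["scalars"])          # list the scalar tags actually logged
for ev in acc.Scalars("train/loss"):  # tag name assumed; adjust to the list above
    print(ev.step, ev.value)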
special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
{
  "cls_token": { "content": "[CLS]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
  "mask_token": { "content": "[MASK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
  "pad_token": { "content": "[PAD]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
  "sep_token": { "content": "[SEP]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false },
  "unk_token": { "content": "[UNK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false }
}
tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
{
  "added_tokens_decoder": {
    "0": { "content": "[PAD]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "100": { "content": "[UNK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "101": { "content": "[CLS]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "102": { "content": "[SEP]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true },
    "103": { "content": "[MASK]", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }
  },
  "clean_up_tokenization_spaces": true,
  "cls_token": "[CLS]",
  "do_basic_tokenize": true,
  "do_lower_case": true,
  "extra_special_tokens": {},
  "mask_token": "[MASK]",
  "model_max_length": 512,
  "never_split": null,
  "pad_token": "[PAD]",
  "sep_token": "[SEP]",
  "strip_accents": null,
  "tokenize_chinese_chars": true,
  "tokenizer_class": "BertTokenizer",
  "unk_token": "[UNK]"
}
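This is a standard uncased BERT WordPiece tokenizer (model_max_length 512, although training truncated at 256; see training_params.json below). A quick sketch that the special tokens above are wired up, loading from the repo files in the current directory:

from transformers import AutoTokenizer

# Reads tokenizer.json / tokenizer_config.json / special_tokens_map.json / vocab.txt
# from the current directory (or pass the hub repo id instead of ".").
tok = AutoTokenizer.from_pretrained(".")

enc = tok("Hello world", truncation=True, max_length=256)
print(tok.convert_ids_to_tokens(enc["input_ids"]))
# -> ['[CLS]', 'hello', 'world', '[SEP]']  (lowercased, since do_lower_case is true)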
training_args.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:2462a44f8bfd2c5c132a34fe87a12bdf94977fa46efcc9df9186adca0030c51d
size 5432
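training_args.bin (identical here and in the checkpoint, per the matching sha256) is a pickled transformers TrainingArguments object rather than readable text, which is why only an LFS stub appears in the diff. If you trust the source it can be unpickled for inspection; a sketch (recent torch defaults to weights_only=True, which must be disabled for pickled objects):

import torch

# Unpickles the TrainingArguments saved by the Trainer. Only do this with
# files from a source you trust: unpickling can execute arbitrary code.
args = torch.load("training_args.bin", weights_only=False)
print(args.learning_rate, args.per_device_train_batch_size, args.num_train_epochs)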
training_params.json
ADDED
@@ -0,0 +1,30 @@
{
  "data_path": "autotrain-roblox-12-with-bge-large/autotrain-data",
  "model": "BAAI/bge-large-en-v1.5",
  "lr": 5e-05,
  "epochs": 3,
  "max_seq_length": 256,
  "batch_size": 16,
  "warmup_ratio": 0.1,
  "gradient_accumulation": 1,
  "optimizer": "adamw_torch",
  "scheduler": "linear",
  "weight_decay": 0.0,
  "max_grad_norm": 1.0,
  "seed": 42,
  "train_split": "train",
  "valid_split": "validation",
  "text_column": "autotrain_text",
  "target_column": "autotrain_label",
  "logging_steps": -1,
  "project_name": "autotrain-roblox-12-with-bge-large",
  "auto_find_batch_size": false,
  "mixed_precision": "fp16",
  "save_total_limit": 1,
  "push_to_hub": true,
  "eval_strategy": "epoch",
  "username": "trentmkelly",
  "log": "tensorboard",
  "early_stopping_patience": 5,
  "early_stopping_threshold": 0.01
}
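training_params.json records the AutoTrain job itself: BAAI/bge-large-en-v1.5 fine-tuned for 3 epochs at lr 5e-5, batch size 16, fp16, linear schedule with 10% warmup, and sequences truncated to 256 tokens. For inference none of this matters beyond the max length; a minimal sketch with the high-level pipeline API (repo id assumed as before, output illustrative):

from transformers import pipeline

clf = pipeline(
    "text-classification",
    model="trentmkelly/autotrain-roblox-12-with-bge-large",  # assumed repo id
)

# Returns the id2label names from config.json with a softmax score.
print(clf("I love AutoTrain", truncation=True, max_length=256))
# e.g. [{'label': 'GOOD_USER', 'score': 0.97}]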
vocab.txt
ADDED
The diff for this file is too large to render. See raw diff.