Daewon0808 commited on
Commit
70284ec
·
verified ·
1 Parent(s): ed79f98

Training in progress, step 2280, checkpoint

Browse files
Files changed (28) hide show
  1. last-checkpoint/adapter_model.safetensors +1 -1
  2. last-checkpoint/global_step2280/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
  3. last-checkpoint/global_step2280/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
  4. last-checkpoint/global_step2280/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
  5. last-checkpoint/global_step2280/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
  6. last-checkpoint/global_step2280/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
  7. last-checkpoint/global_step2280/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
  8. last-checkpoint/global_step2280/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
  9. last-checkpoint/global_step2280/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
  10. last-checkpoint/global_step2280/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
  11. last-checkpoint/global_step2280/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
  12. last-checkpoint/global_step2280/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
  13. last-checkpoint/global_step2280/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
  14. last-checkpoint/global_step2280/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
  15. last-checkpoint/global_step2280/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
  16. last-checkpoint/global_step2280/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
  17. last-checkpoint/global_step2280/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
  18. last-checkpoint/latest +1 -1
  19. last-checkpoint/rng_state_0.pth +1 -1
  20. last-checkpoint/rng_state_1.pth +1 -1
  21. last-checkpoint/rng_state_2.pth +1 -1
  22. last-checkpoint/rng_state_3.pth +1 -1
  23. last-checkpoint/rng_state_4.pth +1 -1
  24. last-checkpoint/rng_state_5.pth +1 -1
  25. last-checkpoint/rng_state_6.pth +1 -1
  26. last-checkpoint/rng_state_7.pth +1 -1
  27. last-checkpoint/scheduler.pt +1 -1
  28. last-checkpoint/trainer_state.json +2860 -4
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88048dd9a70f9e1147b02b613d2ddf17384748318f500c0d758d0fdab8755f5b
3
  size 83946192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:522174c44b2bc3f54d5a8fe33ebde0bbaf5d2b47bc53cd1b001caccafeddbd61
3
  size 83946192
last-checkpoint/global_step2280/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07d1efa8c9a763e761565c9e09fb032b95960c332b4ddbbf51fd58f57d3f183f
3
+ size 62918128
last-checkpoint/global_step2280/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe62a4b7286c9077be50a9476ed4894fe5d9ed9e381e5a7cd2034d614c2dcae4
3
+ size 62918128
last-checkpoint/global_step2280/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c73f89a9c680b027932434086414c106ea8e66a860851720e3ea0b921578edfd
3
+ size 62918128
last-checkpoint/global_step2280/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ada8f36d99cee2b8f26fe1628d48517c443c270191083556c88b4fb9b001caa
3
+ size 62918128
last-checkpoint/global_step2280/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a41daa7d1bc752e6f747e79663ae96968901a0888d708fecfee03b6187bd2cea
3
+ size 62918128
last-checkpoint/global_step2280/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9f6bff5cceb2f42c9763b267161a932deb92677cd239f63e9ec92866c670fa7
3
+ size 62918128
last-checkpoint/global_step2280/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97197284a7c2f2a02e0dadce91f5e614492206ddcb1d5e782539b3f25c50620d
3
+ size 62918128
last-checkpoint/global_step2280/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:130c8b0dad8071c48aeb3da4fb6e5f834602f253e78d944742934c3f9d9e7b79
3
+ size 62918128
last-checkpoint/global_step2280/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9100886399952ad4f97249a39a28356be5cd1ee1007032ddbb488feb1523d9f3
3
+ size 445678
last-checkpoint/global_step2280/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05ba478bb37aa2c140f640e1c177973af2ad3c43078596c06888a3705c4d3e62
3
+ size 445678
last-checkpoint/global_step2280/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93ced05eafcfc006322b4dcf064c50abbbd04e74b90aff10b421ec5e46079169
3
+ size 445678
last-checkpoint/global_step2280/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d639e711f5e03c598579490bc8d2fe2381b4bf6f2a8c856553ce64e77557bb79
3
+ size 445678
last-checkpoint/global_step2280/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdc2fcfc2343910f41b10fece0e584ac669c89db7916e270e78fde1ba05e9d43
3
+ size 445678
last-checkpoint/global_step2280/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c47b97e37c95bee72ba31bc21a916cf0c4e5e9ec4b9059f22d0af6f06a64975f
3
+ size 445678
last-checkpoint/global_step2280/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e914156bb3b2272b1afd0da9a2999749f81aa5a9c70245eddf00d0f89930f4f
3
+ size 445678
last-checkpoint/global_step2280/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4915f7cd15718b84976ede5e38139b4adcea6b11bacd75a765b49f287e2db46a
3
+ size 445678
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2000
 
1
+ global_step2280
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aaf48aba55f83adec18eb14db42435a2413ac97822888622ab8d2c641cc1e890
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8aa5522d6e5be677047c5b451943bdacb934e0a52b74729f654802cd02e0680c
3
  size 15984
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7bdb46bda56f590fc6d0076d7bf2f5b22774fd62780402dce14fd09436d7953
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad839845395873a423632867bc97c602d876029defdb7eefc7f542900754508e
3
  size 15984
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfc61caf6b785c98c002fcd379b8761ea4a8826a54aac137514d616b478397cc
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faf266da190f7c749119cb9a8d4b7a3d723a19bab389e04d6e629c2277a04d2a
3
  size 15984
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7fffe0bb67a745aa8c491edc47aba448ba67c915d716adfeac6dbd6d1a71c512
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c766900d10a1acc55ec0bb421d7fe33a3d53fc3d3e736229f3ebf30c67f78a11
3
  size 15984
last-checkpoint/rng_state_4.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bec4084969d019dabfd9254f605e46a9c43bfd290c92ceb14a3697a5c69a26aa
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6211d248e0fd1561b9393eb9cae178f7e48075c00e6b56ac7e46a4d450842040
3
  size 15984
last-checkpoint/rng_state_5.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad573c49c90fba941d50ff61bdb9f08c25d50a807dc8870e9222742e01860f88
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8486ce48273e9612f2429e287d68ba6a34ea649e4c905267e69fcd5f195ddc9c
3
  size 15984
last-checkpoint/rng_state_6.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:295d1e54b6e4e09dfb5a6e7f58ca6d5d61e2af7376459d37898b12bc24d1595b
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ef1145f7f0639d1022e63b5ccdd1ce92b8eb89f689b305af74b8f11f48accfe
3
  size 15984
last-checkpoint/rng_state_7.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b02d0dbdb2460ae7dac1509b3f5cb4293559290b5d5b40ab6d138d09b1c8cef5
3
  size 15984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:104cccc50c39de432cfd2aaf15c3f81a4e3109745888765cb5d304a59c878c3e
3
  size 15984
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f36e0c5df7f2e0bac558458ff0a29723171a5e850ba0eae3769472f4c2c5286d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f71f4b0f1ae150ee8da5695a2f593834f500a7d3dc601cab81fadb6fb8f8b7d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8770006577504933,
5
  "eval_steps": 5,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -20423,6 +20423,2862 @@
20423
  "eval_samples_per_second": 5.862,
20424
  "eval_steps_per_second": 0.195,
20425
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20426
  }
20427
  ],
20428
  "logging_steps": 1,
@@ -20437,12 +23293,12 @@
20437
  "should_evaluate": false,
20438
  "should_log": false,
20439
  "should_save": true,
20440
- "should_training_stop": false
20441
  },
20442
  "attributes": {}
20443
  }
20444
  },
20445
- "total_flos": 3096254904532992.0,
20446
  "train_batch_size": 2,
20447
  "trial_name": null,
20448
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9997807498355624,
5
  "eval_steps": 5,
6
+ "global_step": 2280,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
20423
  "eval_samples_per_second": 5.862,
20424
  "eval_steps_per_second": 0.195,
20425
  "step": 2000
20426
+ },
20427
+ {
20428
+ "epoch": 0.8774391580793686,
20429
+ "grad_norm": 0.49010925850689235,
20430
+ "learning_rate": 4.492417883833155e-06,
20431
+ "loss": 0.1096,
20432
+ "step": 2001
20433
+ },
20434
+ {
20435
+ "epoch": 0.8778776584082438,
20436
+ "grad_norm": 0.7600230660785309,
20437
+ "learning_rate": 4.460758653975128e-06,
20438
+ "loss": 0.187,
20439
+ "step": 2002
20440
+ },
20441
+ {
20442
+ "epoch": 0.8783161587371191,
20443
+ "grad_norm": 0.9443263647840711,
20444
+ "learning_rate": 4.429206164987276e-06,
20445
+ "loss": 0.1576,
20446
+ "step": 2003
20447
+ },
20448
+ {
20449
+ "epoch": 0.8787546590659943,
20450
+ "grad_norm": 0.8156934951607084,
20451
+ "learning_rate": 4.397760490826481e-06,
20452
+ "loss": 0.2156,
20453
+ "step": 2004
20454
+ },
20455
+ {
20456
+ "epoch": 0.8791931593948695,
20457
+ "grad_norm": 0.5940823608596991,
20458
+ "learning_rate": 4.366421705199253e-06,
20459
+ "loss": 0.1643,
20460
+ "step": 2005
20461
+ },
20462
+ {
20463
+ "epoch": 0.8791931593948695,
20464
+ "eval_PRM Accuracy": 0.9047619047619048,
20465
+ "eval_PRM F1": 0.9464285714285714,
20466
+ "eval_PRM F1 AUC": 0.9001475651746188,
20467
+ "eval_PRM F1 Neg": 0.5714285714285714,
20468
+ "eval_PRM NPV": 0.8888888888888888,
20469
+ "eval_PRM Precision": 0.905982905982906,
20470
+ "eval_PRM Recall": 0.9906542056074766,
20471
+ "eval_PRM Specificty": 0.42105263157894735,
20472
+ "eval_loss": 0.26673176884651184,
20473
+ "eval_runtime": 5.1125,
20474
+ "eval_samples_per_second": 5.868,
20475
+ "eval_steps_per_second": 0.196,
20476
+ "step": 2005
20477
+ },
20478
+ {
20479
+ "epoch": 0.8796316597237448,
20480
+ "grad_norm": 0.7342813103492982,
20481
+ "learning_rate": 4.335189881561563e-06,
20482
+ "loss": 0.0901,
20483
+ "step": 2006
20484
+ },
20485
+ {
20486
+ "epoch": 0.8800701600526201,
20487
+ "grad_norm": 0.8684047400215807,
20488
+ "learning_rate": 4.3040650931186646e-06,
20489
+ "loss": 0.1782,
20490
+ "step": 2007
20491
+ },
20492
+ {
20493
+ "epoch": 0.8805086603814953,
20494
+ "grad_norm": 0.6381573654330607,
20495
+ "learning_rate": 4.273047412824954e-06,
20496
+ "loss": 0.21,
20497
+ "step": 2008
20498
+ },
20499
+ {
20500
+ "epoch": 0.8809471607103705,
20501
+ "grad_norm": 0.5881964181618498,
20502
+ "learning_rate": 4.242136913383737e-06,
20503
+ "loss": 0.1295,
20504
+ "step": 2009
20505
+ },
20506
+ {
20507
+ "epoch": 0.8813856610392458,
20508
+ "grad_norm": 0.7277343212020827,
20509
+ "learning_rate": 4.2113336672471245e-06,
20510
+ "loss": 0.144,
20511
+ "step": 2010
20512
+ },
20513
+ {
20514
+ "epoch": 0.8813856610392458,
20515
+ "eval_PRM Accuracy": 0.8968253968253969,
20516
+ "eval_PRM F1": 0.9422222222222222,
20517
+ "eval_PRM F1 AUC": 0.9006394490900148,
20518
+ "eval_PRM F1 Neg": 0.5185185185185185,
20519
+ "eval_PRM NPV": 0.875,
20520
+ "eval_PRM Precision": 0.8983050847457628,
20521
+ "eval_PRM Recall": 0.9906542056074766,
20522
+ "eval_PRM Specificty": 0.3684210526315789,
20523
+ "eval_loss": 0.26777344942092896,
20524
+ "eval_runtime": 5.13,
20525
+ "eval_samples_per_second": 5.848,
20526
+ "eval_steps_per_second": 0.195,
20527
+ "step": 2010
20528
+ },
20529
+ {
20530
+ "epoch": 0.881824161368121,
20531
+ "grad_norm": 0.7543997307775313,
20532
+ "learning_rate": 4.180637746615823e-06,
20533
+ "loss": 0.1582,
20534
+ "step": 2011
20535
+ },
20536
+ {
20537
+ "epoch": 0.8822626616969963,
20538
+ "grad_norm": 0.5995275995836078,
20539
+ "learning_rate": 4.150049223438962e-06,
20540
+ "loss": 0.128,
20541
+ "step": 2012
20542
+ },
20543
+ {
20544
+ "epoch": 0.8827011620258716,
20545
+ "grad_norm": 0.7337332961504434,
20546
+ "learning_rate": 4.119568169413951e-06,
20547
+ "loss": 0.1506,
20548
+ "step": 2013
20549
+ },
20550
+ {
20551
+ "epoch": 0.8831396623547467,
20552
+ "grad_norm": 0.6232943128877938,
20553
+ "learning_rate": 4.089194655986306e-06,
20554
+ "loss": 0.1172,
20555
+ "step": 2014
20556
+ },
20557
+ {
20558
+ "epoch": 0.883578162683622,
20559
+ "grad_norm": 0.6628637542310886,
20560
+ "learning_rate": 4.05892875434945e-06,
20561
+ "loss": 0.1272,
20562
+ "step": 2015
20563
+ },
20564
+ {
20565
+ "epoch": 0.883578162683622,
20566
+ "eval_PRM Accuracy": 0.8968253968253969,
20567
+ "eval_PRM F1": 0.9422222222222222,
20568
+ "eval_PRM F1 AUC": 0.9013772749631086,
20569
+ "eval_PRM F1 Neg": 0.5185185185185185,
20570
+ "eval_PRM NPV": 0.875,
20571
+ "eval_PRM Precision": 0.8983050847457628,
20572
+ "eval_PRM Recall": 0.9906542056074766,
20573
+ "eval_PRM Specificty": 0.3684210526315789,
20574
+ "eval_loss": 0.2679687440395355,
20575
+ "eval_runtime": 4.944,
20576
+ "eval_samples_per_second": 6.068,
20577
+ "eval_steps_per_second": 0.202,
20578
+ "step": 2015
20579
+ },
20580
+ {
20581
+ "epoch": 0.8840166630124973,
20582
+ "grad_norm": 0.8052878054952287,
20583
+ "learning_rate": 4.028770535444615e-06,
20584
+ "loss": 0.1619,
20585
+ "step": 2016
20586
+ },
20587
+ {
20588
+ "epoch": 0.8844551633413725,
20589
+ "grad_norm": 0.44828955761310213,
20590
+ "learning_rate": 3.998720069960576e-06,
20591
+ "loss": 0.0875,
20592
+ "step": 2017
20593
+ },
20594
+ {
20595
+ "epoch": 0.8848936636702478,
20596
+ "grad_norm": 0.5896069212238487,
20597
+ "learning_rate": 3.968777428333598e-06,
20598
+ "loss": 0.1313,
20599
+ "step": 2018
20600
+ },
20601
+ {
20602
+ "epoch": 0.885332163999123,
20603
+ "grad_norm": 0.8476381353088597,
20604
+ "learning_rate": 3.9389426807471766e-06,
20605
+ "loss": 0.1734,
20606
+ "step": 2019
20607
+ },
20608
+ {
20609
+ "epoch": 0.8857706643279982,
20610
+ "grad_norm": 0.853862632832727,
20611
+ "learning_rate": 3.909215897131918e-06,
20612
+ "loss": 0.172,
20613
+ "step": 2020
20614
+ },
20615
+ {
20616
+ "epoch": 0.8857706643279982,
20617
+ "eval_PRM Accuracy": 0.9047619047619048,
20618
+ "eval_PRM F1": 0.9464285714285714,
20619
+ "eval_PRM F1 AUC": 0.8999016232169208,
20620
+ "eval_PRM F1 Neg": 0.5714285714285714,
20621
+ "eval_PRM NPV": 0.8888888888888888,
20622
+ "eval_PRM Precision": 0.905982905982906,
20623
+ "eval_PRM Recall": 0.9906542056074766,
20624
+ "eval_PRM Specificty": 0.42105263157894735,
20625
+ "eval_loss": 0.26793619990348816,
20626
+ "eval_runtime": 5.367,
20627
+ "eval_samples_per_second": 5.59,
20628
+ "eval_steps_per_second": 0.186,
20629
+ "step": 2020
20630
+ },
20631
+ {
20632
+ "epoch": 0.8862091646568735,
20633
+ "grad_norm": 0.5234162286442235,
20634
+ "learning_rate": 3.8795971471653756e-06,
20635
+ "loss": 0.1191,
20636
+ "step": 2021
20637
+ },
20638
+ {
20639
+ "epoch": 0.8866476649857488,
20640
+ "grad_norm": 0.71689160790237,
20641
+ "learning_rate": 3.850086500271871e-06,
20642
+ "loss": 0.124,
20643
+ "step": 2022
20644
+ },
20645
+ {
20646
+ "epoch": 0.887086165314624,
20647
+ "grad_norm": 0.5726710678100895,
20648
+ "learning_rate": 3.820684025622339e-06,
20649
+ "loss": 0.1189,
20650
+ "step": 2023
20651
+ },
20652
+ {
20653
+ "epoch": 0.8875246656434992,
20654
+ "grad_norm": 0.8339001833741275,
20655
+ "learning_rate": 3.7913897921341866e-06,
20656
+ "loss": 0.1723,
20657
+ "step": 2024
20658
+ },
20659
+ {
20660
+ "epoch": 0.8879631659723745,
20661
+ "grad_norm": 0.48838497101610673,
20662
+ "learning_rate": 3.762203868471087e-06,
20663
+ "loss": 0.0938,
20664
+ "step": 2025
20665
+ },
20666
+ {
20667
+ "epoch": 0.8879631659723745,
20668
+ "eval_PRM Accuracy": 0.8968253968253969,
20669
+ "eval_PRM F1": 0.9422222222222222,
20670
+ "eval_PRM F1 AUC": 0.9006394490900148,
20671
+ "eval_PRM F1 Neg": 0.5185185185185185,
20672
+ "eval_PRM NPV": 0.875,
20673
+ "eval_PRM Precision": 0.8983050847457628,
20674
+ "eval_PRM Recall": 0.9906542056074766,
20675
+ "eval_PRM Specificty": 0.3684210526315789,
20676
+ "eval_loss": 0.2682128846645355,
20677
+ "eval_runtime": 5.3362,
20678
+ "eval_samples_per_second": 5.622,
20679
+ "eval_steps_per_second": 0.187,
20680
+ "step": 2025
20681
+ },
20682
+ {
20683
+ "epoch": 0.8884016663012497,
20684
+ "grad_norm": 0.5968643874507121,
20685
+ "learning_rate": 3.7331263230428516e-06,
20686
+ "loss": 0.1253,
20687
+ "step": 2026
20688
+ },
20689
+ {
20690
+ "epoch": 0.888840166630125,
20691
+ "grad_norm": 0.7264174174581575,
20692
+ "learning_rate": 3.7041572240052667e-06,
20693
+ "loss": 0.135,
20694
+ "step": 2027
20695
+ },
20696
+ {
20697
+ "epoch": 0.8892786669590003,
20698
+ "grad_norm": 0.5902997205334317,
20699
+ "learning_rate": 3.675296639259912e-06,
20700
+ "loss": 0.1197,
20701
+ "step": 2028
20702
+ },
20703
+ {
20704
+ "epoch": 0.8897171672878754,
20705
+ "grad_norm": 0.8544699784202991,
20706
+ "learning_rate": 3.6465446364540358e-06,
20707
+ "loss": 0.1953,
20708
+ "step": 2029
20709
+ },
20710
+ {
20711
+ "epoch": 0.8901556676167507,
20712
+ "grad_norm": 0.7481450600837439,
20713
+ "learning_rate": 3.617901282980357e-06,
20714
+ "loss": 0.1132,
20715
+ "step": 2030
20716
+ },
20717
+ {
20718
+ "epoch": 0.8901556676167507,
20719
+ "eval_PRM Accuracy": 0.8968253968253969,
20720
+ "eval_PRM F1": 0.9422222222222222,
20721
+ "eval_PRM F1 AUC": 0.8989178553861288,
20722
+ "eval_PRM F1 Neg": 0.5185185185185185,
20723
+ "eval_PRM NPV": 0.875,
20724
+ "eval_PRM Precision": 0.8983050847457628,
20725
+ "eval_PRM Recall": 0.9906542056074766,
20726
+ "eval_PRM Specificty": 0.3684210526315789,
20727
+ "eval_loss": 0.2726888060569763,
20728
+ "eval_runtime": 5.3119,
20729
+ "eval_samples_per_second": 5.648,
20730
+ "eval_steps_per_second": 0.188,
20731
+ "step": 2030
20732
+ },
20733
+ {
20734
+ "epoch": 0.890594167945626,
20735
+ "grad_norm": 0.8258867796367659,
20736
+ "learning_rate": 3.5893666459769326e-06,
20737
+ "loss": 0.1599,
20738
+ "step": 2031
20739
+ },
20740
+ {
20741
+ "epoch": 0.8910326682745012,
20742
+ "grad_norm": 0.8287595473034423,
20743
+ "learning_rate": 3.560940792327028e-06,
20744
+ "loss": 0.1719,
20745
+ "step": 2032
20746
+ },
20747
+ {
20748
+ "epoch": 0.8914711686033765,
20749
+ "grad_norm": 0.6480723789574196,
20750
+ "learning_rate": 3.5326237886588732e-06,
20751
+ "loss": 0.1439,
20752
+ "step": 2033
20753
+ },
20754
+ {
20755
+ "epoch": 0.8919096689322517,
20756
+ "grad_norm": 0.5589484276325517,
20757
+ "learning_rate": 3.504415701345615e-06,
20758
+ "loss": 0.1229,
20759
+ "step": 2034
20760
+ },
20761
+ {
20762
+ "epoch": 0.8923481692611269,
20763
+ "grad_norm": 0.9184356471371848,
20764
+ "learning_rate": 3.476316596505075e-06,
20765
+ "loss": 0.1771,
20766
+ "step": 2035
20767
+ },
20768
+ {
20769
+ "epoch": 0.8923481692611269,
20770
+ "eval_PRM Accuracy": 0.8968253968253969,
20771
+ "eval_PRM F1": 0.9422222222222222,
20772
+ "eval_PRM F1 AUC": 0.8986719134284309,
20773
+ "eval_PRM F1 Neg": 0.5185185185185185,
20774
+ "eval_PRM NPV": 0.875,
20775
+ "eval_PRM Precision": 0.8983050847457628,
20776
+ "eval_PRM Recall": 0.9906542056074766,
20777
+ "eval_PRM Specificty": 0.3684210526315789,
20778
+ "eval_loss": 0.2717122435569763,
20779
+ "eval_runtime": 5.0118,
20780
+ "eval_samples_per_second": 5.986,
20781
+ "eval_steps_per_second": 0.2,
20782
+ "step": 2035
20783
+ },
20784
+ {
20785
+ "epoch": 0.8927866695900022,
20786
+ "grad_norm": 0.7733135135105927,
20787
+ "learning_rate": 3.4483265399996246e-06,
20788
+ "loss": 0.1054,
20789
+ "step": 2036
20790
+ },
20791
+ {
20792
+ "epoch": 0.8932251699188775,
20793
+ "grad_norm": 0.7483075304642457,
20794
+ "learning_rate": 3.420445597436056e-06,
20795
+ "loss": 0.1991,
20796
+ "step": 2037
20797
+ },
20798
+ {
20799
+ "epoch": 0.8936636702477527,
20800
+ "grad_norm": 0.5306782721956944,
20801
+ "learning_rate": 3.3926738341653886e-06,
20802
+ "loss": 0.1033,
20803
+ "step": 2038
20804
+ },
20805
+ {
20806
+ "epoch": 0.8941021705766279,
20807
+ "grad_norm": 0.6502443097322591,
20808
+ "learning_rate": 3.365011315282729e-06,
20809
+ "loss": 0.1564,
20810
+ "step": 2039
20811
+ },
20812
+ {
20813
+ "epoch": 0.8945406709055032,
20814
+ "grad_norm": 0.794159960377106,
20815
+ "learning_rate": 3.3374581056271447e-06,
20816
+ "loss": 0.1504,
20817
+ "step": 2040
20818
+ },
20819
+ {
20820
+ "epoch": 0.8945406709055032,
20821
+ "eval_PRM Accuracy": 0.8968253968253969,
20822
+ "eval_PRM F1": 0.9422222222222222,
20823
+ "eval_PRM F1 AUC": 0.9001475651746188,
20824
+ "eval_PRM F1 Neg": 0.5185185185185185,
20825
+ "eval_PRM NPV": 0.875,
20826
+ "eval_PRM Precision": 0.8983050847457628,
20827
+ "eval_PRM Recall": 0.9906542056074766,
20828
+ "eval_PRM Specificty": 0.3684210526315789,
20829
+ "eval_loss": 0.27273762226104736,
20830
+ "eval_runtime": 5.1428,
20831
+ "eval_samples_per_second": 5.833,
20832
+ "eval_steps_per_second": 0.194,
20833
+ "step": 2040
20834
+ },
20835
+ {
20836
+ "epoch": 0.8949791712343784,
20837
+ "grad_norm": 1.0396272458324463,
20838
+ "learning_rate": 3.3100142697814697e-06,
20839
+ "loss": 0.2735,
20840
+ "step": 2041
20841
+ },
20842
+ {
20843
+ "epoch": 0.8954176715632537,
20844
+ "grad_norm": 0.8371944516730585,
20845
+ "learning_rate": 3.2826798720721864e-06,
20846
+ "loss": 0.2289,
20847
+ "step": 2042
20848
+ },
20849
+ {
20850
+ "epoch": 0.895856171892129,
20851
+ "grad_norm": 0.5761643805159979,
20852
+ "learning_rate": 3.2554549765692554e-06,
20853
+ "loss": 0.1571,
20854
+ "step": 2043
20855
+ },
20856
+ {
20857
+ "epoch": 0.8962946722210041,
20858
+ "grad_norm": 0.7867560879422744,
20859
+ "learning_rate": 3.228339647085965e-06,
20860
+ "loss": 0.2137,
20861
+ "step": 2044
20862
+ },
20863
+ {
20864
+ "epoch": 0.8967331725498794,
20865
+ "grad_norm": 0.6568812377795216,
20866
+ "learning_rate": 3.2013339471787974e-06,
20867
+ "loss": 0.1576,
20868
+ "step": 2045
20869
+ },
20870
+ {
20871
+ "epoch": 0.8967331725498794,
20872
+ "eval_PRM Accuracy": 0.8968253968253969,
20873
+ "eval_PRM F1": 0.9422222222222222,
20874
+ "eval_PRM F1 AUC": 0.9008853910477127,
20875
+ "eval_PRM F1 Neg": 0.5185185185185185,
20876
+ "eval_PRM NPV": 0.875,
20877
+ "eval_PRM Precision": 0.8983050847457628,
20878
+ "eval_PRM Recall": 0.9906542056074766,
20879
+ "eval_PRM Specificty": 0.3684210526315789,
20880
+ "eval_loss": 0.2720377743244171,
20881
+ "eval_runtime": 4.8325,
20882
+ "eval_samples_per_second": 6.208,
20883
+ "eval_steps_per_second": 0.207,
20884
+ "step": 2045
20885
+ },
20886
+ {
20887
+ "epoch": 0.8971716728787547,
20888
+ "grad_norm": 0.6050445849823863,
20889
+ "learning_rate": 3.1744379401472677e-06,
20890
+ "loss": 0.0941,
20891
+ "step": 2046
20892
+ },
20893
+ {
20894
+ "epoch": 0.8976101732076299,
20895
+ "grad_norm": 0.4980411373250404,
20896
+ "learning_rate": 3.1476516890337703e-06,
20897
+ "loss": 0.0835,
20898
+ "step": 2047
20899
+ },
20900
+ {
20901
+ "epoch": 0.8980486735365052,
20902
+ "grad_norm": 1.0147077320854392,
20903
+ "learning_rate": 3.1209752566234653e-06,
20904
+ "loss": 0.2369,
20905
+ "step": 2048
20906
+ },
20907
+ {
20908
+ "epoch": 0.8984871738653804,
20909
+ "grad_norm": 0.6849094272859165,
20910
+ "learning_rate": 3.094408705444074e-06,
20911
+ "loss": 0.1711,
20912
+ "step": 2049
20913
+ },
20914
+ {
20915
+ "epoch": 0.8989256741942556,
20916
+ "grad_norm": 0.574222203080837,
20917
+ "learning_rate": 3.0679520977657863e-06,
20918
+ "loss": 0.1325,
20919
+ "step": 2050
20920
+ },
20921
+ {
20922
+ "epoch": 0.8989256741942556,
20923
+ "eval_PRM Accuracy": 0.8968253968253969,
20924
+ "eval_PRM F1": 0.9422222222222222,
20925
+ "eval_PRM F1 AUC": 0.9013772749631087,
20926
+ "eval_PRM F1 Neg": 0.5185185185185185,
20927
+ "eval_PRM NPV": 0.875,
20928
+ "eval_PRM Precision": 0.8983050847457628,
20929
+ "eval_PRM Recall": 0.9906542056074766,
20930
+ "eval_PRM Specificty": 0.3684210526315789,
20931
+ "eval_loss": 0.2703287899494171,
20932
+ "eval_runtime": 5.2238,
20933
+ "eval_samples_per_second": 5.743,
20934
+ "eval_steps_per_second": 0.191,
20935
+ "step": 2050
20936
+ },
20937
+ {
20938
+ "epoch": 0.8993641745231309,
20939
+ "grad_norm": 0.6704277086537236,
20940
+ "learning_rate": 3.041605495601074e-06,
20941
+ "loss": 0.1193,
20942
+ "step": 2051
20943
+ },
20944
+ {
20945
+ "epoch": 0.8998026748520062,
20946
+ "grad_norm": 0.5220752486808401,
20947
+ "learning_rate": 3.0153689607045845e-06,
20948
+ "loss": 0.1275,
20949
+ "step": 2052
20950
+ },
20951
+ {
20952
+ "epoch": 0.9002411751808814,
20953
+ "grad_norm": 1.0311633126874105,
20954
+ "learning_rate": 2.989242554572952e-06,
20955
+ "loss": 0.2181,
20956
+ "step": 2053
20957
+ },
20958
+ {
20959
+ "epoch": 0.9006796755097566,
20960
+ "grad_norm": 0.7896650691751218,
20961
+ "learning_rate": 2.9632263384446913e-06,
20962
+ "loss": 0.1921,
20963
+ "step": 2054
20964
+ },
20965
+ {
20966
+ "epoch": 0.9011181758386319,
20967
+ "grad_norm": 1.0315320145585596,
20968
+ "learning_rate": 2.9373203733000232e-06,
20969
+ "loss": 0.1571,
20970
+ "step": 2055
20971
+ },
20972
+ {
20973
+ "epoch": 0.9011181758386319,
20974
+ "eval_PRM Accuracy": 0.8968253968253969,
20975
+ "eval_PRM F1": 0.9422222222222222,
20976
+ "eval_PRM F1 AUC": 0.8994097393015248,
20977
+ "eval_PRM F1 Neg": 0.5185185185185185,
20978
+ "eval_PRM NPV": 0.875,
20979
+ "eval_PRM Precision": 0.8983050847457628,
20980
+ "eval_PRM Recall": 0.9906542056074766,
20981
+ "eval_PRM Specificty": 0.3684210526315789,
20982
+ "eval_loss": 0.27177733182907104,
20983
+ "eval_runtime": 4.7798,
20984
+ "eval_samples_per_second": 6.276,
20985
+ "eval_steps_per_second": 0.209,
20986
+ "step": 2055
20987
+ },
20988
+ {
20989
+ "epoch": 0.9015566761675071,
20990
+ "grad_norm": 0.6694908121182245,
20991
+ "learning_rate": 2.9115247198607807e-06,
20992
+ "loss": 0.1056,
20993
+ "step": 2056
20994
+ },
20995
+ {
20996
+ "epoch": 0.9019951764963824,
20997
+ "grad_norm": 0.5928534592981579,
20998
+ "learning_rate": 2.885839438590204e-06,
20999
+ "loss": 0.103,
21000
+ "step": 2057
21001
+ },
21002
+ {
21003
+ "epoch": 0.9024336768252577,
21004
+ "grad_norm": 0.5381910535996246,
21005
+ "learning_rate": 2.8602645896928295e-06,
21006
+ "loss": 0.0795,
21007
+ "step": 2058
21008
+ },
21009
+ {
21010
+ "epoch": 0.9028721771541328,
21011
+ "grad_norm": 0.655291318042305,
21012
+ "learning_rate": 2.834800233114371e-06,
21013
+ "loss": 0.1756,
21014
+ "step": 2059
21015
+ },
21016
+ {
21017
+ "epoch": 0.9033106774830081,
21018
+ "grad_norm": 1.2465002992277767,
21019
+ "learning_rate": 2.8094464285415344e-06,
21020
+ "loss": 0.2788,
21021
+ "step": 2060
21022
+ },
21023
+ {
21024
+ "epoch": 0.9033106774830081,
21025
+ "eval_PRM Accuracy": 0.8968253968253969,
21026
+ "eval_PRM F1": 0.9422222222222222,
21027
+ "eval_PRM F1 AUC": 0.9011313330054107,
21028
+ "eval_PRM F1 Neg": 0.5185185185185185,
21029
+ "eval_PRM NPV": 0.875,
21030
+ "eval_PRM Precision": 0.8983050847457628,
21031
+ "eval_PRM Recall": 0.9906542056074766,
21032
+ "eval_PRM Specificty": 0.3684210526315789,
21033
+ "eval_loss": 0.2710774838924408,
21034
+ "eval_runtime": 4.8092,
21035
+ "eval_samples_per_second": 6.238,
21036
+ "eval_steps_per_second": 0.208,
21037
+ "step": 2060
21038
+ },
21039
+ {
21040
+ "epoch": 0.9037491778118834,
21041
+ "grad_norm": 0.8316573399959882,
21042
+ "learning_rate": 2.7842032354018997e-06,
21043
+ "loss": 0.1989,
21044
+ "step": 2061
21045
+ },
21046
+ {
21047
+ "epoch": 0.9041876781407586,
21048
+ "grad_norm": 0.5565538660859521,
21049
+ "learning_rate": 2.759070712863793e-06,
21050
+ "loss": 0.1463,
21051
+ "step": 2062
21052
+ },
21053
+ {
21054
+ "epoch": 0.9046261784696339,
21055
+ "grad_norm": 0.7268757914162765,
21056
+ "learning_rate": 2.7340489198361186e-06,
21057
+ "loss": 0.174,
21058
+ "step": 2063
21059
+ },
21060
+ {
21061
+ "epoch": 0.905064678798509,
21062
+ "grad_norm": 0.7236740506294429,
21063
+ "learning_rate": 2.7091379149682685e-06,
21064
+ "loss": 0.2021,
21065
+ "step": 2064
21066
+ },
21067
+ {
21068
+ "epoch": 0.9055031791273843,
21069
+ "grad_norm": 0.8026879054294682,
21070
+ "learning_rate": 2.6843377566499237e-06,
21071
+ "loss": 0.2128,
21072
+ "step": 2065
21073
+ },
21074
+ {
21075
+ "epoch": 0.9055031791273843,
21076
+ "eval_PRM Accuracy": 0.9047619047619048,
21077
+ "eval_PRM F1": 0.9464285714285714,
21078
+ "eval_PRM F1 AUC": 0.8996556812592228,
21079
+ "eval_PRM F1 Neg": 0.5714285714285714,
21080
+ "eval_PRM NPV": 0.8888888888888888,
21081
+ "eval_PRM Precision": 0.905982905982906,
21082
+ "eval_PRM Recall": 0.9906542056074766,
21083
+ "eval_PRM Specificty": 0.42105263157894735,
21084
+ "eval_loss": 0.26917317509651184,
21085
+ "eval_runtime": 5.0103,
21086
+ "eval_samples_per_second": 5.988,
21087
+ "eval_steps_per_second": 0.2,
21088
+ "step": 2065
21089
+ },
21090
+ {
21091
+ "epoch": 0.9059416794562596,
21092
+ "grad_norm": 0.5386451536790833,
21093
+ "learning_rate": 2.6596485030109587e-06,
21094
+ "loss": 0.1422,
21095
+ "step": 2066
21096
+ },
21097
+ {
21098
+ "epoch": 0.9063801797851349,
21099
+ "grad_norm": 0.7004320505442949,
21100
+ "learning_rate": 2.6350702119213034e-06,
21101
+ "loss": 0.1983,
21102
+ "step": 2067
21103
+ },
21104
+ {
21105
+ "epoch": 0.9068186801140101,
21106
+ "grad_norm": 1.0368428921283208,
21107
+ "learning_rate": 2.6106029409907974e-06,
21108
+ "loss": 0.2107,
21109
+ "step": 2068
21110
+ },
21111
+ {
21112
+ "epoch": 0.9072571804428853,
21113
+ "grad_norm": 1.0120734400023428,
21114
+ "learning_rate": 2.5862467475690378e-06,
21115
+ "loss": 0.151,
21116
+ "step": 2069
21117
+ },
21118
+ {
21119
+ "epoch": 0.9076956807717605,
21120
+ "grad_norm": 0.7618074177857603,
21121
+ "learning_rate": 2.562001688745291e-06,
21122
+ "loss": 0.1171,
21123
+ "step": 2070
21124
+ },
21125
+ {
21126
+ "epoch": 0.9076956807717605,
21127
+ "eval_PRM Accuracy": 0.9047619047619048,
21128
+ "eval_PRM F1": 0.9464285714285714,
21129
+ "eval_PRM F1 AUC": 0.9011313330054107,
21130
+ "eval_PRM F1 Neg": 0.5714285714285714,
21131
+ "eval_PRM NPV": 0.8888888888888888,
21132
+ "eval_PRM Precision": 0.905982905982906,
21133
+ "eval_PRM Recall": 0.9906542056074766,
21134
+ "eval_PRM Specificty": 0.42105263157894735,
21135
+ "eval_loss": 0.2679850161075592,
21136
+ "eval_runtime": 5.0674,
21137
+ "eval_samples_per_second": 5.92,
21138
+ "eval_steps_per_second": 0.197,
21139
+ "step": 2070
21140
+ },
21141
+ {
21142
+ "epoch": 0.9081341811006358,
21143
+ "grad_norm": 0.7602851771188424,
21144
+ "learning_rate": 2.5378678213483054e-06,
21145
+ "loss": 0.1217,
21146
+ "step": 2071
21147
+ },
21148
+ {
21149
+ "epoch": 0.9085726814295111,
21150
+ "grad_norm": 0.5011588190315214,
21151
+ "learning_rate": 2.51384520194623e-06,
21152
+ "loss": 0.0813,
21153
+ "step": 2072
21154
+ },
21155
+ {
21156
+ "epoch": 0.9090111817583864,
21157
+ "grad_norm": 1.0306642407275364,
21158
+ "learning_rate": 2.4899338868464404e-06,
21159
+ "loss": 0.1606,
21160
+ "step": 2073
21161
+ },
21162
+ {
21163
+ "epoch": 0.9094496820872615,
21164
+ "grad_norm": 0.772012854183267,
21165
+ "learning_rate": 2.466133932095416e-06,
21166
+ "loss": 0.2104,
21167
+ "step": 2074
21168
+ },
21169
+ {
21170
+ "epoch": 0.9098881824161368,
21171
+ "grad_norm": 0.8965421591737016,
21172
+ "learning_rate": 2.442445393478632e-06,
21173
+ "loss": 0.166,
21174
+ "step": 2075
21175
+ },
21176
+ {
21177
+ "epoch": 0.9098881824161368,
21178
+ "eval_PRM Accuracy": 0.9047619047619048,
21179
+ "eval_PRM F1": 0.9464285714285714,
21180
+ "eval_PRM F1 AUC": 0.9003935071323167,
21181
+ "eval_PRM F1 Neg": 0.5714285714285714,
21182
+ "eval_PRM NPV": 0.8888888888888888,
21183
+ "eval_PRM Precision": 0.905982905982906,
21184
+ "eval_PRM Recall": 0.9906542056074766,
21185
+ "eval_PRM Specificty": 0.42105263157894735,
21186
+ "eval_loss": 0.2678385376930237,
21187
+ "eval_runtime": 5.1116,
21188
+ "eval_samples_per_second": 5.869,
21189
+ "eval_steps_per_second": 0.196,
21190
+ "step": 2075
21191
+ },
21192
+ {
21193
+ "epoch": 0.910326682745012,
21194
+ "grad_norm": 1.0459773703185697,
21195
+ "learning_rate": 2.4188683265204127e-06,
21196
+ "loss": 0.2223,
21197
+ "step": 2076
21198
+ },
21199
+ {
21200
+ "epoch": 0.9107651830738873,
21201
+ "grad_norm": 0.7408872560803088,
21202
+ "learning_rate": 2.3954027864837745e-06,
21203
+ "loss": 0.1195,
21204
+ "step": 2077
21205
+ },
21206
+ {
21207
+ "epoch": 0.9112036834027626,
21208
+ "grad_norm": 0.8105697387433919,
21209
+ "learning_rate": 2.3720488283703546e-06,
21210
+ "loss": 0.1341,
21211
+ "step": 2078
21212
+ },
21213
+ {
21214
+ "epoch": 0.9116421837316377,
21215
+ "grad_norm": 0.6495676351177188,
21216
+ "learning_rate": 2.3488065069202204e-06,
21217
+ "loss": 0.1128,
21218
+ "step": 2079
21219
+ },
21220
+ {
21221
+ "epoch": 0.912080684060513,
21222
+ "grad_norm": 0.8244332014624807,
21223
+ "learning_rate": 2.325675876611805e-06,
21224
+ "loss": 0.1809,
21225
+ "step": 2080
21226
+ },
21227
+ {
21228
+ "epoch": 0.912080684060513,
21229
+ "eval_PRM Accuracy": 0.9047619047619048,
21230
+ "eval_PRM F1": 0.9464285714285714,
21231
+ "eval_PRM F1 AUC": 0.9003935071323168,
21232
+ "eval_PRM F1 Neg": 0.5714285714285714,
21233
+ "eval_PRM NPV": 0.8888888888888888,
21234
+ "eval_PRM Precision": 0.905982905982906,
21235
+ "eval_PRM Recall": 0.9906542056074766,
21236
+ "eval_PRM Specificty": 0.42105263157894735,
21237
+ "eval_loss": 0.26679688692092896,
21238
+ "eval_runtime": 4.9794,
21239
+ "eval_samples_per_second": 6.025,
21240
+ "eval_steps_per_second": 0.201,
21241
+ "step": 2080
21242
+ },
21243
+ {
21244
+ "epoch": 0.9125191843893883,
21245
+ "grad_norm": 0.9198010065353488,
21246
+ "learning_rate": 2.3026569916617114e-06,
21247
+ "loss": 0.1531,
21248
+ "step": 2081
21249
+ },
21250
+ {
21251
+ "epoch": 0.9129576847182636,
21252
+ "grad_norm": 0.9449481444857517,
21253
+ "learning_rate": 2.2797499060246253e-06,
21254
+ "loss": 0.2181,
21255
+ "step": 2082
21256
+ },
21257
+ {
21258
+ "epoch": 0.9133961850471388,
21259
+ "grad_norm": 0.634967327547961,
21260
+ "learning_rate": 2.2569546733932134e-06,
21261
+ "loss": 0.1356,
21262
+ "step": 2083
21263
+ },
21264
+ {
21265
+ "epoch": 0.913834685376014,
21266
+ "grad_norm": 0.5077658683256131,
21267
+ "learning_rate": 2.2342713471979303e-06,
21268
+ "loss": 0.1375,
21269
+ "step": 2084
21270
+ },
21271
+ {
21272
+ "epoch": 0.9142731857048892,
21273
+ "grad_norm": 0.6749453283300559,
21274
+ "learning_rate": 2.211699980606946e-06,
21275
+ "loss": 0.1164,
21276
+ "step": 2085
21277
+ },
21278
+ {
21279
+ "epoch": 0.9142731857048892,
21280
+ "eval_PRM Accuracy": 0.9047619047619048,
21281
+ "eval_PRM F1": 0.9464285714285714,
21282
+ "eval_PRM F1 AUC": 0.9021151008362026,
21283
+ "eval_PRM F1 Neg": 0.5714285714285714,
21284
+ "eval_PRM NPV": 0.8888888888888888,
21285
+ "eval_PRM Precision": 0.905982905982906,
21286
+ "eval_PRM Recall": 0.9906542056074766,
21287
+ "eval_PRM Specificty": 0.42105263157894735,
21288
+ "eval_loss": 0.265380859375,
21289
+ "eval_runtime": 5.1437,
21290
+ "eval_samples_per_second": 5.832,
21291
+ "eval_steps_per_second": 0.194,
21292
+ "step": 2085
21293
+ },
21294
+ {
21295
+ "epoch": 0.9147116860337645,
21296
+ "grad_norm": 0.5715282449392062,
21297
+ "learning_rate": 2.1892406265259955e-06,
21298
+ "loss": 0.1226,
21299
+ "step": 2086
21300
+ },
21301
+ {
21302
+ "epoch": 0.9151501863626398,
21303
+ "grad_norm": 0.760172775638105,
21304
+ "learning_rate": 2.1668933375982694e-06,
21305
+ "loss": 0.1398,
21306
+ "step": 2087
21307
+ },
21308
+ {
21309
+ "epoch": 0.915588686691515,
21310
+ "grad_norm": 0.625961925053264,
21311
+ "learning_rate": 2.1446581662042942e-06,
21312
+ "loss": 0.1595,
21313
+ "step": 2088
21314
+ },
21315
+ {
21316
+ "epoch": 0.9160271870203902,
21317
+ "grad_norm": 0.586400526492988,
21318
+ "learning_rate": 2.122535164461792e-06,
21319
+ "loss": 0.1113,
21320
+ "step": 2089
21321
+ },
21322
+ {
21323
+ "epoch": 0.9164656873492655,
21324
+ "grad_norm": 0.7883418824251702,
21325
+ "learning_rate": 2.100524384225555e-06,
21326
+ "loss": 0.1379,
21327
+ "step": 2090
21328
+ },
21329
+ {
21330
+ "epoch": 0.9164656873492655,
21331
+ "eval_PRM Accuracy": 0.9047619047619048,
21332
+ "eval_PRM F1": 0.9464285714285714,
21333
+ "eval_PRM F1 AUC": 0.9021151008362026,
21334
+ "eval_PRM F1 Neg": 0.5714285714285714,
21335
+ "eval_PRM NPV": 0.8888888888888888,
21336
+ "eval_PRM Precision": 0.905982905982906,
21337
+ "eval_PRM Recall": 0.9906542056074766,
21338
+ "eval_PRM Specificty": 0.42105263157894735,
21339
+ "eval_loss": 0.2639974057674408,
21340
+ "eval_runtime": 4.9495,
21341
+ "eval_samples_per_second": 6.061,
21342
+ "eval_steps_per_second": 0.202,
21343
+ "step": 2090
21344
+ },
21345
+ {
21346
+ "epoch": 0.9169041876781407,
21347
+ "grad_norm": 0.5196917550049326,
21348
+ "learning_rate": 2.0786258770873647e-06,
21349
+ "loss": 0.1157,
21350
+ "step": 2091
21351
+ },
21352
+ {
21353
+ "epoch": 0.917342688007016,
21354
+ "grad_norm": 0.5890538515430815,
21355
+ "learning_rate": 2.0568396943758172e-06,
21356
+ "loss": 0.1513,
21357
+ "step": 2092
21358
+ },
21359
+ {
21360
+ "epoch": 0.9177811883358913,
21361
+ "grad_norm": 0.6037446807611603,
21362
+ "learning_rate": 2.035165887156243e-06,
21363
+ "loss": 0.1162,
21364
+ "step": 2093
21365
+ },
21366
+ {
21367
+ "epoch": 0.9182196886647666,
21368
+ "grad_norm": 0.8028206864089246,
21369
+ "learning_rate": 2.013604506230554e-06,
21370
+ "loss": 0.1835,
21371
+ "step": 2094
21372
+ },
21373
+ {
21374
+ "epoch": 0.9186581889936417,
21375
+ "grad_norm": 0.80306617680803,
21376
+ "learning_rate": 1.9921556021371523e-06,
21377
+ "loss": 0.1487,
21378
+ "step": 2095
21379
+ },
21380
+ {
21381
+ "epoch": 0.9186581889936417,
21382
+ "eval_PRM Accuracy": 0.9047619047619048,
21383
+ "eval_PRM F1": 0.9464285714285714,
21384
+ "eval_PRM F1 AUC": 0.9013772749631087,
21385
+ "eval_PRM F1 Neg": 0.5714285714285714,
21386
+ "eval_PRM NPV": 0.8888888888888888,
21387
+ "eval_PRM Precision": 0.905982905982906,
21388
+ "eval_PRM Recall": 0.9906542056074766,
21389
+ "eval_PRM Specificty": 0.42105263157894735,
21390
+ "eval_loss": 0.2653645873069763,
21391
+ "eval_runtime": 5.0962,
21392
+ "eval_samples_per_second": 5.887,
21393
+ "eval_steps_per_second": 0.196,
21394
+ "step": 2095
21395
+ },
21396
+ {
21397
+ "epoch": 0.919096689322517,
21398
+ "grad_norm": 0.6135907842130369,
21399
+ "learning_rate": 1.970819225150822e-06,
21400
+ "loss": 0.1061,
21401
+ "step": 2096
21402
+ },
21403
+ {
21404
+ "epoch": 0.9195351896513922,
21405
+ "grad_norm": 0.6049596521926599,
21406
+ "learning_rate": 1.9495954252825577e-06,
21407
+ "loss": 0.1187,
21408
+ "step": 2097
21409
+ },
21410
+ {
21411
+ "epoch": 0.9199736899802675,
21412
+ "grad_norm": 0.5972702336073918,
21413
+ "learning_rate": 1.9284842522794945e-06,
21414
+ "loss": 0.1237,
21415
+ "step": 2098
21416
+ },
21417
+ {
21418
+ "epoch": 0.9204121903091428,
21419
+ "grad_norm": 0.7652451341846678,
21420
+ "learning_rate": 1.907485755624788e-06,
21421
+ "loss": 0.1659,
21422
+ "step": 2099
21423
+ },
21424
+ {
21425
+ "epoch": 0.9208506906380179,
21426
+ "grad_norm": 0.9965183116186683,
21427
+ "learning_rate": 1.8865999845374793e-06,
21428
+ "loss": 0.2435,
21429
+ "step": 2100
21430
+ },
21431
+ {
21432
+ "epoch": 0.9208506906380179,
21433
+ "eval_PRM Accuracy": 0.9047619047619048,
21434
+ "eval_PRM F1": 0.9464285714285714,
21435
+ "eval_PRM F1 AUC": 0.9035907525823905,
21436
+ "eval_PRM F1 Neg": 0.5714285714285714,
21437
+ "eval_PRM NPV": 0.8888888888888888,
21438
+ "eval_PRM Precision": 0.905982905982906,
21439
+ "eval_PRM Recall": 0.9906542056074766,
21440
+ "eval_PRM Specificty": 0.42105263157894735,
21441
+ "eval_loss": 0.264892578125,
21442
+ "eval_runtime": 4.8153,
21443
+ "eval_samples_per_second": 6.23,
21444
+ "eval_steps_per_second": 0.208,
21445
+ "step": 2100
21446
+ },
21447
+ {
21448
+ "epoch": 0.9212891909668932,
21449
+ "grad_norm": 0.8389146985920428,
21450
+ "learning_rate": 1.865826987972391e-06,
21451
+ "loss": 0.1704,
21452
+ "step": 2101
21453
+ },
21454
+ {
21455
+ "epoch": 0.9217276912957685,
21456
+ "grad_norm": 0.6715898820658518,
21457
+ "learning_rate": 1.8451668146199974e-06,
21458
+ "loss": 0.1091,
21459
+ "step": 2102
21460
+ },
21461
+ {
21462
+ "epoch": 0.9221661916246437,
21463
+ "grad_norm": 0.6207515584959578,
21464
+ "learning_rate": 1.8246195129063336e-06,
21465
+ "loss": 0.1615,
21466
+ "step": 2103
21467
+ },
21468
+ {
21469
+ "epoch": 0.922604691953519,
21470
+ "grad_norm": 0.848793520041008,
21471
+ "learning_rate": 1.8041851309928802e-06,
21472
+ "loss": 0.1548,
21473
+ "step": 2104
21474
+ },
21475
+ {
21476
+ "epoch": 0.9230431922823942,
21477
+ "grad_norm": 0.6788111135403079,
21478
+ "learning_rate": 1.7838637167764294e-06,
21479
+ "loss": 0.1605,
21480
+ "step": 2105
21481
+ },
21482
+ {
21483
+ "epoch": 0.9230431922823942,
21484
+ "eval_PRM Accuracy": 0.9047619047619048,
21485
+ "eval_PRM F1": 0.9464285714285714,
21486
+ "eval_PRM F1 AUC": 0.9001475651746188,
21487
+ "eval_PRM F1 Neg": 0.5714285714285714,
21488
+ "eval_PRM NPV": 0.8888888888888888,
21489
+ "eval_PRM Precision": 0.905982905982906,
21490
+ "eval_PRM Recall": 0.9906542056074766,
21491
+ "eval_PRM Specificty": 0.42105263157894735,
21492
+ "eval_loss": 0.26728516817092896,
21493
+ "eval_runtime": 4.9915,
21494
+ "eval_samples_per_second": 6.01,
21495
+ "eval_steps_per_second": 0.2,
21496
+ "step": 2105
21497
+ },
21498
+ {
21499
+ "epoch": 0.9234816926112694,
21500
+ "grad_norm": 1.1010740603161708,
21501
+ "learning_rate": 1.7636553178889792e-06,
21502
+ "loss": 0.2308,
21503
+ "step": 2106
21504
+ },
21505
+ {
21506
+ "epoch": 0.9239201929401447,
21507
+ "grad_norm": 0.9571915996550008,
21508
+ "learning_rate": 1.7435599816976512e-06,
21509
+ "loss": 0.1601,
21510
+ "step": 2107
21511
+ },
21512
+ {
21513
+ "epoch": 0.92435869326902,
21514
+ "grad_norm": 0.6062601796558845,
21515
+ "learning_rate": 1.7235777553045284e-06,
21516
+ "loss": 0.1231,
21517
+ "step": 2108
21518
+ },
21519
+ {
21520
+ "epoch": 0.9247971935978953,
21521
+ "grad_norm": 0.7058702603322184,
21522
+ "learning_rate": 1.70370868554659e-06,
21523
+ "loss": 0.1669,
21524
+ "step": 2109
21525
+ },
21526
+ {
21527
+ "epoch": 0.9252356939267704,
21528
+ "grad_norm": 0.6861546257522678,
21529
+ "learning_rate": 1.683952818995571e-06,
21530
+ "loss": 0.1323,
21531
+ "step": 2110
21532
+ },
21533
+ {
21534
+ "epoch": 0.9252356939267704,
21535
+ "eval_PRM Accuracy": 0.9047619047619048,
21536
+ "eval_PRM F1": 0.9464285714285714,
21537
+ "eval_PRM F1 AUC": 0.9026069847515986,
21538
+ "eval_PRM F1 Neg": 0.5714285714285714,
21539
+ "eval_PRM NPV": 0.8888888888888888,
21540
+ "eval_PRM Precision": 0.905982905982906,
21541
+ "eval_PRM Recall": 0.9906542056074766,
21542
+ "eval_PRM Specificty": 0.42105263157894735,
21543
+ "eval_loss": 0.2656901180744171,
21544
+ "eval_runtime": 5.128,
21545
+ "eval_samples_per_second": 5.85,
21546
+ "eval_steps_per_second": 0.195,
21547
+ "step": 2110
21548
+ },
21549
+ {
21550
+ "epoch": 0.9256741942556457,
21551
+ "grad_norm": 0.8520826217421488,
21552
+ "learning_rate": 1.6643102019578805e-06,
21553
+ "loss": 0.1336,
21554
+ "step": 2111
21555
+ },
21556
+ {
21557
+ "epoch": 0.926112694584521,
21558
+ "grad_norm": 0.4797688260349934,
21559
+ "learning_rate": 1.6447808804744668e-06,
21560
+ "loss": 0.1175,
21561
+ "step": 2112
21562
+ },
21563
+ {
21564
+ "epoch": 0.9265511949133962,
21565
+ "grad_norm": 0.7661663885999256,
21566
+ "learning_rate": 1.6253649003207249e-06,
21567
+ "loss": 0.1454,
21568
+ "step": 2113
21569
+ },
21570
+ {
21571
+ "epoch": 0.9269896952422715,
21572
+ "grad_norm": 0.4915870187790107,
21573
+ "learning_rate": 1.606062307006384e-06,
21574
+ "loss": 0.1146,
21575
+ "step": 2114
21576
+ },
21577
+ {
21578
+ "epoch": 0.9274281955711466,
21579
+ "grad_norm": 1.052785510896854,
21580
+ "learning_rate": 1.5868731457754139e-06,
21581
+ "loss": 0.2203,
21582
+ "step": 2115
21583
+ },
21584
+ {
21585
+ "epoch": 0.9274281955711466,
21586
+ "eval_PRM Accuracy": 0.8968253968253969,
21587
+ "eval_PRM F1": 0.9422222222222222,
21588
+ "eval_PRM F1 AUC": 0.9033448106246925,
21589
+ "eval_PRM F1 Neg": 0.5185185185185185,
21590
+ "eval_PRM NPV": 0.875,
21591
+ "eval_PRM Precision": 0.8983050847457628,
21592
+ "eval_PRM Recall": 0.9906542056074766,
21593
+ "eval_PRM Specificty": 0.3684210526315789,
21594
+ "eval_loss": 0.2664550840854645,
21595
+ "eval_runtime": 5.0995,
21596
+ "eval_samples_per_second": 5.883,
21597
+ "eval_steps_per_second": 0.196,
21598
+ "step": 2115
21599
+ },
21600
+ {
21601
+ "epoch": 0.9278666959000219,
21602
+ "grad_norm": 0.8549090499057688,
21603
+ "learning_rate": 1.5677974616058855e-06,
21604
+ "loss": 0.1729,
21605
+ "step": 2116
21606
+ },
21607
+ {
21608
+ "epoch": 0.9283051962288972,
21609
+ "grad_norm": 0.7792292639337909,
21610
+ "learning_rate": 1.5488352992099053e-06,
21611
+ "loss": 0.2705,
21612
+ "step": 2117
21613
+ },
21614
+ {
21615
+ "epoch": 0.9287436965577724,
21616
+ "grad_norm": 0.6893387190665935,
21617
+ "learning_rate": 1.5299867030334814e-06,
21618
+ "loss": 0.0947,
21619
+ "step": 2118
21620
+ },
21621
+ {
21622
+ "epoch": 0.9291821968866477,
21623
+ "grad_norm": 0.6451030801962423,
21624
+ "learning_rate": 1.51125171725644e-06,
21625
+ "loss": 0.1544,
21626
+ "step": 2119
21627
+ },
21628
+ {
21629
+ "epoch": 0.9296206972155229,
21630
+ "grad_norm": 1.5791580231385312,
21631
+ "learning_rate": 1.4926303857923097e-06,
21632
+ "loss": 0.1762,
21633
+ "step": 2120
21634
+ },
21635
+ {
21636
+ "epoch": 0.9296206972155229,
21637
+ "eval_PRM Accuracy": 0.9047619047619048,
21638
+ "eval_PRM F1": 0.9464285714285714,
21639
+ "eval_PRM F1 AUC": 0.9028529267092966,
21640
+ "eval_PRM F1 Neg": 0.5714285714285714,
21641
+ "eval_PRM NPV": 0.8888888888888888,
21642
+ "eval_PRM Precision": 0.905982905982906,
21643
+ "eval_PRM Recall": 0.9906542056074766,
21644
+ "eval_PRM Specificty": 0.42105263157894735,
21645
+ "eval_loss": 0.26712238788604736,
21646
+ "eval_runtime": 5.0188,
21647
+ "eval_samples_per_second": 5.978,
21648
+ "eval_steps_per_second": 0.199,
21649
+ "step": 2120
21650
+ },
21651
+ {
21652
+ "epoch": 0.9300591975443981,
21653
+ "grad_norm": 0.5173209803502309,
21654
+ "learning_rate": 1.4741227522882095e-06,
21655
+ "loss": 0.1093,
21656
+ "step": 2121
21657
+ },
21658
+ {
21659
+ "epoch": 0.9304976978732734,
21660
+ "grad_norm": 0.8467810225304794,
21661
+ "learning_rate": 1.4557288601247777e-06,
21662
+ "loss": 0.1848,
21663
+ "step": 2122
21664
+ },
21665
+ {
21666
+ "epoch": 0.9309361982021487,
21667
+ "grad_norm": 1.1692040132174368,
21668
+ "learning_rate": 1.4374487524160374e-06,
21669
+ "loss": 0.1817,
21670
+ "step": 2123
21671
+ },
21672
+ {
21673
+ "epoch": 0.931374698531024,
21674
+ "grad_norm": 0.7266315441382668,
21675
+ "learning_rate": 1.419282472009309e-06,
21676
+ "loss": 0.1309,
21677
+ "step": 2124
21678
+ },
21679
+ {
21680
+ "epoch": 0.9318131988598991,
21681
+ "grad_norm": 0.7786894103546885,
21682
+ "learning_rate": 1.4012300614851205e-06,
21683
+ "loss": 0.1391,
21684
+ "step": 2125
21685
+ },
21686
+ {
21687
+ "epoch": 0.9318131988598991,
21688
+ "eval_PRM Accuracy": 0.9047619047619048,
21689
+ "eval_PRM F1": 0.9464285714285714,
21690
+ "eval_PRM F1 AUC": 0.9026069847515986,
21691
+ "eval_PRM F1 Neg": 0.5714285714285714,
21692
+ "eval_PRM NPV": 0.8888888888888888,
21693
+ "eval_PRM Precision": 0.905982905982906,
21694
+ "eval_PRM Recall": 0.9906542056074766,
21695
+ "eval_PRM Specificty": 0.42105263157894735,
21696
+ "eval_loss": 0.2656901180744171,
21697
+ "eval_runtime": 5.0871,
21698
+ "eval_samples_per_second": 5.897,
21699
+ "eval_steps_per_second": 0.197,
21700
+ "step": 2125
21701
+ },
21702
+ {
21703
+ "epoch": 0.9322516991887744,
21704
+ "grad_norm": 1.1467148132412364,
21705
+ "learning_rate": 1.3832915631570797e-06,
21706
+ "loss": 0.1949,
21707
+ "step": 2126
21708
+ },
21709
+ {
21710
+ "epoch": 0.9326901995176496,
21711
+ "grad_norm": 0.6715268839598615,
21712
+ "learning_rate": 1.3654670190718034e-06,
21713
+ "loss": 0.1519,
21714
+ "step": 2127
21715
+ },
21716
+ {
21717
+ "epoch": 0.9331286998465249,
21718
+ "grad_norm": 1.1541979063594885,
21719
+ "learning_rate": 1.3477564710088098e-06,
21720
+ "loss": 0.216,
21721
+ "step": 2128
21722
+ },
21723
+ {
21724
+ "epoch": 0.9335672001754002,
21725
+ "grad_norm": 0.6259082431772327,
21726
+ "learning_rate": 1.3301599604804094e-06,
21727
+ "loss": 0.0983,
21728
+ "step": 2129
21729
+ },
21730
+ {
21731
+ "epoch": 0.9340057005042753,
21732
+ "grad_norm": 0.8688995719013783,
21733
+ "learning_rate": 1.3126775287316151e-06,
21734
+ "loss": 0.1297,
21735
+ "step": 2130
21736
+ },
21737
+ {
21738
+ "epoch": 0.9340057005042753,
21739
+ "eval_PRM Accuracy": 0.9047619047619048,
21740
+ "eval_PRM F1": 0.9464285714285714,
21741
+ "eval_PRM F1 AUC": 0.9050664043285784,
21742
+ "eval_PRM F1 Neg": 0.5714285714285714,
21743
+ "eval_PRM NPV": 0.8888888888888888,
21744
+ "eval_PRM Precision": 0.905982905982906,
21745
+ "eval_PRM Recall": 0.9906542056074766,
21746
+ "eval_PRM Specificty": 0.42105263157894735,
21747
+ "eval_loss": 0.2642252743244171,
21748
+ "eval_runtime": 5.3474,
21749
+ "eval_samples_per_second": 5.61,
21750
+ "eval_steps_per_second": 0.187,
21751
+ "step": 2130
21752
+ },
21753
+ {
21754
+ "epoch": 0.9344442008331506,
21755
+ "grad_norm": 0.9335917329429506,
21756
+ "learning_rate": 1.2953092167400648e-06,
21757
+ "loss": 0.1911,
21758
+ "step": 2131
21759
+ },
21760
+ {
21761
+ "epoch": 0.9348827011620259,
21762
+ "grad_norm": 0.6039686103100694,
21763
+ "learning_rate": 1.278055065215894e-06,
21764
+ "loss": 0.0965,
21765
+ "step": 2132
21766
+ },
21767
+ {
21768
+ "epoch": 0.9353212014909011,
21769
+ "grad_norm": 1.3529839152802352,
21770
+ "learning_rate": 1.2609151146016408e-06,
21771
+ "loss": 0.2378,
21772
+ "step": 2133
21773
+ },
21774
+ {
21775
+ "epoch": 0.9357597018197764,
21776
+ "grad_norm": 0.6210514535593444,
21777
+ "learning_rate": 1.2438894050721906e-06,
21778
+ "loss": 0.1282,
21779
+ "step": 2134
21780
+ },
21781
+ {
21782
+ "epoch": 0.9361982021486516,
21783
+ "grad_norm": 0.8390757398143625,
21784
+ "learning_rate": 1.2269779765346379e-06,
21785
+ "loss": 0.2028,
21786
+ "step": 2135
21787
+ },
21788
+ {
21789
+ "epoch": 0.9361982021486516,
21790
+ "eval_PRM Accuracy": 0.9047619047619048,
21791
+ "eval_PRM F1": 0.9464285714285714,
21792
+ "eval_PRM F1 AUC": 0.9043285784554844,
21793
+ "eval_PRM F1 Neg": 0.5714285714285714,
21794
+ "eval_PRM NPV": 0.8888888888888888,
21795
+ "eval_PRM Precision": 0.905982905982906,
21796
+ "eval_PRM Recall": 0.9906542056074766,
21797
+ "eval_PRM Specificty": 0.42105263157894735,
21798
+ "eval_loss": 0.26437175273895264,
21799
+ "eval_runtime": 5.1964,
21800
+ "eval_samples_per_second": 5.773,
21801
+ "eval_steps_per_second": 0.192,
21802
+ "step": 2135
21803
+ },
21804
+ {
21805
+ "epoch": 0.9366367024775268,
21806
+ "grad_norm": 0.4687331394277408,
21807
+ "learning_rate": 1.210180868628219e-06,
21808
+ "loss": 0.1026,
21809
+ "step": 2136
21810
+ },
21811
+ {
21812
+ "epoch": 0.9370752028064021,
21813
+ "grad_norm": 0.5862264228110875,
21814
+ "learning_rate": 1.1934981207242069e-06,
21815
+ "loss": 0.124,
21816
+ "step": 2137
21817
+ },
21818
+ {
21819
+ "epoch": 0.9375137031352774,
21820
+ "grad_norm": 0.8200106052610655,
21821
+ "learning_rate": 1.176929771925822e-06,
21822
+ "loss": 0.2088,
21823
+ "step": 2138
21824
+ },
21825
+ {
21826
+ "epoch": 0.9379522034641526,
21827
+ "grad_norm": 0.8226521041070936,
21828
+ "learning_rate": 1.1604758610681389e-06,
21829
+ "loss": 0.1448,
21830
+ "step": 2139
21831
+ },
21832
+ {
21833
+ "epoch": 0.9383907037930278,
21834
+ "grad_norm": 0.5311207702270658,
21835
+ "learning_rate": 1.1441364267180065e-06,
21836
+ "loss": 0.1121,
21837
+ "step": 2140
21838
+ },
21839
+ {
21840
+ "epoch": 0.9383907037930278,
21841
+ "eval_PRM Accuracy": 0.9047619047619048,
21842
+ "eval_PRM F1": 0.9464285714285714,
21843
+ "eval_PRM F1 AUC": 0.9048204623708804,
21844
+ "eval_PRM F1 Neg": 0.5714285714285714,
21845
+ "eval_PRM NPV": 0.8888888888888888,
21846
+ "eval_PRM Precision": 0.905982905982906,
21847
+ "eval_PRM Recall": 0.9906542056074766,
21848
+ "eval_PRM Specificty": 0.42105263157894735,
21849
+ "eval_loss": 0.2653157413005829,
21850
+ "eval_runtime": 4.9421,
21851
+ "eval_samples_per_second": 6.07,
21852
+ "eval_steps_per_second": 0.202,
21853
+ "step": 2140
21854
+ },
21855
+ {
21856
+ "epoch": 0.9388292041219031,
21857
+ "grad_norm": 0.7184160903672674,
21858
+ "learning_rate": 1.1279115071739399e-06,
21859
+ "loss": 0.1779,
21860
+ "step": 2141
21861
+ },
21862
+ {
21863
+ "epoch": 0.9392677044507783,
21864
+ "grad_norm": 0.8668610911634299,
21865
+ "learning_rate": 1.11180114046604e-06,
21866
+ "loss": 0.1344,
21867
+ "step": 2142
21868
+ },
21869
+ {
21870
+ "epoch": 0.9397062047796536,
21871
+ "grad_norm": 0.8434118972893657,
21872
+ "learning_rate": 1.0958053643559007e-06,
21873
+ "loss": 0.1949,
21874
+ "step": 2143
21875
+ },
21876
+ {
21877
+ "epoch": 0.9401447051085289,
21878
+ "grad_norm": 0.9316201349513029,
21879
+ "learning_rate": 1.0799242163365419e-06,
21880
+ "loss": 0.1531,
21881
+ "step": 2144
21882
+ },
21883
+ {
21884
+ "epoch": 0.940583205437404,
21885
+ "grad_norm": 0.8142559862279604,
21886
+ "learning_rate": 1.064157733632276e-06,
21887
+ "loss": 0.1418,
21888
+ "step": 2145
21889
+ },
21890
+ {
21891
+ "epoch": 0.940583205437404,
21892
+ "eval_PRM Accuracy": 0.8968253968253969,
21893
+ "eval_PRM F1": 0.9417040358744395,
21894
+ "eval_PRM F1 AUC": 0.9033448106246926,
21895
+ "eval_PRM F1 Neg": 0.5517241379310345,
21896
+ "eval_PRM NPV": 0.8,
21897
+ "eval_PRM Precision": 0.9051724137931034,
21898
+ "eval_PRM Recall": 0.9813084112149533,
21899
+ "eval_PRM Specificty": 0.42105263157894735,
21900
+ "eval_loss": 0.26681315898895264,
21901
+ "eval_runtime": 4.9284,
21902
+ "eval_samples_per_second": 6.087,
21903
+ "eval_steps_per_second": 0.203,
21904
+ "step": 2145
21905
+ },
21906
+ {
21907
+ "epoch": 0.9410217057662793,
21908
+ "grad_norm": 0.5212668178086708,
21909
+ "learning_rate": 1.0485059531986696e-06,
21910
+ "loss": 0.105,
21911
+ "step": 2146
21912
+ },
21913
+ {
21914
+ "epoch": 0.9414602060951546,
21915
+ "grad_norm": 0.5750379454162786,
21916
+ "learning_rate": 1.0329689117224262e-06,
21917
+ "loss": 0.1251,
21918
+ "step": 2147
21919
+ },
21920
+ {
21921
+ "epoch": 0.9418987064240298,
21922
+ "grad_norm": 0.7938197843175332,
21923
+ "learning_rate": 1.0175466456213034e-06,
21924
+ "loss": 0.2071,
21925
+ "step": 2148
21926
+ },
21927
+ {
21928
+ "epoch": 0.9423372067529051,
21929
+ "grad_norm": 0.7609522576614798,
21930
+ "learning_rate": 1.0022391910440464e-06,
21931
+ "loss": 0.1686,
21932
+ "step": 2149
21933
+ },
21934
+ {
21935
+ "epoch": 0.9427757070817803,
21936
+ "grad_norm": 1.0825325298877981,
21937
+ "learning_rate": 9.870465838702824e-07,
21938
+ "loss": 0.206,
21939
+ "step": 2150
21940
+ },
21941
+ {
21942
+ "epoch": 0.9427757070817803,
21943
+ "eval_PRM Accuracy": 0.9047619047619048,
21944
+ "eval_PRM F1": 0.9464285714285714,
21945
+ "eval_PRM F1 AUC": 0.9033448106246926,
21946
+ "eval_PRM F1 Neg": 0.5714285714285714,
21947
+ "eval_PRM NPV": 0.8888888888888888,
21948
+ "eval_PRM Precision": 0.905982905982906,
21949
+ "eval_PRM Recall": 0.9906542056074766,
21950
+ "eval_PRM Specificty": 0.42105263157894735,
21951
+ "eval_loss": 0.2645670473575592,
21952
+ "eval_runtime": 4.8176,
21953
+ "eval_samples_per_second": 6.227,
21954
+ "eval_steps_per_second": 0.208,
21955
+ "step": 2150
21956
+ },
21957
+ {
21958
+ "epoch": 0.9432142074106555,
21959
+ "grad_norm": 1.1605715209380159,
21960
+ "learning_rate": 9.719688597104315e-07,
21961
+ "loss": 0.2371,
21962
+ "step": 2151
21963
+ },
21964
+ {
21965
+ "epoch": 0.9436527077395308,
21966
+ "grad_norm": 0.919347752868836,
21967
+ "learning_rate": 9.57006053905668e-07,
21968
+ "loss": 0.1507,
21969
+ "step": 2152
21970
+ },
21971
+ {
21972
+ "epoch": 0.9440912080684061,
21973
+ "grad_norm": 0.6573401880918529,
21974
+ "learning_rate": 9.421582015277763e-07,
21975
+ "loss": 0.137,
21976
+ "step": 2153
21977
+ },
21978
+ {
21979
+ "epoch": 0.9445297083972813,
21980
+ "grad_norm": 0.555027807607804,
21981
+ "learning_rate": 9.274253373791064e-07,
21982
+ "loss": 0.1233,
21983
+ "step": 2154
21984
+ },
21985
+ {
21986
+ "epoch": 0.9449682087261565,
21987
+ "grad_norm": 0.7088183628017939,
21988
+ "learning_rate": 9.128074959924904e-07,
21989
+ "loss": 0.149,
21990
+ "step": 2155
21991
+ },
21992
+ {
21993
+ "epoch": 0.9449682087261565,
21994
+ "eval_PRM Accuracy": 0.8968253968253969,
21995
+ "eval_PRM F1": 0.9417040358744395,
21996
+ "eval_PRM F1 AUC": 0.9038366945400885,
21997
+ "eval_PRM F1 Neg": 0.5517241379310345,
21998
+ "eval_PRM NPV": 0.8,
21999
+ "eval_PRM Precision": 0.9051724137931034,
22000
+ "eval_PRM Recall": 0.9813084112149533,
22001
+ "eval_PRM Specificty": 0.42105263157894735,
22002
+ "eval_loss": 0.26513671875,
22003
+ "eval_runtime": 5.2144,
22004
+ "eval_samples_per_second": 5.753,
22005
+ "eval_steps_per_second": 0.192,
22006
+ "step": 2155
22007
+ },
22008
+ {
22009
+ "epoch": 0.9454067090550318,
22010
+ "grad_norm": 0.7132128101033369,
22011
+ "learning_rate": 8.983047116311428e-07,
22012
+ "loss": 0.1473,
22013
+ "step": 2156
22014
+ },
22015
+ {
22016
+ "epoch": 0.945845209383907,
22017
+ "grad_norm": 0.6946167168023032,
22018
+ "learning_rate": 8.839170182886103e-07,
22019
+ "loss": 0.1818,
22020
+ "step": 2157
22021
+ },
22022
+ {
22023
+ "epoch": 0.9462837097127823,
22024
+ "grad_norm": 1.021201531447852,
22025
+ "learning_rate": 8.696444496886503e-07,
22026
+ "loss": 0.1585,
22027
+ "step": 2158
22028
+ },
22029
+ {
22030
+ "epoch": 0.9467222100416576,
22031
+ "grad_norm": 0.9336316608951104,
22032
+ "learning_rate": 8.554870392851966e-07,
22033
+ "loss": 0.2737,
22034
+ "step": 2159
22035
+ },
22036
+ {
22037
+ "epoch": 0.9471607103705327,
22038
+ "grad_norm": 0.9472850446578975,
22039
+ "learning_rate": 8.414448202622494e-07,
22040
+ "loss": 0.1883,
22041
+ "step": 2160
22042
+ },
22043
+ {
22044
+ "epoch": 0.9471607103705327,
22045
+ "eval_PRM Accuracy": 0.9047619047619048,
22046
+ "eval_PRM F1": 0.9464285714285714,
22047
+ "eval_PRM F1 AUC": 0.9048204623708804,
22048
+ "eval_PRM F1 Neg": 0.5714285714285714,
22049
+ "eval_PRM NPV": 0.8888888888888888,
22050
+ "eval_PRM Precision": 0.905982905982906,
22051
+ "eval_PRM Recall": 0.9906542056074766,
22052
+ "eval_PRM Specificty": 0.42105263157894735,
22053
+ "eval_loss": 0.26331380009651184,
22054
+ "eval_runtime": 5.0138,
22055
+ "eval_samples_per_second": 5.983,
22056
+ "eval_steps_per_second": 0.199,
22057
+ "step": 2160
22058
+ },
22059
+ {
22060
+ "epoch": 0.947599210699408,
22061
+ "grad_norm": 1.111800506009798,
22062
+ "learning_rate": 8.275178255338134e-07,
22063
+ "loss": 0.1685,
22064
+ "step": 2161
22065
+ },
22066
+ {
22067
+ "epoch": 0.9480377110282833,
22068
+ "grad_norm": 0.5059186367851203,
22069
+ "learning_rate": 8.137060877438041e-07,
22070
+ "loss": 0.0931,
22071
+ "step": 2162
22072
+ },
22073
+ {
22074
+ "epoch": 0.9484762113571585,
22075
+ "grad_norm": 0.7719033530225466,
22076
+ "learning_rate": 8.000096392660029e-07,
22077
+ "loss": 0.1343,
22078
+ "step": 2163
22079
+ },
22080
+ {
22081
+ "epoch": 0.9489147116860338,
22082
+ "grad_norm": 0.5929636269259362,
22083
+ "learning_rate": 7.864285122039405e-07,
22084
+ "loss": 0.1285,
22085
+ "step": 2164
22086
+ },
22087
+ {
22088
+ "epoch": 0.949353212014909,
22089
+ "grad_norm": 1.1695896496309977,
22090
+ "learning_rate": 7.729627383908533e-07,
22091
+ "loss": 0.2401,
22092
+ "step": 2165
22093
+ },
22094
+ {
22095
+ "epoch": 0.949353212014909,
22096
+ "eval_PRM Accuracy": 0.8968253968253969,
22097
+ "eval_PRM F1": 0.9417040358744395,
22098
+ "eval_PRM F1 AUC": 0.9026069847515986,
22099
+ "eval_PRM F1 Neg": 0.5517241379310345,
22100
+ "eval_PRM NPV": 0.8,
22101
+ "eval_PRM Precision": 0.9051724137931034,
22102
+ "eval_PRM Recall": 0.9813084112149533,
22103
+ "eval_PRM Specificty": 0.42105263157894735,
22104
+ "eval_loss": 0.2637369930744171,
22105
+ "eval_runtime": 4.8625,
22106
+ "eval_samples_per_second": 6.17,
22107
+ "eval_steps_per_second": 0.206,
22108
+ "step": 2165
22109
+ },
22110
+ {
22111
+ "epoch": 0.9497917123437842,
22112
+ "grad_norm": 0.6521530665459749,
22113
+ "learning_rate": 7.596123493895991e-07,
22114
+ "loss": 0.1058,
22115
+ "step": 2166
22116
+ },
22117
+ {
22118
+ "epoch": 0.9502302126726595,
22119
+ "grad_norm": 0.8427308193302898,
22120
+ "learning_rate": 7.463773764925686e-07,
22121
+ "loss": 0.1341,
22122
+ "step": 2167
22123
+ },
22124
+ {
22125
+ "epoch": 0.9506687130015348,
22126
+ "grad_norm": 0.7869796271149967,
22127
+ "learning_rate": 7.33257850721647e-07,
22128
+ "loss": 0.158,
22129
+ "step": 2168
22130
+ },
22131
+ {
22132
+ "epoch": 0.95110721333041,
22133
+ "grad_norm": 0.7962832533796937,
22134
+ "learning_rate": 7.202538028280914e-07,
22135
+ "loss": 0.1094,
22136
+ "step": 2169
22137
+ },
22138
+ {
22139
+ "epoch": 0.9515457136592852,
22140
+ "grad_norm": 0.8636075599024543,
22141
+ "learning_rate": 7.073652632925087e-07,
22142
+ "loss": 0.1705,
22143
+ "step": 2170
22144
+ },
22145
+ {
22146
+ "epoch": 0.9515457136592852,
22147
+ "eval_PRM Accuracy": 0.8968253968253969,
22148
+ "eval_PRM F1": 0.9417040358744395,
22149
+ "eval_PRM F1 AUC": 0.9028529267092965,
22150
+ "eval_PRM F1 Neg": 0.5517241379310345,
22151
+ "eval_PRM NPV": 0.8,
22152
+ "eval_PRM Precision": 0.9051724137931034,
22153
+ "eval_PRM Recall": 0.9813084112149533,
22154
+ "eval_PRM Specificty": 0.42105263157894735,
22155
+ "eval_loss": 0.26453450322151184,
22156
+ "eval_runtime": 5.1573,
22157
+ "eval_samples_per_second": 5.817,
22158
+ "eval_steps_per_second": 0.194,
22159
+ "step": 2170
22160
+ },
22161
+ {
22162
+ "epoch": 0.9519842139881605,
22163
+ "grad_norm": 0.784077039684517,
22164
+ "learning_rate": 6.945922623247614e-07,
22165
+ "loss": 0.1602,
22166
+ "step": 2171
22167
+ },
22168
+ {
22169
+ "epoch": 0.9524227143170357,
22170
+ "grad_norm": 1.2854529872085394,
22171
+ "learning_rate": 6.819348298638839e-07,
22172
+ "loss": 0.1922,
22173
+ "step": 2172
22174
+ },
22175
+ {
22176
+ "epoch": 0.952861214645911,
22177
+ "grad_norm": 0.6349974234756767,
22178
+ "learning_rate": 6.693929955780332e-07,
22179
+ "loss": 0.1656,
22180
+ "step": 2173
22181
+ },
22182
+ {
22183
+ "epoch": 0.9532997149747863,
22184
+ "grad_norm": 0.5238995151748975,
22185
+ "learning_rate": 6.569667888644104e-07,
22186
+ "loss": 0.1156,
22187
+ "step": 2174
22188
+ },
22189
+ {
22190
+ "epoch": 0.9537382153036614,
22191
+ "grad_norm": 0.7349628428554369,
22192
+ "learning_rate": 6.44656238849195e-07,
22193
+ "loss": 0.1484,
22194
+ "step": 2175
22195
+ },
22196
+ {
22197
+ "epoch": 0.9537382153036614,
22198
+ "eval_PRM Accuracy": 0.9047619047619048,
22199
+ "eval_PRM F1": 0.9464285714285714,
22200
+ "eval_PRM F1 AUC": 0.9053123462862764,
22201
+ "eval_PRM F1 Neg": 0.5714285714285714,
22202
+ "eval_PRM NPV": 0.8888888888888888,
22203
+ "eval_PRM Precision": 0.905982905982906,
22204
+ "eval_PRM Recall": 0.9906542056074766,
22205
+ "eval_PRM Specificty": 0.42105263157894735,
22206
+ "eval_loss": 0.26232096552848816,
22207
+ "eval_runtime": 5.086,
22208
+ "eval_samples_per_second": 5.899,
22209
+ "eval_steps_per_second": 0.197,
22210
+ "step": 2175
22211
+ },
22212
+ {
22213
+ "epoch": 0.9541767156325367,
22214
+ "grad_norm": 0.4632878495486221,
22215
+ "learning_rate": 6.324613743874774e-07,
22216
+ "loss": 0.0758,
22217
+ "step": 2176
22218
+ },
22219
+ {
22220
+ "epoch": 0.954615215961412,
22221
+ "grad_norm": 0.6786662920685442,
22222
+ "learning_rate": 6.203822240631929e-07,
22223
+ "loss": 0.1685,
22224
+ "step": 2177
22225
+ },
22226
+ {
22227
+ "epoch": 0.9550537162902872,
22228
+ "grad_norm": 0.9438241899400488,
22229
+ "learning_rate": 6.084188161890325e-07,
22230
+ "loss": 0.1806,
22231
+ "step": 2178
22232
+ },
22233
+ {
22234
+ "epoch": 0.9554922166191625,
22235
+ "grad_norm": 0.7715786403842603,
22236
+ "learning_rate": 5.965711788064099e-07,
22237
+ "loss": 0.1814,
22238
+ "step": 2179
22239
+ },
22240
+ {
22241
+ "epoch": 0.9559307169480377,
22242
+ "grad_norm": 1.0451113800282277,
22243
+ "learning_rate": 5.848393396853891e-07,
22244
+ "loss": 0.2313,
22245
+ "step": 2180
22246
+ },
22247
+ {
22248
+ "epoch": 0.9559307169480377,
22249
+ "eval_PRM Accuracy": 0.8968253968253969,
22250
+ "eval_PRM F1": 0.9417040358744395,
22251
+ "eval_PRM F1 AUC": 0.9040826364977865,
22252
+ "eval_PRM F1 Neg": 0.5517241379310345,
22253
+ "eval_PRM NPV": 0.8,
22254
+ "eval_PRM Precision": 0.9051724137931034,
22255
+ "eval_PRM Recall": 0.9813084112149533,
22256
+ "eval_PRM Specificty": 0.42105263157894735,
22257
+ "eval_loss": 0.26347655057907104,
22258
+ "eval_runtime": 5.2788,
22259
+ "eval_samples_per_second": 5.683,
22260
+ "eval_steps_per_second": 0.189,
22261
+ "step": 2180
22262
+ },
22263
+ {
22264
+ "epoch": 0.9563692172769129,
22265
+ "grad_norm": 1.266440352991766,
22266
+ "learning_rate": 5.732233263245845e-07,
22267
+ "loss": 0.2308,
22268
+ "step": 2181
22269
+ },
22270
+ {
22271
+ "epoch": 0.9568077176057882,
22272
+ "grad_norm": 0.7482766422048829,
22273
+ "learning_rate": 5.617231659511446e-07,
22274
+ "loss": 0.2174,
22275
+ "step": 2182
22276
+ },
22277
+ {
22278
+ "epoch": 0.9572462179346635,
22279
+ "grad_norm": 0.7270349159294758,
22280
+ "learning_rate": 5.50338885520657e-07,
22281
+ "loss": 0.1179,
22282
+ "step": 2183
22283
+ },
22284
+ {
22285
+ "epoch": 0.9576847182635387,
22286
+ "grad_norm": 0.5952603890903395,
22287
+ "learning_rate": 5.390705117171047e-07,
22288
+ "loss": 0.1598,
22289
+ "step": 2184
22290
+ },
22291
+ {
22292
+ "epoch": 0.9581232185924139,
22293
+ "grad_norm": 0.8984400831268945,
22294
+ "learning_rate": 5.279180709527765e-07,
22295
+ "loss": 0.1434,
22296
+ "step": 2185
22297
+ },
22298
+ {
22299
+ "epoch": 0.9581232185924139,
22300
+ "eval_PRM Accuracy": 0.8968253968253969,
22301
+ "eval_PRM F1": 0.9417040358744395,
22302
+ "eval_PRM F1 AUC": 0.9033448106246925,
22303
+ "eval_PRM F1 Neg": 0.5517241379310345,
22304
+ "eval_PRM NPV": 0.8,
22305
+ "eval_PRM Precision": 0.9051724137931034,
22306
+ "eval_PRM Recall": 0.9813084112149533,
22307
+ "eval_PRM Specificty": 0.42105263157894735,
22308
+ "eval_loss": 0.2635742127895355,
22309
+ "eval_runtime": 5.2136,
22310
+ "eval_samples_per_second": 5.754,
22311
+ "eval_steps_per_second": 0.192,
22312
+ "step": 2185
22313
+ },
22314
+ {
22315
+ "epoch": 0.9585617189212892,
22316
+ "grad_norm": 0.7725604869756233,
22317
+ "learning_rate": 5.168815893682343e-07,
22318
+ "loss": 0.139,
22319
+ "step": 2186
22320
+ },
22321
+ {
22322
+ "epoch": 0.9590002192501644,
22323
+ "grad_norm": 0.5621323581107791,
22324
+ "learning_rate": 5.059610928322356e-07,
22325
+ "loss": 0.1059,
22326
+ "step": 2187
22327
+ },
22328
+ {
22329
+ "epoch": 0.9594387195790397,
22330
+ "grad_norm": 0.6900791198754395,
22331
+ "learning_rate": 4.95156606941688e-07,
22332
+ "loss": 0.1655,
22333
+ "step": 2188
22334
+ },
22335
+ {
22336
+ "epoch": 0.959877219907915,
22337
+ "grad_norm": 0.9013450567220388,
22338
+ "learning_rate": 4.844681570215559e-07,
22339
+ "loss": 0.1771,
22340
+ "step": 2189
22341
+ },
22342
+ {
22343
+ "epoch": 0.9603157202367901,
22344
+ "grad_norm": 0.9556659040681873,
22345
+ "learning_rate": 4.738957681248379e-07,
22346
+ "loss": 0.169,
22347
+ "step": 2190
22348
+ },
22349
+ {
22350
+ "epoch": 0.9603157202367901,
22351
+ "eval_PRM Accuracy": 0.8968253968253969,
22352
+ "eval_PRM F1": 0.9417040358744395,
22353
+ "eval_PRM F1 AUC": 0.9040826364977865,
22354
+ "eval_PRM F1 Neg": 0.5517241379310345,
22355
+ "eval_PRM NPV": 0.8,
22356
+ "eval_PRM Precision": 0.9051724137931034,
22357
+ "eval_PRM Recall": 0.9813084112149533,
22358
+ "eval_PRM Specificty": 0.42105263157894735,
22359
+ "eval_loss": 0.26306965947151184,
22360
+ "eval_runtime": 4.8874,
22361
+ "eval_samples_per_second": 6.138,
22362
+ "eval_steps_per_second": 0.205,
22363
+ "step": 2190
22364
+ },
22365
+ {
22366
+ "epoch": 0.9607542205656654,
22367
+ "grad_norm": 1.1230099132122637,
22368
+ "learning_rate": 4.634394650324947e-07,
22369
+ "loss": 0.1854,
22370
+ "step": 2191
22371
+ },
22372
+ {
22373
+ "epoch": 0.9611927208945407,
22374
+ "grad_norm": 0.7775607006120955,
22375
+ "learning_rate": 4.530992722533878e-07,
22376
+ "loss": 0.1729,
22377
+ "step": 2192
22378
+ },
22379
+ {
22380
+ "epoch": 0.9616312212234159,
22381
+ "grad_norm": 0.9230569500481941,
22382
+ "learning_rate": 4.4287521402421875e-07,
22383
+ "loss": 0.1276,
22384
+ "step": 2193
22385
+ },
22386
+ {
22387
+ "epoch": 0.9620697215522912,
22388
+ "grad_norm": 0.5600690909404583,
22389
+ "learning_rate": 4.32767314309479e-07,
22390
+ "loss": 0.1232,
22391
+ "step": 2194
22392
+ },
22393
+ {
22394
+ "epoch": 0.9625082218811664,
22395
+ "grad_norm": 0.5395738887593279,
22396
+ "learning_rate": 4.227755968014002e-07,
22397
+ "loss": 0.1028,
22398
+ "step": 2195
22399
+ },
22400
+ {
22401
+ "epoch": 0.9625082218811664,
22402
+ "eval_PRM Accuracy": 0.8968253968253969,
22403
+ "eval_PRM F1": 0.9417040358744395,
22404
+ "eval_PRM F1 AUC": 0.9026069847515985,
22405
+ "eval_PRM F1 Neg": 0.5517241379310345,
22406
+ "eval_PRM NPV": 0.8,
22407
+ "eval_PRM Precision": 0.9051724137931034,
22408
+ "eval_PRM Recall": 0.9813084112149533,
22409
+ "eval_PRM Specificty": 0.42105263157894735,
22410
+ "eval_loss": 0.26363933086395264,
22411
+ "eval_runtime": 5.191,
22412
+ "eval_samples_per_second": 5.779,
22413
+ "eval_steps_per_second": 0.193,
22414
+ "step": 2195
22415
+ },
22416
+ {
22417
+ "epoch": 0.9629467222100416,
22418
+ "grad_norm": 0.8361798815116941,
22419
+ "learning_rate": 4.129000849198872e-07,
22420
+ "loss": 0.1546,
22421
+ "step": 2196
22422
+ },
22423
+ {
22424
+ "epoch": 0.9633852225389169,
22425
+ "grad_norm": 0.822751115655838,
22426
+ "learning_rate": 4.0314080181245716e-07,
22427
+ "loss": 0.1551,
22428
+ "step": 2197
22429
+ },
22430
+ {
22431
+ "epoch": 0.9638237228677922,
22432
+ "grad_norm": 0.5776993811184741,
22433
+ "learning_rate": 3.93497770354212e-07,
22434
+ "loss": 0.119,
22435
+ "step": 2198
22436
+ },
22437
+ {
22438
+ "epoch": 0.9642622231966674,
22439
+ "grad_norm": 0.6327136380994263,
22440
+ "learning_rate": 3.839710131477492e-07,
22441
+ "loss": 0.1322,
22442
+ "step": 2199
22443
+ },
22444
+ {
22445
+ "epoch": 0.9647007235255426,
22446
+ "grad_norm": 0.7086850988696183,
22447
+ "learning_rate": 3.745605525231399e-07,
22448
+ "loss": 0.1564,
22449
+ "step": 2200
22450
+ },
22451
+ {
22452
+ "epoch": 0.9647007235255426,
22453
+ "eval_PRM Accuracy": 0.8968253968253969,
22454
+ "eval_PRM F1": 0.9417040358744395,
22455
+ "eval_PRM F1 AUC": 0.9035907525823905,
22456
+ "eval_PRM F1 Neg": 0.5517241379310345,
22457
+ "eval_PRM NPV": 0.8,
22458
+ "eval_PRM Precision": 0.9051724137931034,
22459
+ "eval_PRM Recall": 0.9813084112149533,
22460
+ "eval_PRM Specificty": 0.42105263157894735,
22461
+ "eval_loss": 0.2638508975505829,
22462
+ "eval_runtime": 5.2255,
22463
+ "eval_samples_per_second": 5.741,
22464
+ "eval_steps_per_second": 0.191,
22465
+ "step": 2200
22466
+ },
22467
+ {
22468
+ "epoch": 0.9651392238544179,
22469
+ "grad_norm": 0.569190485329816,
22470
+ "learning_rate": 3.652664105378678e-07,
22471
+ "loss": 0.1342,
22472
+ "step": 2201
22473
+ },
22474
+ {
22475
+ "epoch": 0.9655777241832931,
22476
+ "grad_norm": 0.9935857823706388,
22477
+ "learning_rate": 3.5608860897675677e-07,
22478
+ "loss": 0.1997,
22479
+ "step": 2202
22480
+ },
22481
+ {
22482
+ "epoch": 0.9660162245121684,
22483
+ "grad_norm": 0.9913514917949786,
22484
+ "learning_rate": 3.470271693519545e-07,
22485
+ "loss": 0.1979,
22486
+ "step": 2203
22487
+ },
22488
+ {
22489
+ "epoch": 0.9664547248410437,
22490
+ "grad_norm": 0.6366315028151958,
22491
+ "learning_rate": 3.380821129028489e-07,
22492
+ "loss": 0.103,
22493
+ "step": 2204
22494
+ },
22495
+ {
22496
+ "epoch": 0.9668932251699188,
22497
+ "grad_norm": 0.5753737137664783,
22498
+ "learning_rate": 3.2925346059605176e-07,
22499
+ "loss": 0.0892,
22500
+ "step": 2205
22501
+ },
22502
+ {
22503
+ "epoch": 0.9668932251699188,
22504
+ "eval_PRM Accuracy": 0.8968253968253969,
22505
+ "eval_PRM F1": 0.9417040358744395,
22506
+ "eval_PRM F1 AUC": 0.9033448106246925,
22507
+ "eval_PRM F1 Neg": 0.5517241379310345,
22508
+ "eval_PRM NPV": 0.8,
22509
+ "eval_PRM Precision": 0.9051724137931034,
22510
+ "eval_PRM Recall": 0.9813084112149533,
22511
+ "eval_PRM Specificty": 0.42105263157894735,
22512
+ "eval_loss": 0.26412761211395264,
22513
+ "eval_runtime": 5.1565,
22514
+ "eval_samples_per_second": 5.818,
22515
+ "eval_steps_per_second": 0.194,
22516
+ "step": 2205
22517
+ },
22518
+ {
22519
+ "epoch": 0.9673317254987941,
22520
+ "grad_norm": 0.5455766403670592,
22521
+ "learning_rate": 3.205412331253099e-07,
22522
+ "loss": 0.1049,
22523
+ "step": 2206
22524
+ },
22525
+ {
22526
+ "epoch": 0.9677702258276694,
22527
+ "grad_norm": 0.6338154276165335,
22528
+ "learning_rate": 3.119454509114883e-07,
22529
+ "loss": 0.1278,
22530
+ "step": 2207
22531
+ },
22532
+ {
22533
+ "epoch": 0.9682087261565446,
22534
+ "grad_norm": 0.5264334592171102,
22535
+ "learning_rate": 3.034661341025258e-07,
22536
+ "loss": 0.0964,
22537
+ "step": 2208
22538
+ },
22539
+ {
22540
+ "epoch": 0.9686472264854199,
22541
+ "grad_norm": 1.0680536965110514,
22542
+ "learning_rate": 2.9510330257335184e-07,
22543
+ "loss": 0.1671,
22544
+ "step": 2209
22545
+ },
22546
+ {
22547
+ "epoch": 0.9690857268142951,
22548
+ "grad_norm": 0.9708998643068253,
22549
+ "learning_rate": 2.8685697592587546e-07,
22550
+ "loss": 0.1963,
22551
+ "step": 2210
22552
+ },
22553
+ {
22554
+ "epoch": 0.9690857268142951,
22555
+ "eval_PRM Accuracy": 0.8968253968253969,
22556
+ "eval_PRM F1": 0.9417040358744395,
22557
+ "eval_PRM F1 AUC": 0.9028529267092965,
22558
+ "eval_PRM F1 Neg": 0.5517241379310345,
22559
+ "eval_PRM NPV": 0.8,
22560
+ "eval_PRM Precision": 0.9051724137931034,
22561
+ "eval_PRM Recall": 0.9813084112149533,
22562
+ "eval_PRM Specificty": 0.42105263157894735,
22563
+ "eval_loss": 0.26445311307907104,
22564
+ "eval_runtime": 5.0997,
22565
+ "eval_samples_per_second": 5.883,
22566
+ "eval_steps_per_second": 0.196,
22567
+ "step": 2210
22568
+ },
22569
+ {
22570
+ "epoch": 0.9695242271431703,
22571
+ "grad_norm": 1.1110372911583375,
22572
+ "learning_rate": 2.787271734889185e-07,
22573
+ "loss": 0.1375,
22574
+ "step": 2211
22575
+ },
22576
+ {
22577
+ "epoch": 0.9699627274720456,
22578
+ "grad_norm": 0.586301467596993,
22579
+ "learning_rate": 2.7071391431818806e-07,
22580
+ "loss": 0.161,
22581
+ "step": 2212
22582
+ },
22583
+ {
22584
+ "epoch": 0.9704012278009209,
22585
+ "grad_norm": 0.7126567576095886,
22586
+ "learning_rate": 2.628172171962151e-07,
22587
+ "loss": 0.1338,
22588
+ "step": 2213
22589
+ },
22590
+ {
22591
+ "epoch": 0.9708397281297961,
22592
+ "grad_norm": 0.6451622075385745,
22593
+ "learning_rate": 2.55037100632316e-07,
22594
+ "loss": 0.1564,
22595
+ "step": 2214
22596
+ },
22597
+ {
22598
+ "epoch": 0.9712782284586713,
22599
+ "grad_norm": 0.6184519064922749,
22600
+ "learning_rate": 2.473735828625534e-07,
22601
+ "loss": 0.0992,
22602
+ "step": 2215
22603
+ },
22604
+ {
22605
+ "epoch": 0.9712782284586713,
22606
+ "eval_PRM Accuracy": 0.8968253968253969,
22607
+ "eval_PRM F1": 0.9417040358744395,
22608
+ "eval_PRM F1 AUC": 0.9065420560747663,
22609
+ "eval_PRM F1 Neg": 0.5517241379310345,
22610
+ "eval_PRM NPV": 0.8,
22611
+ "eval_PRM Precision": 0.9051724137931034,
22612
+ "eval_PRM Recall": 0.9813084112149533,
22613
+ "eval_PRM Specificty": 0.42105263157894735,
22614
+ "eval_loss": 0.26346027851104736,
22615
+ "eval_runtime": 5.182,
22616
+ "eval_samples_per_second": 5.789,
22617
+ "eval_steps_per_second": 0.193,
22618
+ "step": 2215
22619
+ },
22620
+ {
22621
+ "epoch": 0.9717167287875466,
22622
+ "grad_norm": 0.6138175685958295,
22623
+ "learning_rate": 2.3982668184968636e-07,
22624
+ "loss": 0.124,
22625
+ "step": 2216
22626
+ },
22627
+ {
22628
+ "epoch": 0.9721552291164218,
22629
+ "grad_norm": 0.5360624165039689,
22630
+ "learning_rate": 2.323964152831426e-07,
22631
+ "loss": 0.1222,
22632
+ "step": 2217
22633
+ },
22634
+ {
22635
+ "epoch": 0.9725937294452971,
22636
+ "grad_norm": 0.8228569935909387,
22637
+ "learning_rate": 2.250828005789518e-07,
22638
+ "loss": 0.1778,
22639
+ "step": 2218
22640
+ },
22641
+ {
22642
+ "epoch": 0.9730322297741724,
22643
+ "grad_norm": 1.0404806638045407,
22644
+ "learning_rate": 2.1788585487972913e-07,
22645
+ "loss": 0.1893,
22646
+ "step": 2219
22647
+ },
22648
+ {
22649
+ "epoch": 0.9734707301030476,
22650
+ "grad_norm": 0.4807679078408988,
22651
+ "learning_rate": 2.1080559505462505e-07,
22652
+ "loss": 0.1029,
22653
+ "step": 2220
22654
+ },
22655
+ {
22656
+ "epoch": 0.9734707301030476,
22657
+ "eval_PRM Accuracy": 0.8968253968253969,
22658
+ "eval_PRM F1": 0.9417040358744395,
22659
+ "eval_PRM F1 AUC": 0.9038366945400885,
22660
+ "eval_PRM F1 Neg": 0.5517241379310345,
22661
+ "eval_PRM NPV": 0.8,
22662
+ "eval_PRM Precision": 0.9051724137931034,
22663
+ "eval_PRM Recall": 0.9813084112149533,
22664
+ "eval_PRM Specificty": 0.42105263157894735,
22665
+ "eval_loss": 0.26331380009651184,
22666
+ "eval_runtime": 5.0227,
22667
+ "eval_samples_per_second": 5.973,
22668
+ "eval_steps_per_second": 0.199,
22669
+ "step": 2220
22670
+ },
22671
+ {
22672
+ "epoch": 0.9739092304319228,
22673
+ "grad_norm": 0.7656904200298938,
22674
+ "learning_rate": 2.0384203769928667e-07,
22675
+ "loss": 0.1769,
22676
+ "step": 2221
22677
+ },
22678
+ {
22679
+ "epoch": 0.9743477307607981,
22680
+ "grad_norm": 1.0331427419472423,
22681
+ "learning_rate": 1.9699519913581322e-07,
22682
+ "loss": 0.1885,
22683
+ "step": 2222
22684
+ },
22685
+ {
22686
+ "epoch": 0.9747862310896733,
22687
+ "grad_norm": 0.783225997877613,
22688
+ "learning_rate": 1.9026509541272275e-07,
22689
+ "loss": 0.1498,
22690
+ "step": 2223
22691
+ },
22692
+ {
22693
+ "epoch": 0.9752247314185486,
22694
+ "grad_norm": 0.7720611933517559,
22695
+ "learning_rate": 1.8365174230492998e-07,
22696
+ "loss": 0.1623,
22697
+ "step": 2224
22698
+ },
22699
+ {
22700
+ "epoch": 0.9756632317474239,
22701
+ "grad_norm": 0.7520632190497106,
22702
+ "learning_rate": 1.7715515531366856e-07,
22703
+ "loss": 0.1841,
22704
+ "step": 2225
22705
+ },
22706
+ {
22707
+ "epoch": 0.9756632317474239,
22708
+ "eval_PRM Accuracy": 0.8968253968253969,
22709
+ "eval_PRM F1": 0.9417040358744395,
22710
+ "eval_PRM F1 AUC": 0.9008853910477128,
22711
+ "eval_PRM F1 Neg": 0.5517241379310345,
22712
+ "eval_PRM NPV": 0.8,
22713
+ "eval_PRM Precision": 0.9051724137931034,
22714
+ "eval_PRM Recall": 0.9813084112149533,
22715
+ "eval_PRM Specificty": 0.42105263157894735,
22716
+ "eval_loss": 0.26486003398895264,
22717
+ "eval_runtime": 5.0609,
22718
+ "eval_samples_per_second": 5.928,
22719
+ "eval_steps_per_second": 0.198,
22720
+ "step": 2225
22721
+ },
22722
+ {
22723
+ "epoch": 0.976101732076299,
22724
+ "grad_norm": 0.4777516949598096,
22725
+ "learning_rate": 1.7077534966650766e-07,
22726
+ "loss": 0.1083,
22727
+ "step": 2226
22728
+ },
22729
+ {
22730
+ "epoch": 0.9765402324051743,
22731
+ "grad_norm": 0.6892492529814521,
22732
+ "learning_rate": 1.6451234031726882e-07,
22733
+ "loss": 0.1122,
22734
+ "step": 2227
22735
+ },
22736
+ {
22737
+ "epoch": 0.9769787327340496,
22738
+ "grad_norm": 0.734444175159532,
22739
+ "learning_rate": 1.5836614194602027e-07,
22740
+ "loss": 0.1346,
22741
+ "step": 2228
22742
+ },
22743
+ {
22744
+ "epoch": 0.9774172330629248,
22745
+ "grad_norm": 0.7628592587258765,
22746
+ "learning_rate": 1.5233676895902714e-07,
22747
+ "loss": 0.2134,
22748
+ "step": 2229
22749
+ },
22750
+ {
22751
+ "epoch": 0.9778557333918001,
22752
+ "grad_norm": 0.6999402711499318,
22753
+ "learning_rate": 1.4642423548873462e-07,
22754
+ "loss": 0.1594,
22755
+ "step": 2230
22756
+ },
22757
+ {
22758
+ "epoch": 0.9778557333918001,
22759
+ "eval_PRM Accuracy": 0.8968253968253969,
22760
+ "eval_PRM F1": 0.9417040358744395,
22761
+ "eval_PRM F1 AUC": 0.9021151008362026,
22762
+ "eval_PRM F1 Neg": 0.5517241379310345,
22763
+ "eval_PRM NPV": 0.8,
22764
+ "eval_PRM Precision": 0.9051724137931034,
22765
+ "eval_PRM Recall": 0.9813084112149533,
22766
+ "eval_PRM Specificty": 0.42105263157894735,
22767
+ "eval_loss": 0.26419270038604736,
22768
+ "eval_runtime": 5.0475,
22769
+ "eval_samples_per_second": 5.943,
22770
+ "eval_steps_per_second": 0.198,
22771
+ "step": 2230
22772
+ },
22773
+ {
22774
+ "epoch": 0.9782942337206753,
22775
+ "grad_norm": 1.0426238786158057,
22776
+ "learning_rate": 1.40628555393707e-07,
22777
+ "loss": 0.1631,
22778
+ "step": 2231
22779
+ },
22780
+ {
22781
+ "epoch": 0.9787327340495505,
22782
+ "grad_norm": 0.6611734543229769,
22783
+ "learning_rate": 1.3494974225863322e-07,
22784
+ "loss": 0.1174,
22785
+ "step": 2232
22786
+ },
22787
+ {
22788
+ "epoch": 0.9791712343784258,
22789
+ "grad_norm": 1.0595722319614658,
22790
+ "learning_rate": 1.293878093942602e-07,
22791
+ "loss": 0.1999,
22792
+ "step": 2233
22793
+ },
22794
+ {
22795
+ "epoch": 0.9796097347073011,
22796
+ "grad_norm": 0.7775983526743748,
22797
+ "learning_rate": 1.2394276983737073e-07,
22798
+ "loss": 0.1644,
22799
+ "step": 2234
22800
+ },
22801
+ {
22802
+ "epoch": 0.9800482350361763,
22803
+ "grad_norm": 0.7031513669716907,
22804
+ "learning_rate": 1.1861463635077785e-07,
22805
+ "loss": 0.1275,
22806
+ "step": 2235
22807
+ },
22808
+ {
22809
+ "epoch": 0.9800482350361763,
22810
+ "eval_PRM Accuracy": 0.8968253968253969,
22811
+ "eval_PRM F1": 0.9417040358744395,
22812
+ "eval_PRM F1 AUC": 0.9033448106246925,
22813
+ "eval_PRM F1 Neg": 0.5517241379310345,
22814
+ "eval_PRM NPV": 0.8,
22815
+ "eval_PRM Precision": 0.9051724137931034,
22816
+ "eval_PRM Recall": 0.9813084112149533,
22817
+ "eval_PRM Specificty": 0.42105263157894735,
22818
+ "eval_loss": 0.2634440064430237,
22819
+ "eval_runtime": 5.1453,
22820
+ "eval_samples_per_second": 5.831,
22821
+ "eval_steps_per_second": 0.194,
22822
+ "step": 2235
22823
+ },
22824
+ {
22825
+ "epoch": 0.9804867353650515,
22826
+ "grad_norm": 0.8229340016430483,
22827
+ "learning_rate": 1.1340342142325267e-07,
22828
+ "loss": 0.1454,
22829
+ "step": 2236
22830
+ },
22831
+ {
22832
+ "epoch": 0.9809252356939268,
22833
+ "grad_norm": 0.700060712434519,
22834
+ "learning_rate": 1.0830913726952996e-07,
22835
+ "loss": 0.1681,
22836
+ "step": 2237
22837
+ },
22838
+ {
22839
+ "epoch": 0.981363736022802,
22840
+ "grad_norm": 0.8465474385209266,
22841
+ "learning_rate": 1.033317958302693e-07,
22842
+ "loss": 0.1452,
22843
+ "step": 2238
22844
+ },
22845
+ {
22846
+ "epoch": 0.9818022363516773,
22847
+ "grad_norm": 1.0379966371758056,
22848
+ "learning_rate": 9.847140877200512e-08,
22849
+ "loss": 0.2115,
22850
+ "step": 2239
22851
+ },
22852
+ {
22853
+ "epoch": 0.9822407366805526,
22854
+ "grad_norm": 0.7936350841083502,
22855
+ "learning_rate": 9.372798748716883e-08,
22856
+ "loss": 0.1219,
22857
+ "step": 2240
22858
+ },
22859
+ {
22860
+ "epoch": 0.9822407366805526,
22861
+ "eval_PRM Accuracy": 0.9047619047619048,
22862
+ "eval_PRM F1": 0.9464285714285714,
22863
+ "eval_PRM F1 AUC": 0.9021151008362027,
22864
+ "eval_PRM F1 Neg": 0.5714285714285714,
22865
+ "eval_PRM NPV": 0.8888888888888888,
22866
+ "eval_PRM Precision": 0.905982905982906,
22867
+ "eval_PRM Recall": 0.9906542056074766,
22868
+ "eval_PRM Specificty": 0.42105263157894735,
22869
+ "eval_loss": 0.2647298276424408,
22870
+ "eval_runtime": 5.0448,
22871
+ "eval_samples_per_second": 5.947,
22872
+ "eval_steps_per_second": 0.198,
22873
+ "step": 2240
22874
+ },
22875
+ {
22876
+ "epoch": 0.9826792370094277,
22877
+ "grad_norm": 0.7451721135727123,
22878
+ "learning_rate": 8.910154309400564e-08,
22879
+ "loss": 0.1528,
22880
+ "step": 2241
22881
+ },
22882
+ {
22883
+ "epoch": 0.983117737338303,
22884
+ "grad_norm": 0.7528869575190266,
22885
+ "learning_rate": 8.459208643659122e-08,
22886
+ "loss": 0.1965,
22887
+ "step": 2242
22888
+ },
22889
+ {
22890
+ "epoch": 0.9835562376671783,
22891
+ "grad_norm": 0.8245719648817869,
22892
+ "learning_rate": 8.01996280847761e-08,
22893
+ "loss": 0.1899,
22894
+ "step": 2243
22895
+ },
22896
+ {
22897
+ "epoch": 0.9839947379960535,
22898
+ "grad_norm": 0.6978686747600712,
22899
+ "learning_rate": 7.59241783341913e-08,
22900
+ "loss": 0.1777,
22901
+ "step": 2244
22902
+ },
22903
+ {
22904
+ "epoch": 0.9844332383249288,
22905
+ "grad_norm": 0.6555901389145737,
22906
+ "learning_rate": 7.176574720618723e-08,
22907
+ "loss": 0.1264,
22908
+ "step": 2245
22909
+ },
22910
+ {
22911
+ "epoch": 0.9844332383249288,
22912
+ "eval_PRM Accuracy": 0.8968253968253969,
22913
+ "eval_PRM F1": 0.9417040358744395,
22914
+ "eval_PRM F1 AUC": 0.9038366945400885,
22915
+ "eval_PRM F1 Neg": 0.5517241379310345,
22916
+ "eval_PRM NPV": 0.8,
22917
+ "eval_PRM Precision": 0.9051724137931034,
22918
+ "eval_PRM Recall": 0.9813084112149533,
22919
+ "eval_PRM Specificty": 0.42105263157894735,
22920
+ "eval_loss": 0.26510417461395264,
22921
+ "eval_runtime": 4.9026,
22922
+ "eval_samples_per_second": 6.119,
22923
+ "eval_steps_per_second": 0.204,
22924
+ "step": 2245
22925
+ },
22926
+ {
22927
+ "epoch": 0.984871738653804,
22928
+ "grad_norm": 0.6020348862072671,
22929
+ "learning_rate": 6.772434444785591e-08,
22930
+ "loss": 0.1462,
22931
+ "step": 2246
22932
+ },
22933
+ {
22934
+ "epoch": 0.9853102389826792,
22935
+ "grad_norm": 0.5343478606265442,
22936
+ "learning_rate": 6.379997953196437e-08,
22937
+ "loss": 0.1161,
22938
+ "step": 2247
22939
+ },
22940
+ {
22941
+ "epoch": 0.9857487393115545,
22942
+ "grad_norm": 1.0499703730680832,
22943
+ "learning_rate": 5.999266165694905e-08,
22944
+ "loss": 0.1321,
22945
+ "step": 2248
22946
+ },
22947
+ {
22948
+ "epoch": 0.9861872396404298,
22949
+ "grad_norm": 0.9390822132612041,
22950
+ "learning_rate": 5.630239974691032e-08,
22951
+ "loss": 0.177,
22952
+ "step": 2249
22953
+ },
22954
+ {
22955
+ "epoch": 0.986625739969305,
22956
+ "grad_norm": 0.7750388228245491,
22957
+ "learning_rate": 5.272920245156798e-08,
22958
+ "loss": 0.1124,
22959
+ "step": 2250
22960
+ },
22961
+ {
22962
+ "epoch": 0.986625739969305,
22963
+ "eval_PRM Accuracy": 0.8968253968253969,
22964
+ "eval_PRM F1": 0.9417040358744395,
22965
+ "eval_PRM F1 AUC": 0.9028529267092966,
22966
+ "eval_PRM F1 Neg": 0.5517241379310345,
22967
+ "eval_PRM NPV": 0.8,
22968
+ "eval_PRM Precision": 0.9051724137931034,
22969
+ "eval_PRM Recall": 0.9813084112149533,
22970
+ "eval_PRM Specificty": 0.42105263157894735,
22971
+ "eval_loss": 0.26339519023895264,
22972
+ "eval_runtime": 5.3029,
22973
+ "eval_samples_per_second": 5.657,
22974
+ "eval_steps_per_second": 0.189,
22975
+ "step": 2250
22976
+ },
22977
+ {
22978
+ "epoch": 0.9870642402981802,
22979
+ "grad_norm": 0.4372383231571483,
22980
+ "learning_rate": 4.927307814625026e-08,
22981
+ "loss": 0.099,
22982
+ "step": 2251
22983
+ },
22984
+ {
22985
+ "epoch": 0.9875027406270555,
22986
+ "grad_norm": 0.7555332839083383,
22987
+ "learning_rate": 4.5934034931882644e-08,
22988
+ "loss": 0.1293,
22989
+ "step": 2252
22990
+ },
22991
+ {
22992
+ "epoch": 0.9879412409559307,
22993
+ "grad_norm": 0.6698273061888639,
22994
+ "learning_rate": 4.2712080634949024e-08,
22995
+ "loss": 0.1398,
22996
+ "step": 2253
22997
+ },
22998
+ {
22999
+ "epoch": 0.988379741284806,
23000
+ "grad_norm": 1.0889584011710371,
23001
+ "learning_rate": 3.960722280749174e-08,
23002
+ "loss": 0.1537,
23003
+ "step": 2254
23004
+ },
23005
+ {
23006
+ "epoch": 0.9888182416136813,
23007
+ "grad_norm": 0.49362924819793047,
23008
+ "learning_rate": 3.6619468727083773e-08,
23009
+ "loss": 0.0824,
23010
+ "step": 2255
23011
+ },
23012
+ {
23013
+ "epoch": 0.9888182416136813,
23014
+ "eval_PRM Accuracy": 0.8968253968253969,
23015
+ "eval_PRM F1": 0.9417040358744395,
23016
+ "eval_PRM F1 AUC": 0.9016232169208066,
23017
+ "eval_PRM F1 Neg": 0.5517241379310345,
23018
+ "eval_PRM NPV": 0.8,
23019
+ "eval_PRM Precision": 0.9051724137931034,
23020
+ "eval_PRM Recall": 0.9813084112149533,
23021
+ "eval_PRM Specificty": 0.42105263157894735,
23022
+ "eval_loss": 0.2658854126930237,
23023
+ "eval_runtime": 5.0593,
23024
+ "eval_samples_per_second": 5.93,
23025
+ "eval_steps_per_second": 0.198,
23026
+ "step": 2255
23027
+ },
23028
+ {
23029
+ "epoch": 0.9892567419425564,
23030
+ "grad_norm": 1.0037748102150028,
23031
+ "learning_rate": 3.374882539681767e-08,
23032
+ "loss": 0.1385,
23033
+ "step": 2256
23034
+ },
23035
+ {
23036
+ "epoch": 0.9896952422714317,
23037
+ "grad_norm": 0.8214090899116598,
23038
+ "learning_rate": 3.099529954528335e-08,
23039
+ "loss": 0.1463,
23040
+ "step": 2257
23041
+ },
23042
+ {
23043
+ "epoch": 0.990133742600307,
23044
+ "grad_norm": 0.7813408410559692,
23045
+ "learning_rate": 2.8358897626556968e-08,
23046
+ "loss": 0.1303,
23047
+ "step": 2258
23048
+ },
23049
+ {
23050
+ "epoch": 0.9905722429291822,
23051
+ "grad_norm": 0.6352466813644553,
23052
+ "learning_rate": 2.583962582018984e-08,
23053
+ "loss": 0.1169,
23054
+ "step": 2259
23055
+ },
23056
+ {
23057
+ "epoch": 0.9910107432580575,
23058
+ "grad_norm": 0.7860685137760435,
23059
+ "learning_rate": 2.3437490031180676e-08,
23060
+ "loss": 0.1381,
23061
+ "step": 2260
23062
+ },
23063
+ {
23064
+ "epoch": 0.9910107432580575,
23065
+ "eval_PRM Accuracy": 0.8968253968253969,
23066
+ "eval_PRM F1": 0.9417040358744395,
23067
+ "eval_PRM F1 AUC": 0.9028529267092966,
23068
+ "eval_PRM F1 Neg": 0.5517241379310345,
23069
+ "eval_PRM NPV": 0.8,
23070
+ "eval_PRM Precision": 0.9051724137931034,
23071
+ "eval_PRM Recall": 0.9813084112149533,
23072
+ "eval_PRM Specificty": 0.42105263157894735,
23073
+ "eval_loss": 0.26518553495407104,
23074
+ "eval_runtime": 4.9722,
23075
+ "eval_samples_per_second": 6.034,
23076
+ "eval_steps_per_second": 0.201,
23077
+ "step": 2260
23078
+ },
23079
+ {
23080
+ "epoch": 0.9914492435869326,
23081
+ "grad_norm": 0.6868815272750708,
23082
+ "learning_rate": 2.1152495889970035e-08,
23083
+ "loss": 0.146,
23084
+ "step": 2261
23085
+ },
23086
+ {
23087
+ "epoch": 0.9918877439158079,
23088
+ "grad_norm": 1.1308574836346492,
23089
+ "learning_rate": 1.8984648752429225e-08,
23090
+ "loss": 0.1972,
23091
+ "step": 2262
23092
+ },
23093
+ {
23094
+ "epoch": 0.9923262442446832,
23095
+ "grad_norm": 1.3612719504990785,
23096
+ "learning_rate": 1.693395369984363e-08,
23097
+ "loss": 0.186,
23098
+ "step": 2263
23099
+ },
23100
+ {
23101
+ "epoch": 0.9927647445735585,
23102
+ "grad_norm": 0.7177351398294232,
23103
+ "learning_rate": 1.5000415538901636e-08,
23104
+ "loss": 0.164,
23105
+ "step": 2264
23106
+ },
23107
+ {
23108
+ "epoch": 0.9932032449024337,
23109
+ "grad_norm": 0.7130993275571453,
23110
+ "learning_rate": 1.3184038801683508e-08,
23111
+ "loss": 0.133,
23112
+ "step": 2265
23113
+ },
23114
+ {
23115
+ "epoch": 0.9932032449024337,
23116
+ "eval_PRM Accuracy": 0.8968253968253969,
23117
+ "eval_PRM F1": 0.9417040358744395,
23118
+ "eval_PRM F1 AUC": 0.9023610427939006,
23119
+ "eval_PRM F1 Neg": 0.5517241379310345,
23120
+ "eval_PRM NPV": 0.8,
23121
+ "eval_PRM Precision": 0.9051724137931034,
23122
+ "eval_PRM Recall": 0.9813084112149533,
23123
+ "eval_PRM Specificty": 0.42105263157894735,
23124
+ "eval_loss": 0.265625,
23125
+ "eval_runtime": 5.1737,
23126
+ "eval_samples_per_second": 5.799,
23127
+ "eval_steps_per_second": 0.193,
23128
+ "step": 2265
23129
+ },
23130
+ {
23131
+ "epoch": 0.9936417452313089,
23132
+ "grad_norm": 0.722029153275214,
23133
+ "learning_rate": 1.1484827745655846e-08,
23134
+ "loss": 0.1271,
23135
+ "step": 2266
23136
+ },
23137
+ {
23138
+ "epoch": 0.9940802455601842,
23139
+ "grad_norm": 0.7513117017757962,
23140
+ "learning_rate": 9.902786353649385e-09,
23141
+ "loss": 0.1868,
23142
+ "step": 2267
23143
+ },
23144
+ {
23145
+ "epoch": 0.9945187458890594,
23146
+ "grad_norm": 0.6226798334100295,
23147
+ "learning_rate": 8.437918333864536e-09,
23148
+ "loss": 0.1267,
23149
+ "step": 2268
23150
+ },
23151
+ {
23152
+ "epoch": 0.9949572462179347,
23153
+ "grad_norm": 0.918389018849689,
23154
+ "learning_rate": 7.09022711984364e-09,
23155
+ "loss": 0.1684,
23156
+ "step": 2269
23157
+ },
23158
+ {
23159
+ "epoch": 0.99539574654681,
23160
+ "grad_norm": 0.5928183592332686,
23161
+ "learning_rate": 5.859715870498716e-09,
23162
+ "loss": 0.1017,
23163
+ "step": 2270
23164
+ },
23165
+ {
23166
+ "epoch": 0.99539574654681,
23167
+ "eval_PRM Accuracy": 0.8968253968253969,
23168
+ "eval_PRM F1": 0.9417040358744395,
23169
+ "eval_PRM F1 AUC": 0.9033448106246926,
23170
+ "eval_PRM F1 Neg": 0.5517241379310345,
23171
+ "eval_PRM NPV": 0.8,
23172
+ "eval_PRM Precision": 0.9051724137931034,
23173
+ "eval_PRM Recall": 0.9813084112149533,
23174
+ "eval_PRM Specificty": 0.42105263157894735,
23175
+ "eval_loss": 0.2633626163005829,
23176
+ "eval_runtime": 4.8567,
23177
+ "eval_samples_per_second": 6.177,
23178
+ "eval_steps_per_second": 0.206,
23179
+ "step": 2270
23180
+ },
23181
+ {
23182
+ "epoch": 0.9958342468756851,
23183
+ "grad_norm": 0.7976166036651872,
23184
+ "learning_rate": 4.746387470044855e-09,
23185
+ "loss": 0.1435,
23186
+ "step": 2271
23187
+ },
23188
+ {
23189
+ "epoch": 0.9962727472045604,
23190
+ "grad_norm": 0.6341820836388087,
23191
+ "learning_rate": 3.750244528066826e-09,
23192
+ "loss": 0.1243,
23193
+ "step": 2272
23194
+ },
23195
+ {
23196
+ "epoch": 0.9967112475334357,
23197
+ "grad_norm": 1.5392080360921296,
23198
+ "learning_rate": 2.8712893794413665e-09,
23199
+ "loss": 0.0712,
23200
+ "step": 2273
23201
+ },
23202
+ {
23203
+ "epoch": 0.9971497478623109,
23204
+ "grad_norm": 0.6412650064731606,
23205
+ "learning_rate": 2.109524084381587e-09,
23206
+ "loss": 0.1226,
23207
+ "step": 2274
23208
+ },
23209
+ {
23210
+ "epoch": 0.9975882481911862,
23211
+ "grad_norm": 0.4398479433907805,
23212
+ "learning_rate": 1.4649504284203198e-09,
23213
+ "loss": 0.11,
23214
+ "step": 2275
23215
+ },
23216
+ {
23217
+ "epoch": 0.9975882481911862,
23218
+ "eval_PRM Accuracy": 0.9047619047619048,
23219
+ "eval_PRM F1": 0.9464285714285714,
23220
+ "eval_PRM F1 AUC": 0.9048204623708804,
23221
+ "eval_PRM F1 Neg": 0.5714285714285714,
23222
+ "eval_PRM NPV": 0.8888888888888888,
23223
+ "eval_PRM Precision": 0.905982905982906,
23224
+ "eval_PRM Recall": 0.9906542056074766,
23225
+ "eval_PRM Specificty": 0.42105263157894735,
23226
+ "eval_loss": 0.2633300721645355,
23227
+ "eval_runtime": 5.1512,
23228
+ "eval_samples_per_second": 5.824,
23229
+ "eval_steps_per_second": 0.194,
23230
+ "step": 2275
23231
+ },
23232
+ {
23233
+ "epoch": 0.9980267485200613,
23234
+ "grad_norm": 0.9910158250626084,
23235
+ "learning_rate": 9.375699223879153e-10,
23236
+ "loss": 0.2007,
23237
+ "step": 2276
23238
+ },
23239
+ {
23240
+ "epoch": 0.9984652488489366,
23241
+ "grad_norm": 0.6291837367255644,
23242
+ "learning_rate": 5.273838024344446e-10,
23243
+ "loss": 0.1029,
23244
+ "step": 2277
23245
+ },
23246
+ {
23247
+ "epoch": 0.9989037491778119,
23248
+ "grad_norm": 0.9883836422855934,
23249
+ "learning_rate": 2.3439302999639366e-10,
23250
+ "loss": 0.2552,
23251
+ "step": 2278
23252
+ },
23253
+ {
23254
+ "epoch": 0.9993422495066872,
23255
+ "grad_norm": 0.6843970949709545,
23256
+ "learning_rate": 5.859829183552101e-11,
23257
+ "loss": 0.1788,
23258
+ "step": 2279
23259
+ },
23260
+ {
23261
+ "epoch": 0.9997807498355624,
23262
+ "grad_norm": 1.0098250537966291,
23263
+ "learning_rate": 0.0,
23264
+ "loss": 0.22,
23265
+ "step": 2280
23266
+ },
23267
+ {
23268
+ "epoch": 0.9997807498355624,
23269
+ "eval_PRM Accuracy": 0.8968253968253969,
23270
+ "eval_PRM F1": 0.9417040358744395,
23271
+ "eval_PRM F1 AUC": 0.9021151008362027,
23272
+ "eval_PRM F1 Neg": 0.5517241379310345,
23273
+ "eval_PRM NPV": 0.8,
23274
+ "eval_PRM Precision": 0.9051724137931034,
23275
+ "eval_PRM Recall": 0.9813084112149533,
23276
+ "eval_PRM Specificty": 0.42105263157894735,
23277
+ "eval_loss": 0.2653645873069763,
23278
+ "eval_runtime": 5.1575,
23279
+ "eval_samples_per_second": 5.817,
23280
+ "eval_steps_per_second": 0.194,
23281
+ "step": 2280
23282
  }
23283
  ],
23284
  "logging_steps": 1,
 
23293
  "should_evaluate": false,
23294
  "should_log": false,
23295
  "should_save": true,
23296
+ "should_training_stop": true
23297
  },
23298
  "attributes": {}
23299
  }
23300
  },
23301
+ "total_flos": 3531784782086144.0,
23302
  "train_batch_size": 2,
23303
  "trial_name": null,
23304
  "trial_params": null