Draoi commited on
Commit
10f9285
·
verified ·
1 Parent(s): 423177b

Training in progress, epoch 1

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f91f93cb88b9e525f62d09557e80a43d9a0fafca8b34615983b234abd823d57f
3
  size 598442860
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09f7b30b1c91eb4fc5bcde57a101edd4f5d5745f7c2121a1c6cd5d993fc6ac0d
3
  size 598442860
run-0/checkpoint-840/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37789a41472adb35a61a31f3e47889380027afb577757dbea18438e9703ab63b
3
  size 1196975482
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:016b3a5455a155ed6846fc988e3bb8c603d5a7153e13239972d47aa4d7f29dc9
3
  size 1196975482
run-0/checkpoint-840/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:399209c54274067d13fb3c584101a071f31ac1198b71c7ac31aeb2e750b5136b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e73e59b6e5b9056ac0e8920fff852bc272c5c86a3b059cb9c4d16269570f8731
3
  size 14244
run-1/checkpoint-1678/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8377921634e61cee47c92077de601dfbcc9a23240486c0b902c7dd5a5b41a9e2
3
  size 598442860
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09f7b30b1c91eb4fc5bcde57a101edd4f5d5745f7c2121a1c6cd5d993fc6ac0d
3
  size 598442860
run-1/checkpoint-1678/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41995bc6a21917b2cb3ee533d2efbfed828f812c61fd5fd165f587fa6bcb161f
3
  size 1196975482
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d309d824c67e3e0400b3bc3cdf4130fb1c6cef145f16479dc1747d2e81b3e546
3
  size 1196975482
run-1/checkpoint-1678/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e73e59b6e5b9056ac0e8920fff852bc272c5c86a3b059cb9c4d16269570f8731
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8bd53d0fd3ae53de76f4ce4413bda2fac7f45845191ef3f62010be4991b2ff9
3
  size 14244
run-1/checkpoint-1678/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:77378d6ce1a8f3f903aaefb0376510c054e8823dec8ff6df70d9b6f9d3ca2d9f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6ff66996cb8fcb94533441a340b3ea014b1520ea065f0e2365c2e4c21ed5fec
3
  size 1064
run-1/checkpoint-1678/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.38195738196372986,
3
  "best_model_checkpoint": "modernbert-financial-sentiment/run-1/checkpoint-1678",
4
- "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 1678,
7
  "is_hyper_param_search": true,
@@ -9,53 +9,41 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.5959475566150179,
13
- "grad_norm": 3.4977939128875732,
14
- "learning_rate": 0.00023796275402835382,
15
- "loss": 0.6709,
16
  "step": 500
17
  },
18
  {
19
- "epoch": 1.0,
20
- "eval_accuracy": 0.833134684147795,
21
- "eval_f1": 0.8303703737833891,
22
- "eval_loss": 0.4791373908519745,
23
- "eval_precision": 0.8296883223434215,
24
- "eval_recall": 0.833134684147795,
25
- "eval_runtime": 18.2507,
26
- "eval_samples_per_second": 91.942,
27
- "eval_steps_per_second": 11.506,
28
- "step": 839
29
- },
30
- {
31
- "epoch": 1.1918951132300357,
32
- "grad_norm": 3.490213632583618,
33
- "learning_rate": 0.00013695988729305934,
34
- "loss": 0.4573,
35
  "step": 1000
36
  },
37
  {
38
- "epoch": 1.7878426698450536,
39
- "grad_norm": 3.538949489593506,
40
- "learning_rate": 3.595702055776484e-05,
41
- "loss": 0.294,
42
  "step": 1500
43
  },
44
  {
45
- "epoch": 2.0,
46
- "eval_accuracy": 0.8694874851013111,
47
- "eval_f1": 0.8682786483149874,
48
- "eval_loss": 0.38195738196372986,
49
- "eval_precision": 0.868142253395164,
50
- "eval_recall": 0.8694874851013111,
51
- "eval_runtime": 18.3643,
52
- "eval_samples_per_second": 91.373,
53
- "eval_steps_per_second": 11.435,
54
  "step": 1678
55
  }
56
  ],
57
  "logging_steps": 500,
58
- "max_steps": 1678,
59
  "num_input_tokens_seen": 0,
60
  "num_train_epochs": 2,
61
  "save_steps": 500,
@@ -66,22 +54,22 @@
66
  "should_evaluate": false,
67
  "should_log": false,
68
  "should_save": true,
69
- "should_training_stop": true
70
  },
71
  "attributes": {}
72
  }
73
  },
74
- "total_flos": 2044305680147712.0,
75
- "train_batch_size": 16,
76
  "trial_name": null,
77
  "trial_params": {
78
- "adam_epsilon": 1.658518398119787e-08,
79
- "dropout": 0.0017330050952681364,
80
- "learning_rate": 0.0002935143307327658,
81
- "lr_scheduler_type": "linear",
82
  "num_train_epochs": 2,
83
- "per_device_train_batch_size": 16,
84
- "warmup_ratio": 0.13352557567852008,
85
- "weight_decay": 0.09108802122030084
86
  }
87
  }
 
1
  {
2
+ "best_metric": 0.3714371919631958,
3
  "best_model_checkpoint": "modernbert-financial-sentiment/run-1/checkpoint-1678",
4
+ "epoch": 1.0,
5
  "eval_steps": 500,
6
  "global_step": 1678,
7
  "is_hyper_param_search": true,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.29797377830750893,
13
+ "grad_norm": 5.944282531738281,
14
+ "learning_rate": 2.8367040754379768e-05,
15
+ "loss": 0.7489,
16
  "step": 500
17
  },
18
  {
19
+ "epoch": 0.5959475566150179,
20
+ "grad_norm": 7.314896583557129,
21
+ "learning_rate": 2.8783421399749905e-05,
22
+ "loss": 0.494,
 
 
 
 
 
 
 
 
 
 
 
 
23
  "step": 1000
24
  },
25
  {
26
+ "epoch": 0.8939213349225268,
27
+ "grad_norm": 9.174110412597656,
28
+ "learning_rate": 2.2763209312337626e-05,
29
+ "loss": 0.399,
30
  "step": 1500
31
  },
32
  {
33
+ "epoch": 1.0,
34
+ "eval_accuracy": 0.8784266984505363,
35
+ "eval_f1": 0.8771538940867877,
36
+ "eval_loss": 0.3714371919631958,
37
+ "eval_precision": 0.8787001249469087,
38
+ "eval_recall": 0.8784266984505363,
39
+ "eval_runtime": 17.0799,
40
+ "eval_samples_per_second": 98.244,
41
+ "eval_steps_per_second": 12.295,
42
  "step": 1678
43
  }
44
  ],
45
  "logging_steps": 500,
46
+ "max_steps": 3356,
47
  "num_input_tokens_seen": 0,
48
  "num_train_epochs": 2,
49
  "save_steps": 500,
 
54
  "should_evaluate": false,
55
  "should_log": false,
56
  "should_save": true,
57
+ "should_training_stop": false
58
  },
59
  "attributes": {}
60
  }
61
  },
62
+ "total_flos": 1022280625152000.0,
63
+ "train_batch_size": 8,
64
  "trial_name": null,
65
  "trial_params": {
66
+ "adam_epsilon": 4.528050525370297e-07,
67
+ "dropout": 0.2907986120031344,
68
+ "learning_rate": 3.074987217774767e-05,
69
+ "lr_scheduler_type": "cosine",
70
  "num_train_epochs": 2,
71
+ "per_device_train_batch_size": 8,
72
+ "warmup_ratio": 0.1614840440413937,
73
+ "weight_decay": 0.07691241332138511
74
  }
75
  }
run-1/checkpoint-1678/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04cf6cefe1f017323156a5a30caf80658f8089fb6b585684f5ac1ae40be4a5c5
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db5fa3bf0a9ce2f9a5a0913c1298bfdd80544ceba840b21f3da340205c796f9d
3
  size 5432
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e09ed9adf72eedc40701f01052aa920844eae0235f04830a6574f45c45113830
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db5fa3bf0a9ce2f9a5a0913c1298bfdd80544ceba840b21f3da340205c796f9d
3
  size 5432