Training in progress, step 1008
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +14 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 819328
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db41925edc36f9bfdff03a589bbac51ede28874bd2e4740b375bdf7af59f70bd
|
| 3 |
size 819328
|
trainer_log.jsonl
CHANGED
|
@@ -205,3 +205,17 @@
|
|
| 205 |
{"current_steps": 950, "total_steps": 1250, "loss": 0.3589, "lr": 0.004994208776927635, "epoch": 7.6, "percentage": 76.0, "elapsed_time": "0:02:20", "remaining_time": "0:00:44", "throughput": 2662.14, "total_tokens": 372928}
|
| 206 |
{"current_steps": 955, "total_steps": 1250, "loss": 0.3537, "lr": 0.004839154538973943, "epoch": 7.64, "percentage": 76.4, "elapsed_time": "0:02:20", "remaining_time": "0:00:43", "throughput": 2665.09, "total_tokens": 374816}
|
| 207 |
{"current_steps": 960, "total_steps": 1250, "loss": 0.3578, "lr": 0.00468608117797549, "epoch": 7.68, "percentage": 76.8, "elapsed_time": "0:02:21", "remaining_time": "0:00:42", "throughput": 2667.96, "total_tokens": 376704}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 205 |
{"current_steps": 950, "total_steps": 1250, "loss": 0.3589, "lr": 0.004994208776927635, "epoch": 7.6, "percentage": 76.0, "elapsed_time": "0:02:20", "remaining_time": "0:00:44", "throughput": 2662.14, "total_tokens": 372928}
|
| 206 |
{"current_steps": 955, "total_steps": 1250, "loss": 0.3537, "lr": 0.004839154538973943, "epoch": 7.64, "percentage": 76.4, "elapsed_time": "0:02:20", "remaining_time": "0:00:43", "throughput": 2665.09, "total_tokens": 374816}
|
| 207 |
{"current_steps": 960, "total_steps": 1250, "loss": 0.3578, "lr": 0.00468608117797549, "epoch": 7.68, "percentage": 76.8, "elapsed_time": "0:02:21", "remaining_time": "0:00:42", "throughput": 2667.96, "total_tokens": 376704}
|
| 208 |
+
{"current_steps": 965, "total_steps": 1250, "loss": 0.3526, "lr": 0.004535018535887305, "epoch": 7.72, "percentage": 77.2, "elapsed_time": "0:02:21", "remaining_time": "0:00:41", "throughput": 2670.94, "total_tokens": 378624}
|
| 209 |
+
{"current_steps": 970, "total_steps": 1250, "loss": 0.3456, "lr": 0.004385996062670774, "epoch": 7.76, "percentage": 77.6, "elapsed_time": "0:02:22", "remaining_time": "0:00:41", "throughput": 2673.46, "total_tokens": 380480}
|
| 210 |
+
{"current_steps": 975, "total_steps": 1250, "loss": 0.3575, "lr": 0.0042390428105523225, "epoch": 7.8, "percentage": 78.0, "elapsed_time": "0:02:22", "remaining_time": "0:00:40", "throughput": 2675.12, "total_tokens": 382144}
|
| 211 |
+
{"current_steps": 980, "total_steps": 1250, "loss": 0.3544, "lr": 0.004094187428359625, "epoch": 7.84, "percentage": 78.4, "elapsed_time": "0:02:23", "remaining_time": "0:00:39", "throughput": 2679.33, "total_tokens": 384320}
|
| 212 |
+
{"current_steps": 985, "total_steps": 1250, "loss": 0.3762, "lr": 0.003951458155936452, "epoch": 7.88, "percentage": 78.8, "elapsed_time": "0:02:24", "remaining_time": "0:00:38", "throughput": 2684.29, "total_tokens": 386656}
|
| 213 |
+
{"current_steps": 990, "total_steps": 1250, "loss": 0.3497, "lr": 0.0038108828186372685, "epoch": 7.92, "percentage": 79.2, "elapsed_time": "0:02:24", "remaining_time": "0:00:37", "throughput": 2687.75, "total_tokens": 388672}
|
| 214 |
+
{"current_steps": 995, "total_steps": 1250, "loss": 0.3919, "lr": 0.003672488821902614, "epoch": 7.96, "percentage": 79.6, "elapsed_time": "0:02:25", "remaining_time": "0:00:37", "throughput": 2689.8, "total_tokens": 390400}
|
| 215 |
+
{"current_steps": 1000, "total_steps": 1250, "loss": 0.3462, "lr": 0.0035363031459163647, "epoch": 8.0, "percentage": 80.0, "elapsed_time": "0:02:25", "remaining_time": "0:00:36", "throughput": 2689.25, "total_tokens": 392080}
|
| 216 |
+
{"current_steps": 1005, "total_steps": 1250, "loss": 0.3555, "lr": 0.0034023523403458908, "epoch": 8.04, "percentage": 80.4, "elapsed_time": "0:02:26", "remaining_time": "0:00:35", "throughput": 2688.24, "total_tokens": 394160}
|
| 217 |
+
{"current_steps": 1008, "total_steps": 1250, "eval_loss": 0.35737964510917664, "epoch": 8.064, "percentage": 80.64, "elapsed_time": "0:02:27", "remaining_time": "0:00:35", "throughput": 2672.08, "total_tokens": 395216}
|
| 218 |
+
{"current_steps": 1010, "total_steps": 1250, "loss": 0.3613, "lr": 0.003270662519166149, "epoch": 8.08, "percentage": 80.8, "elapsed_time": "0:02:29", "remaining_time": "0:00:35", "throughput": 2655.77, "total_tokens": 395888}
|
| 219 |
+
{"current_steps": 1015, "total_steps": 1250, "loss": 0.3469, "lr": 0.003141259355568705, "epoch": 8.12, "percentage": 81.2, "elapsed_time": "0:02:29", "remaining_time": "0:00:34", "throughput": 2659.52, "total_tokens": 398032}
|
| 220 |
+
{"current_steps": 1020, "total_steps": 1250, "loss": 0.3543, "lr": 0.003014168076956707, "epoch": 8.16, "percentage": 81.6, "elapsed_time": "0:02:30", "remaining_time": "0:00:33", "throughput": 2661.86, "total_tokens": 399856}
|
| 221 |
+
{"current_steps": 1025, "total_steps": 1250, "loss": 0.3636, "lr": 0.002889413460026724, "epoch": 8.2, "percentage": 82.0, "elapsed_time": "0:02:30", "remaining_time": "0:00:33", "throughput": 2663.87, "total_tokens": 401616}
|