Training in progress, step 1071
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +13 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 819328
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34500ac4bcc3aea3a09511805ae1142a6f9fbfe5d8d5da2c51fe1ca165924a4a
|
| 3 |
size 819328
|
trainer_log.jsonl
CHANGED
|
@@ -219,3 +219,16 @@
|
|
| 219 |
{"current_steps": 1015, "total_steps": 1250, "loss": 0.3469, "lr": 0.003141259355568705, "epoch": 8.12, "percentage": 81.2, "elapsed_time": "0:02:29", "remaining_time": "0:00:34", "throughput": 2659.52, "total_tokens": 398032}
|
| 220 |
{"current_steps": 1020, "total_steps": 1250, "loss": 0.3543, "lr": 0.003014168076956707, "epoch": 8.16, "percentage": 81.6, "elapsed_time": "0:02:30", "remaining_time": "0:00:33", "throughput": 2661.86, "total_tokens": 399856}
|
| 221 |
{"current_steps": 1025, "total_steps": 1250, "loss": 0.3636, "lr": 0.002889413460026724, "epoch": 8.2, "percentage": 82.0, "elapsed_time": "0:02:30", "remaining_time": "0:00:33", "throughput": 2663.87, "total_tokens": 401616}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
{"current_steps": 1015, "total_steps": 1250, "loss": 0.3469, "lr": 0.003141259355568705, "epoch": 8.12, "percentage": 81.2, "elapsed_time": "0:02:29", "remaining_time": "0:00:34", "throughput": 2659.52, "total_tokens": 398032}
|
| 220 |
{"current_steps": 1020, "total_steps": 1250, "loss": 0.3543, "lr": 0.003014168076956707, "epoch": 8.16, "percentage": 81.6, "elapsed_time": "0:02:30", "remaining_time": "0:00:33", "throughput": 2661.86, "total_tokens": 399856}
|
| 221 |
{"current_steps": 1025, "total_steps": 1250, "loss": 0.3636, "lr": 0.002889413460026724, "epoch": 8.2, "percentage": 82.0, "elapsed_time": "0:02:30", "remaining_time": "0:00:33", "throughput": 2663.87, "total_tokens": 401616}
|
| 222 |
+
{"current_steps": 1030, "total_steps": 1250, "loss": 0.3521, "lr": 0.0027670198259385275, "epoch": 8.24, "percentage": 82.4, "elapsed_time": "0:02:31", "remaining_time": "0:00:32", "throughput": 2666.86, "total_tokens": 403568}
|
| 223 |
+
{"current_steps": 1035, "total_steps": 1250, "loss": 0.3611, "lr": 0.0026470110355735882, "epoch": 8.28, "percentage": 82.8, "elapsed_time": "0:02:31", "remaining_time": "0:00:31", "throughput": 2670.85, "total_tokens": 405712}
|
| 224 |
+
{"current_steps": 1040, "total_steps": 1250, "loss": 0.3538, "lr": 0.0025294104848833754, "epoch": 8.32, "percentage": 83.2, "elapsed_time": "0:02:32", "remaining_time": "0:00:30", "throughput": 2674.96, "total_tokens": 407888}
|
| 225 |
+
{"current_steps": 1045, "total_steps": 1250, "loss": 0.3559, "lr": 0.002414241100328251, "epoch": 8.36, "percentage": 83.6, "elapsed_time": "0:02:33", "remaining_time": "0:00:30", "throughput": 2677.28, "total_tokens": 409712}
|
| 226 |
+
{"current_steps": 1050, "total_steps": 1250, "loss": 0.3549, "lr": 0.002301525334407931, "epoch": 8.4, "percentage": 84.0, "elapsed_time": "0:02:33", "remaining_time": "0:00:29", "throughput": 2679.41, "total_tokens": 411504}
|
| 227 |
+
{"current_steps": 1055, "total_steps": 1250, "loss": 0.3515, "lr": 0.0021912851612843243, "epoch": 8.44, "percentage": 84.4, "elapsed_time": "0:02:34", "remaining_time": "0:00:28", "throughput": 2680.98, "total_tokens": 413168}
|
| 228 |
+
{"current_steps": 1060, "total_steps": 1250, "loss": 0.3453, "lr": 0.002083542072497606, "epoch": 8.48, "percentage": 84.8, "elapsed_time": "0:02:34", "remaining_time": "0:00:27", "throughput": 2683.24, "total_tokens": 414960}
|
| 229 |
+
{"current_steps": 1065, "total_steps": 1250, "loss": 0.3562, "lr": 0.001978317072776413, "epoch": 8.52, "percentage": 85.2, "elapsed_time": "0:02:35", "remaining_time": "0:00:26", "throughput": 2685.95, "total_tokens": 416880}
|
| 230 |
+
{"current_steps": 1070, "total_steps": 1250, "loss": 0.3562, "lr": 0.0018756306759429363, "epoch": 8.56, "percentage": 85.6, "elapsed_time": "0:02:35", "remaining_time": "0:00:26", "throughput": 2688.49, "total_tokens": 418768}
|
| 231 |
+
{"current_steps": 1071, "total_steps": 1250, "eval_loss": 0.354716032743454, "epoch": 8.568, "percentage": 85.68, "elapsed_time": "0:02:36", "remaining_time": "0:00:26", "throughput": 2672.09, "total_tokens": 419184}
|
| 232 |
+
{"current_steps": 1075, "total_steps": 1250, "loss": 0.3512, "lr": 0.001775502900913697, "epoch": 8.6, "percentage": 86.0, "elapsed_time": "0:02:38", "remaining_time": "0:00:25", "throughput": 2655.84, "total_tokens": 421008}
|
| 233 |
+
{"current_steps": 1080, "total_steps": 1250, "loss": 0.352, "lr": 0.0016779532677968327, "epoch": 8.64, "percentage": 86.4, "elapsed_time": "0:02:39", "remaining_time": "0:00:25", "throughput": 2658.3, "total_tokens": 422864}
|
| 234 |
+
{"current_steps": 1085, "total_steps": 1250, "loss": 0.3618, "lr": 0.0015830007940866035, "epoch": 8.68, "percentage": 86.8, "elapsed_time": "0:02:39", "remaining_time": "0:00:24", "throughput": 2662.01, "total_tokens": 424976}
|