Training in progress, step 315
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +14 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 819328
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f5a23a86c0de40afe8528a055186b899204358bfe29286550af82adef721cbc5
|
| 3 |
size 819328
|
trainer_log.jsonl
CHANGED
|
@@ -55,3 +55,17 @@
|
|
| 55 |
{"current_steps": 255, "total_steps": 1250, "loss": 0.36, "lr": 0.029037205137577363, "epoch": 2.04, "percentage": 20.4, "elapsed_time": "0:00:38", "remaining_time": "0:02:30", "throughput": 2605.71, "total_tokens": 100224}
|
| 56 |
{"current_steps": 260, "total_steps": 1250, "loss": 0.4149, "lr": 0.02896201285410813, "epoch": 2.08, "percentage": 20.8, "elapsed_time": "0:00:39", "remaining_time": "0:02:28", "throughput": 2611.62, "total_tokens": 101920}
|
| 57 |
{"current_steps": 265, "total_steps": 1250, "loss": 0.3722, "lr": 0.028884098648568782, "epoch": 2.12, "percentage": 21.2, "elapsed_time": "0:00:39", "remaining_time": "0:02:27", "throughput": 2622.6, "total_tokens": 103808}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
{"current_steps": 255, "total_steps": 1250, "loss": 0.36, "lr": 0.029037205137577363, "epoch": 2.04, "percentage": 20.4, "elapsed_time": "0:00:38", "remaining_time": "0:02:30", "throughput": 2605.71, "total_tokens": 100224}
|
| 56 |
{"current_steps": 260, "total_steps": 1250, "loss": 0.4149, "lr": 0.02896201285410813, "epoch": 2.08, "percentage": 20.8, "elapsed_time": "0:00:39", "remaining_time": "0:02:28", "throughput": 2611.62, "total_tokens": 101920}
|
| 57 |
{"current_steps": 265, "total_steps": 1250, "loss": 0.3722, "lr": 0.028884098648568782, "epoch": 2.12, "percentage": 21.2, "elapsed_time": "0:00:39", "remaining_time": "0:02:27", "throughput": 2622.6, "total_tokens": 103808}
|
| 58 |
+
{"current_steps": 270, "total_steps": 1250, "loss": 0.4172, "lr": 0.028803477710488055, "epoch": 2.16, "percentage": 21.6, "elapsed_time": "0:00:40", "remaining_time": "0:02:25", "throughput": 2635.79, "total_tokens": 105920}
|
| 59 |
+
{"current_steps": 275, "total_steps": 1250, "loss": 0.38, "lr": 0.028720165757077573, "epoch": 2.2, "percentage": 22.0, "elapsed_time": "0:00:40", "remaining_time": "0:02:24", "throughput": 2652.6, "total_tokens": 108160}
|
| 60 |
+
{"current_steps": 280, "total_steps": 1250, "loss": 0.49, "lr": 0.02863417903016773, "epoch": 2.24, "percentage": 22.4, "elapsed_time": "0:00:41", "remaining_time": "0:02:23", "throughput": 2660.81, "total_tokens": 109920}
|
| 61 |
+
{"current_steps": 285, "total_steps": 1250, "loss": 0.7063, "lr": 0.02854553429304131, "epoch": 2.2800000000000002, "percentage": 22.8, "elapsed_time": "0:00:41", "remaining_time": "0:02:21", "throughput": 2672.36, "total_tokens": 111904}
|
| 62 |
+
{"current_steps": 290, "total_steps": 1250, "loss": 0.4212, "lr": 0.02845424882716545, "epoch": 2.32, "percentage": 23.2, "elapsed_time": "0:00:42", "remaining_time": "0:02:20", "throughput": 2678.32, "total_tokens": 113632}
|
| 63 |
+
{"current_steps": 295, "total_steps": 1250, "loss": 0.418, "lr": 0.028360340428822597, "epoch": 2.36, "percentage": 23.6, "elapsed_time": "0:00:42", "remaining_time": "0:02:19", "throughput": 2688.92, "total_tokens": 115616}
|
| 64 |
+
{"current_steps": 300, "total_steps": 1250, "loss": 0.4265, "lr": 0.028263827405641085, "epoch": 2.4, "percentage": 24.0, "elapsed_time": "0:00:43", "remaining_time": "0:02:17", "throughput": 2697.53, "total_tokens": 117472}
|
| 65 |
+
{"current_steps": 305, "total_steps": 1250, "loss": 0.3562, "lr": 0.028164728573026005, "epoch": 2.44, "percentage": 24.4, "elapsed_time": "0:00:44", "remaining_time": "0:02:16", "throughput": 2710.33, "total_tokens": 119616}
|
| 66 |
+
{"current_steps": 310, "total_steps": 1250, "loss": 0.4739, "lr": 0.02806306325049113, "epoch": 2.48, "percentage": 24.8, "elapsed_time": "0:00:44", "remaining_time": "0:02:15", "throughput": 2719.64, "total_tokens": 121568}
|
| 67 |
+
{"current_steps": 315, "total_steps": 1250, "loss": 0.384, "lr": 0.027958851257892527, "epoch": 2.52, "percentage": 25.2, "elapsed_time": "0:00:45", "remaining_time": "0:02:14", "throughput": 2734.73, "total_tokens": 123904}
|
| 68 |
+
{"current_steps": 315, "total_steps": 1250, "eval_loss": 0.3612143099308014, "epoch": 2.52, "percentage": 25.2, "elapsed_time": "0:00:46", "remaining_time": "0:02:17", "throughput": 2676.73, "total_tokens": 123904}
|
| 69 |
+
{"current_steps": 320, "total_steps": 1250, "loss": 0.4243, "lr": 0.02785211291156464, "epoch": 2.56, "percentage": 25.6, "elapsed_time": "0:00:47", "remaining_time": "0:02:18", "throughput": 2633.15, "total_tokens": 125696}
|
| 70 |
+
{"current_steps": 325, "total_steps": 1250, "loss": 0.3746, "lr": 0.027742869020359582, "epoch": 2.6, "percentage": 26.0, "elapsed_time": "0:00:48", "remaining_time": "0:02:17", "throughput": 2640.2, "total_tokens": 127488}
|
| 71 |
+
{"current_steps": 330, "total_steps": 1250, "loss": 0.3832, "lr": 0.027631140881590383, "epoch": 2.64, "percentage": 26.4, "elapsed_time": "0:00:48", "remaining_time": "0:02:16", "throughput": 2652.68, "total_tokens": 129632}
|