Training in progress, step 1134
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +13 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 819328
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2ddfcb700479058d916691de5e36a15134ddcc5e746c153ccbea7b3c1bc22396
|
| 3 |
size 819328
|
trainer_log.jsonl
CHANGED
|
@@ -232,3 +232,16 @@
|
|
| 232 |
{"current_steps": 1075, "total_steps": 1250, "loss": 0.3512, "lr": 0.001775502900913697, "epoch": 8.6, "percentage": 86.0, "elapsed_time": "0:02:38", "remaining_time": "0:00:25", "throughput": 2655.84, "total_tokens": 421008}
|
| 233 |
{"current_steps": 1080, "total_steps": 1250, "loss": 0.352, "lr": 0.0016779532677968327, "epoch": 8.64, "percentage": 86.4, "elapsed_time": "0:02:39", "remaining_time": "0:00:25", "throughput": 2658.3, "total_tokens": 422864}
|
| 234 |
{"current_steps": 1085, "total_steps": 1250, "loss": 0.3618, "lr": 0.0015830007940866035, "epoch": 8.68, "percentage": 86.8, "elapsed_time": "0:02:39", "remaining_time": "0:00:24", "throughput": 2662.01, "total_tokens": 424976}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
{"current_steps": 1075, "total_steps": 1250, "loss": 0.3512, "lr": 0.001775502900913697, "epoch": 8.6, "percentage": 86.0, "elapsed_time": "0:02:38", "remaining_time": "0:00:25", "throughput": 2655.84, "total_tokens": 421008}
|
| 233 |
{"current_steps": 1080, "total_steps": 1250, "loss": 0.352, "lr": 0.0016779532677968327, "epoch": 8.64, "percentage": 86.4, "elapsed_time": "0:02:39", "remaining_time": "0:00:25", "throughput": 2658.3, "total_tokens": 422864}
|
| 234 |
{"current_steps": 1085, "total_steps": 1250, "loss": 0.3618, "lr": 0.0015830007940866035, "epoch": 8.68, "percentage": 86.8, "elapsed_time": "0:02:39", "remaining_time": "0:00:24", "throughput": 2662.01, "total_tokens": 424976}
|
| 235 |
+
{"current_steps": 1090, "total_steps": 1250, "loss": 0.3384, "lr": 0.0014906639909558954, "epoch": 8.72, "percentage": 87.2, "elapsed_time": "0:02:40", "remaining_time": "0:00:23", "throughput": 2665.52, "total_tokens": 427120}
|
| 236 |
+
{"current_steps": 1095, "total_steps": 1250, "loss": 0.3384, "lr": 0.0014009608596474348, "epoch": 8.76, "percentage": 87.6, "elapsed_time": "0:02:40", "remaining_time": "0:00:22", "throughput": 2669.1, "total_tokens": 429264}
|
| 237 |
+
{"current_steps": 1100, "total_steps": 1250, "loss": 0.3566, "lr": 0.001313908887964409, "epoch": 8.8, "percentage": 88.0, "elapsed_time": "0:02:41", "remaining_time": "0:00:22", "throughput": 2671.09, "total_tokens": 431056}
|
| 238 |
+
{"current_steps": 1105, "total_steps": 1250, "loss": 0.3671, "lr": 0.0012295250468611779, "epoch": 8.84, "percentage": 88.4, "elapsed_time": "0:02:41", "remaining_time": "0:00:21", "throughput": 2674.58, "total_tokens": 433136}
|
| 239 |
+
{"current_steps": 1110, "total_steps": 1250, "loss": 0.3551, "lr": 0.0011478257871347663, "epoch": 8.88, "percentage": 88.8, "elapsed_time": "0:02:42", "remaining_time": "0:00:20", "throughput": 2677.96, "total_tokens": 435216}
|
| 240 |
+
{"current_steps": 1115, "total_steps": 1250, "loss": 0.3383, "lr": 0.0010688270362177355, "epoch": 8.92, "percentage": 89.2, "elapsed_time": "0:02:43", "remaining_time": "0:00:19", "throughput": 2680.2, "total_tokens": 437040}
|
| 241 |
+
{"current_steps": 1120, "total_steps": 1250, "loss": 0.3615, "lr": 0.0009925441950730985, "epoch": 8.96, "percentage": 89.6, "elapsed_time": "0:02:43", "remaining_time": "0:00:18", "throughput": 2683.46, "total_tokens": 439088}
|
| 242 |
+
{"current_steps": 1125, "total_steps": 1250, "loss": 0.3257, "lr": 0.0009189921351918889, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:02:44", "remaining_time": "0:00:18", "throughput": 2684.33, "total_tokens": 440848}
|
| 243 |
+
{"current_steps": 1130, "total_steps": 1250, "loss": 0.3731, "lr": 0.0008481851956939134, "epoch": 9.04, "percentage": 90.4, "elapsed_time": "0:02:45", "remaining_time": "0:00:17", "throughput": 2684.01, "total_tokens": 442864}
|
| 244 |
+
{"current_steps": 1134, "total_steps": 1250, "eval_loss": 0.3622165322303772, "epoch": 9.072, "percentage": 90.72, "elapsed_time": "0:02:46", "remaining_time": "0:00:17", "throughput": 2670.96, "total_tokens": 444560}
|
| 245 |
+
{"current_steps": 1135, "total_steps": 1250, "loss": 0.3666, "lr": 0.0007801371805323276, "epoch": 9.08, "percentage": 90.8, "elapsed_time": "0:02:47", "remaining_time": "0:00:16", "throughput": 2654.34, "total_tokens": 445040}
|
| 246 |
+
{"current_steps": 1140, "total_steps": 1250, "loss": 0.3579, "lr": 0.0007148613558025102, "epoch": 9.12, "percentage": 91.2, "elapsed_time": "0:02:48", "remaining_time": "0:00:16", "throughput": 2655.66, "total_tokens": 446864}
|
| 247 |
+
{"current_steps": 1145, "total_steps": 1250, "loss": 0.3587, "lr": 0.0006523704471558306, "epoch": 9.16, "percentage": 91.6, "elapsed_time": "0:02:48", "remaining_time": "0:00:15", "throughput": 2659.2, "total_tokens": 449008}
|