Training in progress, step 680000
Browse files- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state_0.pth +1 -1
- last-checkpoint/rng_state_1.pth +1 -1
- last-checkpoint/rng_state_2.pth +1 -1
- last-checkpoint/rng_state_3.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +77 -3
- last-checkpoint/training_args.bin +1 -1
- pytorch_model.bin +1 -1
- training_args.bin +1 -1
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 893439185
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:44b0820fc6b350986b719e550b35d09a69f533f4c17c09f1fc4dcc51eee4deaf
|
| 3 |
size 893439185
|
last-checkpoint/pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bad34c9686a45968effd10ffe093593240a68e27baf1e992462f0c39c5a1154f
|
| 3 |
size 449471589
|
last-checkpoint/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5303f8d69702e89a1f09b78b9ce543e8c95fc0bc0f0edba191465caf3a91d92a
|
| 3 |
size 14503
|
last-checkpoint/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7ef09cba59e3953ac0ff77371a062cedb407170faeac834b4f4e7690a6546000
|
| 3 |
size 14503
|
last-checkpoint/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b74b641ea212d2246cddaeb018c081c477c99608fb3a2c0685286bdcc4bf7cde
|
| 3 |
size 14503
|
last-checkpoint/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14503
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e039e987a11ac43b108942b16a0127488a54c01eff95956d54d2613b30129dbd
|
| 3 |
size 14503
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 623
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d737cb5c8a42fd478f2f6309865787d8315dbe8632150ec0b5dc4840a67a78da
|
| 3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
-
"epoch": 10.
|
| 5 |
-
"global_step":
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
@@ -4964,11 +4964,85 @@
|
|
| 4964 |
"eval_samples_per_second": 876.243,
|
| 4965 |
"eval_steps_per_second": 14.02,
|
| 4966 |
"step": 670000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4967 |
}
|
| 4968 |
],
|
| 4969 |
"max_steps": 1000000,
|
| 4970 |
"num_train_epochs": 16,
|
| 4971 |
-
"total_flos": 4.
|
| 4972 |
"trial_name": null,
|
| 4973 |
"trial_params": null
|
| 4974 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": null,
|
| 3 |
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 10.38374028433124,
|
| 5 |
+
"global_step": 680000,
|
| 6 |
"is_hyper_param_search": false,
|
| 7 |
"is_local_process_zero": true,
|
| 8 |
"is_world_process_zero": true,
|
|
|
|
| 4964 |
"eval_samples_per_second": 876.243,
|
| 4965 |
"eval_steps_per_second": 14.02,
|
| 4966 |
"step": 670000
|
| 4967 |
+
},
|
| 4968 |
+
{
|
| 4969 |
+
"epoch": 10.25,
|
| 4970 |
+
"learning_rate": 4.7500916084690564e-05,
|
| 4971 |
+
"loss": 0.2542,
|
| 4972 |
+
"step": 671000
|
| 4973 |
+
},
|
| 4974 |
+
{
|
| 4975 |
+
"epoch": 10.26,
|
| 4976 |
+
"learning_rate": 4.729606898266411e-05,
|
| 4977 |
+
"loss": 0.2507,
|
| 4978 |
+
"step": 672000
|
| 4979 |
+
},
|
| 4980 |
+
{
|
| 4981 |
+
"epoch": 10.28,
|
| 4982 |
+
"learning_rate": 4.709157952558768e-05,
|
| 4983 |
+
"loss": 0.2478,
|
| 4984 |
+
"step": 673000
|
| 4985 |
+
},
|
| 4986 |
+
{
|
| 4987 |
+
"epoch": 10.29,
|
| 4988 |
+
"learning_rate": 4.688744994972514e-05,
|
| 4989 |
+
"loss": 0.2482,
|
| 4990 |
+
"step": 674000
|
| 4991 |
+
},
|
| 4992 |
+
{
|
| 4993 |
+
"epoch": 10.31,
|
| 4994 |
+
"learning_rate": 4.668368248740485e-05,
|
| 4995 |
+
"loss": 0.247,
|
| 4996 |
+
"step": 675000
|
| 4997 |
+
},
|
| 4998 |
+
{
|
| 4999 |
+
"epoch": 10.31,
|
| 5000 |
+
"eval_runtime": 0.9224,
|
| 5001 |
+
"eval_samples_per_second": 1084.145,
|
| 5002 |
+
"eval_steps_per_second": 17.346,
|
| 5003 |
+
"step": 675000
|
| 5004 |
+
},
|
| 5005 |
+
{
|
| 5006 |
+
"epoch": 10.32,
|
| 5007 |
+
"learning_rate": 4.6480279366995116e-05,
|
| 5008 |
+
"loss": 0.2472,
|
| 5009 |
+
"step": 676000
|
| 5010 |
+
},
|
| 5011 |
+
{
|
| 5012 |
+
"epoch": 10.34,
|
| 5013 |
+
"learning_rate": 4.6277242812879914e-05,
|
| 5014 |
+
"loss": 0.2473,
|
| 5015 |
+
"step": 677000
|
| 5016 |
+
},
|
| 5017 |
+
{
|
| 5018 |
+
"epoch": 10.35,
|
| 5019 |
+
"learning_rate": 4.607457504543447e-05,
|
| 5020 |
+
"loss": 0.2471,
|
| 5021 |
+
"step": 678000
|
| 5022 |
+
},
|
| 5023 |
+
{
|
| 5024 |
+
"epoch": 10.37,
|
| 5025 |
+
"learning_rate": 4.5872278281000955e-05,
|
| 5026 |
+
"loss": 0.2469,
|
| 5027 |
+
"step": 679000
|
| 5028 |
+
},
|
| 5029 |
+
{
|
| 5030 |
+
"epoch": 10.38,
|
| 5031 |
+
"learning_rate": 4.567035473186444e-05,
|
| 5032 |
+
"loss": 0.2469,
|
| 5033 |
+
"step": 680000
|
| 5034 |
+
},
|
| 5035 |
+
{
|
| 5036 |
+
"epoch": 10.38,
|
| 5037 |
+
"eval_runtime": 0.7393,
|
| 5038 |
+
"eval_samples_per_second": 1352.617,
|
| 5039 |
+
"eval_steps_per_second": 21.642,
|
| 5040 |
+
"step": 680000
|
| 5041 |
}
|
| 5042 |
],
|
| 5043 |
"max_steps": 1000000,
|
| 5044 |
"num_train_epochs": 16,
|
| 5045 |
+
"total_flos": 4.766811179382118e+22,
|
| 5046 |
"trial_name": null,
|
| 5047 |
"trial_params": null
|
| 5048 |
}
|
last-checkpoint/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3311
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:266f63e45a1817b41dc068c262baa2af3cee153bdb768839f58f4ee888697edd
|
| 3 |
size 3311
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 449471589
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bad34c9686a45968effd10ffe093593240a68e27baf1e992462f0c39c5a1154f
|
| 3 |
size 449471589
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 3311
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:266f63e45a1817b41dc068c262baa2af3cee153bdb768839f58f4ee888697edd
|
| 3 |
size 3311
|