Model save
Browse files- README.md +5 -8
- all_results.json +5 -5
- config.json +1 -1
- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- runs/Jun14_18-07-43_action-graph-trainer/events.out.tfevents.1718388492.action-graph-trainer.695665.0 +3 -0
- train_results.json +5 -5
- trainer_state.json +25 -25
- training_args.bin +1 -1
README.md
CHANGED
|
@@ -2,15 +2,12 @@
|
|
| 2 |
license: apache-2.0
|
| 3 |
base_model: Qwen/Qwen2-7B
|
| 4 |
tags:
|
| 5 |
-
- alignment-handbook
|
| 6 |
-
- trl
|
| 7 |
-
- sft
|
| 8 |
-
- generated_from_trainer
|
| 9 |
- trl
|
| 10 |
- sft
|
|
|
|
| 11 |
- generated_from_trainer
|
| 12 |
datasets:
|
| 13 |
-
-
|
| 14 |
model-index:
|
| 15 |
- name: zephyr-qwen2-7b-sft
|
| 16 |
results: []
|
|
@@ -21,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 21 |
|
| 22 |
# zephyr-qwen2-7b-sft
|
| 23 |
|
| 24 |
-
This model is a fine-tuned version of [Qwen/Qwen2-7B](https://huggingface.co/Qwen/Qwen2-7B) on the
|
| 25 |
It achieves the following results on the evaluation set:
|
| 26 |
-
- Loss: 1.
|
| 27 |
|
| 28 |
## Model description
|
| 29 |
|
|
@@ -60,7 +57,7 @@ The following hyperparameters were used during training:
|
|
| 60 |
|
| 61 |
| Training Loss | Epoch | Step | Validation Loss |
|
| 62 |
|:-------------:|:-----:|:----:|:---------------:|
|
| 63 |
-
| 1.
|
| 64 |
|
| 65 |
|
| 66 |
### Framework versions
|
|
|
|
| 2 |
license: apache-2.0
|
| 3 |
base_model: Qwen/Qwen2-7B
|
| 4 |
tags:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
- trl
|
| 6 |
- sft
|
| 7 |
+
- alignment-handbook
|
| 8 |
- generated_from_trainer
|
| 9 |
datasets:
|
| 10 |
+
- generator
|
| 11 |
model-index:
|
| 12 |
- name: zephyr-qwen2-7b-sft
|
| 13 |
results: []
|
|
|
|
| 18 |
|
| 19 |
# zephyr-qwen2-7b-sft
|
| 20 |
|
| 21 |
+
This model is a fine-tuned version of [Qwen/Qwen2-7B](https://huggingface.co/Qwen/Qwen2-7B) on the generator dataset.
|
| 22 |
It achieves the following results on the evaluation set:
|
| 23 |
+
- Loss: 1.0645
|
| 24 |
|
| 25 |
## Model description
|
| 26 |
|
|
|
|
| 57 |
|
| 58 |
| Training Loss | Epoch | Step | Validation Loss |
|
| 59 |
|:-------------:|:-----:|:----:|:---------------:|
|
| 60 |
+
| 1.0627 | 1.0 | 956 | 1.0645 |
|
| 61 |
|
| 62 |
|
| 63 |
### Framework versions
|
all_results.json
CHANGED
|
@@ -5,10 +5,10 @@
|
|
| 5 |
"eval_samples": 23109,
|
| 6 |
"eval_samples_per_second": 47.288,
|
| 7 |
"eval_steps_per_second": 0.74,
|
| 8 |
-
"total_flos":
|
| 9 |
-
"train_loss":
|
| 10 |
-
"train_runtime":
|
| 11 |
"train_samples": 207864,
|
| 12 |
-
"train_samples_per_second":
|
| 13 |
-
"train_steps_per_second":
|
| 14 |
}
|
|
|
|
| 5 |
"eval_samples": 23109,
|
| 6 |
"eval_samples_per_second": 47.288,
|
| 7 |
"eval_steps_per_second": 0.74,
|
| 8 |
+
"total_flos": 500662995517440.0,
|
| 9 |
+
"train_loss": 0.06220405869902926,
|
| 10 |
+
"train_runtime": 877.8841,
|
| 11 |
"train_samples": 207864,
|
| 12 |
+
"train_samples_per_second": 139.358,
|
| 13 |
+
"train_steps_per_second": 1.089
|
| 14 |
}
|
config.json
CHANGED
|
@@ -22,7 +22,7 @@
|
|
| 22 |
"tie_word_embeddings": false,
|
| 23 |
"torch_dtype": "bfloat16",
|
| 24 |
"transformers_version": "4.40.2",
|
| 25 |
-
"use_cache":
|
| 26 |
"use_sliding_window": false,
|
| 27 |
"vocab_size": 152064
|
| 28 |
}
|
|
|
|
| 22 |
"tie_word_embeddings": false,
|
| 23 |
"torch_dtype": "bfloat16",
|
| 24 |
"transformers_version": "4.40.2",
|
| 25 |
+
"use_cache": false,
|
| 26 |
"use_sliding_window": false,
|
| 27 |
"vocab_size": 152064
|
| 28 |
}
|
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4877660776
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5346f7673f73f551aaaa605516577660e1eeedcc29154ded68a8a39e1bf72c4c
|
| 3 |
size 4877660776
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4932751008
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b42de9be5847138b7967316f3dad2efd4db71d2dc2042256575769e7883a189a
|
| 3 |
size 4932751008
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4330865200
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:90e5f201a3835937500f20e67d4c791a7124c5a060229fc1f92b17ebf3fda4b2
|
| 3 |
size 4330865200
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1089994880
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:634e5128c71a098d1f1cad9837cc743ae34a83706e15c9cb9df0ac5d7fc76820
|
| 3 |
size 1089994880
|
runs/Jun14_18-07-43_action-graph-trainer/events.out.tfevents.1718388492.action-graph-trainer.695665.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c86dcc545bc64c7e85477584418e0651f4252ace1b3894df3f4e8f5359dddf47
|
| 3 |
+
size 7807
|
train_results.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 1.0,
|
| 3 |
-
"total_flos":
|
| 4 |
-
"train_loss":
|
| 5 |
-
"train_runtime":
|
| 6 |
"train_samples": 207864,
|
| 7 |
-
"train_samples_per_second":
|
| 8 |
-
"train_steps_per_second":
|
| 9 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 1.0,
|
| 3 |
+
"total_flos": 500662995517440.0,
|
| 4 |
+
"train_loss": 0.06220405869902926,
|
| 5 |
+
"train_runtime": 877.8841,
|
| 6 |
"train_samples": 207864,
|
| 7 |
+
"train_samples_per_second": 139.358,
|
| 8 |
+
"train_steps_per_second": 1.089
|
| 9 |
}
|
trainer_state.json
CHANGED
|
@@ -1277,97 +1277,97 @@
|
|
| 1277 |
},
|
| 1278 |
{
|
| 1279 |
"epoch": 0.946652719665272,
|
| 1280 |
-
"grad_norm": 0.
|
| 1281 |
"learning_rate": 1.730440504639408e-07,
|
| 1282 |
"loss": 1.058,
|
| 1283 |
"step": 905
|
| 1284 |
},
|
| 1285 |
{
|
| 1286 |
"epoch": 0.9518828451882845,
|
| 1287 |
-
"grad_norm": 0.
|
| 1288 |
"learning_rate": 1.408530770781813e-07,
|
| 1289 |
"loss": 1.0526,
|
| 1290 |
"step": 910
|
| 1291 |
},
|
| 1292 |
{
|
| 1293 |
"epoch": 0.9571129707112971,
|
| 1294 |
-
"grad_norm": 0.
|
| 1295 |
"learning_rate": 1.1195115097079268e-07,
|
| 1296 |
-
"loss": 1.
|
| 1297 |
"step": 915
|
| 1298 |
},
|
| 1299 |
{
|
| 1300 |
"epoch": 0.9623430962343096,
|
| 1301 |
-
"grad_norm": 0.
|
| 1302 |
"learning_rate": 8.634791392946429e-08,
|
| 1303 |
"loss": 1.0676,
|
| 1304 |
"step": 920
|
| 1305 |
},
|
| 1306 |
{
|
| 1307 |
"epoch": 0.9675732217573222,
|
| 1308 |
-
"grad_norm": 0.
|
| 1309 |
"learning_rate": 6.405190728721033e-08,
|
| 1310 |
"loss": 1.0455,
|
| 1311 |
"step": 925
|
| 1312 |
},
|
| 1313 |
{
|
| 1314 |
"epoch": 0.9728033472803347,
|
| 1315 |
-
"grad_norm": 0.
|
| 1316 |
"learning_rate": 4.5070569072952485e-08,
|
| 1317 |
-
"loss": 1.
|
| 1318 |
"step": 930
|
| 1319 |
},
|
| 1320 |
{
|
| 1321 |
"epoch": 0.9780334728033473,
|
| 1322 |
-
"grad_norm": 0.
|
| 1323 |
"learning_rate": 2.9410231530168087e-08,
|
| 1324 |
-
"loss": 1.
|
| 1325 |
"step": 935
|
| 1326 |
},
|
| 1327 |
{
|
| 1328 |
"epoch": 0.9832635983263598,
|
| 1329 |
-
"grad_norm": 0.
|
| 1330 |
"learning_rate": 1.7076119004429958e-08,
|
| 1331 |
"loss": 1.0763,
|
| 1332 |
"step": 940
|
| 1333 |
},
|
| 1334 |
{
|
| 1335 |
"epoch": 0.9884937238493724,
|
| 1336 |
-
"grad_norm": 0.
|
| 1337 |
"learning_rate": 8.072346200544979e-09,
|
| 1338 |
"loss": 1.0672,
|
| 1339 |
"step": 945
|
| 1340 |
},
|
| 1341 |
{
|
| 1342 |
"epoch": 0.9937238493723849,
|
| 1343 |
-
"grad_norm": 0.
|
| 1344 |
"learning_rate": 2.401916809872118e-09,
|
| 1345 |
"loss": 1.0629,
|
| 1346 |
"step": 950
|
| 1347 |
},
|
| 1348 |
{
|
| 1349 |
"epoch": 0.9989539748953975,
|
| 1350 |
-
"grad_norm": 0.
|
| 1351 |
"learning_rate": 6.672250828620996e-11,
|
| 1352 |
-
"loss": 1.
|
| 1353 |
"step": 955
|
| 1354 |
},
|
| 1355 |
{
|
| 1356 |
"epoch": 1.0,
|
| 1357 |
-
"eval_loss": 1.
|
| 1358 |
-
"eval_runtime":
|
| 1359 |
-
"eval_samples_per_second": 47.
|
| 1360 |
-
"eval_steps_per_second": 0.
|
| 1361 |
"step": 956
|
| 1362 |
},
|
| 1363 |
{
|
| 1364 |
"epoch": 1.0,
|
| 1365 |
"step": 956,
|
| 1366 |
-
"total_flos":
|
| 1367 |
-
"train_loss":
|
| 1368 |
-
"train_runtime":
|
| 1369 |
-
"train_samples_per_second":
|
| 1370 |
-
"train_steps_per_second":
|
| 1371 |
}
|
| 1372 |
],
|
| 1373 |
"logging_steps": 5,
|
|
@@ -1375,7 +1375,7 @@
|
|
| 1375 |
"num_input_tokens_seen": 0,
|
| 1376 |
"num_train_epochs": 1,
|
| 1377 |
"save_steps": 100,
|
| 1378 |
-
"total_flos":
|
| 1379 |
"train_batch_size": 8,
|
| 1380 |
"trial_name": null,
|
| 1381 |
"trial_params": null
|
|
|
|
| 1277 |
},
|
| 1278 |
{
|
| 1279 |
"epoch": 0.946652719665272,
|
| 1280 |
+
"grad_norm": 0.4832426848529184,
|
| 1281 |
"learning_rate": 1.730440504639408e-07,
|
| 1282 |
"loss": 1.058,
|
| 1283 |
"step": 905
|
| 1284 |
},
|
| 1285 |
{
|
| 1286 |
"epoch": 0.9518828451882845,
|
| 1287 |
+
"grad_norm": 0.48799198233407015,
|
| 1288 |
"learning_rate": 1.408530770781813e-07,
|
| 1289 |
"loss": 1.0526,
|
| 1290 |
"step": 910
|
| 1291 |
},
|
| 1292 |
{
|
| 1293 |
"epoch": 0.9571129707112971,
|
| 1294 |
+
"grad_norm": 0.4969562812336113,
|
| 1295 |
"learning_rate": 1.1195115097079268e-07,
|
| 1296 |
+
"loss": 1.0684,
|
| 1297 |
"step": 915
|
| 1298 |
},
|
| 1299 |
{
|
| 1300 |
"epoch": 0.9623430962343096,
|
| 1301 |
+
"grad_norm": 0.4892866844912397,
|
| 1302 |
"learning_rate": 8.634791392946429e-08,
|
| 1303 |
"loss": 1.0676,
|
| 1304 |
"step": 920
|
| 1305 |
},
|
| 1306 |
{
|
| 1307 |
"epoch": 0.9675732217573222,
|
| 1308 |
+
"grad_norm": 0.49055262840153824,
|
| 1309 |
"learning_rate": 6.405190728721033e-08,
|
| 1310 |
"loss": 1.0455,
|
| 1311 |
"step": 925
|
| 1312 |
},
|
| 1313 |
{
|
| 1314 |
"epoch": 0.9728033472803347,
|
| 1315 |
+
"grad_norm": 0.47689555635255854,
|
| 1316 |
"learning_rate": 4.5070569072952485e-08,
|
| 1317 |
+
"loss": 1.0609,
|
| 1318 |
"step": 930
|
| 1319 |
},
|
| 1320 |
{
|
| 1321 |
"epoch": 0.9780334728033473,
|
| 1322 |
+
"grad_norm": 0.4955325802322405,
|
| 1323 |
"learning_rate": 2.9410231530168087e-08,
|
| 1324 |
+
"loss": 1.0498,
|
| 1325 |
"step": 935
|
| 1326 |
},
|
| 1327 |
{
|
| 1328 |
"epoch": 0.9832635983263598,
|
| 1329 |
+
"grad_norm": 0.47274550178714503,
|
| 1330 |
"learning_rate": 1.7076119004429958e-08,
|
| 1331 |
"loss": 1.0763,
|
| 1332 |
"step": 940
|
| 1333 |
},
|
| 1334 |
{
|
| 1335 |
"epoch": 0.9884937238493724,
|
| 1336 |
+
"grad_norm": 0.49350697124044746,
|
| 1337 |
"learning_rate": 8.072346200544979e-09,
|
| 1338 |
"loss": 1.0672,
|
| 1339 |
"step": 945
|
| 1340 |
},
|
| 1341 |
{
|
| 1342 |
"epoch": 0.9937238493723849,
|
| 1343 |
+
"grad_norm": 0.4887853656062252,
|
| 1344 |
"learning_rate": 2.401916809872118e-09,
|
| 1345 |
"loss": 1.0629,
|
| 1346 |
"step": 950
|
| 1347 |
},
|
| 1348 |
{
|
| 1349 |
"epoch": 0.9989539748953975,
|
| 1350 |
+
"grad_norm": 0.4764956168422736,
|
| 1351 |
"learning_rate": 6.672250828620996e-11,
|
| 1352 |
+
"loss": 1.0627,
|
| 1353 |
"step": 955
|
| 1354 |
},
|
| 1355 |
{
|
| 1356 |
"epoch": 1.0,
|
| 1357 |
+
"eval_loss": 1.0645456314086914,
|
| 1358 |
+
"eval_runtime": 285.3875,
|
| 1359 |
+
"eval_samples_per_second": 47.441,
|
| 1360 |
+
"eval_steps_per_second": 0.743,
|
| 1361 |
"step": 956
|
| 1362 |
},
|
| 1363 |
{
|
| 1364 |
"epoch": 1.0,
|
| 1365 |
"step": 956,
|
| 1366 |
+
"total_flos": 500662995517440.0,
|
| 1367 |
+
"train_loss": 0.06220405869902926,
|
| 1368 |
+
"train_runtime": 877.8841,
|
| 1369 |
+
"train_samples_per_second": 139.358,
|
| 1370 |
+
"train_steps_per_second": 1.089
|
| 1371 |
}
|
| 1372 |
],
|
| 1373 |
"logging_steps": 5,
|
|
|
|
| 1375 |
"num_input_tokens_seen": 0,
|
| 1376 |
"num_train_epochs": 1,
|
| 1377 |
"save_steps": 100,
|
| 1378 |
+
"total_flos": 500662995517440.0,
|
| 1379 |
"train_batch_size": 8,
|
| 1380 |
"trial_name": null,
|
| 1381 |
"trial_params": null
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6200
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a961b2dc9433696ebda3812a203e258f2b437c5b7c7dd434f0409fcfebe52fe
|
| 3 |
size 6200
|