Training in progress, step 1000
Browse files
- config.json +4 -5
- pytorch_model.bin +2 -2
- runs/Oct09_16-06-08_nid006852/events.out.tfevents.1760018783.nid006852.205420.0 +3 -0
- runs/Sep27_10-53-04_nid007099/events.out.tfevents.1758963191.nid007099.167413.0 +2 -2
- runs/Sep27_21-55-44_nid006631/events.out.tfevents.1759002954.nid006631.150629.0 +3 -0
- training_args.bin +1 -1
config.json
CHANGED
|
@@ -8,12 +8,12 @@
|
|
| 8 |
"dtype": "float32",
|
| 9 |
"embedding_size": 128,
|
| 10 |
"expert_intermediate_size": 2624,
|
| 11 |
-
"group_depth":
|
| 12 |
"hidden_act": "gelu",
|
| 13 |
"hidden_dropout_prob": 0.1,
|
| 14 |
-
"hidden_size":
|
| 15 |
"initializer_range": 0.02,
|
| 16 |
-
"intermediate_size":
|
| 17 |
"layer_norm_eps": 1e-06,
|
| 18 |
"load_balancing_loss_coef": 0.2,
|
| 19 |
"lora_alpha": 32,
|
|
@@ -23,8 +23,7 @@
|
|
| 23 |
"num_attention_heads": 16,
|
| 24 |
"num_expert_modules": 3,
|
| 25 |
"num_experts": 8,
|
| 26 |
-
"num_hidden_layers":
|
| 27 |
-
"output_hidden_states": true,
|
| 28 |
"pad_token_id": 0,
|
| 29 |
"router_jitter_noise": 0.01,
|
| 30 |
"top_k": 2,
|
|
|
|
| 8 |
"dtype": "float32",
|
| 9 |
"embedding_size": 128,
|
| 10 |
"expert_intermediate_size": 2624,
|
| 11 |
+
"group_depth": 3,
|
| 12 |
"hidden_act": "gelu",
|
| 13 |
"hidden_dropout_prob": 0.1,
|
| 14 |
+
"hidden_size": 768,
|
| 15 |
"initializer_range": 0.02,
|
| 16 |
+
"intermediate_size": 1152,
|
| 17 |
"layer_norm_eps": 1e-06,
|
| 18 |
"load_balancing_loss_coef": 0.2,
|
| 19 |
"lora_alpha": 32,
|
|
|
|
| 23 |
"num_attention_heads": 16,
|
| 24 |
"num_expert_modules": 3,
|
| 25 |
"num_experts": 8,
|
| 26 |
+
"num_hidden_layers": 12,
|
|
|
|
| 27 |
"pad_token_id": 0,
|
| 28 |
"router_jitter_noise": 0.01,
|
| 29 |
"top_k": 2,
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ce76e7ff5e00c2b43727da522f29f397bdc16bd061c317acc62bd8f0fa104fe
|
| 3 |
+
size 383474230
|
runs/Oct09_16-06-08_nid006852/events.out.tfevents.1760018783.nid006852.205420.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a511eda6dd8b00603ffd39f43cc6ba138048f3e6811d23150a075c1e70729a3
|
| 3 |
+
size 15859
|
runs/Sep27_10-53-04_nid007099/events.out.tfevents.1758963191.nid007099.167413.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:75c790adca0ae4dafa1d43ce1496b4891ddfb85b2252124c5a6cefd4df129b4b
|
| 3 |
+
size 231082
|
runs/Sep27_21-55-44_nid006631/events.out.tfevents.1759002954.nid006631.150629.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e6ca037b3ccd7d914ace4a9720206d1c078cde552bb787fec64a618ba32061f
|
| 3 |
+
size 15867
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5432
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:119e924a11235114f963ea267ac6873181a4ff9d54cf5938dedce89a3b292868
|
| 3 |
size 5432
|