mohammadmahdinouri committed on
Commit
f7d4ea6
·
verified ·
1 Parent(s): 9e4dced

Training in progress, step 1000

Browse files
config.json CHANGED
@@ -8,12 +8,12 @@
8
  "dtype": "float32",
9
  "embedding_size": 128,
10
  "expert_intermediate_size": 2624,
11
- "group_depth": 4,
12
  "hidden_act": "gelu",
13
  "hidden_dropout_prob": 0.1,
14
- "hidden_size": 1024,
15
  "initializer_range": 0.02,
16
- "intermediate_size": 2624,
17
  "layer_norm_eps": 1e-06,
18
  "load_balancing_loss_coef": 0.2,
19
  "lora_alpha": 32,
@@ -23,8 +23,7 @@
23
  "num_attention_heads": 16,
24
  "num_expert_modules": 3,
25
  "num_experts": 8,
26
- "num_hidden_layers": 16,
27
- "output_hidden_states": true,
28
  "pad_token_id": 0,
29
  "router_jitter_noise": 0.01,
30
  "top_k": 2,
 
8
  "dtype": "float32",
9
  "embedding_size": 128,
10
  "expert_intermediate_size": 2624,
11
+ "group_depth": 3,
12
  "hidden_act": "gelu",
13
  "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
  "initializer_range": 0.02,
16
+ "intermediate_size": 1152,
17
  "layer_norm_eps": 1e-06,
18
  "load_balancing_loss_coef": 0.2,
19
  "lora_alpha": 32,
 
23
  "num_attention_heads": 16,
24
  "num_expert_modules": 3,
25
  "num_experts": 8,
26
+ "num_hidden_layers": 12,
 
27
  "pad_token_id": 0,
28
  "router_jitter_noise": 0.01,
29
  "top_k": 2,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:923f7d1b566247ede9b22278980822eae56cadb9a9b3dd9262ae20fbe9e710b1
3
- size 1059459406
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ce76e7ff5e00c2b43727da522f29f397bdc16bd061c317acc62bd8f0fa104fe
3
+ size 383474230
runs/Oct09_16-06-08_nid006852/events.out.tfevents.1760018783.nid006852.205420.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a511eda6dd8b00603ffd39f43cc6ba138048f3e6811d23150a075c1e70729a3
3
+ size 15859
runs/Sep27_10-53-04_nid007099/events.out.tfevents.1758963191.nid007099.167413.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:520a51bd0ea389e20c9063d3dfe5fedd0d8fa5e3dcf3dcc4845beff6466eea17
3
- size 220332
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75c790adca0ae4dafa1d43ce1496b4891ddfb85b2252124c5a6cefd4df129b4b
3
+ size 231082
runs/Sep27_21-55-44_nid006631/events.out.tfevents.1759002954.nid006631.150629.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e6ca037b3ccd7d914ace4a9720206d1c078cde552bb787fec64a618ba32061f
3
+ size 15867
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19c9bbb6b985d4f48fe80fcb93df10124c24f15065a7691fa4973605b097bf6b
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:119e924a11235114f963ea267ac6873181a4ff9d54cf5938dedce89a3b292868
3
  size 5432