mohammadmahdinouri committed on
Commit
0ffaa17
·
verified ·
1 Parent(s): a11047d

Training in progress, step 1000

Browse files
config.json CHANGED
@@ -11,9 +11,9 @@
11
  "group_depth": 4,
12
  "hidden_act": "gelu",
13
  "hidden_dropout_prob": 0.1,
14
- "hidden_size": 768,
15
  "initializer_range": 0.02,
16
- "intermediate_size": 1152,
17
  "layer_norm_eps": 1e-06,
18
  "load_balancing_loss_coef": 0.2,
19
  "lora_alpha": 32,
@@ -21,9 +21,10 @@
21
  "max_position_embeddings": 8192,
22
  "model_type": "ModernALBERT",
23
  "num_attention_heads": 16,
24
- "num_expert_modules": 2,
25
  "num_experts": 8,
26
- "num_hidden_layers": 12,
 
27
  "pad_token_id": 0,
28
  "router_jitter_noise": 0.01,
29
  "top_k": 2,
 
11
  "group_depth": 4,
12
  "hidden_act": "gelu",
13
  "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 1024,
15
  "initializer_range": 0.02,
16
+ "intermediate_size": 2624,
17
  "layer_norm_eps": 1e-06,
18
  "load_balancing_loss_coef": 0.2,
19
  "lora_alpha": 32,
 
21
  "max_position_embeddings": 8192,
22
  "model_type": "ModernALBERT",
23
  "num_attention_heads": 16,
24
+ "num_expert_modules": 3,
25
  "num_experts": 8,
26
+ "num_hidden_layers": 16,
27
+ "output_hidden_states": true,
28
  "pad_token_id": 0,
29
  "router_jitter_noise": 0.01,
30
  "top_k": 2,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:785ee9b18edce1ac4424d119c2d5676cf37a77224a2e436f3c1b6a4a6d0771b5
3
- size 363608098
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de084139f6288438d259fd9e37d957d3a8c02421c362535e92de3d81b82b275a
3
+ size 1059459406
runs/Sep29_11-20-07_nid007006/events.out.tfevents.1759137628.nid007006.208314.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3062f5e25a9c52f0a0ce4f702866cba1da6f765b7d84527a545f3dcbe0b72814
3
+ size 13749
runs/Sep29_12-45-46_nid007464/events.out.tfevents.1759142754.nid007464.198193.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:558f1b15f98456a225ae403edfc9ad5eec2809135da72595b09e6a03869deab9
3
+ size 15437
runs/Sep29_14-12-30_nid007012/events.out.tfevents.1759147957.nid007012.139120.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:511894f0629fe57e6fb7bf2543eca4a65602e5cf6bf59f0ab4e7d85cbdf80039
3
+ size 10584
runs/Sep29_15-14-44_nid006669/events.out.tfevents.1759151691.nid006669.251253.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6ddf36691f3af271fe7e000a5ff1e37ea908d422a98333956d629153b84c647
3
+ size 15891
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:027ed360ff4f0ca496611394dee093a2d433e6d06c6b711927bd401f9bd2506e
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:283bddcb16b2ad0c0d90a34efee30ef8fc9142ab210ab65bb573df2be56fe411
3
  size 5432