mohammadmahdinouri committed on
Commit
938f20d
·
verified ·
1 Parent(s): 425d45d

Upload ModernALBERTForMaskedLM

Browse files
Files changed (2) hide show
  1. config.json +4 -12
  2. model.safetensors +1 -1
config.json CHANGED
@@ -12,28 +12,20 @@
12
  "intermediate_size": 2048,
13
  "layer_norm_eps": 1e-12,
14
  "max_position_embeddings": 8192,
15
- "min_router_temp": 0.3,
16
- "min_routing_noise_std": 0.05,
17
  "model_type": "ModernALBERT_MoL",
18
  "num_attention_heads": 16,
19
  "num_expert_layers": 6,
20
  "num_hidden_layers": 18,
21
  "num_static_groups": 2,
22
  "pad_token_id": 0,
23
- "router_aux_loss_coef": 0.05,
24
- "router_temp": 1.0,
25
- "router_warmup_steps": 1000,
26
- "routing_inputs": "cls+mean",
27
- "routing_noise_decay": true,
28
- "routing_noise_std": 0.3,
29
- "routing_noise_type": "gaussian",
30
- "shared_router": false,
31
- "soft_router_intro": true,
32
  "static_group_depth": 3,
33
  "top_k_experts": 1,
34
  "torch_dtype": "float32",
35
  "transformers_version": "4.55.2",
36
- "use_block_adapters": false,
37
  "use_gated_residual": true,
38
  "vocab_size": 50368
39
  }
 
12
  "intermediate_size": 2048,
13
  "layer_norm_eps": 1e-12,
14
  "max_position_embeddings": 8192,
 
 
15
  "model_type": "ModernALBERT_MoL",
16
  "num_attention_heads": 16,
17
  "num_expert_layers": 6,
18
  "num_hidden_layers": 18,
19
  "num_static_groups": 2,
20
  "pad_token_id": 0,
21
+ "router_aux_loss_coef": 0.04,
22
+ "router_warmup_steps": 5000,
23
+ "routing_noise_std": 1.0,
24
+ "routing_noise_type": "gumbel",
 
 
 
 
 
25
  "static_group_depth": 3,
26
  "top_k_experts": 1,
27
  "torch_dtype": "float32",
28
  "transformers_version": "4.55.2",
 
29
  "use_gated_residual": true,
30
  "vocab_size": 50368
31
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:84671dfd45d54fd75ba4294b63b0c9dc3531a762b27ff0fc49caf47ad7c88e72
3
  size 283504080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92c77ded2738bd7a5fc9380e5633a3df2f125ff5e951b85ff95149f715dbbebc
3
  size 283504080