Update config
Files changed:
- config.json (+0 -2)
- configuration_chatglm.py (+0 -3)
config.json CHANGED

@@ -19,7 +19,6 @@
 "fp32_residual_connection": false,
 "hidden_dropout": 0.0,
 "hidden_size": 4096,
-"interleaved_qkv": false,
 "kv_channels": 128,
 "layernorm_epsilon": 1e-05,
 "multi_query_attention": true,
@@ -30,7 +29,6 @@
 "padded_vocab_size": 65024,
 "post_layer_norm": true,
 "rmsnorm": true,
-"rotary_percent": 0.5,
 "seq_length": 32768,
 "use_cache": true,
 "torch_dtype": "float16",
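The two keys dropped above ("interleaved_qkv" and "rotary_percent") should therefore no longer show up on a freshly loaded config. A minimal check, assuming the transformers AutoConfig API and the THUDM/chatglm2-6b repo id (neither is named in this commit):

    # Sketch only: the repo id and expected values are assumptions, not part of this commit.
    from transformers import AutoConfig

    config = AutoConfig.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)

    cfg = config.to_dict()
    assert "interleaved_qkv" not in cfg
    assert "rotary_percent" not in cfg
    print(config.hidden_size, config.seq_length)  # per config.json above: 4096 32768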
configuration_chatglm.py CHANGED

@@ -21,7 +21,6 @@ class ChatGLMConfig(PretrainedConfig):
         add_qkv_bias=False,
         interleaved_qkv=False,
         bias_dropout_fusion=True,
-        rotary_percent=1.0,
         multi_query_attention=False,
         multi_query_group_num=1,
         apply_query_key_layer_scaling=True,
@@ -45,9 +44,7 @@ class ChatGLMConfig(PretrainedConfig):
         self.post_layer_norm = post_layer_norm
         self.add_bias_linear = add_bias_linear
         self.add_qkv_bias = add_qkv_bias
-        self.interleaved_qkv = interleaved_qkv
         self.bias_dropout_fusion = bias_dropout_fusion
-        self.rotary_percent = rotary_percent
         self.multi_query_attention = multi_query_attention
         self.multi_query_group_num = multi_query_group_num
         self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
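On the Python side, rotary_percent disappears from the ChatGLMConfig constructor, and neither interleaved_qkv nor rotary_percent is stored as an attribute any more (interleaved_qkv is still accepted as a keyword argument, just not assigned). A rough sketch of the relevant part of the class after this commit; only the lines visible in the diff come from the source, while the surrounding structure (model_type, **kwargs handling) and the elision of other parameters are assumptions:

    # Sketch of ChatGLMConfig after this commit; most parameters elided for brevity.
    from transformers import PretrainedConfig

    class ChatGLMConfig(PretrainedConfig):
        model_type = "chatglm"  # assumed, not shown in the diff

        def __init__(
            self,
            add_qkv_bias=False,
            interleaved_qkv=False,  # still a parameter, but no longer assigned to self
            bias_dropout_fusion=True,
            multi_query_attention=False,
            multi_query_group_num=1,
            apply_query_key_layer_scaling=True,
            **kwargs,
        ):
            self.add_qkv_bias = add_qkv_bias
            self.bias_dropout_fusion = bias_dropout_fusion
            self.multi_query_attention = multi_query_attention
            self.multi_query_group_num = multi_query_group_num
            self.apply_query_key_layer_scaling = apply_query_key_layer_scaling
            super().__init__(**kwargs)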