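# Model configuration for a hierarchical autoregressive transformer (HAT):
# a small byte-level encoder and decoder (vocab_size: 256, i.e. one entry per
# byte) wrapped around a large word-level backbone. The auto_map at the bottom
# binds this file to the custom HATArchitectureConfig / HATForCausalLM classes
# shipped with the repository, so loading requires trust_remote_code=True.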
encoder_config:
  vocab_size: 256
  hidden_size: 1024
  num_hidden_layers: 6
  num_attention_heads: 8
  num_key_value_heads: 8
  rms_norm_eps: 1.0e-05
  intermediate_size: 2816
  max_position_embeddings: 262144
  rope_scaling:
    rope_type: default
  rope_theta: 100000
  mlp_bias: false
  use_cache: true
  sliding_window: 768
  transformers_version: null
  key_query_norm: true
  key_query_norm_per_head: true
  is_neox_style: true
  cross_attention_config:
    hidden_size_q: 4096
    hidden_size_kv: 1024
    hidden_size: 4096
    num_attention_heads: 32
    attention_num_kv_heads: 32
    word_window_size: 1
    key_query_norm: true
    key_query_norm_per_head: true
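# Reading of the block above (derived, not additional settings): the encoder
# is a 6-layer, 1024-wide byte-level stack using full multi-head attention
# (num_key_value_heads == num_attention_heads; head_dim = 1024 / 8 = 128).
# Its cross_attention_config appears to pool byte-level states into the
# backbone's word space: 4096-dim queries (hidden_size_q) attend over
# 1024-dim encoder keys/values (hidden_size_kv), with word_window_size: 1.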
backbone_config:
  vocab_size: 0
  hidden_size: 4096
  num_hidden_layers: 32
  num_attention_heads: 32
  num_key_value_heads: 8
  rms_norm_eps: 1.0e-05
  intermediate_size: 14336
  max_position_embeddings: 32900
  rope_scaling:
    rope_type: default
  rope_theta: 500000
  mlp_bias: false
  use_cache: true
  sliding_window: null
  transformers_version: null
  key_query_norm: true
  key_query_norm_per_head: true
  is_neox_style: true
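# Derived from the block above: the backbone is a Llama-style 7B-class stack
# (32 layers x 4096 hidden, intermediate_size 14336, head_dim = 4096 / 32 = 128)
# with grouped-query attention: 32 query heads share 8 KV heads (4:1 ratio).
# vocab_size: 0 suggests it has no token embedding table and instead consumes
# the word-level vectors produced by the encoder's cross-attention pooling.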
decoder_config:
  vocab_size: 256
  hidden_size: 1024
  num_hidden_layers: 4
  num_attention_heads: 8
  num_key_value_heads: 8
  rms_norm_eps: 1.0e-05
  intermediate_size: 2816
  max_position_embeddings: 262144
  rope_scaling:
    rope_type: default
  rope_theta: 100000
  mlp_bias: false
  use_cache: true
  sliding_window: 768
  transformers_version: null
  key_query_norm: true
  key_query_norm_per_head: true
  is_neox_style: true
  cross_attn_every_layer: true
  cross_attention_config:
    hidden_size_q: 1024
    hidden_size_kv: 4096
    hidden_size: 1024
    num_attention_heads: 8
    attention_num_kv_heads: 8
    word_window_size: 1
    key_query_norm: true
    key_query_norm_per_head: true
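# Derived from the block above: the decoder mirrors the encoder's byte-level
# geometry but with 4 layers, and cross_attn_every_layer: true indicates that
# every decoder layer cross-attends to the backbone: 1024-dim byte queries
# (hidden_size_q) over 4096-dim backbone keys/values (hidden_size_kv).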
model_type: hierarchical_autoregressive_transformer
transformers_version: 4.46.3
auto_map:
  AutoConfig: config.HATArchitectureConfig
  AutoModelForCausalLM: model.HATForCausalLM
special_token_dict: {}
max_word_size: 100
sliding_window: 768
max_position_embeddings: 262144
torch_dtype: bfloat16
architectures:
  - HATDecoderForCausalLM
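# Minimal loading sketch (kept as a comment so this file remains valid YAML;
# the repo id below is a hypothetical placeholder, and trust_remote_code=True
# is required because auto_map points at classes defined in the repository's
# config.py / model.py):
#
#   import torch
#   from transformers import AutoModelForCausalLM
#
#   model = AutoModelForCausalLM.from_pretrained(
#       "user/hat-model",            # hypothetical repo id; substitute the actual one
#       trust_remote_code=True,      # resolves auto_map -> model.HATForCausalLM
#       torch_dtype=torch.bfloat16,  # matches torch_dtype above
#   )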