# File size: 3,472 Bytes
# commit: b815ebb
---
# Molmo multimodal model configuration: a Qwen2.5-7B language backbone paired
# with a SigLIP2 vision encoder (see the two init_path entries below).
model_name: molmo
# --- Language model (transformer decoder) ---
llm:
  # Core dimensions: 28 layers, 28 query heads with 4 KV heads (grouped-query
  # attention), hidden size 3584 — consistent with the Qwen2.5-7B checkpoint
  # loaded via init_path below.
  d_model: 3584
  n_heads: 28
  n_kv_heads: 4
  head_dim: null
  qkv_bias: true
  clip_qkv: null
  n_layers: 28
  mlp_ratio: 4
  mlp_hidden_size: 37888
  activation_type: swiglu
  block_type: sequential
  # Rotary position embeddings (RoPE).
  rope: true
  rope_full_precision: true
  rope_theta: 1000000.0
  rope_type: default
  # The scaling knobs below apply to non-default RoPE variants; all unset here.
  rope_factor: null
  rope_high_freq_factor: null
  rope_low_freq_factor: null
  rope_original_max_position_embeddings: null
  # Attention implementation: SDPA, with attention computed in float32.
  attention_type: sdpa
  float32_attention: true
  attention_dropout: 0.0
  attention_layer_norm: false
  attention_layer_norm_type: olmo
  residual_dropout: 0.0
  # The only nonzero dropout in this config; applied to response-token
  # residuals only — NOTE(review): inferred from the key name; confirm against
  # the model implementation.
  response_residual_dropout: 0.1
  # RMSNorm with affine scale, eps 1e-6.
  layer_norm_type: rms
  layer_norm_with_affine: true
  layer_norm_eps: 1.0e-06
  attention_layer_norm_with_affine: true
  max_sequence_length: 4096
  max_position_embeddings: null
  include_bias: false
  bias_for_layer_norm: null
  norm_after: false
  # Mixture-of-experts knobs — NOTE(review): block_type above is `sequential`,
  # so these are presumably inactive defaults; verify in the model code.
  moe_num_experts: 8
  moe_top_k: 2
  moe_mlp_impl: sparse
  moe_log_expert_assignment: false
  moe_shared_expert: false
  moe_lbl_in_fp32: false
  moe_interleave: false
  moe_loss_weight: 0.1
  moe_zloss_weight: null
  moe_dropless: true
  moe_capacity_factor: 1.25
  embedding_dropout: 0.0
  scale_logits: false
  # Base tokenizer vocab plus 128 extra embedding slots — presumably for new
  # multimodal special tokens; verify against the tokenizer setup.
  vocab_size: 152064
  additional_vocab_size: 128
  weight_tying: false
  embedding_size: 152064
  use_position_ids: true
  tokenizer:
    identifier: Qwen/Qwen2.5-7B
    tokenizer_dir: null
    depth_tokens: false
  # Pretrained LLM weights to initialize from (GCS path).
  init_path: gs://mm-olmo/pretrained_llms/qwen2.5-7b.pt
  init_incremental: null
  new_embedding_init_range: 0.02
  initializer_range: 0.02
  normalize_input_embeds: false
  # Memory/compile settings: activation-checkpoint whole layers, compile the
  # transformer blocks — NOTE(review): presumably torch.compile granularity;
  # confirm.
  activation_checkpoint: whole_layer
  compile: blocks
  fix_pad_tokenizer: false
  resize_vocab: false
  init_std: 0.02
  init_fn: normal
  init_cutoff_factor: null
# --- Vision encoder (ViT) and connector ---
vision_backbone:
  vit:
    # SigLIP ViT: 378x378 input with 14x14 patches -> 27x27 = 729 positions
    # (matches image_num_pos below), 27 layers, width 1152.
    image_model_type: siglip
    image_default_input_size:
    - 378
    - 378
    image_patch_size: 14
    image_pos_patch_size: 14
    image_emb_dim: 1152
    image_num_heads: 16
    image_num_key_value_heads: 16
    image_num_layers: 27
    image_head_dim: 72
    image_mlp_dim: 4304
    image_mlp_activations: gelu_pytorch_tanh
    image_dropout_rate: 0.0
    image_num_pos: 729
    image_norm_eps: 1.0e-06
    attention_dropout: 0.0
    residual_dropout: 0.0
    initializer_range: 0.02
    float32_attention: true
    attention_type: sdpa
    activation_checkpointing: true
    # Pretrained image-encoder weights (GCS path).
    init_path: gs://mm-olmo/pretrained_image_encoders/siglip2-so400m-14-384.pt
    resize_mode: siglip
    pad_value: 0.0
    normalize: siglip
  # How ViT features are pooled and projected into the LLM embedding space.
  image_pooling_2d: attention_meanq
  pooling_attention_mask: false
  image_projector: mlp
  image_padding_embed: null
  # ViT layers whose features feed the connector — presumably indexed from the
  # end (negative offsets); confirm in the vision-backbone code.
  vit_layers:
  - -3
  - -9
  skip_unused_layers: true
  image_feature_dropout: 0.0
  connector_activation_checkpointing: true
  compile_vit: blocks
# --- Prompt / message formatting ---
data_formatter:
  prompt_templates: none
  message_format: none
  system_prompt: style_and_length
  always_start_with_space: false
  default_inference_len: 65
  select_answer: best
  debug: false
  image_last: false
  format_message_list: null
  p_one_message: 0.0
# --- Multimodal preprocessing (image cropping / token layout) ---
mm_preprocessor:
  # Overlapping-crop strategy: up to 8 crops per image (6 per image when
  # multiple images are present), pooled in 2x2 windows.
  crop_mode: overlap-and-resize-c2
  max_crops: 8
  max_images: null
  max_multi_image_crops: 6
  pooling_w: 2
  pooling_h: 2
  # Crop overlap margins — units not evident from this file; confirm.
  overlap_margins:
  - 4
  - 4
  use_col_tokens: true
  loss_token_weighting: null
  legacy_image_mask: false
  max_answer_len: null
  img_aug: false
bi_directional_attn: null
# LoRA fine-tuning options (all null here — LoRA not configured).
lora_enable: null
lora_rank: null
lora_alpha: null
lora_dropout: null
lora_bias: null
# Action discretization — NOTE(review): n_action_bins/norm_stats suggest a
# vision-language-action (robotics) training use; confirm with the pipeline
# that consumes this config.
n_action_bins: 256
norm_stats: null