{ "dd_meta_major_version": 1, "dd_meta_minor_version": 4, "state_table_updates": [ { "state_table_idx": 0, "update_func": 1, "update_arg": 1 } ], "op_list": [ { "name": "MatMulNBits_2_0", "type": "MladfMatMul", "in_args": [ "/model/layers.0/input_layernorm/output_0.out5_4_0" ], "const_args": [ "model.layers.0.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.0.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.0.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.0.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.0/attn/qk_proj/Add/output_0.out5_4_0" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.0.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.0/input_layernorm/output_0.out5_4_0" ], "const_args": [ "model.layers.0.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.0.attn.v_proj.Add.bias.preformat", "model.layers.0.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.0.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.0.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "3", "1" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.0/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.0/attn/qk_proj/Add/output_0.out5_4_0", "past_key_values.0.key", "past_key_values.0.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.0/attn/GroupQueryAttention/output_0.out2_0", "present.0.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "0", "0", "2", "0", "1", "1", "6", "0", "2", "0" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.0.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.0/attn/GroupQueryAttention/output_0.out2_0" ], "const_args": [ "model.layers.0.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.0.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.0.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.0.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.0/attn/o_proj/MatMulNBits/output_0.out5_4_1" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_0", "type": "FlatRMSAdd", "in_args": [ "/model/embed_tokens/Gather/output_0.out4_0", "/model/layers.0/attn/o_proj/MatMulNBits/output_0.out5_4_1" ], "const_args": [ "model.layers.0.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.0/post_attention_layernorm/output_3.out4_0", "/model/layers.0/post_attention_layernorm/output_0.out4_0" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_0", "type": "FlatMLP", "in_args": [ "/model/layers.0/post_attention_layernorm/output_0.out4_0" ], "const_args": [ "model.layers.0.mlp.gate_proj.MatMulNBits.qweight", "model.layers.0.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.0.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.0.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.0.mlp.up_proj.MatMulNBits.qweight", "model.layers.0.mlp.up_proj.MatMulNBits.scales.f", "model.layers.0.mlp.up_proj.MatMulNBits.qzeros", "model.layers.0.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.0/mlp/Mul/output_0.out3_0" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.0.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.0/mlp/Mul/output_0.out3_0" ], "const_args": [ "model.layers.0.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.0.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.0.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.0.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.0/mlp/down_proj/MatMulNBits/output_0.out5_4_2" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_1", "type": "FlatRMSAdd", "in_args": [ "/model/layers.0/post_attention_layernorm/output_3.out4_0", "/model/layers.0/mlp/down_proj/MatMulNBits/output_0.out5_4_2" ], "const_args": [ "model.layers.1.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.1/input_layernorm/output_3.out4_1", "/model/layers.1/input_layernorm/output_0.out4_1" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_1", "type": "MladfMatMul", "in_args": [ "/model/layers.1/input_layernorm/output_0.out4_1" ], "const_args": [ "model.layers.1.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.1.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.1.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.1.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.1/attn/qk_proj/Add/output_0.out5_4_3" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.1.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.1/input_layernorm/output_0.out4_1" ], "const_args": [ "model.layers.1.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.1.attn.v_proj.Add.bias.preformat", "model.layers.1.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.1.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.1.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "7", "3" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.1/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.1/attn/qk_proj/Add/output_0.out5_4_3", "past_key_values.1.key", "past_key_values.1.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.1/attn/GroupQueryAttention/output_0.out2_1", "present.1.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "4", "2", "2", "0", "5", "3", "6", "0", "6", "2" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.1.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.1/attn/GroupQueryAttention/output_0.out2_1" ], "const_args": [ "model.layers.1.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.1.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.1.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.1.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.1/attn/o_proj/MatMulNBits/output_0.out5_4_4" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_2", "type": "FlatRMSAdd", "in_args": [ "/model/layers.1/input_layernorm/output_3.out4_1", "/model/layers.1/attn/o_proj/MatMulNBits/output_0.out5_4_4" ], "const_args": [ "model.layers.1.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.1/post_attention_layernorm/output_3.out4_2", "/model/layers.1/post_attention_layernorm/output_0.out4_2" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_1", "type": "FlatMLP", "in_args": [ "/model/layers.1/post_attention_layernorm/output_0.out4_2" ], "const_args": [ "model.layers.1.mlp.gate_proj.MatMulNBits.qweight", "model.layers.1.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.1.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.1.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.1.mlp.up_proj.MatMulNBits.qweight", "model.layers.1.mlp.up_proj.MatMulNBits.scales.f", "model.layers.1.mlp.up_proj.MatMulNBits.qzeros", "model.layers.1.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.1/mlp/Mul/output_0.out3_1" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.1.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.1/mlp/Mul/output_0.out3_1" ], "const_args": [ "model.layers.1.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.1.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.1.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.1.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.1/mlp/down_proj/MatMulNBits/output_0.out5_4_5" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_3", "type": "FlatRMSAdd", "in_args": [ "/model/layers.1/post_attention_layernorm/output_3.out4_2", "/model/layers.1/mlp/down_proj/MatMulNBits/output_0.out5_4_5" ], "const_args": [ "model.layers.2.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.2/input_layernorm/output_3.out4_3", "/model/layers.2/input_layernorm/output_0.out4_3" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_2", "type": "MladfMatMul", "in_args": [ "/model/layers.2/input_layernorm/output_0.out4_3" ], "const_args": [ "model.layers.2.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.2.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.2.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.2.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.2/attn/qk_proj/Add/output_0.out5_4_6" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.2.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.2/input_layernorm/output_0.out4_3" ], "const_args": [ "model.layers.2.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.2.attn.v_proj.Add.bias.preformat", "model.layers.2.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.2.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.2.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "11", "5" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.2/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.2/attn/qk_proj/Add/output_0.out5_4_6", "past_key_values.2.key", "past_key_values.2.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.2/attn/GroupQueryAttention/output_0.out2_2", "present.2.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "8", "4", "2", "0", "9", "5", "6", "0", "10", "4" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.2.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.2/attn/GroupQueryAttention/output_0.out2_2" ], "const_args": [ "model.layers.2.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.2.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.2.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.2.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.2/attn/o_proj/MatMulNBits/output_0.out5_4_7" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_4", "type": "FlatRMSAdd", "in_args": [ "/model/layers.2/input_layernorm/output_3.out4_3", "/model/layers.2/attn/o_proj/MatMulNBits/output_0.out5_4_7" ], "const_args": [ "model.layers.2.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.2/post_attention_layernorm/output_3.out4_4", "/model/layers.2/post_attention_layernorm/output_0.out4_4" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_2", "type": "FlatMLP", "in_args": [ "/model/layers.2/post_attention_layernorm/output_0.out4_4" ], "const_args": [ "model.layers.2.mlp.gate_proj.MatMulNBits.qweight", "model.layers.2.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.2.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.2.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.2.mlp.up_proj.MatMulNBits.qweight", "model.layers.2.mlp.up_proj.MatMulNBits.scales.f", "model.layers.2.mlp.up_proj.MatMulNBits.qzeros", "model.layers.2.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.2/mlp/Mul/output_0.out3_2" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.2.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.2/mlp/Mul/output_0.out3_2" ], "const_args": [ "model.layers.2.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.2.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.2.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.2.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.2/mlp/down_proj/MatMulNBits/output_0.out5_4_8" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_5", "type": "FlatRMSAdd", "in_args": [ "/model/layers.2/post_attention_layernorm/output_3.out4_4", "/model/layers.2/mlp/down_proj/MatMulNBits/output_0.out5_4_8" ], "const_args": [ "model.layers.3.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.3/input_layernorm/output_3.out4_5", "/model/layers.3/input_layernorm/output_0.out4_5" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_3", "type": "MladfMatMul", "in_args": [ "/model/layers.3/input_layernorm/output_0.out4_5" ], "const_args": [ "model.layers.3.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.3.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.3.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.3.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.3/attn/qk_proj/Add/output_0.out5_4_9" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.3.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.3/input_layernorm/output_0.out4_5" ], "const_args": [ "model.layers.3.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.3.attn.v_proj.Add.bias.preformat", "model.layers.3.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.3.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.3.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "15", "7" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.3/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.3/attn/qk_proj/Add/output_0.out5_4_9", "past_key_values.3.key", "past_key_values.3.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.3/attn/GroupQueryAttention/output_0.out2_3", "present.3.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "12", "6", "2", "0", "13", "7", "6", "0", "14", "6" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.3.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.3/attn/GroupQueryAttention/output_0.out2_3" ], "const_args": [ "model.layers.3.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.3.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.3.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.3.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.3/attn/o_proj/MatMulNBits/output_0.out5_4_10" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_6", "type": "FlatRMSAdd", "in_args": [ "/model/layers.3/input_layernorm/output_3.out4_5", "/model/layers.3/attn/o_proj/MatMulNBits/output_0.out5_4_10" ], "const_args": [ "model.layers.3.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.3/post_attention_layernorm/output_3.out4_6", "/model/layers.3/post_attention_layernorm/output_0.out4_6" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_3", "type": "FlatMLP", "in_args": [ "/model/layers.3/post_attention_layernorm/output_0.out4_6" ], "const_args": [ "model.layers.3.mlp.gate_proj.MatMulNBits.qweight", "model.layers.3.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.3.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.3.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.3.mlp.up_proj.MatMulNBits.qweight", "model.layers.3.mlp.up_proj.MatMulNBits.scales.f", "model.layers.3.mlp.up_proj.MatMulNBits.qzeros", "model.layers.3.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.3/mlp/Mul/output_0.out3_3" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.3.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.3/mlp/Mul/output_0.out3_3" ], "const_args": [ "model.layers.3.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.3.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.3.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.3.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.3/mlp/down_proj/MatMulNBits/output_0.out5_4_11" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_7", "type": "FlatRMSAdd", "in_args": [ "/model/layers.3/post_attention_layernorm/output_3.out4_6", "/model/layers.3/mlp/down_proj/MatMulNBits/output_0.out5_4_11" ], "const_args": [ "model.layers.4.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.4/input_layernorm/output_3.out4_7", "/model/layers.4/input_layernorm/output_0.out4_7" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_4", "type": "MladfMatMul", "in_args": [ "/model/layers.4/input_layernorm/output_0.out4_7" ], "const_args": [ "model.layers.4.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.4.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.4.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.4.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.4/attn/qk_proj/Add/output_0.out5_4_12" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.4.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.4/input_layernorm/output_0.out4_7" ], "const_args": [ "model.layers.4.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.4.attn.v_proj.Add.bias.preformat", "model.layers.4.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.4.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.4.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "19", "9" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.4/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.4/attn/qk_proj/Add/output_0.out5_4_12", "past_key_values.4.key", "past_key_values.4.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.4/attn/GroupQueryAttention/output_0.out2_4", "present.4.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "16", "8", "2", "0", "17", "9", "6", "0", "18", "8" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.4.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.4/attn/GroupQueryAttention/output_0.out2_4" ], "const_args": [ "model.layers.4.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.4.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.4.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.4.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.4/attn/o_proj/MatMulNBits/output_0.out5_4_13" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_8", "type": "FlatRMSAdd", "in_args": [ "/model/layers.4/input_layernorm/output_3.out4_7", "/model/layers.4/attn/o_proj/MatMulNBits/output_0.out5_4_13" ], "const_args": [ "model.layers.4.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.4/post_attention_layernorm/output_3.out4_8", "/model/layers.4/post_attention_layernorm/output_0.out4_8" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_4", "type": "FlatMLP", "in_args": [ "/model/layers.4/post_attention_layernorm/output_0.out4_8" ], "const_args": [ "model.layers.4.mlp.gate_proj.MatMulNBits.qweight", "model.layers.4.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.4.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.4.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.4.mlp.up_proj.MatMulNBits.qweight", "model.layers.4.mlp.up_proj.MatMulNBits.scales.f", "model.layers.4.mlp.up_proj.MatMulNBits.qzeros", "model.layers.4.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.4/mlp/Mul/output_0.out3_4" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.4.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.4/mlp/Mul/output_0.out3_4" ], "const_args": [ "model.layers.4.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.4.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.4.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.4.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.4/mlp/down_proj/MatMulNBits/output_0.out5_4_14" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_9", "type": "FlatRMSAdd", "in_args": [ "/model/layers.4/post_attention_layernorm/output_3.out4_8", "/model/layers.4/mlp/down_proj/MatMulNBits/output_0.out5_4_14" ], "const_args": [ "model.layers.5.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.5/input_layernorm/output_3.out4_9", "/model/layers.5/input_layernorm/output_0.out4_9" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_5", "type": "MladfMatMul", "in_args": [ "/model/layers.5/input_layernorm/output_0.out4_9" ], "const_args": [ "model.layers.5.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.5.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.5.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.5.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.5/attn/qk_proj/Add/output_0.out5_4_15" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.5.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.5/input_layernorm/output_0.out4_9" ], "const_args": [ "model.layers.5.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.5.attn.v_proj.Add.bias.preformat", "model.layers.5.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.5.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.5.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "23", "11" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.5/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.5/attn/qk_proj/Add/output_0.out5_4_15", "past_key_values.5.key", "past_key_values.5.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.5/attn/GroupQueryAttention/output_0.out2_5", "present.5.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "20", "10", "2", "0", "21", "11", "6", "0", "22", "10" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.5.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.5/attn/GroupQueryAttention/output_0.out2_5" ], "const_args": [ "model.layers.5.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.5.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.5.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.5.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.5/attn/o_proj/MatMulNBits/output_0.out5_4_16" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_10", "type": "FlatRMSAdd", "in_args": [ "/model/layers.5/input_layernorm/output_3.out4_9", "/model/layers.5/attn/o_proj/MatMulNBits/output_0.out5_4_16" ], "const_args": [ "model.layers.5.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.5/post_attention_layernorm/output_3.out4_10", "/model/layers.5/post_attention_layernorm/output_0.out4_10" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_5", "type": "FlatMLP", "in_args": [ "/model/layers.5/post_attention_layernorm/output_0.out4_10" ], "const_args": [ "model.layers.5.mlp.gate_proj.MatMulNBits.qweight", "model.layers.5.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.5.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.5.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.5.mlp.up_proj.MatMulNBits.qweight", "model.layers.5.mlp.up_proj.MatMulNBits.scales.f", "model.layers.5.mlp.up_proj.MatMulNBits.qzeros", "model.layers.5.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.5/mlp/Mul/output_0.out3_5" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.5.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.5/mlp/Mul/output_0.out3_5" ], "const_args": [ "model.layers.5.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.5.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.5.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.5.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.5/mlp/down_proj/MatMulNBits/output_0.out5_4_17" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_11", "type": "FlatRMSAdd", "in_args": [ "/model/layers.5/post_attention_layernorm/output_3.out4_10", "/model/layers.5/mlp/down_proj/MatMulNBits/output_0.out5_4_17" ], "const_args": [ "model.layers.6.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.6/input_layernorm/output_3.out4_11", "/model/layers.6/input_layernorm/output_0.out4_11" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_6", "type": "MladfMatMul", "in_args": [ "/model/layers.6/input_layernorm/output_0.out4_11" ], "const_args": [ "model.layers.6.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.6.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.6.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.6.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.6/attn/qk_proj/Add/output_0.out5_4_18" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.6.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.6/input_layernorm/output_0.out4_11" ], "const_args": [ "model.layers.6.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.6.attn.v_proj.Add.bias.preformat", "model.layers.6.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.6.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.6.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "27", "13" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.6/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.6/attn/qk_proj/Add/output_0.out5_4_18", "past_key_values.6.key", "past_key_values.6.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.6/attn/GroupQueryAttention/output_0.out2_6", "present.6.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "24", "12", "2", "0", "25", "13", "6", "0", "26", "12" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.6.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.6/attn/GroupQueryAttention/output_0.out2_6" ], "const_args": [ "model.layers.6.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.6.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.6.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.6.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.6/attn/o_proj/MatMulNBits/output_0.out5_4_19" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_12", "type": "FlatRMSAdd", "in_args": [ "/model/layers.6/input_layernorm/output_3.out4_11", "/model/layers.6/attn/o_proj/MatMulNBits/output_0.out5_4_19" ], "const_args": [ "model.layers.6.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.6/post_attention_layernorm/output_3.out4_12", "/model/layers.6/post_attention_layernorm/output_0.out4_12" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_6", "type": "FlatMLP", "in_args": [ "/model/layers.6/post_attention_layernorm/output_0.out4_12" ], "const_args": [ "model.layers.6.mlp.gate_proj.MatMulNBits.qweight", "model.layers.6.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.6.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.6.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.6.mlp.up_proj.MatMulNBits.qweight", "model.layers.6.mlp.up_proj.MatMulNBits.scales.f", "model.layers.6.mlp.up_proj.MatMulNBits.qzeros", "model.layers.6.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.6/mlp/Mul/output_0.out3_6" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.6.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.6/mlp/Mul/output_0.out3_6" ], "const_args": [ "model.layers.6.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.6.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.6.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.6.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.6/mlp/down_proj/MatMulNBits/output_0.out5_4_20" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_13", "type": "FlatRMSAdd", "in_args": [ "/model/layers.6/post_attention_layernorm/output_3.out4_12", "/model/layers.6/mlp/down_proj/MatMulNBits/output_0.out5_4_20" ], "const_args": [ "model.layers.7.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.7/input_layernorm/output_3.out4_13", "/model/layers.7/input_layernorm/output_0.out4_13" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_7", "type": "MladfMatMul", "in_args": [ "/model/layers.7/input_layernorm/output_0.out4_13" ], "const_args": [ "model.layers.7.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.7.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.7.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.7.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.7/attn/qk_proj/Add/output_0.out5_4_21" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.7.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.7/input_layernorm/output_0.out4_13" ], "const_args": [ "model.layers.7.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.7.attn.v_proj.Add.bias.preformat", "model.layers.7.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.7.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.7.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "31", "15" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.7/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.7/attn/qk_proj/Add/output_0.out5_4_21", "past_key_values.7.key", "past_key_values.7.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.7/attn/GroupQueryAttention/output_0.out2_7", "present.7.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "28", "14", "2", "0", "29", "15", "6", "0", "30", "14" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.7.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.7/attn/GroupQueryAttention/output_0.out2_7" ], "const_args": [ "model.layers.7.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.7.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.7.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.7.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.7/attn/o_proj/MatMulNBits/output_0.out5_4_22" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_14", "type": "FlatRMSAdd", "in_args": [ "/model/layers.7/input_layernorm/output_3.out4_13", "/model/layers.7/attn/o_proj/MatMulNBits/output_0.out5_4_22" ], "const_args": [ "model.layers.7.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.7/post_attention_layernorm/output_3.out4_14", "/model/layers.7/post_attention_layernorm/output_0.out4_14" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_7", "type": "FlatMLP", "in_args": [ "/model/layers.7/post_attention_layernorm/output_0.out4_14" ], "const_args": [ "model.layers.7.mlp.gate_proj.MatMulNBits.qweight", "model.layers.7.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.7.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.7.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.7.mlp.up_proj.MatMulNBits.qweight", "model.layers.7.mlp.up_proj.MatMulNBits.scales.f", "model.layers.7.mlp.up_proj.MatMulNBits.qzeros", "model.layers.7.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.7/mlp/Mul/output_0.out3_7" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.7.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.7/mlp/Mul/output_0.out3_7" ], "const_args": [ "model.layers.7.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.7.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.7.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.7.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.7/mlp/down_proj/MatMulNBits/output_0.out5_4_23" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_15", "type": "FlatRMSAdd", "in_args": [ "/model/layers.7/post_attention_layernorm/output_3.out4_14", "/model/layers.7/mlp/down_proj/MatMulNBits/output_0.out5_4_23" ], "const_args": [ "model.layers.8.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.8/input_layernorm/output_3.out4_15", "/model/layers.8/input_layernorm/output_0.out4_15" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_8", "type": "MladfMatMul", "in_args": [ "/model/layers.8/input_layernorm/output_0.out4_15" ], "const_args": [ "model.layers.8.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.8.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.8.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.8.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.8/attn/qk_proj/Add/output_0.out5_4_24" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.8.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.8/input_layernorm/output_0.out4_15" ], "const_args": [ "model.layers.8.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.8.attn.v_proj.Add.bias.preformat", "model.layers.8.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.8.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.8.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "35", "17" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.8/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.8/attn/qk_proj/Add/output_0.out5_4_24", "past_key_values.8.key", "past_key_values.8.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.8/attn/GroupQueryAttention/output_0.out2_8", "present.8.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "32", "16", "2", "0", "33", "17", "6", "0", "34", "16" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.8.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.8/attn/GroupQueryAttention/output_0.out2_8" ], "const_args": [ "model.layers.8.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.8.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.8.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.8.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.8/attn/o_proj/MatMulNBits/output_0.out5_4_25" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_16", "type": "FlatRMSAdd", "in_args": [ "/model/layers.8/input_layernorm/output_3.out4_15", "/model/layers.8/attn/o_proj/MatMulNBits/output_0.out5_4_25" ], "const_args": [ "model.layers.8.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.8/post_attention_layernorm/output_3.out4_16", "/model/layers.8/post_attention_layernorm/output_0.out4_16" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_8", "type": "FlatMLP", "in_args": [ "/model/layers.8/post_attention_layernorm/output_0.out4_16" ], "const_args": [ "model.layers.8.mlp.gate_proj.MatMulNBits.qweight", "model.layers.8.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.8.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.8.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.8.mlp.up_proj.MatMulNBits.qweight", "model.layers.8.mlp.up_proj.MatMulNBits.scales.f", "model.layers.8.mlp.up_proj.MatMulNBits.qzeros", "model.layers.8.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.8/mlp/Mul/output_0.out3_8" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.8.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.8/mlp/Mul/output_0.out3_8" ], "const_args": [ "model.layers.8.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.8.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.8.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.8.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.8/mlp/down_proj/MatMulNBits/output_0.out5_4_26" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_17", "type": "FlatRMSAdd", "in_args": [ "/model/layers.8/post_attention_layernorm/output_3.out4_16", "/model/layers.8/mlp/down_proj/MatMulNBits/output_0.out5_4_26" ], "const_args": [ "model.layers.9.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.9/input_layernorm/output_3.out4_17", "/model/layers.9/input_layernorm/output_0.out4_17" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_9", "type": "MladfMatMul", "in_args": [ "/model/layers.9/input_layernorm/output_0.out4_17" ], "const_args": [ "model.layers.9.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.9.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.9.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.9.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.9/attn/qk_proj/Add/output_0.out5_4_27" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.9.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.9/input_layernorm/output_0.out4_17" ], "const_args": [ "model.layers.9.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.9.attn.v_proj.Add.bias.preformat", "model.layers.9.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.9.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.9.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "39", "19" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.9/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.9/attn/qk_proj/Add/output_0.out5_4_27", "past_key_values.9.key", "past_key_values.9.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.9/attn/GroupQueryAttention/output_0.out2_9", "present.9.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "36", "18", "2", "0", "37", "19", "6", "0", "38", "18" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.9.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.9/attn/GroupQueryAttention/output_0.out2_9" ], "const_args": [ "model.layers.9.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.9.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.9.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.9.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.9/attn/o_proj/MatMulNBits/output_0.out5_4_28" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_18", "type": "FlatRMSAdd", "in_args": [ "/model/layers.9/input_layernorm/output_3.out4_17", "/model/layers.9/attn/o_proj/MatMulNBits/output_0.out5_4_28" ], "const_args": [ "model.layers.9.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.9/post_attention_layernorm/output_3.out4_18", "/model/layers.9/post_attention_layernorm/output_0.out4_18" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_9", "type": "FlatMLP", "in_args": [ "/model/layers.9/post_attention_layernorm/output_0.out4_18" ], "const_args": [ "model.layers.9.mlp.gate_proj.MatMulNBits.qweight", "model.layers.9.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.9.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.9.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.9.mlp.up_proj.MatMulNBits.qweight", "model.layers.9.mlp.up_proj.MatMulNBits.scales.f", "model.layers.9.mlp.up_proj.MatMulNBits.qzeros", "model.layers.9.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.9/mlp/Mul/output_0.out3_9" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.9.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.9/mlp/Mul/output_0.out3_9" ], "const_args": [ "model.layers.9.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.9.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.9.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.9.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.9/mlp/down_proj/MatMulNBits/output_0.out5_4_29" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_19", "type": "FlatRMSAdd", "in_args": [ "/model/layers.9/post_attention_layernorm/output_3.out4_18", "/model/layers.9/mlp/down_proj/MatMulNBits/output_0.out5_4_29" ], "const_args": [ "model.layers.10.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.10/input_layernorm/output_3.out4_19", "/model/layers.10/input_layernorm/output_0.out4_19" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_10", "type": "MladfMatMul", "in_args": [ "/model/layers.10/input_layernorm/output_0.out4_19" ], "const_args": [ "model.layers.10.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.10.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.10.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.10.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.10/attn/qk_proj/Add/output_0.out5_4_30" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.10.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.10/input_layernorm/output_0.out4_19" ], "const_args": [ "model.layers.10.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.10.attn.v_proj.Add.bias.preformat", "model.layers.10.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.10.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.10.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "43", "21" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.10/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.10/attn/qk_proj/Add/output_0.out5_4_30", "past_key_values.10.key", "past_key_values.10.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.10/attn/GroupQueryAttention/output_0.out2_10", "present.10.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "40", "20", "2", "0", "41", "21", "6", "0", "42", "20" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.10.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.10/attn/GroupQueryAttention/output_0.out2_10" ], "const_args": [ "model.layers.10.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.10.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.10.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.10.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.10/attn/o_proj/MatMulNBits/output_0.out5_4_31" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_20", "type": "FlatRMSAdd", "in_args": [ "/model/layers.10/input_layernorm/output_3.out4_19", "/model/layers.10/attn/o_proj/MatMulNBits/output_0.out5_4_31" ], "const_args": [ "model.layers.10.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.10/post_attention_layernorm/output_3.out4_20", "/model/layers.10/post_attention_layernorm/output_0.out4_20" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_10", "type": "FlatMLP", "in_args": [ "/model/layers.10/post_attention_layernorm/output_0.out4_20" ], "const_args": [ "model.layers.10.mlp.gate_proj.MatMulNBits.qweight", "model.layers.10.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.10.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.10.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.10.mlp.up_proj.MatMulNBits.qweight", "model.layers.10.mlp.up_proj.MatMulNBits.scales.f", "model.layers.10.mlp.up_proj.MatMulNBits.qzeros", "model.layers.10.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.10/mlp/Mul/output_0.out3_10" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.10.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.10/mlp/Mul/output_0.out3_10" ], "const_args": [ "model.layers.10.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.10.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.10.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.10.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.10/mlp/down_proj/MatMulNBits/output_0.out5_4_32" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_21", "type": "FlatRMSAdd", "in_args": [ "/model/layers.10/post_attention_layernorm/output_3.out4_20", "/model/layers.10/mlp/down_proj/MatMulNBits/output_0.out5_4_32" ], "const_args": [ "model.layers.11.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.11/input_layernorm/output_3.out4_21", "/model/layers.11/input_layernorm/output_0.out4_21" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_11", "type": "MladfMatMul", "in_args": [ "/model/layers.11/input_layernorm/output_0.out4_21" ], "const_args": [ "model.layers.11.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.11.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.11.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.11.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.11/attn/qk_proj/Add/output_0.out5_4_33" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.11.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.11/input_layernorm/output_0.out4_21" ], "const_args": [ "model.layers.11.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.11.attn.v_proj.Add.bias.preformat", "model.layers.11.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.11.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.11.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "47", "23" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.11/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.11/attn/qk_proj/Add/output_0.out5_4_33", "past_key_values.11.key", "past_key_values.11.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.11/attn/GroupQueryAttention/output_0.out2_11", "present.11.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "44", "22", "2", "0", "45", "23", "6", "0", "46", "22" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.11.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.11/attn/GroupQueryAttention/output_0.out2_11" ], "const_args": [ "model.layers.11.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.11.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.11.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.11.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.11/attn/o_proj/MatMulNBits/output_0.out5_4_34" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_22", "type": "FlatRMSAdd", "in_args": [ "/model/layers.11/input_layernorm/output_3.out4_21", "/model/layers.11/attn/o_proj/MatMulNBits/output_0.out5_4_34" ], "const_args": [ "model.layers.11.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.11/post_attention_layernorm/output_3.out4_22", "/model/layers.11/post_attention_layernorm/output_0.out4_22" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_11", "type": "FlatMLP", "in_args": [ "/model/layers.11/post_attention_layernorm/output_0.out4_22" ], "const_args": [ "model.layers.11.mlp.gate_proj.MatMulNBits.qweight", "model.layers.11.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.11.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.11.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.11.mlp.up_proj.MatMulNBits.qweight", "model.layers.11.mlp.up_proj.MatMulNBits.scales.f", "model.layers.11.mlp.up_proj.MatMulNBits.qzeros", "model.layers.11.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.11/mlp/Mul/output_0.out3_11" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.11.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.11/mlp/Mul/output_0.out3_11" ], "const_args": [ "model.layers.11.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.11.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.11.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.11.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.11/mlp/down_proj/MatMulNBits/output_0.out5_4_35" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_23", "type": "FlatRMSAdd", "in_args": [ "/model/layers.11/post_attention_layernorm/output_3.out4_22", "/model/layers.11/mlp/down_proj/MatMulNBits/output_0.out5_4_35" ], "const_args": [ "model.layers.12.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.12/input_layernorm/output_3.out4_23", "/model/layers.12/input_layernorm/output_0.out4_23" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_12", "type": "MladfMatMul", "in_args": [ "/model/layers.12/input_layernorm/output_0.out4_23" ], "const_args": [ "model.layers.12.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.12.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.12.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.12.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.12/attn/qk_proj/Add/output_0.out5_4_36" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.12.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.12/input_layernorm/output_0.out4_23" ], "const_args": [ "model.layers.12.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.12.attn.v_proj.Add.bias.preformat", "model.layers.12.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.12.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.12.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "51", "25" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.12/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.12/attn/qk_proj/Add/output_0.out5_4_36", "past_key_values.12.key", "past_key_values.12.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.12/attn/GroupQueryAttention/output_0.out2_12", "present.12.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "48", "24", "2", "0", "49", "25", "6", "0", "50", "24" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.12.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.12/attn/GroupQueryAttention/output_0.out2_12" ], "const_args": [ "model.layers.12.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.12.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.12.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.12.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.12/attn/o_proj/MatMulNBits/output_0.out5_4_37" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_24", "type": "FlatRMSAdd", "in_args": [ "/model/layers.12/input_layernorm/output_3.out4_23", "/model/layers.12/attn/o_proj/MatMulNBits/output_0.out5_4_37" ], "const_args": [ "model.layers.12.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.12/post_attention_layernorm/output_3.out4_24", "/model/layers.12/post_attention_layernorm/output_0.out4_24" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_12", "type": "FlatMLP", "in_args": [ "/model/layers.12/post_attention_layernorm/output_0.out4_24" ], "const_args": [ "model.layers.12.mlp.gate_proj.MatMulNBits.qweight", "model.layers.12.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.12.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.12.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.12.mlp.up_proj.MatMulNBits.qweight", "model.layers.12.mlp.up_proj.MatMulNBits.scales.f", "model.layers.12.mlp.up_proj.MatMulNBits.qzeros", "model.layers.12.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.12/mlp/Mul/output_0.out3_12" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.12.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.12/mlp/Mul/output_0.out3_12" ], "const_args": [ "model.layers.12.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.12.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.12.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.12.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.12/mlp/down_proj/MatMulNBits/output_0.out5_4_38" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_25", "type": "FlatRMSAdd", "in_args": [ "/model/layers.12/post_attention_layernorm/output_3.out4_24", "/model/layers.12/mlp/down_proj/MatMulNBits/output_0.out5_4_38" ], "const_args": [ "model.layers.13.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.13/input_layernorm/output_3.out4_25", "/model/layers.13/input_layernorm/output_0.out4_25" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_13", "type": "MladfMatMul", "in_args": [ "/model/layers.13/input_layernorm/output_0.out4_25" ], "const_args": [ "model.layers.13.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.13.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.13.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.13.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.13/attn/qk_proj/Add/output_0.out5_4_39" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.13.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.13/input_layernorm/output_0.out4_25" ], "const_args": [ "model.layers.13.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.13.attn.v_proj.Add.bias.preformat", "model.layers.13.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.13.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.13.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "55", "27" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.13/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.13/attn/qk_proj/Add/output_0.out5_4_39", "past_key_values.13.key", "past_key_values.13.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.13/attn/GroupQueryAttention/output_0.out2_13", "present.13.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "52", "26", "2", "0", "53", "27", "6", "0", "54", "26" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.13.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.13/attn/GroupQueryAttention/output_0.out2_13" ], "const_args": [ "model.layers.13.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.13.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.13.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.13.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.13/attn/o_proj/MatMulNBits/output_0.out5_4_40" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_26", "type": "FlatRMSAdd", "in_args": [ "/model/layers.13/input_layernorm/output_3.out4_25", "/model/layers.13/attn/o_proj/MatMulNBits/output_0.out5_4_40" ], "const_args": [ "model.layers.13.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.13/post_attention_layernorm/output_3.out4_26", "/model/layers.13/post_attention_layernorm/output_0.out4_26" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_13", "type": "FlatMLP", "in_args": [ "/model/layers.13/post_attention_layernorm/output_0.out4_26" ], "const_args": [ "model.layers.13.mlp.gate_proj.MatMulNBits.qweight", "model.layers.13.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.13.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.13.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.13.mlp.up_proj.MatMulNBits.qweight", "model.layers.13.mlp.up_proj.MatMulNBits.scales.f", "model.layers.13.mlp.up_proj.MatMulNBits.qzeros", "model.layers.13.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.13/mlp/Mul/output_0.out3_13" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.13.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.13/mlp/Mul/output_0.out3_13" ], "const_args": [ "model.layers.13.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.13.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.13.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.13.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.13/mlp/down_proj/MatMulNBits/output_0.out5_4_41" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_27", "type": "FlatRMSAdd", "in_args": [ "/model/layers.13/post_attention_layernorm/output_3.out4_26", "/model/layers.13/mlp/down_proj/MatMulNBits/output_0.out5_4_41" ], "const_args": [ "model.layers.14.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.14/input_layernorm/output_3.out4_27", "/model/layers.14/input_layernorm/output_0.out4_27" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_14", "type": "MladfMatMul", "in_args": [ "/model/layers.14/input_layernorm/output_0.out4_27" ], "const_args": [ "model.layers.14.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.14.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.14.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.14.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.14/attn/qk_proj/Add/output_0.out5_4_42" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.14.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.14/input_layernorm/output_0.out4_27" ], "const_args": [ "model.layers.14.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.14.attn.v_proj.Add.bias.preformat", "model.layers.14.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.14.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.14.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "59", "29" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.14/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.14/attn/qk_proj/Add/output_0.out5_4_42", "past_key_values.14.key", "past_key_values.14.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.14/attn/GroupQueryAttention/output_0.out2_14", "present.14.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "56", "28", "2", "0", "57", "29", "6", "0", "58", "28" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.14.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.14/attn/GroupQueryAttention/output_0.out2_14" ], "const_args": [ "model.layers.14.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.14.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.14.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.14.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.14/attn/o_proj/MatMulNBits/output_0.out5_4_43" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_28", "type": "FlatRMSAdd", "in_args": [ "/model/layers.14/input_layernorm/output_3.out4_27", "/model/layers.14/attn/o_proj/MatMulNBits/output_0.out5_4_43" ], "const_args": [ "model.layers.14.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.14/post_attention_layernorm/output_3.out4_28", "/model/layers.14/post_attention_layernorm/output_0.out4_28" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_14", "type": "FlatMLP", "in_args": [ "/model/layers.14/post_attention_layernorm/output_0.out4_28" ], "const_args": [ "model.layers.14.mlp.gate_proj.MatMulNBits.qweight", "model.layers.14.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.14.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.14.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.14.mlp.up_proj.MatMulNBits.qweight", "model.layers.14.mlp.up_proj.MatMulNBits.scales.f", "model.layers.14.mlp.up_proj.MatMulNBits.qzeros", "model.layers.14.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.14/mlp/Mul/output_0.out3_14" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.14.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.14/mlp/Mul/output_0.out3_14" ], "const_args": [ "model.layers.14.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.14.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.14.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.14.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.14/mlp/down_proj/MatMulNBits/output_0.out5_4_44" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_29", "type": "FlatRMSAdd", "in_args": [ "/model/layers.14/post_attention_layernorm/output_3.out4_28", "/model/layers.14/mlp/down_proj/MatMulNBits/output_0.out5_4_44" ], "const_args": [ "model.layers.15.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.15/input_layernorm/output_3.out4_29", "/model/layers.15/input_layernorm/output_0.out4_29" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_15", "type": "MladfMatMul", "in_args": [ "/model/layers.15/input_layernorm/output_0.out4_29" ], "const_args": [ "model.layers.15.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.15.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.15.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.15.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.15/attn/qk_proj/Add/output_0.out5_4_45" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.15.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.15/input_layernorm/output_0.out4_29" ], "const_args": [ "model.layers.15.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.15.attn.v_proj.Add.bias.preformat", "model.layers.15.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.15.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.15.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "63", "31" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.15/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.15/attn/qk_proj/Add/output_0.out5_4_45", "past_key_values.15.key", "past_key_values.15.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.15/attn/GroupQueryAttention/output_0.out2_15", "present.15.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "60", "30", "2", "0", "61", "31", "6", "0", "62", "30" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.15.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.15/attn/GroupQueryAttention/output_0.out2_15" ], "const_args": [ "model.layers.15.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.15.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.15.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.15.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.15/attn/o_proj/MatMulNBits/output_0.out5_4_46" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_30", "type": "FlatRMSAdd", "in_args": [ "/model/layers.15/input_layernorm/output_3.out4_29", "/model/layers.15/attn/o_proj/MatMulNBits/output_0.out5_4_46" ], "const_args": [ "model.layers.15.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.15/post_attention_layernorm/output_3.out4_30", "/model/layers.15/post_attention_layernorm/output_0.out4_30" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_15", "type": "FlatMLP", "in_args": [ "/model/layers.15/post_attention_layernorm/output_0.out4_30" ], "const_args": [ "model.layers.15.mlp.gate_proj.MatMulNBits.qweight", "model.layers.15.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.15.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.15.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.15.mlp.up_proj.MatMulNBits.qweight", "model.layers.15.mlp.up_proj.MatMulNBits.scales.f", "model.layers.15.mlp.up_proj.MatMulNBits.qzeros", "model.layers.15.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.15/mlp/Mul/output_0.out3_15" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.15.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.15/mlp/Mul/output_0.out3_15" ], "const_args": [ "model.layers.15.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.15.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.15.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.15.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.15/mlp/down_proj/MatMulNBits/output_0.out5_4_47" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_31", "type": "FlatRMSAdd", "in_args": [ "/model/layers.15/post_attention_layernorm/output_3.out4_30", "/model/layers.15/mlp/down_proj/MatMulNBits/output_0.out5_4_47" ], "const_args": [ "model.layers.16.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.16/input_layernorm/output_3.out4_31", "/model/layers.16/input_layernorm/output_0.out4_31" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_16", "type": "MladfMatMul", "in_args": [ "/model/layers.16/input_layernorm/output_0.out4_31" ], "const_args": [ "model.layers.16.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.16.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.16.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.16.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.16/attn/qk_proj/Add/output_0.out5_4_48" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.16.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.16/input_layernorm/output_0.out4_31" ], "const_args": [ "model.layers.16.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.16.attn.v_proj.Add.bias.preformat", "model.layers.16.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.16.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.16.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "67", "33" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.16/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.16/attn/qk_proj/Add/output_0.out5_4_48", "past_key_values.16.key", "past_key_values.16.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.16/attn/GroupQueryAttention/output_0.out2_16", "present.16.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "64", "32", "2", "0", "65", "33", "6", "0", "66", "32" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.16.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.16/attn/GroupQueryAttention/output_0.out2_16" ], "const_args": [ "model.layers.16.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.16.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.16.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.16.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.16/attn/o_proj/MatMulNBits/output_0.out5_4_49" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_32", "type": "FlatRMSAdd", "in_args": [ "/model/layers.16/input_layernorm/output_3.out4_31", "/model/layers.16/attn/o_proj/MatMulNBits/output_0.out5_4_49" ], "const_args": [ "model.layers.16.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.16/post_attention_layernorm/output_3.out4_32", "/model/layers.16/post_attention_layernorm/output_0.out4_32" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_16", "type": "FlatMLP", "in_args": [ "/model/layers.16/post_attention_layernorm/output_0.out4_32" ], "const_args": [ "model.layers.16.mlp.gate_proj.MatMulNBits.qweight", "model.layers.16.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.16.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.16.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.16.mlp.up_proj.MatMulNBits.qweight", "model.layers.16.mlp.up_proj.MatMulNBits.scales.f", "model.layers.16.mlp.up_proj.MatMulNBits.qzeros", "model.layers.16.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.16/mlp/Mul/output_0.out3_16" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.16.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.16/mlp/Mul/output_0.out3_16" ], "const_args": [ "model.layers.16.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.16.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.16.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.16.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.16/mlp/down_proj/MatMulNBits/output_0.out5_4_50" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_33", "type": "FlatRMSAdd", "in_args": [ "/model/layers.16/post_attention_layernorm/output_3.out4_32", "/model/layers.16/mlp/down_proj/MatMulNBits/output_0.out5_4_50" ], "const_args": [ "model.layers.17.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.17/input_layernorm/output_3.out4_33", "/model/layers.17/input_layernorm/output_0.out4_33" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_17", "type": "MladfMatMul", "in_args": [ "/model/layers.17/input_layernorm/output_0.out4_33" ], "const_args": [ "model.layers.17.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.17.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.17.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.17.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.17/attn/qk_proj/Add/output_0.out5_4_51" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.17.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.17/input_layernorm/output_0.out4_33" ], "const_args": [ "model.layers.17.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.17.attn.v_proj.Add.bias.preformat", "model.layers.17.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.17.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.17.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "71", "35" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.17/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.17/attn/qk_proj/Add/output_0.out5_4_51", "past_key_values.17.key", "past_key_values.17.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.17/attn/GroupQueryAttention/output_0.out2_17", "present.17.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "68", "34", "2", "0", "69", "35", "6", "0", "70", "34" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.17.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.17/attn/GroupQueryAttention/output_0.out2_17" ], "const_args": [ "model.layers.17.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.17.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.17.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.17.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.17/attn/o_proj/MatMulNBits/output_0.out5_4_52" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_34", "type": "FlatRMSAdd", "in_args": [ "/model/layers.17/input_layernorm/output_3.out4_33", "/model/layers.17/attn/o_proj/MatMulNBits/output_0.out5_4_52" ], "const_args": [ "model.layers.17.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.17/post_attention_layernorm/output_3.out4_34", "/model/layers.17/post_attention_layernorm/output_0.out4_34" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_17", "type": "FlatMLP", "in_args": [ "/model/layers.17/post_attention_layernorm/output_0.out4_34" ], "const_args": [ "model.layers.17.mlp.gate_proj.MatMulNBits.qweight", "model.layers.17.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.17.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.17.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.17.mlp.up_proj.MatMulNBits.qweight", "model.layers.17.mlp.up_proj.MatMulNBits.scales.f", "model.layers.17.mlp.up_proj.MatMulNBits.qzeros", "model.layers.17.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.17/mlp/Mul/output_0.out3_17" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.17.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.17/mlp/Mul/output_0.out3_17" ], "const_args": [ "model.layers.17.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.17.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.17.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.17.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.17/mlp/down_proj/MatMulNBits/output_0.out5_4_53" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_35", "type": "FlatRMSAdd", "in_args": [ "/model/layers.17/post_attention_layernorm/output_3.out4_34", "/model/layers.17/mlp/down_proj/MatMulNBits/output_0.out5_4_53" ], "const_args": [ "model.layers.18.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.18/input_layernorm/output_3.out4_35", "/model/layers.18/input_layernorm/output_0.out4_35" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_18", "type": "MladfMatMul", "in_args": [ "/model/layers.18/input_layernorm/output_0.out4_35" ], "const_args": [ "model.layers.18.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.18.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.18.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.18.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.18/attn/qk_proj/Add/output_0.out5_4_54" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.18.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.18/input_layernorm/output_0.out4_35" ], "const_args": [ "model.layers.18.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.18.attn.v_proj.Add.bias.preformat", "model.layers.18.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.18.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.18.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "75", "37" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.18/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.18/attn/qk_proj/Add/output_0.out5_4_54", "past_key_values.18.key", "past_key_values.18.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.18/attn/GroupQueryAttention/output_0.out2_18", "present.18.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "72", "36", "2", "0", "73", "37", "6", "0", "74", "36" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.18.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.18/attn/GroupQueryAttention/output_0.out2_18" ], "const_args": [ "model.layers.18.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.18.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.18.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.18.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.18/attn/o_proj/MatMulNBits/output_0.out5_4_55" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_36", "type": "FlatRMSAdd", "in_args": [ "/model/layers.18/input_layernorm/output_3.out4_35", "/model/layers.18/attn/o_proj/MatMulNBits/output_0.out5_4_55" ], "const_args": [ "model.layers.18.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.18/post_attention_layernorm/output_3.out4_36", "/model/layers.18/post_attention_layernorm/output_0.out4_36" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_18", "type": "FlatMLP", "in_args": [ "/model/layers.18/post_attention_layernorm/output_0.out4_36" ], "const_args": [ "model.layers.18.mlp.gate_proj.MatMulNBits.qweight", "model.layers.18.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.18.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.18.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.18.mlp.up_proj.MatMulNBits.qweight", "model.layers.18.mlp.up_proj.MatMulNBits.scales.f", "model.layers.18.mlp.up_proj.MatMulNBits.qzeros", "model.layers.18.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.18/mlp/Mul/output_0.out3_18" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.18.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.18/mlp/Mul/output_0.out3_18" ], "const_args": [ "model.layers.18.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.18.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.18.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.18.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.18/mlp/down_proj/MatMulNBits/output_0.out5_4_56" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_37", "type": "FlatRMSAdd", "in_args": [ "/model/layers.18/post_attention_layernorm/output_3.out4_36", "/model/layers.18/mlp/down_proj/MatMulNBits/output_0.out5_4_56" ], "const_args": [ "model.layers.19.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.19/input_layernorm/output_3.out4_37", "/model/layers.19/input_layernorm/output_0.out4_37" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_19", "type": "MladfMatMul", "in_args": [ "/model/layers.19/input_layernorm/output_0.out4_37" ], "const_args": [ "model.layers.19.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.19.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.19.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.19.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.19/attn/qk_proj/Add/output_0.out5_4_57" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.19.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.19/input_layernorm/output_0.out4_37" ], "const_args": [ "model.layers.19.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.19.attn.v_proj.Add.bias.preformat", "model.layers.19.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.19.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.19.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "79", "39" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.19/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.19/attn/qk_proj/Add/output_0.out5_4_57", "past_key_values.19.key", "past_key_values.19.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.19/attn/GroupQueryAttention/output_0.out2_19", "present.19.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "76", "38", "2", "0", "77", "39", "6", "0", "78", "38" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.19.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.19/attn/GroupQueryAttention/output_0.out2_19" ], "const_args": [ "model.layers.19.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.19.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.19.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.19.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.19/attn/o_proj/MatMulNBits/output_0.out5_4_58" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_38", "type": "FlatRMSAdd", "in_args": [ "/model/layers.19/input_layernorm/output_3.out4_37", "/model/layers.19/attn/o_proj/MatMulNBits/output_0.out5_4_58" ], "const_args": [ "model.layers.19.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.19/post_attention_layernorm/output_3.out4_38", "/model/layers.19/post_attention_layernorm/output_0.out4_38" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_19", "type": "FlatMLP", "in_args": [ "/model/layers.19/post_attention_layernorm/output_0.out4_38" ], "const_args": [ "model.layers.19.mlp.gate_proj.MatMulNBits.qweight", "model.layers.19.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.19.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.19.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.19.mlp.up_proj.MatMulNBits.qweight", "model.layers.19.mlp.up_proj.MatMulNBits.scales.f", "model.layers.19.mlp.up_proj.MatMulNBits.qzeros", "model.layers.19.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.19/mlp/Mul/output_0.out3_19" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.19.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.19/mlp/Mul/output_0.out3_19" ], "const_args": [ "model.layers.19.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.19.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.19.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.19.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.19/mlp/down_proj/MatMulNBits/output_0.out5_4_59" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_39", "type": "FlatRMSAdd", "in_args": [ "/model/layers.19/post_attention_layernorm/output_3.out4_38", "/model/layers.19/mlp/down_proj/MatMulNBits/output_0.out5_4_59" ], "const_args": [ "model.layers.20.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.20/input_layernorm/output_3.out4_39", "/model/layers.20/input_layernorm/output_0.out4_39" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_20", "type": "MladfMatMul", "in_args": [ "/model/layers.20/input_layernorm/output_0.out4_39" ], "const_args": [ "model.layers.20.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.20.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.20.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.20.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.20/attn/qk_proj/Add/output_0.out5_4_60" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.20.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.20/input_layernorm/output_0.out4_39" ], "const_args": [ "model.layers.20.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.20.attn.v_proj.Add.bias.preformat", "model.layers.20.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.20.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.20.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "83", "41" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.20/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.20/attn/qk_proj/Add/output_0.out5_4_60", "past_key_values.20.key", "past_key_values.20.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.20/attn/GroupQueryAttention/output_0.out2_20", "present.20.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "80", "40", "2", "0", "81", "41", "6", "0", "82", "40" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.20.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.20/attn/GroupQueryAttention/output_0.out2_20" ], "const_args": [ "model.layers.20.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.20.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.20.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.20.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.20/attn/o_proj/MatMulNBits/output_0.out5_4_61" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_40", "type": "FlatRMSAdd", "in_args": [ "/model/layers.20/input_layernorm/output_3.out4_39", "/model/layers.20/attn/o_proj/MatMulNBits/output_0.out5_4_61" ], "const_args": [ "model.layers.20.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.20/post_attention_layernorm/output_3.out4_40", "/model/layers.20/post_attention_layernorm/output_0.out4_40" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_20", "type": "FlatMLP", "in_args": [ "/model/layers.20/post_attention_layernorm/output_0.out4_40" ], "const_args": [ "model.layers.20.mlp.gate_proj.MatMulNBits.qweight", "model.layers.20.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.20.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.20.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.20.mlp.up_proj.MatMulNBits.qweight", "model.layers.20.mlp.up_proj.MatMulNBits.scales.f", "model.layers.20.mlp.up_proj.MatMulNBits.qzeros", "model.layers.20.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.20/mlp/Mul/output_0.out3_20" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.20.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.20/mlp/Mul/output_0.out3_20" ], "const_args": [ "model.layers.20.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.20.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.20.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.20.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.20/mlp/down_proj/MatMulNBits/output_0.out5_4_62" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_41", "type": "FlatRMSAdd", "in_args": [ "/model/layers.20/post_attention_layernorm/output_3.out4_40", "/model/layers.20/mlp/down_proj/MatMulNBits/output_0.out5_4_62" ], "const_args": [ "model.layers.21.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.21/input_layernorm/output_3.out4_41", "/model/layers.21/input_layernorm/output_0.out4_41" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_21", "type": "MladfMatMul", "in_args": [ "/model/layers.21/input_layernorm/output_0.out4_41" ], "const_args": [ "model.layers.21.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.21.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.21.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.21.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.21/attn/qk_proj/Add/output_0.out5_4_63" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.21.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.21/input_layernorm/output_0.out4_41" ], "const_args": [ "model.layers.21.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.21.attn.v_proj.Add.bias.preformat", "model.layers.21.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.21.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.21.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "87", "43" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.21/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.21/attn/qk_proj/Add/output_0.out5_4_63", "past_key_values.21.key", "past_key_values.21.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.21/attn/GroupQueryAttention/output_0.out2_21", "present.21.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "84", "42", "2", "0", "85", "43", "6", "0", "86", "42" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.21.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.21/attn/GroupQueryAttention/output_0.out2_21" ], "const_args": [ "model.layers.21.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.21.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.21.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.21.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.21/attn/o_proj/MatMulNBits/output_0.out5_4_64" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_42", "type": "FlatRMSAdd", "in_args": [ "/model/layers.21/input_layernorm/output_3.out4_41", "/model/layers.21/attn/o_proj/MatMulNBits/output_0.out5_4_64" ], "const_args": [ "model.layers.21.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.21/post_attention_layernorm/output_3.out4_42", "/model/layers.21/post_attention_layernorm/output_0.out4_42" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_21", "type": "FlatMLP", "in_args": [ "/model/layers.21/post_attention_layernorm/output_0.out4_42" ], "const_args": [ "model.layers.21.mlp.gate_proj.MatMulNBits.qweight", "model.layers.21.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.21.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.21.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.21.mlp.up_proj.MatMulNBits.qweight", "model.layers.21.mlp.up_proj.MatMulNBits.scales.f", "model.layers.21.mlp.up_proj.MatMulNBits.qzeros", "model.layers.21.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.21/mlp/Mul/output_0.out3_21" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.21.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.21/mlp/Mul/output_0.out3_21" ], "const_args": [ "model.layers.21.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.21.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.21.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.21.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.21/mlp/down_proj/MatMulNBits/output_0.out5_4_65" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_43", "type": "FlatRMSAdd", "in_args": [ "/model/layers.21/post_attention_layernorm/output_3.out4_42", "/model/layers.21/mlp/down_proj/MatMulNBits/output_0.out5_4_65" ], "const_args": [ "model.layers.22.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.22/input_layernorm/output_3.out4_43", "/model/layers.22/input_layernorm/output_0.out4_43" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_22", "type": "MladfMatMul", "in_args": [ "/model/layers.22/input_layernorm/output_0.out4_43" ], "const_args": [ "model.layers.22.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.22.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.22.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.22.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.22/attn/qk_proj/Add/output_0.out5_4_66" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.22.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.22/input_layernorm/output_0.out4_43" ], "const_args": [ "model.layers.22.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.22.attn.v_proj.Add.bias.preformat", "model.layers.22.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.22.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.22.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "91", "45" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.22/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.22/attn/qk_proj/Add/output_0.out5_4_66", "past_key_values.22.key", "past_key_values.22.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.22/attn/GroupQueryAttention/output_0.out2_22", "present.22.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "88", "44", "2", "0", "89", "45", "6", "0", "90", "44" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.22.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.22/attn/GroupQueryAttention/output_0.out2_22" ], "const_args": [ "model.layers.22.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.22.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.22.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.22.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.22/attn/o_proj/MatMulNBits/output_0.out5_4_67" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_44", "type": "FlatRMSAdd", "in_args": [ "/model/layers.22/input_layernorm/output_3.out4_43", "/model/layers.22/attn/o_proj/MatMulNBits/output_0.out5_4_67" ], "const_args": [ "model.layers.22.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.22/post_attention_layernorm/output_3.out4_44", "/model/layers.22/post_attention_layernorm/output_0.out4_44" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_22", "type": "FlatMLP", "in_args": [ "/model/layers.22/post_attention_layernorm/output_0.out4_44" ], "const_args": [ "model.layers.22.mlp.gate_proj.MatMulNBits.qweight", "model.layers.22.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.22.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.22.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.22.mlp.up_proj.MatMulNBits.qweight", "model.layers.22.mlp.up_proj.MatMulNBits.scales.f", "model.layers.22.mlp.up_proj.MatMulNBits.qzeros", "model.layers.22.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.22/mlp/Mul/output_0.out3_22" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.22.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.22/mlp/Mul/output_0.out3_22" ], "const_args": [ "model.layers.22.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.22.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.22.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.22.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.22/mlp/down_proj/MatMulNBits/output_0.out5_4_68" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_45", "type": "FlatRMSAdd", "in_args": [ "/model/layers.22/post_attention_layernorm/output_3.out4_44", "/model/layers.22/mlp/down_proj/MatMulNBits/output_0.out5_4_68" ], "const_args": [ "model.layers.23.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.23/input_layernorm/output_3.out4_45", "/model/layers.23/input_layernorm/output_0.out4_45" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_23", "type": "MladfMatMul", "in_args": [ "/model/layers.23/input_layernorm/output_0.out4_45" ], "const_args": [ "model.layers.23.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.23.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.23.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.23.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.23/attn/qk_proj/Add/output_0.out5_4_69" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.23.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.23/input_layernorm/output_0.out4_45" ], "const_args": [ "model.layers.23.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.23.attn.v_proj.Add.bias.preformat", "model.layers.23.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.23.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.23.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "95", "47" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.23/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.23/attn/qk_proj/Add/output_0.out5_4_69", "past_key_values.23.key", "past_key_values.23.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.23/attn/GroupQueryAttention/output_0.out2_23", "present.23.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "92", "46", "2", "0", "93", "47", "6", "0", "94", "46" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.23.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.23/attn/GroupQueryAttention/output_0.out2_23" ], "const_args": [ "model.layers.23.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.23.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.23.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.23.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.23/attn/o_proj/MatMulNBits/output_0.out5_4_70" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_46", "type": "FlatRMSAdd", "in_args": [ "/model/layers.23/input_layernorm/output_3.out4_45", "/model/layers.23/attn/o_proj/MatMulNBits/output_0.out5_4_70" ], "const_args": [ "model.layers.23.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.23/post_attention_layernorm/output_3.out4_46", "/model/layers.23/post_attention_layernorm/output_0.out4_46" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_23", "type": "FlatMLP", "in_args": [ "/model/layers.23/post_attention_layernorm/output_0.out4_46" ], "const_args": [ "model.layers.23.mlp.gate_proj.MatMulNBits.qweight", "model.layers.23.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.23.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.23.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.23.mlp.up_proj.MatMulNBits.qweight", "model.layers.23.mlp.up_proj.MatMulNBits.scales.f", "model.layers.23.mlp.up_proj.MatMulNBits.qzeros", "model.layers.23.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.23/mlp/Mul/output_0.out3_23" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.23.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.23/mlp/Mul/output_0.out3_23" ], "const_args": [ "model.layers.23.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.23.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.23.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.23.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.23/mlp/down_proj/MatMulNBits/output_0.out5_4_71" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_47", "type": "FlatRMSAdd", "in_args": [ "/model/layers.23/post_attention_layernorm/output_3.out4_46", "/model/layers.23/mlp/down_proj/MatMulNBits/output_0.out5_4_71" ], "const_args": [ "model.layers.24.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.24/input_layernorm/output_3.out4_47", "/model/layers.24/input_layernorm/output_0.out4_47" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_24", "type": "MladfMatMul", "in_args": [ "/model/layers.24/input_layernorm/output_0.out4_47" ], "const_args": [ "model.layers.24.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.24.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.24.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.24.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.24/attn/qk_proj/Add/output_0.out5_4_72" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.24.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.24/input_layernorm/output_0.out4_47" ], "const_args": [ "model.layers.24.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.24.attn.v_proj.Add.bias.preformat", "model.layers.24.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.24.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.24.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "99", "49" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.24/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.24/attn/qk_proj/Add/output_0.out5_4_72", "past_key_values.24.key", "past_key_values.24.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.24/attn/GroupQueryAttention/output_0.out2_24", "present.24.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "96", "48", "2", "0", "97", "49", "6", "0", "98", "48" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.24.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.24/attn/GroupQueryAttention/output_0.out2_24" ], "const_args": [ "model.layers.24.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.24.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.24.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.24.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.24/attn/o_proj/MatMulNBits/output_0.out5_4_73" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_48", "type": "FlatRMSAdd", "in_args": [ "/model/layers.24/input_layernorm/output_3.out4_47", "/model/layers.24/attn/o_proj/MatMulNBits/output_0.out5_4_73" ], "const_args": [ "model.layers.24.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.24/post_attention_layernorm/output_3.out4_48", "/model/layers.24/post_attention_layernorm/output_0.out4_48" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_24", "type": "FlatMLP", "in_args": [ "/model/layers.24/post_attention_layernorm/output_0.out4_48" ], "const_args": [ "model.layers.24.mlp.gate_proj.MatMulNBits.qweight", "model.layers.24.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.24.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.24.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.24.mlp.up_proj.MatMulNBits.qweight", "model.layers.24.mlp.up_proj.MatMulNBits.scales.f", "model.layers.24.mlp.up_proj.MatMulNBits.qzeros", "model.layers.24.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.24/mlp/Mul/output_0.out3_24" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.24.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.24/mlp/Mul/output_0.out3_24" ], "const_args": [ "model.layers.24.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.24.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.24.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.24.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.24/mlp/down_proj/MatMulNBits/output_0.out5_4_74" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_49", "type": "FlatRMSAdd", "in_args": [ "/model/layers.24/post_attention_layernorm/output_3.out4_48", "/model/layers.24/mlp/down_proj/MatMulNBits/output_0.out5_4_74" ], "const_args": [ "model.layers.25.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.25/input_layernorm/output_3.out4_49", "/model/layers.25/input_layernorm/output_0.out4_49" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_25", "type": "MladfMatMul", "in_args": [ "/model/layers.25/input_layernorm/output_0.out4_49" ], "const_args": [ "model.layers.25.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.25.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.25.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.25.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.25/attn/qk_proj/Add/output_0.out5_4_75" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.25.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.25/input_layernorm/output_0.out4_49" ], "const_args": [ "model.layers.25.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.25.attn.v_proj.Add.bias.preformat", "model.layers.25.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.25.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.25.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "103", "51" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.25/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.25/attn/qk_proj/Add/output_0.out5_4_75", "past_key_values.25.key", "past_key_values.25.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.25/attn/GroupQueryAttention/output_0.out2_25", "present.25.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "100", "50", "2", "0", "101", "51", "6", "0", "102", "50" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.25.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.25/attn/GroupQueryAttention/output_0.out2_25" ], "const_args": [ "model.layers.25.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.25.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.25.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.25.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.25/attn/o_proj/MatMulNBits/output_0.out5_4_76" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_50", "type": "FlatRMSAdd", "in_args": [ "/model/layers.25/input_layernorm/output_3.out4_49", "/model/layers.25/attn/o_proj/MatMulNBits/output_0.out5_4_76" ], "const_args": [ "model.layers.25.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.25/post_attention_layernorm/output_3.out4_50", "/model/layers.25/post_attention_layernorm/output_0.out4_50" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_25", "type": "FlatMLP", "in_args": [ "/model/layers.25/post_attention_layernorm/output_0.out4_50" ], "const_args": [ "model.layers.25.mlp.gate_proj.MatMulNBits.qweight", "model.layers.25.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.25.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.25.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.25.mlp.up_proj.MatMulNBits.qweight", "model.layers.25.mlp.up_proj.MatMulNBits.scales.f", "model.layers.25.mlp.up_proj.MatMulNBits.qzeros", "model.layers.25.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.25/mlp/Mul/output_0.out3_25" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.25.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.25/mlp/Mul/output_0.out3_25" ], "const_args": [ "model.layers.25.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.25.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.25.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.25.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.25/mlp/down_proj/MatMulNBits/output_0.out5_4_77" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_51", "type": "FlatRMSAdd", "in_args": [ "/model/layers.25/post_attention_layernorm/output_3.out4_50", "/model/layers.25/mlp/down_proj/MatMulNBits/output_0.out5_4_77" ], "const_args": [ "model.layers.26.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.26/input_layernorm/output_3.out4_51", "/model/layers.26/input_layernorm/output_0.out4_51" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_26", "type": "MladfMatMul", "in_args": [ "/model/layers.26/input_layernorm/output_0.out4_51" ], "const_args": [ "model.layers.26.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.26.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.26.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.26.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.26/attn/qk_proj/Add/output_0.out5_4_78" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.26.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.26/input_layernorm/output_0.out4_51" ], "const_args": [ "model.layers.26.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.26.attn.v_proj.Add.bias.preformat", "model.layers.26.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.26.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.26.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "107", "53" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.26/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.26/attn/qk_proj/Add/output_0.out5_4_78", "past_key_values.26.key", "past_key_values.26.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.26/attn/GroupQueryAttention/output_0.out2_26", "present.26.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "104", "52", "2", "0", "105", "53", "6", "0", "106", "52" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.26.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.26/attn/GroupQueryAttention/output_0.out2_26" ], "const_args": [ "model.layers.26.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.26.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.26.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.26.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.26/attn/o_proj/MatMulNBits/output_0.out5_4_79" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_52", "type": "FlatRMSAdd", "in_args": [ "/model/layers.26/input_layernorm/output_3.out4_51", "/model/layers.26/attn/o_proj/MatMulNBits/output_0.out5_4_79" ], "const_args": [ "model.layers.26.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.26/post_attention_layernorm/output_3.out4_52", "/model/layers.26/post_attention_layernorm/output_0.out4_52" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_26", "type": "FlatMLP", "in_args": [ "/model/layers.26/post_attention_layernorm/output_0.out4_52" ], "const_args": [ "model.layers.26.mlp.gate_proj.MatMulNBits.qweight", "model.layers.26.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.26.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.26.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.26.mlp.up_proj.MatMulNBits.qweight", "model.layers.26.mlp.up_proj.MatMulNBits.scales.f", "model.layers.26.mlp.up_proj.MatMulNBits.qzeros", "model.layers.26.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.26/mlp/Mul/output_0.out3_26" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.26.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.26/mlp/Mul/output_0.out3_26" ], "const_args": [ "model.layers.26.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.26.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.26.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.26.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.26/mlp/down_proj/MatMulNBits/output_0.out5_4_80" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_53", "type": "FlatRMSAdd", "in_args": [ "/model/layers.26/post_attention_layernorm/output_3.out4_52", "/model/layers.26/mlp/down_proj/MatMulNBits/output_0.out5_4_80" ], "const_args": [ "model.layers.27.input_layernorm.weight.bf" ], "out_args": [ "/model/layers.27/input_layernorm/output_3.out4_53", "/model/layers.27/input_layernorm/output_0.out4_53" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "MatMulNBits_2_27", "type": "MladfMatMul", "in_args": [ "/model/layers.27/input_layernorm/output_0.out4_53" ], "const_args": [ "model.layers.27.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.27.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.27.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.27.attn.qk_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.27/attn/qk_proj/Add/output_0.out5_4_81" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1792" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "layers.27.attn.v_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.27/input_layernorm/output_0.out4_53" ], "const_args": [ "model.layers.27.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.27.attn.v_proj.Add.bias.preformat", "model.layers.27.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.27.attn.v_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "present.27.value" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "256" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "total_seq_len": { "type": "int", "value": [ "4096" ] }, "external_buffers": { "type": "int", "value": [ "5", "0", "111", "55" ] }, "update_tensor_offsets": { "type": "int", "value": [ "5", "0", "0", "256" ] } } }, { "name": "/model/layers.27/attn/GroupQueryAttention", "type": "FLATMHA", "in_args": [ "/model/layers.27/attn/qk_proj/Add/output_0.out5_4_81", "past_key_values.27.key", "past_key_values.27.value", "attention_mask_const_uint", "sin_cos_cache_token" ], "const_args": [], "out_args": [ "/model/layers.27/attn/GroupQueryAttention/output_0.out2_27", "present.27.key" ], "attrs": { "num_heads": { "type": "int", "value": [ "12" ] }, "kv_num_heads": { "type": "int", "value": [ "2" ] }, "scale": { "type": "float", "value": [ "0.0883883461356163" ] }, "softcap": { "type": "float", "value": [ "0.0" ] }, "do_rotary": { "type": "int", "value": [ "0" ] }, "rotary_interleaved": { "type": "int", "value": [ "0" ] }, "input_shape": { "type": "int", "value": [ "2", "12", "1", "4096", "128" ] }, "external_buffers": { "type": "int", "value": [ "4", "1", "0", "0", "1", "0", "108", "54", "2", "0", "109", "55", "6", "0", "110", "54" ] }, "update_tensor_offsets": { "type": "int", "value": [ "4", "0", "0", "256", "6", "0", "0", "256" ] } } }, { "name": "layers.27.attn.o_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.27/attn/GroupQueryAttention/output_0.out2_27" ], "const_args": [ "model.layers.27.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.27.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.27.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.27.attn.o_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.27/attn/o_proj/MatMulNBits/output_0.out5_4_82" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_54", "type": "FlatRMSAdd", "in_args": [ "/model/layers.27/input_layernorm/output_3.out4_53", "/model/layers.27/attn/o_proj/MatMulNBits/output_0.out5_4_82" ], "const_args": [ "model.layers.27.post_attention_layernorm.weight.bf" ], "out_args": [ "/model/layers.27/post_attention_layernorm/output_3.out4_54", "/model/layers.27/post_attention_layernorm/output_0.out4_54" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "FlatMLP_3_27", "type": "FlatMLP", "in_args": [ "/model/layers.27/post_attention_layernorm/output_0.out4_54" ], "const_args": [ "model.layers.27.mlp.gate_proj.MatMulNBits.qweight", "model.layers.27.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.27.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.27.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.27.mlp.up_proj.MatMulNBits.qweight", "model.layers.27.mlp.up_proj.MatMulNBits.scales.f", "model.layers.27.mlp.up_proj.MatMulNBits.qzeros", "model.layers.27.mlp.up_proj.MatMulNBits.bias.f" ], "out_args": [ "/model/layers.27/mlp/Mul/output_0.out3_27" ], "attrs": { "input_shape": { "type": "int", "value": [ "1", "1536", "8960" ] }, "group_size": { "type": "int", "value": [ "128" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "uint8", "float", "uint8", "float", "uint8", "float", "uint8", "float" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16" ] } } }, { "name": "layers.27.mlp.down_proj", "type": "MladfMatMul", "in_args": [ "/model/layers.27/mlp/Mul/output_0.out3_27" ], "const_args": [ "model.layers.27.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.27.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.27.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.27.mlp.down_proj.MatMulNBits.qzeros.preformat" ], "out_args": [ "/model/layers.27/mlp/down_proj/MatMulNBits/output_0.out5_4_83" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "8960" ] }, "N": { "type": "int", "value": [ "1536" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } }, { "name": "FlatRMSAdd_4_55", "type": "FlatRMSAdd", "in_args": [ "/model/layers.27/post_attention_layernorm/output_3.out4_54", "/model/layers.27/mlp/down_proj/MatMulNBits/output_0.out5_4_83" ], "const_args": [ "model.layers.28.final_norm_layernorm.weight.bf" ], "out_args": [ "/model/layers.28/final_norm_layernorm/output_0.dummy", "/model/layers.28/final_norm_layernorm/output_0.out4_55" ], "attrs": { "a_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "in_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "out_dtypes": { "type": "str", "value": [ "bfloat16", "bfloat16" ] }, "c_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "b_shape": { "type": "int", "value": [ "1", "1", "1536" ] }, "is_gamma_ifm": { "type": "int", "value": [ "1" ] } } }, { "name": "/lm_head/MatMulNBits", "type": "MladfMatMul", "in_args": [ "/model/layers.28/final_norm_layernorm/output_0.out4_55" ], "const_args": [ "lm_head.MatMulNBits.qweight.preformat", "lm_head.MatMulNBits.bias.preformat", "lm_head.MatMulNBits.scales.preformat", "lm_head.MatMulNBits.qzeros.preformat" ], "out_args": [ "logits.out5_4_84" ], "attrs": { "accuracy_level": { "type": "int", "value": [ "0" ] }, "bits": { "type": "int", "value": [ "4" ] }, "block_size": { "type": "int", "value": [ "128" ] }, "K": { "type": "int", "value": [ "1536" ] }, "N": { "type": "int", "value": [ "151936" ] }, "default_shape": { "type": "int", "value": [ "1" ] }, "op_version": { "type": "str", "value": [ "flat" ] }, "group_size": { "type": "int", "value": [ "128" ] } } } ], "fused_tensors": { "in": { "buffer_size": 9328, "xrt_arg_id": 0, "packed_tensors": [ "/model/layers.0/input_layernorm/output_0.out5_4_0", "attention_mask_const_uint", "/model/embed_tokens/Gather/output_0.out4_0" ] }, "out": { "buffer_size": 306944, "xrt_arg_id": 1, "packed_tensors": [ "/model/layers.28/final_norm_layernorm/output_0.dummy", "logits.out5_4_84" ] }, "scratch": { "buffer_size": 1201152, "xrt_arg_id": 2, "packed_tensors": [ "/model/layers.0/attn/qk_proj/Add/output_0.out5_4_0", "/model/layers.0/attn/GroupQueryAttention/output_0.out2_0", "/model/layers.0/attn/o_proj/MatMulNBits/output_0.out5_4_1", "/model/layers.0/post_attention_layernorm/output_3.out4_0", "/model/layers.0/post_attention_layernorm/output_0.out4_0", "/model/layers.0/mlp/Mul/output_0.out3_0", "/model/layers.0/mlp/down_proj/MatMulNBits/output_0.out5_4_2", "/model/layers.1/input_layernorm/output_3.out4_1", "/model/layers.1/input_layernorm/output_0.out4_1", "/model/layers.1/attn/qk_proj/Add/output_0.out5_4_3", "/model/layers.1/attn/GroupQueryAttention/output_0.out2_1", "/model/layers.1/attn/o_proj/MatMulNBits/output_0.out5_4_4", "/model/layers.1/post_attention_layernorm/output_3.out4_2", "/model/layers.1/post_attention_layernorm/output_0.out4_2", "/model/layers.1/mlp/Mul/output_0.out3_1", "/model/layers.1/mlp/down_proj/MatMulNBits/output_0.out5_4_5", "/model/layers.2/input_layernorm/output_3.out4_3", "/model/layers.2/input_layernorm/output_0.out4_3", "/model/layers.2/attn/qk_proj/Add/output_0.out5_4_6", "/model/layers.2/attn/GroupQueryAttention/output_0.out2_2", "/model/layers.2/attn/o_proj/MatMulNBits/output_0.out5_4_7", "/model/layers.2/post_attention_layernorm/output_3.out4_4", "/model/layers.2/post_attention_layernorm/output_0.out4_4", "/model/layers.2/mlp/Mul/output_0.out3_2", "/model/layers.2/mlp/down_proj/MatMulNBits/output_0.out5_4_8", "/model/layers.3/input_layernorm/output_3.out4_5", "/model/layers.3/input_layernorm/output_0.out4_5", "/model/layers.3/attn/qk_proj/Add/output_0.out5_4_9", "/model/layers.3/attn/GroupQueryAttention/output_0.out2_3", "/model/layers.3/attn/o_proj/MatMulNBits/output_0.out5_4_10", "/model/layers.3/post_attention_layernorm/output_3.out4_6", "/model/layers.3/post_attention_layernorm/output_0.out4_6", "/model/layers.3/mlp/Mul/output_0.out3_3", "/model/layers.3/mlp/down_proj/MatMulNBits/output_0.out5_4_11", "/model/layers.4/input_layernorm/output_3.out4_7", "/model/layers.4/input_layernorm/output_0.out4_7", "/model/layers.4/attn/qk_proj/Add/output_0.out5_4_12", "/model/layers.4/attn/GroupQueryAttention/output_0.out2_4", "/model/layers.4/attn/o_proj/MatMulNBits/output_0.out5_4_13", "/model/layers.4/post_attention_layernorm/output_3.out4_8", "/model/layers.4/post_attention_layernorm/output_0.out4_8", "/model/layers.4/mlp/Mul/output_0.out3_4", "/model/layers.4/mlp/down_proj/MatMulNBits/output_0.out5_4_14", "/model/layers.5/input_layernorm/output_3.out4_9", "/model/layers.5/input_layernorm/output_0.out4_9", "/model/layers.5/attn/qk_proj/Add/output_0.out5_4_15", "/model/layers.5/attn/GroupQueryAttention/output_0.out2_5", "/model/layers.5/attn/o_proj/MatMulNBits/output_0.out5_4_16", "/model/layers.5/post_attention_layernorm/output_3.out4_10", "/model/layers.5/post_attention_layernorm/output_0.out4_10", "/model/layers.5/mlp/Mul/output_0.out3_5", "/model/layers.5/mlp/down_proj/MatMulNBits/output_0.out5_4_17", "/model/layers.6/input_layernorm/output_3.out4_11", "/model/layers.6/input_layernorm/output_0.out4_11", "/model/layers.6/attn/qk_proj/Add/output_0.out5_4_18", "/model/layers.6/attn/GroupQueryAttention/output_0.out2_6", "/model/layers.6/attn/o_proj/MatMulNBits/output_0.out5_4_19", "/model/layers.6/post_attention_layernorm/output_3.out4_12", "/model/layers.6/post_attention_layernorm/output_0.out4_12", "/model/layers.6/mlp/Mul/output_0.out3_6", "/model/layers.6/mlp/down_proj/MatMulNBits/output_0.out5_4_20", "/model/layers.7/input_layernorm/output_3.out4_13", "/model/layers.7/input_layernorm/output_0.out4_13", "/model/layers.7/attn/qk_proj/Add/output_0.out5_4_21", "/model/layers.7/attn/GroupQueryAttention/output_0.out2_7", "/model/layers.7/attn/o_proj/MatMulNBits/output_0.out5_4_22", "/model/layers.7/post_attention_layernorm/output_3.out4_14", "/model/layers.7/post_attention_layernorm/output_0.out4_14", "/model/layers.7/mlp/Mul/output_0.out3_7", "/model/layers.7/mlp/down_proj/MatMulNBits/output_0.out5_4_23", "/model/layers.8/input_layernorm/output_3.out4_15", "/model/layers.8/input_layernorm/output_0.out4_15", "/model/layers.8/attn/qk_proj/Add/output_0.out5_4_24", "/model/layers.8/attn/GroupQueryAttention/output_0.out2_8", "/model/layers.8/attn/o_proj/MatMulNBits/output_0.out5_4_25", "/model/layers.8/post_attention_layernorm/output_3.out4_16", "/model/layers.8/post_attention_layernorm/output_0.out4_16", "/model/layers.8/mlp/Mul/output_0.out3_8", "/model/layers.8/mlp/down_proj/MatMulNBits/output_0.out5_4_26", "/model/layers.9/input_layernorm/output_3.out4_17", "/model/layers.9/input_layernorm/output_0.out4_17", "/model/layers.9/attn/qk_proj/Add/output_0.out5_4_27", "/model/layers.9/attn/GroupQueryAttention/output_0.out2_9", "/model/layers.9/attn/o_proj/MatMulNBits/output_0.out5_4_28", "/model/layers.9/post_attention_layernorm/output_3.out4_18", "/model/layers.9/post_attention_layernorm/output_0.out4_18", "/model/layers.9/mlp/Mul/output_0.out3_9", "/model/layers.9/mlp/down_proj/MatMulNBits/output_0.out5_4_29", "/model/layers.10/input_layernorm/output_3.out4_19", "/model/layers.10/input_layernorm/output_0.out4_19", "/model/layers.10/attn/qk_proj/Add/output_0.out5_4_30", "/model/layers.10/attn/GroupQueryAttention/output_0.out2_10", "/model/layers.10/attn/o_proj/MatMulNBits/output_0.out5_4_31", "/model/layers.10/post_attention_layernorm/output_3.out4_20", "/model/layers.10/post_attention_layernorm/output_0.out4_20", "/model/layers.10/mlp/Mul/output_0.out3_10", "/model/layers.10/mlp/down_proj/MatMulNBits/output_0.out5_4_32", "/model/layers.11/input_layernorm/output_3.out4_21", "/model/layers.11/input_layernorm/output_0.out4_21", "/model/layers.11/attn/qk_proj/Add/output_0.out5_4_33", "/model/layers.11/attn/GroupQueryAttention/output_0.out2_11", "/model/layers.11/attn/o_proj/MatMulNBits/output_0.out5_4_34", "/model/layers.11/post_attention_layernorm/output_3.out4_22", "/model/layers.11/post_attention_layernorm/output_0.out4_22", "/model/layers.11/mlp/Mul/output_0.out3_11", "/model/layers.11/mlp/down_proj/MatMulNBits/output_0.out5_4_35", "/model/layers.12/input_layernorm/output_3.out4_23", "/model/layers.12/input_layernorm/output_0.out4_23", "/model/layers.12/attn/qk_proj/Add/output_0.out5_4_36", "/model/layers.12/attn/GroupQueryAttention/output_0.out2_12", "/model/layers.12/attn/o_proj/MatMulNBits/output_0.out5_4_37", "/model/layers.12/post_attention_layernorm/output_3.out4_24", "/model/layers.12/post_attention_layernorm/output_0.out4_24", "/model/layers.12/mlp/Mul/output_0.out3_12", "/model/layers.12/mlp/down_proj/MatMulNBits/output_0.out5_4_38", "/model/layers.13/input_layernorm/output_3.out4_25", "/model/layers.13/input_layernorm/output_0.out4_25", "/model/layers.13/attn/qk_proj/Add/output_0.out5_4_39", "/model/layers.13/attn/GroupQueryAttention/output_0.out2_13", "/model/layers.13/attn/o_proj/MatMulNBits/output_0.out5_4_40", "/model/layers.13/post_attention_layernorm/output_3.out4_26", "/model/layers.13/post_attention_layernorm/output_0.out4_26", "/model/layers.13/mlp/Mul/output_0.out3_13", "/model/layers.13/mlp/down_proj/MatMulNBits/output_0.out5_4_41", "/model/layers.14/input_layernorm/output_3.out4_27", "/model/layers.14/input_layernorm/output_0.out4_27", "/model/layers.14/attn/qk_proj/Add/output_0.out5_4_42", "/model/layers.14/attn/GroupQueryAttention/output_0.out2_14", "/model/layers.14/attn/o_proj/MatMulNBits/output_0.out5_4_43", "/model/layers.14/post_attention_layernorm/output_3.out4_28", "/model/layers.14/post_attention_layernorm/output_0.out4_28", "/model/layers.14/mlp/Mul/output_0.out3_14", "/model/layers.14/mlp/down_proj/MatMulNBits/output_0.out5_4_44", "/model/layers.15/input_layernorm/output_3.out4_29", "/model/layers.15/input_layernorm/output_0.out4_29", "/model/layers.15/attn/qk_proj/Add/output_0.out5_4_45", "/model/layers.15/attn/GroupQueryAttention/output_0.out2_15", "/model/layers.15/attn/o_proj/MatMulNBits/output_0.out5_4_46", "/model/layers.15/post_attention_layernorm/output_3.out4_30", "/model/layers.15/post_attention_layernorm/output_0.out4_30", "/model/layers.15/mlp/Mul/output_0.out3_15", "/model/layers.15/mlp/down_proj/MatMulNBits/output_0.out5_4_47", "/model/layers.16/input_layernorm/output_3.out4_31", "/model/layers.16/input_layernorm/output_0.out4_31", "/model/layers.16/attn/qk_proj/Add/output_0.out5_4_48", "/model/layers.16/attn/GroupQueryAttention/output_0.out2_16", "/model/layers.16/attn/o_proj/MatMulNBits/output_0.out5_4_49", "/model/layers.16/post_attention_layernorm/output_3.out4_32", "/model/layers.16/post_attention_layernorm/output_0.out4_32", "/model/layers.16/mlp/Mul/output_0.out3_16", "/model/layers.16/mlp/down_proj/MatMulNBits/output_0.out5_4_50", "/model/layers.17/input_layernorm/output_3.out4_33", "/model/layers.17/input_layernorm/output_0.out4_33", "/model/layers.17/attn/qk_proj/Add/output_0.out5_4_51", "/model/layers.17/attn/GroupQueryAttention/output_0.out2_17", "/model/layers.17/attn/o_proj/MatMulNBits/output_0.out5_4_52", "/model/layers.17/post_attention_layernorm/output_3.out4_34", "/model/layers.17/post_attention_layernorm/output_0.out4_34", "/model/layers.17/mlp/Mul/output_0.out3_17", "/model/layers.17/mlp/down_proj/MatMulNBits/output_0.out5_4_53", "/model/layers.18/input_layernorm/output_3.out4_35", "/model/layers.18/input_layernorm/output_0.out4_35", "/model/layers.18/attn/qk_proj/Add/output_0.out5_4_54", "/model/layers.18/attn/GroupQueryAttention/output_0.out2_18", "/model/layers.18/attn/o_proj/MatMulNBits/output_0.out5_4_55", "/model/layers.18/post_attention_layernorm/output_3.out4_36", "/model/layers.18/post_attention_layernorm/output_0.out4_36", "/model/layers.18/mlp/Mul/output_0.out3_18", "/model/layers.18/mlp/down_proj/MatMulNBits/output_0.out5_4_56", "/model/layers.19/input_layernorm/output_3.out4_37", "/model/layers.19/input_layernorm/output_0.out4_37", "/model/layers.19/attn/qk_proj/Add/output_0.out5_4_57", "/model/layers.19/attn/GroupQueryAttention/output_0.out2_19", "/model/layers.19/attn/o_proj/MatMulNBits/output_0.out5_4_58", "/model/layers.19/post_attention_layernorm/output_3.out4_38", "/model/layers.19/post_attention_layernorm/output_0.out4_38", "/model/layers.19/mlp/Mul/output_0.out3_19", "/model/layers.19/mlp/down_proj/MatMulNBits/output_0.out5_4_59", "/model/layers.20/input_layernorm/output_3.out4_39", "/model/layers.20/input_layernorm/output_0.out4_39", "/model/layers.20/attn/qk_proj/Add/output_0.out5_4_60", "/model/layers.20/attn/GroupQueryAttention/output_0.out2_20", "/model/layers.20/attn/o_proj/MatMulNBits/output_0.out5_4_61", "/model/layers.20/post_attention_layernorm/output_3.out4_40", "/model/layers.20/post_attention_layernorm/output_0.out4_40", "/model/layers.20/mlp/Mul/output_0.out3_20", "/model/layers.20/mlp/down_proj/MatMulNBits/output_0.out5_4_62", "/model/layers.21/input_layernorm/output_3.out4_41", "/model/layers.21/input_layernorm/output_0.out4_41", "/model/layers.21/attn/qk_proj/Add/output_0.out5_4_63", "/model/layers.21/attn/GroupQueryAttention/output_0.out2_21", "/model/layers.21/attn/o_proj/MatMulNBits/output_0.out5_4_64", "/model/layers.21/post_attention_layernorm/output_3.out4_42", "/model/layers.21/post_attention_layernorm/output_0.out4_42", "/model/layers.21/mlp/Mul/output_0.out3_21", "/model/layers.21/mlp/down_proj/MatMulNBits/output_0.out5_4_65", "/model/layers.22/input_layernorm/output_3.out4_43", "/model/layers.22/input_layernorm/output_0.out4_43", "/model/layers.22/attn/qk_proj/Add/output_0.out5_4_66", "/model/layers.22/attn/GroupQueryAttention/output_0.out2_22", "/model/layers.22/attn/o_proj/MatMulNBits/output_0.out5_4_67", "/model/layers.22/post_attention_layernorm/output_3.out4_44", "/model/layers.22/post_attention_layernorm/output_0.out4_44", "/model/layers.22/mlp/Mul/output_0.out3_22", "/model/layers.22/mlp/down_proj/MatMulNBits/output_0.out5_4_68", "/model/layers.23/input_layernorm/output_3.out4_45", "/model/layers.23/input_layernorm/output_0.out4_45", "/model/layers.23/attn/qk_proj/Add/output_0.out5_4_69", "/model/layers.23/attn/GroupQueryAttention/output_0.out2_23", "/model/layers.23/attn/o_proj/MatMulNBits/output_0.out5_4_70", "/model/layers.23/post_attention_layernorm/output_3.out4_46", "/model/layers.23/post_attention_layernorm/output_0.out4_46", "/model/layers.23/mlp/Mul/output_0.out3_23", "/model/layers.23/mlp/down_proj/MatMulNBits/output_0.out5_4_71", "/model/layers.24/input_layernorm/output_3.out4_47", "/model/layers.24/input_layernorm/output_0.out4_47", "/model/layers.24/attn/qk_proj/Add/output_0.out5_4_72", "/model/layers.24/attn/GroupQueryAttention/output_0.out2_24", "/model/layers.24/attn/o_proj/MatMulNBits/output_0.out5_4_73", "/model/layers.24/post_attention_layernorm/output_3.out4_48", "/model/layers.24/post_attention_layernorm/output_0.out4_48", "/model/layers.24/mlp/Mul/output_0.out3_24", "/model/layers.24/mlp/down_proj/MatMulNBits/output_0.out5_4_74", "/model/layers.25/input_layernorm/output_3.out4_49", "/model/layers.25/input_layernorm/output_0.out4_49", "/model/layers.25/attn/qk_proj/Add/output_0.out5_4_75", "/model/layers.25/attn/GroupQueryAttention/output_0.out2_25", "/model/layers.25/attn/o_proj/MatMulNBits/output_0.out5_4_76", "/model/layers.25/post_attention_layernorm/output_3.out4_50", "/model/layers.25/post_attention_layernorm/output_0.out4_50", "/model/layers.25/mlp/Mul/output_0.out3_25", "/model/layers.25/mlp/down_proj/MatMulNBits/output_0.out5_4_77", "/model/layers.26/input_layernorm/output_3.out4_51", "/model/layers.26/input_layernorm/output_0.out4_51", "/model/layers.26/attn/qk_proj/Add/output_0.out5_4_78", "/model/layers.26/attn/GroupQueryAttention/output_0.out2_26", "/model/layers.26/attn/o_proj/MatMulNBits/output_0.out5_4_79", "/model/layers.26/post_attention_layernorm/output_3.out4_52", "/model/layers.26/post_attention_layernorm/output_0.out4_52", "/model/layers.26/mlp/Mul/output_0.out3_26", "/model/layers.26/mlp/down_proj/MatMulNBits/output_0.out5_4_80", "/model/layers.27/input_layernorm/output_3.out4_53", "/model/layers.27/input_layernorm/output_0.out4_53", "/model/layers.27/attn/qk_proj/Add/output_0.out5_4_81", "/model/layers.27/attn/GroupQueryAttention/output_0.out2_27", "/model/layers.27/attn/o_proj/MatMulNBits/output_0.out5_4_82", "/model/layers.27/post_attention_layernorm/output_3.out4_54", "/model/layers.27/post_attention_layernorm/output_0.out4_54", "/model/layers.27/mlp/Mul/output_0.out3_27", "/model/layers.27/mlp/down_proj/MatMulNBits/output_0.out5_4_83", "/model/layers.28/final_norm_layernorm/output_0.out4_55" ] }, "const": { "buffer_size": 1218863104, "xrt_arg_id": 3, "packed_tensors": [ "model.layers.0.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.0.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.0.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.0.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.0.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.0.attn.v_proj.Add.bias.preformat", "model.layers.0.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.0.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.0.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.0.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.0.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.0.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.0.post_attention_layernorm.weight.bf", "model.layers.0.mlp.gate_proj.MatMulNBits.qweight", "model.layers.0.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.0.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.0.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.0.mlp.up_proj.MatMulNBits.qweight", "model.layers.0.mlp.up_proj.MatMulNBits.scales.f", "model.layers.0.mlp.up_proj.MatMulNBits.qzeros", "model.layers.0.mlp.up_proj.MatMulNBits.bias.f", "model.layers.0.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.0.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.0.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.0.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.1.input_layernorm.weight.bf", "model.layers.1.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.1.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.1.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.1.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.1.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.1.attn.v_proj.Add.bias.preformat", "model.layers.1.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.1.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.1.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.1.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.1.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.1.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.1.post_attention_layernorm.weight.bf", "model.layers.1.mlp.gate_proj.MatMulNBits.qweight", "model.layers.1.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.1.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.1.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.1.mlp.up_proj.MatMulNBits.qweight", "model.layers.1.mlp.up_proj.MatMulNBits.scales.f", "model.layers.1.mlp.up_proj.MatMulNBits.qzeros", "model.layers.1.mlp.up_proj.MatMulNBits.bias.f", "model.layers.1.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.1.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.1.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.1.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.2.input_layernorm.weight.bf", "model.layers.2.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.2.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.2.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.2.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.2.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.2.attn.v_proj.Add.bias.preformat", "model.layers.2.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.2.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.2.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.2.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.2.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.2.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.2.post_attention_layernorm.weight.bf", "model.layers.2.mlp.gate_proj.MatMulNBits.qweight", "model.layers.2.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.2.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.2.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.2.mlp.up_proj.MatMulNBits.qweight", "model.layers.2.mlp.up_proj.MatMulNBits.scales.f", "model.layers.2.mlp.up_proj.MatMulNBits.qzeros", "model.layers.2.mlp.up_proj.MatMulNBits.bias.f", "model.layers.2.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.2.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.2.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.2.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.3.input_layernorm.weight.bf", "model.layers.3.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.3.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.3.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.3.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.3.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.3.attn.v_proj.Add.bias.preformat", "model.layers.3.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.3.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.3.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.3.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.3.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.3.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.3.post_attention_layernorm.weight.bf", "model.layers.3.mlp.gate_proj.MatMulNBits.qweight", "model.layers.3.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.3.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.3.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.3.mlp.up_proj.MatMulNBits.qweight", "model.layers.3.mlp.up_proj.MatMulNBits.scales.f", "model.layers.3.mlp.up_proj.MatMulNBits.qzeros", "model.layers.3.mlp.up_proj.MatMulNBits.bias.f", "model.layers.3.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.3.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.3.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.3.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.4.input_layernorm.weight.bf", "model.layers.4.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.4.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.4.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.4.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.4.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.4.attn.v_proj.Add.bias.preformat", "model.layers.4.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.4.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.4.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.4.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.4.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.4.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.4.post_attention_layernorm.weight.bf", "model.layers.4.mlp.gate_proj.MatMulNBits.qweight", "model.layers.4.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.4.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.4.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.4.mlp.up_proj.MatMulNBits.qweight", "model.layers.4.mlp.up_proj.MatMulNBits.scales.f", "model.layers.4.mlp.up_proj.MatMulNBits.qzeros", "model.layers.4.mlp.up_proj.MatMulNBits.bias.f", "model.layers.4.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.4.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.4.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.4.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.5.input_layernorm.weight.bf", "model.layers.5.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.5.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.5.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.5.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.5.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.5.attn.v_proj.Add.bias.preformat", "model.layers.5.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.5.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.5.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.5.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.5.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.5.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.5.post_attention_layernorm.weight.bf", "model.layers.5.mlp.gate_proj.MatMulNBits.qweight", "model.layers.5.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.5.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.5.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.5.mlp.up_proj.MatMulNBits.qweight", "model.layers.5.mlp.up_proj.MatMulNBits.scales.f", "model.layers.5.mlp.up_proj.MatMulNBits.qzeros", "model.layers.5.mlp.up_proj.MatMulNBits.bias.f", "model.layers.5.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.5.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.5.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.5.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.6.input_layernorm.weight.bf", "model.layers.6.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.6.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.6.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.6.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.6.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.6.attn.v_proj.Add.bias.preformat", "model.layers.6.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.6.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.6.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.6.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.6.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.6.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.6.post_attention_layernorm.weight.bf", "model.layers.6.mlp.gate_proj.MatMulNBits.qweight", "model.layers.6.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.6.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.6.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.6.mlp.up_proj.MatMulNBits.qweight", "model.layers.6.mlp.up_proj.MatMulNBits.scales.f", "model.layers.6.mlp.up_proj.MatMulNBits.qzeros", "model.layers.6.mlp.up_proj.MatMulNBits.bias.f", "model.layers.6.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.6.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.6.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.6.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.7.input_layernorm.weight.bf", "model.layers.7.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.7.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.7.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.7.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.7.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.7.attn.v_proj.Add.bias.preformat", "model.layers.7.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.7.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.7.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.7.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.7.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.7.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.7.post_attention_layernorm.weight.bf", "model.layers.7.mlp.gate_proj.MatMulNBits.qweight", "model.layers.7.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.7.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.7.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.7.mlp.up_proj.MatMulNBits.qweight", "model.layers.7.mlp.up_proj.MatMulNBits.scales.f", "model.layers.7.mlp.up_proj.MatMulNBits.qzeros", "model.layers.7.mlp.up_proj.MatMulNBits.bias.f", "model.layers.7.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.7.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.7.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.7.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.8.input_layernorm.weight.bf", "model.layers.8.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.8.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.8.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.8.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.8.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.8.attn.v_proj.Add.bias.preformat", "model.layers.8.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.8.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.8.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.8.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.8.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.8.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.8.post_attention_layernorm.weight.bf", "model.layers.8.mlp.gate_proj.MatMulNBits.qweight", "model.layers.8.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.8.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.8.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.8.mlp.up_proj.MatMulNBits.qweight", "model.layers.8.mlp.up_proj.MatMulNBits.scales.f", "model.layers.8.mlp.up_proj.MatMulNBits.qzeros", "model.layers.8.mlp.up_proj.MatMulNBits.bias.f", "model.layers.8.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.8.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.8.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.8.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.9.input_layernorm.weight.bf", "model.layers.9.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.9.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.9.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.9.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.9.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.9.attn.v_proj.Add.bias.preformat", "model.layers.9.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.9.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.9.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.9.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.9.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.9.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.9.post_attention_layernorm.weight.bf", "model.layers.9.mlp.gate_proj.MatMulNBits.qweight", "model.layers.9.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.9.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.9.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.9.mlp.up_proj.MatMulNBits.qweight", "model.layers.9.mlp.up_proj.MatMulNBits.scales.f", "model.layers.9.mlp.up_proj.MatMulNBits.qzeros", "model.layers.9.mlp.up_proj.MatMulNBits.bias.f", "model.layers.9.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.9.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.9.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.9.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.10.input_layernorm.weight.bf", "model.layers.10.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.10.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.10.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.10.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.10.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.10.attn.v_proj.Add.bias.preformat", "model.layers.10.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.10.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.10.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.10.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.10.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.10.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.10.post_attention_layernorm.weight.bf", "model.layers.10.mlp.gate_proj.MatMulNBits.qweight", "model.layers.10.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.10.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.10.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.10.mlp.up_proj.MatMulNBits.qweight", "model.layers.10.mlp.up_proj.MatMulNBits.scales.f", "model.layers.10.mlp.up_proj.MatMulNBits.qzeros", "model.layers.10.mlp.up_proj.MatMulNBits.bias.f", "model.layers.10.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.10.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.10.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.10.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.11.input_layernorm.weight.bf", "model.layers.11.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.11.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.11.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.11.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.11.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.11.attn.v_proj.Add.bias.preformat", "model.layers.11.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.11.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.11.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.11.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.11.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.11.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.11.post_attention_layernorm.weight.bf", "model.layers.11.mlp.gate_proj.MatMulNBits.qweight", "model.layers.11.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.11.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.11.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.11.mlp.up_proj.MatMulNBits.qweight", "model.layers.11.mlp.up_proj.MatMulNBits.scales.f", "model.layers.11.mlp.up_proj.MatMulNBits.qzeros", "model.layers.11.mlp.up_proj.MatMulNBits.bias.f", "model.layers.11.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.11.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.11.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.11.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.12.input_layernorm.weight.bf", "model.layers.12.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.12.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.12.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.12.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.12.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.12.attn.v_proj.Add.bias.preformat", "model.layers.12.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.12.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.12.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.12.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.12.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.12.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.12.post_attention_layernorm.weight.bf", "model.layers.12.mlp.gate_proj.MatMulNBits.qweight", "model.layers.12.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.12.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.12.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.12.mlp.up_proj.MatMulNBits.qweight", "model.layers.12.mlp.up_proj.MatMulNBits.scales.f", "model.layers.12.mlp.up_proj.MatMulNBits.qzeros", "model.layers.12.mlp.up_proj.MatMulNBits.bias.f", "model.layers.12.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.12.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.12.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.12.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.13.input_layernorm.weight.bf", "model.layers.13.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.13.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.13.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.13.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.13.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.13.attn.v_proj.Add.bias.preformat", "model.layers.13.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.13.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.13.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.13.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.13.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.13.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.13.post_attention_layernorm.weight.bf", "model.layers.13.mlp.gate_proj.MatMulNBits.qweight", "model.layers.13.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.13.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.13.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.13.mlp.up_proj.MatMulNBits.qweight", "model.layers.13.mlp.up_proj.MatMulNBits.scales.f", "model.layers.13.mlp.up_proj.MatMulNBits.qzeros", "model.layers.13.mlp.up_proj.MatMulNBits.bias.f", "model.layers.13.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.13.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.13.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.13.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.14.input_layernorm.weight.bf", "model.layers.14.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.14.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.14.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.14.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.14.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.14.attn.v_proj.Add.bias.preformat", "model.layers.14.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.14.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.14.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.14.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.14.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.14.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.14.post_attention_layernorm.weight.bf", "model.layers.14.mlp.gate_proj.MatMulNBits.qweight", "model.layers.14.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.14.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.14.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.14.mlp.up_proj.MatMulNBits.qweight", "model.layers.14.mlp.up_proj.MatMulNBits.scales.f", "model.layers.14.mlp.up_proj.MatMulNBits.qzeros", "model.layers.14.mlp.up_proj.MatMulNBits.bias.f", "model.layers.14.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.14.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.14.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.14.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.15.input_layernorm.weight.bf", "model.layers.15.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.15.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.15.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.15.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.15.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.15.attn.v_proj.Add.bias.preformat", "model.layers.15.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.15.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.15.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.15.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.15.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.15.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.15.post_attention_layernorm.weight.bf", "model.layers.15.mlp.gate_proj.MatMulNBits.qweight", "model.layers.15.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.15.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.15.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.15.mlp.up_proj.MatMulNBits.qweight", "model.layers.15.mlp.up_proj.MatMulNBits.scales.f", "model.layers.15.mlp.up_proj.MatMulNBits.qzeros", "model.layers.15.mlp.up_proj.MatMulNBits.bias.f", "model.layers.15.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.15.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.15.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.15.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.16.input_layernorm.weight.bf", "model.layers.16.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.16.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.16.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.16.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.16.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.16.attn.v_proj.Add.bias.preformat", "model.layers.16.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.16.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.16.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.16.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.16.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.16.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.16.post_attention_layernorm.weight.bf", "model.layers.16.mlp.gate_proj.MatMulNBits.qweight", "model.layers.16.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.16.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.16.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.16.mlp.up_proj.MatMulNBits.qweight", "model.layers.16.mlp.up_proj.MatMulNBits.scales.f", "model.layers.16.mlp.up_proj.MatMulNBits.qzeros", "model.layers.16.mlp.up_proj.MatMulNBits.bias.f", "model.layers.16.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.16.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.16.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.16.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.17.input_layernorm.weight.bf", "model.layers.17.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.17.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.17.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.17.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.17.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.17.attn.v_proj.Add.bias.preformat", "model.layers.17.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.17.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.17.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.17.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.17.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.17.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.17.post_attention_layernorm.weight.bf", "model.layers.17.mlp.gate_proj.MatMulNBits.qweight", "model.layers.17.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.17.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.17.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.17.mlp.up_proj.MatMulNBits.qweight", "model.layers.17.mlp.up_proj.MatMulNBits.scales.f", "model.layers.17.mlp.up_proj.MatMulNBits.qzeros", "model.layers.17.mlp.up_proj.MatMulNBits.bias.f", "model.layers.17.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.17.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.17.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.17.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.18.input_layernorm.weight.bf", "model.layers.18.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.18.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.18.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.18.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.18.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.18.attn.v_proj.Add.bias.preformat", "model.layers.18.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.18.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.18.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.18.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.18.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.18.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.18.post_attention_layernorm.weight.bf", "model.layers.18.mlp.gate_proj.MatMulNBits.qweight", "model.layers.18.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.18.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.18.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.18.mlp.up_proj.MatMulNBits.qweight", "model.layers.18.mlp.up_proj.MatMulNBits.scales.f", "model.layers.18.mlp.up_proj.MatMulNBits.qzeros", "model.layers.18.mlp.up_proj.MatMulNBits.bias.f", "model.layers.18.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.18.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.18.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.18.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.19.input_layernorm.weight.bf", "model.layers.19.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.19.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.19.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.19.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.19.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.19.attn.v_proj.Add.bias.preformat", "model.layers.19.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.19.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.19.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.19.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.19.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.19.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.19.post_attention_layernorm.weight.bf", "model.layers.19.mlp.gate_proj.MatMulNBits.qweight", "model.layers.19.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.19.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.19.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.19.mlp.up_proj.MatMulNBits.qweight", "model.layers.19.mlp.up_proj.MatMulNBits.scales.f", "model.layers.19.mlp.up_proj.MatMulNBits.qzeros", "model.layers.19.mlp.up_proj.MatMulNBits.bias.f", "model.layers.19.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.19.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.19.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.19.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.20.input_layernorm.weight.bf", "model.layers.20.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.20.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.20.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.20.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.20.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.20.attn.v_proj.Add.bias.preformat", "model.layers.20.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.20.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.20.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.20.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.20.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.20.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.20.post_attention_layernorm.weight.bf", "model.layers.20.mlp.gate_proj.MatMulNBits.qweight", "model.layers.20.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.20.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.20.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.20.mlp.up_proj.MatMulNBits.qweight", "model.layers.20.mlp.up_proj.MatMulNBits.scales.f", "model.layers.20.mlp.up_proj.MatMulNBits.qzeros", "model.layers.20.mlp.up_proj.MatMulNBits.bias.f", "model.layers.20.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.20.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.20.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.20.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.21.input_layernorm.weight.bf", "model.layers.21.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.21.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.21.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.21.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.21.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.21.attn.v_proj.Add.bias.preformat", "model.layers.21.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.21.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.21.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.21.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.21.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.21.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.21.post_attention_layernorm.weight.bf", "model.layers.21.mlp.gate_proj.MatMulNBits.qweight", "model.layers.21.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.21.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.21.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.21.mlp.up_proj.MatMulNBits.qweight", "model.layers.21.mlp.up_proj.MatMulNBits.scales.f", "model.layers.21.mlp.up_proj.MatMulNBits.qzeros", "model.layers.21.mlp.up_proj.MatMulNBits.bias.f", "model.layers.21.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.21.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.21.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.21.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.22.input_layernorm.weight.bf", "model.layers.22.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.22.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.22.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.22.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.22.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.22.attn.v_proj.Add.bias.preformat", "model.layers.22.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.22.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.22.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.22.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.22.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.22.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.22.post_attention_layernorm.weight.bf", "model.layers.22.mlp.gate_proj.MatMulNBits.qweight", "model.layers.22.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.22.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.22.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.22.mlp.up_proj.MatMulNBits.qweight", "model.layers.22.mlp.up_proj.MatMulNBits.scales.f", "model.layers.22.mlp.up_proj.MatMulNBits.qzeros", "model.layers.22.mlp.up_proj.MatMulNBits.bias.f", "model.layers.22.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.22.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.22.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.22.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.23.input_layernorm.weight.bf", "model.layers.23.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.23.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.23.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.23.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.23.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.23.attn.v_proj.Add.bias.preformat", "model.layers.23.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.23.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.23.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.23.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.23.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.23.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.23.post_attention_layernorm.weight.bf", "model.layers.23.mlp.gate_proj.MatMulNBits.qweight", "model.layers.23.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.23.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.23.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.23.mlp.up_proj.MatMulNBits.qweight", "model.layers.23.mlp.up_proj.MatMulNBits.scales.f", "model.layers.23.mlp.up_proj.MatMulNBits.qzeros", "model.layers.23.mlp.up_proj.MatMulNBits.bias.f", "model.layers.23.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.23.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.23.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.23.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.24.input_layernorm.weight.bf", "model.layers.24.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.24.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.24.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.24.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.24.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.24.attn.v_proj.Add.bias.preformat", "model.layers.24.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.24.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.24.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.24.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.24.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.24.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.24.post_attention_layernorm.weight.bf", "model.layers.24.mlp.gate_proj.MatMulNBits.qweight", "model.layers.24.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.24.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.24.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.24.mlp.up_proj.MatMulNBits.qweight", "model.layers.24.mlp.up_proj.MatMulNBits.scales.f", "model.layers.24.mlp.up_proj.MatMulNBits.qzeros", "model.layers.24.mlp.up_proj.MatMulNBits.bias.f", "model.layers.24.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.24.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.24.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.24.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.25.input_layernorm.weight.bf", "model.layers.25.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.25.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.25.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.25.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.25.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.25.attn.v_proj.Add.bias.preformat", "model.layers.25.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.25.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.25.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.25.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.25.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.25.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.25.post_attention_layernorm.weight.bf", "model.layers.25.mlp.gate_proj.MatMulNBits.qweight", "model.layers.25.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.25.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.25.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.25.mlp.up_proj.MatMulNBits.qweight", "model.layers.25.mlp.up_proj.MatMulNBits.scales.f", "model.layers.25.mlp.up_proj.MatMulNBits.qzeros", "model.layers.25.mlp.up_proj.MatMulNBits.bias.f", "model.layers.25.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.25.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.25.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.25.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.26.input_layernorm.weight.bf", "model.layers.26.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.26.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.26.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.26.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.26.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.26.attn.v_proj.Add.bias.preformat", "model.layers.26.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.26.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.26.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.26.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.26.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.26.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.26.post_attention_layernorm.weight.bf", "model.layers.26.mlp.gate_proj.MatMulNBits.qweight", "model.layers.26.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.26.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.26.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.26.mlp.up_proj.MatMulNBits.qweight", "model.layers.26.mlp.up_proj.MatMulNBits.scales.f", "model.layers.26.mlp.up_proj.MatMulNBits.qzeros", "model.layers.26.mlp.up_proj.MatMulNBits.bias.f", "model.layers.26.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.26.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.26.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.26.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.27.input_layernorm.weight.bf", "model.layers.27.attn.qk_proj.MatMulNBits.qweight.preformat", "model.layers.27.attn.qk_proj.MatMulNBits.bias.preformat", "model.layers.27.attn.qk_proj.MatMulNBits.scales.preformat", "model.layers.27.attn.qk_proj.MatMulNBits.qzeros.preformat", "model.layers.27.attn.v_proj.MatMulNBits.qweight.preformat", "model.layers.27.attn.v_proj.Add.bias.preformat", "model.layers.27.attn.v_proj.MatMulNBits.scales.preformat", "model.layers.27.attn.v_proj.MatMulNBits.qzeros.preformat", "model.layers.27.attn.o_proj.MatMulNBits.qweight.preformat", "model.layers.27.attn.o_proj.MatMulNBits.bias.preformat", "model.layers.27.attn.o_proj.MatMulNBits.scales.preformat", "model.layers.27.attn.o_proj.MatMulNBits.qzeros.preformat", "model.layers.27.post_attention_layernorm.weight.bf", "model.layers.27.mlp.gate_proj.MatMulNBits.qweight", "model.layers.27.mlp.gate_proj.MatMulNBits.scales.f", "model.layers.27.mlp.gate_proj.MatMulNBits.qzeros", "model.layers.27.mlp.gate_proj.MatMulNBits.bias.f", "model.layers.27.mlp.up_proj.MatMulNBits.qweight", "model.layers.27.mlp.up_proj.MatMulNBits.scales.f", "model.layers.27.mlp.up_proj.MatMulNBits.qzeros", "model.layers.27.mlp.up_proj.MatMulNBits.bias.f", "model.layers.27.mlp.down_proj.MatMulNBits.qweight.preformat", "model.layers.27.mlp.down_proj.MatMulNBits.bias.preformat", "model.layers.27.mlp.down_proj.MatMulNBits.scales.preformat", "model.layers.27.mlp.down_proj.MatMulNBits.qzeros.preformat", "model.layers.28.final_norm_layernorm.weight.bf", "lm_head.MatMulNBits.qweight.preformat", "lm_head.MatMulNBits.bias.preformat", "lm_head.MatMulNBits.scales.preformat", "lm_head.MatMulNBits.qzeros.preformat" ] }, "super_instr": { "buffer_size": 0, "xrt_arg_id": 4, "packed_tensors": [] }, "ext_buf_0": { "buffer_size": 117440512, "xrt_arg_id": 5, "packed_tensors": [ "past_key_values.0.key", "past_key_values.0.value", "present.0.key", "present.0.value", "past_key_values.1.key", "past_key_values.1.value", "present.1.key", "present.1.value", "past_key_values.2.key", "past_key_values.2.value", "present.2.key", "present.2.value", "past_key_values.3.key", "past_key_values.3.value", "present.3.key", "present.3.value", "past_key_values.4.key", "past_key_values.4.value", "present.4.key", "present.4.value", "past_key_values.5.key", "past_key_values.5.value", "present.5.key", "present.5.value", "past_key_values.6.key", "past_key_values.6.value", "present.6.key", "present.6.value", "past_key_values.7.key", "past_key_values.7.value", "present.7.key", "present.7.value", "past_key_values.8.key", "past_key_values.8.value", "present.8.key", "present.8.value", "past_key_values.9.key", "past_key_values.9.value", "present.9.key", "present.9.value", "past_key_values.10.key", "past_key_values.10.value", "present.10.key", "present.10.value", "past_key_values.11.key", "past_key_values.11.value", "present.11.key", "present.11.value", "past_key_values.12.key", "past_key_values.12.value", "present.12.key", "present.12.value", "past_key_values.13.key", "past_key_values.13.value", "present.13.key", "present.13.value", "past_key_values.14.key", "past_key_values.14.value", "present.14.key", "present.14.value", "past_key_values.15.key", "past_key_values.15.value", "present.15.key", "present.15.value", "past_key_values.16.key", "past_key_values.16.value", "present.16.key", "present.16.value", "past_key_values.17.key", "past_key_values.17.value", "present.17.key", "present.17.value", "past_key_values.18.key", "past_key_values.18.value", "present.18.key", "present.18.value", "past_key_values.19.key", "past_key_values.19.value", "present.19.key", "present.19.value", "past_key_values.20.key", "past_key_values.20.value", "present.20.key", "present.20.value", "past_key_values.21.key", "past_key_values.21.value", "present.21.key", "present.21.value", "past_key_values.22.key", "past_key_values.22.value", "present.22.key", "present.22.value", "past_key_values.23.key", "past_key_values.23.value", "present.23.key", "present.23.value", "past_key_values.24.key", "past_key_values.24.value", "present.24.key", "present.24.value", "past_key_values.25.key", "past_key_values.25.value", "present.25.key", "present.25.value", "past_key_values.26.key", "past_key_values.26.value", "present.26.key", "present.26.value", "past_key_values.27.key", "past_key_values.27.value", "present.27.key", "present.27.value" ] }, "ext_buf_1": { "buffer_size": 33554432, "xrt_arg_id": 6, "packed_tensors": [ "sin_cos_cache_token" ] } }, "tensor_map": { "/model/layers.0/input_layernorm/output_0.out5_4_0": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 3072 }, "attention_mask_const_uint": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "uint32", "shape": [ 1 ], "size_in_bytes": 4, "op_tensor_size": 4, "offset": 9324 }, "/model/embed_tokens/Gather/output_0.out4_0": { "packed_buffer_label": "in", "xrt_arg_id": 0, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 6148 }, "/model/layers.28/final_norm_layernorm/output_0.dummy": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "float16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 0 }, "logits.out5_4_84": { "packed_buffer_label": "out", "xrt_arg_id": 1, "dtype": "bfloat16", "shape": [ 1, 1, 151936 ], "size_in_bytes": 303872, "op_tensor_size": 303872, "offset": 3072 }, "/model/layers.0/attn/qk_proj/Add/output_0.out5_4_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 0 }, "/model/layers.0/attn/GroupQueryAttention/output_0.out2_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 3584 }, "/model/layers.0/attn/o_proj/MatMulNBits/output_0.out5_4_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 6656 }, "/model/layers.0/post_attention_layernorm/output_3.out4_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 9728 }, "/model/layers.0/post_attention_layernorm/output_0.out4_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 12800 }, "/model/layers.0/mlp/Mul/output_0.out3_0": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 15872 }, "/model/layers.0/mlp/down_proj/MatMulNBits/output_0.out5_4_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 33792 }, "/model/layers.1/input_layernorm/output_3.out4_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 36864 }, "/model/layers.1/input_layernorm/output_0.out4_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 39936 }, "/model/layers.1/attn/qk_proj/Add/output_0.out5_4_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 43008 }, "/model/layers.1/attn/GroupQueryAttention/output_0.out2_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 46592 }, "/model/layers.1/attn/o_proj/MatMulNBits/output_0.out5_4_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 49664 }, "/model/layers.1/post_attention_layernorm/output_3.out4_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 52736 }, "/model/layers.1/post_attention_layernorm/output_0.out4_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 55808 }, "/model/layers.1/mlp/Mul/output_0.out3_1": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 58880 }, "/model/layers.1/mlp/down_proj/MatMulNBits/output_0.out5_4_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 76800 }, "/model/layers.2/input_layernorm/output_3.out4_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 79872 }, "/model/layers.2/input_layernorm/output_0.out4_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 82944 }, "/model/layers.2/attn/qk_proj/Add/output_0.out5_4_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 86016 }, "/model/layers.2/attn/GroupQueryAttention/output_0.out2_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 89600 }, "/model/layers.2/attn/o_proj/MatMulNBits/output_0.out5_4_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 92672 }, "/model/layers.2/post_attention_layernorm/output_3.out4_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 95744 }, "/model/layers.2/post_attention_layernorm/output_0.out4_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 98816 }, "/model/layers.2/mlp/Mul/output_0.out3_2": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 101888 }, "/model/layers.2/mlp/down_proj/MatMulNBits/output_0.out5_4_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 119808 }, "/model/layers.3/input_layernorm/output_3.out4_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 122880 }, "/model/layers.3/input_layernorm/output_0.out4_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 125952 }, "/model/layers.3/attn/qk_proj/Add/output_0.out5_4_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 129024 }, "/model/layers.3/attn/GroupQueryAttention/output_0.out2_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 132608 }, "/model/layers.3/attn/o_proj/MatMulNBits/output_0.out5_4_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 135680 }, "/model/layers.3/post_attention_layernorm/output_3.out4_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 138752 }, "/model/layers.3/post_attention_layernorm/output_0.out4_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 141824 }, "/model/layers.3/mlp/Mul/output_0.out3_3": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 144896 }, "/model/layers.3/mlp/down_proj/MatMulNBits/output_0.out5_4_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 162816 }, "/model/layers.4/input_layernorm/output_3.out4_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 165888 }, "/model/layers.4/input_layernorm/output_0.out4_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 168960 }, "/model/layers.4/attn/qk_proj/Add/output_0.out5_4_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 172032 }, "/model/layers.4/attn/GroupQueryAttention/output_0.out2_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 175616 }, "/model/layers.4/attn/o_proj/MatMulNBits/output_0.out5_4_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 178688 }, "/model/layers.4/post_attention_layernorm/output_3.out4_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 181760 }, "/model/layers.4/post_attention_layernorm/output_0.out4_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 184832 }, "/model/layers.4/mlp/Mul/output_0.out3_4": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 187904 }, "/model/layers.4/mlp/down_proj/MatMulNBits/output_0.out5_4_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 205824 }, "/model/layers.5/input_layernorm/output_3.out4_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 208896 }, "/model/layers.5/input_layernorm/output_0.out4_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 211968 }, "/model/layers.5/attn/qk_proj/Add/output_0.out5_4_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 215040 }, "/model/layers.5/attn/GroupQueryAttention/output_0.out2_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 218624 }, "/model/layers.5/attn/o_proj/MatMulNBits/output_0.out5_4_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 221696 }, "/model/layers.5/post_attention_layernorm/output_3.out4_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 224768 }, "/model/layers.5/post_attention_layernorm/output_0.out4_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 227840 }, "/model/layers.5/mlp/Mul/output_0.out3_5": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 230912 }, "/model/layers.5/mlp/down_proj/MatMulNBits/output_0.out5_4_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 248832 }, "/model/layers.6/input_layernorm/output_3.out4_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 251904 }, "/model/layers.6/input_layernorm/output_0.out4_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 254976 }, "/model/layers.6/attn/qk_proj/Add/output_0.out5_4_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 258048 }, "/model/layers.6/attn/GroupQueryAttention/output_0.out2_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 261632 }, "/model/layers.6/attn/o_proj/MatMulNBits/output_0.out5_4_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 264704 }, "/model/layers.6/post_attention_layernorm/output_3.out4_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 267776 }, "/model/layers.6/post_attention_layernorm/output_0.out4_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 270848 }, "/model/layers.6/mlp/Mul/output_0.out3_6": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 273920 }, "/model/layers.6/mlp/down_proj/MatMulNBits/output_0.out5_4_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 291840 }, "/model/layers.7/input_layernorm/output_3.out4_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 294912 }, "/model/layers.7/input_layernorm/output_0.out4_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 297984 }, "/model/layers.7/attn/qk_proj/Add/output_0.out5_4_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 301056 }, "/model/layers.7/attn/GroupQueryAttention/output_0.out2_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 304640 }, "/model/layers.7/attn/o_proj/MatMulNBits/output_0.out5_4_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 307712 }, "/model/layers.7/post_attention_layernorm/output_3.out4_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 310784 }, "/model/layers.7/post_attention_layernorm/output_0.out4_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 313856 }, "/model/layers.7/mlp/Mul/output_0.out3_7": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 316928 }, "/model/layers.7/mlp/down_proj/MatMulNBits/output_0.out5_4_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 334848 }, "/model/layers.8/input_layernorm/output_3.out4_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 337920 }, "/model/layers.8/input_layernorm/output_0.out4_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 340992 }, "/model/layers.8/attn/qk_proj/Add/output_0.out5_4_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 344064 }, "/model/layers.8/attn/GroupQueryAttention/output_0.out2_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 347648 }, "/model/layers.8/attn/o_proj/MatMulNBits/output_0.out5_4_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 350720 }, "/model/layers.8/post_attention_layernorm/output_3.out4_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 353792 }, "/model/layers.8/post_attention_layernorm/output_0.out4_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 356864 }, "/model/layers.8/mlp/Mul/output_0.out3_8": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 359936 }, "/model/layers.8/mlp/down_proj/MatMulNBits/output_0.out5_4_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 377856 }, "/model/layers.9/input_layernorm/output_3.out4_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 380928 }, "/model/layers.9/input_layernorm/output_0.out4_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 384000 }, "/model/layers.9/attn/qk_proj/Add/output_0.out5_4_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 387072 }, "/model/layers.9/attn/GroupQueryAttention/output_0.out2_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 390656 }, "/model/layers.9/attn/o_proj/MatMulNBits/output_0.out5_4_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 393728 }, "/model/layers.9/post_attention_layernorm/output_3.out4_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 396800 }, "/model/layers.9/post_attention_layernorm/output_0.out4_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 399872 }, "/model/layers.9/mlp/Mul/output_0.out3_9": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 402944 }, "/model/layers.9/mlp/down_proj/MatMulNBits/output_0.out5_4_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 420864 }, "/model/layers.10/input_layernorm/output_3.out4_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 423936 }, "/model/layers.10/input_layernorm/output_0.out4_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 427008 }, "/model/layers.10/attn/qk_proj/Add/output_0.out5_4_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 430080 }, "/model/layers.10/attn/GroupQueryAttention/output_0.out2_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 433664 }, "/model/layers.10/attn/o_proj/MatMulNBits/output_0.out5_4_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 436736 }, "/model/layers.10/post_attention_layernorm/output_3.out4_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 439808 }, "/model/layers.10/post_attention_layernorm/output_0.out4_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 442880 }, "/model/layers.10/mlp/Mul/output_0.out3_10": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 445952 }, "/model/layers.10/mlp/down_proj/MatMulNBits/output_0.out5_4_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 463872 }, "/model/layers.11/input_layernorm/output_3.out4_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 466944 }, "/model/layers.11/input_layernorm/output_0.out4_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 470016 }, "/model/layers.11/attn/qk_proj/Add/output_0.out5_4_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 473088 }, "/model/layers.11/attn/GroupQueryAttention/output_0.out2_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 476672 }, "/model/layers.11/attn/o_proj/MatMulNBits/output_0.out5_4_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 479744 }, "/model/layers.11/post_attention_layernorm/output_3.out4_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 482816 }, "/model/layers.11/post_attention_layernorm/output_0.out4_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 485888 }, "/model/layers.11/mlp/Mul/output_0.out3_11": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 488960 }, "/model/layers.11/mlp/down_proj/MatMulNBits/output_0.out5_4_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 506880 }, "/model/layers.12/input_layernorm/output_3.out4_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 509952 }, "/model/layers.12/input_layernorm/output_0.out4_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 513024 }, "/model/layers.12/attn/qk_proj/Add/output_0.out5_4_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 516096 }, "/model/layers.12/attn/GroupQueryAttention/output_0.out2_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 519680 }, "/model/layers.12/attn/o_proj/MatMulNBits/output_0.out5_4_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 522752 }, "/model/layers.12/post_attention_layernorm/output_3.out4_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 525824 }, "/model/layers.12/post_attention_layernorm/output_0.out4_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 528896 }, "/model/layers.12/mlp/Mul/output_0.out3_12": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 531968 }, "/model/layers.12/mlp/down_proj/MatMulNBits/output_0.out5_4_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 549888 }, "/model/layers.13/input_layernorm/output_3.out4_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 552960 }, "/model/layers.13/input_layernorm/output_0.out4_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 556032 }, "/model/layers.13/attn/qk_proj/Add/output_0.out5_4_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 559104 }, "/model/layers.13/attn/GroupQueryAttention/output_0.out2_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 562688 }, "/model/layers.13/attn/o_proj/MatMulNBits/output_0.out5_4_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 565760 }, "/model/layers.13/post_attention_layernorm/output_3.out4_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 568832 }, "/model/layers.13/post_attention_layernorm/output_0.out4_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 571904 }, "/model/layers.13/mlp/Mul/output_0.out3_13": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 574976 }, "/model/layers.13/mlp/down_proj/MatMulNBits/output_0.out5_4_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 592896 }, "/model/layers.14/input_layernorm/output_3.out4_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 595968 }, "/model/layers.14/input_layernorm/output_0.out4_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 599040 }, "/model/layers.14/attn/qk_proj/Add/output_0.out5_4_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 602112 }, "/model/layers.14/attn/GroupQueryAttention/output_0.out2_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 605696 }, "/model/layers.14/attn/o_proj/MatMulNBits/output_0.out5_4_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 608768 }, "/model/layers.14/post_attention_layernorm/output_3.out4_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 611840 }, "/model/layers.14/post_attention_layernorm/output_0.out4_28": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 614912 }, "/model/layers.14/mlp/Mul/output_0.out3_14": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 617984 }, "/model/layers.14/mlp/down_proj/MatMulNBits/output_0.out5_4_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 635904 }, "/model/layers.15/input_layernorm/output_3.out4_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 638976 }, "/model/layers.15/input_layernorm/output_0.out4_29": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 642048 }, "/model/layers.15/attn/qk_proj/Add/output_0.out5_4_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 645120 }, "/model/layers.15/attn/GroupQueryAttention/output_0.out2_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 648704 }, "/model/layers.15/attn/o_proj/MatMulNBits/output_0.out5_4_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 651776 }, "/model/layers.15/post_attention_layernorm/output_3.out4_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 654848 }, "/model/layers.15/post_attention_layernorm/output_0.out4_30": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 657920 }, "/model/layers.15/mlp/Mul/output_0.out3_15": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 660992 }, "/model/layers.15/mlp/down_proj/MatMulNBits/output_0.out5_4_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 678912 }, "/model/layers.16/input_layernorm/output_3.out4_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 681984 }, "/model/layers.16/input_layernorm/output_0.out4_31": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 685056 }, "/model/layers.16/attn/qk_proj/Add/output_0.out5_4_48": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 688128 }, "/model/layers.16/attn/GroupQueryAttention/output_0.out2_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 691712 }, "/model/layers.16/attn/o_proj/MatMulNBits/output_0.out5_4_49": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 694784 }, "/model/layers.16/post_attention_layernorm/output_3.out4_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 697856 }, "/model/layers.16/post_attention_layernorm/output_0.out4_32": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 700928 }, "/model/layers.16/mlp/Mul/output_0.out3_16": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 704000 }, "/model/layers.16/mlp/down_proj/MatMulNBits/output_0.out5_4_50": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 721920 }, "/model/layers.17/input_layernorm/output_3.out4_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 724992 }, "/model/layers.17/input_layernorm/output_0.out4_33": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 728064 }, "/model/layers.17/attn/qk_proj/Add/output_0.out5_4_51": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 731136 }, "/model/layers.17/attn/GroupQueryAttention/output_0.out2_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 734720 }, "/model/layers.17/attn/o_proj/MatMulNBits/output_0.out5_4_52": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 737792 }, "/model/layers.17/post_attention_layernorm/output_3.out4_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 740864 }, "/model/layers.17/post_attention_layernorm/output_0.out4_34": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 743936 }, "/model/layers.17/mlp/Mul/output_0.out3_17": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 747008 }, "/model/layers.17/mlp/down_proj/MatMulNBits/output_0.out5_4_53": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 764928 }, "/model/layers.18/input_layernorm/output_3.out4_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 768000 }, "/model/layers.18/input_layernorm/output_0.out4_35": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 771072 }, "/model/layers.18/attn/qk_proj/Add/output_0.out5_4_54": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 774144 }, "/model/layers.18/attn/GroupQueryAttention/output_0.out2_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 777728 }, "/model/layers.18/attn/o_proj/MatMulNBits/output_0.out5_4_55": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 780800 }, "/model/layers.18/post_attention_layernorm/output_3.out4_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 783872 }, "/model/layers.18/post_attention_layernorm/output_0.out4_36": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 786944 }, "/model/layers.18/mlp/Mul/output_0.out3_18": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 790016 }, "/model/layers.18/mlp/down_proj/MatMulNBits/output_0.out5_4_56": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 807936 }, "/model/layers.19/input_layernorm/output_3.out4_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 811008 }, "/model/layers.19/input_layernorm/output_0.out4_37": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 814080 }, "/model/layers.19/attn/qk_proj/Add/output_0.out5_4_57": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 817152 }, "/model/layers.19/attn/GroupQueryAttention/output_0.out2_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 820736 }, "/model/layers.19/attn/o_proj/MatMulNBits/output_0.out5_4_58": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 823808 }, "/model/layers.19/post_attention_layernorm/output_3.out4_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 826880 }, "/model/layers.19/post_attention_layernorm/output_0.out4_38": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 829952 }, "/model/layers.19/mlp/Mul/output_0.out3_19": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 833024 }, "/model/layers.19/mlp/down_proj/MatMulNBits/output_0.out5_4_59": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 850944 }, "/model/layers.20/input_layernorm/output_3.out4_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 854016 }, "/model/layers.20/input_layernorm/output_0.out4_39": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 857088 }, "/model/layers.20/attn/qk_proj/Add/output_0.out5_4_60": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 860160 }, "/model/layers.20/attn/GroupQueryAttention/output_0.out2_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 863744 }, "/model/layers.20/attn/o_proj/MatMulNBits/output_0.out5_4_61": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 866816 }, "/model/layers.20/post_attention_layernorm/output_3.out4_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 869888 }, "/model/layers.20/post_attention_layernorm/output_0.out4_40": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 872960 }, "/model/layers.20/mlp/Mul/output_0.out3_20": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 876032 }, "/model/layers.20/mlp/down_proj/MatMulNBits/output_0.out5_4_62": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 893952 }, "/model/layers.21/input_layernorm/output_3.out4_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 897024 }, "/model/layers.21/input_layernorm/output_0.out4_41": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 900096 }, "/model/layers.21/attn/qk_proj/Add/output_0.out5_4_63": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 903168 }, "/model/layers.21/attn/GroupQueryAttention/output_0.out2_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 906752 }, "/model/layers.21/attn/o_proj/MatMulNBits/output_0.out5_4_64": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 909824 }, "/model/layers.21/post_attention_layernorm/output_3.out4_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 912896 }, "/model/layers.21/post_attention_layernorm/output_0.out4_42": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 915968 }, "/model/layers.21/mlp/Mul/output_0.out3_21": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 919040 }, "/model/layers.21/mlp/down_proj/MatMulNBits/output_0.out5_4_65": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 936960 }, "/model/layers.22/input_layernorm/output_3.out4_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 940032 }, "/model/layers.22/input_layernorm/output_0.out4_43": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 943104 }, "/model/layers.22/attn/qk_proj/Add/output_0.out5_4_66": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 946176 }, "/model/layers.22/attn/GroupQueryAttention/output_0.out2_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 949760 }, "/model/layers.22/attn/o_proj/MatMulNBits/output_0.out5_4_67": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 952832 }, "/model/layers.22/post_attention_layernorm/output_3.out4_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 955904 }, "/model/layers.22/post_attention_layernorm/output_0.out4_44": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 958976 }, "/model/layers.22/mlp/Mul/output_0.out3_22": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 962048 }, "/model/layers.22/mlp/down_proj/MatMulNBits/output_0.out5_4_68": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 979968 }, "/model/layers.23/input_layernorm/output_3.out4_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 983040 }, "/model/layers.23/input_layernorm/output_0.out4_45": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 986112 }, "/model/layers.23/attn/qk_proj/Add/output_0.out5_4_69": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 989184 }, "/model/layers.23/attn/GroupQueryAttention/output_0.out2_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 992768 }, "/model/layers.23/attn/o_proj/MatMulNBits/output_0.out5_4_70": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 995840 }, "/model/layers.23/post_attention_layernorm/output_3.out4_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 998912 }, "/model/layers.23/post_attention_layernorm/output_0.out4_46": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1001984 }, "/model/layers.23/mlp/Mul/output_0.out3_23": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 1005056 }, "/model/layers.23/mlp/down_proj/MatMulNBits/output_0.out5_4_71": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1022976 }, "/model/layers.24/input_layernorm/output_3.out4_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1026048 }, "/model/layers.24/input_layernorm/output_0.out4_47": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1029120 }, "/model/layers.24/attn/qk_proj/Add/output_0.out5_4_72": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 1032192 }, "/model/layers.24/attn/GroupQueryAttention/output_0.out2_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1035776 }, "/model/layers.24/attn/o_proj/MatMulNBits/output_0.out5_4_73": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1038848 }, "/model/layers.24/post_attention_layernorm/output_3.out4_48": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1041920 }, "/model/layers.24/post_attention_layernorm/output_0.out4_48": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1044992 }, "/model/layers.24/mlp/Mul/output_0.out3_24": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 1048064 }, "/model/layers.24/mlp/down_proj/MatMulNBits/output_0.out5_4_74": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1065984 }, "/model/layers.25/input_layernorm/output_3.out4_49": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1069056 }, "/model/layers.25/input_layernorm/output_0.out4_49": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1072128 }, "/model/layers.25/attn/qk_proj/Add/output_0.out5_4_75": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 1075200 }, "/model/layers.25/attn/GroupQueryAttention/output_0.out2_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1078784 }, "/model/layers.25/attn/o_proj/MatMulNBits/output_0.out5_4_76": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1081856 }, "/model/layers.25/post_attention_layernorm/output_3.out4_50": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1084928 }, "/model/layers.25/post_attention_layernorm/output_0.out4_50": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1088000 }, "/model/layers.25/mlp/Mul/output_0.out3_25": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 1091072 }, "/model/layers.25/mlp/down_proj/MatMulNBits/output_0.out5_4_77": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1108992 }, "/model/layers.26/input_layernorm/output_3.out4_51": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1112064 }, "/model/layers.26/input_layernorm/output_0.out4_51": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1115136 }, "/model/layers.26/attn/qk_proj/Add/output_0.out5_4_78": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 1118208 }, "/model/layers.26/attn/GroupQueryAttention/output_0.out2_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1121792 }, "/model/layers.26/attn/o_proj/MatMulNBits/output_0.out5_4_79": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1124864 }, "/model/layers.26/post_attention_layernorm/output_3.out4_52": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1127936 }, "/model/layers.26/post_attention_layernorm/output_0.out4_52": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1131008 }, "/model/layers.26/mlp/Mul/output_0.out3_26": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 1134080 }, "/model/layers.26/mlp/down_proj/MatMulNBits/output_0.out5_4_80": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1152000 }, "/model/layers.27/input_layernorm/output_3.out4_53": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1155072 }, "/model/layers.27/input_layernorm/output_0.out4_53": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1158144 }, "/model/layers.27/attn/qk_proj/Add/output_0.out5_4_81": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1792 ], "size_in_bytes": 3584, "op_tensor_size": 3584, "offset": 1161216 }, "/model/layers.27/attn/GroupQueryAttention/output_0.out2_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1164800 }, "/model/layers.27/attn/o_proj/MatMulNBits/output_0.out5_4_82": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1167872 }, "/model/layers.27/post_attention_layernorm/output_3.out4_54": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1170944 }, "/model/layers.27/post_attention_layernorm/output_0.out4_54": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1174016 }, "/model/layers.27/mlp/Mul/output_0.out3_27": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 8960 ], "size_in_bytes": 17920, "op_tensor_size": 17920, "offset": 1177088 }, "/model/layers.27/mlp/down_proj/MatMulNBits/output_0.out5_4_83": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1195008 }, "/model/layers.28/final_norm_layernorm/output_0.out4_55": { "packed_buffer_label": "scratch", "xrt_arg_id": 2, "dtype": "bfloat16", "shape": [ 1, 1, 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 1198080 }, "model.layers.0.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 0, "file_name": ".cache\\MatMulNBits_2_0_0.const", "file_size": 2752512 }, "model.layers.0.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 2752512, "file_name": ".cache\\MatMulNBits_2_0_1.const", "file_size": 7168 }, "model.layers.0.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 2759680, "file_name": ".cache\\MatMulNBits_2_0_2.const", "file_size": 86016 }, "model.layers.0.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 2845696, "file_name": ".cache\\MatMulNBits_2_0_3.const", "file_size": 21504 }, "model.layers.0.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 2867200, "file_name": ".cache\\MatMulNBits_2_0_4.const", "file_size": 393216 }, "model.layers.0.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 3260416, "file_name": ".cache\\MatMulNBits_2_0_5.const", "file_size": 1024 }, "model.layers.0.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 3261440, "file_name": ".cache\\MatMulNBits_2_0_6.const", "file_size": 12288 }, "model.layers.0.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 3273728, "file_name": ".cache\\MatMulNBits_2_0_7.const", "file_size": 3072 }, "model.layers.0.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 3276800, "file_name": ".cache\\MatMulNBits_2_0_8.const", "file_size": 2359296 }, "model.layers.0.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 5636096, "file_name": ".cache\\MatMulNBits_2_0_9.const", "file_size": 6144 }, "model.layers.0.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 5642240, "file_name": ".cache\\MatMulNBits_2_0_10.const", "file_size": 73728 }, "model.layers.0.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 5715968, "file_name": ".cache\\MatMulNBits_2_0_11.const", "file_size": 18432 }, "model.layers.0.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 5734400, "file_name": ".cache\\MatMulNBits_2_0_12.const", "file_size": 3072 }, "model.layers.0.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 5737472, "file_name": ".cache\\MatMulNBits_2_0_13.const", "file_size": 6881280 }, "model.layers.0.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 12618752, "file_name": ".cache\\MatMulNBits_2_0_14.const", "file_size": 430080 }, "model.layers.0.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 13048832, "file_name": ".cache\\MatMulNBits_2_0_15.const", "file_size": 53760 }, "model.layers.0.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 13102592, "file_name": ".cache\\MatMulNBits_2_0_16.const", "file_size": 35840 }, "model.layers.0.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 13138432, "file_name": ".cache\\MatMulNBits_2_0_17.const", "file_size": 6881280 }, "model.layers.0.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 20019712, "file_name": ".cache\\MatMulNBits_2_0_18.const", "file_size": 430080 }, "model.layers.0.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 20449792, "file_name": ".cache\\MatMulNBits_2_0_19.const", "file_size": 53760 }, "model.layers.0.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 20503552, "file_name": ".cache\\MatMulNBits_2_0_20.const", "file_size": 35840 }, "model.layers.0.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 20539392, "file_name": ".cache\\MatMulNBits_2_0_21.const", "file_size": 13762560 }, "model.layers.0.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 34301952, "file_name": ".cache\\MatMulNBits_2_0_22.const", "file_size": 6144 }, "model.layers.0.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 34308096, "file_name": ".cache\\MatMulNBits_2_0_23.const", "file_size": 430080 }, "model.layers.0.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 34738176, "file_name": ".cache\\MatMulNBits_2_0_24.const", "file_size": 107520 }, "model.layers.1.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 34845696, "file_name": ".cache\\MatMulNBits_2_0_25.const", "file_size": 3072 }, "model.layers.1.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 34848768, "file_name": ".cache\\MatMulNBits_2_0_26.const", "file_size": 2752512 }, "model.layers.1.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 37601280, "file_name": ".cache\\MatMulNBits_2_0_27.const", "file_size": 7168 }, "model.layers.1.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 37608448, "file_name": ".cache\\MatMulNBits_2_0_28.const", "file_size": 86016 }, "model.layers.1.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 37694464, "file_name": ".cache\\MatMulNBits_2_0_29.const", "file_size": 21504 }, "model.layers.1.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 37715968, "file_name": ".cache\\MatMulNBits_2_0_30.const", "file_size": 393216 }, "model.layers.1.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 38109184, "file_name": ".cache\\MatMulNBits_2_0_31.const", "file_size": 1024 }, "model.layers.1.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 38110208, "file_name": ".cache\\MatMulNBits_2_0_32.const", "file_size": 12288 }, "model.layers.1.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 38122496, "file_name": ".cache\\MatMulNBits_2_0_33.const", "file_size": 3072 }, "model.layers.1.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 38125568, "file_name": ".cache\\MatMulNBits_2_0_34.const", "file_size": 2359296 }, "model.layers.1.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 40484864, "file_name": ".cache\\MatMulNBits_2_0_35.const", "file_size": 6144 }, "model.layers.1.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 40491008, "file_name": ".cache\\MatMulNBits_2_0_36.const", "file_size": 73728 }, "model.layers.1.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 40564736, "file_name": ".cache\\MatMulNBits_2_0_37.const", "file_size": 18432 }, "model.layers.1.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 40583168, "file_name": ".cache\\MatMulNBits_2_0_38.const", "file_size": 3072 }, "model.layers.1.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 40586240, "file_name": ".cache\\MatMulNBits_2_0_39.const", "file_size": 6881280 }, "model.layers.1.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 47467520, "file_name": ".cache\\MatMulNBits_2_0_40.const", "file_size": 430080 }, "model.layers.1.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 47897600, "file_name": ".cache\\MatMulNBits_2_0_41.const", "file_size": 53760 }, "model.layers.1.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 47951360, "file_name": ".cache\\MatMulNBits_2_0_42.const", "file_size": 35840 }, "model.layers.1.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 47987200, "file_name": ".cache\\MatMulNBits_2_0_43.const", "file_size": 6881280 }, "model.layers.1.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 54868480, "file_name": ".cache\\MatMulNBits_2_0_44.const", "file_size": 430080 }, "model.layers.1.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 55298560, "file_name": ".cache\\MatMulNBits_2_0_45.const", "file_size": 53760 }, "model.layers.1.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 55352320, "file_name": ".cache\\MatMulNBits_2_0_46.const", "file_size": 35840 }, "model.layers.1.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 55388160, "file_name": ".cache\\MatMulNBits_2_0_47.const", "file_size": 13762560 }, "model.layers.1.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 69150720, "file_name": ".cache\\MatMulNBits_2_0_48.const", "file_size": 6144 }, "model.layers.1.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 69156864, "file_name": ".cache\\MatMulNBits_2_0_49.const", "file_size": 430080 }, "model.layers.1.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 69586944, "file_name": ".cache\\MatMulNBits_2_0_50.const", "file_size": 107520 }, "model.layers.2.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 69694464, "file_name": ".cache\\MatMulNBits_2_0_51.const", "file_size": 3072 }, "model.layers.2.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 69697536, "file_name": ".cache\\MatMulNBits_2_0_52.const", "file_size": 2752512 }, "model.layers.2.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 72450048, "file_name": ".cache\\MatMulNBits_2_0_53.const", "file_size": 7168 }, "model.layers.2.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 72457216, "file_name": ".cache\\MatMulNBits_2_0_54.const", "file_size": 86016 }, "model.layers.2.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 72543232, "file_name": ".cache\\MatMulNBits_2_0_55.const", "file_size": 21504 }, "model.layers.2.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 72564736, "file_name": ".cache\\MatMulNBits_2_0_56.const", "file_size": 393216 }, "model.layers.2.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 72957952, "file_name": ".cache\\MatMulNBits_2_0_57.const", "file_size": 1024 }, "model.layers.2.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 72958976, "file_name": ".cache\\MatMulNBits_2_0_58.const", "file_size": 12288 }, "model.layers.2.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 72971264, "file_name": ".cache\\MatMulNBits_2_0_59.const", "file_size": 3072 }, "model.layers.2.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 72974336, "file_name": ".cache\\MatMulNBits_2_0_60.const", "file_size": 2359296 }, "model.layers.2.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 75333632, "file_name": ".cache\\MatMulNBits_2_0_61.const", "file_size": 6144 }, "model.layers.2.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 75339776, "file_name": ".cache\\MatMulNBits_2_0_62.const", "file_size": 73728 }, "model.layers.2.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 75413504, "file_name": ".cache\\MatMulNBits_2_0_63.const", "file_size": 18432 }, "model.layers.2.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 75431936, "file_name": ".cache\\MatMulNBits_2_0_64.const", "file_size": 3072 }, "model.layers.2.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 75435008, "file_name": ".cache\\MatMulNBits_2_0_65.const", "file_size": 6881280 }, "model.layers.2.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 82316288, "file_name": ".cache\\MatMulNBits_2_0_66.const", "file_size": 430080 }, "model.layers.2.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 82746368, "file_name": ".cache\\MatMulNBits_2_0_67.const", "file_size": 53760 }, "model.layers.2.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 82800128, "file_name": ".cache\\MatMulNBits_2_0_68.const", "file_size": 35840 }, "model.layers.2.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 82835968, "file_name": ".cache\\MatMulNBits_2_0_69.const", "file_size": 6881280 }, "model.layers.2.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 89717248, "file_name": ".cache\\MatMulNBits_2_0_70.const", "file_size": 430080 }, "model.layers.2.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 90147328, "file_name": ".cache\\MatMulNBits_2_0_71.const", "file_size": 53760 }, "model.layers.2.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 90201088, "file_name": ".cache\\MatMulNBits_2_0_72.const", "file_size": 35840 }, "model.layers.2.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 90236928, "file_name": ".cache\\MatMulNBits_2_0_73.const", "file_size": 13762560 }, "model.layers.2.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 103999488, "file_name": ".cache\\MatMulNBits_2_0_74.const", "file_size": 6144 }, "model.layers.2.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 104005632, "file_name": ".cache\\MatMulNBits_2_0_75.const", "file_size": 430080 }, "model.layers.2.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 104435712, "file_name": ".cache\\MatMulNBits_2_0_76.const", "file_size": 107520 }, "model.layers.3.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 104543232, "file_name": ".cache\\MatMulNBits_2_0_77.const", "file_size": 3072 }, "model.layers.3.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 104546304, "file_name": ".cache\\MatMulNBits_2_0_78.const", "file_size": 2752512 }, "model.layers.3.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 107298816, "file_name": ".cache\\MatMulNBits_2_0_79.const", "file_size": 7168 }, "model.layers.3.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 107305984, "file_name": ".cache\\MatMulNBits_2_0_80.const", "file_size": 86016 }, "model.layers.3.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 107392000, "file_name": ".cache\\MatMulNBits_2_0_81.const", "file_size": 21504 }, "model.layers.3.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 107413504, "file_name": ".cache\\MatMulNBits_2_0_82.const", "file_size": 393216 }, "model.layers.3.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 107806720, "file_name": ".cache\\MatMulNBits_2_0_83.const", "file_size": 1024 }, "model.layers.3.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 107807744, "file_name": ".cache\\MatMulNBits_2_0_84.const", "file_size": 12288 }, "model.layers.3.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 107820032, "file_name": ".cache\\MatMulNBits_2_0_85.const", "file_size": 3072 }, "model.layers.3.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 107823104, "file_name": ".cache\\MatMulNBits_2_0_86.const", "file_size": 2359296 }, "model.layers.3.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 110182400, "file_name": ".cache\\MatMulNBits_2_0_87.const", "file_size": 6144 }, "model.layers.3.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 110188544, "file_name": ".cache\\MatMulNBits_2_0_88.const", "file_size": 73728 }, "model.layers.3.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 110262272, "file_name": ".cache\\MatMulNBits_2_0_89.const", "file_size": 18432 }, "model.layers.3.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 110280704, "file_name": ".cache\\MatMulNBits_2_0_90.const", "file_size": 3072 }, "model.layers.3.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 110283776, "file_name": ".cache\\MatMulNBits_2_0_91.const", "file_size": 6881280 }, "model.layers.3.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 117165056, "file_name": ".cache\\MatMulNBits_2_0_92.const", "file_size": 430080 }, "model.layers.3.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 117595136, "file_name": ".cache\\MatMulNBits_2_0_93.const", "file_size": 53760 }, "model.layers.3.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 117648896, "file_name": ".cache\\MatMulNBits_2_0_94.const", "file_size": 35840 }, "model.layers.3.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 117684736, "file_name": ".cache\\MatMulNBits_2_0_95.const", "file_size": 6881280 }, "model.layers.3.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 124566016, "file_name": ".cache\\MatMulNBits_2_0_96.const", "file_size": 430080 }, "model.layers.3.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 124996096, "file_name": ".cache\\MatMulNBits_2_0_97.const", "file_size": 53760 }, "model.layers.3.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 125049856, "file_name": ".cache\\MatMulNBits_2_0_98.const", "file_size": 35840 }, "model.layers.3.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 125085696, "file_name": ".cache\\MatMulNBits_2_0_99.const", "file_size": 13762560 }, "model.layers.3.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 138848256, "file_name": ".cache\\MatMulNBits_2_0_100.const", "file_size": 6144 }, "model.layers.3.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 138854400, "file_name": ".cache\\MatMulNBits_2_0_101.const", "file_size": 430080 }, "model.layers.3.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 139284480, "file_name": ".cache\\MatMulNBits_2_0_102.const", "file_size": 107520 }, "model.layers.4.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 139392000, "file_name": ".cache\\MatMulNBits_2_0_103.const", "file_size": 3072 }, "model.layers.4.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 139395072, "file_name": ".cache\\MatMulNBits_2_0_104.const", "file_size": 2752512 }, "model.layers.4.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 142147584, "file_name": ".cache\\MatMulNBits_2_0_105.const", "file_size": 7168 }, "model.layers.4.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 142154752, "file_name": ".cache\\MatMulNBits_2_0_106.const", "file_size": 86016 }, "model.layers.4.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 142240768, "file_name": ".cache\\MatMulNBits_2_0_107.const", "file_size": 21504 }, "model.layers.4.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 142262272, "file_name": ".cache\\MatMulNBits_2_0_108.const", "file_size": 393216 }, "model.layers.4.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 142655488, "file_name": ".cache\\MatMulNBits_2_0_109.const", "file_size": 1024 }, "model.layers.4.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 142656512, "file_name": ".cache\\MatMulNBits_2_0_110.const", "file_size": 12288 }, "model.layers.4.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 142668800, "file_name": ".cache\\MatMulNBits_2_0_111.const", "file_size": 3072 }, "model.layers.4.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 142671872, "file_name": ".cache\\MatMulNBits_2_0_112.const", "file_size": 2359296 }, "model.layers.4.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 145031168, "file_name": ".cache\\MatMulNBits_2_0_113.const", "file_size": 6144 }, "model.layers.4.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 145037312, "file_name": ".cache\\MatMulNBits_2_0_114.const", "file_size": 73728 }, "model.layers.4.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 145111040, "file_name": ".cache\\MatMulNBits_2_0_115.const", "file_size": 18432 }, "model.layers.4.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 145129472, "file_name": ".cache\\MatMulNBits_2_0_116.const", "file_size": 3072 }, "model.layers.4.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 145132544, "file_name": ".cache\\MatMulNBits_2_0_117.const", "file_size": 6881280 }, "model.layers.4.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 152013824, "file_name": ".cache\\MatMulNBits_2_0_118.const", "file_size": 430080 }, "model.layers.4.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 152443904, "file_name": ".cache\\MatMulNBits_2_0_119.const", "file_size": 53760 }, "model.layers.4.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 152497664, "file_name": ".cache\\MatMulNBits_2_0_120.const", "file_size": 35840 }, "model.layers.4.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 152533504, "file_name": ".cache\\MatMulNBits_2_0_121.const", "file_size": 6881280 }, "model.layers.4.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 159414784, "file_name": ".cache\\MatMulNBits_2_0_122.const", "file_size": 430080 }, "model.layers.4.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 159844864, "file_name": ".cache\\MatMulNBits_2_0_123.const", "file_size": 53760 }, "model.layers.4.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 159898624, "file_name": ".cache\\MatMulNBits_2_0_124.const", "file_size": 35840 }, "model.layers.4.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 159934464, "file_name": ".cache\\MatMulNBits_2_0_125.const", "file_size": 13762560 }, "model.layers.4.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 173697024, "file_name": ".cache\\MatMulNBits_2_0_126.const", "file_size": 6144 }, "model.layers.4.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 173703168, "file_name": ".cache\\MatMulNBits_2_0_127.const", "file_size": 430080 }, "model.layers.4.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 174133248, "file_name": ".cache\\MatMulNBits_2_0_128.const", "file_size": 107520 }, "model.layers.5.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 174240768, "file_name": ".cache\\MatMulNBits_2_0_129.const", "file_size": 3072 }, "model.layers.5.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 174243840, "file_name": ".cache\\MatMulNBits_2_0_130.const", "file_size": 2752512 }, "model.layers.5.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 176996352, "file_name": ".cache\\MatMulNBits_2_0_131.const", "file_size": 7168 }, "model.layers.5.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 177003520, "file_name": ".cache\\MatMulNBits_2_0_132.const", "file_size": 86016 }, "model.layers.5.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 177089536, "file_name": ".cache\\MatMulNBits_2_0_133.const", "file_size": 21504 }, "model.layers.5.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 177111040, "file_name": ".cache\\MatMulNBits_2_0_134.const", "file_size": 393216 }, "model.layers.5.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 177504256, "file_name": ".cache\\MatMulNBits_2_0_135.const", "file_size": 1024 }, "model.layers.5.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 177505280, "file_name": ".cache\\MatMulNBits_2_0_136.const", "file_size": 12288 }, "model.layers.5.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 177517568, "file_name": ".cache\\MatMulNBits_2_0_137.const", "file_size": 3072 }, "model.layers.5.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 177520640, "file_name": ".cache\\MatMulNBits_2_0_138.const", "file_size": 2359296 }, "model.layers.5.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 179879936, "file_name": ".cache\\MatMulNBits_2_0_139.const", "file_size": 6144 }, "model.layers.5.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 179886080, "file_name": ".cache\\MatMulNBits_2_0_140.const", "file_size": 73728 }, "model.layers.5.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 179959808, "file_name": ".cache\\MatMulNBits_2_0_141.const", "file_size": 18432 }, "model.layers.5.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 179978240, "file_name": ".cache\\MatMulNBits_2_0_142.const", "file_size": 3072 }, "model.layers.5.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 179981312, "file_name": ".cache\\MatMulNBits_2_0_143.const", "file_size": 6881280 }, "model.layers.5.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 186862592, "file_name": ".cache\\MatMulNBits_2_0_144.const", "file_size": 430080 }, "model.layers.5.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 187292672, "file_name": ".cache\\MatMulNBits_2_0_145.const", "file_size": 53760 }, "model.layers.5.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 187346432, "file_name": ".cache\\MatMulNBits_2_0_146.const", "file_size": 35840 }, "model.layers.5.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 187382272, "file_name": ".cache\\MatMulNBits_2_0_147.const", "file_size": 6881280 }, "model.layers.5.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 194263552, "file_name": ".cache\\MatMulNBits_2_0_148.const", "file_size": 430080 }, "model.layers.5.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 194693632, "file_name": ".cache\\MatMulNBits_2_0_149.const", "file_size": 53760 }, "model.layers.5.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 194747392, "file_name": ".cache\\MatMulNBits_2_0_150.const", "file_size": 35840 }, "model.layers.5.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 194783232, "file_name": ".cache\\MatMulNBits_2_0_151.const", "file_size": 13762560 }, "model.layers.5.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 208545792, "file_name": ".cache\\MatMulNBits_2_0_152.const", "file_size": 6144 }, "model.layers.5.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 208551936, "file_name": ".cache\\MatMulNBits_2_0_153.const", "file_size": 430080 }, "model.layers.5.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 208982016, "file_name": ".cache\\MatMulNBits_2_0_154.const", "file_size": 107520 }, "model.layers.6.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 209089536, "file_name": ".cache\\MatMulNBits_2_0_155.const", "file_size": 3072 }, "model.layers.6.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 209092608, "file_name": ".cache\\MatMulNBits_2_0_156.const", "file_size": 2752512 }, "model.layers.6.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 211845120, "file_name": ".cache\\MatMulNBits_2_0_157.const", "file_size": 7168 }, "model.layers.6.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 211852288, "file_name": ".cache\\MatMulNBits_2_0_158.const", "file_size": 86016 }, "model.layers.6.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 211938304, "file_name": ".cache\\MatMulNBits_2_0_159.const", "file_size": 21504 }, "model.layers.6.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 211959808, "file_name": ".cache\\MatMulNBits_2_0_160.const", "file_size": 393216 }, "model.layers.6.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 212353024, "file_name": ".cache\\MatMulNBits_2_0_161.const", "file_size": 1024 }, "model.layers.6.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 212354048, "file_name": ".cache\\MatMulNBits_2_0_162.const", "file_size": 12288 }, "model.layers.6.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 212366336, "file_name": ".cache\\MatMulNBits_2_0_163.const", "file_size": 3072 }, "model.layers.6.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 212369408, "file_name": ".cache\\MatMulNBits_2_0_164.const", "file_size": 2359296 }, "model.layers.6.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 214728704, "file_name": ".cache\\MatMulNBits_2_0_165.const", "file_size": 6144 }, "model.layers.6.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 214734848, "file_name": ".cache\\MatMulNBits_2_0_166.const", "file_size": 73728 }, "model.layers.6.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 214808576, "file_name": ".cache\\MatMulNBits_2_0_167.const", "file_size": 18432 }, "model.layers.6.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 214827008, "file_name": ".cache\\MatMulNBits_2_0_168.const", "file_size": 3072 }, "model.layers.6.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 214830080, "file_name": ".cache\\MatMulNBits_2_0_169.const", "file_size": 6881280 }, "model.layers.6.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 221711360, "file_name": ".cache\\MatMulNBits_2_0_170.const", "file_size": 430080 }, "model.layers.6.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 222141440, "file_name": ".cache\\MatMulNBits_2_0_171.const", "file_size": 53760 }, "model.layers.6.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 222195200, "file_name": ".cache\\MatMulNBits_2_0_172.const", "file_size": 35840 }, "model.layers.6.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 222231040, "file_name": ".cache\\MatMulNBits_2_0_173.const", "file_size": 6881280 }, "model.layers.6.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 229112320, "file_name": ".cache\\MatMulNBits_2_0_174.const", "file_size": 430080 }, "model.layers.6.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 229542400, "file_name": ".cache\\MatMulNBits_2_0_175.const", "file_size": 53760 }, "model.layers.6.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 229596160, "file_name": ".cache\\MatMulNBits_2_0_176.const", "file_size": 35840 }, "model.layers.6.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 229632000, "file_name": ".cache\\MatMulNBits_2_0_177.const", "file_size": 13762560 }, "model.layers.6.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 243394560, "file_name": ".cache\\MatMulNBits_2_0_178.const", "file_size": 6144 }, "model.layers.6.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 243400704, "file_name": ".cache\\MatMulNBits_2_0_179.const", "file_size": 430080 }, "model.layers.6.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 243830784, "file_name": ".cache\\MatMulNBits_2_0_180.const", "file_size": 107520 }, "model.layers.7.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 243938304, "file_name": ".cache\\MatMulNBits_2_0_181.const", "file_size": 3072 }, "model.layers.7.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 243941376, "file_name": ".cache\\MatMulNBits_2_0_182.const", "file_size": 2752512 }, "model.layers.7.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 246693888, "file_name": ".cache\\MatMulNBits_2_0_183.const", "file_size": 7168 }, "model.layers.7.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 246701056, "file_name": ".cache\\MatMulNBits_2_0_184.const", "file_size": 86016 }, "model.layers.7.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 246787072, "file_name": ".cache\\MatMulNBits_2_0_185.const", "file_size": 21504 }, "model.layers.7.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 246808576, "file_name": ".cache\\MatMulNBits_2_0_186.const", "file_size": 393216 }, "model.layers.7.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 247201792, "file_name": ".cache\\MatMulNBits_2_0_187.const", "file_size": 1024 }, "model.layers.7.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 247202816, "file_name": ".cache\\MatMulNBits_2_0_188.const", "file_size": 12288 }, "model.layers.7.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 247215104, "file_name": ".cache\\MatMulNBits_2_0_189.const", "file_size": 3072 }, "model.layers.7.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 247218176, "file_name": ".cache\\MatMulNBits_2_0_190.const", "file_size": 2359296 }, "model.layers.7.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 249577472, "file_name": ".cache\\MatMulNBits_2_0_191.const", "file_size": 6144 }, "model.layers.7.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 249583616, "file_name": ".cache\\MatMulNBits_2_0_192.const", "file_size": 73728 }, "model.layers.7.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 249657344, "file_name": ".cache\\MatMulNBits_2_0_193.const", "file_size": 18432 }, "model.layers.7.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 249675776, "file_name": ".cache\\MatMulNBits_2_0_194.const", "file_size": 3072 }, "model.layers.7.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 249678848, "file_name": ".cache\\MatMulNBits_2_0_195.const", "file_size": 6881280 }, "model.layers.7.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 256560128, "file_name": ".cache\\MatMulNBits_2_0_196.const", "file_size": 430080 }, "model.layers.7.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 256990208, "file_name": ".cache\\MatMulNBits_2_0_197.const", "file_size": 53760 }, "model.layers.7.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 257043968, "file_name": ".cache\\MatMulNBits_2_0_198.const", "file_size": 35840 }, "model.layers.7.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 257079808, "file_name": ".cache\\MatMulNBits_2_0_199.const", "file_size": 6881280 }, "model.layers.7.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 263961088, "file_name": ".cache\\MatMulNBits_2_0_200.const", "file_size": 430080 }, "model.layers.7.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 264391168, "file_name": ".cache\\MatMulNBits_2_0_201.const", "file_size": 53760 }, "model.layers.7.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 264444928, "file_name": ".cache\\MatMulNBits_2_0_202.const", "file_size": 35840 }, "model.layers.7.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 264480768, "file_name": ".cache\\MatMulNBits_2_0_203.const", "file_size": 13762560 }, "model.layers.7.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 278243328, "file_name": ".cache\\MatMulNBits_2_0_204.const", "file_size": 6144 }, "model.layers.7.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 278249472, "file_name": ".cache\\MatMulNBits_2_0_205.const", "file_size": 430080 }, "model.layers.7.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 278679552, "file_name": ".cache\\MatMulNBits_2_0_206.const", "file_size": 107520 }, "model.layers.8.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 278787072, "file_name": ".cache\\MatMulNBits_2_0_207.const", "file_size": 3072 }, "model.layers.8.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 278790144, "file_name": ".cache\\MatMulNBits_2_0_208.const", "file_size": 2752512 }, "model.layers.8.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 281542656, "file_name": ".cache\\MatMulNBits_2_0_209.const", "file_size": 7168 }, "model.layers.8.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 281549824, "file_name": ".cache\\MatMulNBits_2_0_210.const", "file_size": 86016 }, "model.layers.8.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 281635840, "file_name": ".cache\\MatMulNBits_2_0_211.const", "file_size": 21504 }, "model.layers.8.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 281657344, "file_name": ".cache\\MatMulNBits_2_0_212.const", "file_size": 393216 }, "model.layers.8.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 282050560, "file_name": ".cache\\MatMulNBits_2_0_213.const", "file_size": 1024 }, "model.layers.8.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 282051584, "file_name": ".cache\\MatMulNBits_2_0_214.const", "file_size": 12288 }, "model.layers.8.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 282063872, "file_name": ".cache\\MatMulNBits_2_0_215.const", "file_size": 3072 }, "model.layers.8.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 282066944, "file_name": ".cache\\MatMulNBits_2_0_216.const", "file_size": 2359296 }, "model.layers.8.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 284426240, "file_name": ".cache\\MatMulNBits_2_0_217.const", "file_size": 6144 }, "model.layers.8.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 284432384, "file_name": ".cache\\MatMulNBits_2_0_218.const", "file_size": 73728 }, "model.layers.8.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 284506112, "file_name": ".cache\\MatMulNBits_2_0_219.const", "file_size": 18432 }, "model.layers.8.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 284524544, "file_name": ".cache\\MatMulNBits_2_0_220.const", "file_size": 3072 }, "model.layers.8.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 284527616, "file_name": ".cache\\MatMulNBits_2_0_221.const", "file_size": 6881280 }, "model.layers.8.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 291408896, "file_name": ".cache\\MatMulNBits_2_0_222.const", "file_size": 430080 }, "model.layers.8.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 291838976, "file_name": ".cache\\MatMulNBits_2_0_223.const", "file_size": 53760 }, "model.layers.8.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 291892736, "file_name": ".cache\\MatMulNBits_2_0_224.const", "file_size": 35840 }, "model.layers.8.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 291928576, "file_name": ".cache\\MatMulNBits_2_0_225.const", "file_size": 6881280 }, "model.layers.8.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 298809856, "file_name": ".cache\\MatMulNBits_2_0_226.const", "file_size": 430080 }, "model.layers.8.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 299239936, "file_name": ".cache\\MatMulNBits_2_0_227.const", "file_size": 53760 }, "model.layers.8.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 299293696, "file_name": ".cache\\MatMulNBits_2_0_228.const", "file_size": 35840 }, "model.layers.8.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 299329536, "file_name": ".cache\\MatMulNBits_2_0_229.const", "file_size": 13762560 }, "model.layers.8.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 313092096, "file_name": ".cache\\MatMulNBits_2_0_230.const", "file_size": 6144 }, "model.layers.8.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 313098240, "file_name": ".cache\\MatMulNBits_2_0_231.const", "file_size": 430080 }, "model.layers.8.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 313528320, "file_name": ".cache\\MatMulNBits_2_0_232.const", "file_size": 107520 }, "model.layers.9.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 313635840, "file_name": ".cache\\MatMulNBits_2_0_233.const", "file_size": 3072 }, "model.layers.9.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 313638912, "file_name": ".cache\\MatMulNBits_2_0_234.const", "file_size": 2752512 }, "model.layers.9.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 316391424, "file_name": ".cache\\MatMulNBits_2_0_235.const", "file_size": 7168 }, "model.layers.9.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 316398592, "file_name": ".cache\\MatMulNBits_2_0_236.const", "file_size": 86016 }, "model.layers.9.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 316484608, "file_name": ".cache\\MatMulNBits_2_0_237.const", "file_size": 21504 }, "model.layers.9.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 316506112, "file_name": ".cache\\MatMulNBits_2_0_238.const", "file_size": 393216 }, "model.layers.9.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 316899328, "file_name": ".cache\\MatMulNBits_2_0_239.const", "file_size": 1024 }, "model.layers.9.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 316900352, "file_name": ".cache\\MatMulNBits_2_0_240.const", "file_size": 12288 }, "model.layers.9.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 316912640, "file_name": ".cache\\MatMulNBits_2_0_241.const", "file_size": 3072 }, "model.layers.9.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 316915712, "file_name": ".cache\\MatMulNBits_2_0_242.const", "file_size": 2359296 }, "model.layers.9.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 319275008, "file_name": ".cache\\MatMulNBits_2_0_243.const", "file_size": 6144 }, "model.layers.9.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 319281152, "file_name": ".cache\\MatMulNBits_2_0_244.const", "file_size": 73728 }, "model.layers.9.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 319354880, "file_name": ".cache\\MatMulNBits_2_0_245.const", "file_size": 18432 }, "model.layers.9.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 319373312, "file_name": ".cache\\MatMulNBits_2_0_246.const", "file_size": 3072 }, "model.layers.9.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 319376384, "file_name": ".cache\\MatMulNBits_2_0_247.const", "file_size": 6881280 }, "model.layers.9.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 326257664, "file_name": ".cache\\MatMulNBits_2_0_248.const", "file_size": 430080 }, "model.layers.9.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 326687744, "file_name": ".cache\\MatMulNBits_2_0_249.const", "file_size": 53760 }, "model.layers.9.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 326741504, "file_name": ".cache\\MatMulNBits_2_0_250.const", "file_size": 35840 }, "model.layers.9.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 326777344, "file_name": ".cache\\MatMulNBits_2_0_251.const", "file_size": 6881280 }, "model.layers.9.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 333658624, "file_name": ".cache\\MatMulNBits_2_0_252.const", "file_size": 430080 }, "model.layers.9.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 334088704, "file_name": ".cache\\MatMulNBits_2_0_253.const", "file_size": 53760 }, "model.layers.9.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 334142464, "file_name": ".cache\\MatMulNBits_2_0_254.const", "file_size": 35840 }, "model.layers.9.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 334178304, "file_name": ".cache\\MatMulNBits_2_0_255.const", "file_size": 13762560 }, "model.layers.9.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 347940864, "file_name": ".cache\\MatMulNBits_2_0_256.const", "file_size": 6144 }, "model.layers.9.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 347947008, "file_name": ".cache\\MatMulNBits_2_0_257.const", "file_size": 430080 }, "model.layers.9.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 348377088, "file_name": ".cache\\MatMulNBits_2_0_258.const", "file_size": 107520 }, "model.layers.10.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 348484608, "file_name": ".cache\\MatMulNBits_2_0_259.const", "file_size": 3072 }, "model.layers.10.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 348487680, "file_name": ".cache\\MatMulNBits_2_0_260.const", "file_size": 2752512 }, "model.layers.10.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 351240192, "file_name": ".cache\\MatMulNBits_2_0_261.const", "file_size": 7168 }, "model.layers.10.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 351247360, "file_name": ".cache\\MatMulNBits_2_0_262.const", "file_size": 86016 }, "model.layers.10.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 351333376, "file_name": ".cache\\MatMulNBits_2_0_263.const", "file_size": 21504 }, "model.layers.10.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 351354880, "file_name": ".cache\\MatMulNBits_2_0_264.const", "file_size": 393216 }, "model.layers.10.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 351748096, "file_name": ".cache\\MatMulNBits_2_0_265.const", "file_size": 1024 }, "model.layers.10.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 351749120, "file_name": ".cache\\MatMulNBits_2_0_266.const", "file_size": 12288 }, "model.layers.10.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 351761408, "file_name": ".cache\\MatMulNBits_2_0_267.const", "file_size": 3072 }, "model.layers.10.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 351764480, "file_name": ".cache\\MatMulNBits_2_0_268.const", "file_size": 2359296 }, "model.layers.10.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 354123776, "file_name": ".cache\\MatMulNBits_2_0_269.const", "file_size": 6144 }, "model.layers.10.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 354129920, "file_name": ".cache\\MatMulNBits_2_0_270.const", "file_size": 73728 }, "model.layers.10.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 354203648, "file_name": ".cache\\MatMulNBits_2_0_271.const", "file_size": 18432 }, "model.layers.10.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 354222080, "file_name": ".cache\\MatMulNBits_2_0_272.const", "file_size": 3072 }, "model.layers.10.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 354225152, "file_name": ".cache\\MatMulNBits_2_0_273.const", "file_size": 6881280 }, "model.layers.10.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 361106432, "file_name": ".cache\\MatMulNBits_2_0_274.const", "file_size": 430080 }, "model.layers.10.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 361536512, "file_name": ".cache\\MatMulNBits_2_0_275.const", "file_size": 53760 }, "model.layers.10.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 361590272, "file_name": ".cache\\MatMulNBits_2_0_276.const", "file_size": 35840 }, "model.layers.10.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 361626112, "file_name": ".cache\\MatMulNBits_2_0_277.const", "file_size": 6881280 }, "model.layers.10.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 368507392, "file_name": ".cache\\MatMulNBits_2_0_278.const", "file_size": 430080 }, "model.layers.10.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 368937472, "file_name": ".cache\\MatMulNBits_2_0_279.const", "file_size": 53760 }, "model.layers.10.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 368991232, "file_name": ".cache\\MatMulNBits_2_0_280.const", "file_size": 35840 }, "model.layers.10.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 369027072, "file_name": ".cache\\MatMulNBits_2_0_281.const", "file_size": 13762560 }, "model.layers.10.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 382789632, "file_name": ".cache\\MatMulNBits_2_0_282.const", "file_size": 6144 }, "model.layers.10.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 382795776, "file_name": ".cache\\MatMulNBits_2_0_283.const", "file_size": 430080 }, "model.layers.10.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 383225856, "file_name": ".cache\\MatMulNBits_2_0_284.const", "file_size": 107520 }, "model.layers.11.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 383333376, "file_name": ".cache\\MatMulNBits_2_0_285.const", "file_size": 3072 }, "model.layers.11.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 383336448, "file_name": ".cache\\MatMulNBits_2_0_286.const", "file_size": 2752512 }, "model.layers.11.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 386088960, "file_name": ".cache\\MatMulNBits_2_0_287.const", "file_size": 7168 }, "model.layers.11.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 386096128, "file_name": ".cache\\MatMulNBits_2_0_288.const", "file_size": 86016 }, "model.layers.11.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 386182144, "file_name": ".cache\\MatMulNBits_2_0_289.const", "file_size": 21504 }, "model.layers.11.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 386203648, "file_name": ".cache\\MatMulNBits_2_0_290.const", "file_size": 393216 }, "model.layers.11.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 386596864, "file_name": ".cache\\MatMulNBits_2_0_291.const", "file_size": 1024 }, "model.layers.11.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 386597888, "file_name": ".cache\\MatMulNBits_2_0_292.const", "file_size": 12288 }, "model.layers.11.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 386610176, "file_name": ".cache\\MatMulNBits_2_0_293.const", "file_size": 3072 }, "model.layers.11.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 386613248, "file_name": ".cache\\MatMulNBits_2_0_294.const", "file_size": 2359296 }, "model.layers.11.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 388972544, "file_name": ".cache\\MatMulNBits_2_0_295.const", "file_size": 6144 }, "model.layers.11.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 388978688, "file_name": ".cache\\MatMulNBits_2_0_296.const", "file_size": 73728 }, "model.layers.11.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 389052416, "file_name": ".cache\\MatMulNBits_2_0_297.const", "file_size": 18432 }, "model.layers.11.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 389070848, "file_name": ".cache\\MatMulNBits_2_0_298.const", "file_size": 3072 }, "model.layers.11.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 389073920, "file_name": ".cache\\MatMulNBits_2_0_299.const", "file_size": 6881280 }, "model.layers.11.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 395955200, "file_name": ".cache\\MatMulNBits_2_0_300.const", "file_size": 430080 }, "model.layers.11.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 396385280, "file_name": ".cache\\MatMulNBits_2_0_301.const", "file_size": 53760 }, "model.layers.11.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 396439040, "file_name": ".cache\\MatMulNBits_2_0_302.const", "file_size": 35840 }, "model.layers.11.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 396474880, "file_name": ".cache\\MatMulNBits_2_0_303.const", "file_size": 6881280 }, "model.layers.11.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 403356160, "file_name": ".cache\\MatMulNBits_2_0_304.const", "file_size": 430080 }, "model.layers.11.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 403786240, "file_name": ".cache\\MatMulNBits_2_0_305.const", "file_size": 53760 }, "model.layers.11.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 403840000, "file_name": ".cache\\MatMulNBits_2_0_306.const", "file_size": 35840 }, "model.layers.11.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 403875840, "file_name": ".cache\\MatMulNBits_2_0_307.const", "file_size": 13762560 }, "model.layers.11.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 417638400, "file_name": ".cache\\MatMulNBits_2_0_308.const", "file_size": 6144 }, "model.layers.11.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 417644544, "file_name": ".cache\\MatMulNBits_2_0_309.const", "file_size": 430080 }, "model.layers.11.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 418074624, "file_name": ".cache\\MatMulNBits_2_0_310.const", "file_size": 107520 }, "model.layers.12.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 418182144, "file_name": ".cache\\MatMulNBits_2_0_311.const", "file_size": 3072 }, "model.layers.12.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 418185216, "file_name": ".cache\\MatMulNBits_2_0_312.const", "file_size": 2752512 }, "model.layers.12.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 420937728, "file_name": ".cache\\MatMulNBits_2_0_313.const", "file_size": 7168 }, "model.layers.12.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 420944896, "file_name": ".cache\\MatMulNBits_2_0_314.const", "file_size": 86016 }, "model.layers.12.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 421030912, "file_name": ".cache\\MatMulNBits_2_0_315.const", "file_size": 21504 }, "model.layers.12.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 421052416, "file_name": ".cache\\MatMulNBits_2_0_316.const", "file_size": 393216 }, "model.layers.12.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 421445632, "file_name": ".cache\\MatMulNBits_2_0_317.const", "file_size": 1024 }, "model.layers.12.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 421446656, "file_name": ".cache\\MatMulNBits_2_0_318.const", "file_size": 12288 }, "model.layers.12.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 421458944, "file_name": ".cache\\MatMulNBits_2_0_319.const", "file_size": 3072 }, "model.layers.12.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 421462016, "file_name": ".cache\\MatMulNBits_2_0_320.const", "file_size": 2359296 }, "model.layers.12.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 423821312, "file_name": ".cache\\MatMulNBits_2_0_321.const", "file_size": 6144 }, "model.layers.12.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 423827456, "file_name": ".cache\\MatMulNBits_2_0_322.const", "file_size": 73728 }, "model.layers.12.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 423901184, "file_name": ".cache\\MatMulNBits_2_0_323.const", "file_size": 18432 }, "model.layers.12.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 423919616, "file_name": ".cache\\MatMulNBits_2_0_324.const", "file_size": 3072 }, "model.layers.12.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 423922688, "file_name": ".cache\\MatMulNBits_2_0_325.const", "file_size": 6881280 }, "model.layers.12.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 430803968, "file_name": ".cache\\MatMulNBits_2_0_326.const", "file_size": 430080 }, "model.layers.12.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 431234048, "file_name": ".cache\\MatMulNBits_2_0_327.const", "file_size": 53760 }, "model.layers.12.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 431287808, "file_name": ".cache\\MatMulNBits_2_0_328.const", "file_size": 35840 }, "model.layers.12.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 431323648, "file_name": ".cache\\MatMulNBits_2_0_329.const", "file_size": 6881280 }, "model.layers.12.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 438204928, "file_name": ".cache\\MatMulNBits_2_0_330.const", "file_size": 430080 }, "model.layers.12.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 438635008, "file_name": ".cache\\MatMulNBits_2_0_331.const", "file_size": 53760 }, "model.layers.12.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 438688768, "file_name": ".cache\\MatMulNBits_2_0_332.const", "file_size": 35840 }, "model.layers.12.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 438724608, "file_name": ".cache\\MatMulNBits_2_0_333.const", "file_size": 13762560 }, "model.layers.12.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 452487168, "file_name": ".cache\\MatMulNBits_2_0_334.const", "file_size": 6144 }, "model.layers.12.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 452493312, "file_name": ".cache\\MatMulNBits_2_0_335.const", "file_size": 430080 }, "model.layers.12.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 452923392, "file_name": ".cache\\MatMulNBits_2_0_336.const", "file_size": 107520 }, "model.layers.13.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 453030912, "file_name": ".cache\\MatMulNBits_2_0_337.const", "file_size": 3072 }, "model.layers.13.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 453033984, "file_name": ".cache\\MatMulNBits_2_0_338.const", "file_size": 2752512 }, "model.layers.13.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 455786496, "file_name": ".cache\\MatMulNBits_2_0_339.const", "file_size": 7168 }, "model.layers.13.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 455793664, "file_name": ".cache\\MatMulNBits_2_0_340.const", "file_size": 86016 }, "model.layers.13.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 455879680, "file_name": ".cache\\MatMulNBits_2_0_341.const", "file_size": 21504 }, "model.layers.13.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 455901184, "file_name": ".cache\\MatMulNBits_2_0_342.const", "file_size": 393216 }, "model.layers.13.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 456294400, "file_name": ".cache\\MatMulNBits_2_0_343.const", "file_size": 1024 }, "model.layers.13.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 456295424, "file_name": ".cache\\MatMulNBits_2_0_344.const", "file_size": 12288 }, "model.layers.13.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 456307712, "file_name": ".cache\\MatMulNBits_2_0_345.const", "file_size": 3072 }, "model.layers.13.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 456310784, "file_name": ".cache\\MatMulNBits_2_0_346.const", "file_size": 2359296 }, "model.layers.13.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 458670080, "file_name": ".cache\\MatMulNBits_2_0_347.const", "file_size": 6144 }, "model.layers.13.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 458676224, "file_name": ".cache\\MatMulNBits_2_0_348.const", "file_size": 73728 }, "model.layers.13.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 458749952, "file_name": ".cache\\MatMulNBits_2_0_349.const", "file_size": 18432 }, "model.layers.13.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 458768384, "file_name": ".cache\\MatMulNBits_2_0_350.const", "file_size": 3072 }, "model.layers.13.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 458771456, "file_name": ".cache\\MatMulNBits_2_0_351.const", "file_size": 6881280 }, "model.layers.13.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 465652736, "file_name": ".cache\\MatMulNBits_2_0_352.const", "file_size": 430080 }, "model.layers.13.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 466082816, "file_name": ".cache\\MatMulNBits_2_0_353.const", "file_size": 53760 }, "model.layers.13.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 466136576, "file_name": ".cache\\MatMulNBits_2_0_354.const", "file_size": 35840 }, "model.layers.13.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 466172416, "file_name": ".cache\\MatMulNBits_2_0_355.const", "file_size": 6881280 }, "model.layers.13.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 473053696, "file_name": ".cache\\MatMulNBits_2_0_356.const", "file_size": 430080 }, "model.layers.13.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 473483776, "file_name": ".cache\\MatMulNBits_2_0_357.const", "file_size": 53760 }, "model.layers.13.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 473537536, "file_name": ".cache\\MatMulNBits_2_0_358.const", "file_size": 35840 }, "model.layers.13.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 473573376, "file_name": ".cache\\MatMulNBits_2_0_359.const", "file_size": 13762560 }, "model.layers.13.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 487335936, "file_name": ".cache\\MatMulNBits_2_0_360.const", "file_size": 6144 }, "model.layers.13.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 487342080, "file_name": ".cache\\MatMulNBits_2_0_361.const", "file_size": 430080 }, "model.layers.13.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 487772160, "file_name": ".cache\\MatMulNBits_2_0_362.const", "file_size": 107520 }, "model.layers.14.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 487879680, "file_name": ".cache\\MatMulNBits_2_0_363.const", "file_size": 3072 }, "model.layers.14.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 487882752, "file_name": ".cache\\MatMulNBits_2_0_364.const", "file_size": 2752512 }, "model.layers.14.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 490635264, "file_name": ".cache\\MatMulNBits_2_0_365.const", "file_size": 7168 }, "model.layers.14.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 490642432, "file_name": ".cache\\MatMulNBits_2_0_366.const", "file_size": 86016 }, "model.layers.14.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 490728448, "file_name": ".cache\\MatMulNBits_2_0_367.const", "file_size": 21504 }, "model.layers.14.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 490749952, "file_name": ".cache\\MatMulNBits_2_0_368.const", "file_size": 393216 }, "model.layers.14.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 491143168, "file_name": ".cache\\MatMulNBits_2_0_369.const", "file_size": 1024 }, "model.layers.14.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 491144192, "file_name": ".cache\\MatMulNBits_2_0_370.const", "file_size": 12288 }, "model.layers.14.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 491156480, "file_name": ".cache\\MatMulNBits_2_0_371.const", "file_size": 3072 }, "model.layers.14.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 491159552, "file_name": ".cache\\MatMulNBits_2_0_372.const", "file_size": 2359296 }, "model.layers.14.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 493518848, "file_name": ".cache\\MatMulNBits_2_0_373.const", "file_size": 6144 }, "model.layers.14.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 493524992, "file_name": ".cache\\MatMulNBits_2_0_374.const", "file_size": 73728 }, "model.layers.14.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 493598720, "file_name": ".cache\\MatMulNBits_2_0_375.const", "file_size": 18432 }, "model.layers.14.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 493617152, "file_name": ".cache\\MatMulNBits_2_0_376.const", "file_size": 3072 }, "model.layers.14.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 493620224, "file_name": ".cache\\MatMulNBits_2_0_377.const", "file_size": 6881280 }, "model.layers.14.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 500501504, "file_name": ".cache\\MatMulNBits_2_0_378.const", "file_size": 430080 }, "model.layers.14.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 500931584, "file_name": ".cache\\MatMulNBits_2_0_379.const", "file_size": 53760 }, "model.layers.14.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 500985344, "file_name": ".cache\\MatMulNBits_2_0_380.const", "file_size": 35840 }, "model.layers.14.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 501021184, "file_name": ".cache\\MatMulNBits_2_0_381.const", "file_size": 6881280 }, "model.layers.14.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 507902464, "file_name": ".cache\\MatMulNBits_2_0_382.const", "file_size": 430080 }, "model.layers.14.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 508332544, "file_name": ".cache\\MatMulNBits_2_0_383.const", "file_size": 53760 }, "model.layers.14.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 508386304, "file_name": ".cache\\MatMulNBits_2_0_384.const", "file_size": 35840 }, "model.layers.14.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 508422144, "file_name": ".cache\\MatMulNBits_2_0_385.const", "file_size": 13762560 }, "model.layers.14.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 522184704, "file_name": ".cache\\MatMulNBits_2_0_386.const", "file_size": 6144 }, "model.layers.14.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 522190848, "file_name": ".cache\\MatMulNBits_2_0_387.const", "file_size": 430080 }, "model.layers.14.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 522620928, "file_name": ".cache\\MatMulNBits_2_0_388.const", "file_size": 107520 }, "model.layers.15.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 522728448, "file_name": ".cache\\MatMulNBits_2_0_389.const", "file_size": 3072 }, "model.layers.15.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 522731520, "file_name": ".cache\\MatMulNBits_2_0_390.const", "file_size": 2752512 }, "model.layers.15.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 525484032, "file_name": ".cache\\MatMulNBits_2_0_391.const", "file_size": 7168 }, "model.layers.15.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 525491200, "file_name": ".cache\\MatMulNBits_2_0_392.const", "file_size": 86016 }, "model.layers.15.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 525577216, "file_name": ".cache\\MatMulNBits_2_0_393.const", "file_size": 21504 }, "model.layers.15.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 525598720, "file_name": ".cache\\MatMulNBits_2_0_394.const", "file_size": 393216 }, "model.layers.15.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 525991936, "file_name": ".cache\\MatMulNBits_2_0_395.const", "file_size": 1024 }, "model.layers.15.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 525992960, "file_name": ".cache\\MatMulNBits_2_0_396.const", "file_size": 12288 }, "model.layers.15.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 526005248, "file_name": ".cache\\MatMulNBits_2_0_397.const", "file_size": 3072 }, "model.layers.15.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 526008320, "file_name": ".cache\\MatMulNBits_2_0_398.const", "file_size": 2359296 }, "model.layers.15.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 528367616, "file_name": ".cache\\MatMulNBits_2_0_399.const", "file_size": 6144 }, "model.layers.15.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 528373760, "file_name": ".cache\\MatMulNBits_2_0_400.const", "file_size": 73728 }, "model.layers.15.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 528447488, "file_name": ".cache\\MatMulNBits_2_0_401.const", "file_size": 18432 }, "model.layers.15.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 528465920, "file_name": ".cache\\MatMulNBits_2_0_402.const", "file_size": 3072 }, "model.layers.15.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 528468992, "file_name": ".cache\\MatMulNBits_2_0_403.const", "file_size": 6881280 }, "model.layers.15.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 535350272, "file_name": ".cache\\MatMulNBits_2_0_404.const", "file_size": 430080 }, "model.layers.15.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 535780352, "file_name": ".cache\\MatMulNBits_2_0_405.const", "file_size": 53760 }, "model.layers.15.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 535834112, "file_name": ".cache\\MatMulNBits_2_0_406.const", "file_size": 35840 }, "model.layers.15.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 535869952, "file_name": ".cache\\MatMulNBits_2_0_407.const", "file_size": 6881280 }, "model.layers.15.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 542751232, "file_name": ".cache\\MatMulNBits_2_0_408.const", "file_size": 430080 }, "model.layers.15.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 543181312, "file_name": ".cache\\MatMulNBits_2_0_409.const", "file_size": 53760 }, "model.layers.15.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 543235072, "file_name": ".cache\\MatMulNBits_2_0_410.const", "file_size": 35840 }, "model.layers.15.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 543270912, "file_name": ".cache\\MatMulNBits_2_0_411.const", "file_size": 13762560 }, "model.layers.15.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 557033472, "file_name": ".cache\\MatMulNBits_2_0_412.const", "file_size": 6144 }, "model.layers.15.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 557039616, "file_name": ".cache\\MatMulNBits_2_0_413.const", "file_size": 430080 }, "model.layers.15.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 557469696, "file_name": ".cache\\MatMulNBits_2_0_414.const", "file_size": 107520 }, "model.layers.16.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 557577216, "file_name": ".cache\\MatMulNBits_2_0_415.const", "file_size": 3072 }, "model.layers.16.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 557580288, "file_name": ".cache\\MatMulNBits_2_0_416.const", "file_size": 2752512 }, "model.layers.16.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 560332800, "file_name": ".cache\\MatMulNBits_2_0_417.const", "file_size": 7168 }, "model.layers.16.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 560339968, "file_name": ".cache\\MatMulNBits_2_0_418.const", "file_size": 86016 }, "model.layers.16.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 560425984, "file_name": ".cache\\MatMulNBits_2_0_419.const", "file_size": 21504 }, "model.layers.16.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 560447488, "file_name": ".cache\\MatMulNBits_2_0_420.const", "file_size": 393216 }, "model.layers.16.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 560840704, "file_name": ".cache\\MatMulNBits_2_0_421.const", "file_size": 1024 }, "model.layers.16.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 560841728, "file_name": ".cache\\MatMulNBits_2_0_422.const", "file_size": 12288 }, "model.layers.16.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 560854016, "file_name": ".cache\\MatMulNBits_2_0_423.const", "file_size": 3072 }, "model.layers.16.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 560857088, "file_name": ".cache\\MatMulNBits_2_0_424.const", "file_size": 2359296 }, "model.layers.16.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 563216384, "file_name": ".cache\\MatMulNBits_2_0_425.const", "file_size": 6144 }, "model.layers.16.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 563222528, "file_name": ".cache\\MatMulNBits_2_0_426.const", "file_size": 73728 }, "model.layers.16.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 563296256, "file_name": ".cache\\MatMulNBits_2_0_427.const", "file_size": 18432 }, "model.layers.16.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 563314688, "file_name": ".cache\\MatMulNBits_2_0_428.const", "file_size": 3072 }, "model.layers.16.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 563317760, "file_name": ".cache\\MatMulNBits_2_0_429.const", "file_size": 6881280 }, "model.layers.16.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 570199040, "file_name": ".cache\\MatMulNBits_2_0_430.const", "file_size": 430080 }, "model.layers.16.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 570629120, "file_name": ".cache\\MatMulNBits_2_0_431.const", "file_size": 53760 }, "model.layers.16.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 570682880, "file_name": ".cache\\MatMulNBits_2_0_432.const", "file_size": 35840 }, "model.layers.16.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 570718720, "file_name": ".cache\\MatMulNBits_2_0_433.const", "file_size": 6881280 }, "model.layers.16.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 577600000, "file_name": ".cache\\MatMulNBits_2_0_434.const", "file_size": 430080 }, "model.layers.16.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 578030080, "file_name": ".cache\\MatMulNBits_2_0_435.const", "file_size": 53760 }, "model.layers.16.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 578083840, "file_name": ".cache\\MatMulNBits_2_0_436.const", "file_size": 35840 }, "model.layers.16.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 578119680, "file_name": ".cache\\MatMulNBits_2_0_437.const", "file_size": 13762560 }, "model.layers.16.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 591882240, "file_name": ".cache\\MatMulNBits_2_0_438.const", "file_size": 6144 }, "model.layers.16.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 591888384, "file_name": ".cache\\MatMulNBits_2_0_439.const", "file_size": 430080 }, "model.layers.16.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 592318464, "file_name": ".cache\\MatMulNBits_2_0_440.const", "file_size": 107520 }, "model.layers.17.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 592425984, "file_name": ".cache\\MatMulNBits_2_0_441.const", "file_size": 3072 }, "model.layers.17.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 592429056, "file_name": ".cache\\MatMulNBits_2_0_442.const", "file_size": 2752512 }, "model.layers.17.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 595181568, "file_name": ".cache\\MatMulNBits_2_0_443.const", "file_size": 7168 }, "model.layers.17.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 595188736, "file_name": ".cache\\MatMulNBits_2_0_444.const", "file_size": 86016 }, "model.layers.17.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 595274752, "file_name": ".cache\\MatMulNBits_2_0_445.const", "file_size": 21504 }, "model.layers.17.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 595296256, "file_name": ".cache\\MatMulNBits_2_0_446.const", "file_size": 393216 }, "model.layers.17.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 595689472, "file_name": ".cache\\MatMulNBits_2_0_447.const", "file_size": 1024 }, "model.layers.17.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 595690496, "file_name": ".cache\\MatMulNBits_2_0_448.const", "file_size": 12288 }, "model.layers.17.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 595702784, "file_name": ".cache\\MatMulNBits_2_0_449.const", "file_size": 3072 }, "model.layers.17.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 595705856, "file_name": ".cache\\MatMulNBits_2_0_450.const", "file_size": 2359296 }, "model.layers.17.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 598065152, "file_name": ".cache\\MatMulNBits_2_0_451.const", "file_size": 6144 }, "model.layers.17.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 598071296, "file_name": ".cache\\MatMulNBits_2_0_452.const", "file_size": 73728 }, "model.layers.17.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 598145024, "file_name": ".cache\\MatMulNBits_2_0_453.const", "file_size": 18432 }, "model.layers.17.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 598163456, "file_name": ".cache\\MatMulNBits_2_0_454.const", "file_size": 3072 }, "model.layers.17.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 598166528, "file_name": ".cache\\MatMulNBits_2_0_455.const", "file_size": 6881280 }, "model.layers.17.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 605047808, "file_name": ".cache\\MatMulNBits_2_0_456.const", "file_size": 430080 }, "model.layers.17.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 605477888, "file_name": ".cache\\MatMulNBits_2_0_457.const", "file_size": 53760 }, "model.layers.17.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 605531648, "file_name": ".cache\\MatMulNBits_2_0_458.const", "file_size": 35840 }, "model.layers.17.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 605567488, "file_name": ".cache\\MatMulNBits_2_0_459.const", "file_size": 6881280 }, "model.layers.17.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 612448768, "file_name": ".cache\\MatMulNBits_2_0_460.const", "file_size": 430080 }, "model.layers.17.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 612878848, "file_name": ".cache\\MatMulNBits_2_0_461.const", "file_size": 53760 }, "model.layers.17.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 612932608, "file_name": ".cache\\MatMulNBits_2_0_462.const", "file_size": 35840 }, "model.layers.17.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 612968448, "file_name": ".cache\\MatMulNBits_2_0_463.const", "file_size": 13762560 }, "model.layers.17.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 626731008, "file_name": ".cache\\MatMulNBits_2_0_464.const", "file_size": 6144 }, "model.layers.17.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 626737152, "file_name": ".cache\\MatMulNBits_2_0_465.const", "file_size": 430080 }, "model.layers.17.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 627167232, "file_name": ".cache\\MatMulNBits_2_0_466.const", "file_size": 107520 }, "model.layers.18.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 627274752, "file_name": ".cache\\MatMulNBits_2_0_467.const", "file_size": 3072 }, "model.layers.18.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 627277824, "file_name": ".cache\\MatMulNBits_2_0_468.const", "file_size": 2752512 }, "model.layers.18.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 630030336, "file_name": ".cache\\MatMulNBits_2_0_469.const", "file_size": 7168 }, "model.layers.18.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 630037504, "file_name": ".cache\\MatMulNBits_2_0_470.const", "file_size": 86016 }, "model.layers.18.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 630123520, "file_name": ".cache\\MatMulNBits_2_0_471.const", "file_size": 21504 }, "model.layers.18.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 630145024, "file_name": ".cache\\MatMulNBits_2_0_472.const", "file_size": 393216 }, "model.layers.18.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 630538240, "file_name": ".cache\\MatMulNBits_2_0_473.const", "file_size": 1024 }, "model.layers.18.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 630539264, "file_name": ".cache\\MatMulNBits_2_0_474.const", "file_size": 12288 }, "model.layers.18.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 630551552, "file_name": ".cache\\MatMulNBits_2_0_475.const", "file_size": 3072 }, "model.layers.18.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 630554624, "file_name": ".cache\\MatMulNBits_2_0_476.const", "file_size": 2359296 }, "model.layers.18.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 632913920, "file_name": ".cache\\MatMulNBits_2_0_477.const", "file_size": 6144 }, "model.layers.18.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 632920064, "file_name": ".cache\\MatMulNBits_2_0_478.const", "file_size": 73728 }, "model.layers.18.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 632993792, "file_name": ".cache\\MatMulNBits_2_0_479.const", "file_size": 18432 }, "model.layers.18.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 633012224, "file_name": ".cache\\MatMulNBits_2_0_480.const", "file_size": 3072 }, "model.layers.18.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 633015296, "file_name": ".cache\\MatMulNBits_2_0_481.const", "file_size": 6881280 }, "model.layers.18.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 639896576, "file_name": ".cache\\MatMulNBits_2_0_482.const", "file_size": 430080 }, "model.layers.18.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 640326656, "file_name": ".cache\\MatMulNBits_2_0_483.const", "file_size": 53760 }, "model.layers.18.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 640380416, "file_name": ".cache\\MatMulNBits_2_0_484.const", "file_size": 35840 }, "model.layers.18.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 640416256, "file_name": ".cache\\MatMulNBits_2_0_485.const", "file_size": 6881280 }, "model.layers.18.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 647297536, "file_name": ".cache\\MatMulNBits_2_0_486.const", "file_size": 430080 }, "model.layers.18.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 647727616, "file_name": ".cache\\MatMulNBits_2_0_487.const", "file_size": 53760 }, "model.layers.18.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 647781376, "file_name": ".cache\\MatMulNBits_2_0_488.const", "file_size": 35840 }, "model.layers.18.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 647817216, "file_name": ".cache\\MatMulNBits_2_0_489.const", "file_size": 13762560 }, "model.layers.18.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 661579776, "file_name": ".cache\\MatMulNBits_2_0_490.const", "file_size": 6144 }, "model.layers.18.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 661585920, "file_name": ".cache\\MatMulNBits_2_0_491.const", "file_size": 430080 }, "model.layers.18.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 662016000, "file_name": ".cache\\MatMulNBits_2_0_492.const", "file_size": 107520 }, "model.layers.19.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 662123520, "file_name": ".cache\\MatMulNBits_2_0_493.const", "file_size": 3072 }, "model.layers.19.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 662126592, "file_name": ".cache\\MatMulNBits_2_0_494.const", "file_size": 2752512 }, "model.layers.19.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 664879104, "file_name": ".cache\\MatMulNBits_2_0_495.const", "file_size": 7168 }, "model.layers.19.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 664886272, "file_name": ".cache\\MatMulNBits_2_0_496.const", "file_size": 86016 }, "model.layers.19.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 664972288, "file_name": ".cache\\MatMulNBits_2_0_497.const", "file_size": 21504 }, "model.layers.19.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 664993792, "file_name": ".cache\\MatMulNBits_2_0_498.const", "file_size": 393216 }, "model.layers.19.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 665387008, "file_name": ".cache\\MatMulNBits_2_0_499.const", "file_size": 1024 }, "model.layers.19.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 665388032, "file_name": ".cache\\MatMulNBits_2_0_500.const", "file_size": 12288 }, "model.layers.19.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 665400320, "file_name": ".cache\\MatMulNBits_2_0_501.const", "file_size": 3072 }, "model.layers.19.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 665403392, "file_name": ".cache\\MatMulNBits_2_0_502.const", "file_size": 2359296 }, "model.layers.19.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 667762688, "file_name": ".cache\\MatMulNBits_2_0_503.const", "file_size": 6144 }, "model.layers.19.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 667768832, "file_name": ".cache\\MatMulNBits_2_0_504.const", "file_size": 73728 }, "model.layers.19.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 667842560, "file_name": ".cache\\MatMulNBits_2_0_505.const", "file_size": 18432 }, "model.layers.19.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 667860992, "file_name": ".cache\\MatMulNBits_2_0_506.const", "file_size": 3072 }, "model.layers.19.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 667864064, "file_name": ".cache\\MatMulNBits_2_0_507.const", "file_size": 6881280 }, "model.layers.19.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 674745344, "file_name": ".cache\\MatMulNBits_2_0_508.const", "file_size": 430080 }, "model.layers.19.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 675175424, "file_name": ".cache\\MatMulNBits_2_0_509.const", "file_size": 53760 }, "model.layers.19.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 675229184, "file_name": ".cache\\MatMulNBits_2_0_510.const", "file_size": 35840 }, "model.layers.19.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 675265024, "file_name": ".cache\\MatMulNBits_2_0_511.const", "file_size": 6881280 }, "model.layers.19.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 682146304, "file_name": ".cache\\MatMulNBits_2_0_512.const", "file_size": 430080 }, "model.layers.19.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 682576384, "file_name": ".cache\\MatMulNBits_2_0_513.const", "file_size": 53760 }, "model.layers.19.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 682630144, "file_name": ".cache\\MatMulNBits_2_0_514.const", "file_size": 35840 }, "model.layers.19.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 682665984, "file_name": ".cache\\MatMulNBits_2_0_515.const", "file_size": 13762560 }, "model.layers.19.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 696428544, "file_name": ".cache\\MatMulNBits_2_0_516.const", "file_size": 6144 }, "model.layers.19.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 696434688, "file_name": ".cache\\MatMulNBits_2_0_517.const", "file_size": 430080 }, "model.layers.19.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 696864768, "file_name": ".cache\\MatMulNBits_2_0_518.const", "file_size": 107520 }, "model.layers.20.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 696972288, "file_name": ".cache\\MatMulNBits_2_0_519.const", "file_size": 3072 }, "model.layers.20.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 696975360, "file_name": ".cache\\MatMulNBits_2_0_520.const", "file_size": 2752512 }, "model.layers.20.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 699727872, "file_name": ".cache\\MatMulNBits_2_0_521.const", "file_size": 7168 }, "model.layers.20.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 699735040, "file_name": ".cache\\MatMulNBits_2_0_522.const", "file_size": 86016 }, "model.layers.20.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 699821056, "file_name": ".cache\\MatMulNBits_2_0_523.const", "file_size": 21504 }, "model.layers.20.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 699842560, "file_name": ".cache\\MatMulNBits_2_0_524.const", "file_size": 393216 }, "model.layers.20.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 700235776, "file_name": ".cache\\MatMulNBits_2_0_525.const", "file_size": 1024 }, "model.layers.20.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 700236800, "file_name": ".cache\\MatMulNBits_2_0_526.const", "file_size": 12288 }, "model.layers.20.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 700249088, "file_name": ".cache\\MatMulNBits_2_0_527.const", "file_size": 3072 }, "model.layers.20.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 700252160, "file_name": ".cache\\MatMulNBits_2_0_528.const", "file_size": 2359296 }, "model.layers.20.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 702611456, "file_name": ".cache\\MatMulNBits_2_0_529.const", "file_size": 6144 }, "model.layers.20.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 702617600, "file_name": ".cache\\MatMulNBits_2_0_530.const", "file_size": 73728 }, "model.layers.20.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 702691328, "file_name": ".cache\\MatMulNBits_2_0_531.const", "file_size": 18432 }, "model.layers.20.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 702709760, "file_name": ".cache\\MatMulNBits_2_0_532.const", "file_size": 3072 }, "model.layers.20.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 702712832, "file_name": ".cache\\MatMulNBits_2_0_533.const", "file_size": 6881280 }, "model.layers.20.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 709594112, "file_name": ".cache\\MatMulNBits_2_0_534.const", "file_size": 430080 }, "model.layers.20.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 710024192, "file_name": ".cache\\MatMulNBits_2_0_535.const", "file_size": 53760 }, "model.layers.20.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 710077952, "file_name": ".cache\\MatMulNBits_2_0_536.const", "file_size": 35840 }, "model.layers.20.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 710113792, "file_name": ".cache\\MatMulNBits_2_0_537.const", "file_size": 6881280 }, "model.layers.20.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 716995072, "file_name": ".cache\\MatMulNBits_2_0_538.const", "file_size": 430080 }, "model.layers.20.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 717425152, "file_name": ".cache\\MatMulNBits_2_0_539.const", "file_size": 53760 }, "model.layers.20.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 717478912, "file_name": ".cache\\MatMulNBits_2_0_540.const", "file_size": 35840 }, "model.layers.20.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 717514752, "file_name": ".cache\\MatMulNBits_2_0_541.const", "file_size": 13762560 }, "model.layers.20.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 731277312, "file_name": ".cache\\MatMulNBits_2_0_542.const", "file_size": 6144 }, "model.layers.20.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 731283456, "file_name": ".cache\\MatMulNBits_2_0_543.const", "file_size": 430080 }, "model.layers.20.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 731713536, "file_name": ".cache\\MatMulNBits_2_0_544.const", "file_size": 107520 }, "model.layers.21.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 731821056, "file_name": ".cache\\MatMulNBits_2_0_545.const", "file_size": 3072 }, "model.layers.21.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 731824128, "file_name": ".cache\\MatMulNBits_2_0_546.const", "file_size": 2752512 }, "model.layers.21.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 734576640, "file_name": ".cache\\MatMulNBits_2_0_547.const", "file_size": 7168 }, "model.layers.21.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 734583808, "file_name": ".cache\\MatMulNBits_2_0_548.const", "file_size": 86016 }, "model.layers.21.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 734669824, "file_name": ".cache\\MatMulNBits_2_0_549.const", "file_size": 21504 }, "model.layers.21.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 734691328, "file_name": ".cache\\MatMulNBits_2_0_550.const", "file_size": 393216 }, "model.layers.21.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 735084544, "file_name": ".cache\\MatMulNBits_2_0_551.const", "file_size": 1024 }, "model.layers.21.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 735085568, "file_name": ".cache\\MatMulNBits_2_0_552.const", "file_size": 12288 }, "model.layers.21.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 735097856, "file_name": ".cache\\MatMulNBits_2_0_553.const", "file_size": 3072 }, "model.layers.21.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 735100928, "file_name": ".cache\\MatMulNBits_2_0_554.const", "file_size": 2359296 }, "model.layers.21.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 737460224, "file_name": ".cache\\MatMulNBits_2_0_555.const", "file_size": 6144 }, "model.layers.21.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 737466368, "file_name": ".cache\\MatMulNBits_2_0_556.const", "file_size": 73728 }, "model.layers.21.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 737540096, "file_name": ".cache\\MatMulNBits_2_0_557.const", "file_size": 18432 }, "model.layers.21.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 737558528, "file_name": ".cache\\MatMulNBits_2_0_558.const", "file_size": 3072 }, "model.layers.21.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 737561600, "file_name": ".cache\\MatMulNBits_2_0_559.const", "file_size": 6881280 }, "model.layers.21.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 744442880, "file_name": ".cache\\MatMulNBits_2_0_560.const", "file_size": 430080 }, "model.layers.21.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 744872960, "file_name": ".cache\\MatMulNBits_2_0_561.const", "file_size": 53760 }, "model.layers.21.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 744926720, "file_name": ".cache\\MatMulNBits_2_0_562.const", "file_size": 35840 }, "model.layers.21.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 744962560, "file_name": ".cache\\MatMulNBits_2_0_563.const", "file_size": 6881280 }, "model.layers.21.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 751843840, "file_name": ".cache\\MatMulNBits_2_0_564.const", "file_size": 430080 }, "model.layers.21.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 752273920, "file_name": ".cache\\MatMulNBits_2_0_565.const", "file_size": 53760 }, "model.layers.21.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 752327680, "file_name": ".cache\\MatMulNBits_2_0_566.const", "file_size": 35840 }, "model.layers.21.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 752363520, "file_name": ".cache\\MatMulNBits_2_0_567.const", "file_size": 13762560 }, "model.layers.21.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 766126080, "file_name": ".cache\\MatMulNBits_2_0_568.const", "file_size": 6144 }, "model.layers.21.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 766132224, "file_name": ".cache\\MatMulNBits_2_0_569.const", "file_size": 430080 }, "model.layers.21.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 766562304, "file_name": ".cache\\MatMulNBits_2_0_570.const", "file_size": 107520 }, "model.layers.22.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 766669824, "file_name": ".cache\\MatMulNBits_2_0_571.const", "file_size": 3072 }, "model.layers.22.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 766672896, "file_name": ".cache\\MatMulNBits_2_0_572.const", "file_size": 2752512 }, "model.layers.22.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 769425408, "file_name": ".cache\\MatMulNBits_2_0_573.const", "file_size": 7168 }, "model.layers.22.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 769432576, "file_name": ".cache\\MatMulNBits_2_0_574.const", "file_size": 86016 }, "model.layers.22.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 769518592, "file_name": ".cache\\MatMulNBits_2_0_575.const", "file_size": 21504 }, "model.layers.22.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 769540096, "file_name": ".cache\\MatMulNBits_2_0_576.const", "file_size": 393216 }, "model.layers.22.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 769933312, "file_name": ".cache\\MatMulNBits_2_0_577.const", "file_size": 1024 }, "model.layers.22.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 769934336, "file_name": ".cache\\MatMulNBits_2_0_578.const", "file_size": 12288 }, "model.layers.22.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 769946624, "file_name": ".cache\\MatMulNBits_2_0_579.const", "file_size": 3072 }, "model.layers.22.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 769949696, "file_name": ".cache\\MatMulNBits_2_0_580.const", "file_size": 2359296 }, "model.layers.22.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 772308992, "file_name": ".cache\\MatMulNBits_2_0_581.const", "file_size": 6144 }, "model.layers.22.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 772315136, "file_name": ".cache\\MatMulNBits_2_0_582.const", "file_size": 73728 }, "model.layers.22.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 772388864, "file_name": ".cache\\MatMulNBits_2_0_583.const", "file_size": 18432 }, "model.layers.22.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 772407296, "file_name": ".cache\\MatMulNBits_2_0_584.const", "file_size": 3072 }, "model.layers.22.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 772410368, "file_name": ".cache\\MatMulNBits_2_0_585.const", "file_size": 6881280 }, "model.layers.22.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 779291648, "file_name": ".cache\\MatMulNBits_2_0_586.const", "file_size": 430080 }, "model.layers.22.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 779721728, "file_name": ".cache\\MatMulNBits_2_0_587.const", "file_size": 53760 }, "model.layers.22.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 779775488, "file_name": ".cache\\MatMulNBits_2_0_588.const", "file_size": 35840 }, "model.layers.22.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 779811328, "file_name": ".cache\\MatMulNBits_2_0_589.const", "file_size": 6881280 }, "model.layers.22.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 786692608, "file_name": ".cache\\MatMulNBits_2_0_590.const", "file_size": 430080 }, "model.layers.22.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 787122688, "file_name": ".cache\\MatMulNBits_2_0_591.const", "file_size": 53760 }, "model.layers.22.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 787176448, "file_name": ".cache\\MatMulNBits_2_0_592.const", "file_size": 35840 }, "model.layers.22.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 787212288, "file_name": ".cache\\MatMulNBits_2_0_593.const", "file_size": 13762560 }, "model.layers.22.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 800974848, "file_name": ".cache\\MatMulNBits_2_0_594.const", "file_size": 6144 }, "model.layers.22.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 800980992, "file_name": ".cache\\MatMulNBits_2_0_595.const", "file_size": 430080 }, "model.layers.22.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 801411072, "file_name": ".cache\\MatMulNBits_2_0_596.const", "file_size": 107520 }, "model.layers.23.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 801518592, "file_name": ".cache\\MatMulNBits_2_0_597.const", "file_size": 3072 }, "model.layers.23.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 801521664, "file_name": ".cache\\MatMulNBits_2_0_598.const", "file_size": 2752512 }, "model.layers.23.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 804274176, "file_name": ".cache\\MatMulNBits_2_0_599.const", "file_size": 7168 }, "model.layers.23.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 804281344, "file_name": ".cache\\MatMulNBits_2_0_600.const", "file_size": 86016 }, "model.layers.23.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 804367360, "file_name": ".cache\\MatMulNBits_2_0_601.const", "file_size": 21504 }, "model.layers.23.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 804388864, "file_name": ".cache\\MatMulNBits_2_0_602.const", "file_size": 393216 }, "model.layers.23.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 804782080, "file_name": ".cache\\MatMulNBits_2_0_603.const", "file_size": 1024 }, "model.layers.23.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 804783104, "file_name": ".cache\\MatMulNBits_2_0_604.const", "file_size": 12288 }, "model.layers.23.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 804795392, "file_name": ".cache\\MatMulNBits_2_0_605.const", "file_size": 3072 }, "model.layers.23.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 804798464, "file_name": ".cache\\MatMulNBits_2_0_606.const", "file_size": 2359296 }, "model.layers.23.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 807157760, "file_name": ".cache\\MatMulNBits_2_0_607.const", "file_size": 6144 }, "model.layers.23.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 807163904, "file_name": ".cache\\MatMulNBits_2_0_608.const", "file_size": 73728 }, "model.layers.23.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 807237632, "file_name": ".cache\\MatMulNBits_2_0_609.const", "file_size": 18432 }, "model.layers.23.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 807256064, "file_name": ".cache\\MatMulNBits_2_0_610.const", "file_size": 3072 }, "model.layers.23.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 807259136, "file_name": ".cache\\MatMulNBits_2_0_611.const", "file_size": 6881280 }, "model.layers.23.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 814140416, "file_name": ".cache\\MatMulNBits_2_0_612.const", "file_size": 430080 }, "model.layers.23.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 814570496, "file_name": ".cache\\MatMulNBits_2_0_613.const", "file_size": 53760 }, "model.layers.23.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 814624256, "file_name": ".cache\\MatMulNBits_2_0_614.const", "file_size": 35840 }, "model.layers.23.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 814660096, "file_name": ".cache\\MatMulNBits_2_0_615.const", "file_size": 6881280 }, "model.layers.23.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 821541376, "file_name": ".cache\\MatMulNBits_2_0_616.const", "file_size": 430080 }, "model.layers.23.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 821971456, "file_name": ".cache\\MatMulNBits_2_0_617.const", "file_size": 53760 }, "model.layers.23.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 822025216, "file_name": ".cache\\MatMulNBits_2_0_618.const", "file_size": 35840 }, "model.layers.23.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 822061056, "file_name": ".cache\\MatMulNBits_2_0_619.const", "file_size": 13762560 }, "model.layers.23.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 835823616, "file_name": ".cache\\MatMulNBits_2_0_620.const", "file_size": 6144 }, "model.layers.23.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 835829760, "file_name": ".cache\\MatMulNBits_2_0_621.const", "file_size": 430080 }, "model.layers.23.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 836259840, "file_name": ".cache\\MatMulNBits_2_0_622.const", "file_size": 107520 }, "model.layers.24.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 836367360, "file_name": ".cache\\MatMulNBits_2_0_623.const", "file_size": 3072 }, "model.layers.24.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 836370432, "file_name": ".cache\\MatMulNBits_2_0_624.const", "file_size": 2752512 }, "model.layers.24.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 839122944, "file_name": ".cache\\MatMulNBits_2_0_625.const", "file_size": 7168 }, "model.layers.24.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 839130112, "file_name": ".cache\\MatMulNBits_2_0_626.const", "file_size": 86016 }, "model.layers.24.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 839216128, "file_name": ".cache\\MatMulNBits_2_0_627.const", "file_size": 21504 }, "model.layers.24.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 839237632, "file_name": ".cache\\MatMulNBits_2_0_628.const", "file_size": 393216 }, "model.layers.24.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 839630848, "file_name": ".cache\\MatMulNBits_2_0_629.const", "file_size": 1024 }, "model.layers.24.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 839631872, "file_name": ".cache\\MatMulNBits_2_0_630.const", "file_size": 12288 }, "model.layers.24.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 839644160, "file_name": ".cache\\MatMulNBits_2_0_631.const", "file_size": 3072 }, "model.layers.24.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 839647232, "file_name": ".cache\\MatMulNBits_2_0_632.const", "file_size": 2359296 }, "model.layers.24.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 842006528, "file_name": ".cache\\MatMulNBits_2_0_633.const", "file_size": 6144 }, "model.layers.24.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 842012672, "file_name": ".cache\\MatMulNBits_2_0_634.const", "file_size": 73728 }, "model.layers.24.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 842086400, "file_name": ".cache\\MatMulNBits_2_0_635.const", "file_size": 18432 }, "model.layers.24.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 842104832, "file_name": ".cache\\MatMulNBits_2_0_636.const", "file_size": 3072 }, "model.layers.24.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 842107904, "file_name": ".cache\\MatMulNBits_2_0_637.const", "file_size": 6881280 }, "model.layers.24.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 848989184, "file_name": ".cache\\MatMulNBits_2_0_638.const", "file_size": 430080 }, "model.layers.24.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 849419264, "file_name": ".cache\\MatMulNBits_2_0_639.const", "file_size": 53760 }, "model.layers.24.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 849473024, "file_name": ".cache\\MatMulNBits_2_0_640.const", "file_size": 35840 }, "model.layers.24.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 849508864, "file_name": ".cache\\MatMulNBits_2_0_641.const", "file_size": 6881280 }, "model.layers.24.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 856390144, "file_name": ".cache\\MatMulNBits_2_0_642.const", "file_size": 430080 }, "model.layers.24.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 856820224, "file_name": ".cache\\MatMulNBits_2_0_643.const", "file_size": 53760 }, "model.layers.24.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 856873984, "file_name": ".cache\\MatMulNBits_2_0_644.const", "file_size": 35840 }, "model.layers.24.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 856909824, "file_name": ".cache\\MatMulNBits_2_0_645.const", "file_size": 13762560 }, "model.layers.24.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 870672384, "file_name": ".cache\\MatMulNBits_2_0_646.const", "file_size": 6144 }, "model.layers.24.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 870678528, "file_name": ".cache\\MatMulNBits_2_0_647.const", "file_size": 430080 }, "model.layers.24.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 871108608, "file_name": ".cache\\MatMulNBits_2_0_648.const", "file_size": 107520 }, "model.layers.25.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 871216128, "file_name": ".cache\\MatMulNBits_2_0_649.const", "file_size": 3072 }, "model.layers.25.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 871219200, "file_name": ".cache\\MatMulNBits_2_0_650.const", "file_size": 2752512 }, "model.layers.25.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 873971712, "file_name": ".cache\\MatMulNBits_2_0_651.const", "file_size": 7168 }, "model.layers.25.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 873978880, "file_name": ".cache\\MatMulNBits_2_0_652.const", "file_size": 86016 }, "model.layers.25.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 874064896, "file_name": ".cache\\MatMulNBits_2_0_653.const", "file_size": 21504 }, "model.layers.25.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 874086400, "file_name": ".cache\\MatMulNBits_2_0_654.const", "file_size": 393216 }, "model.layers.25.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 874479616, "file_name": ".cache\\MatMulNBits_2_0_655.const", "file_size": 1024 }, "model.layers.25.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 874480640, "file_name": ".cache\\MatMulNBits_2_0_656.const", "file_size": 12288 }, "model.layers.25.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 874492928, "file_name": ".cache\\MatMulNBits_2_0_657.const", "file_size": 3072 }, "model.layers.25.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 874496000, "file_name": ".cache\\MatMulNBits_2_0_658.const", "file_size": 2359296 }, "model.layers.25.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 876855296, "file_name": ".cache\\MatMulNBits_2_0_659.const", "file_size": 6144 }, "model.layers.25.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 876861440, "file_name": ".cache\\MatMulNBits_2_0_660.const", "file_size": 73728 }, "model.layers.25.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 876935168, "file_name": ".cache\\MatMulNBits_2_0_661.const", "file_size": 18432 }, "model.layers.25.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 876953600, "file_name": ".cache\\MatMulNBits_2_0_662.const", "file_size": 3072 }, "model.layers.25.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 876956672, "file_name": ".cache\\MatMulNBits_2_0_663.const", "file_size": 6881280 }, "model.layers.25.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 883837952, "file_name": ".cache\\MatMulNBits_2_0_664.const", "file_size": 430080 }, "model.layers.25.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 884268032, "file_name": ".cache\\MatMulNBits_2_0_665.const", "file_size": 53760 }, "model.layers.25.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 884321792, "file_name": ".cache\\MatMulNBits_2_0_666.const", "file_size": 35840 }, "model.layers.25.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 884357632, "file_name": ".cache\\MatMulNBits_2_0_667.const", "file_size": 6881280 }, "model.layers.25.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 891238912, "file_name": ".cache\\MatMulNBits_2_0_668.const", "file_size": 430080 }, "model.layers.25.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 891668992, "file_name": ".cache\\MatMulNBits_2_0_669.const", "file_size": 53760 }, "model.layers.25.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 891722752, "file_name": ".cache\\MatMulNBits_2_0_670.const", "file_size": 35840 }, "model.layers.25.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 891758592, "file_name": ".cache\\MatMulNBits_2_0_671.const", "file_size": 13762560 }, "model.layers.25.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 905521152, "file_name": ".cache\\MatMulNBits_2_0_672.const", "file_size": 6144 }, "model.layers.25.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 905527296, "file_name": ".cache\\MatMulNBits_2_0_673.const", "file_size": 430080 }, "model.layers.25.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 905957376, "file_name": ".cache\\MatMulNBits_2_0_674.const", "file_size": 107520 }, "model.layers.26.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 906064896, "file_name": ".cache\\MatMulNBits_2_0_675.const", "file_size": 3072 }, "model.layers.26.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 906067968, "file_name": ".cache\\MatMulNBits_2_0_676.const", "file_size": 2752512 }, "model.layers.26.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 908820480, "file_name": ".cache\\MatMulNBits_2_0_677.const", "file_size": 7168 }, "model.layers.26.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 908827648, "file_name": ".cache\\MatMulNBits_2_0_678.const", "file_size": 86016 }, "model.layers.26.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 908913664, "file_name": ".cache\\MatMulNBits_2_0_679.const", "file_size": 21504 }, "model.layers.26.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 908935168, "file_name": ".cache\\MatMulNBits_2_0_680.const", "file_size": 393216 }, "model.layers.26.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 909328384, "file_name": ".cache\\MatMulNBits_2_0_681.const", "file_size": 1024 }, "model.layers.26.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 909329408, "file_name": ".cache\\MatMulNBits_2_0_682.const", "file_size": 12288 }, "model.layers.26.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 909341696, "file_name": ".cache\\MatMulNBits_2_0_683.const", "file_size": 3072 }, "model.layers.26.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 909344768, "file_name": ".cache\\MatMulNBits_2_0_684.const", "file_size": 2359296 }, "model.layers.26.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 911704064, "file_name": ".cache\\MatMulNBits_2_0_685.const", "file_size": 6144 }, "model.layers.26.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 911710208, "file_name": ".cache\\MatMulNBits_2_0_686.const", "file_size": 73728 }, "model.layers.26.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 911783936, "file_name": ".cache\\MatMulNBits_2_0_687.const", "file_size": 18432 }, "model.layers.26.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 911802368, "file_name": ".cache\\MatMulNBits_2_0_688.const", "file_size": 3072 }, "model.layers.26.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 911805440, "file_name": ".cache\\MatMulNBits_2_0_689.const", "file_size": 6881280 }, "model.layers.26.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 918686720, "file_name": ".cache\\MatMulNBits_2_0_690.const", "file_size": 430080 }, "model.layers.26.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 919116800, "file_name": ".cache\\MatMulNBits_2_0_691.const", "file_size": 53760 }, "model.layers.26.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 919170560, "file_name": ".cache\\MatMulNBits_2_0_692.const", "file_size": 35840 }, "model.layers.26.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 919206400, "file_name": ".cache\\MatMulNBits_2_0_693.const", "file_size": 6881280 }, "model.layers.26.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 926087680, "file_name": ".cache\\MatMulNBits_2_0_694.const", "file_size": 430080 }, "model.layers.26.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 926517760, "file_name": ".cache\\MatMulNBits_2_0_695.const", "file_size": 53760 }, "model.layers.26.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 926571520, "file_name": ".cache\\MatMulNBits_2_0_696.const", "file_size": 35840 }, "model.layers.26.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 926607360, "file_name": ".cache\\MatMulNBits_2_0_697.const", "file_size": 13762560 }, "model.layers.26.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 940369920, "file_name": ".cache\\MatMulNBits_2_0_698.const", "file_size": 6144 }, "model.layers.26.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 940376064, "file_name": ".cache\\MatMulNBits_2_0_699.const", "file_size": 430080 }, "model.layers.26.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 940806144, "file_name": ".cache\\MatMulNBits_2_0_700.const", "file_size": 107520 }, "model.layers.27.input_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 940913664, "file_name": ".cache\\MatMulNBits_2_0_701.const", "file_size": 3072 }, "model.layers.27.attn.qk_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1792 ], "size_in_bytes": 2752512, "op_tensor_size": 2752512, "offset": 940916736, "file_name": ".cache\\MatMulNBits_2_0_702.const", "file_size": 2752512 }, "model.layers.27.attn.qk_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1792 ], "size_in_bytes": 7168, "op_tensor_size": 7168, "offset": 943669248, "file_name": ".cache\\MatMulNBits_2_0_703.const", "file_size": 7168 }, "model.layers.27.attn.qk_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 21504 ], "size_in_bytes": 86016, "op_tensor_size": 86016, "offset": 943676416, "file_name": ".cache\\MatMulNBits_2_0_704.const", "file_size": 86016 }, "model.layers.27.attn.qk_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 21504 ], "size_in_bytes": 21504, "op_tensor_size": 21504, "offset": 943762432, "file_name": ".cache\\MatMulNBits_2_0_705.const", "file_size": 21504 }, "model.layers.27.attn.v_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 256 ], "size_in_bytes": 393216, "op_tensor_size": 393216, "offset": 943783936, "file_name": ".cache\\MatMulNBits_2_0_706.const", "file_size": 393216 }, "model.layers.27.attn.v_proj.Add.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 256 ], "size_in_bytes": 1024, "op_tensor_size": 1024, "offset": 944177152, "file_name": ".cache\\MatMulNBits_2_0_707.const", "file_size": 1024 }, "model.layers.27.attn.v_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 3072 ], "size_in_bytes": 12288, "op_tensor_size": 12288, "offset": 944178176, "file_name": ".cache\\MatMulNBits_2_0_708.const", "file_size": 12288 }, "model.layers.27.attn.v_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 3072 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 944190464, "file_name": ".cache\\MatMulNBits_2_0_709.const", "file_size": 3072 }, "model.layers.27.attn.o_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 1536 ], "size_in_bytes": 2359296, "op_tensor_size": 2359296, "offset": 944193536, "file_name": ".cache\\MatMulNBits_2_0_710.const", "file_size": 2359296 }, "model.layers.27.attn.o_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 946552832, "file_name": ".cache\\MatMulNBits_2_0_711.const", "file_size": 6144 }, "model.layers.27.attn.o_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 18432 ], "size_in_bytes": 73728, "op_tensor_size": 73728, "offset": 946558976, "file_name": ".cache\\MatMulNBits_2_0_712.const", "file_size": 73728 }, "model.layers.27.attn.o_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 18432 ], "size_in_bytes": 18432, "op_tensor_size": 18432, "offset": 946632704, "file_name": ".cache\\MatMulNBits_2_0_713.const", "file_size": 18432 }, "model.layers.27.post_attention_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 946651136, "file_name": ".cache\\MatMulNBits_2_0_714.const", "file_size": 3072 }, "model.layers.27.mlp.gate_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 946654208, "file_name": ".cache\\MatMulNBits_2_0_715.const", "file_size": 6881280 }, "model.layers.27.mlp.gate_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 953535488, "file_name": ".cache\\MatMulNBits_2_0_716.const", "file_size": 430080 }, "model.layers.27.mlp.gate_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 953965568, "file_name": ".cache\\MatMulNBits_2_0_717.const", "file_size": 53760 }, "model.layers.27.mlp.gate_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 954019328, "file_name": ".cache\\MatMulNBits_2_0_718.const", "file_size": 35840 }, "model.layers.27.mlp.up_proj.MatMulNBits.qweight": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 8960, 12, 64 ], "size_in_bytes": 6881280, "op_tensor_size": 6881280, "offset": 954055168, "file_name": ".cache\\MatMulNBits_2_0_719.const", "file_size": 6881280 }, "model.layers.27.mlp.up_proj.MatMulNBits.scales.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 960936448, "file_name": ".cache\\MatMulNBits_2_0_720.const", "file_size": 430080 }, "model.layers.27.mlp.up_proj.MatMulNBits.qzeros": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "uint8", "shape": [ 53760 ], "size_in_bytes": 53760, "op_tensor_size": 53760, "offset": 961366528, "file_name": ".cache\\MatMulNBits_2_0_721.const", "file_size": 53760 }, "model.layers.27.mlp.up_proj.MatMulNBits.bias.f": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 8960 ], "size_in_bytes": 35840, "op_tensor_size": 35840, "offset": 961420288, "file_name": ".cache\\MatMulNBits_2_0_722.const", "file_size": 35840 }, "model.layers.27.mlp.down_proj.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 8960, 1536 ], "size_in_bytes": 13762560, "op_tensor_size": 13762560, "offset": 961456128, "file_name": ".cache\\MatMulNBits_2_0_723.const", "file_size": 13762560 }, "model.layers.27.mlp.down_proj.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1536 ], "size_in_bytes": 6144, "op_tensor_size": 6144, "offset": 975218688, "file_name": ".cache\\MatMulNBits_2_0_724.const", "file_size": 6144 }, "model.layers.27.mlp.down_proj.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 107520 ], "size_in_bytes": 430080, "op_tensor_size": 430080, "offset": 975224832, "file_name": ".cache\\MatMulNBits_2_0_725.const", "file_size": 430080 }, "model.layers.27.mlp.down_proj.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 107520 ], "size_in_bytes": 107520, "op_tensor_size": 107520, "offset": 975654912, "file_name": ".cache\\MatMulNBits_2_0_726.const", "file_size": 107520 }, "model.layers.28.final_norm_layernorm.weight.bf": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "bfloat16", "shape": [ 1536 ], "size_in_bytes": 3072, "op_tensor_size": 3072, "offset": 975762432, "file_name": ".cache\\MatMulNBits_2_0_727.const", "file_size": 3072 }, "lm_head.MatMulNBits.qweight.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1536, 151936 ], "size_in_bytes": 233373696, "op_tensor_size": 233373696, "offset": 975765504, "file_name": ".cache\\MatMulNBits_2_0_728.const", "file_size": 233373696 }, "lm_head.MatMulNBits.bias.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 151936 ], "size_in_bytes": 607744, "op_tensor_size": 607744, "offset": 1209139200, "file_name": ".cache\\MatMulNBits_2_0_729.const", "file_size": 607744 }, "lm_head.MatMulNBits.scales.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "float", "shape": [ 1823232 ], "size_in_bytes": 7292928, "op_tensor_size": 7292928, "offset": 1209746944, "file_name": ".cache\\MatMulNBits_2_0_730.const", "file_size": 7292928 }, "lm_head.MatMulNBits.qzeros.preformat": { "packed_buffer_label": "const", "xrt_arg_id": 3, "dtype": "int8", "shape": [ 1823232 ], "size_in_bytes": 1823232, "op_tensor_size": 1823232, "offset": 1217039872, "file_name": ".cache\\MatMulNBits_2_0_731.const", "file_size": 1823232 }, "past_key_values.0.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 0 }, "past_key_values.0.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 2097152 }, "present.0.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 0 }, "present.0.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 2097152 }, "past_key_values.1.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 4194304 }, "past_key_values.1.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 6291456 }, "present.1.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 4194304 }, "present.1.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 6291456 }, "past_key_values.2.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 8388608 }, "past_key_values.2.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 10485760 }, "present.2.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 8388608 }, "present.2.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 10485760 }, "past_key_values.3.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 12582912 }, "past_key_values.3.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 14680064 }, "present.3.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 12582912 }, "present.3.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 14680064 }, "past_key_values.4.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 16777216 }, "past_key_values.4.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 18874368 }, "present.4.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 16777216 }, "present.4.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 18874368 }, "past_key_values.5.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 20971520 }, "past_key_values.5.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 23068672 }, "present.5.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 20971520 }, "present.5.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 23068672 }, "past_key_values.6.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 25165824 }, "past_key_values.6.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 27262976 }, "present.6.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 25165824 }, "present.6.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 27262976 }, "past_key_values.7.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 29360128 }, "past_key_values.7.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 31457280 }, "present.7.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 29360128 }, "present.7.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 31457280 }, "past_key_values.8.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 33554432 }, "past_key_values.8.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 35651584 }, "present.8.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 33554432 }, "present.8.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 35651584 }, "past_key_values.9.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 37748736 }, "past_key_values.9.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 39845888 }, "present.9.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 37748736 }, "present.9.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 39845888 }, "past_key_values.10.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 41943040 }, "past_key_values.10.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 44040192 }, "present.10.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 41943040 }, "present.10.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 44040192 }, "past_key_values.11.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 46137344 }, "past_key_values.11.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 48234496 }, "present.11.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 46137344 }, "present.11.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 48234496 }, "past_key_values.12.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 50331648 }, "past_key_values.12.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 52428800 }, "present.12.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 50331648 }, "present.12.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 52428800 }, "past_key_values.13.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 54525952 }, "past_key_values.13.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 56623104 }, "present.13.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 54525952 }, "present.13.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 56623104 }, "past_key_values.14.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 58720256 }, "past_key_values.14.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 60817408 }, "present.14.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 58720256 }, "present.14.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 60817408 }, "past_key_values.15.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 62914560 }, "past_key_values.15.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 65011712 }, "present.15.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 62914560 }, "present.15.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 65011712 }, "past_key_values.16.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 67108864 }, "past_key_values.16.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 69206016 }, "present.16.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 67108864 }, "present.16.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 69206016 }, "past_key_values.17.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 71303168 }, "past_key_values.17.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 73400320 }, "present.17.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 71303168 }, "present.17.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 73400320 }, "past_key_values.18.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 75497472 }, "past_key_values.18.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 77594624 }, "present.18.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 75497472 }, "present.18.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 77594624 }, "past_key_values.19.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 79691776 }, "past_key_values.19.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 81788928 }, "present.19.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 79691776 }, "present.19.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 81788928 }, "past_key_values.20.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 83886080 }, "past_key_values.20.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 85983232 }, "present.20.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 83886080 }, "present.20.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 85983232 }, "past_key_values.21.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 88080384 }, "past_key_values.21.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 90177536 }, "present.21.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 88080384 }, "present.21.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 90177536 }, "past_key_values.22.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 92274688 }, "past_key_values.22.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 94371840 }, "present.22.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 92274688 }, "present.22.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 94371840 }, "past_key_values.23.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 96468992 }, "past_key_values.23.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 98566144 }, "present.23.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 96468992 }, "present.23.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 98566144 }, "past_key_values.24.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 100663296 }, "past_key_values.24.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 102760448 }, "present.24.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 100663296 }, "present.24.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 102760448 }, "past_key_values.25.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 104857600 }, "past_key_values.25.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 106954752 }, "present.25.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 104857600 }, "present.25.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 106954752 }, "past_key_values.26.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 109051904 }, "past_key_values.26.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 111149056 }, "present.26.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 109051904 }, "present.26.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 111149056 }, "past_key_values.27.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 113246208 }, "past_key_values.27.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 115343360 }, "present.27.key": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 113246208 }, "present.27.value": { "packed_buffer_label": "ext_buf_0", "xrt_arg_id": 5, "dtype": "bfloat16", "shape": [ 1, 2, 4096, 128 ], "size_in_bytes": 2097152, "op_tensor_size": 2097152, "offset": 115343360 }, "sin_cos_cache_token": { "packed_buffer_label": "ext_buf_1", "xrt_arg_id": 6, "dtype": "bfloat16", "shape": [ 131072, 128 ], "size_in_bytes": 33554432, "op_tensor_size": 33554432, "offset": 0 } }, "aux_info": { "is_llm": true } }