{
  "dd_meta_major_version": 1,
  "dd_meta_minor_version": 4,
  "state_table_updates": [
    {
      "state_table_idx": 0,
      "update_func": 1,
      "update_arg": 1
    }
  ],
  "op_list": [
    {
      "name": "MatMulNBits_2_0",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.0/input_layernorm/output_0.out5_4_0"
      ],
      "const_args": [
        "model.layers.0.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.0.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.0.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.0.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.0/attn/qk_proj/Add/output_0.out5_4_0"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.0.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.0/input_layernorm/output_0.out5_4_0"
      ],
      "const_args": [
        "model.layers.0.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.0.attn.v_proj.Add.bias.preformat",
        "model.layers.0.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.0.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.0.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "3",
            "1"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.0/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.0/attn/qk_proj/Add/output_0.out5_4_0",
        "past_key_values.0.key",
        "past_key_values.0.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.0/attn/GroupQueryAttention/output_0.out2_0",
        "present.0.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "0",
            "0",
            "2",
            "0",
            "1",
            "1",
            "6",
            "0",
            "2",
            "0"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.0.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.0/attn/GroupQueryAttention/output_0.out2_0"
      ],
      "const_args": [
        "model.layers.0.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.0.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.0.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.0.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.0/attn/o_proj/MatMulNBits/output_0.out5_4_1"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_0",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/embed_tokens/Gather/output_0.out4_0",
        "/model/layers.0/attn/o_proj/MatMulNBits/output_0.out5_4_1"
      ],
      "const_args": [
        "model.layers.0.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.0/post_attention_layernorm/output_3.out4_0",
        "/model/layers.0/post_attention_layernorm/output_0.out4_0"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_0",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.0/post_attention_layernorm/output_0.out4_0"
      ],
      "const_args": [
        "model.layers.0.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.0.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.0.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.0.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.0.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.0.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.0.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.0.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.0/mlp/Mul/output_0.out3_0"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.0.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.0/mlp/Mul/output_0.out3_0"
      ],
      "const_args": [
        "model.layers.0.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.0.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.0.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.0.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.0/mlp/down_proj/MatMulNBits/output_0.out5_4_2"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_1",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.0/post_attention_layernorm/output_3.out4_0",
        "/model/layers.0/mlp/down_proj/MatMulNBits/output_0.out5_4_2"
      ],
      "const_args": [
        "model.layers.1.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.1/input_layernorm/output_3.out4_1",
        "/model/layers.1/input_layernorm/output_0.out4_1"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_1",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.1/input_layernorm/output_0.out4_1"
      ],
      "const_args": [
        "model.layers.1.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.1.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.1.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.1.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.1/attn/qk_proj/Add/output_0.out5_4_3"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.1.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.1/input_layernorm/output_0.out4_1"
      ],
      "const_args": [
        "model.layers.1.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.1.attn.v_proj.Add.bias.preformat",
        "model.layers.1.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.1.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.1.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "7",
            "3"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.1/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.1/attn/qk_proj/Add/output_0.out5_4_3",
        "past_key_values.1.key",
        "past_key_values.1.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.1/attn/GroupQueryAttention/output_0.out2_1",
        "present.1.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "4",
            "2",
            "2",
            "0",
            "5",
            "3",
            "6",
            "0",
            "6",
            "2"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.1.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.1/attn/GroupQueryAttention/output_0.out2_1"
      ],
      "const_args": [
        "model.layers.1.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.1.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.1.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.1.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.1/attn/o_proj/MatMulNBits/output_0.out5_4_4"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_2",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.1/input_layernorm/output_3.out4_1",
        "/model/layers.1/attn/o_proj/MatMulNBits/output_0.out5_4_4"
      ],
      "const_args": [
        "model.layers.1.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.1/post_attention_layernorm/output_3.out4_2",
        "/model/layers.1/post_attention_layernorm/output_0.out4_2"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_1",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.1/post_attention_layernorm/output_0.out4_2"
      ],
      "const_args": [
        "model.layers.1.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.1.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.1.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.1.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.1.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.1.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.1.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.1.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.1/mlp/Mul/output_0.out3_1"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.1.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.1/mlp/Mul/output_0.out3_1"
      ],
      "const_args": [
        "model.layers.1.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.1.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.1.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.1.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.1/mlp/down_proj/MatMulNBits/output_0.out5_4_5"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_3",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.1/post_attention_layernorm/output_3.out4_2",
        "/model/layers.1/mlp/down_proj/MatMulNBits/output_0.out5_4_5"
      ],
      "const_args": [
        "model.layers.2.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.2/input_layernorm/output_3.out4_3",
        "/model/layers.2/input_layernorm/output_0.out4_3"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_2",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.2/input_layernorm/output_0.out4_3"
      ],
      "const_args": [
        "model.layers.2.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.2.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.2.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.2.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.2/attn/qk_proj/Add/output_0.out5_4_6"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.2.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.2/input_layernorm/output_0.out4_3"
      ],
      "const_args": [
        "model.layers.2.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.2.attn.v_proj.Add.bias.preformat",
        "model.layers.2.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.2.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.2.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "11",
            "5"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.2/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.2/attn/qk_proj/Add/output_0.out5_4_6",
        "past_key_values.2.key",
        "past_key_values.2.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.2/attn/GroupQueryAttention/output_0.out2_2",
        "present.2.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "8",
            "4",
            "2",
            "0",
            "9",
            "5",
            "6",
            "0",
            "10",
            "4"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.2.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.2/attn/GroupQueryAttention/output_0.out2_2"
      ],
      "const_args": [
        "model.layers.2.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.2.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.2.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.2.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.2/attn/o_proj/MatMulNBits/output_0.out5_4_7"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_4",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.2/input_layernorm/output_3.out4_3",
        "/model/layers.2/attn/o_proj/MatMulNBits/output_0.out5_4_7"
      ],
      "const_args": [
        "model.layers.2.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.2/post_attention_layernorm/output_3.out4_4",
        "/model/layers.2/post_attention_layernorm/output_0.out4_4"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_2",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.2/post_attention_layernorm/output_0.out4_4"
      ],
      "const_args": [
        "model.layers.2.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.2.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.2.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.2.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.2.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.2.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.2.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.2.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.2/mlp/Mul/output_0.out3_2"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.2.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.2/mlp/Mul/output_0.out3_2"
      ],
      "const_args": [
        "model.layers.2.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.2.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.2.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.2.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.2/mlp/down_proj/MatMulNBits/output_0.out5_4_8"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_5",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.2/post_attention_layernorm/output_3.out4_4",
        "/model/layers.2/mlp/down_proj/MatMulNBits/output_0.out5_4_8"
      ],
      "const_args": [
        "model.layers.3.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.3/input_layernorm/output_3.out4_5",
        "/model/layers.3/input_layernorm/output_0.out4_5"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_3",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.3/input_layernorm/output_0.out4_5"
      ],
      "const_args": [
        "model.layers.3.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.3.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.3.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.3.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.3/attn/qk_proj/Add/output_0.out5_4_9"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.3.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.3/input_layernorm/output_0.out4_5"
      ],
      "const_args": [
        "model.layers.3.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.3.attn.v_proj.Add.bias.preformat",
        "model.layers.3.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.3.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.3.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "15",
            "7"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.3/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.3/attn/qk_proj/Add/output_0.out5_4_9",
        "past_key_values.3.key",
        "past_key_values.3.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.3/attn/GroupQueryAttention/output_0.out2_3",
        "present.3.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "12",
            "6",
            "2",
            "0",
            "13",
            "7",
            "6",
            "0",
            "14",
            "6"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.3.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.3/attn/GroupQueryAttention/output_0.out2_3"
      ],
      "const_args": [
        "model.layers.3.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.3.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.3.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.3.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.3/attn/o_proj/MatMulNBits/output_0.out5_4_10"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_6",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.3/input_layernorm/output_3.out4_5",
        "/model/layers.3/attn/o_proj/MatMulNBits/output_0.out5_4_10"
      ],
      "const_args": [
        "model.layers.3.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.3/post_attention_layernorm/output_3.out4_6",
        "/model/layers.3/post_attention_layernorm/output_0.out4_6"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_3",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.3/post_attention_layernorm/output_0.out4_6"
      ],
      "const_args": [
        "model.layers.3.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.3.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.3.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.3.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.3.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.3.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.3.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.3.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.3/mlp/Mul/output_0.out3_3"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.3.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.3/mlp/Mul/output_0.out3_3"
      ],
      "const_args": [
        "model.layers.3.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.3.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.3.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.3.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.3/mlp/down_proj/MatMulNBits/output_0.out5_4_11"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_7",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.3/post_attention_layernorm/output_3.out4_6",
        "/model/layers.3/mlp/down_proj/MatMulNBits/output_0.out5_4_11"
      ],
      "const_args": [
        "model.layers.4.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.4/input_layernorm/output_3.out4_7",
        "/model/layers.4/input_layernorm/output_0.out4_7"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_4",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.4/input_layernorm/output_0.out4_7"
      ],
      "const_args": [
        "model.layers.4.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.4.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.4.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.4.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.4/attn/qk_proj/Add/output_0.out5_4_12"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.4.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.4/input_layernorm/output_0.out4_7"
      ],
      "const_args": [
        "model.layers.4.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.4.attn.v_proj.Add.bias.preformat",
        "model.layers.4.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.4.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.4.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "19",
            "9"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.4/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.4/attn/qk_proj/Add/output_0.out5_4_12",
        "past_key_values.4.key",
        "past_key_values.4.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.4/attn/GroupQueryAttention/output_0.out2_4",
        "present.4.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "16",
            "8",
            "2",
            "0",
            "17",
            "9",
            "6",
            "0",
            "18",
            "8"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.4.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.4/attn/GroupQueryAttention/output_0.out2_4"
      ],
      "const_args": [
        "model.layers.4.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.4.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.4.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.4.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.4/attn/o_proj/MatMulNBits/output_0.out5_4_13"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_8",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.4/input_layernorm/output_3.out4_7",
        "/model/layers.4/attn/o_proj/MatMulNBits/output_0.out5_4_13"
      ],
      "const_args": [
        "model.layers.4.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.4/post_attention_layernorm/output_3.out4_8",
        "/model/layers.4/post_attention_layernorm/output_0.out4_8"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_4",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.4/post_attention_layernorm/output_0.out4_8"
      ],
      "const_args": [
        "model.layers.4.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.4.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.4.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.4.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.4.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.4.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.4.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.4.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.4/mlp/Mul/output_0.out3_4"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.4.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.4/mlp/Mul/output_0.out3_4"
      ],
      "const_args": [
        "model.layers.4.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.4.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.4.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.4.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.4/mlp/down_proj/MatMulNBits/output_0.out5_4_14"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_9",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.4/post_attention_layernorm/output_3.out4_8",
        "/model/layers.4/mlp/down_proj/MatMulNBits/output_0.out5_4_14"
      ],
      "const_args": [
        "model.layers.5.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.5/input_layernorm/output_3.out4_9",
        "/model/layers.5/input_layernorm/output_0.out4_9"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_5",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.5/input_layernorm/output_0.out4_9"
      ],
      "const_args": [
        "model.layers.5.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.5.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.5.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.5.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.5/attn/qk_proj/Add/output_0.out5_4_15"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.5.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.5/input_layernorm/output_0.out4_9"
      ],
      "const_args": [
        "model.layers.5.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.5.attn.v_proj.Add.bias.preformat",
        "model.layers.5.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.5.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.5.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "23",
            "11"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.5/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.5/attn/qk_proj/Add/output_0.out5_4_15",
        "past_key_values.5.key",
        "past_key_values.5.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.5/attn/GroupQueryAttention/output_0.out2_5",
        "present.5.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "20",
            "10",
            "2",
            "0",
            "21",
            "11",
            "6",
            "0",
            "22",
            "10"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.5.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.5/attn/GroupQueryAttention/output_0.out2_5"
      ],
      "const_args": [
        "model.layers.5.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.5.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.5.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.5.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.5/attn/o_proj/MatMulNBits/output_0.out5_4_16"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_10",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.5/input_layernorm/output_3.out4_9",
        "/model/layers.5/attn/o_proj/MatMulNBits/output_0.out5_4_16"
      ],
      "const_args": [
        "model.layers.5.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.5/post_attention_layernorm/output_3.out4_10",
        "/model/layers.5/post_attention_layernorm/output_0.out4_10"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_5",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.5/post_attention_layernorm/output_0.out4_10"
      ],
      "const_args": [
        "model.layers.5.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.5.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.5.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.5.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.5.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.5.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.5.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.5.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.5/mlp/Mul/output_0.out3_5"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.5.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.5/mlp/Mul/output_0.out3_5"
      ],
      "const_args": [
        "model.layers.5.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.5.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.5.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.5.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.5/mlp/down_proj/MatMulNBits/output_0.out5_4_17"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_11",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.5/post_attention_layernorm/output_3.out4_10",
        "/model/layers.5/mlp/down_proj/MatMulNBits/output_0.out5_4_17"
      ],
      "const_args": [
        "model.layers.6.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.6/input_layernorm/output_3.out4_11",
        "/model/layers.6/input_layernorm/output_0.out4_11"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_6",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.6/input_layernorm/output_0.out4_11"
      ],
      "const_args": [
        "model.layers.6.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.6.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.6.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.6.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.6/attn/qk_proj/Add/output_0.out5_4_18"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.6.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.6/input_layernorm/output_0.out4_11"
      ],
      "const_args": [
        "model.layers.6.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.6.attn.v_proj.Add.bias.preformat",
        "model.layers.6.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.6.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.6.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "27",
            "13"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.6/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.6/attn/qk_proj/Add/output_0.out5_4_18",
        "past_key_values.6.key",
        "past_key_values.6.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.6/attn/GroupQueryAttention/output_0.out2_6",
        "present.6.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "24",
            "12",
            "2",
            "0",
            "25",
            "13",
            "6",
            "0",
            "26",
            "12"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.6.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.6/attn/GroupQueryAttention/output_0.out2_6"
      ],
      "const_args": [
        "model.layers.6.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.6.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.6.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.6.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.6/attn/o_proj/MatMulNBits/output_0.out5_4_19"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_12",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.6/input_layernorm/output_3.out4_11",
        "/model/layers.6/attn/o_proj/MatMulNBits/output_0.out5_4_19"
      ],
      "const_args": [
        "model.layers.6.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.6/post_attention_layernorm/output_3.out4_12",
        "/model/layers.6/post_attention_layernorm/output_0.out4_12"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_6",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.6/post_attention_layernorm/output_0.out4_12"
      ],
      "const_args": [
        "model.layers.6.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.6.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.6.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.6.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.6.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.6.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.6.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.6.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.6/mlp/Mul/output_0.out3_6"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.6.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.6/mlp/Mul/output_0.out3_6"
      ],
      "const_args": [
        "model.layers.6.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.6.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.6.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.6.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.6/mlp/down_proj/MatMulNBits/output_0.out5_4_20"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_13",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.6/post_attention_layernorm/output_3.out4_12",
        "/model/layers.6/mlp/down_proj/MatMulNBits/output_0.out5_4_20"
      ],
      "const_args": [
        "model.layers.7.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.7/input_layernorm/output_3.out4_13",
        "/model/layers.7/input_layernorm/output_0.out4_13"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_7",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.7/input_layernorm/output_0.out4_13"
      ],
      "const_args": [
        "model.layers.7.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.7.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.7.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.7.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.7/attn/qk_proj/Add/output_0.out5_4_21"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.7.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.7/input_layernorm/output_0.out4_13"
      ],
      "const_args": [
        "model.layers.7.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.7.attn.v_proj.Add.bias.preformat",
        "model.layers.7.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.7.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.7.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "31",
            "15"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.7/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.7/attn/qk_proj/Add/output_0.out5_4_21",
        "past_key_values.7.key",
        "past_key_values.7.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.7/attn/GroupQueryAttention/output_0.out2_7",
        "present.7.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "28",
            "14",
            "2",
            "0",
            "29",
            "15",
            "6",
            "0",
            "30",
            "14"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.7.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.7/attn/GroupQueryAttention/output_0.out2_7"
      ],
      "const_args": [
        "model.layers.7.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.7.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.7.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.7.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.7/attn/o_proj/MatMulNBits/output_0.out5_4_22"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_14",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.7/input_layernorm/output_3.out4_13",
        "/model/layers.7/attn/o_proj/MatMulNBits/output_0.out5_4_22"
      ],
      "const_args": [
        "model.layers.7.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.7/post_attention_layernorm/output_3.out4_14",
        "/model/layers.7/post_attention_layernorm/output_0.out4_14"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_7",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.7/post_attention_layernorm/output_0.out4_14"
      ],
      "const_args": [
        "model.layers.7.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.7.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.7.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.7.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.7.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.7.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.7.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.7.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.7/mlp/Mul/output_0.out3_7"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.7.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.7/mlp/Mul/output_0.out3_7"
      ],
      "const_args": [
        "model.layers.7.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.7.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.7.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.7.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.7/mlp/down_proj/MatMulNBits/output_0.out5_4_23"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_15",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.7/post_attention_layernorm/output_3.out4_14",
        "/model/layers.7/mlp/down_proj/MatMulNBits/output_0.out5_4_23"
      ],
      "const_args": [
        "model.layers.8.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.8/input_layernorm/output_3.out4_15",
        "/model/layers.8/input_layernorm/output_0.out4_15"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_8",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.8/input_layernorm/output_0.out4_15"
      ],
      "const_args": [
        "model.layers.8.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.8.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.8.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.8.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.8/attn/qk_proj/Add/output_0.out5_4_24"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.8.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.8/input_layernorm/output_0.out4_15"
      ],
      "const_args": [
        "model.layers.8.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.8.attn.v_proj.Add.bias.preformat",
        "model.layers.8.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.8.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.8.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "35",
            "17"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.8/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.8/attn/qk_proj/Add/output_0.out5_4_24",
        "past_key_values.8.key",
        "past_key_values.8.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.8/attn/GroupQueryAttention/output_0.out2_8",
        "present.8.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "32",
            "16",
            "2",
            "0",
            "33",
            "17",
            "6",
            "0",
            "34",
            "16"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.8.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.8/attn/GroupQueryAttention/output_0.out2_8"
      ],
      "const_args": [
        "model.layers.8.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.8.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.8.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.8.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.8/attn/o_proj/MatMulNBits/output_0.out5_4_25"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_16",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.8/input_layernorm/output_3.out4_15",
        "/model/layers.8/attn/o_proj/MatMulNBits/output_0.out5_4_25"
      ],
      "const_args": [
        "model.layers.8.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.8/post_attention_layernorm/output_3.out4_16",
        "/model/layers.8/post_attention_layernorm/output_0.out4_16"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_8",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.8/post_attention_layernorm/output_0.out4_16"
      ],
      "const_args": [
        "model.layers.8.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.8.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.8.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.8.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.8.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.8.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.8.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.8.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.8/mlp/Mul/output_0.out3_8"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.8.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.8/mlp/Mul/output_0.out3_8"
      ],
      "const_args": [
        "model.layers.8.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.8.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.8.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.8.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.8/mlp/down_proj/MatMulNBits/output_0.out5_4_26"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_17",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.8/post_attention_layernorm/output_3.out4_16",
        "/model/layers.8/mlp/down_proj/MatMulNBits/output_0.out5_4_26"
      ],
      "const_args": [
        "model.layers.9.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.9/input_layernorm/output_3.out4_17",
        "/model/layers.9/input_layernorm/output_0.out4_17"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_9",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.9/input_layernorm/output_0.out4_17"
      ],
      "const_args": [
        "model.layers.9.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.9.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.9.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.9.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.9/attn/qk_proj/Add/output_0.out5_4_27"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.9.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.9/input_layernorm/output_0.out4_17"
      ],
      "const_args": [
        "model.layers.9.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.9.attn.v_proj.Add.bias.preformat",
        "model.layers.9.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.9.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.9.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "39",
            "19"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.9/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.9/attn/qk_proj/Add/output_0.out5_4_27",
        "past_key_values.9.key",
        "past_key_values.9.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.9/attn/GroupQueryAttention/output_0.out2_9",
        "present.9.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "36",
            "18",
            "2",
            "0",
            "37",
            "19",
            "6",
            "0",
            "38",
            "18"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.9.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.9/attn/GroupQueryAttention/output_0.out2_9"
      ],
      "const_args": [
        "model.layers.9.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.9.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.9.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.9.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.9/attn/o_proj/MatMulNBits/output_0.out5_4_28"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_18",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.9/input_layernorm/output_3.out4_17",
        "/model/layers.9/attn/o_proj/MatMulNBits/output_0.out5_4_28"
      ],
      "const_args": [
        "model.layers.9.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.9/post_attention_layernorm/output_3.out4_18",
        "/model/layers.9/post_attention_layernorm/output_0.out4_18"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_9",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.9/post_attention_layernorm/output_0.out4_18"
      ],
      "const_args": [
        "model.layers.9.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.9.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.9.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.9.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.9.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.9.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.9.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.9.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.9/mlp/Mul/output_0.out3_9"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.9.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.9/mlp/Mul/output_0.out3_9"
      ],
      "const_args": [
        "model.layers.9.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.9.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.9.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.9.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.9/mlp/down_proj/MatMulNBits/output_0.out5_4_29"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_19",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.9/post_attention_layernorm/output_3.out4_18",
        "/model/layers.9/mlp/down_proj/MatMulNBits/output_0.out5_4_29"
      ],
      "const_args": [
        "model.layers.10.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.10/input_layernorm/output_3.out4_19",
        "/model/layers.10/input_layernorm/output_0.out4_19"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_10",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.10/input_layernorm/output_0.out4_19"
      ],
      "const_args": [
        "model.layers.10.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.10.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.10.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.10.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.10/attn/qk_proj/Add/output_0.out5_4_30"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.10.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.10/input_layernorm/output_0.out4_19"
      ],
      "const_args": [
        "model.layers.10.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.10.attn.v_proj.Add.bias.preformat",
        "model.layers.10.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.10.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.10.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "43",
            "21"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.10/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.10/attn/qk_proj/Add/output_0.out5_4_30",
        "past_key_values.10.key",
        "past_key_values.10.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.10/attn/GroupQueryAttention/output_0.out2_10",
        "present.10.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "40",
            "20",
            "2",
            "0",
            "41",
            "21",
            "6",
            "0",
            "42",
            "20"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.10.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.10/attn/GroupQueryAttention/output_0.out2_10"
      ],
      "const_args": [
        "model.layers.10.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.10.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.10.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.10.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.10/attn/o_proj/MatMulNBits/output_0.out5_4_31"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_20",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.10/input_layernorm/output_3.out4_19",
        "/model/layers.10/attn/o_proj/MatMulNBits/output_0.out5_4_31"
      ],
      "const_args": [
        "model.layers.10.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.10/post_attention_layernorm/output_3.out4_20",
        "/model/layers.10/post_attention_layernorm/output_0.out4_20"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_10",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.10/post_attention_layernorm/output_0.out4_20"
      ],
      "const_args": [
        "model.layers.10.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.10.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.10.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.10.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.10.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.10.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.10.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.10.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.10/mlp/Mul/output_0.out3_10"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.10.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.10/mlp/Mul/output_0.out3_10"
      ],
      "const_args": [
        "model.layers.10.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.10.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.10.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.10.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.10/mlp/down_proj/MatMulNBits/output_0.out5_4_32"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_21",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.10/post_attention_layernorm/output_3.out4_20",
        "/model/layers.10/mlp/down_proj/MatMulNBits/output_0.out5_4_32"
      ],
      "const_args": [
        "model.layers.11.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.11/input_layernorm/output_3.out4_21",
        "/model/layers.11/input_layernorm/output_0.out4_21"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_11",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.11/input_layernorm/output_0.out4_21"
      ],
      "const_args": [
        "model.layers.11.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.11.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.11.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.11.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.11/attn/qk_proj/Add/output_0.out5_4_33"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.11.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.11/input_layernorm/output_0.out4_21"
      ],
      "const_args": [
        "model.layers.11.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.11.attn.v_proj.Add.bias.preformat",
        "model.layers.11.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.11.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.11.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "47",
            "23"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.11/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.11/attn/qk_proj/Add/output_0.out5_4_33",
        "past_key_values.11.key",
        "past_key_values.11.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.11/attn/GroupQueryAttention/output_0.out2_11",
        "present.11.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "44",
            "22",
            "2",
            "0",
            "45",
            "23",
            "6",
            "0",
            "46",
            "22"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.11.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.11/attn/GroupQueryAttention/output_0.out2_11"
      ],
      "const_args": [
        "model.layers.11.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.11.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.11.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.11.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.11/attn/o_proj/MatMulNBits/output_0.out5_4_34"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_22",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.11/input_layernorm/output_3.out4_21",
        "/model/layers.11/attn/o_proj/MatMulNBits/output_0.out5_4_34"
      ],
      "const_args": [
        "model.layers.11.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.11/post_attention_layernorm/output_3.out4_22",
        "/model/layers.11/post_attention_layernorm/output_0.out4_22"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_11",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.11/post_attention_layernorm/output_0.out4_22"
      ],
      "const_args": [
        "model.layers.11.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.11.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.11.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.11.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.11.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.11.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.11.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.11.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.11/mlp/Mul/output_0.out3_11"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.11.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.11/mlp/Mul/output_0.out3_11"
      ],
      "const_args": [
        "model.layers.11.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.11.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.11.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.11.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.11/mlp/down_proj/MatMulNBits/output_0.out5_4_35"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_23",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.11/post_attention_layernorm/output_3.out4_22",
        "/model/layers.11/mlp/down_proj/MatMulNBits/output_0.out5_4_35"
      ],
      "const_args": [
        "model.layers.12.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.12/input_layernorm/output_3.out4_23",
        "/model/layers.12/input_layernorm/output_0.out4_23"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_12",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.12/input_layernorm/output_0.out4_23"
      ],
      "const_args": [
        "model.layers.12.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.12.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.12.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.12.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.12/attn/qk_proj/Add/output_0.out5_4_36"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.12.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.12/input_layernorm/output_0.out4_23"
      ],
      "const_args": [
        "model.layers.12.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.12.attn.v_proj.Add.bias.preformat",
        "model.layers.12.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.12.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.12.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "51",
            "25"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.12/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.12/attn/qk_proj/Add/output_0.out5_4_36",
        "past_key_values.12.key",
        "past_key_values.12.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.12/attn/GroupQueryAttention/output_0.out2_12",
        "present.12.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "48",
            "24",
            "2",
            "0",
            "49",
            "25",
            "6",
            "0",
            "50",
            "24"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.12.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.12/attn/GroupQueryAttention/output_0.out2_12"
      ],
      "const_args": [
        "model.layers.12.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.12.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.12.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.12.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.12/attn/o_proj/MatMulNBits/output_0.out5_4_37"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_24",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.12/input_layernorm/output_3.out4_23",
        "/model/layers.12/attn/o_proj/MatMulNBits/output_0.out5_4_37"
      ],
      "const_args": [
        "model.layers.12.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.12/post_attention_layernorm/output_3.out4_24",
        "/model/layers.12/post_attention_layernorm/output_0.out4_24"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_12",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.12/post_attention_layernorm/output_0.out4_24"
      ],
      "const_args": [
        "model.layers.12.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.12.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.12.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.12.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.12.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.12.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.12.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.12.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.12/mlp/Mul/output_0.out3_12"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.12.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.12/mlp/Mul/output_0.out3_12"
      ],
      "const_args": [
        "model.layers.12.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.12.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.12.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.12.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.12/mlp/down_proj/MatMulNBits/output_0.out5_4_38"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_25",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.12/post_attention_layernorm/output_3.out4_24",
        "/model/layers.12/mlp/down_proj/MatMulNBits/output_0.out5_4_38"
      ],
      "const_args": [
        "model.layers.13.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.13/input_layernorm/output_3.out4_25",
        "/model/layers.13/input_layernorm/output_0.out4_25"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_13",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.13/input_layernorm/output_0.out4_25"
      ],
      "const_args": [
        "model.layers.13.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.13.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.13.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.13.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.13/attn/qk_proj/Add/output_0.out5_4_39"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.13.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.13/input_layernorm/output_0.out4_25"
      ],
      "const_args": [
        "model.layers.13.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.13.attn.v_proj.Add.bias.preformat",
        "model.layers.13.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.13.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.13.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "55",
            "27"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.13/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.13/attn/qk_proj/Add/output_0.out5_4_39",
        "past_key_values.13.key",
        "past_key_values.13.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.13/attn/GroupQueryAttention/output_0.out2_13",
        "present.13.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "52",
            "26",
            "2",
            "0",
            "53",
            "27",
            "6",
            "0",
            "54",
            "26"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.13.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.13/attn/GroupQueryAttention/output_0.out2_13"
      ],
      "const_args": [
        "model.layers.13.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.13.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.13.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.13.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.13/attn/o_proj/MatMulNBits/output_0.out5_4_40"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_26",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.13/input_layernorm/output_3.out4_25",
        "/model/layers.13/attn/o_proj/MatMulNBits/output_0.out5_4_40"
      ],
      "const_args": [
        "model.layers.13.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.13/post_attention_layernorm/output_3.out4_26",
        "/model/layers.13/post_attention_layernorm/output_0.out4_26"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_13",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.13/post_attention_layernorm/output_0.out4_26"
      ],
      "const_args": [
        "model.layers.13.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.13.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.13.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.13.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.13.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.13.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.13.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.13.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.13/mlp/Mul/output_0.out3_13"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.13.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.13/mlp/Mul/output_0.out3_13"
      ],
      "const_args": [
        "model.layers.13.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.13.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.13.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.13.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.13/mlp/down_proj/MatMulNBits/output_0.out5_4_41"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_27",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.13/post_attention_layernorm/output_3.out4_26",
        "/model/layers.13/mlp/down_proj/MatMulNBits/output_0.out5_4_41"
      ],
      "const_args": [
        "model.layers.14.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.14/input_layernorm/output_3.out4_27",
        "/model/layers.14/input_layernorm/output_0.out4_27"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_14",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.14/input_layernorm/output_0.out4_27"
      ],
      "const_args": [
        "model.layers.14.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.14.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.14.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.14.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.14/attn/qk_proj/Add/output_0.out5_4_42"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.14.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.14/input_layernorm/output_0.out4_27"
      ],
      "const_args": [
        "model.layers.14.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.14.attn.v_proj.Add.bias.preformat",
        "model.layers.14.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.14.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.14.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "59",
            "29"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.14/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.14/attn/qk_proj/Add/output_0.out5_4_42",
        "past_key_values.14.key",
        "past_key_values.14.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.14/attn/GroupQueryAttention/output_0.out2_14",
        "present.14.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "56",
            "28",
            "2",
            "0",
            "57",
            "29",
            "6",
            "0",
            "58",
            "28"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.14.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.14/attn/GroupQueryAttention/output_0.out2_14"
      ],
      "const_args": [
        "model.layers.14.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.14.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.14.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.14.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.14/attn/o_proj/MatMulNBits/output_0.out5_4_43"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_28",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.14/input_layernorm/output_3.out4_27",
        "/model/layers.14/attn/o_proj/MatMulNBits/output_0.out5_4_43"
      ],
      "const_args": [
        "model.layers.14.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.14/post_attention_layernorm/output_3.out4_28",
        "/model/layers.14/post_attention_layernorm/output_0.out4_28"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_14",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.14/post_attention_layernorm/output_0.out4_28"
      ],
      "const_args": [
        "model.layers.14.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.14.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.14.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.14.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.14.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.14.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.14.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.14.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.14/mlp/Mul/output_0.out3_14"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.14.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.14/mlp/Mul/output_0.out3_14"
      ],
      "const_args": [
        "model.layers.14.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.14.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.14.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.14.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.14/mlp/down_proj/MatMulNBits/output_0.out5_4_44"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_29",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.14/post_attention_layernorm/output_3.out4_28",
        "/model/layers.14/mlp/down_proj/MatMulNBits/output_0.out5_4_44"
      ],
      "const_args": [
        "model.layers.15.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.15/input_layernorm/output_3.out4_29",
        "/model/layers.15/input_layernorm/output_0.out4_29"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_15",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.15/input_layernorm/output_0.out4_29"
      ],
      "const_args": [
        "model.layers.15.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.15.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.15.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.15.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.15/attn/qk_proj/Add/output_0.out5_4_45"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.15.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.15/input_layernorm/output_0.out4_29"
      ],
      "const_args": [
        "model.layers.15.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.15.attn.v_proj.Add.bias.preformat",
        "model.layers.15.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.15.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.15.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "63",
            "31"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.15/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.15/attn/qk_proj/Add/output_0.out5_4_45",
        "past_key_values.15.key",
        "past_key_values.15.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.15/attn/GroupQueryAttention/output_0.out2_15",
        "present.15.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "60",
            "30",
            "2",
            "0",
            "61",
            "31",
            "6",
            "0",
            "62",
            "30"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.15.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.15/attn/GroupQueryAttention/output_0.out2_15"
      ],
      "const_args": [
        "model.layers.15.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.15.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.15.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.15.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.15/attn/o_proj/MatMulNBits/output_0.out5_4_46"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_30",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.15/input_layernorm/output_3.out4_29",
        "/model/layers.15/attn/o_proj/MatMulNBits/output_0.out5_4_46"
      ],
      "const_args": [
        "model.layers.15.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.15/post_attention_layernorm/output_3.out4_30",
        "/model/layers.15/post_attention_layernorm/output_0.out4_30"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_15",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.15/post_attention_layernorm/output_0.out4_30"
      ],
      "const_args": [
        "model.layers.15.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.15.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.15.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.15.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.15.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.15.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.15.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.15.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.15/mlp/Mul/output_0.out3_15"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.15.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.15/mlp/Mul/output_0.out3_15"
      ],
      "const_args": [
        "model.layers.15.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.15.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.15.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.15.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.15/mlp/down_proj/MatMulNBits/output_0.out5_4_47"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_31",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.15/post_attention_layernorm/output_3.out4_30",
        "/model/layers.15/mlp/down_proj/MatMulNBits/output_0.out5_4_47"
      ],
      "const_args": [
        "model.layers.16.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.16/input_layernorm/output_3.out4_31",
        "/model/layers.16/input_layernorm/output_0.out4_31"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_16",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.16/input_layernorm/output_0.out4_31"
      ],
      "const_args": [
        "model.layers.16.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.16.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.16.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.16.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.16/attn/qk_proj/Add/output_0.out5_4_48"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.16.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.16/input_layernorm/output_0.out4_31"
      ],
      "const_args": [
        "model.layers.16.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.16.attn.v_proj.Add.bias.preformat",
        "model.layers.16.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.16.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.16.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "67",
            "33"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.16/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.16/attn/qk_proj/Add/output_0.out5_4_48",
        "past_key_values.16.key",
        "past_key_values.16.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.16/attn/GroupQueryAttention/output_0.out2_16",
        "present.16.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "64",
            "32",
            "2",
            "0",
            "65",
            "33",
            "6",
            "0",
            "66",
            "32"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.16.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.16/attn/GroupQueryAttention/output_0.out2_16"
      ],
      "const_args": [
        "model.layers.16.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.16.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.16.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.16.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.16/attn/o_proj/MatMulNBits/output_0.out5_4_49"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_32",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.16/input_layernorm/output_3.out4_31",
        "/model/layers.16/attn/o_proj/MatMulNBits/output_0.out5_4_49"
      ],
      "const_args": [
        "model.layers.16.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.16/post_attention_layernorm/output_3.out4_32",
        "/model/layers.16/post_attention_layernorm/output_0.out4_32"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_16",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.16/post_attention_layernorm/output_0.out4_32"
      ],
      "const_args": [
        "model.layers.16.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.16.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.16.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.16.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.16.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.16.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.16.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.16.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.16/mlp/Mul/output_0.out3_16"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.16.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.16/mlp/Mul/output_0.out3_16"
      ],
      "const_args": [
        "model.layers.16.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.16.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.16.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.16.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.16/mlp/down_proj/MatMulNBits/output_0.out5_4_50"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_33",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.16/post_attention_layernorm/output_3.out4_32",
        "/model/layers.16/mlp/down_proj/MatMulNBits/output_0.out5_4_50"
      ],
      "const_args": [
        "model.layers.17.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.17/input_layernorm/output_3.out4_33",
        "/model/layers.17/input_layernorm/output_0.out4_33"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_17",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.17/input_layernorm/output_0.out4_33"
      ],
      "const_args": [
        "model.layers.17.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.17.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.17.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.17.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.17/attn/qk_proj/Add/output_0.out5_4_51"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.17.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.17/input_layernorm/output_0.out4_33"
      ],
      "const_args": [
        "model.layers.17.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.17.attn.v_proj.Add.bias.preformat",
        "model.layers.17.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.17.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.17.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "71",
            "35"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.17/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.17/attn/qk_proj/Add/output_0.out5_4_51",
        "past_key_values.17.key",
        "past_key_values.17.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.17/attn/GroupQueryAttention/output_0.out2_17",
        "present.17.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "68",
            "34",
            "2",
            "0",
            "69",
            "35",
            "6",
            "0",
            "70",
            "34"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.17.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.17/attn/GroupQueryAttention/output_0.out2_17"
      ],
      "const_args": [
        "model.layers.17.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.17.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.17.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.17.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.17/attn/o_proj/MatMulNBits/output_0.out5_4_52"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_34",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.17/input_layernorm/output_3.out4_33",
        "/model/layers.17/attn/o_proj/MatMulNBits/output_0.out5_4_52"
      ],
      "const_args": [
        "model.layers.17.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.17/post_attention_layernorm/output_3.out4_34",
        "/model/layers.17/post_attention_layernorm/output_0.out4_34"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_17",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.17/post_attention_layernorm/output_0.out4_34"
      ],
      "const_args": [
        "model.layers.17.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.17.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.17.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.17.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.17.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.17.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.17.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.17.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.17/mlp/Mul/output_0.out3_17"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.17.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.17/mlp/Mul/output_0.out3_17"
      ],
      "const_args": [
        "model.layers.17.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.17.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.17.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.17.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.17/mlp/down_proj/MatMulNBits/output_0.out5_4_53"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_35",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.17/post_attention_layernorm/output_3.out4_34",
        "/model/layers.17/mlp/down_proj/MatMulNBits/output_0.out5_4_53"
      ],
      "const_args": [
        "model.layers.18.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.18/input_layernorm/output_3.out4_35",
        "/model/layers.18/input_layernorm/output_0.out4_35"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_18",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.18/input_layernorm/output_0.out4_35"
      ],
      "const_args": [
        "model.layers.18.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.18.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.18.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.18.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.18/attn/qk_proj/Add/output_0.out5_4_54"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.18.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.18/input_layernorm/output_0.out4_35"
      ],
      "const_args": [
        "model.layers.18.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.18.attn.v_proj.Add.bias.preformat",
        "model.layers.18.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.18.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.18.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "75",
            "37"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.18/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.18/attn/qk_proj/Add/output_0.out5_4_54",
        "past_key_values.18.key",
        "past_key_values.18.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.18/attn/GroupQueryAttention/output_0.out2_18",
        "present.18.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "72",
            "36",
            "2",
            "0",
            "73",
            "37",
            "6",
            "0",
            "74",
            "36"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.18.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.18/attn/GroupQueryAttention/output_0.out2_18"
      ],
      "const_args": [
        "model.layers.18.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.18.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.18.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.18.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.18/attn/o_proj/MatMulNBits/output_0.out5_4_55"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_36",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.18/input_layernorm/output_3.out4_35",
        "/model/layers.18/attn/o_proj/MatMulNBits/output_0.out5_4_55"
      ],
      "const_args": [
        "model.layers.18.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.18/post_attention_layernorm/output_3.out4_36",
        "/model/layers.18/post_attention_layernorm/output_0.out4_36"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_18",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.18/post_attention_layernorm/output_0.out4_36"
      ],
      "const_args": [
        "model.layers.18.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.18.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.18.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.18.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.18.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.18.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.18.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.18.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.18/mlp/Mul/output_0.out3_18"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.18.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.18/mlp/Mul/output_0.out3_18"
      ],
      "const_args": [
        "model.layers.18.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.18.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.18.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.18.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.18/mlp/down_proj/MatMulNBits/output_0.out5_4_56"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_37",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.18/post_attention_layernorm/output_3.out4_36",
        "/model/layers.18/mlp/down_proj/MatMulNBits/output_0.out5_4_56"
      ],
      "const_args": [
        "model.layers.19.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.19/input_layernorm/output_3.out4_37",
        "/model/layers.19/input_layernorm/output_0.out4_37"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_19",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.19/input_layernorm/output_0.out4_37"
      ],
      "const_args": [
        "model.layers.19.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.19.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.19.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.19.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.19/attn/qk_proj/Add/output_0.out5_4_57"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.19.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.19/input_layernorm/output_0.out4_37"
      ],
      "const_args": [
        "model.layers.19.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.19.attn.v_proj.Add.bias.preformat",
        "model.layers.19.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.19.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.19.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "79",
            "39"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.19/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.19/attn/qk_proj/Add/output_0.out5_4_57",
        "past_key_values.19.key",
        "past_key_values.19.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.19/attn/GroupQueryAttention/output_0.out2_19",
        "present.19.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "76",
            "38",
            "2",
            "0",
            "77",
            "39",
            "6",
            "0",
            "78",
            "38"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.19.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.19/attn/GroupQueryAttention/output_0.out2_19"
      ],
      "const_args": [
        "model.layers.19.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.19.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.19.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.19.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.19/attn/o_proj/MatMulNBits/output_0.out5_4_58"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_38",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.19/input_layernorm/output_3.out4_37",
        "/model/layers.19/attn/o_proj/MatMulNBits/output_0.out5_4_58"
      ],
      "const_args": [
        "model.layers.19.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.19/post_attention_layernorm/output_3.out4_38",
        "/model/layers.19/post_attention_layernorm/output_0.out4_38"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_19",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.19/post_attention_layernorm/output_0.out4_38"
      ],
      "const_args": [
        "model.layers.19.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.19.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.19.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.19.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.19.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.19.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.19.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.19.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.19/mlp/Mul/output_0.out3_19"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.19.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.19/mlp/Mul/output_0.out3_19"
      ],
      "const_args": [
        "model.layers.19.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.19.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.19.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.19.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.19/mlp/down_proj/MatMulNBits/output_0.out5_4_59"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_39",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.19/post_attention_layernorm/output_3.out4_38",
        "/model/layers.19/mlp/down_proj/MatMulNBits/output_0.out5_4_59"
      ],
      "const_args": [
        "model.layers.20.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.20/input_layernorm/output_3.out4_39",
        "/model/layers.20/input_layernorm/output_0.out4_39"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_20",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.20/input_layernorm/output_0.out4_39"
      ],
      "const_args": [
        "model.layers.20.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.20.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.20.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.20.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.20/attn/qk_proj/Add/output_0.out5_4_60"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.20.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.20/input_layernorm/output_0.out4_39"
      ],
      "const_args": [
        "model.layers.20.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.20.attn.v_proj.Add.bias.preformat",
        "model.layers.20.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.20.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.20.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "83",
            "41"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.20/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.20/attn/qk_proj/Add/output_0.out5_4_60",
        "past_key_values.20.key",
        "past_key_values.20.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.20/attn/GroupQueryAttention/output_0.out2_20",
        "present.20.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "80",
            "40",
            "2",
            "0",
            "81",
            "41",
            "6",
            "0",
            "82",
            "40"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.20.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.20/attn/GroupQueryAttention/output_0.out2_20"
      ],
      "const_args": [
        "model.layers.20.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.20.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.20.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.20.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.20/attn/o_proj/MatMulNBits/output_0.out5_4_61"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_40",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.20/input_layernorm/output_3.out4_39",
        "/model/layers.20/attn/o_proj/MatMulNBits/output_0.out5_4_61"
      ],
      "const_args": [
        "model.layers.20.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.20/post_attention_layernorm/output_3.out4_40",
        "/model/layers.20/post_attention_layernorm/output_0.out4_40"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_20",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.20/post_attention_layernorm/output_0.out4_40"
      ],
      "const_args": [
        "model.layers.20.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.20.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.20.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.20.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.20.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.20.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.20.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.20.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.20/mlp/Mul/output_0.out3_20"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.20.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.20/mlp/Mul/output_0.out3_20"
      ],
      "const_args": [
        "model.layers.20.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.20.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.20.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.20.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.20/mlp/down_proj/MatMulNBits/output_0.out5_4_62"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_41",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.20/post_attention_layernorm/output_3.out4_40",
        "/model/layers.20/mlp/down_proj/MatMulNBits/output_0.out5_4_62"
      ],
      "const_args": [
        "model.layers.21.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.21/input_layernorm/output_3.out4_41",
        "/model/layers.21/input_layernorm/output_0.out4_41"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_21",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.21/input_layernorm/output_0.out4_41"
      ],
      "const_args": [
        "model.layers.21.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.21.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.21.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.21.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.21/attn/qk_proj/Add/output_0.out5_4_63"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.21.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.21/input_layernorm/output_0.out4_41"
      ],
      "const_args": [
        "model.layers.21.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.21.attn.v_proj.Add.bias.preformat",
        "model.layers.21.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.21.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.21.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "87",
            "43"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.21/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.21/attn/qk_proj/Add/output_0.out5_4_63",
        "past_key_values.21.key",
        "past_key_values.21.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.21/attn/GroupQueryAttention/output_0.out2_21",
        "present.21.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "84",
            "42",
            "2",
            "0",
            "85",
            "43",
            "6",
            "0",
            "86",
            "42"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.21.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.21/attn/GroupQueryAttention/output_0.out2_21"
      ],
      "const_args": [
        "model.layers.21.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.21.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.21.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.21.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.21/attn/o_proj/MatMulNBits/output_0.out5_4_64"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_42",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.21/input_layernorm/output_3.out4_41",
        "/model/layers.21/attn/o_proj/MatMulNBits/output_0.out5_4_64"
      ],
      "const_args": [
        "model.layers.21.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.21/post_attention_layernorm/output_3.out4_42",
        "/model/layers.21/post_attention_layernorm/output_0.out4_42"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_21",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.21/post_attention_layernorm/output_0.out4_42"
      ],
      "const_args": [
        "model.layers.21.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.21.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.21.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.21.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.21.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.21.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.21.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.21.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.21/mlp/Mul/output_0.out3_21"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.21.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.21/mlp/Mul/output_0.out3_21"
      ],
      "const_args": [
        "model.layers.21.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.21.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.21.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.21.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.21/mlp/down_proj/MatMulNBits/output_0.out5_4_65"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_43",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.21/post_attention_layernorm/output_3.out4_42",
        "/model/layers.21/mlp/down_proj/MatMulNBits/output_0.out5_4_65"
      ],
      "const_args": [
        "model.layers.22.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.22/input_layernorm/output_3.out4_43",
        "/model/layers.22/input_layernorm/output_0.out4_43"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_22",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.22/input_layernorm/output_0.out4_43"
      ],
      "const_args": [
        "model.layers.22.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.22.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.22.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.22.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.22/attn/qk_proj/Add/output_0.out5_4_66"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.22.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.22/input_layernorm/output_0.out4_43"
      ],
      "const_args": [
        "model.layers.22.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.22.attn.v_proj.Add.bias.preformat",
        "model.layers.22.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.22.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.22.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "91",
            "45"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.22/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.22/attn/qk_proj/Add/output_0.out5_4_66",
        "past_key_values.22.key",
        "past_key_values.22.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.22/attn/GroupQueryAttention/output_0.out2_22",
        "present.22.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "88",
            "44",
            "2",
            "0",
            "89",
            "45",
            "6",
            "0",
            "90",
            "44"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.22.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.22/attn/GroupQueryAttention/output_0.out2_22"
      ],
      "const_args": [
        "model.layers.22.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.22.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.22.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.22.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.22/attn/o_proj/MatMulNBits/output_0.out5_4_67"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_44",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.22/input_layernorm/output_3.out4_43",
        "/model/layers.22/attn/o_proj/MatMulNBits/output_0.out5_4_67"
      ],
      "const_args": [
        "model.layers.22.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.22/post_attention_layernorm/output_3.out4_44",
        "/model/layers.22/post_attention_layernorm/output_0.out4_44"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_22",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.22/post_attention_layernorm/output_0.out4_44"
      ],
      "const_args": [
        "model.layers.22.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.22.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.22.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.22.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.22.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.22.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.22.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.22.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.22/mlp/Mul/output_0.out3_22"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.22.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.22/mlp/Mul/output_0.out3_22"
      ],
      "const_args": [
        "model.layers.22.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.22.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.22.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.22.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.22/mlp/down_proj/MatMulNBits/output_0.out5_4_68"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_45",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.22/post_attention_layernorm/output_3.out4_44",
        "/model/layers.22/mlp/down_proj/MatMulNBits/output_0.out5_4_68"
      ],
      "const_args": [
        "model.layers.23.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.23/input_layernorm/output_3.out4_45",
        "/model/layers.23/input_layernorm/output_0.out4_45"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_23",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.23/input_layernorm/output_0.out4_45"
      ],
      "const_args": [
        "model.layers.23.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.23.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.23.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.23.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.23/attn/qk_proj/Add/output_0.out5_4_69"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.23.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.23/input_layernorm/output_0.out4_45"
      ],
      "const_args": [
        "model.layers.23.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.23.attn.v_proj.Add.bias.preformat",
        "model.layers.23.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.23.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.23.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "95",
            "47"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.23/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.23/attn/qk_proj/Add/output_0.out5_4_69",
        "past_key_values.23.key",
        "past_key_values.23.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.23/attn/GroupQueryAttention/output_0.out2_23",
        "present.23.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "92",
            "46",
            "2",
            "0",
            "93",
            "47",
            "6",
            "0",
            "94",
            "46"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.23.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.23/attn/GroupQueryAttention/output_0.out2_23"
      ],
      "const_args": [
        "model.layers.23.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.23.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.23.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.23.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.23/attn/o_proj/MatMulNBits/output_0.out5_4_70"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_46",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.23/input_layernorm/output_3.out4_45",
        "/model/layers.23/attn/o_proj/MatMulNBits/output_0.out5_4_70"
      ],
      "const_args": [
        "model.layers.23.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.23/post_attention_layernorm/output_3.out4_46",
        "/model/layers.23/post_attention_layernorm/output_0.out4_46"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_23",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.23/post_attention_layernorm/output_0.out4_46"
      ],
      "const_args": [
        "model.layers.23.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.23.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.23.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.23.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.23.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.23.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.23.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.23.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.23/mlp/Mul/output_0.out3_23"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.23.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.23/mlp/Mul/output_0.out3_23"
      ],
      "const_args": [
        "model.layers.23.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.23.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.23.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.23.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.23/mlp/down_proj/MatMulNBits/output_0.out5_4_71"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_47",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.23/post_attention_layernorm/output_3.out4_46",
        "/model/layers.23/mlp/down_proj/MatMulNBits/output_0.out5_4_71"
      ],
      "const_args": [
        "model.layers.24.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.24/input_layernorm/output_3.out4_47",
        "/model/layers.24/input_layernorm/output_0.out4_47"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_24",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.24/input_layernorm/output_0.out4_47"
      ],
      "const_args": [
        "model.layers.24.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.24.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.24.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.24.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.24/attn/qk_proj/Add/output_0.out5_4_72"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.24.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.24/input_layernorm/output_0.out4_47"
      ],
      "const_args": [
        "model.layers.24.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.24.attn.v_proj.Add.bias.preformat",
        "model.layers.24.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.24.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.24.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "99",
            "49"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.24/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.24/attn/qk_proj/Add/output_0.out5_4_72",
        "past_key_values.24.key",
        "past_key_values.24.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.24/attn/GroupQueryAttention/output_0.out2_24",
        "present.24.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "96",
            "48",
            "2",
            "0",
            "97",
            "49",
            "6",
            "0",
            "98",
            "48"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.24.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.24/attn/GroupQueryAttention/output_0.out2_24"
      ],
      "const_args": [
        "model.layers.24.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.24.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.24.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.24.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.24/attn/o_proj/MatMulNBits/output_0.out5_4_73"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_48",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.24/input_layernorm/output_3.out4_47",
        "/model/layers.24/attn/o_proj/MatMulNBits/output_0.out5_4_73"
      ],
      "const_args": [
        "model.layers.24.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.24/post_attention_layernorm/output_3.out4_48",
        "/model/layers.24/post_attention_layernorm/output_0.out4_48"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_24",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.24/post_attention_layernorm/output_0.out4_48"
      ],
      "const_args": [
        "model.layers.24.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.24.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.24.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.24.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.24.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.24.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.24.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.24.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.24/mlp/Mul/output_0.out3_24"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.24.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.24/mlp/Mul/output_0.out3_24"
      ],
      "const_args": [
        "model.layers.24.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.24.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.24.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.24.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.24/mlp/down_proj/MatMulNBits/output_0.out5_4_74"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_49",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.24/post_attention_layernorm/output_3.out4_48",
        "/model/layers.24/mlp/down_proj/MatMulNBits/output_0.out5_4_74"
      ],
      "const_args": [
        "model.layers.25.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.25/input_layernorm/output_3.out4_49",
        "/model/layers.25/input_layernorm/output_0.out4_49"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_25",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.25/input_layernorm/output_0.out4_49"
      ],
      "const_args": [
        "model.layers.25.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.25.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.25.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.25.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.25/attn/qk_proj/Add/output_0.out5_4_75"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.25.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.25/input_layernorm/output_0.out4_49"
      ],
      "const_args": [
        "model.layers.25.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.25.attn.v_proj.Add.bias.preformat",
        "model.layers.25.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.25.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.25.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "103",
            "51"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.25/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.25/attn/qk_proj/Add/output_0.out5_4_75",
        "past_key_values.25.key",
        "past_key_values.25.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.25/attn/GroupQueryAttention/output_0.out2_25",
        "present.25.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "100",
            "50",
            "2",
            "0",
            "101",
            "51",
            "6",
            "0",
            "102",
            "50"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.25.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.25/attn/GroupQueryAttention/output_0.out2_25"
      ],
      "const_args": [
        "model.layers.25.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.25.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.25.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.25.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.25/attn/o_proj/MatMulNBits/output_0.out5_4_76"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_50",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.25/input_layernorm/output_3.out4_49",
        "/model/layers.25/attn/o_proj/MatMulNBits/output_0.out5_4_76"
      ],
      "const_args": [
        "model.layers.25.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.25/post_attention_layernorm/output_3.out4_50",
        "/model/layers.25/post_attention_layernorm/output_0.out4_50"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_25",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.25/post_attention_layernorm/output_0.out4_50"
      ],
      "const_args": [
        "model.layers.25.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.25.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.25.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.25.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.25.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.25.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.25.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.25.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.25/mlp/Mul/output_0.out3_25"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.25.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.25/mlp/Mul/output_0.out3_25"
      ],
      "const_args": [
        "model.layers.25.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.25.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.25.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.25.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.25/mlp/down_proj/MatMulNBits/output_0.out5_4_77"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_51",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.25/post_attention_layernorm/output_3.out4_50",
        "/model/layers.25/mlp/down_proj/MatMulNBits/output_0.out5_4_77"
      ],
      "const_args": [
        "model.layers.26.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.26/input_layernorm/output_3.out4_51",
        "/model/layers.26/input_layernorm/output_0.out4_51"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_26",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.26/input_layernorm/output_0.out4_51"
      ],
      "const_args": [
        "model.layers.26.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.26.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.26.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.26.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.26/attn/qk_proj/Add/output_0.out5_4_78"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.26.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.26/input_layernorm/output_0.out4_51"
      ],
      "const_args": [
        "model.layers.26.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.26.attn.v_proj.Add.bias.preformat",
        "model.layers.26.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.26.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.26.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "107",
            "53"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.26/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.26/attn/qk_proj/Add/output_0.out5_4_78",
        "past_key_values.26.key",
        "past_key_values.26.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.26/attn/GroupQueryAttention/output_0.out2_26",
        "present.26.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "104",
            "52",
            "2",
            "0",
            "105",
            "53",
            "6",
            "0",
            "106",
            "52"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.26.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.26/attn/GroupQueryAttention/output_0.out2_26"
      ],
      "const_args": [
        "model.layers.26.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.26.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.26.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.26.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.26/attn/o_proj/MatMulNBits/output_0.out5_4_79"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_52",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.26/input_layernorm/output_3.out4_51",
        "/model/layers.26/attn/o_proj/MatMulNBits/output_0.out5_4_79"
      ],
      "const_args": [
        "model.layers.26.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.26/post_attention_layernorm/output_3.out4_52",
        "/model/layers.26/post_attention_layernorm/output_0.out4_52"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_26",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.26/post_attention_layernorm/output_0.out4_52"
      ],
      "const_args": [
        "model.layers.26.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.26.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.26.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.26.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.26.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.26.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.26.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.26.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.26/mlp/Mul/output_0.out3_26"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.26.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.26/mlp/Mul/output_0.out3_26"
      ],
      "const_args": [
        "model.layers.26.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.26.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.26.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.26.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.26/mlp/down_proj/MatMulNBits/output_0.out5_4_80"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_53",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.26/post_attention_layernorm/output_3.out4_52",
        "/model/layers.26/mlp/down_proj/MatMulNBits/output_0.out5_4_80"
      ],
      "const_args": [
        "model.layers.27.input_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.27/input_layernorm/output_3.out4_53",
        "/model/layers.27/input_layernorm/output_0.out4_53"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "MatMulNBits_2_27",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.27/input_layernorm/output_0.out4_53"
      ],
      "const_args": [
        "model.layers.27.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.27.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.27.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.27.attn.qk_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.27/attn/qk_proj/Add/output_0.out5_4_81"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1792"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "layers.27.attn.v_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.27/input_layernorm/output_0.out4_53"
      ],
      "const_args": [
        "model.layers.27.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.27.attn.v_proj.Add.bias.preformat",
        "model.layers.27.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.27.attn.v_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "present.27.value"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "256"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "total_seq_len": {
          "type": "int",
          "value": [
            "4096"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "5",
            "0",
            "111",
            "55"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "5",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "/model/layers.27/attn/GroupQueryAttention",
      "type": "FLATMHA",
      "in_args": [
        "/model/layers.27/attn/qk_proj/Add/output_0.out5_4_81",
        "past_key_values.27.key",
        "past_key_values.27.value",
        "attention_mask_const_uint",
        "sin_cos_cache_token"
      ],
      "const_args": [],
      "out_args": [
        "/model/layers.27/attn/GroupQueryAttention/output_0.out2_27",
        "present.27.key"
      ],
      "attrs": {
        "num_heads": {
          "type": "int",
          "value": [
            "12"
          ]
        },
        "kv_num_heads": {
          "type": "int",
          "value": [
            "2"
          ]
        },
        "scale": {
          "type": "float",
          "value": [
            "0.0883883461356163"
          ]
        },
        "softcap": {
          "type": "float",
          "value": [
            "0.0"
          ]
        },
        "do_rotary": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "rotary_interleaved": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "input_shape": {
          "type": "int",
          "value": [
            "2",
            "12",
            "1",
            "4096",
            "128"
          ]
        },
        "external_buffers": {
          "type": "int",
          "value": [
            "4",
            "1",
            "0",
            "0",
            "1",
            "0",
            "108",
            "54",
            "2",
            "0",
            "109",
            "55",
            "6",
            "0",
            "110",
            "54"
          ]
        },
        "update_tensor_offsets": {
          "type": "int",
          "value": [
            "4",
            "0",
            "0",
            "256",
            "6",
            "0",
            "0",
            "256"
          ]
        }
      }
    },
    {
      "name": "layers.27.attn.o_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.27/attn/GroupQueryAttention/output_0.out2_27"
      ],
      "const_args": [
        "model.layers.27.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.27.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.27.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.27.attn.o_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.27/attn/o_proj/MatMulNBits/output_0.out5_4_82"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_54",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.27/input_layernorm/output_3.out4_53",
        "/model/layers.27/attn/o_proj/MatMulNBits/output_0.out5_4_82"
      ],
      "const_args": [
        "model.layers.27.post_attention_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.27/post_attention_layernorm/output_3.out4_54",
        "/model/layers.27/post_attention_layernorm/output_0.out4_54"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "FlatMLP_3_27",
      "type": "FlatMLP",
      "in_args": [
        "/model/layers.27/post_attention_layernorm/output_0.out4_54"
      ],
      "const_args": [
        "model.layers.27.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.27.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.27.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.27.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.27.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.27.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.27.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.27.mlp.up_proj.MatMulNBits.bias.f"
      ],
      "out_args": [
        "/model/layers.27/mlp/Mul/output_0.out3_27"
      ],
      "attrs": {
        "input_shape": {
          "type": "int",
          "value": [
            "1",
            "1536",
            "8960"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float",
            "uint8",
            "float"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16"
          ]
        }
      }
    },
    {
      "name": "layers.27.mlp.down_proj",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.27/mlp/Mul/output_0.out3_27"
      ],
      "const_args": [
        "model.layers.27.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.27.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.27.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.27.mlp.down_proj.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "/model/layers.27/mlp/down_proj/MatMulNBits/output_0.out5_4_83"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "8960"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    },
    {
      "name": "FlatRMSAdd_4_55",
      "type": "FlatRMSAdd",
      "in_args": [
        "/model/layers.27/post_attention_layernorm/output_3.out4_54",
        "/model/layers.27/mlp/down_proj/MatMulNBits/output_0.out5_4_83"
      ],
      "const_args": [
        "model.layers.28.final_norm_layernorm.weight.bf"
      ],
      "out_args": [
        "/model/layers.28/final_norm_layernorm/output_0.dummy",
        "/model/layers.28/final_norm_layernorm/output_0.out4_55"
      ],
      "attrs": {
        "a_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "in_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "out_dtypes": {
          "type": "str",
          "value": [
            "bfloat16",
            "bfloat16"
          ]
        },
        "c_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "b_shape": {
          "type": "int",
          "value": [
            "1",
            "1",
            "1536"
          ]
        },
        "is_gamma_ifm": {
          "type": "int",
          "value": [
            "1"
          ]
        }
      }
    },
    {
      "name": "/lm_head/MatMulNBits",
      "type": "MladfMatMul",
      "in_args": [
        "/model/layers.28/final_norm_layernorm/output_0.out4_55"
      ],
      "const_args": [
        "lm_head.MatMulNBits.qweight.preformat",
        "lm_head.MatMulNBits.bias.preformat",
        "lm_head.MatMulNBits.scales.preformat",
        "lm_head.MatMulNBits.qzeros.preformat"
      ],
      "out_args": [
        "logits.out5_4_84"
      ],
      "attrs": {
        "accuracy_level": {
          "type": "int",
          "value": [
            "0"
          ]
        },
        "bits": {
          "type": "int",
          "value": [
            "4"
          ]
        },
        "block_size": {
          "type": "int",
          "value": [
            "128"
          ]
        },
        "K": {
          "type": "int",
          "value": [
            "1536"
          ]
        },
        "N": {
          "type": "int",
          "value": [
            "151936"
          ]
        },
        "default_shape": {
          "type": "int",
          "value": [
            "1"
          ]
        },
        "op_version": {
          "type": "str",
          "value": [
            "flat"
          ]
        },
        "group_size": {
          "type": "int",
          "value": [
            "128"
          ]
        }
      }
    }
  ],
  "fused_tensors": {
    "in": {
      "buffer_size": 9328,
      "xrt_arg_id": 0,
      "packed_tensors": [
        "/model/layers.0/input_layernorm/output_0.out5_4_0",
        "attention_mask_const_uint",
        "/model/embed_tokens/Gather/output_0.out4_0"
      ]
    },
    "out": {
      "buffer_size": 306944,
      "xrt_arg_id": 1,
      "packed_tensors": [
        "/model/layers.28/final_norm_layernorm/output_0.dummy",
        "logits.out5_4_84"
      ]
    },
    "scratch": {
      "buffer_size": 1201152,
      "xrt_arg_id": 2,
      "packed_tensors": [
        "/model/layers.0/attn/qk_proj/Add/output_0.out5_4_0",
        "/model/layers.0/attn/GroupQueryAttention/output_0.out2_0",
        "/model/layers.0/attn/o_proj/MatMulNBits/output_0.out5_4_1",
        "/model/layers.0/post_attention_layernorm/output_3.out4_0",
        "/model/layers.0/post_attention_layernorm/output_0.out4_0",
        "/model/layers.0/mlp/Mul/output_0.out3_0",
        "/model/layers.0/mlp/down_proj/MatMulNBits/output_0.out5_4_2",
        "/model/layers.1/input_layernorm/output_3.out4_1",
        "/model/layers.1/input_layernorm/output_0.out4_1",
        "/model/layers.1/attn/qk_proj/Add/output_0.out5_4_3",
        "/model/layers.1/attn/GroupQueryAttention/output_0.out2_1",
        "/model/layers.1/attn/o_proj/MatMulNBits/output_0.out5_4_4",
        "/model/layers.1/post_attention_layernorm/output_3.out4_2",
        "/model/layers.1/post_attention_layernorm/output_0.out4_2",
        "/model/layers.1/mlp/Mul/output_0.out3_1",
        "/model/layers.1/mlp/down_proj/MatMulNBits/output_0.out5_4_5",
        "/model/layers.2/input_layernorm/output_3.out4_3",
        "/model/layers.2/input_layernorm/output_0.out4_3",
        "/model/layers.2/attn/qk_proj/Add/output_0.out5_4_6",
        "/model/layers.2/attn/GroupQueryAttention/output_0.out2_2",
        "/model/layers.2/attn/o_proj/MatMulNBits/output_0.out5_4_7",
        "/model/layers.2/post_attention_layernorm/output_3.out4_4",
        "/model/layers.2/post_attention_layernorm/output_0.out4_4",
        "/model/layers.2/mlp/Mul/output_0.out3_2",
        "/model/layers.2/mlp/down_proj/MatMulNBits/output_0.out5_4_8",
        "/model/layers.3/input_layernorm/output_3.out4_5",
        "/model/layers.3/input_layernorm/output_0.out4_5",
        "/model/layers.3/attn/qk_proj/Add/output_0.out5_4_9",
        "/model/layers.3/attn/GroupQueryAttention/output_0.out2_3",
        "/model/layers.3/attn/o_proj/MatMulNBits/output_0.out5_4_10",
        "/model/layers.3/post_attention_layernorm/output_3.out4_6",
        "/model/layers.3/post_attention_layernorm/output_0.out4_6",
        "/model/layers.3/mlp/Mul/output_0.out3_3",
        "/model/layers.3/mlp/down_proj/MatMulNBits/output_0.out5_4_11",
        "/model/layers.4/input_layernorm/output_3.out4_7",
        "/model/layers.4/input_layernorm/output_0.out4_7",
        "/model/layers.4/attn/qk_proj/Add/output_0.out5_4_12",
        "/model/layers.4/attn/GroupQueryAttention/output_0.out2_4",
        "/model/layers.4/attn/o_proj/MatMulNBits/output_0.out5_4_13",
        "/model/layers.4/post_attention_layernorm/output_3.out4_8",
        "/model/layers.4/post_attention_layernorm/output_0.out4_8",
        "/model/layers.4/mlp/Mul/output_0.out3_4",
        "/model/layers.4/mlp/down_proj/MatMulNBits/output_0.out5_4_14",
        "/model/layers.5/input_layernorm/output_3.out4_9",
        "/model/layers.5/input_layernorm/output_0.out4_9",
        "/model/layers.5/attn/qk_proj/Add/output_0.out5_4_15",
        "/model/layers.5/attn/GroupQueryAttention/output_0.out2_5",
        "/model/layers.5/attn/o_proj/MatMulNBits/output_0.out5_4_16",
        "/model/layers.5/post_attention_layernorm/output_3.out4_10",
        "/model/layers.5/post_attention_layernorm/output_0.out4_10",
        "/model/layers.5/mlp/Mul/output_0.out3_5",
        "/model/layers.5/mlp/down_proj/MatMulNBits/output_0.out5_4_17",
        "/model/layers.6/input_layernorm/output_3.out4_11",
        "/model/layers.6/input_layernorm/output_0.out4_11",
        "/model/layers.6/attn/qk_proj/Add/output_0.out5_4_18",
        "/model/layers.6/attn/GroupQueryAttention/output_0.out2_6",
        "/model/layers.6/attn/o_proj/MatMulNBits/output_0.out5_4_19",
        "/model/layers.6/post_attention_layernorm/output_3.out4_12",
        "/model/layers.6/post_attention_layernorm/output_0.out4_12",
        "/model/layers.6/mlp/Mul/output_0.out3_6",
        "/model/layers.6/mlp/down_proj/MatMulNBits/output_0.out5_4_20",
        "/model/layers.7/input_layernorm/output_3.out4_13",
        "/model/layers.7/input_layernorm/output_0.out4_13",
        "/model/layers.7/attn/qk_proj/Add/output_0.out5_4_21",
        "/model/layers.7/attn/GroupQueryAttention/output_0.out2_7",
        "/model/layers.7/attn/o_proj/MatMulNBits/output_0.out5_4_22",
        "/model/layers.7/post_attention_layernorm/output_3.out4_14",
        "/model/layers.7/post_attention_layernorm/output_0.out4_14",
        "/model/layers.7/mlp/Mul/output_0.out3_7",
        "/model/layers.7/mlp/down_proj/MatMulNBits/output_0.out5_4_23",
        "/model/layers.8/input_layernorm/output_3.out4_15",
        "/model/layers.8/input_layernorm/output_0.out4_15",
        "/model/layers.8/attn/qk_proj/Add/output_0.out5_4_24",
        "/model/layers.8/attn/GroupQueryAttention/output_0.out2_8",
        "/model/layers.8/attn/o_proj/MatMulNBits/output_0.out5_4_25",
        "/model/layers.8/post_attention_layernorm/output_3.out4_16",
        "/model/layers.8/post_attention_layernorm/output_0.out4_16",
        "/model/layers.8/mlp/Mul/output_0.out3_8",
        "/model/layers.8/mlp/down_proj/MatMulNBits/output_0.out5_4_26",
        "/model/layers.9/input_layernorm/output_3.out4_17",
        "/model/layers.9/input_layernorm/output_0.out4_17",
        "/model/layers.9/attn/qk_proj/Add/output_0.out5_4_27",
        "/model/layers.9/attn/GroupQueryAttention/output_0.out2_9",
        "/model/layers.9/attn/o_proj/MatMulNBits/output_0.out5_4_28",
        "/model/layers.9/post_attention_layernorm/output_3.out4_18",
        "/model/layers.9/post_attention_layernorm/output_0.out4_18",
        "/model/layers.9/mlp/Mul/output_0.out3_9",
        "/model/layers.9/mlp/down_proj/MatMulNBits/output_0.out5_4_29",
        "/model/layers.10/input_layernorm/output_3.out4_19",
        "/model/layers.10/input_layernorm/output_0.out4_19",
        "/model/layers.10/attn/qk_proj/Add/output_0.out5_4_30",
        "/model/layers.10/attn/GroupQueryAttention/output_0.out2_10",
        "/model/layers.10/attn/o_proj/MatMulNBits/output_0.out5_4_31",
        "/model/layers.10/post_attention_layernorm/output_3.out4_20",
        "/model/layers.10/post_attention_layernorm/output_0.out4_20",
        "/model/layers.10/mlp/Mul/output_0.out3_10",
        "/model/layers.10/mlp/down_proj/MatMulNBits/output_0.out5_4_32",
        "/model/layers.11/input_layernorm/output_3.out4_21",
        "/model/layers.11/input_layernorm/output_0.out4_21",
        "/model/layers.11/attn/qk_proj/Add/output_0.out5_4_33",
        "/model/layers.11/attn/GroupQueryAttention/output_0.out2_11",
        "/model/layers.11/attn/o_proj/MatMulNBits/output_0.out5_4_34",
        "/model/layers.11/post_attention_layernorm/output_3.out4_22",
        "/model/layers.11/post_attention_layernorm/output_0.out4_22",
        "/model/layers.11/mlp/Mul/output_0.out3_11",
        "/model/layers.11/mlp/down_proj/MatMulNBits/output_0.out5_4_35",
        "/model/layers.12/input_layernorm/output_3.out4_23",
        "/model/layers.12/input_layernorm/output_0.out4_23",
        "/model/layers.12/attn/qk_proj/Add/output_0.out5_4_36",
        "/model/layers.12/attn/GroupQueryAttention/output_0.out2_12",
        "/model/layers.12/attn/o_proj/MatMulNBits/output_0.out5_4_37",
        "/model/layers.12/post_attention_layernorm/output_3.out4_24",
        "/model/layers.12/post_attention_layernorm/output_0.out4_24",
        "/model/layers.12/mlp/Mul/output_0.out3_12",
        "/model/layers.12/mlp/down_proj/MatMulNBits/output_0.out5_4_38",
        "/model/layers.13/input_layernorm/output_3.out4_25",
        "/model/layers.13/input_layernorm/output_0.out4_25",
        "/model/layers.13/attn/qk_proj/Add/output_0.out5_4_39",
        "/model/layers.13/attn/GroupQueryAttention/output_0.out2_13",
        "/model/layers.13/attn/o_proj/MatMulNBits/output_0.out5_4_40",
        "/model/layers.13/post_attention_layernorm/output_3.out4_26",
        "/model/layers.13/post_attention_layernorm/output_0.out4_26",
        "/model/layers.13/mlp/Mul/output_0.out3_13",
        "/model/layers.13/mlp/down_proj/MatMulNBits/output_0.out5_4_41",
        "/model/layers.14/input_layernorm/output_3.out4_27",
        "/model/layers.14/input_layernorm/output_0.out4_27",
        "/model/layers.14/attn/qk_proj/Add/output_0.out5_4_42",
        "/model/layers.14/attn/GroupQueryAttention/output_0.out2_14",
        "/model/layers.14/attn/o_proj/MatMulNBits/output_0.out5_4_43",
        "/model/layers.14/post_attention_layernorm/output_3.out4_28",
        "/model/layers.14/post_attention_layernorm/output_0.out4_28",
        "/model/layers.14/mlp/Mul/output_0.out3_14",
        "/model/layers.14/mlp/down_proj/MatMulNBits/output_0.out5_4_44",
        "/model/layers.15/input_layernorm/output_3.out4_29",
        "/model/layers.15/input_layernorm/output_0.out4_29",
        "/model/layers.15/attn/qk_proj/Add/output_0.out5_4_45",
        "/model/layers.15/attn/GroupQueryAttention/output_0.out2_15",
        "/model/layers.15/attn/o_proj/MatMulNBits/output_0.out5_4_46",
        "/model/layers.15/post_attention_layernorm/output_3.out4_30",
        "/model/layers.15/post_attention_layernorm/output_0.out4_30",
        "/model/layers.15/mlp/Mul/output_0.out3_15",
        "/model/layers.15/mlp/down_proj/MatMulNBits/output_0.out5_4_47",
        "/model/layers.16/input_layernorm/output_3.out4_31",
        "/model/layers.16/input_layernorm/output_0.out4_31",
        "/model/layers.16/attn/qk_proj/Add/output_0.out5_4_48",
        "/model/layers.16/attn/GroupQueryAttention/output_0.out2_16",
        "/model/layers.16/attn/o_proj/MatMulNBits/output_0.out5_4_49",
        "/model/layers.16/post_attention_layernorm/output_3.out4_32",
        "/model/layers.16/post_attention_layernorm/output_0.out4_32",
        "/model/layers.16/mlp/Mul/output_0.out3_16",
        "/model/layers.16/mlp/down_proj/MatMulNBits/output_0.out5_4_50",
        "/model/layers.17/input_layernorm/output_3.out4_33",
        "/model/layers.17/input_layernorm/output_0.out4_33",
        "/model/layers.17/attn/qk_proj/Add/output_0.out5_4_51",
        "/model/layers.17/attn/GroupQueryAttention/output_0.out2_17",
        "/model/layers.17/attn/o_proj/MatMulNBits/output_0.out5_4_52",
        "/model/layers.17/post_attention_layernorm/output_3.out4_34",
        "/model/layers.17/post_attention_layernorm/output_0.out4_34",
        "/model/layers.17/mlp/Mul/output_0.out3_17",
        "/model/layers.17/mlp/down_proj/MatMulNBits/output_0.out5_4_53",
        "/model/layers.18/input_layernorm/output_3.out4_35",
        "/model/layers.18/input_layernorm/output_0.out4_35",
        "/model/layers.18/attn/qk_proj/Add/output_0.out5_4_54",
        "/model/layers.18/attn/GroupQueryAttention/output_0.out2_18",
        "/model/layers.18/attn/o_proj/MatMulNBits/output_0.out5_4_55",
        "/model/layers.18/post_attention_layernorm/output_3.out4_36",
        "/model/layers.18/post_attention_layernorm/output_0.out4_36",
        "/model/layers.18/mlp/Mul/output_0.out3_18",
        "/model/layers.18/mlp/down_proj/MatMulNBits/output_0.out5_4_56",
        "/model/layers.19/input_layernorm/output_3.out4_37",
        "/model/layers.19/input_layernorm/output_0.out4_37",
        "/model/layers.19/attn/qk_proj/Add/output_0.out5_4_57",
        "/model/layers.19/attn/GroupQueryAttention/output_0.out2_19",
        "/model/layers.19/attn/o_proj/MatMulNBits/output_0.out5_4_58",
        "/model/layers.19/post_attention_layernorm/output_3.out4_38",
        "/model/layers.19/post_attention_layernorm/output_0.out4_38",
        "/model/layers.19/mlp/Mul/output_0.out3_19",
        "/model/layers.19/mlp/down_proj/MatMulNBits/output_0.out5_4_59",
        "/model/layers.20/input_layernorm/output_3.out4_39",
        "/model/layers.20/input_layernorm/output_0.out4_39",
        "/model/layers.20/attn/qk_proj/Add/output_0.out5_4_60",
        "/model/layers.20/attn/GroupQueryAttention/output_0.out2_20",
        "/model/layers.20/attn/o_proj/MatMulNBits/output_0.out5_4_61",
        "/model/layers.20/post_attention_layernorm/output_3.out4_40",
        "/model/layers.20/post_attention_layernorm/output_0.out4_40",
        "/model/layers.20/mlp/Mul/output_0.out3_20",
        "/model/layers.20/mlp/down_proj/MatMulNBits/output_0.out5_4_62",
        "/model/layers.21/input_layernorm/output_3.out4_41",
        "/model/layers.21/input_layernorm/output_0.out4_41",
        "/model/layers.21/attn/qk_proj/Add/output_0.out5_4_63",
        "/model/layers.21/attn/GroupQueryAttention/output_0.out2_21",
        "/model/layers.21/attn/o_proj/MatMulNBits/output_0.out5_4_64",
        "/model/layers.21/post_attention_layernorm/output_3.out4_42",
        "/model/layers.21/post_attention_layernorm/output_0.out4_42",
        "/model/layers.21/mlp/Mul/output_0.out3_21",
        "/model/layers.21/mlp/down_proj/MatMulNBits/output_0.out5_4_65",
        "/model/layers.22/input_layernorm/output_3.out4_43",
        "/model/layers.22/input_layernorm/output_0.out4_43",
        "/model/layers.22/attn/qk_proj/Add/output_0.out5_4_66",
        "/model/layers.22/attn/GroupQueryAttention/output_0.out2_22",
        "/model/layers.22/attn/o_proj/MatMulNBits/output_0.out5_4_67",
        "/model/layers.22/post_attention_layernorm/output_3.out4_44",
        "/model/layers.22/post_attention_layernorm/output_0.out4_44",
        "/model/layers.22/mlp/Mul/output_0.out3_22",
        "/model/layers.22/mlp/down_proj/MatMulNBits/output_0.out5_4_68",
        "/model/layers.23/input_layernorm/output_3.out4_45",
        "/model/layers.23/input_layernorm/output_0.out4_45",
        "/model/layers.23/attn/qk_proj/Add/output_0.out5_4_69",
        "/model/layers.23/attn/GroupQueryAttention/output_0.out2_23",
        "/model/layers.23/attn/o_proj/MatMulNBits/output_0.out5_4_70",
        "/model/layers.23/post_attention_layernorm/output_3.out4_46",
        "/model/layers.23/post_attention_layernorm/output_0.out4_46",
        "/model/layers.23/mlp/Mul/output_0.out3_23",
        "/model/layers.23/mlp/down_proj/MatMulNBits/output_0.out5_4_71",
        "/model/layers.24/input_layernorm/output_3.out4_47",
        "/model/layers.24/input_layernorm/output_0.out4_47",
        "/model/layers.24/attn/qk_proj/Add/output_0.out5_4_72",
        "/model/layers.24/attn/GroupQueryAttention/output_0.out2_24",
        "/model/layers.24/attn/o_proj/MatMulNBits/output_0.out5_4_73",
        "/model/layers.24/post_attention_layernorm/output_3.out4_48",
        "/model/layers.24/post_attention_layernorm/output_0.out4_48",
        "/model/layers.24/mlp/Mul/output_0.out3_24",
        "/model/layers.24/mlp/down_proj/MatMulNBits/output_0.out5_4_74",
        "/model/layers.25/input_layernorm/output_3.out4_49",
        "/model/layers.25/input_layernorm/output_0.out4_49",
        "/model/layers.25/attn/qk_proj/Add/output_0.out5_4_75",
        "/model/layers.25/attn/GroupQueryAttention/output_0.out2_25",
        "/model/layers.25/attn/o_proj/MatMulNBits/output_0.out5_4_76",
        "/model/layers.25/post_attention_layernorm/output_3.out4_50",
        "/model/layers.25/post_attention_layernorm/output_0.out4_50",
        "/model/layers.25/mlp/Mul/output_0.out3_25",
        "/model/layers.25/mlp/down_proj/MatMulNBits/output_0.out5_4_77",
        "/model/layers.26/input_layernorm/output_3.out4_51",
        "/model/layers.26/input_layernorm/output_0.out4_51",
        "/model/layers.26/attn/qk_proj/Add/output_0.out5_4_78",
        "/model/layers.26/attn/GroupQueryAttention/output_0.out2_26",
        "/model/layers.26/attn/o_proj/MatMulNBits/output_0.out5_4_79",
        "/model/layers.26/post_attention_layernorm/output_3.out4_52",
        "/model/layers.26/post_attention_layernorm/output_0.out4_52",
        "/model/layers.26/mlp/Mul/output_0.out3_26",
        "/model/layers.26/mlp/down_proj/MatMulNBits/output_0.out5_4_80",
        "/model/layers.27/input_layernorm/output_3.out4_53",
        "/model/layers.27/input_layernorm/output_0.out4_53",
        "/model/layers.27/attn/qk_proj/Add/output_0.out5_4_81",
        "/model/layers.27/attn/GroupQueryAttention/output_0.out2_27",
        "/model/layers.27/attn/o_proj/MatMulNBits/output_0.out5_4_82",
        "/model/layers.27/post_attention_layernorm/output_3.out4_54",
        "/model/layers.27/post_attention_layernorm/output_0.out4_54",
        "/model/layers.27/mlp/Mul/output_0.out3_27",
        "/model/layers.27/mlp/down_proj/MatMulNBits/output_0.out5_4_83",
        "/model/layers.28/final_norm_layernorm/output_0.out4_55"
      ]
    },
    "const": {
      "buffer_size": 1218863104,
      "xrt_arg_id": 3,
      "packed_tensors": [
        "model.layers.0.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.0.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.0.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.0.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.0.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.0.attn.v_proj.Add.bias.preformat",
        "model.layers.0.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.0.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.0.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.0.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.0.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.0.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.0.post_attention_layernorm.weight.bf",
        "model.layers.0.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.0.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.0.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.0.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.0.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.0.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.0.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.0.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.0.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.0.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.0.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.0.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.1.input_layernorm.weight.bf",
        "model.layers.1.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.1.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.1.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.1.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.1.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.1.attn.v_proj.Add.bias.preformat",
        "model.layers.1.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.1.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.1.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.1.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.1.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.1.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.1.post_attention_layernorm.weight.bf",
        "model.layers.1.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.1.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.1.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.1.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.1.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.1.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.1.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.1.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.1.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.1.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.1.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.1.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.2.input_layernorm.weight.bf",
        "model.layers.2.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.2.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.2.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.2.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.2.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.2.attn.v_proj.Add.bias.preformat",
        "model.layers.2.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.2.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.2.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.2.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.2.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.2.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.2.post_attention_layernorm.weight.bf",
        "model.layers.2.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.2.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.2.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.2.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.2.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.2.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.2.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.2.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.2.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.2.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.2.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.2.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.3.input_layernorm.weight.bf",
        "model.layers.3.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.3.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.3.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.3.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.3.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.3.attn.v_proj.Add.bias.preformat",
        "model.layers.3.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.3.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.3.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.3.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.3.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.3.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.3.post_attention_layernorm.weight.bf",
        "model.layers.3.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.3.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.3.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.3.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.3.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.3.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.3.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.3.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.3.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.3.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.3.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.3.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.4.input_layernorm.weight.bf",
        "model.layers.4.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.4.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.4.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.4.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.4.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.4.attn.v_proj.Add.bias.preformat",
        "model.layers.4.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.4.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.4.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.4.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.4.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.4.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.4.post_attention_layernorm.weight.bf",
        "model.layers.4.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.4.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.4.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.4.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.4.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.4.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.4.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.4.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.4.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.4.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.4.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.4.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.5.input_layernorm.weight.bf",
        "model.layers.5.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.5.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.5.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.5.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.5.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.5.attn.v_proj.Add.bias.preformat",
        "model.layers.5.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.5.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.5.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.5.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.5.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.5.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.5.post_attention_layernorm.weight.bf",
        "model.layers.5.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.5.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.5.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.5.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.5.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.5.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.5.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.5.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.5.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.5.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.5.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.5.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.6.input_layernorm.weight.bf",
        "model.layers.6.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.6.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.6.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.6.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.6.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.6.attn.v_proj.Add.bias.preformat",
        "model.layers.6.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.6.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.6.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.6.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.6.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.6.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.6.post_attention_layernorm.weight.bf",
        "model.layers.6.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.6.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.6.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.6.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.6.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.6.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.6.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.6.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.6.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.6.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.6.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.6.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.7.input_layernorm.weight.bf",
        "model.layers.7.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.7.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.7.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.7.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.7.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.7.attn.v_proj.Add.bias.preformat",
        "model.layers.7.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.7.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.7.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.7.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.7.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.7.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.7.post_attention_layernorm.weight.bf",
        "model.layers.7.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.7.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.7.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.7.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.7.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.7.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.7.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.7.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.7.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.7.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.7.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.7.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.8.input_layernorm.weight.bf",
        "model.layers.8.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.8.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.8.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.8.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.8.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.8.attn.v_proj.Add.bias.preformat",
        "model.layers.8.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.8.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.8.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.8.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.8.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.8.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.8.post_attention_layernorm.weight.bf",
        "model.layers.8.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.8.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.8.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.8.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.8.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.8.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.8.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.8.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.8.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.8.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.8.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.8.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.9.input_layernorm.weight.bf",
        "model.layers.9.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.9.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.9.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.9.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.9.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.9.attn.v_proj.Add.bias.preformat",
        "model.layers.9.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.9.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.9.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.9.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.9.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.9.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.9.post_attention_layernorm.weight.bf",
        "model.layers.9.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.9.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.9.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.9.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.9.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.9.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.9.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.9.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.9.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.9.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.9.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.9.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.10.input_layernorm.weight.bf",
        "model.layers.10.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.10.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.10.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.10.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.10.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.10.attn.v_proj.Add.bias.preformat",
        "model.layers.10.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.10.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.10.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.10.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.10.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.10.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.10.post_attention_layernorm.weight.bf",
        "model.layers.10.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.10.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.10.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.10.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.10.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.10.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.10.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.10.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.10.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.10.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.10.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.10.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.11.input_layernorm.weight.bf",
        "model.layers.11.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.11.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.11.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.11.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.11.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.11.attn.v_proj.Add.bias.preformat",
        "model.layers.11.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.11.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.11.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.11.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.11.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.11.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.11.post_attention_layernorm.weight.bf",
        "model.layers.11.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.11.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.11.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.11.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.11.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.11.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.11.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.11.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.11.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.11.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.11.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.11.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.12.input_layernorm.weight.bf",
        "model.layers.12.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.12.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.12.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.12.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.12.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.12.attn.v_proj.Add.bias.preformat",
        "model.layers.12.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.12.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.12.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.12.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.12.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.12.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.12.post_attention_layernorm.weight.bf",
        "model.layers.12.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.12.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.12.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.12.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.12.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.12.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.12.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.12.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.12.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.12.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.12.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.12.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.13.input_layernorm.weight.bf",
        "model.layers.13.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.13.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.13.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.13.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.13.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.13.attn.v_proj.Add.bias.preformat",
        "model.layers.13.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.13.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.13.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.13.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.13.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.13.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.13.post_attention_layernorm.weight.bf",
        "model.layers.13.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.13.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.13.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.13.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.13.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.13.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.13.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.13.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.13.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.13.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.13.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.13.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.14.input_layernorm.weight.bf",
        "model.layers.14.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.14.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.14.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.14.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.14.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.14.attn.v_proj.Add.bias.preformat",
        "model.layers.14.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.14.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.14.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.14.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.14.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.14.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.14.post_attention_layernorm.weight.bf",
        "model.layers.14.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.14.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.14.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.14.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.14.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.14.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.14.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.14.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.14.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.14.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.14.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.14.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.15.input_layernorm.weight.bf",
        "model.layers.15.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.15.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.15.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.15.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.15.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.15.attn.v_proj.Add.bias.preformat",
        "model.layers.15.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.15.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.15.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.15.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.15.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.15.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.15.post_attention_layernorm.weight.bf",
        "model.layers.15.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.15.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.15.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.15.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.15.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.15.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.15.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.15.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.15.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.15.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.15.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.15.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.16.input_layernorm.weight.bf",
        "model.layers.16.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.16.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.16.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.16.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.16.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.16.attn.v_proj.Add.bias.preformat",
        "model.layers.16.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.16.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.16.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.16.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.16.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.16.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.16.post_attention_layernorm.weight.bf",
        "model.layers.16.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.16.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.16.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.16.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.16.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.16.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.16.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.16.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.16.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.16.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.16.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.16.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.17.input_layernorm.weight.bf",
        "model.layers.17.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.17.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.17.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.17.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.17.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.17.attn.v_proj.Add.bias.preformat",
        "model.layers.17.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.17.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.17.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.17.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.17.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.17.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.17.post_attention_layernorm.weight.bf",
        "model.layers.17.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.17.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.17.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.17.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.17.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.17.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.17.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.17.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.17.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.17.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.17.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.17.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.18.input_layernorm.weight.bf",
        "model.layers.18.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.18.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.18.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.18.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.18.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.18.attn.v_proj.Add.bias.preformat",
        "model.layers.18.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.18.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.18.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.18.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.18.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.18.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.18.post_attention_layernorm.weight.bf",
        "model.layers.18.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.18.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.18.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.18.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.18.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.18.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.18.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.18.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.18.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.18.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.18.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.18.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.19.input_layernorm.weight.bf",
        "model.layers.19.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.19.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.19.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.19.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.19.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.19.attn.v_proj.Add.bias.preformat",
        "model.layers.19.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.19.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.19.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.19.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.19.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.19.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.19.post_attention_layernorm.weight.bf",
        "model.layers.19.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.19.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.19.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.19.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.19.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.19.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.19.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.19.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.19.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.19.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.19.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.19.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.20.input_layernorm.weight.bf",
        "model.layers.20.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.20.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.20.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.20.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.20.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.20.attn.v_proj.Add.bias.preformat",
        "model.layers.20.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.20.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.20.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.20.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.20.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.20.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.20.post_attention_layernorm.weight.bf",
        "model.layers.20.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.20.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.20.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.20.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.20.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.20.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.20.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.20.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.20.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.20.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.20.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.20.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.21.input_layernorm.weight.bf",
        "model.layers.21.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.21.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.21.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.21.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.21.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.21.attn.v_proj.Add.bias.preformat",
        "model.layers.21.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.21.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.21.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.21.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.21.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.21.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.21.post_attention_layernorm.weight.bf",
        "model.layers.21.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.21.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.21.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.21.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.21.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.21.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.21.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.21.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.21.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.21.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.21.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.21.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.22.input_layernorm.weight.bf",
        "model.layers.22.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.22.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.22.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.22.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.22.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.22.attn.v_proj.Add.bias.preformat",
        "model.layers.22.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.22.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.22.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.22.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.22.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.22.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.22.post_attention_layernorm.weight.bf",
        "model.layers.22.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.22.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.22.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.22.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.22.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.22.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.22.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.22.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.22.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.22.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.22.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.22.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.23.input_layernorm.weight.bf",
        "model.layers.23.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.23.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.23.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.23.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.23.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.23.attn.v_proj.Add.bias.preformat",
        "model.layers.23.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.23.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.23.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.23.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.23.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.23.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.23.post_attention_layernorm.weight.bf",
        "model.layers.23.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.23.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.23.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.23.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.23.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.23.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.23.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.23.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.23.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.23.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.23.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.23.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.24.input_layernorm.weight.bf",
        "model.layers.24.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.24.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.24.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.24.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.24.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.24.attn.v_proj.Add.bias.preformat",
        "model.layers.24.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.24.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.24.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.24.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.24.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.24.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.24.post_attention_layernorm.weight.bf",
        "model.layers.24.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.24.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.24.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.24.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.24.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.24.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.24.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.24.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.24.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.24.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.24.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.24.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.25.input_layernorm.weight.bf",
        "model.layers.25.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.25.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.25.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.25.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.25.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.25.attn.v_proj.Add.bias.preformat",
        "model.layers.25.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.25.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.25.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.25.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.25.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.25.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.25.post_attention_layernorm.weight.bf",
        "model.layers.25.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.25.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.25.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.25.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.25.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.25.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.25.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.25.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.25.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.25.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.25.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.25.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.26.input_layernorm.weight.bf",
        "model.layers.26.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.26.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.26.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.26.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.26.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.26.attn.v_proj.Add.bias.preformat",
        "model.layers.26.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.26.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.26.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.26.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.26.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.26.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.26.post_attention_layernorm.weight.bf",
        "model.layers.26.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.26.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.26.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.26.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.26.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.26.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.26.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.26.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.26.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.26.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.26.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.26.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.27.input_layernorm.weight.bf",
        "model.layers.27.attn.qk_proj.MatMulNBits.qweight.preformat",
        "model.layers.27.attn.qk_proj.MatMulNBits.bias.preformat",
        "model.layers.27.attn.qk_proj.MatMulNBits.scales.preformat",
        "model.layers.27.attn.qk_proj.MatMulNBits.qzeros.preformat",
        "model.layers.27.attn.v_proj.MatMulNBits.qweight.preformat",
        "model.layers.27.attn.v_proj.Add.bias.preformat",
        "model.layers.27.attn.v_proj.MatMulNBits.scales.preformat",
        "model.layers.27.attn.v_proj.MatMulNBits.qzeros.preformat",
        "model.layers.27.attn.o_proj.MatMulNBits.qweight.preformat",
        "model.layers.27.attn.o_proj.MatMulNBits.bias.preformat",
        "model.layers.27.attn.o_proj.MatMulNBits.scales.preformat",
        "model.layers.27.attn.o_proj.MatMulNBits.qzeros.preformat",
        "model.layers.27.post_attention_layernorm.weight.bf",
        "model.layers.27.mlp.gate_proj.MatMulNBits.qweight",
        "model.layers.27.mlp.gate_proj.MatMulNBits.scales.f",
        "model.layers.27.mlp.gate_proj.MatMulNBits.qzeros",
        "model.layers.27.mlp.gate_proj.MatMulNBits.bias.f",
        "model.layers.27.mlp.up_proj.MatMulNBits.qweight",
        "model.layers.27.mlp.up_proj.MatMulNBits.scales.f",
        "model.layers.27.mlp.up_proj.MatMulNBits.qzeros",
        "model.layers.27.mlp.up_proj.MatMulNBits.bias.f",
        "model.layers.27.mlp.down_proj.MatMulNBits.qweight.preformat",
        "model.layers.27.mlp.down_proj.MatMulNBits.bias.preformat",
        "model.layers.27.mlp.down_proj.MatMulNBits.scales.preformat",
        "model.layers.27.mlp.down_proj.MatMulNBits.qzeros.preformat",
        "model.layers.28.final_norm_layernorm.weight.bf",
        "lm_head.MatMulNBits.qweight.preformat",
        "lm_head.MatMulNBits.bias.preformat",
        "lm_head.MatMulNBits.scales.preformat",
        "lm_head.MatMulNBits.qzeros.preformat"
      ]
    },
    "super_instr": {
      "buffer_size": 0,
      "xrt_arg_id": 4,
      "packed_tensors": []
    },
    "ext_buf_0": {
      "buffer_size": 117440512,
      "xrt_arg_id": 5,
      "packed_tensors": [
        "past_key_values.0.key",
        "past_key_values.0.value",
        "present.0.key",
        "present.0.value",
        "past_key_values.1.key",
        "past_key_values.1.value",
        "present.1.key",
        "present.1.value",
        "past_key_values.2.key",
        "past_key_values.2.value",
        "present.2.key",
        "present.2.value",
        "past_key_values.3.key",
        "past_key_values.3.value",
        "present.3.key",
        "present.3.value",
        "past_key_values.4.key",
        "past_key_values.4.value",
        "present.4.key",
        "present.4.value",
        "past_key_values.5.key",
        "past_key_values.5.value",
        "present.5.key",
        "present.5.value",
        "past_key_values.6.key",
        "past_key_values.6.value",
        "present.6.key",
        "present.6.value",
        "past_key_values.7.key",
        "past_key_values.7.value",
        "present.7.key",
        "present.7.value",
        "past_key_values.8.key",
        "past_key_values.8.value",
        "present.8.key",
        "present.8.value",
        "past_key_values.9.key",
        "past_key_values.9.value",
        "present.9.key",
        "present.9.value",
        "past_key_values.10.key",
        "past_key_values.10.value",
        "present.10.key",
        "present.10.value",
        "past_key_values.11.key",
        "past_key_values.11.value",
        "present.11.key",
        "present.11.value",
        "past_key_values.12.key",
        "past_key_values.12.value",
        "present.12.key",
        "present.12.value",
        "past_key_values.13.key",
        "past_key_values.13.value",
        "present.13.key",
        "present.13.value",
        "past_key_values.14.key",
        "past_key_values.14.value",
        "present.14.key",
        "present.14.value",
        "past_key_values.15.key",
        "past_key_values.15.value",
        "present.15.key",
        "present.15.value",
        "past_key_values.16.key",
        "past_key_values.16.value",
        "present.16.key",
        "present.16.value",
        "past_key_values.17.key",
        "past_key_values.17.value",
        "present.17.key",
        "present.17.value",
        "past_key_values.18.key",
        "past_key_values.18.value",
        "present.18.key",
        "present.18.value",
        "past_key_values.19.key",
        "past_key_values.19.value",
        "present.19.key",
        "present.19.value",
        "past_key_values.20.key",
        "past_key_values.20.value",
        "present.20.key",
        "present.20.value",
        "past_key_values.21.key",
        "past_key_values.21.value",
        "present.21.key",
        "present.21.value",
        "past_key_values.22.key",
        "past_key_values.22.value",
        "present.22.key",
        "present.22.value",
        "past_key_values.23.key",
        "past_key_values.23.value",
        "present.23.key",
        "present.23.value",
        "past_key_values.24.key",
        "past_key_values.24.value",
        "present.24.key",
        "present.24.value",
        "past_key_values.25.key",
        "past_key_values.25.value",
        "present.25.key",
        "present.25.value",
        "past_key_values.26.key",
        "past_key_values.26.value",
        "present.26.key",
        "present.26.value",
        "past_key_values.27.key",
        "past_key_values.27.value",
        "present.27.key",
        "present.27.value"
      ]
    },
    "ext_buf_1": {
      "buffer_size": 33554432,
      "xrt_arg_id": 6,
      "packed_tensors": [
        "sin_cos_cache_token"
      ]
    }
  },
  "tensor_map": {
    "/model/layers.0/input_layernorm/output_0.out5_4_0": {
      "packed_buffer_label": "in",
      "xrt_arg_id": 0,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 3072
    },
    "attention_mask_const_uint": {
      "packed_buffer_label": "in",
      "xrt_arg_id": 0,
      "dtype": "uint32",
      "shape": [
        1
      ],
      "size_in_bytes": 4,
      "op_tensor_size": 4,
      "offset": 9324
    },
    "/model/embed_tokens/Gather/output_0.out4_0": {
      "packed_buffer_label": "in",
      "xrt_arg_id": 0,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 6148
    },
    "/model/layers.28/final_norm_layernorm/output_0.dummy": {
      "packed_buffer_label": "out",
      "xrt_arg_id": 1,
      "dtype": "float16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 0
    },
    "logits.out5_4_84": {
      "packed_buffer_label": "out",
      "xrt_arg_id": 1,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        151936
      ],
      "size_in_bytes": 303872,
      "op_tensor_size": 303872,
      "offset": 3072
    },
    "/model/layers.0/attn/qk_proj/Add/output_0.out5_4_0": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 0
    },
    "/model/layers.0/attn/GroupQueryAttention/output_0.out2_0": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 3584
    },
    "/model/layers.0/attn/o_proj/MatMulNBits/output_0.out5_4_1": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 6656
    },
    "/model/layers.0/post_attention_layernorm/output_3.out4_0": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 9728
    },
    "/model/layers.0/post_attention_layernorm/output_0.out4_0": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 12800
    },
    "/model/layers.0/mlp/Mul/output_0.out3_0": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 15872
    },
    "/model/layers.0/mlp/down_proj/MatMulNBits/output_0.out5_4_2": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 33792
    },
    "/model/layers.1/input_layernorm/output_3.out4_1": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 36864
    },
    "/model/layers.1/input_layernorm/output_0.out4_1": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 39936
    },
    "/model/layers.1/attn/qk_proj/Add/output_0.out5_4_3": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 43008
    },
    "/model/layers.1/attn/GroupQueryAttention/output_0.out2_1": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 46592
    },
    "/model/layers.1/attn/o_proj/MatMulNBits/output_0.out5_4_4": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 49664
    },
    "/model/layers.1/post_attention_layernorm/output_3.out4_2": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 52736
    },
    "/model/layers.1/post_attention_layernorm/output_0.out4_2": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 55808
    },
    "/model/layers.1/mlp/Mul/output_0.out3_1": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 58880
    },
    "/model/layers.1/mlp/down_proj/MatMulNBits/output_0.out5_4_5": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 76800
    },
    "/model/layers.2/input_layernorm/output_3.out4_3": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 79872
    },
    "/model/layers.2/input_layernorm/output_0.out4_3": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 82944
    },
    "/model/layers.2/attn/qk_proj/Add/output_0.out5_4_6": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 86016
    },
    "/model/layers.2/attn/GroupQueryAttention/output_0.out2_2": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 89600
    },
    "/model/layers.2/attn/o_proj/MatMulNBits/output_0.out5_4_7": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 92672
    },
    "/model/layers.2/post_attention_layernorm/output_3.out4_4": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 95744
    },
    "/model/layers.2/post_attention_layernorm/output_0.out4_4": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 98816
    },
    "/model/layers.2/mlp/Mul/output_0.out3_2": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 101888
    },
    "/model/layers.2/mlp/down_proj/MatMulNBits/output_0.out5_4_8": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 119808
    },
    "/model/layers.3/input_layernorm/output_3.out4_5": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 122880
    },
    "/model/layers.3/input_layernorm/output_0.out4_5": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 125952
    },
    "/model/layers.3/attn/qk_proj/Add/output_0.out5_4_9": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 129024
    },
    "/model/layers.3/attn/GroupQueryAttention/output_0.out2_3": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 132608
    },
    "/model/layers.3/attn/o_proj/MatMulNBits/output_0.out5_4_10": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 135680
    },
    "/model/layers.3/post_attention_layernorm/output_3.out4_6": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 138752
    },
    "/model/layers.3/post_attention_layernorm/output_0.out4_6": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 141824
    },
    "/model/layers.3/mlp/Mul/output_0.out3_3": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 144896
    },
    "/model/layers.3/mlp/down_proj/MatMulNBits/output_0.out5_4_11": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 162816
    },
    "/model/layers.4/input_layernorm/output_3.out4_7": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 165888
    },
    "/model/layers.4/input_layernorm/output_0.out4_7": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 168960
    },
    "/model/layers.4/attn/qk_proj/Add/output_0.out5_4_12": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 172032
    },
    "/model/layers.4/attn/GroupQueryAttention/output_0.out2_4": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 175616
    },
    "/model/layers.4/attn/o_proj/MatMulNBits/output_0.out5_4_13": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 178688
    },
    "/model/layers.4/post_attention_layernorm/output_3.out4_8": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 181760
    },
    "/model/layers.4/post_attention_layernorm/output_0.out4_8": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 184832
    },
    "/model/layers.4/mlp/Mul/output_0.out3_4": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 187904
    },
    "/model/layers.4/mlp/down_proj/MatMulNBits/output_0.out5_4_14": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 205824
    },
    "/model/layers.5/input_layernorm/output_3.out4_9": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 208896
    },
    "/model/layers.5/input_layernorm/output_0.out4_9": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 211968
    },
    "/model/layers.5/attn/qk_proj/Add/output_0.out5_4_15": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 215040
    },
    "/model/layers.5/attn/GroupQueryAttention/output_0.out2_5": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 218624
    },
    "/model/layers.5/attn/o_proj/MatMulNBits/output_0.out5_4_16": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 221696
    },
    "/model/layers.5/post_attention_layernorm/output_3.out4_10": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 224768
    },
    "/model/layers.5/post_attention_layernorm/output_0.out4_10": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 227840
    },
    "/model/layers.5/mlp/Mul/output_0.out3_5": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 230912
    },
    "/model/layers.5/mlp/down_proj/MatMulNBits/output_0.out5_4_17": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 248832
    },
    "/model/layers.6/input_layernorm/output_3.out4_11": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 251904
    },
    "/model/layers.6/input_layernorm/output_0.out4_11": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 254976
    },
    "/model/layers.6/attn/qk_proj/Add/output_0.out5_4_18": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 258048
    },
    "/model/layers.6/attn/GroupQueryAttention/output_0.out2_6": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 261632
    },
    "/model/layers.6/attn/o_proj/MatMulNBits/output_0.out5_4_19": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 264704
    },
    "/model/layers.6/post_attention_layernorm/output_3.out4_12": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 267776
    },
    "/model/layers.6/post_attention_layernorm/output_0.out4_12": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 270848
    },
    "/model/layers.6/mlp/Mul/output_0.out3_6": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 273920
    },
    "/model/layers.6/mlp/down_proj/MatMulNBits/output_0.out5_4_20": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 291840
    },
    "/model/layers.7/input_layernorm/output_3.out4_13": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 294912
    },
    "/model/layers.7/input_layernorm/output_0.out4_13": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 297984
    },
    "/model/layers.7/attn/qk_proj/Add/output_0.out5_4_21": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 301056
    },
    "/model/layers.7/attn/GroupQueryAttention/output_0.out2_7": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 304640
    },
    "/model/layers.7/attn/o_proj/MatMulNBits/output_0.out5_4_22": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 307712
    },
    "/model/layers.7/post_attention_layernorm/output_3.out4_14": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 310784
    },
    "/model/layers.7/post_attention_layernorm/output_0.out4_14": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 313856
    },
    "/model/layers.7/mlp/Mul/output_0.out3_7": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 316928
    },
    "/model/layers.7/mlp/down_proj/MatMulNBits/output_0.out5_4_23": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 334848
    },
    "/model/layers.8/input_layernorm/output_3.out4_15": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 337920
    },
    "/model/layers.8/input_layernorm/output_0.out4_15": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 340992
    },
    "/model/layers.8/attn/qk_proj/Add/output_0.out5_4_24": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 344064
    },
    "/model/layers.8/attn/GroupQueryAttention/output_0.out2_8": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 347648
    },
    "/model/layers.8/attn/o_proj/MatMulNBits/output_0.out5_4_25": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 350720
    },
    "/model/layers.8/post_attention_layernorm/output_3.out4_16": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 353792
    },
    "/model/layers.8/post_attention_layernorm/output_0.out4_16": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 356864
    },
    "/model/layers.8/mlp/Mul/output_0.out3_8": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 359936
    },
    "/model/layers.8/mlp/down_proj/MatMulNBits/output_0.out5_4_26": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 377856
    },
    "/model/layers.9/input_layernorm/output_3.out4_17": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 380928
    },
    "/model/layers.9/input_layernorm/output_0.out4_17": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 384000
    },
    "/model/layers.9/attn/qk_proj/Add/output_0.out5_4_27": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 387072
    },
    "/model/layers.9/attn/GroupQueryAttention/output_0.out2_9": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 390656
    },
    "/model/layers.9/attn/o_proj/MatMulNBits/output_0.out5_4_28": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 393728
    },
    "/model/layers.9/post_attention_layernorm/output_3.out4_18": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 396800
    },
    "/model/layers.9/post_attention_layernorm/output_0.out4_18": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 399872
    },
    "/model/layers.9/mlp/Mul/output_0.out3_9": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 402944
    },
    "/model/layers.9/mlp/down_proj/MatMulNBits/output_0.out5_4_29": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 420864
    },
    "/model/layers.10/input_layernorm/output_3.out4_19": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 423936
    },
    "/model/layers.10/input_layernorm/output_0.out4_19": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 427008
    },
    "/model/layers.10/attn/qk_proj/Add/output_0.out5_4_30": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 430080
    },
    "/model/layers.10/attn/GroupQueryAttention/output_0.out2_10": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 433664
    },
    "/model/layers.10/attn/o_proj/MatMulNBits/output_0.out5_4_31": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 436736
    },
    "/model/layers.10/post_attention_layernorm/output_3.out4_20": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 439808
    },
    "/model/layers.10/post_attention_layernorm/output_0.out4_20": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 442880
    },
    "/model/layers.10/mlp/Mul/output_0.out3_10": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 445952
    },
    "/model/layers.10/mlp/down_proj/MatMulNBits/output_0.out5_4_32": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 463872
    },
    "/model/layers.11/input_layernorm/output_3.out4_21": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 466944
    },
    "/model/layers.11/input_layernorm/output_0.out4_21": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 470016
    },
    "/model/layers.11/attn/qk_proj/Add/output_0.out5_4_33": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 473088
    },
    "/model/layers.11/attn/GroupQueryAttention/output_0.out2_11": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 476672
    },
    "/model/layers.11/attn/o_proj/MatMulNBits/output_0.out5_4_34": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 479744
    },
    "/model/layers.11/post_attention_layernorm/output_3.out4_22": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 482816
    },
    "/model/layers.11/post_attention_layernorm/output_0.out4_22": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 485888
    },
    "/model/layers.11/mlp/Mul/output_0.out3_11": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 488960
    },
    "/model/layers.11/mlp/down_proj/MatMulNBits/output_0.out5_4_35": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 506880
    },
    "/model/layers.12/input_layernorm/output_3.out4_23": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 509952
    },
    "/model/layers.12/input_layernorm/output_0.out4_23": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 513024
    },
    "/model/layers.12/attn/qk_proj/Add/output_0.out5_4_36": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 516096
    },
    "/model/layers.12/attn/GroupQueryAttention/output_0.out2_12": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 519680
    },
    "/model/layers.12/attn/o_proj/MatMulNBits/output_0.out5_4_37": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 522752
    },
    "/model/layers.12/post_attention_layernorm/output_3.out4_24": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 525824
    },
    "/model/layers.12/post_attention_layernorm/output_0.out4_24": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 528896
    },
    "/model/layers.12/mlp/Mul/output_0.out3_12": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 531968
    },
    "/model/layers.12/mlp/down_proj/MatMulNBits/output_0.out5_4_38": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 549888
    },
    "/model/layers.13/input_layernorm/output_3.out4_25": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 552960
    },
    "/model/layers.13/input_layernorm/output_0.out4_25": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 556032
    },
    "/model/layers.13/attn/qk_proj/Add/output_0.out5_4_39": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 559104
    },
    "/model/layers.13/attn/GroupQueryAttention/output_0.out2_13": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 562688
    },
    "/model/layers.13/attn/o_proj/MatMulNBits/output_0.out5_4_40": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 565760
    },
    "/model/layers.13/post_attention_layernorm/output_3.out4_26": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 568832
    },
    "/model/layers.13/post_attention_layernorm/output_0.out4_26": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 571904
    },
    "/model/layers.13/mlp/Mul/output_0.out3_13": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 574976
    },
    "/model/layers.13/mlp/down_proj/MatMulNBits/output_0.out5_4_41": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 592896
    },
    "/model/layers.14/input_layernorm/output_3.out4_27": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 595968
    },
    "/model/layers.14/input_layernorm/output_0.out4_27": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 599040
    },
    "/model/layers.14/attn/qk_proj/Add/output_0.out5_4_42": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 602112
    },
    "/model/layers.14/attn/GroupQueryAttention/output_0.out2_14": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 605696
    },
    "/model/layers.14/attn/o_proj/MatMulNBits/output_0.out5_4_43": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 608768
    },
    "/model/layers.14/post_attention_layernorm/output_3.out4_28": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 611840
    },
    "/model/layers.14/post_attention_layernorm/output_0.out4_28": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 614912
    },
    "/model/layers.14/mlp/Mul/output_0.out3_14": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 617984
    },
    "/model/layers.14/mlp/down_proj/MatMulNBits/output_0.out5_4_44": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 635904
    },
    "/model/layers.15/input_layernorm/output_3.out4_29": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 638976
    },
    "/model/layers.15/input_layernorm/output_0.out4_29": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 642048
    },
    "/model/layers.15/attn/qk_proj/Add/output_0.out5_4_45": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 645120
    },
    "/model/layers.15/attn/GroupQueryAttention/output_0.out2_15": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 648704
    },
    "/model/layers.15/attn/o_proj/MatMulNBits/output_0.out5_4_46": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 651776
    },
    "/model/layers.15/post_attention_layernorm/output_3.out4_30": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 654848
    },
    "/model/layers.15/post_attention_layernorm/output_0.out4_30": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 657920
    },
    "/model/layers.15/mlp/Mul/output_0.out3_15": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 660992
    },
    "/model/layers.15/mlp/down_proj/MatMulNBits/output_0.out5_4_47": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 678912
    },
    "/model/layers.16/input_layernorm/output_3.out4_31": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 681984
    },
    "/model/layers.16/input_layernorm/output_0.out4_31": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 685056
    },
    "/model/layers.16/attn/qk_proj/Add/output_0.out5_4_48": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 688128
    },
    "/model/layers.16/attn/GroupQueryAttention/output_0.out2_16": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 691712
    },
    "/model/layers.16/attn/o_proj/MatMulNBits/output_0.out5_4_49": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 694784
    },
    "/model/layers.16/post_attention_layernorm/output_3.out4_32": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 697856
    },
    "/model/layers.16/post_attention_layernorm/output_0.out4_32": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 700928
    },
    "/model/layers.16/mlp/Mul/output_0.out3_16": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 704000
    },
    "/model/layers.16/mlp/down_proj/MatMulNBits/output_0.out5_4_50": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 721920
    },
    "/model/layers.17/input_layernorm/output_3.out4_33": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 724992
    },
    "/model/layers.17/input_layernorm/output_0.out4_33": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 728064
    },
    "/model/layers.17/attn/qk_proj/Add/output_0.out5_4_51": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 731136
    },
    "/model/layers.17/attn/GroupQueryAttention/output_0.out2_17": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 734720
    },
    "/model/layers.17/attn/o_proj/MatMulNBits/output_0.out5_4_52": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 737792
    },
    "/model/layers.17/post_attention_layernorm/output_3.out4_34": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 740864
    },
    "/model/layers.17/post_attention_layernorm/output_0.out4_34": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 743936
    },
    "/model/layers.17/mlp/Mul/output_0.out3_17": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 747008
    },
    "/model/layers.17/mlp/down_proj/MatMulNBits/output_0.out5_4_53": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 764928
    },
    "/model/layers.18/input_layernorm/output_3.out4_35": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 768000
    },
    "/model/layers.18/input_layernorm/output_0.out4_35": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 771072
    },
    "/model/layers.18/attn/qk_proj/Add/output_0.out5_4_54": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 774144
    },
    "/model/layers.18/attn/GroupQueryAttention/output_0.out2_18": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 777728
    },
    "/model/layers.18/attn/o_proj/MatMulNBits/output_0.out5_4_55": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 780800
    },
    "/model/layers.18/post_attention_layernorm/output_3.out4_36": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 783872
    },
    "/model/layers.18/post_attention_layernorm/output_0.out4_36": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 786944
    },
    "/model/layers.18/mlp/Mul/output_0.out3_18": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 790016
    },
    "/model/layers.18/mlp/down_proj/MatMulNBits/output_0.out5_4_56": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 807936
    },
    "/model/layers.19/input_layernorm/output_3.out4_37": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 811008
    },
    "/model/layers.19/input_layernorm/output_0.out4_37": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 814080
    },
    "/model/layers.19/attn/qk_proj/Add/output_0.out5_4_57": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 817152
    },
    "/model/layers.19/attn/GroupQueryAttention/output_0.out2_19": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 820736
    },
    "/model/layers.19/attn/o_proj/MatMulNBits/output_0.out5_4_58": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 823808
    },
    "/model/layers.19/post_attention_layernorm/output_3.out4_38": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 826880
    },
    "/model/layers.19/post_attention_layernorm/output_0.out4_38": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 829952
    },
    "/model/layers.19/mlp/Mul/output_0.out3_19": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 833024
    },
    "/model/layers.19/mlp/down_proj/MatMulNBits/output_0.out5_4_59": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 850944
    },
    "/model/layers.20/input_layernorm/output_3.out4_39": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 854016
    },
    "/model/layers.20/input_layernorm/output_0.out4_39": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 857088
    },
    "/model/layers.20/attn/qk_proj/Add/output_0.out5_4_60": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 860160
    },
    "/model/layers.20/attn/GroupQueryAttention/output_0.out2_20": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 863744
    },
    "/model/layers.20/attn/o_proj/MatMulNBits/output_0.out5_4_61": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 866816
    },
    "/model/layers.20/post_attention_layernorm/output_3.out4_40": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 869888
    },
    "/model/layers.20/post_attention_layernorm/output_0.out4_40": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 872960
    },
    "/model/layers.20/mlp/Mul/output_0.out3_20": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 876032
    },
    "/model/layers.20/mlp/down_proj/MatMulNBits/output_0.out5_4_62": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 893952
    },
    "/model/layers.21/input_layernorm/output_3.out4_41": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 897024
    },
    "/model/layers.21/input_layernorm/output_0.out4_41": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 900096
    },
    "/model/layers.21/attn/qk_proj/Add/output_0.out5_4_63": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 903168
    },
    "/model/layers.21/attn/GroupQueryAttention/output_0.out2_21": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 906752
    },
    "/model/layers.21/attn/o_proj/MatMulNBits/output_0.out5_4_64": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 909824
    },
    "/model/layers.21/post_attention_layernorm/output_3.out4_42": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 912896
    },
    "/model/layers.21/post_attention_layernorm/output_0.out4_42": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 915968
    },
    "/model/layers.21/mlp/Mul/output_0.out3_21": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 919040
    },
    "/model/layers.21/mlp/down_proj/MatMulNBits/output_0.out5_4_65": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 936960
    },
    "/model/layers.22/input_layernorm/output_3.out4_43": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 940032
    },
    "/model/layers.22/input_layernorm/output_0.out4_43": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 943104
    },
    "/model/layers.22/attn/qk_proj/Add/output_0.out5_4_66": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 946176
    },
    "/model/layers.22/attn/GroupQueryAttention/output_0.out2_22": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 949760
    },
    "/model/layers.22/attn/o_proj/MatMulNBits/output_0.out5_4_67": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 952832
    },
    "/model/layers.22/post_attention_layernorm/output_3.out4_44": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 955904
    },
    "/model/layers.22/post_attention_layernorm/output_0.out4_44": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 958976
    },
    "/model/layers.22/mlp/Mul/output_0.out3_22": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 962048
    },
    "/model/layers.22/mlp/down_proj/MatMulNBits/output_0.out5_4_68": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 979968
    },
    "/model/layers.23/input_layernorm/output_3.out4_45": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 983040
    },
    "/model/layers.23/input_layernorm/output_0.out4_45": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 986112
    },
    "/model/layers.23/attn/qk_proj/Add/output_0.out5_4_69": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 989184
    },
    "/model/layers.23/attn/GroupQueryAttention/output_0.out2_23": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 992768
    },
    "/model/layers.23/attn/o_proj/MatMulNBits/output_0.out5_4_70": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 995840
    },
    "/model/layers.23/post_attention_layernorm/output_3.out4_46": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 998912
    },
    "/model/layers.23/post_attention_layernorm/output_0.out4_46": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1001984
    },
    "/model/layers.23/mlp/Mul/output_0.out3_23": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 1005056
    },
    "/model/layers.23/mlp/down_proj/MatMulNBits/output_0.out5_4_71": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1022976
    },
    "/model/layers.24/input_layernorm/output_3.out4_47": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1026048
    },
    "/model/layers.24/input_layernorm/output_0.out4_47": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1029120
    },
    "/model/layers.24/attn/qk_proj/Add/output_0.out5_4_72": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 1032192
    },
    "/model/layers.24/attn/GroupQueryAttention/output_0.out2_24": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1035776
    },
    "/model/layers.24/attn/o_proj/MatMulNBits/output_0.out5_4_73": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1038848
    },
    "/model/layers.24/post_attention_layernorm/output_3.out4_48": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1041920
    },
    "/model/layers.24/post_attention_layernorm/output_0.out4_48": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1044992
    },
    "/model/layers.24/mlp/Mul/output_0.out3_24": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 1048064
    },
    "/model/layers.24/mlp/down_proj/MatMulNBits/output_0.out5_4_74": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1065984
    },
    "/model/layers.25/input_layernorm/output_3.out4_49": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1069056
    },
    "/model/layers.25/input_layernorm/output_0.out4_49": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1072128
    },
    "/model/layers.25/attn/qk_proj/Add/output_0.out5_4_75": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 1075200
    },
    "/model/layers.25/attn/GroupQueryAttention/output_0.out2_25": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1078784
    },
    "/model/layers.25/attn/o_proj/MatMulNBits/output_0.out5_4_76": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1081856
    },
    "/model/layers.25/post_attention_layernorm/output_3.out4_50": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1084928
    },
    "/model/layers.25/post_attention_layernorm/output_0.out4_50": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1088000
    },
    "/model/layers.25/mlp/Mul/output_0.out3_25": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 1091072
    },
    "/model/layers.25/mlp/down_proj/MatMulNBits/output_0.out5_4_77": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1108992
    },
    "/model/layers.26/input_layernorm/output_3.out4_51": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1112064
    },
    "/model/layers.26/input_layernorm/output_0.out4_51": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1115136
    },
    "/model/layers.26/attn/qk_proj/Add/output_0.out5_4_78": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 1118208
    },
    "/model/layers.26/attn/GroupQueryAttention/output_0.out2_26": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1121792
    },
    "/model/layers.26/attn/o_proj/MatMulNBits/output_0.out5_4_79": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1124864
    },
    "/model/layers.26/post_attention_layernorm/output_3.out4_52": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1127936
    },
    "/model/layers.26/post_attention_layernorm/output_0.out4_52": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1131008
    },
    "/model/layers.26/mlp/Mul/output_0.out3_26": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 1134080
    },
    "/model/layers.26/mlp/down_proj/MatMulNBits/output_0.out5_4_80": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1152000
    },
    "/model/layers.27/input_layernorm/output_3.out4_53": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1155072
    },
    "/model/layers.27/input_layernorm/output_0.out4_53": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1158144
    },
    "/model/layers.27/attn/qk_proj/Add/output_0.out5_4_81": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1792
      ],
      "size_in_bytes": 3584,
      "op_tensor_size": 3584,
      "offset": 1161216
    },
    "/model/layers.27/attn/GroupQueryAttention/output_0.out2_27": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1164800
    },
    "/model/layers.27/attn/o_proj/MatMulNBits/output_0.out5_4_82": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1167872
    },
    "/model/layers.27/post_attention_layernorm/output_3.out4_54": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1170944
    },
    "/model/layers.27/post_attention_layernorm/output_0.out4_54": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1174016
    },
    "/model/layers.27/mlp/Mul/output_0.out3_27": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        8960
      ],
      "size_in_bytes": 17920,
      "op_tensor_size": 17920,
      "offset": 1177088
    },
    "/model/layers.27/mlp/down_proj/MatMulNBits/output_0.out5_4_83": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1195008
    },
    "/model/layers.28/final_norm_layernorm/output_0.out4_55": {
      "packed_buffer_label": "scratch",
      "xrt_arg_id": 2,
      "dtype": "bfloat16",
      "shape": [
        1,
        1,
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 1198080
    },
    "model.layers.0.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 0,
      "file_name": ".cache\\MatMulNBits_2_0_0.const",
      "file_size": 2752512
    },
    "model.layers.0.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 2752512,
      "file_name": ".cache\\MatMulNBits_2_0_1.const",
      "file_size": 7168
    },
    "model.layers.0.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 2759680,
      "file_name": ".cache\\MatMulNBits_2_0_2.const",
      "file_size": 86016
    },
    "model.layers.0.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 2845696,
      "file_name": ".cache\\MatMulNBits_2_0_3.const",
      "file_size": 21504
    },
    "model.layers.0.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 2867200,
      "file_name": ".cache\\MatMulNBits_2_0_4.const",
      "file_size": 393216
    },
    "model.layers.0.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 3260416,
      "file_name": ".cache\\MatMulNBits_2_0_5.const",
      "file_size": 1024
    },
    "model.layers.0.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 3261440,
      "file_name": ".cache\\MatMulNBits_2_0_6.const",
      "file_size": 12288
    },
    "model.layers.0.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 3273728,
      "file_name": ".cache\\MatMulNBits_2_0_7.const",
      "file_size": 3072
    },
    "model.layers.0.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 3276800,
      "file_name": ".cache\\MatMulNBits_2_0_8.const",
      "file_size": 2359296
    },
    "model.layers.0.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 5636096,
      "file_name": ".cache\\MatMulNBits_2_0_9.const",
      "file_size": 6144
    },
    "model.layers.0.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 5642240,
      "file_name": ".cache\\MatMulNBits_2_0_10.const",
      "file_size": 73728
    },
    "model.layers.0.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 5715968,
      "file_name": ".cache\\MatMulNBits_2_0_11.const",
      "file_size": 18432
    },
    "model.layers.0.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 5734400,
      "file_name": ".cache\\MatMulNBits_2_0_12.const",
      "file_size": 3072
    },
    "model.layers.0.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 5737472,
      "file_name": ".cache\\MatMulNBits_2_0_13.const",
      "file_size": 6881280
    },
    "model.layers.0.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 12618752,
      "file_name": ".cache\\MatMulNBits_2_0_14.const",
      "file_size": 430080
    },
    "model.layers.0.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 13048832,
      "file_name": ".cache\\MatMulNBits_2_0_15.const",
      "file_size": 53760
    },
    "model.layers.0.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 13102592,
      "file_name": ".cache\\MatMulNBits_2_0_16.const",
      "file_size": 35840
    },
    "model.layers.0.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 13138432,
      "file_name": ".cache\\MatMulNBits_2_0_17.const",
      "file_size": 6881280
    },
    "model.layers.0.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 20019712,
      "file_name": ".cache\\MatMulNBits_2_0_18.const",
      "file_size": 430080
    },
    "model.layers.0.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 20449792,
      "file_name": ".cache\\MatMulNBits_2_0_19.const",
      "file_size": 53760
    },
    "model.layers.0.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 20503552,
      "file_name": ".cache\\MatMulNBits_2_0_20.const",
      "file_size": 35840
    },
    "model.layers.0.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 20539392,
      "file_name": ".cache\\MatMulNBits_2_0_21.const",
      "file_size": 13762560
    },
    "model.layers.0.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 34301952,
      "file_name": ".cache\\MatMulNBits_2_0_22.const",
      "file_size": 6144
    },
    "model.layers.0.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 34308096,
      "file_name": ".cache\\MatMulNBits_2_0_23.const",
      "file_size": 430080
    },
    "model.layers.0.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 34738176,
      "file_name": ".cache\\MatMulNBits_2_0_24.const",
      "file_size": 107520
    },
    "model.layers.1.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 34845696,
      "file_name": ".cache\\MatMulNBits_2_0_25.const",
      "file_size": 3072
    },
    "model.layers.1.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 34848768,
      "file_name": ".cache\\MatMulNBits_2_0_26.const",
      "file_size": 2752512
    },
    "model.layers.1.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 37601280,
      "file_name": ".cache\\MatMulNBits_2_0_27.const",
      "file_size": 7168
    },
    "model.layers.1.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 37608448,
      "file_name": ".cache\\MatMulNBits_2_0_28.const",
      "file_size": 86016
    },
    "model.layers.1.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 37694464,
      "file_name": ".cache\\MatMulNBits_2_0_29.const",
      "file_size": 21504
    },
    "model.layers.1.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 37715968,
      "file_name": ".cache\\MatMulNBits_2_0_30.const",
      "file_size": 393216
    },
    "model.layers.1.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 38109184,
      "file_name": ".cache\\MatMulNBits_2_0_31.const",
      "file_size": 1024
    },
    "model.layers.1.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 38110208,
      "file_name": ".cache\\MatMulNBits_2_0_32.const",
      "file_size": 12288
    },
    "model.layers.1.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 38122496,
      "file_name": ".cache\\MatMulNBits_2_0_33.const",
      "file_size": 3072
    },
    "model.layers.1.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 38125568,
      "file_name": ".cache\\MatMulNBits_2_0_34.const",
      "file_size": 2359296
    },
    "model.layers.1.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 40484864,
      "file_name": ".cache\\MatMulNBits_2_0_35.const",
      "file_size": 6144
    },
    "model.layers.1.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 40491008,
      "file_name": ".cache\\MatMulNBits_2_0_36.const",
      "file_size": 73728
    },
    "model.layers.1.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 40564736,
      "file_name": ".cache\\MatMulNBits_2_0_37.const",
      "file_size": 18432
    },
    "model.layers.1.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 40583168,
      "file_name": ".cache\\MatMulNBits_2_0_38.const",
      "file_size": 3072
    },
    "model.layers.1.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 40586240,
      "file_name": ".cache\\MatMulNBits_2_0_39.const",
      "file_size": 6881280
    },
    "model.layers.1.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 47467520,
      "file_name": ".cache\\MatMulNBits_2_0_40.const",
      "file_size": 430080
    },
    "model.layers.1.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 47897600,
      "file_name": ".cache\\MatMulNBits_2_0_41.const",
      "file_size": 53760
    },
    "model.layers.1.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 47951360,
      "file_name": ".cache\\MatMulNBits_2_0_42.const",
      "file_size": 35840
    },
    "model.layers.1.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 47987200,
      "file_name": ".cache\\MatMulNBits_2_0_43.const",
      "file_size": 6881280
    },
    "model.layers.1.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 54868480,
      "file_name": ".cache\\MatMulNBits_2_0_44.const",
      "file_size": 430080
    },
    "model.layers.1.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 55298560,
      "file_name": ".cache\\MatMulNBits_2_0_45.const",
      "file_size": 53760
    },
    "model.layers.1.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 55352320,
      "file_name": ".cache\\MatMulNBits_2_0_46.const",
      "file_size": 35840
    },
    "model.layers.1.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 55388160,
      "file_name": ".cache\\MatMulNBits_2_0_47.const",
      "file_size": 13762560
    },
    "model.layers.1.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 69150720,
      "file_name": ".cache\\MatMulNBits_2_0_48.const",
      "file_size": 6144
    },
    "model.layers.1.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 69156864,
      "file_name": ".cache\\MatMulNBits_2_0_49.const",
      "file_size": 430080
    },
    "model.layers.1.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 69586944,
      "file_name": ".cache\\MatMulNBits_2_0_50.const",
      "file_size": 107520
    },
    "model.layers.2.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 69694464,
      "file_name": ".cache\\MatMulNBits_2_0_51.const",
      "file_size": 3072
    },
    "model.layers.2.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 69697536,
      "file_name": ".cache\\MatMulNBits_2_0_52.const",
      "file_size": 2752512
    },
    "model.layers.2.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 72450048,
      "file_name": ".cache\\MatMulNBits_2_0_53.const",
      "file_size": 7168
    },
    "model.layers.2.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 72457216,
      "file_name": ".cache\\MatMulNBits_2_0_54.const",
      "file_size": 86016
    },
    "model.layers.2.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 72543232,
      "file_name": ".cache\\MatMulNBits_2_0_55.const",
      "file_size": 21504
    },
    "model.layers.2.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 72564736,
      "file_name": ".cache\\MatMulNBits_2_0_56.const",
      "file_size": 393216
    },
    "model.layers.2.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 72957952,
      "file_name": ".cache\\MatMulNBits_2_0_57.const",
      "file_size": 1024
    },
    "model.layers.2.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 72958976,
      "file_name": ".cache\\MatMulNBits_2_0_58.const",
      "file_size": 12288
    },
    "model.layers.2.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 72971264,
      "file_name": ".cache\\MatMulNBits_2_0_59.const",
      "file_size": 3072
    },
    "model.layers.2.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 72974336,
      "file_name": ".cache\\MatMulNBits_2_0_60.const",
      "file_size": 2359296
    },
    "model.layers.2.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 75333632,
      "file_name": ".cache\\MatMulNBits_2_0_61.const",
      "file_size": 6144
    },
    "model.layers.2.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 75339776,
      "file_name": ".cache\\MatMulNBits_2_0_62.const",
      "file_size": 73728
    },
    "model.layers.2.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 75413504,
      "file_name": ".cache\\MatMulNBits_2_0_63.const",
      "file_size": 18432
    },
    "model.layers.2.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 75431936,
      "file_name": ".cache\\MatMulNBits_2_0_64.const",
      "file_size": 3072
    },
    "model.layers.2.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 75435008,
      "file_name": ".cache\\MatMulNBits_2_0_65.const",
      "file_size": 6881280
    },
    "model.layers.2.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 82316288,
      "file_name": ".cache\\MatMulNBits_2_0_66.const",
      "file_size": 430080
    },
    "model.layers.2.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 82746368,
      "file_name": ".cache\\MatMulNBits_2_0_67.const",
      "file_size": 53760
    },
    "model.layers.2.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 82800128,
      "file_name": ".cache\\MatMulNBits_2_0_68.const",
      "file_size": 35840
    },
    "model.layers.2.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 82835968,
      "file_name": ".cache\\MatMulNBits_2_0_69.const",
      "file_size": 6881280
    },
    "model.layers.2.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 89717248,
      "file_name": ".cache\\MatMulNBits_2_0_70.const",
      "file_size": 430080
    },
    "model.layers.2.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 90147328,
      "file_name": ".cache\\MatMulNBits_2_0_71.const",
      "file_size": 53760
    },
    "model.layers.2.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 90201088,
      "file_name": ".cache\\MatMulNBits_2_0_72.const",
      "file_size": 35840
    },
    "model.layers.2.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 90236928,
      "file_name": ".cache\\MatMulNBits_2_0_73.const",
      "file_size": 13762560
    },
    "model.layers.2.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 103999488,
      "file_name": ".cache\\MatMulNBits_2_0_74.const",
      "file_size": 6144
    },
    "model.layers.2.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 104005632,
      "file_name": ".cache\\MatMulNBits_2_0_75.const",
      "file_size": 430080
    },
    "model.layers.2.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 104435712,
      "file_name": ".cache\\MatMulNBits_2_0_76.const",
      "file_size": 107520
    },
    "model.layers.3.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 104543232,
      "file_name": ".cache\\MatMulNBits_2_0_77.const",
      "file_size": 3072
    },
    "model.layers.3.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 104546304,
      "file_name": ".cache\\MatMulNBits_2_0_78.const",
      "file_size": 2752512
    },
    "model.layers.3.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 107298816,
      "file_name": ".cache\\MatMulNBits_2_0_79.const",
      "file_size": 7168
    },
    "model.layers.3.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 107305984,
      "file_name": ".cache\\MatMulNBits_2_0_80.const",
      "file_size": 86016
    },
    "model.layers.3.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 107392000,
      "file_name": ".cache\\MatMulNBits_2_0_81.const",
      "file_size": 21504
    },
    "model.layers.3.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 107413504,
      "file_name": ".cache\\MatMulNBits_2_0_82.const",
      "file_size": 393216
    },
    "model.layers.3.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 107806720,
      "file_name": ".cache\\MatMulNBits_2_0_83.const",
      "file_size": 1024
    },
    "model.layers.3.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 107807744,
      "file_name": ".cache\\MatMulNBits_2_0_84.const",
      "file_size": 12288
    },
    "model.layers.3.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 107820032,
      "file_name": ".cache\\MatMulNBits_2_0_85.const",
      "file_size": 3072
    },
    "model.layers.3.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 107823104,
      "file_name": ".cache\\MatMulNBits_2_0_86.const",
      "file_size": 2359296
    },
    "model.layers.3.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 110182400,
      "file_name": ".cache\\MatMulNBits_2_0_87.const",
      "file_size": 6144
    },
    "model.layers.3.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 110188544,
      "file_name": ".cache\\MatMulNBits_2_0_88.const",
      "file_size": 73728
    },
    "model.layers.3.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 110262272,
      "file_name": ".cache\\MatMulNBits_2_0_89.const",
      "file_size": 18432
    },
    "model.layers.3.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 110280704,
      "file_name": ".cache\\MatMulNBits_2_0_90.const",
      "file_size": 3072
    },
    "model.layers.3.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 110283776,
      "file_name": ".cache\\MatMulNBits_2_0_91.const",
      "file_size": 6881280
    },
    "model.layers.3.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 117165056,
      "file_name": ".cache\\MatMulNBits_2_0_92.const",
      "file_size": 430080
    },
    "model.layers.3.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 117595136,
      "file_name": ".cache\\MatMulNBits_2_0_93.const",
      "file_size": 53760
    },
    "model.layers.3.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 117648896,
      "file_name": ".cache\\MatMulNBits_2_0_94.const",
      "file_size": 35840
    },
    "model.layers.3.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 117684736,
      "file_name": ".cache\\MatMulNBits_2_0_95.const",
      "file_size": 6881280
    },
    "model.layers.3.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 124566016,
      "file_name": ".cache\\MatMulNBits_2_0_96.const",
      "file_size": 430080
    },
    "model.layers.3.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 124996096,
      "file_name": ".cache\\MatMulNBits_2_0_97.const",
      "file_size": 53760
    },
    "model.layers.3.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 125049856,
      "file_name": ".cache\\MatMulNBits_2_0_98.const",
      "file_size": 35840
    },
    "model.layers.3.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 125085696,
      "file_name": ".cache\\MatMulNBits_2_0_99.const",
      "file_size": 13762560
    },
    "model.layers.3.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 138848256,
      "file_name": ".cache\\MatMulNBits_2_0_100.const",
      "file_size": 6144
    },
    "model.layers.3.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 138854400,
      "file_name": ".cache\\MatMulNBits_2_0_101.const",
      "file_size": 430080
    },
    "model.layers.3.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 139284480,
      "file_name": ".cache\\MatMulNBits_2_0_102.const",
      "file_size": 107520
    },
    "model.layers.4.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 139392000,
      "file_name": ".cache\\MatMulNBits_2_0_103.const",
      "file_size": 3072
    },
    "model.layers.4.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 139395072,
      "file_name": ".cache\\MatMulNBits_2_0_104.const",
      "file_size": 2752512
    },
    "model.layers.4.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 142147584,
      "file_name": ".cache\\MatMulNBits_2_0_105.const",
      "file_size": 7168
    },
    "model.layers.4.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 142154752,
      "file_name": ".cache\\MatMulNBits_2_0_106.const",
      "file_size": 86016
    },
    "model.layers.4.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 142240768,
      "file_name": ".cache\\MatMulNBits_2_0_107.const",
      "file_size": 21504
    },
    "model.layers.4.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 142262272,
      "file_name": ".cache\\MatMulNBits_2_0_108.const",
      "file_size": 393216
    },
    "model.layers.4.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 142655488,
      "file_name": ".cache\\MatMulNBits_2_0_109.const",
      "file_size": 1024
    },
    "model.layers.4.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 142656512,
      "file_name": ".cache\\MatMulNBits_2_0_110.const",
      "file_size": 12288
    },
    "model.layers.4.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 142668800,
      "file_name": ".cache\\MatMulNBits_2_0_111.const",
      "file_size": 3072
    },
    "model.layers.4.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 142671872,
      "file_name": ".cache\\MatMulNBits_2_0_112.const",
      "file_size": 2359296
    },
    "model.layers.4.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 145031168,
      "file_name": ".cache\\MatMulNBits_2_0_113.const",
      "file_size": 6144
    },
    "model.layers.4.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 145037312,
      "file_name": ".cache\\MatMulNBits_2_0_114.const",
      "file_size": 73728
    },
    "model.layers.4.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 145111040,
      "file_name": ".cache\\MatMulNBits_2_0_115.const",
      "file_size": 18432
    },
    "model.layers.4.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 145129472,
      "file_name": ".cache\\MatMulNBits_2_0_116.const",
      "file_size": 3072
    },
    "model.layers.4.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 145132544,
      "file_name": ".cache\\MatMulNBits_2_0_117.const",
      "file_size": 6881280
    },
    "model.layers.4.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 152013824,
      "file_name": ".cache\\MatMulNBits_2_0_118.const",
      "file_size": 430080
    },
    "model.layers.4.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 152443904,
      "file_name": ".cache\\MatMulNBits_2_0_119.const",
      "file_size": 53760
    },
    "model.layers.4.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 152497664,
      "file_name": ".cache\\MatMulNBits_2_0_120.const",
      "file_size": 35840
    },
    "model.layers.4.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 152533504,
      "file_name": ".cache\\MatMulNBits_2_0_121.const",
      "file_size": 6881280
    },
    "model.layers.4.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 159414784,
      "file_name": ".cache\\MatMulNBits_2_0_122.const",
      "file_size": 430080
    },
    "model.layers.4.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 159844864,
      "file_name": ".cache\\MatMulNBits_2_0_123.const",
      "file_size": 53760
    },
    "model.layers.4.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 159898624,
      "file_name": ".cache\\MatMulNBits_2_0_124.const",
      "file_size": 35840
    },
    "model.layers.4.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 159934464,
      "file_name": ".cache\\MatMulNBits_2_0_125.const",
      "file_size": 13762560
    },
    "model.layers.4.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 173697024,
      "file_name": ".cache\\MatMulNBits_2_0_126.const",
      "file_size": 6144
    },
    "model.layers.4.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 173703168,
      "file_name": ".cache\\MatMulNBits_2_0_127.const",
      "file_size": 430080
    },
    "model.layers.4.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 174133248,
      "file_name": ".cache\\MatMulNBits_2_0_128.const",
      "file_size": 107520
    },
    "model.layers.5.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 174240768,
      "file_name": ".cache\\MatMulNBits_2_0_129.const",
      "file_size": 3072
    },
    "model.layers.5.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 174243840,
      "file_name": ".cache\\MatMulNBits_2_0_130.const",
      "file_size": 2752512
    },
    "model.layers.5.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 176996352,
      "file_name": ".cache\\MatMulNBits_2_0_131.const",
      "file_size": 7168
    },
    "model.layers.5.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 177003520,
      "file_name": ".cache\\MatMulNBits_2_0_132.const",
      "file_size": 86016
    },
    "model.layers.5.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 177089536,
      "file_name": ".cache\\MatMulNBits_2_0_133.const",
      "file_size": 21504
    },
    "model.layers.5.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 177111040,
      "file_name": ".cache\\MatMulNBits_2_0_134.const",
      "file_size": 393216
    },
    "model.layers.5.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 177504256,
      "file_name": ".cache\\MatMulNBits_2_0_135.const",
      "file_size": 1024
    },
    "model.layers.5.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 177505280,
      "file_name": ".cache\\MatMulNBits_2_0_136.const",
      "file_size": 12288
    },
    "model.layers.5.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 177517568,
      "file_name": ".cache\\MatMulNBits_2_0_137.const",
      "file_size": 3072
    },
    "model.layers.5.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 177520640,
      "file_name": ".cache\\MatMulNBits_2_0_138.const",
      "file_size": 2359296
    },
    "model.layers.5.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 179879936,
      "file_name": ".cache\\MatMulNBits_2_0_139.const",
      "file_size": 6144
    },
    "model.layers.5.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 179886080,
      "file_name": ".cache\\MatMulNBits_2_0_140.const",
      "file_size": 73728
    },
    "model.layers.5.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 179959808,
      "file_name": ".cache\\MatMulNBits_2_0_141.const",
      "file_size": 18432
    },
    "model.layers.5.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 179978240,
      "file_name": ".cache\\MatMulNBits_2_0_142.const",
      "file_size": 3072
    },
    "model.layers.5.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 179981312,
      "file_name": ".cache\\MatMulNBits_2_0_143.const",
      "file_size": 6881280
    },
    "model.layers.5.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 186862592,
      "file_name": ".cache\\MatMulNBits_2_0_144.const",
      "file_size": 430080
    },
    "model.layers.5.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 187292672,
      "file_name": ".cache\\MatMulNBits_2_0_145.const",
      "file_size": 53760
    },
    "model.layers.5.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 187346432,
      "file_name": ".cache\\MatMulNBits_2_0_146.const",
      "file_size": 35840
    },
    "model.layers.5.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 187382272,
      "file_name": ".cache\\MatMulNBits_2_0_147.const",
      "file_size": 6881280
    },
    "model.layers.5.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 194263552,
      "file_name": ".cache\\MatMulNBits_2_0_148.const",
      "file_size": 430080
    },
    "model.layers.5.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 194693632,
      "file_name": ".cache\\MatMulNBits_2_0_149.const",
      "file_size": 53760
    },
    "model.layers.5.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 194747392,
      "file_name": ".cache\\MatMulNBits_2_0_150.const",
      "file_size": 35840
    },
    "model.layers.5.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 194783232,
      "file_name": ".cache\\MatMulNBits_2_0_151.const",
      "file_size": 13762560
    },
    "model.layers.5.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 208545792,
      "file_name": ".cache\\MatMulNBits_2_0_152.const",
      "file_size": 6144
    },
    "model.layers.5.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 208551936,
      "file_name": ".cache\\MatMulNBits_2_0_153.const",
      "file_size": 430080
    },
    "model.layers.5.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 208982016,
      "file_name": ".cache\\MatMulNBits_2_0_154.const",
      "file_size": 107520
    },
    "model.layers.6.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 209089536,
      "file_name": ".cache\\MatMulNBits_2_0_155.const",
      "file_size": 3072
    },
    "model.layers.6.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 209092608,
      "file_name": ".cache\\MatMulNBits_2_0_156.const",
      "file_size": 2752512
    },
    "model.layers.6.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 211845120,
      "file_name": ".cache\\MatMulNBits_2_0_157.const",
      "file_size": 7168
    },
    "model.layers.6.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 211852288,
      "file_name": ".cache\\MatMulNBits_2_0_158.const",
      "file_size": 86016
    },
    "model.layers.6.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 211938304,
      "file_name": ".cache\\MatMulNBits_2_0_159.const",
      "file_size": 21504
    },
    "model.layers.6.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 211959808,
      "file_name": ".cache\\MatMulNBits_2_0_160.const",
      "file_size": 393216
    },
    "model.layers.6.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 212353024,
      "file_name": ".cache\\MatMulNBits_2_0_161.const",
      "file_size": 1024
    },
    "model.layers.6.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 212354048,
      "file_name": ".cache\\MatMulNBits_2_0_162.const",
      "file_size": 12288
    },
    "model.layers.6.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 212366336,
      "file_name": ".cache\\MatMulNBits_2_0_163.const",
      "file_size": 3072
    },
    "model.layers.6.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 212369408,
      "file_name": ".cache\\MatMulNBits_2_0_164.const",
      "file_size": 2359296
    },
    "model.layers.6.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 214728704,
      "file_name": ".cache\\MatMulNBits_2_0_165.const",
      "file_size": 6144
    },
    "model.layers.6.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 214734848,
      "file_name": ".cache\\MatMulNBits_2_0_166.const",
      "file_size": 73728
    },
    "model.layers.6.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 214808576,
      "file_name": ".cache\\MatMulNBits_2_0_167.const",
      "file_size": 18432
    },
    "model.layers.6.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 214827008,
      "file_name": ".cache\\MatMulNBits_2_0_168.const",
      "file_size": 3072
    },
    "model.layers.6.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 214830080,
      "file_name": ".cache\\MatMulNBits_2_0_169.const",
      "file_size": 6881280
    },
    "model.layers.6.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 221711360,
      "file_name": ".cache\\MatMulNBits_2_0_170.const",
      "file_size": 430080
    },
    "model.layers.6.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 222141440,
      "file_name": ".cache\\MatMulNBits_2_0_171.const",
      "file_size": 53760
    },
    "model.layers.6.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 222195200,
      "file_name": ".cache\\MatMulNBits_2_0_172.const",
      "file_size": 35840
    },
    "model.layers.6.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 222231040,
      "file_name": ".cache\\MatMulNBits_2_0_173.const",
      "file_size": 6881280
    },
    "model.layers.6.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 229112320,
      "file_name": ".cache\\MatMulNBits_2_0_174.const",
      "file_size": 430080
    },
    "model.layers.6.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 229542400,
      "file_name": ".cache\\MatMulNBits_2_0_175.const",
      "file_size": 53760
    },
    "model.layers.6.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 229596160,
      "file_name": ".cache\\MatMulNBits_2_0_176.const",
      "file_size": 35840
    },
    "model.layers.6.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 229632000,
      "file_name": ".cache\\MatMulNBits_2_0_177.const",
      "file_size": 13762560
    },
    "model.layers.6.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 243394560,
      "file_name": ".cache\\MatMulNBits_2_0_178.const",
      "file_size": 6144
    },
    "model.layers.6.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 243400704,
      "file_name": ".cache\\MatMulNBits_2_0_179.const",
      "file_size": 430080
    },
    "model.layers.6.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 243830784,
      "file_name": ".cache\\MatMulNBits_2_0_180.const",
      "file_size": 107520
    },
    "model.layers.7.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 243938304,
      "file_name": ".cache\\MatMulNBits_2_0_181.const",
      "file_size": 3072
    },
    "model.layers.7.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 243941376,
      "file_name": ".cache\\MatMulNBits_2_0_182.const",
      "file_size": 2752512
    },
    "model.layers.7.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 246693888,
      "file_name": ".cache\\MatMulNBits_2_0_183.const",
      "file_size": 7168
    },
    "model.layers.7.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 246701056,
      "file_name": ".cache\\MatMulNBits_2_0_184.const",
      "file_size": 86016
    },
    "model.layers.7.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 246787072,
      "file_name": ".cache\\MatMulNBits_2_0_185.const",
      "file_size": 21504
    },
    "model.layers.7.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 246808576,
      "file_name": ".cache\\MatMulNBits_2_0_186.const",
      "file_size": 393216
    },
    "model.layers.7.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 247201792,
      "file_name": ".cache\\MatMulNBits_2_0_187.const",
      "file_size": 1024
    },
    "model.layers.7.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 247202816,
      "file_name": ".cache\\MatMulNBits_2_0_188.const",
      "file_size": 12288
    },
    "model.layers.7.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 247215104,
      "file_name": ".cache\\MatMulNBits_2_0_189.const",
      "file_size": 3072
    },
    "model.layers.7.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 247218176,
      "file_name": ".cache\\MatMulNBits_2_0_190.const",
      "file_size": 2359296
    },
    "model.layers.7.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 249577472,
      "file_name": ".cache\\MatMulNBits_2_0_191.const",
      "file_size": 6144
    },
    "model.layers.7.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 249583616,
      "file_name": ".cache\\MatMulNBits_2_0_192.const",
      "file_size": 73728
    },
    "model.layers.7.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 249657344,
      "file_name": ".cache\\MatMulNBits_2_0_193.const",
      "file_size": 18432
    },
    "model.layers.7.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 249675776,
      "file_name": ".cache\\MatMulNBits_2_0_194.const",
      "file_size": 3072
    },
    "model.layers.7.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 249678848,
      "file_name": ".cache\\MatMulNBits_2_0_195.const",
      "file_size": 6881280
    },
    "model.layers.7.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 256560128,
      "file_name": ".cache\\MatMulNBits_2_0_196.const",
      "file_size": 430080
    },
    "model.layers.7.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 256990208,
      "file_name": ".cache\\MatMulNBits_2_0_197.const",
      "file_size": 53760
    },
    "model.layers.7.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 257043968,
      "file_name": ".cache\\MatMulNBits_2_0_198.const",
      "file_size": 35840
    },
    "model.layers.7.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 257079808,
      "file_name": ".cache\\MatMulNBits_2_0_199.const",
      "file_size": 6881280
    },
    "model.layers.7.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 263961088,
      "file_name": ".cache\\MatMulNBits_2_0_200.const",
      "file_size": 430080
    },
    "model.layers.7.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 264391168,
      "file_name": ".cache\\MatMulNBits_2_0_201.const",
      "file_size": 53760
    },
    "model.layers.7.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 264444928,
      "file_name": ".cache\\MatMulNBits_2_0_202.const",
      "file_size": 35840
    },
    "model.layers.7.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 264480768,
      "file_name": ".cache\\MatMulNBits_2_0_203.const",
      "file_size": 13762560
    },
    "model.layers.7.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 278243328,
      "file_name": ".cache\\MatMulNBits_2_0_204.const",
      "file_size": 6144
    },
    "model.layers.7.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 278249472,
      "file_name": ".cache\\MatMulNBits_2_0_205.const",
      "file_size": 430080
    },
    "model.layers.7.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 278679552,
      "file_name": ".cache\\MatMulNBits_2_0_206.const",
      "file_size": 107520
    },
    "model.layers.8.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 278787072,
      "file_name": ".cache\\MatMulNBits_2_0_207.const",
      "file_size": 3072
    },
    "model.layers.8.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 278790144,
      "file_name": ".cache\\MatMulNBits_2_0_208.const",
      "file_size": 2752512
    },
    "model.layers.8.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 281542656,
      "file_name": ".cache\\MatMulNBits_2_0_209.const",
      "file_size": 7168
    },
    "model.layers.8.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 281549824,
      "file_name": ".cache\\MatMulNBits_2_0_210.const",
      "file_size": 86016
    },
    "model.layers.8.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 281635840,
      "file_name": ".cache\\MatMulNBits_2_0_211.const",
      "file_size": 21504
    },
    "model.layers.8.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 281657344,
      "file_name": ".cache\\MatMulNBits_2_0_212.const",
      "file_size": 393216
    },
    "model.layers.8.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 282050560,
      "file_name": ".cache\\MatMulNBits_2_0_213.const",
      "file_size": 1024
    },
    "model.layers.8.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 282051584,
      "file_name": ".cache\\MatMulNBits_2_0_214.const",
      "file_size": 12288
    },
    "model.layers.8.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 282063872,
      "file_name": ".cache\\MatMulNBits_2_0_215.const",
      "file_size": 3072
    },
    "model.layers.8.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 282066944,
      "file_name": ".cache\\MatMulNBits_2_0_216.const",
      "file_size": 2359296
    },
    "model.layers.8.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 284426240,
      "file_name": ".cache\\MatMulNBits_2_0_217.const",
      "file_size": 6144
    },
    "model.layers.8.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 284432384,
      "file_name": ".cache\\MatMulNBits_2_0_218.const",
      "file_size": 73728
    },
    "model.layers.8.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 284506112,
      "file_name": ".cache\\MatMulNBits_2_0_219.const",
      "file_size": 18432
    },
    "model.layers.8.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 284524544,
      "file_name": ".cache\\MatMulNBits_2_0_220.const",
      "file_size": 3072
    },
    "model.layers.8.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 284527616,
      "file_name": ".cache\\MatMulNBits_2_0_221.const",
      "file_size": 6881280
    },
    "model.layers.8.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 291408896,
      "file_name": ".cache\\MatMulNBits_2_0_222.const",
      "file_size": 430080
    },
    "model.layers.8.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 291838976,
      "file_name": ".cache\\MatMulNBits_2_0_223.const",
      "file_size": 53760
    },
    "model.layers.8.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 291892736,
      "file_name": ".cache\\MatMulNBits_2_0_224.const",
      "file_size": 35840
    },
    "model.layers.8.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 291928576,
      "file_name": ".cache\\MatMulNBits_2_0_225.const",
      "file_size": 6881280
    },
    "model.layers.8.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 298809856,
      "file_name": ".cache\\MatMulNBits_2_0_226.const",
      "file_size": 430080
    },
    "model.layers.8.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 299239936,
      "file_name": ".cache\\MatMulNBits_2_0_227.const",
      "file_size": 53760
    },
    "model.layers.8.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 299293696,
      "file_name": ".cache\\MatMulNBits_2_0_228.const",
      "file_size": 35840
    },
    "model.layers.8.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 299329536,
      "file_name": ".cache\\MatMulNBits_2_0_229.const",
      "file_size": 13762560
    },
    "model.layers.8.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 313092096,
      "file_name": ".cache\\MatMulNBits_2_0_230.const",
      "file_size": 6144
    },
    "model.layers.8.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 313098240,
      "file_name": ".cache\\MatMulNBits_2_0_231.const",
      "file_size": 430080
    },
    "model.layers.8.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 313528320,
      "file_name": ".cache\\MatMulNBits_2_0_232.const",
      "file_size": 107520
    },
    "model.layers.9.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 313635840,
      "file_name": ".cache\\MatMulNBits_2_0_233.const",
      "file_size": 3072
    },
    "model.layers.9.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 313638912,
      "file_name": ".cache\\MatMulNBits_2_0_234.const",
      "file_size": 2752512
    },
    "model.layers.9.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 316391424,
      "file_name": ".cache\\MatMulNBits_2_0_235.const",
      "file_size": 7168
    },
    "model.layers.9.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 316398592,
      "file_name": ".cache\\MatMulNBits_2_0_236.const",
      "file_size": 86016
    },
    "model.layers.9.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 316484608,
      "file_name": ".cache\\MatMulNBits_2_0_237.const",
      "file_size": 21504
    },
    "model.layers.9.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 316506112,
      "file_name": ".cache\\MatMulNBits_2_0_238.const",
      "file_size": 393216
    },
    "model.layers.9.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 316899328,
      "file_name": ".cache\\MatMulNBits_2_0_239.const",
      "file_size": 1024
    },
    "model.layers.9.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 316900352,
      "file_name": ".cache\\MatMulNBits_2_0_240.const",
      "file_size": 12288
    },
    "model.layers.9.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 316912640,
      "file_name": ".cache\\MatMulNBits_2_0_241.const",
      "file_size": 3072
    },
    "model.layers.9.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 316915712,
      "file_name": ".cache\\MatMulNBits_2_0_242.const",
      "file_size": 2359296
    },
    "model.layers.9.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 319275008,
      "file_name": ".cache\\MatMulNBits_2_0_243.const",
      "file_size": 6144
    },
    "model.layers.9.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 319281152,
      "file_name": ".cache\\MatMulNBits_2_0_244.const",
      "file_size": 73728
    },
    "model.layers.9.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 319354880,
      "file_name": ".cache\\MatMulNBits_2_0_245.const",
      "file_size": 18432
    },
    "model.layers.9.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 319373312,
      "file_name": ".cache\\MatMulNBits_2_0_246.const",
      "file_size": 3072
    },
    "model.layers.9.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 319376384,
      "file_name": ".cache\\MatMulNBits_2_0_247.const",
      "file_size": 6881280
    },
    "model.layers.9.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 326257664,
      "file_name": ".cache\\MatMulNBits_2_0_248.const",
      "file_size": 430080
    },
    "model.layers.9.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 326687744,
      "file_name": ".cache\\MatMulNBits_2_0_249.const",
      "file_size": 53760
    },
    "model.layers.9.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 326741504,
      "file_name": ".cache\\MatMulNBits_2_0_250.const",
      "file_size": 35840
    },
    "model.layers.9.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 326777344,
      "file_name": ".cache\\MatMulNBits_2_0_251.const",
      "file_size": 6881280
    },
    "model.layers.9.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 333658624,
      "file_name": ".cache\\MatMulNBits_2_0_252.const",
      "file_size": 430080
    },
    "model.layers.9.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 334088704,
      "file_name": ".cache\\MatMulNBits_2_0_253.const",
      "file_size": 53760
    },
    "model.layers.9.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 334142464,
      "file_name": ".cache\\MatMulNBits_2_0_254.const",
      "file_size": 35840
    },
    "model.layers.9.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 334178304,
      "file_name": ".cache\\MatMulNBits_2_0_255.const",
      "file_size": 13762560
    },
    "model.layers.9.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 347940864,
      "file_name": ".cache\\MatMulNBits_2_0_256.const",
      "file_size": 6144
    },
    "model.layers.9.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 347947008,
      "file_name": ".cache\\MatMulNBits_2_0_257.const",
      "file_size": 430080
    },
    "model.layers.9.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 348377088,
      "file_name": ".cache\\MatMulNBits_2_0_258.const",
      "file_size": 107520
    },
    "model.layers.10.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 348484608,
      "file_name": ".cache\\MatMulNBits_2_0_259.const",
      "file_size": 3072
    },
    "model.layers.10.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 348487680,
      "file_name": ".cache\\MatMulNBits_2_0_260.const",
      "file_size": 2752512
    },
    "model.layers.10.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 351240192,
      "file_name": ".cache\\MatMulNBits_2_0_261.const",
      "file_size": 7168
    },
    "model.layers.10.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 351247360,
      "file_name": ".cache\\MatMulNBits_2_0_262.const",
      "file_size": 86016
    },
    "model.layers.10.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 351333376,
      "file_name": ".cache\\MatMulNBits_2_0_263.const",
      "file_size": 21504
    },
    "model.layers.10.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 351354880,
      "file_name": ".cache\\MatMulNBits_2_0_264.const",
      "file_size": 393216
    },
    "model.layers.10.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 351748096,
      "file_name": ".cache\\MatMulNBits_2_0_265.const",
      "file_size": 1024
    },
    "model.layers.10.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 351749120,
      "file_name": ".cache\\MatMulNBits_2_0_266.const",
      "file_size": 12288
    },
    "model.layers.10.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 351761408,
      "file_name": ".cache\\MatMulNBits_2_0_267.const",
      "file_size": 3072
    },
    "model.layers.10.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 351764480,
      "file_name": ".cache\\MatMulNBits_2_0_268.const",
      "file_size": 2359296
    },
    "model.layers.10.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 354123776,
      "file_name": ".cache\\MatMulNBits_2_0_269.const",
      "file_size": 6144
    },
    "model.layers.10.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 354129920,
      "file_name": ".cache\\MatMulNBits_2_0_270.const",
      "file_size": 73728
    },
    "model.layers.10.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 354203648,
      "file_name": ".cache\\MatMulNBits_2_0_271.const",
      "file_size": 18432
    },
    "model.layers.10.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 354222080,
      "file_name": ".cache\\MatMulNBits_2_0_272.const",
      "file_size": 3072
    },
    "model.layers.10.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 354225152,
      "file_name": ".cache\\MatMulNBits_2_0_273.const",
      "file_size": 6881280
    },
    "model.layers.10.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 361106432,
      "file_name": ".cache\\MatMulNBits_2_0_274.const",
      "file_size": 430080
    },
    "model.layers.10.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 361536512,
      "file_name": ".cache\\MatMulNBits_2_0_275.const",
      "file_size": 53760
    },
    "model.layers.10.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 361590272,
      "file_name": ".cache\\MatMulNBits_2_0_276.const",
      "file_size": 35840
    },
    "model.layers.10.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 361626112,
      "file_name": ".cache\\MatMulNBits_2_0_277.const",
      "file_size": 6881280
    },
    "model.layers.10.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 368507392,
      "file_name": ".cache\\MatMulNBits_2_0_278.const",
      "file_size": 430080
    },
    "model.layers.10.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 368937472,
      "file_name": ".cache\\MatMulNBits_2_0_279.const",
      "file_size": 53760
    },
    "model.layers.10.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 368991232,
      "file_name": ".cache\\MatMulNBits_2_0_280.const",
      "file_size": 35840
    },
    "model.layers.10.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 369027072,
      "file_name": ".cache\\MatMulNBits_2_0_281.const",
      "file_size": 13762560
    },
    "model.layers.10.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 382789632,
      "file_name": ".cache\\MatMulNBits_2_0_282.const",
      "file_size": 6144
    },
    "model.layers.10.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 382795776,
      "file_name": ".cache\\MatMulNBits_2_0_283.const",
      "file_size": 430080
    },
    "model.layers.10.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 383225856,
      "file_name": ".cache\\MatMulNBits_2_0_284.const",
      "file_size": 107520
    },
    "model.layers.11.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 383333376,
      "file_name": ".cache\\MatMulNBits_2_0_285.const",
      "file_size": 3072
    },
    "model.layers.11.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 383336448,
      "file_name": ".cache\\MatMulNBits_2_0_286.const",
      "file_size": 2752512
    },
    "model.layers.11.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 386088960,
      "file_name": ".cache\\MatMulNBits_2_0_287.const",
      "file_size": 7168
    },
    "model.layers.11.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 386096128,
      "file_name": ".cache\\MatMulNBits_2_0_288.const",
      "file_size": 86016
    },
    "model.layers.11.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 386182144,
      "file_name": ".cache\\MatMulNBits_2_0_289.const",
      "file_size": 21504
    },
    "model.layers.11.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 386203648,
      "file_name": ".cache\\MatMulNBits_2_0_290.const",
      "file_size": 393216
    },
    "model.layers.11.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 386596864,
      "file_name": ".cache\\MatMulNBits_2_0_291.const",
      "file_size": 1024
    },
    "model.layers.11.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 386597888,
      "file_name": ".cache\\MatMulNBits_2_0_292.const",
      "file_size": 12288
    },
    "model.layers.11.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 386610176,
      "file_name": ".cache\\MatMulNBits_2_0_293.const",
      "file_size": 3072
    },
    "model.layers.11.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 386613248,
      "file_name": ".cache\\MatMulNBits_2_0_294.const",
      "file_size": 2359296
    },
    "model.layers.11.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 388972544,
      "file_name": ".cache\\MatMulNBits_2_0_295.const",
      "file_size": 6144
    },
    "model.layers.11.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 388978688,
      "file_name": ".cache\\MatMulNBits_2_0_296.const",
      "file_size": 73728
    },
    "model.layers.11.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 389052416,
      "file_name": ".cache\\MatMulNBits_2_0_297.const",
      "file_size": 18432
    },
    "model.layers.11.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 389070848,
      "file_name": ".cache\\MatMulNBits_2_0_298.const",
      "file_size": 3072
    },
    "model.layers.11.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 389073920,
      "file_name": ".cache\\MatMulNBits_2_0_299.const",
      "file_size": 6881280
    },
    "model.layers.11.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 395955200,
      "file_name": ".cache\\MatMulNBits_2_0_300.const",
      "file_size": 430080
    },
    "model.layers.11.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 396385280,
      "file_name": ".cache\\MatMulNBits_2_0_301.const",
      "file_size": 53760
    },
    "model.layers.11.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 396439040,
      "file_name": ".cache\\MatMulNBits_2_0_302.const",
      "file_size": 35840
    },
    "model.layers.11.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 396474880,
      "file_name": ".cache\\MatMulNBits_2_0_303.const",
      "file_size": 6881280
    },
    "model.layers.11.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 403356160,
      "file_name": ".cache\\MatMulNBits_2_0_304.const",
      "file_size": 430080
    },
    "model.layers.11.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 403786240,
      "file_name": ".cache\\MatMulNBits_2_0_305.const",
      "file_size": 53760
    },
    "model.layers.11.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 403840000,
      "file_name": ".cache\\MatMulNBits_2_0_306.const",
      "file_size": 35840
    },
    "model.layers.11.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 403875840,
      "file_name": ".cache\\MatMulNBits_2_0_307.const",
      "file_size": 13762560
    },
    "model.layers.11.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 417638400,
      "file_name": ".cache\\MatMulNBits_2_0_308.const",
      "file_size": 6144
    },
    "model.layers.11.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 417644544,
      "file_name": ".cache\\MatMulNBits_2_0_309.const",
      "file_size": 430080
    },
    "model.layers.11.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 418074624,
      "file_name": ".cache\\MatMulNBits_2_0_310.const",
      "file_size": 107520
    },
    "model.layers.12.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 418182144,
      "file_name": ".cache\\MatMulNBits_2_0_311.const",
      "file_size": 3072
    },
    "model.layers.12.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 418185216,
      "file_name": ".cache\\MatMulNBits_2_0_312.const",
      "file_size": 2752512
    },
    "model.layers.12.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 420937728,
      "file_name": ".cache\\MatMulNBits_2_0_313.const",
      "file_size": 7168
    },
    "model.layers.12.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 420944896,
      "file_name": ".cache\\MatMulNBits_2_0_314.const",
      "file_size": 86016
    },
    "model.layers.12.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 421030912,
      "file_name": ".cache\\MatMulNBits_2_0_315.const",
      "file_size": 21504
    },
    "model.layers.12.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 421052416,
      "file_name": ".cache\\MatMulNBits_2_0_316.const",
      "file_size": 393216
    },
    "model.layers.12.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 421445632,
      "file_name": ".cache\\MatMulNBits_2_0_317.const",
      "file_size": 1024
    },
    "model.layers.12.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 421446656,
      "file_name": ".cache\\MatMulNBits_2_0_318.const",
      "file_size": 12288
    },
    "model.layers.12.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 421458944,
      "file_name": ".cache\\MatMulNBits_2_0_319.const",
      "file_size": 3072
    },
    "model.layers.12.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 421462016,
      "file_name": ".cache\\MatMulNBits_2_0_320.const",
      "file_size": 2359296
    },
    "model.layers.12.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 423821312,
      "file_name": ".cache\\MatMulNBits_2_0_321.const",
      "file_size": 6144
    },
    "model.layers.12.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 423827456,
      "file_name": ".cache\\MatMulNBits_2_0_322.const",
      "file_size": 73728
    },
    "model.layers.12.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 423901184,
      "file_name": ".cache\\MatMulNBits_2_0_323.const",
      "file_size": 18432
    },
    "model.layers.12.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 423919616,
      "file_name": ".cache\\MatMulNBits_2_0_324.const",
      "file_size": 3072
    },
    "model.layers.12.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 423922688,
      "file_name": ".cache\\MatMulNBits_2_0_325.const",
      "file_size": 6881280
    },
    "model.layers.12.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 430803968,
      "file_name": ".cache\\MatMulNBits_2_0_326.const",
      "file_size": 430080
    },
    "model.layers.12.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 431234048,
      "file_name": ".cache\\MatMulNBits_2_0_327.const",
      "file_size": 53760
    },
    "model.layers.12.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 431287808,
      "file_name": ".cache\\MatMulNBits_2_0_328.const",
      "file_size": 35840
    },
    "model.layers.12.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 431323648,
      "file_name": ".cache\\MatMulNBits_2_0_329.const",
      "file_size": 6881280
    },
    "model.layers.12.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 438204928,
      "file_name": ".cache\\MatMulNBits_2_0_330.const",
      "file_size": 430080
    },
    "model.layers.12.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 438635008,
      "file_name": ".cache\\MatMulNBits_2_0_331.const",
      "file_size": 53760
    },
    "model.layers.12.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 438688768,
      "file_name": ".cache\\MatMulNBits_2_0_332.const",
      "file_size": 35840
    },
    "model.layers.12.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 438724608,
      "file_name": ".cache\\MatMulNBits_2_0_333.const",
      "file_size": 13762560
    },
    "model.layers.12.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 452487168,
      "file_name": ".cache\\MatMulNBits_2_0_334.const",
      "file_size": 6144
    },
    "model.layers.12.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 452493312,
      "file_name": ".cache\\MatMulNBits_2_0_335.const",
      "file_size": 430080
    },
    "model.layers.12.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 452923392,
      "file_name": ".cache\\MatMulNBits_2_0_336.const",
      "file_size": 107520
    },
    "model.layers.13.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 453030912,
      "file_name": ".cache\\MatMulNBits_2_0_337.const",
      "file_size": 3072
    },
    "model.layers.13.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 453033984,
      "file_name": ".cache\\MatMulNBits_2_0_338.const",
      "file_size": 2752512
    },
    "model.layers.13.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 455786496,
      "file_name": ".cache\\MatMulNBits_2_0_339.const",
      "file_size": 7168
    },
    "model.layers.13.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 455793664,
      "file_name": ".cache\\MatMulNBits_2_0_340.const",
      "file_size": 86016
    },
    "model.layers.13.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 455879680,
      "file_name": ".cache\\MatMulNBits_2_0_341.const",
      "file_size": 21504
    },
    "model.layers.13.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 455901184,
      "file_name": ".cache\\MatMulNBits_2_0_342.const",
      "file_size": 393216
    },
    "model.layers.13.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 456294400,
      "file_name": ".cache\\MatMulNBits_2_0_343.const",
      "file_size": 1024
    },
    "model.layers.13.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 456295424,
      "file_name": ".cache\\MatMulNBits_2_0_344.const",
      "file_size": 12288
    },
    "model.layers.13.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 456307712,
      "file_name": ".cache\\MatMulNBits_2_0_345.const",
      "file_size": 3072
    },
    "model.layers.13.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 456310784,
      "file_name": ".cache\\MatMulNBits_2_0_346.const",
      "file_size": 2359296
    },
    "model.layers.13.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 458670080,
      "file_name": ".cache\\MatMulNBits_2_0_347.const",
      "file_size": 6144
    },
    "model.layers.13.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 458676224,
      "file_name": ".cache\\MatMulNBits_2_0_348.const",
      "file_size": 73728
    },
    "model.layers.13.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 458749952,
      "file_name": ".cache\\MatMulNBits_2_0_349.const",
      "file_size": 18432
    },
    "model.layers.13.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 458768384,
      "file_name": ".cache\\MatMulNBits_2_0_350.const",
      "file_size": 3072
    },
    "model.layers.13.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 458771456,
      "file_name": ".cache\\MatMulNBits_2_0_351.const",
      "file_size": 6881280
    },
    "model.layers.13.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 465652736,
      "file_name": ".cache\\MatMulNBits_2_0_352.const",
      "file_size": 430080
    },
    "model.layers.13.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 466082816,
      "file_name": ".cache\\MatMulNBits_2_0_353.const",
      "file_size": 53760
    },
    "model.layers.13.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 466136576,
      "file_name": ".cache\\MatMulNBits_2_0_354.const",
      "file_size": 35840
    },
    "model.layers.13.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 466172416,
      "file_name": ".cache\\MatMulNBits_2_0_355.const",
      "file_size": 6881280
    },
    "model.layers.13.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 473053696,
      "file_name": ".cache\\MatMulNBits_2_0_356.const",
      "file_size": 430080
    },
    "model.layers.13.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 473483776,
      "file_name": ".cache\\MatMulNBits_2_0_357.const",
      "file_size": 53760
    },
    "model.layers.13.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 473537536,
      "file_name": ".cache\\MatMulNBits_2_0_358.const",
      "file_size": 35840
    },
    "model.layers.13.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 473573376,
      "file_name": ".cache\\MatMulNBits_2_0_359.const",
      "file_size": 13762560
    },
    "model.layers.13.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 487335936,
      "file_name": ".cache\\MatMulNBits_2_0_360.const",
      "file_size": 6144
    },
    "model.layers.13.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 487342080,
      "file_name": ".cache\\MatMulNBits_2_0_361.const",
      "file_size": 430080
    },
    "model.layers.13.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 487772160,
      "file_name": ".cache\\MatMulNBits_2_0_362.const",
      "file_size": 107520
    },
    "model.layers.14.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 487879680,
      "file_name": ".cache\\MatMulNBits_2_0_363.const",
      "file_size": 3072
    },
    "model.layers.14.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 487882752,
      "file_name": ".cache\\MatMulNBits_2_0_364.const",
      "file_size": 2752512
    },
    "model.layers.14.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 490635264,
      "file_name": ".cache\\MatMulNBits_2_0_365.const",
      "file_size": 7168
    },
    "model.layers.14.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 490642432,
      "file_name": ".cache\\MatMulNBits_2_0_366.const",
      "file_size": 86016
    },
    "model.layers.14.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 490728448,
      "file_name": ".cache\\MatMulNBits_2_0_367.const",
      "file_size": 21504
    },
    "model.layers.14.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 490749952,
      "file_name": ".cache\\MatMulNBits_2_0_368.const",
      "file_size": 393216
    },
    "model.layers.14.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 491143168,
      "file_name": ".cache\\MatMulNBits_2_0_369.const",
      "file_size": 1024
    },
    "model.layers.14.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 491144192,
      "file_name": ".cache\\MatMulNBits_2_0_370.const",
      "file_size": 12288
    },
    "model.layers.14.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 491156480,
      "file_name": ".cache\\MatMulNBits_2_0_371.const",
      "file_size": 3072
    },
    "model.layers.14.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 491159552,
      "file_name": ".cache\\MatMulNBits_2_0_372.const",
      "file_size": 2359296
    },
    "model.layers.14.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 493518848,
      "file_name": ".cache\\MatMulNBits_2_0_373.const",
      "file_size": 6144
    },
    "model.layers.14.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 493524992,
      "file_name": ".cache\\MatMulNBits_2_0_374.const",
      "file_size": 73728
    },
    "model.layers.14.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 493598720,
      "file_name": ".cache\\MatMulNBits_2_0_375.const",
      "file_size": 18432
    },
    "model.layers.14.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 493617152,
      "file_name": ".cache\\MatMulNBits_2_0_376.const",
      "file_size": 3072
    },
    "model.layers.14.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 493620224,
      "file_name": ".cache\\MatMulNBits_2_0_377.const",
      "file_size": 6881280
    },
    "model.layers.14.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 500501504,
      "file_name": ".cache\\MatMulNBits_2_0_378.const",
      "file_size": 430080
    },
    "model.layers.14.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 500931584,
      "file_name": ".cache\\MatMulNBits_2_0_379.const",
      "file_size": 53760
    },
    "model.layers.14.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 500985344,
      "file_name": ".cache\\MatMulNBits_2_0_380.const",
      "file_size": 35840
    },
    "model.layers.14.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 501021184,
      "file_name": ".cache\\MatMulNBits_2_0_381.const",
      "file_size": 6881280
    },
    "model.layers.14.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 507902464,
      "file_name": ".cache\\MatMulNBits_2_0_382.const",
      "file_size": 430080
    },
    "model.layers.14.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 508332544,
      "file_name": ".cache\\MatMulNBits_2_0_383.const",
      "file_size": 53760
    },
    "model.layers.14.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 508386304,
      "file_name": ".cache\\MatMulNBits_2_0_384.const",
      "file_size": 35840
    },
    "model.layers.14.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 508422144,
      "file_name": ".cache\\MatMulNBits_2_0_385.const",
      "file_size": 13762560
    },
    "model.layers.14.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 522184704,
      "file_name": ".cache\\MatMulNBits_2_0_386.const",
      "file_size": 6144
    },
    "model.layers.14.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 522190848,
      "file_name": ".cache\\MatMulNBits_2_0_387.const",
      "file_size": 430080
    },
    "model.layers.14.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 522620928,
      "file_name": ".cache\\MatMulNBits_2_0_388.const",
      "file_size": 107520
    },
    "model.layers.15.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 522728448,
      "file_name": ".cache\\MatMulNBits_2_0_389.const",
      "file_size": 3072
    },
    "model.layers.15.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 522731520,
      "file_name": ".cache\\MatMulNBits_2_0_390.const",
      "file_size": 2752512
    },
    "model.layers.15.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 525484032,
      "file_name": ".cache\\MatMulNBits_2_0_391.const",
      "file_size": 7168
    },
    "model.layers.15.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 525491200,
      "file_name": ".cache\\MatMulNBits_2_0_392.const",
      "file_size": 86016
    },
    "model.layers.15.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 525577216,
      "file_name": ".cache\\MatMulNBits_2_0_393.const",
      "file_size": 21504
    },
    "model.layers.15.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 525598720,
      "file_name": ".cache\\MatMulNBits_2_0_394.const",
      "file_size": 393216
    },
    "model.layers.15.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 525991936,
      "file_name": ".cache\\MatMulNBits_2_0_395.const",
      "file_size": 1024
    },
    "model.layers.15.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 525992960,
      "file_name": ".cache\\MatMulNBits_2_0_396.const",
      "file_size": 12288
    },
    "model.layers.15.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 526005248,
      "file_name": ".cache\\MatMulNBits_2_0_397.const",
      "file_size": 3072
    },
    "model.layers.15.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 526008320,
      "file_name": ".cache\\MatMulNBits_2_0_398.const",
      "file_size": 2359296
    },
    "model.layers.15.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 528367616,
      "file_name": ".cache\\MatMulNBits_2_0_399.const",
      "file_size": 6144
    },
    "model.layers.15.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 528373760,
      "file_name": ".cache\\MatMulNBits_2_0_400.const",
      "file_size": 73728
    },
    "model.layers.15.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 528447488,
      "file_name": ".cache\\MatMulNBits_2_0_401.const",
      "file_size": 18432
    },
    "model.layers.15.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 528465920,
      "file_name": ".cache\\MatMulNBits_2_0_402.const",
      "file_size": 3072
    },
    "model.layers.15.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 528468992,
      "file_name": ".cache\\MatMulNBits_2_0_403.const",
      "file_size": 6881280
    },
    "model.layers.15.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 535350272,
      "file_name": ".cache\\MatMulNBits_2_0_404.const",
      "file_size": 430080
    },
    "model.layers.15.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 535780352,
      "file_name": ".cache\\MatMulNBits_2_0_405.const",
      "file_size": 53760
    },
    "model.layers.15.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 535834112,
      "file_name": ".cache\\MatMulNBits_2_0_406.const",
      "file_size": 35840
    },
    "model.layers.15.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 535869952,
      "file_name": ".cache\\MatMulNBits_2_0_407.const",
      "file_size": 6881280
    },
    "model.layers.15.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 542751232,
      "file_name": ".cache\\MatMulNBits_2_0_408.const",
      "file_size": 430080
    },
    "model.layers.15.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 543181312,
      "file_name": ".cache\\MatMulNBits_2_0_409.const",
      "file_size": 53760
    },
    "model.layers.15.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 543235072,
      "file_name": ".cache\\MatMulNBits_2_0_410.const",
      "file_size": 35840
    },
    "model.layers.15.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 543270912,
      "file_name": ".cache\\MatMulNBits_2_0_411.const",
      "file_size": 13762560
    },
    "model.layers.15.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 557033472,
      "file_name": ".cache\\MatMulNBits_2_0_412.const",
      "file_size": 6144
    },
    "model.layers.15.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 557039616,
      "file_name": ".cache\\MatMulNBits_2_0_413.const",
      "file_size": 430080
    },
    "model.layers.15.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 557469696,
      "file_name": ".cache\\MatMulNBits_2_0_414.const",
      "file_size": 107520
    },
    "model.layers.16.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 557577216,
      "file_name": ".cache\\MatMulNBits_2_0_415.const",
      "file_size": 3072
    },
    "model.layers.16.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 557580288,
      "file_name": ".cache\\MatMulNBits_2_0_416.const",
      "file_size": 2752512
    },
    "model.layers.16.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 560332800,
      "file_name": ".cache\\MatMulNBits_2_0_417.const",
      "file_size": 7168
    },
    "model.layers.16.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 560339968,
      "file_name": ".cache\\MatMulNBits_2_0_418.const",
      "file_size": 86016
    },
    "model.layers.16.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 560425984,
      "file_name": ".cache\\MatMulNBits_2_0_419.const",
      "file_size": 21504
    },
    "model.layers.16.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 560447488,
      "file_name": ".cache\\MatMulNBits_2_0_420.const",
      "file_size": 393216
    },
    "model.layers.16.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 560840704,
      "file_name": ".cache\\MatMulNBits_2_0_421.const",
      "file_size": 1024
    },
    "model.layers.16.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 560841728,
      "file_name": ".cache\\MatMulNBits_2_0_422.const",
      "file_size": 12288
    },
    "model.layers.16.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 560854016,
      "file_name": ".cache\\MatMulNBits_2_0_423.const",
      "file_size": 3072
    },
    "model.layers.16.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 560857088,
      "file_name": ".cache\\MatMulNBits_2_0_424.const",
      "file_size": 2359296
    },
    "model.layers.16.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 563216384,
      "file_name": ".cache\\MatMulNBits_2_0_425.const",
      "file_size": 6144
    },
    "model.layers.16.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 563222528,
      "file_name": ".cache\\MatMulNBits_2_0_426.const",
      "file_size": 73728
    },
    "model.layers.16.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 563296256,
      "file_name": ".cache\\MatMulNBits_2_0_427.const",
      "file_size": 18432
    },
    "model.layers.16.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 563314688,
      "file_name": ".cache\\MatMulNBits_2_0_428.const",
      "file_size": 3072
    },
    "model.layers.16.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 563317760,
      "file_name": ".cache\\MatMulNBits_2_0_429.const",
      "file_size": 6881280
    },
    "model.layers.16.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 570199040,
      "file_name": ".cache\\MatMulNBits_2_0_430.const",
      "file_size": 430080
    },
    "model.layers.16.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 570629120,
      "file_name": ".cache\\MatMulNBits_2_0_431.const",
      "file_size": 53760
    },
    "model.layers.16.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 570682880,
      "file_name": ".cache\\MatMulNBits_2_0_432.const",
      "file_size": 35840
    },
    "model.layers.16.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 570718720,
      "file_name": ".cache\\MatMulNBits_2_0_433.const",
      "file_size": 6881280
    },
    "model.layers.16.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 577600000,
      "file_name": ".cache\\MatMulNBits_2_0_434.const",
      "file_size": 430080
    },
    "model.layers.16.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 578030080,
      "file_name": ".cache\\MatMulNBits_2_0_435.const",
      "file_size": 53760
    },
    "model.layers.16.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 578083840,
      "file_name": ".cache\\MatMulNBits_2_0_436.const",
      "file_size": 35840
    },
    "model.layers.16.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 578119680,
      "file_name": ".cache\\MatMulNBits_2_0_437.const",
      "file_size": 13762560
    },
    "model.layers.16.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 591882240,
      "file_name": ".cache\\MatMulNBits_2_0_438.const",
      "file_size": 6144
    },
    "model.layers.16.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 591888384,
      "file_name": ".cache\\MatMulNBits_2_0_439.const",
      "file_size": 430080
    },
    "model.layers.16.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 592318464,
      "file_name": ".cache\\MatMulNBits_2_0_440.const",
      "file_size": 107520
    },
    "model.layers.17.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 592425984,
      "file_name": ".cache\\MatMulNBits_2_0_441.const",
      "file_size": 3072
    },
    "model.layers.17.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 592429056,
      "file_name": ".cache\\MatMulNBits_2_0_442.const",
      "file_size": 2752512
    },
    "model.layers.17.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 595181568,
      "file_name": ".cache\\MatMulNBits_2_0_443.const",
      "file_size": 7168
    },
    "model.layers.17.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 595188736,
      "file_name": ".cache\\MatMulNBits_2_0_444.const",
      "file_size": 86016
    },
    "model.layers.17.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 595274752,
      "file_name": ".cache\\MatMulNBits_2_0_445.const",
      "file_size": 21504
    },
    "model.layers.17.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 595296256,
      "file_name": ".cache\\MatMulNBits_2_0_446.const",
      "file_size": 393216
    },
    "model.layers.17.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 595689472,
      "file_name": ".cache\\MatMulNBits_2_0_447.const",
      "file_size": 1024
    },
    "model.layers.17.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 595690496,
      "file_name": ".cache\\MatMulNBits_2_0_448.const",
      "file_size": 12288
    },
    "model.layers.17.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 595702784,
      "file_name": ".cache\\MatMulNBits_2_0_449.const",
      "file_size": 3072
    },
    "model.layers.17.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 595705856,
      "file_name": ".cache\\MatMulNBits_2_0_450.const",
      "file_size": 2359296
    },
    "model.layers.17.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 598065152,
      "file_name": ".cache\\MatMulNBits_2_0_451.const",
      "file_size": 6144
    },
    "model.layers.17.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 598071296,
      "file_name": ".cache\\MatMulNBits_2_0_452.const",
      "file_size": 73728
    },
    "model.layers.17.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 598145024,
      "file_name": ".cache\\MatMulNBits_2_0_453.const",
      "file_size": 18432
    },
    "model.layers.17.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 598163456,
      "file_name": ".cache\\MatMulNBits_2_0_454.const",
      "file_size": 3072
    },
    "model.layers.17.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 598166528,
      "file_name": ".cache\\MatMulNBits_2_0_455.const",
      "file_size": 6881280
    },
    "model.layers.17.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 605047808,
      "file_name": ".cache\\MatMulNBits_2_0_456.const",
      "file_size": 430080
    },
    "model.layers.17.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 605477888,
      "file_name": ".cache\\MatMulNBits_2_0_457.const",
      "file_size": 53760
    },
    "model.layers.17.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 605531648,
      "file_name": ".cache\\MatMulNBits_2_0_458.const",
      "file_size": 35840
    },
    "model.layers.17.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 605567488,
      "file_name": ".cache\\MatMulNBits_2_0_459.const",
      "file_size": 6881280
    },
    "model.layers.17.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 612448768,
      "file_name": ".cache\\MatMulNBits_2_0_460.const",
      "file_size": 430080
    },
    "model.layers.17.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 612878848,
      "file_name": ".cache\\MatMulNBits_2_0_461.const",
      "file_size": 53760
    },
    "model.layers.17.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 612932608,
      "file_name": ".cache\\MatMulNBits_2_0_462.const",
      "file_size": 35840
    },
    "model.layers.17.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 612968448,
      "file_name": ".cache\\MatMulNBits_2_0_463.const",
      "file_size": 13762560
    },
    "model.layers.17.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 626731008,
      "file_name": ".cache\\MatMulNBits_2_0_464.const",
      "file_size": 6144
    },
    "model.layers.17.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 626737152,
      "file_name": ".cache\\MatMulNBits_2_0_465.const",
      "file_size": 430080
    },
    "model.layers.17.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 627167232,
      "file_name": ".cache\\MatMulNBits_2_0_466.const",
      "file_size": 107520
    },
    "model.layers.18.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 627274752,
      "file_name": ".cache\\MatMulNBits_2_0_467.const",
      "file_size": 3072
    },
    "model.layers.18.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 627277824,
      "file_name": ".cache\\MatMulNBits_2_0_468.const",
      "file_size": 2752512
    },
    "model.layers.18.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 630030336,
      "file_name": ".cache\\MatMulNBits_2_0_469.const",
      "file_size": 7168
    },
    "model.layers.18.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 630037504,
      "file_name": ".cache\\MatMulNBits_2_0_470.const",
      "file_size": 86016
    },
    "model.layers.18.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 630123520,
      "file_name": ".cache\\MatMulNBits_2_0_471.const",
      "file_size": 21504
    },
    "model.layers.18.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 630145024,
      "file_name": ".cache\\MatMulNBits_2_0_472.const",
      "file_size": 393216
    },
    "model.layers.18.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 630538240,
      "file_name": ".cache\\MatMulNBits_2_0_473.const",
      "file_size": 1024
    },
    "model.layers.18.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 630539264,
      "file_name": ".cache\\MatMulNBits_2_0_474.const",
      "file_size": 12288
    },
    "model.layers.18.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 630551552,
      "file_name": ".cache\\MatMulNBits_2_0_475.const",
      "file_size": 3072
    },
    "model.layers.18.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 630554624,
      "file_name": ".cache\\MatMulNBits_2_0_476.const",
      "file_size": 2359296
    },
    "model.layers.18.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 632913920,
      "file_name": ".cache\\MatMulNBits_2_0_477.const",
      "file_size": 6144
    },
    "model.layers.18.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 632920064,
      "file_name": ".cache\\MatMulNBits_2_0_478.const",
      "file_size": 73728
    },
    "model.layers.18.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 632993792,
      "file_name": ".cache\\MatMulNBits_2_0_479.const",
      "file_size": 18432
    },
    "model.layers.18.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 633012224,
      "file_name": ".cache\\MatMulNBits_2_0_480.const",
      "file_size": 3072
    },
    "model.layers.18.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 633015296,
      "file_name": ".cache\\MatMulNBits_2_0_481.const",
      "file_size": 6881280
    },
    "model.layers.18.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 639896576,
      "file_name": ".cache\\MatMulNBits_2_0_482.const",
      "file_size": 430080
    },
    "model.layers.18.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 640326656,
      "file_name": ".cache\\MatMulNBits_2_0_483.const",
      "file_size": 53760
    },
    "model.layers.18.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 640380416,
      "file_name": ".cache\\MatMulNBits_2_0_484.const",
      "file_size": 35840
    },
    "model.layers.18.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 640416256,
      "file_name": ".cache\\MatMulNBits_2_0_485.const",
      "file_size": 6881280
    },
    "model.layers.18.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 647297536,
      "file_name": ".cache\\MatMulNBits_2_0_486.const",
      "file_size": 430080
    },
    "model.layers.18.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 647727616,
      "file_name": ".cache\\MatMulNBits_2_0_487.const",
      "file_size": 53760
    },
    "model.layers.18.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 647781376,
      "file_name": ".cache\\MatMulNBits_2_0_488.const",
      "file_size": 35840
    },
    "model.layers.18.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 647817216,
      "file_name": ".cache\\MatMulNBits_2_0_489.const",
      "file_size": 13762560
    },
    "model.layers.18.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 661579776,
      "file_name": ".cache\\MatMulNBits_2_0_490.const",
      "file_size": 6144
    },
    "model.layers.18.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 661585920,
      "file_name": ".cache\\MatMulNBits_2_0_491.const",
      "file_size": 430080
    },
    "model.layers.18.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 662016000,
      "file_name": ".cache\\MatMulNBits_2_0_492.const",
      "file_size": 107520
    },
    "model.layers.19.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 662123520,
      "file_name": ".cache\\MatMulNBits_2_0_493.const",
      "file_size": 3072
    },
    "model.layers.19.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 662126592,
      "file_name": ".cache\\MatMulNBits_2_0_494.const",
      "file_size": 2752512
    },
    "model.layers.19.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 664879104,
      "file_name": ".cache\\MatMulNBits_2_0_495.const",
      "file_size": 7168
    },
    "model.layers.19.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 664886272,
      "file_name": ".cache\\MatMulNBits_2_0_496.const",
      "file_size": 86016
    },
    "model.layers.19.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 664972288,
      "file_name": ".cache\\MatMulNBits_2_0_497.const",
      "file_size": 21504
    },
    "model.layers.19.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 664993792,
      "file_name": ".cache\\MatMulNBits_2_0_498.const",
      "file_size": 393216
    },
    "model.layers.19.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 665387008,
      "file_name": ".cache\\MatMulNBits_2_0_499.const",
      "file_size": 1024
    },
    "model.layers.19.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 665388032,
      "file_name": ".cache\\MatMulNBits_2_0_500.const",
      "file_size": 12288
    },
    "model.layers.19.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 665400320,
      "file_name": ".cache\\MatMulNBits_2_0_501.const",
      "file_size": 3072
    },
    "model.layers.19.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 665403392,
      "file_name": ".cache\\MatMulNBits_2_0_502.const",
      "file_size": 2359296
    },
    "model.layers.19.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 667762688,
      "file_name": ".cache\\MatMulNBits_2_0_503.const",
      "file_size": 6144
    },
    "model.layers.19.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 667768832,
      "file_name": ".cache\\MatMulNBits_2_0_504.const",
      "file_size": 73728
    },
    "model.layers.19.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 667842560,
      "file_name": ".cache\\MatMulNBits_2_0_505.const",
      "file_size": 18432
    },
    "model.layers.19.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 667860992,
      "file_name": ".cache\\MatMulNBits_2_0_506.const",
      "file_size": 3072
    },
    "model.layers.19.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 667864064,
      "file_name": ".cache\\MatMulNBits_2_0_507.const",
      "file_size": 6881280
    },
    "model.layers.19.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 674745344,
      "file_name": ".cache\\MatMulNBits_2_0_508.const",
      "file_size": 430080
    },
    "model.layers.19.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 675175424,
      "file_name": ".cache\\MatMulNBits_2_0_509.const",
      "file_size": 53760
    },
    "model.layers.19.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 675229184,
      "file_name": ".cache\\MatMulNBits_2_0_510.const",
      "file_size": 35840
    },
    "model.layers.19.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 675265024,
      "file_name": ".cache\\MatMulNBits_2_0_511.const",
      "file_size": 6881280
    },
    "model.layers.19.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 682146304,
      "file_name": ".cache\\MatMulNBits_2_0_512.const",
      "file_size": 430080
    },
    "model.layers.19.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 682576384,
      "file_name": ".cache\\MatMulNBits_2_0_513.const",
      "file_size": 53760
    },
    "model.layers.19.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 682630144,
      "file_name": ".cache\\MatMulNBits_2_0_514.const",
      "file_size": 35840
    },
    "model.layers.19.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 682665984,
      "file_name": ".cache\\MatMulNBits_2_0_515.const",
      "file_size": 13762560
    },
    "model.layers.19.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 696428544,
      "file_name": ".cache\\MatMulNBits_2_0_516.const",
      "file_size": 6144
    },
    "model.layers.19.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 696434688,
      "file_name": ".cache\\MatMulNBits_2_0_517.const",
      "file_size": 430080
    },
    "model.layers.19.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 696864768,
      "file_name": ".cache\\MatMulNBits_2_0_518.const",
      "file_size": 107520
    },
    "model.layers.20.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 696972288,
      "file_name": ".cache\\MatMulNBits_2_0_519.const",
      "file_size": 3072
    },
    "model.layers.20.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 696975360,
      "file_name": ".cache\\MatMulNBits_2_0_520.const",
      "file_size": 2752512
    },
    "model.layers.20.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 699727872,
      "file_name": ".cache\\MatMulNBits_2_0_521.const",
      "file_size": 7168
    },
    "model.layers.20.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 699735040,
      "file_name": ".cache\\MatMulNBits_2_0_522.const",
      "file_size": 86016
    },
    "model.layers.20.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 699821056,
      "file_name": ".cache\\MatMulNBits_2_0_523.const",
      "file_size": 21504
    },
    "model.layers.20.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 699842560,
      "file_name": ".cache\\MatMulNBits_2_0_524.const",
      "file_size": 393216
    },
    "model.layers.20.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 700235776,
      "file_name": ".cache\\MatMulNBits_2_0_525.const",
      "file_size": 1024
    },
    "model.layers.20.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 700236800,
      "file_name": ".cache\\MatMulNBits_2_0_526.const",
      "file_size": 12288
    },
    "model.layers.20.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 700249088,
      "file_name": ".cache\\MatMulNBits_2_0_527.const",
      "file_size": 3072
    },
    "model.layers.20.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 700252160,
      "file_name": ".cache\\MatMulNBits_2_0_528.const",
      "file_size": 2359296
    },
    "model.layers.20.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 702611456,
      "file_name": ".cache\\MatMulNBits_2_0_529.const",
      "file_size": 6144
    },
    "model.layers.20.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 702617600,
      "file_name": ".cache\\MatMulNBits_2_0_530.const",
      "file_size": 73728
    },
    "model.layers.20.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 702691328,
      "file_name": ".cache\\MatMulNBits_2_0_531.const",
      "file_size": 18432
    },
    "model.layers.20.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 702709760,
      "file_name": ".cache\\MatMulNBits_2_0_532.const",
      "file_size": 3072
    },
    "model.layers.20.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 702712832,
      "file_name": ".cache\\MatMulNBits_2_0_533.const",
      "file_size": 6881280
    },
    "model.layers.20.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 709594112,
      "file_name": ".cache\\MatMulNBits_2_0_534.const",
      "file_size": 430080
    },
    "model.layers.20.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 710024192,
      "file_name": ".cache\\MatMulNBits_2_0_535.const",
      "file_size": 53760
    },
    "model.layers.20.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 710077952,
      "file_name": ".cache\\MatMulNBits_2_0_536.const",
      "file_size": 35840
    },
    "model.layers.20.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 710113792,
      "file_name": ".cache\\MatMulNBits_2_0_537.const",
      "file_size": 6881280
    },
    "model.layers.20.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 716995072,
      "file_name": ".cache\\MatMulNBits_2_0_538.const",
      "file_size": 430080
    },
    "model.layers.20.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 717425152,
      "file_name": ".cache\\MatMulNBits_2_0_539.const",
      "file_size": 53760
    },
    "model.layers.20.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 717478912,
      "file_name": ".cache\\MatMulNBits_2_0_540.const",
      "file_size": 35840
    },
    "model.layers.20.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 717514752,
      "file_name": ".cache\\MatMulNBits_2_0_541.const",
      "file_size": 13762560
    },
    "model.layers.20.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 731277312,
      "file_name": ".cache\\MatMulNBits_2_0_542.const",
      "file_size": 6144
    },
    "model.layers.20.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 731283456,
      "file_name": ".cache\\MatMulNBits_2_0_543.const",
      "file_size": 430080
    },
    "model.layers.20.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 731713536,
      "file_name": ".cache\\MatMulNBits_2_0_544.const",
      "file_size": 107520
    },
    "model.layers.21.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 731821056,
      "file_name": ".cache\\MatMulNBits_2_0_545.const",
      "file_size": 3072
    },
    "model.layers.21.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 731824128,
      "file_name": ".cache\\MatMulNBits_2_0_546.const",
      "file_size": 2752512
    },
    "model.layers.21.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 734576640,
      "file_name": ".cache\\MatMulNBits_2_0_547.const",
      "file_size": 7168
    },
    "model.layers.21.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 734583808,
      "file_name": ".cache\\MatMulNBits_2_0_548.const",
      "file_size": 86016
    },
    "model.layers.21.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 734669824,
      "file_name": ".cache\\MatMulNBits_2_0_549.const",
      "file_size": 21504
    },
    "model.layers.21.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 734691328,
      "file_name": ".cache\\MatMulNBits_2_0_550.const",
      "file_size": 393216
    },
    "model.layers.21.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 735084544,
      "file_name": ".cache\\MatMulNBits_2_0_551.const",
      "file_size": 1024
    },
    "model.layers.21.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 735085568,
      "file_name": ".cache\\MatMulNBits_2_0_552.const",
      "file_size": 12288
    },
    "model.layers.21.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 735097856,
      "file_name": ".cache\\MatMulNBits_2_0_553.const",
      "file_size": 3072
    },
    "model.layers.21.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 735100928,
      "file_name": ".cache\\MatMulNBits_2_0_554.const",
      "file_size": 2359296
    },
    "model.layers.21.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 737460224,
      "file_name": ".cache\\MatMulNBits_2_0_555.const",
      "file_size": 6144
    },
    "model.layers.21.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 737466368,
      "file_name": ".cache\\MatMulNBits_2_0_556.const",
      "file_size": 73728
    },
    "model.layers.21.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 737540096,
      "file_name": ".cache\\MatMulNBits_2_0_557.const",
      "file_size": 18432
    },
    "model.layers.21.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 737558528,
      "file_name": ".cache\\MatMulNBits_2_0_558.const",
      "file_size": 3072
    },
    "model.layers.21.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 737561600,
      "file_name": ".cache\\MatMulNBits_2_0_559.const",
      "file_size": 6881280
    },
    "model.layers.21.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 744442880,
      "file_name": ".cache\\MatMulNBits_2_0_560.const",
      "file_size": 430080
    },
    "model.layers.21.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 744872960,
      "file_name": ".cache\\MatMulNBits_2_0_561.const",
      "file_size": 53760
    },
    "model.layers.21.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 744926720,
      "file_name": ".cache\\MatMulNBits_2_0_562.const",
      "file_size": 35840
    },
    "model.layers.21.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 744962560,
      "file_name": ".cache\\MatMulNBits_2_0_563.const",
      "file_size": 6881280
    },
    "model.layers.21.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 751843840,
      "file_name": ".cache\\MatMulNBits_2_0_564.const",
      "file_size": 430080
    },
    "model.layers.21.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 752273920,
      "file_name": ".cache\\MatMulNBits_2_0_565.const",
      "file_size": 53760
    },
    "model.layers.21.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 752327680,
      "file_name": ".cache\\MatMulNBits_2_0_566.const",
      "file_size": 35840
    },
    "model.layers.21.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 752363520,
      "file_name": ".cache\\MatMulNBits_2_0_567.const",
      "file_size": 13762560
    },
    "model.layers.21.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 766126080,
      "file_name": ".cache\\MatMulNBits_2_0_568.const",
      "file_size": 6144
    },
    "model.layers.21.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 766132224,
      "file_name": ".cache\\MatMulNBits_2_0_569.const",
      "file_size": 430080
    },
    "model.layers.21.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 766562304,
      "file_name": ".cache\\MatMulNBits_2_0_570.const",
      "file_size": 107520
    },
    "model.layers.22.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 766669824,
      "file_name": ".cache\\MatMulNBits_2_0_571.const",
      "file_size": 3072
    },
    "model.layers.22.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 766672896,
      "file_name": ".cache\\MatMulNBits_2_0_572.const",
      "file_size": 2752512
    },
    "model.layers.22.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 769425408,
      "file_name": ".cache\\MatMulNBits_2_0_573.const",
      "file_size": 7168
    },
    "model.layers.22.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 769432576,
      "file_name": ".cache\\MatMulNBits_2_0_574.const",
      "file_size": 86016
    },
    "model.layers.22.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 769518592,
      "file_name": ".cache\\MatMulNBits_2_0_575.const",
      "file_size": 21504
    },
    "model.layers.22.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 769540096,
      "file_name": ".cache\\MatMulNBits_2_0_576.const",
      "file_size": 393216
    },
    "model.layers.22.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 769933312,
      "file_name": ".cache\\MatMulNBits_2_0_577.const",
      "file_size": 1024
    },
    "model.layers.22.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 769934336,
      "file_name": ".cache\\MatMulNBits_2_0_578.const",
      "file_size": 12288
    },
    "model.layers.22.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 769946624,
      "file_name": ".cache\\MatMulNBits_2_0_579.const",
      "file_size": 3072
    },
    "model.layers.22.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 769949696,
      "file_name": ".cache\\MatMulNBits_2_0_580.const",
      "file_size": 2359296
    },
    "model.layers.22.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 772308992,
      "file_name": ".cache\\MatMulNBits_2_0_581.const",
      "file_size": 6144
    },
    "model.layers.22.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 772315136,
      "file_name": ".cache\\MatMulNBits_2_0_582.const",
      "file_size": 73728
    },
    "model.layers.22.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 772388864,
      "file_name": ".cache\\MatMulNBits_2_0_583.const",
      "file_size": 18432
    },
    "model.layers.22.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 772407296,
      "file_name": ".cache\\MatMulNBits_2_0_584.const",
      "file_size": 3072
    },
    "model.layers.22.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 772410368,
      "file_name": ".cache\\MatMulNBits_2_0_585.const",
      "file_size": 6881280
    },
    "model.layers.22.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 779291648,
      "file_name": ".cache\\MatMulNBits_2_0_586.const",
      "file_size": 430080
    },
    "model.layers.22.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 779721728,
      "file_name": ".cache\\MatMulNBits_2_0_587.const",
      "file_size": 53760
    },
    "model.layers.22.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 779775488,
      "file_name": ".cache\\MatMulNBits_2_0_588.const",
      "file_size": 35840
    },
    "model.layers.22.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 779811328,
      "file_name": ".cache\\MatMulNBits_2_0_589.const",
      "file_size": 6881280
    },
    "model.layers.22.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 786692608,
      "file_name": ".cache\\MatMulNBits_2_0_590.const",
      "file_size": 430080
    },
    "model.layers.22.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 787122688,
      "file_name": ".cache\\MatMulNBits_2_0_591.const",
      "file_size": 53760
    },
    "model.layers.22.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 787176448,
      "file_name": ".cache\\MatMulNBits_2_0_592.const",
      "file_size": 35840
    },
    "model.layers.22.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 787212288,
      "file_name": ".cache\\MatMulNBits_2_0_593.const",
      "file_size": 13762560
    },
    "model.layers.22.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 800974848,
      "file_name": ".cache\\MatMulNBits_2_0_594.const",
      "file_size": 6144
    },
    "model.layers.22.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 800980992,
      "file_name": ".cache\\MatMulNBits_2_0_595.const",
      "file_size": 430080
    },
    "model.layers.22.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 801411072,
      "file_name": ".cache\\MatMulNBits_2_0_596.const",
      "file_size": 107520
    },
    "model.layers.23.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 801518592,
      "file_name": ".cache\\MatMulNBits_2_0_597.const",
      "file_size": 3072
    },
    "model.layers.23.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 801521664,
      "file_name": ".cache\\MatMulNBits_2_0_598.const",
      "file_size": 2752512
    },
    "model.layers.23.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 804274176,
      "file_name": ".cache\\MatMulNBits_2_0_599.const",
      "file_size": 7168
    },
    "model.layers.23.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 804281344,
      "file_name": ".cache\\MatMulNBits_2_0_600.const",
      "file_size": 86016
    },
    "model.layers.23.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 804367360,
      "file_name": ".cache\\MatMulNBits_2_0_601.const",
      "file_size": 21504
    },
    "model.layers.23.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 804388864,
      "file_name": ".cache\\MatMulNBits_2_0_602.const",
      "file_size": 393216
    },
    "model.layers.23.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 804782080,
      "file_name": ".cache\\MatMulNBits_2_0_603.const",
      "file_size": 1024
    },
    "model.layers.23.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 804783104,
      "file_name": ".cache\\MatMulNBits_2_0_604.const",
      "file_size": 12288
    },
    "model.layers.23.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 804795392,
      "file_name": ".cache\\MatMulNBits_2_0_605.const",
      "file_size": 3072
    },
    "model.layers.23.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 804798464,
      "file_name": ".cache\\MatMulNBits_2_0_606.const",
      "file_size": 2359296
    },
    "model.layers.23.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 807157760,
      "file_name": ".cache\\MatMulNBits_2_0_607.const",
      "file_size": 6144
    },
    "model.layers.23.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 807163904,
      "file_name": ".cache\\MatMulNBits_2_0_608.const",
      "file_size": 73728
    },
    "model.layers.23.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 807237632,
      "file_name": ".cache\\MatMulNBits_2_0_609.const",
      "file_size": 18432
    },
    "model.layers.23.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 807256064,
      "file_name": ".cache\\MatMulNBits_2_0_610.const",
      "file_size": 3072
    },
    "model.layers.23.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 807259136,
      "file_name": ".cache\\MatMulNBits_2_0_611.const",
      "file_size": 6881280
    },
    "model.layers.23.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 814140416,
      "file_name": ".cache\\MatMulNBits_2_0_612.const",
      "file_size": 430080
    },
    "model.layers.23.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 814570496,
      "file_name": ".cache\\MatMulNBits_2_0_613.const",
      "file_size": 53760
    },
    "model.layers.23.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 814624256,
      "file_name": ".cache\\MatMulNBits_2_0_614.const",
      "file_size": 35840
    },
    "model.layers.23.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 814660096,
      "file_name": ".cache\\MatMulNBits_2_0_615.const",
      "file_size": 6881280
    },
    "model.layers.23.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 821541376,
      "file_name": ".cache\\MatMulNBits_2_0_616.const",
      "file_size": 430080
    },
    "model.layers.23.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 821971456,
      "file_name": ".cache\\MatMulNBits_2_0_617.const",
      "file_size": 53760
    },
    "model.layers.23.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 822025216,
      "file_name": ".cache\\MatMulNBits_2_0_618.const",
      "file_size": 35840
    },
    "model.layers.23.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 822061056,
      "file_name": ".cache\\MatMulNBits_2_0_619.const",
      "file_size": 13762560
    },
    "model.layers.23.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 835823616,
      "file_name": ".cache\\MatMulNBits_2_0_620.const",
      "file_size": 6144
    },
    "model.layers.23.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 835829760,
      "file_name": ".cache\\MatMulNBits_2_0_621.const",
      "file_size": 430080
    },
    "model.layers.23.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 836259840,
      "file_name": ".cache\\MatMulNBits_2_0_622.const",
      "file_size": 107520
    },
    "model.layers.24.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 836367360,
      "file_name": ".cache\\MatMulNBits_2_0_623.const",
      "file_size": 3072
    },
    "model.layers.24.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 836370432,
      "file_name": ".cache\\MatMulNBits_2_0_624.const",
      "file_size": 2752512
    },
    "model.layers.24.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 839122944,
      "file_name": ".cache\\MatMulNBits_2_0_625.const",
      "file_size": 7168
    },
    "model.layers.24.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 839130112,
      "file_name": ".cache\\MatMulNBits_2_0_626.const",
      "file_size": 86016
    },
    "model.layers.24.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 839216128,
      "file_name": ".cache\\MatMulNBits_2_0_627.const",
      "file_size": 21504
    },
    "model.layers.24.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 839237632,
      "file_name": ".cache\\MatMulNBits_2_0_628.const",
      "file_size": 393216
    },
    "model.layers.24.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 839630848,
      "file_name": ".cache\\MatMulNBits_2_0_629.const",
      "file_size": 1024
    },
    "model.layers.24.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 839631872,
      "file_name": ".cache\\MatMulNBits_2_0_630.const",
      "file_size": 12288
    },
    "model.layers.24.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 839644160,
      "file_name": ".cache\\MatMulNBits_2_0_631.const",
      "file_size": 3072
    },
    "model.layers.24.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 839647232,
      "file_name": ".cache\\MatMulNBits_2_0_632.const",
      "file_size": 2359296
    },
    "model.layers.24.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 842006528,
      "file_name": ".cache\\MatMulNBits_2_0_633.const",
      "file_size": 6144
    },
    "model.layers.24.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 842012672,
      "file_name": ".cache\\MatMulNBits_2_0_634.const",
      "file_size": 73728
    },
    "model.layers.24.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 842086400,
      "file_name": ".cache\\MatMulNBits_2_0_635.const",
      "file_size": 18432
    },
    "model.layers.24.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 842104832,
      "file_name": ".cache\\MatMulNBits_2_0_636.const",
      "file_size": 3072
    },
    "model.layers.24.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 842107904,
      "file_name": ".cache\\MatMulNBits_2_0_637.const",
      "file_size": 6881280
    },
    "model.layers.24.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 848989184,
      "file_name": ".cache\\MatMulNBits_2_0_638.const",
      "file_size": 430080
    },
    "model.layers.24.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 849419264,
      "file_name": ".cache\\MatMulNBits_2_0_639.const",
      "file_size": 53760
    },
    "model.layers.24.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 849473024,
      "file_name": ".cache\\MatMulNBits_2_0_640.const",
      "file_size": 35840
    },
    "model.layers.24.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 849508864,
      "file_name": ".cache\\MatMulNBits_2_0_641.const",
      "file_size": 6881280
    },
    "model.layers.24.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 856390144,
      "file_name": ".cache\\MatMulNBits_2_0_642.const",
      "file_size": 430080
    },
    "model.layers.24.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 856820224,
      "file_name": ".cache\\MatMulNBits_2_0_643.const",
      "file_size": 53760
    },
    "model.layers.24.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 856873984,
      "file_name": ".cache\\MatMulNBits_2_0_644.const",
      "file_size": 35840
    },
    "model.layers.24.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 856909824,
      "file_name": ".cache\\MatMulNBits_2_0_645.const",
      "file_size": 13762560
    },
    "model.layers.24.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 870672384,
      "file_name": ".cache\\MatMulNBits_2_0_646.const",
      "file_size": 6144
    },
    "model.layers.24.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 870678528,
      "file_name": ".cache\\MatMulNBits_2_0_647.const",
      "file_size": 430080
    },
    "model.layers.24.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 871108608,
      "file_name": ".cache\\MatMulNBits_2_0_648.const",
      "file_size": 107520
    },
    "model.layers.25.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 871216128,
      "file_name": ".cache\\MatMulNBits_2_0_649.const",
      "file_size": 3072
    },
    "model.layers.25.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 871219200,
      "file_name": ".cache\\MatMulNBits_2_0_650.const",
      "file_size": 2752512
    },
    "model.layers.25.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 873971712,
      "file_name": ".cache\\MatMulNBits_2_0_651.const",
      "file_size": 7168
    },
    "model.layers.25.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 873978880,
      "file_name": ".cache\\MatMulNBits_2_0_652.const",
      "file_size": 86016
    },
    "model.layers.25.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 874064896,
      "file_name": ".cache\\MatMulNBits_2_0_653.const",
      "file_size": 21504
    },
    "model.layers.25.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 874086400,
      "file_name": ".cache\\MatMulNBits_2_0_654.const",
      "file_size": 393216
    },
    "model.layers.25.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 874479616,
      "file_name": ".cache\\MatMulNBits_2_0_655.const",
      "file_size": 1024
    },
    "model.layers.25.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 874480640,
      "file_name": ".cache\\MatMulNBits_2_0_656.const",
      "file_size": 12288
    },
    "model.layers.25.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 874492928,
      "file_name": ".cache\\MatMulNBits_2_0_657.const",
      "file_size": 3072
    },
    "model.layers.25.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 874496000,
      "file_name": ".cache\\MatMulNBits_2_0_658.const",
      "file_size": 2359296
    },
    "model.layers.25.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 876855296,
      "file_name": ".cache\\MatMulNBits_2_0_659.const",
      "file_size": 6144
    },
    "model.layers.25.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 876861440,
      "file_name": ".cache\\MatMulNBits_2_0_660.const",
      "file_size": 73728
    },
    "model.layers.25.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 876935168,
      "file_name": ".cache\\MatMulNBits_2_0_661.const",
      "file_size": 18432
    },
    "model.layers.25.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 876953600,
      "file_name": ".cache\\MatMulNBits_2_0_662.const",
      "file_size": 3072
    },
    "model.layers.25.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 876956672,
      "file_name": ".cache\\MatMulNBits_2_0_663.const",
      "file_size": 6881280
    },
    "model.layers.25.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 883837952,
      "file_name": ".cache\\MatMulNBits_2_0_664.const",
      "file_size": 430080
    },
    "model.layers.25.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 884268032,
      "file_name": ".cache\\MatMulNBits_2_0_665.const",
      "file_size": 53760
    },
    "model.layers.25.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 884321792,
      "file_name": ".cache\\MatMulNBits_2_0_666.const",
      "file_size": 35840
    },
    "model.layers.25.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 884357632,
      "file_name": ".cache\\MatMulNBits_2_0_667.const",
      "file_size": 6881280
    },
    "model.layers.25.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 891238912,
      "file_name": ".cache\\MatMulNBits_2_0_668.const",
      "file_size": 430080
    },
    "model.layers.25.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 891668992,
      "file_name": ".cache\\MatMulNBits_2_0_669.const",
      "file_size": 53760
    },
    "model.layers.25.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 891722752,
      "file_name": ".cache\\MatMulNBits_2_0_670.const",
      "file_size": 35840
    },
    "model.layers.25.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 891758592,
      "file_name": ".cache\\MatMulNBits_2_0_671.const",
      "file_size": 13762560
    },
    "model.layers.25.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 905521152,
      "file_name": ".cache\\MatMulNBits_2_0_672.const",
      "file_size": 6144
    },
    "model.layers.25.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 905527296,
      "file_name": ".cache\\MatMulNBits_2_0_673.const",
      "file_size": 430080
    },
    "model.layers.25.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 905957376,
      "file_name": ".cache\\MatMulNBits_2_0_674.const",
      "file_size": 107520
    },
    "model.layers.26.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 906064896,
      "file_name": ".cache\\MatMulNBits_2_0_675.const",
      "file_size": 3072
    },
    "model.layers.26.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 906067968,
      "file_name": ".cache\\MatMulNBits_2_0_676.const",
      "file_size": 2752512
    },
    "model.layers.26.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 908820480,
      "file_name": ".cache\\MatMulNBits_2_0_677.const",
      "file_size": 7168
    },
    "model.layers.26.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 908827648,
      "file_name": ".cache\\MatMulNBits_2_0_678.const",
      "file_size": 86016
    },
    "model.layers.26.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 908913664,
      "file_name": ".cache\\MatMulNBits_2_0_679.const",
      "file_size": 21504
    },
    "model.layers.26.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 908935168,
      "file_name": ".cache\\MatMulNBits_2_0_680.const",
      "file_size": 393216
    },
    "model.layers.26.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 909328384,
      "file_name": ".cache\\MatMulNBits_2_0_681.const",
      "file_size": 1024
    },
    "model.layers.26.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 909329408,
      "file_name": ".cache\\MatMulNBits_2_0_682.const",
      "file_size": 12288
    },
    "model.layers.26.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 909341696,
      "file_name": ".cache\\MatMulNBits_2_0_683.const",
      "file_size": 3072
    },
    "model.layers.26.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 909344768,
      "file_name": ".cache\\MatMulNBits_2_0_684.const",
      "file_size": 2359296
    },
    "model.layers.26.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 911704064,
      "file_name": ".cache\\MatMulNBits_2_0_685.const",
      "file_size": 6144
    },
    "model.layers.26.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 911710208,
      "file_name": ".cache\\MatMulNBits_2_0_686.const",
      "file_size": 73728
    },
    "model.layers.26.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 911783936,
      "file_name": ".cache\\MatMulNBits_2_0_687.const",
      "file_size": 18432
    },
    "model.layers.26.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 911802368,
      "file_name": ".cache\\MatMulNBits_2_0_688.const",
      "file_size": 3072
    },
    "model.layers.26.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 911805440,
      "file_name": ".cache\\MatMulNBits_2_0_689.const",
      "file_size": 6881280
    },
    "model.layers.26.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 918686720,
      "file_name": ".cache\\MatMulNBits_2_0_690.const",
      "file_size": 430080
    },
    "model.layers.26.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 919116800,
      "file_name": ".cache\\MatMulNBits_2_0_691.const",
      "file_size": 53760
    },
    "model.layers.26.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 919170560,
      "file_name": ".cache\\MatMulNBits_2_0_692.const",
      "file_size": 35840
    },
    "model.layers.26.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 919206400,
      "file_name": ".cache\\MatMulNBits_2_0_693.const",
      "file_size": 6881280
    },
    "model.layers.26.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 926087680,
      "file_name": ".cache\\MatMulNBits_2_0_694.const",
      "file_size": 430080
    },
    "model.layers.26.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 926517760,
      "file_name": ".cache\\MatMulNBits_2_0_695.const",
      "file_size": 53760
    },
    "model.layers.26.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 926571520,
      "file_name": ".cache\\MatMulNBits_2_0_696.const",
      "file_size": 35840
    },
    "model.layers.26.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 926607360,
      "file_name": ".cache\\MatMulNBits_2_0_697.const",
      "file_size": 13762560
    },
    "model.layers.26.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 940369920,
      "file_name": ".cache\\MatMulNBits_2_0_698.const",
      "file_size": 6144
    },
    "model.layers.26.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 940376064,
      "file_name": ".cache\\MatMulNBits_2_0_699.const",
      "file_size": 430080
    },
    "model.layers.26.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 940806144,
      "file_name": ".cache\\MatMulNBits_2_0_700.const",
      "file_size": 107520
    },
    "model.layers.27.input_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 940913664,
      "file_name": ".cache\\MatMulNBits_2_0_701.const",
      "file_size": 3072
    },
    "model.layers.27.attn.qk_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1792
      ],
      "size_in_bytes": 2752512,
      "op_tensor_size": 2752512,
      "offset": 940916736,
      "file_name": ".cache\\MatMulNBits_2_0_702.const",
      "file_size": 2752512
    },
    "model.layers.27.attn.qk_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1792
      ],
      "size_in_bytes": 7168,
      "op_tensor_size": 7168,
      "offset": 943669248,
      "file_name": ".cache\\MatMulNBits_2_0_703.const",
      "file_size": 7168
    },
    "model.layers.27.attn.qk_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        21504
      ],
      "size_in_bytes": 86016,
      "op_tensor_size": 86016,
      "offset": 943676416,
      "file_name": ".cache\\MatMulNBits_2_0_704.const",
      "file_size": 86016
    },
    "model.layers.27.attn.qk_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        21504
      ],
      "size_in_bytes": 21504,
      "op_tensor_size": 21504,
      "offset": 943762432,
      "file_name": ".cache\\MatMulNBits_2_0_705.const",
      "file_size": 21504
    },
    "model.layers.27.attn.v_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        256
      ],
      "size_in_bytes": 393216,
      "op_tensor_size": 393216,
      "offset": 943783936,
      "file_name": ".cache\\MatMulNBits_2_0_706.const",
      "file_size": 393216
    },
    "model.layers.27.attn.v_proj.Add.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        256
      ],
      "size_in_bytes": 1024,
      "op_tensor_size": 1024,
      "offset": 944177152,
      "file_name": ".cache\\MatMulNBits_2_0_707.const",
      "file_size": 1024
    },
    "model.layers.27.attn.v_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        3072
      ],
      "size_in_bytes": 12288,
      "op_tensor_size": 12288,
      "offset": 944178176,
      "file_name": ".cache\\MatMulNBits_2_0_708.const",
      "file_size": 12288
    },
    "model.layers.27.attn.v_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        3072
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 944190464,
      "file_name": ".cache\\MatMulNBits_2_0_709.const",
      "file_size": 3072
    },
    "model.layers.27.attn.o_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        1536
      ],
      "size_in_bytes": 2359296,
      "op_tensor_size": 2359296,
      "offset": 944193536,
      "file_name": ".cache\\MatMulNBits_2_0_710.const",
      "file_size": 2359296
    },
    "model.layers.27.attn.o_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 946552832,
      "file_name": ".cache\\MatMulNBits_2_0_711.const",
      "file_size": 6144
    },
    "model.layers.27.attn.o_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        18432
      ],
      "size_in_bytes": 73728,
      "op_tensor_size": 73728,
      "offset": 946558976,
      "file_name": ".cache\\MatMulNBits_2_0_712.const",
      "file_size": 73728
    },
    "model.layers.27.attn.o_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        18432
      ],
      "size_in_bytes": 18432,
      "op_tensor_size": 18432,
      "offset": 946632704,
      "file_name": ".cache\\MatMulNBits_2_0_713.const",
      "file_size": 18432
    },
    "model.layers.27.post_attention_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 946651136,
      "file_name": ".cache\\MatMulNBits_2_0_714.const",
      "file_size": 3072
    },
    "model.layers.27.mlp.gate_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 946654208,
      "file_name": ".cache\\MatMulNBits_2_0_715.const",
      "file_size": 6881280
    },
    "model.layers.27.mlp.gate_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 953535488,
      "file_name": ".cache\\MatMulNBits_2_0_716.const",
      "file_size": 430080
    },
    "model.layers.27.mlp.gate_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 953965568,
      "file_name": ".cache\\MatMulNBits_2_0_717.const",
      "file_size": 53760
    },
    "model.layers.27.mlp.gate_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 954019328,
      "file_name": ".cache\\MatMulNBits_2_0_718.const",
      "file_size": 35840
    },
    "model.layers.27.mlp.up_proj.MatMulNBits.qweight": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        8960,
        12,
        64
      ],
      "size_in_bytes": 6881280,
      "op_tensor_size": 6881280,
      "offset": 954055168,
      "file_name": ".cache\\MatMulNBits_2_0_719.const",
      "file_size": 6881280
    },
    "model.layers.27.mlp.up_proj.MatMulNBits.scales.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 960936448,
      "file_name": ".cache\\MatMulNBits_2_0_720.const",
      "file_size": 430080
    },
    "model.layers.27.mlp.up_proj.MatMulNBits.qzeros": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "uint8",
      "shape": [
        53760
      ],
      "size_in_bytes": 53760,
      "op_tensor_size": 53760,
      "offset": 961366528,
      "file_name": ".cache\\MatMulNBits_2_0_721.const",
      "file_size": 53760
    },
    "model.layers.27.mlp.up_proj.MatMulNBits.bias.f": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        8960
      ],
      "size_in_bytes": 35840,
      "op_tensor_size": 35840,
      "offset": 961420288,
      "file_name": ".cache\\MatMulNBits_2_0_722.const",
      "file_size": 35840
    },
    "model.layers.27.mlp.down_proj.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        8960,
        1536
      ],
      "size_in_bytes": 13762560,
      "op_tensor_size": 13762560,
      "offset": 961456128,
      "file_name": ".cache\\MatMulNBits_2_0_723.const",
      "file_size": 13762560
    },
    "model.layers.27.mlp.down_proj.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1536
      ],
      "size_in_bytes": 6144,
      "op_tensor_size": 6144,
      "offset": 975218688,
      "file_name": ".cache\\MatMulNBits_2_0_724.const",
      "file_size": 6144
    },
    "model.layers.27.mlp.down_proj.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        107520
      ],
      "size_in_bytes": 430080,
      "op_tensor_size": 430080,
      "offset": 975224832,
      "file_name": ".cache\\MatMulNBits_2_0_725.const",
      "file_size": 430080
    },
    "model.layers.27.mlp.down_proj.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        107520
      ],
      "size_in_bytes": 107520,
      "op_tensor_size": 107520,
      "offset": 975654912,
      "file_name": ".cache\\MatMulNBits_2_0_726.const",
      "file_size": 107520
    },
    "model.layers.28.final_norm_layernorm.weight.bf": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "bfloat16",
      "shape": [
        1536
      ],
      "size_in_bytes": 3072,
      "op_tensor_size": 3072,
      "offset": 975762432,
      "file_name": ".cache\\MatMulNBits_2_0_727.const",
      "file_size": 3072
    },
    "lm_head.MatMulNBits.qweight.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1536,
        151936
      ],
      "size_in_bytes": 233373696,
      "op_tensor_size": 233373696,
      "offset": 975765504,
      "file_name": ".cache\\MatMulNBits_2_0_728.const",
      "file_size": 233373696
    },
    "lm_head.MatMulNBits.bias.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        151936
      ],
      "size_in_bytes": 607744,
      "op_tensor_size": 607744,
      "offset": 1209139200,
      "file_name": ".cache\\MatMulNBits_2_0_729.const",
      "file_size": 607744
    },
    "lm_head.MatMulNBits.scales.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "float",
      "shape": [
        1823232
      ],
      "size_in_bytes": 7292928,
      "op_tensor_size": 7292928,
      "offset": 1209746944,
      "file_name": ".cache\\MatMulNBits_2_0_730.const",
      "file_size": 7292928
    },
    "lm_head.MatMulNBits.qzeros.preformat": {
      "packed_buffer_label": "const",
      "xrt_arg_id": 3,
      "dtype": "int8",
      "shape": [
        1823232
      ],
      "size_in_bytes": 1823232,
      "op_tensor_size": 1823232,
      "offset": 1217039872,
      "file_name": ".cache\\MatMulNBits_2_0_731.const",
      "file_size": 1823232
    },
    "past_key_values.0.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 0
    },
    "past_key_values.0.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 2097152
    },
    "present.0.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 0
    },
    "present.0.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 2097152
    },
    "past_key_values.1.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 4194304
    },
    "past_key_values.1.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 6291456
    },
    "present.1.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 4194304
    },
    "present.1.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 6291456
    },
    "past_key_values.2.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 8388608
    },
    "past_key_values.2.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 10485760
    },
    "present.2.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 8388608
    },
    "present.2.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 10485760
    },
    "past_key_values.3.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 12582912
    },
    "past_key_values.3.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 14680064
    },
    "present.3.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 12582912
    },
    "present.3.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 14680064
    },
    "past_key_values.4.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 16777216
    },
    "past_key_values.4.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 18874368
    },
    "present.4.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 16777216
    },
    "present.4.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 18874368
    },
    "past_key_values.5.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 20971520
    },
    "past_key_values.5.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 23068672
    },
    "present.5.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 20971520
    },
    "present.5.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 23068672
    },
    "past_key_values.6.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 25165824
    },
    "past_key_values.6.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 27262976
    },
    "present.6.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 25165824
    },
    "present.6.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 27262976
    },
    "past_key_values.7.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 29360128
    },
    "past_key_values.7.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 31457280
    },
    "present.7.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 29360128
    },
    "present.7.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 31457280
    },
    "past_key_values.8.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 33554432
    },
    "past_key_values.8.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 35651584
    },
    "present.8.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 33554432
    },
    "present.8.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 35651584
    },
    "past_key_values.9.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 37748736
    },
    "past_key_values.9.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 39845888
    },
    "present.9.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 37748736
    },
    "present.9.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 39845888
    },
    "past_key_values.10.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 41943040
    },
    "past_key_values.10.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 44040192
    },
    "present.10.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 41943040
    },
    "present.10.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 44040192
    },
    "past_key_values.11.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 46137344
    },
    "past_key_values.11.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 48234496
    },
    "present.11.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 46137344
    },
    "present.11.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 48234496
    },
    "past_key_values.12.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 50331648
    },
    "past_key_values.12.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 52428800
    },
    "present.12.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 50331648
    },
    "present.12.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 52428800
    },
    "past_key_values.13.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 54525952
    },
    "past_key_values.13.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 56623104
    },
    "present.13.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 54525952
    },
    "present.13.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 56623104
    },
    "past_key_values.14.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 58720256
    },
    "past_key_values.14.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 60817408
    },
    "present.14.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 58720256
    },
    "present.14.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 60817408
    },
    "past_key_values.15.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 62914560
    },
    "past_key_values.15.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 65011712
    },
    "present.15.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 62914560
    },
    "present.15.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 65011712
    },
    "past_key_values.16.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 67108864
    },
    "past_key_values.16.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 69206016
    },
    "present.16.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 67108864
    },
    "present.16.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 69206016
    },
    "past_key_values.17.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 71303168
    },
    "past_key_values.17.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 73400320
    },
    "present.17.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 71303168
    },
    "present.17.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 73400320
    },
    "past_key_values.18.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 75497472
    },
    "past_key_values.18.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 77594624
    },
    "present.18.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 75497472
    },
    "present.18.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 77594624
    },
    "past_key_values.19.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 79691776
    },
    "past_key_values.19.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 81788928
    },
    "present.19.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 79691776
    },
    "present.19.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 81788928
    },
    "past_key_values.20.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 83886080
    },
    "past_key_values.20.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 85983232
    },
    "present.20.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 83886080
    },
    "present.20.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 85983232
    },
    "past_key_values.21.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 88080384
    },
    "past_key_values.21.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 90177536
    },
    "present.21.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 88080384
    },
    "present.21.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 90177536
    },
    "past_key_values.22.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 92274688
    },
    "past_key_values.22.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 94371840
    },
    "present.22.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 92274688
    },
    "present.22.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 94371840
    },
    "past_key_values.23.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 96468992
    },
    "past_key_values.23.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 98566144
    },
    "present.23.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 96468992
    },
    "present.23.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 98566144
    },
    "past_key_values.24.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 100663296
    },
    "past_key_values.24.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 102760448
    },
    "present.24.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 100663296
    },
    "present.24.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 102760448
    },
    "past_key_values.25.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 104857600
    },
    "past_key_values.25.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 106954752
    },
    "present.25.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 104857600
    },
    "present.25.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 106954752
    },
    "past_key_values.26.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 109051904
    },
    "past_key_values.26.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 111149056
    },
    "present.26.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 109051904
    },
    "present.26.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 111149056
    },
    "past_key_values.27.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 113246208
    },
    "past_key_values.27.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 115343360
    },
    "present.27.key": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 113246208
    },
    "present.27.value": {
      "packed_buffer_label": "ext_buf_0",
      "xrt_arg_id": 5,
      "dtype": "bfloat16",
      "shape": [
        1,
        2,
        4096,
        128
      ],
      "size_in_bytes": 2097152,
      "op_tensor_size": 2097152,
      "offset": 115343360
    },
    "sin_cos_cache_token": {
      "packed_buffer_label": "ext_buf_1",
      "xrt_arg_id": 6,
      "dtype": "bfloat16",
      "shape": [
        131072,
        128
      ],
      "size_in_bytes": 33554432,
      "op_tensor_size": 33554432,
      "offset": 0
    }
  },
  "aux_info": {
    "is_llm": true
  }
}