Qwen3-4B-q0f16-MLC / ndarray-cache.json
riczhou's picture
Upload folder using huggingface_hub
19fe29e verified
{
"metadata": {
"ParamSize": 290,
"ParamBytes": 8044936192.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 777912320,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
151936,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 777912320,
"byteOffset": 0
}
],
"md5sum": "16268f056ee3f5fe717f2473597d0422"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "2afe7dc3cf5821e9463c8158c3d10a25"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "c2b2ac28327e5d7e097450b4656f92b2"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "52c25ce480d8709e389d9744450fd689"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "df89d781717c27414bd1487eb1e8397d"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "60f8f1bd53d7ebbecd9124ef4cc0fd2d"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "57e0b77dc0db309e1f26de1c44a6233e"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "6041eb4ec5416162ccbb46851c213ab0"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "a383f9020109ae6e27c2bc7e57f38049"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "5b716ccd217fa9d891fa98775e4ab3e8"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "2a8c4a6aa90e5d9cf52d5f1807fd3eef"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "edde730e3628495ecf3c5d9d8215fd00"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "3784c6ff85a3c4d54304200a935c6f7f"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "4017cbb11048f4b1c8cab3eee4bd7b9e"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "4aeb901cc612ee6c2b7959b9b2674af6"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "07944ea44ad20617082847450b33ba32"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "c5d369f918f29049a5e46614ded7e826"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "869102303c45547b613d2d32c9e87b9b"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "92c0b2003cb8de9d1357edf78c4f9076"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "a1cfa5de52f4ef5faf7cef1b6c4c0a8c"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "9627d6fa76f30fcd45675ad2da5700f1"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "8385fda577606af24e528d670a314ea9"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "b183dda9d172ab2c44e0a0a8f8a80ed1"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "ee1804b0a61ef35ee9982285e157561a"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "d10778599fb449a568f14aa6c7d35c74"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "c5bc1c4247c306f6e097dbf4b3371e6f"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "a1839bce4b465b25e03bb79c3e439a07"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "d2497827aea347488026ea3b7ba972a1"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "13b9f21f74e5b68891718b35b9b0f0f6"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "438dff5153798f6833d28e1ee9250ab9"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "28947d5a5e18423c93e7b9b4e91ced47"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "4798001ea680897a67fea0dc81cc000e"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "f6d7209e3540882a7645e5595a93f369"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "14a08b5f0a5c1c85fe7b01d129b2385c"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "00c097640a127a19322fd7c761c102a1"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "3baffbd380716bd4a12a7f64e702b678"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "d5ead370d4628ce7b93f55db1821e21d"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "5fd917720705c20b3b8a505944281553"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "03689af1abac95ad8be1e51a0a1a2b36"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "5d4f780de5e1ead853ebe41322795fde"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "25f7e76cb12355343e030474245c36dc"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "c0be46a71af79bef393e219b4a5877d1"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "0f8f1fe07b0970cce4cdb912493dc7d0"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "d3a153efeea91b8a37b74533a2502f30"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "87f91d301ee95006453a50e59db13c14"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "687ae98d24fc1111cbd0459089fd4b72"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "9e659aa3490d3d319c3d3a8be7f727ab"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "1da6a6bcaa6f5849e42240aa016ea8b2"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "23ca26ff45ad4f5d28e5e11b481255d0"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "aea2a62fb13ba9db21b0b6599052ed2d"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "6590b2bffe271e8a54e8a58ca55bb70f"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "7a3ce9a4a4908e5287acce28dc801dc2"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "b180d0de2749d4522f7a77431687fdd9"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "fa5d5eefa097b10e2b51a1ff20ba5a66"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "9d8f127fe1929880fe6069b4ea826ce6"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "47643f943e56b20d4bb857fd049897be"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "a30c224f419e76bce038487b8fc463be"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "f995b06bb5d9172f509f7b876fe5c413"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "08e898a85a85e71b7cf59c99c7a51aa8"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "618dd40c3fab89deb6aecde9d7e79b89"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "4b9f3a4787320a8bbfdd238c8e82ee39"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "c3fcbd97415b30bd19aab15dd3fa06c6"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "bbef993025a919b462976269782c14c6"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "8523f2db75f3c0fb74b13c0b8356dd96"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "79584f2340afbac63ffa51b83f16718f"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "0e1aea089e9e9fccacfc12c540a4906e"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "2fabc53f2267d4e080e85e1b89eb56bc"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "0e2def5b521224ab7243a338204a154e"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "ac86941165a4cfb914b4e4c79a92ae71"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "470e13ad46727a6ca8254296e0c1f0ef"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "d57825c602cd94cf0536dbaf24d45d60"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "fd0c96c4c3c1aeea97adb5224fd7165e"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "af41c2acc8d569cebca29b140c164af9"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "b8443565521378e242ad9cfa75042196"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "3647f57b5a443b44c95baea585bdf37d"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "cf1ff24cc4e2e57540517d896a58f374"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "b560f8952b029e3dc65247075544d8ef"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "ca836295c241bbbcea32f64aa13d82ff"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "90264965e62c7c1cfbde697f73e50bb1"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "26cd250188114d2e9d049b3eec072500"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "2600c6e972dda0c6f9e31060fb7b4b16"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "07a858666c6f96b522ced23710a05161"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "4c95c9d32654d1bce420c425e8900d5e"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "6484ffb51f939213a5171918a26cdac4"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "48ad2f92300814be40eb3a11bc66ee22"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "20cd349e41423e70cbfc3174e4b6840e"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "728e4aceb7c3313c080c565b80c7aae1"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "09d55466046740fc51a65564b4c3e24e"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.22.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "99911fdf47bbcf1265ac936feeab932f"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "66e8f519d243d3be95f39f4f71fb6644"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "b8414e0713f01a9e4c965c057406599b"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "0160aee5acb3241a827096993eae8e23"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.23.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "ca884b9a5705212fc0ee70173150a190"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "61557d7906de413ce2c45402795a85b4"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "5ccbc954e2cd47ea009f5e3a80c7a0f6"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "a920d5012d437de49a1fd984a6c6c0b4"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.24.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "fc1a4684649e7b1ddd2e32b7e43ef3aa"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "3cab136bddb20e017bc457ad959aa7a5"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "9c4cf48be70960fca9c00097ad0baa55"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "c1bd2d91ade06664163fb9fb8c2ad0e4"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.25.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "b86457218c0052370718be397f4489ce"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "367a179e2cc03e4be92e6aa6d83b6021"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "4892e95a34d2188836b0427136edd8d6"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "6090afa4f77f7131a0c00277b48e03ab"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.26.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "e2e7eb1949edf3dd584aa89688585b04"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "f06bd3b3a03fc2cb0ea1ae40dab30c52"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "0ad191db4a32c080bd6d55391f0ec2a8"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "1e2fc0039ce126ba36cb4f9390c99da2"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.27.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "e6b1cf88efd38dc00ef49959b83b7240"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "945013c3541b0fb2e0d19c781c85947d"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "f1d3e756637a0b3ada81b7002fe0a28a"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "279fb52e3495c1f125ed580eabb503ce"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.28.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "5e07e7e1a25140aa62b03103ee073580"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "c1a39ab06253df5a61614bd4e3caa6a8"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.28.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "40a77dd52a5a0fd6d549f06a4a2b2a89"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "3e330360509d0d83efd1b0196e2e429f"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.29.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "019cc2f065b7566589f3f87189eef346"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "81ec5b9361a846dc2bc9ea725a72eb37"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.29.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "73bdc4578242158e1e42aa3007d627a0"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "8ea81580e68d80e915e6cfc3b2c92d9d"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.30.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "cd7265ad7a8e48e1d6dc05562be30af7"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "95f03425977d91c41556437e1b67f271"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.30.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "3ac6f3123cfa199d810139f05cacdcf7"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "f4e1199138c343ba3fe0794ca874ce1e"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.31.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "2ade9927a09507bfbcf831a607cd91d4"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "4e08fea49bc805c6b4c66474f1273a20"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.31.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "aef037cc5390dead2aaf84b243b7f402"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "5c445cdb815a6d24ddfe5a676902895a"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.32.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "46ef04678fa41c27652c02930573855c"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "684ed44e196fd4284fa76c51b7f178b5"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.32.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "e2014970610c264b074ac440f88e4b4e"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "1356c271a041b23a27dd00cbd2bc60c1"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.33.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "128b7cfa764505a10118528cf443249b"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "e2c4d82572bc1157e0a6b47589ae47c8"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.33.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "958654cc40dd1158612fd480c94c4dd2"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "fffddd9d5c30a0dce6a09f106f11b729"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.34.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "4248875070521c45890ef25d3748101e"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "c8feb9eceb20704bb20aae8df9855c09"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.34.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "bf053f4e939abbbeb98cea064016a71e"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "ab76118ae27ca81b16077c5cfeb06e24"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 99614720,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.weight",
"shape": [
19456,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 99614720,
"byteOffset": 0
}
],
"md5sum": "c6718830ae9c831d53267cce3b8f954d"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.35.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 0
}
],
"md5sum": "55999bb2193c4a15b0e41d100a4b3e67"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 20971520,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.weight",
"shape": [
2560,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 20971520,
"byteOffset": 0
}
],
"md5sum": "76c91a26d963d71effea0466d0595225"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 49807360,
"records": [
{
"name": "model.layers.35.mlp.down_proj.weight",
"shape": [
2560,
9728
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 49807360,
"byteOffset": 0
}
],
"md5sum": "26bf72d3412938f1af3ddf6aca836dd0"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 31849472,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 5120
},
{
"name": "model.layers.0.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 10240
},
{
"name": "model.layers.0.self_attn.c_attn.weight",
"shape": [
6144,
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 31457280,
"byteOffset": 10496
},
{
"name": "model.layers.0.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31467776
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31468032
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31473152
},
{
"name": "model.layers.1.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31478272
},
{
"name": "model.layers.1.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31478528
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31478784
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31483904
},
{
"name": "model.layers.10.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31489024
},
{
"name": "model.layers.10.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31489280
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31489536
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31494656
},
{
"name": "model.layers.11.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31499776
},
{
"name": "model.layers.11.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31500032
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31500288
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31505408
},
{
"name": "model.layers.12.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31510528
},
{
"name": "model.layers.12.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31510784
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31511040
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31516160
},
{
"name": "model.layers.13.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31521280
},
{
"name": "model.layers.13.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31521536
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31521792
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31526912
},
{
"name": "model.layers.14.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31532032
},
{
"name": "model.layers.14.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31532288
},
{
"name": "model.layers.15.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31532544
},
{
"name": "model.layers.15.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31532800
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31533056
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31538176
},
{
"name": "model.layers.2.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31543296
},
{
"name": "model.layers.2.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31543552
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31543808
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31548928
},
{
"name": "model.layers.3.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31554048
},
{
"name": "model.layers.3.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31554304
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31554560
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31559680
},
{
"name": "model.layers.4.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31564800
},
{
"name": "model.layers.4.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31565056
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31565312
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31570432
},
{
"name": "model.layers.5.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31575552
},
{
"name": "model.layers.5.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31575808
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31576064
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31581184
},
{
"name": "model.layers.6.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31586304
},
{
"name": "model.layers.6.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31586560
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31586816
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31591936
},
{
"name": "model.layers.7.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31597056
},
{
"name": "model.layers.7.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31597312
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31597568
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31602688
},
{
"name": "model.layers.8.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31607808
},
{
"name": "model.layers.8.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31608064
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31608320
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31613440
},
{
"name": "model.layers.9.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31618560
},
{
"name": "model.layers.9.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31618816
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31619072
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31624192
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31629312
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31634432
},
{
"name": "model.layers.16.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31639552
},
{
"name": "model.layers.16.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31639808
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31640064
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31645184
},
{
"name": "model.layers.17.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31650304
},
{
"name": "model.layers.17.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31650560
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31650816
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31655936
},
{
"name": "model.layers.18.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31661056
},
{
"name": "model.layers.18.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31661312
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31661568
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31666688
},
{
"name": "model.layers.19.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31671808
},
{
"name": "model.layers.19.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31672064
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31672320
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31677440
},
{
"name": "model.layers.20.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31682560
},
{
"name": "model.layers.20.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31682816
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31683072
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31688192
},
{
"name": "model.layers.21.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31693312
},
{
"name": "model.layers.21.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31693568
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31693824
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31698944
},
{
"name": "model.layers.22.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31704064
},
{
"name": "model.layers.22.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31704320
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31704576
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31709696
},
{
"name": "model.layers.23.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31714816
},
{
"name": "model.layers.23.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31715072
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31715328
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31720448
},
{
"name": "model.layers.24.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31725568
},
{
"name": "model.layers.24.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31725824
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31726080
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31731200
},
{
"name": "model.layers.25.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31736320
},
{
"name": "model.layers.25.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31736576
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31736832
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31741952
},
{
"name": "model.layers.26.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31747072
},
{
"name": "model.layers.26.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31747328
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31747584
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31752704
},
{
"name": "model.layers.27.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31757824
},
{
"name": "model.layers.27.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31758080
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31758336
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31763456
},
{
"name": "model.layers.28.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31768576
},
{
"name": "model.layers.28.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31768832
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31769088
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31774208
},
{
"name": "model.layers.29.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31779328
},
{
"name": "model.layers.29.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31779584
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31779840
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31784960
},
{
"name": "model.layers.30.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31790080
},
{
"name": "model.layers.30.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31790336
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31790592
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31795712
},
{
"name": "model.layers.31.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31800832
},
{
"name": "model.layers.31.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31801088
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31801344
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31806464
},
{
"name": "model.layers.32.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31811584
},
{
"name": "model.layers.32.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31811840
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31812096
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31817216
},
{
"name": "model.layers.33.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31822336
},
{
"name": "model.layers.33.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31822592
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31822848
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31827968
},
{
"name": "model.layers.34.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31833088
},
{
"name": "model.layers.34.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31833344
},
{
"name": "model.layers.35.self_attn.k_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31833600
},
{
"name": "model.layers.35.self_attn.q_norm.weight",
"shape": [
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 256,
"byteOffset": 31833856
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31834112
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31839232
},
{
"name": "model.norm.weight",
"shape": [
2560
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 5120,
"byteOffset": 31844352
}
],
"md5sum": "aed03879d10b9aba07b420bf2820bf3f"
}
]
}