| { | |
| "metadata": { | |
| "ParamSize": 290, | |
| "ParamBytes": 8044936192.0, | |
| "BitsPerParam": 16.0 | |
| }, | |
| "records": [ | |
| { | |
| "dataPath": "params_shard_0.bin", | |
| "format": "raw-shard", | |
| "nbytes": 777912320, | |
| "records": [ | |
| { | |
| "name": "model.embed_tokens.weight", | |
| "shape": [ | |
| 151936, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 777912320, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "16268f056ee3f5fe717f2473597d0422" | |
| }, | |
| { | |
| "dataPath": "params_shard_1.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2afe7dc3cf5821e9463c8158c3d10a25" | |
| }, | |
| { | |
| "dataPath": "params_shard_2.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c2b2ac28327e5d7e097450b4656f92b2" | |
| }, | |
| { | |
| "dataPath": "params_shard_3.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "52c25ce480d8709e389d9744450fd689" | |
| }, | |
| { | |
| "dataPath": "params_shard_4.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "df89d781717c27414bd1487eb1e8397d" | |
| }, | |
| { | |
| "dataPath": "params_shard_5.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "60f8f1bd53d7ebbecd9124ef4cc0fd2d" | |
| }, | |
| { | |
| "dataPath": "params_shard_6.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "57e0b77dc0db309e1f26de1c44a6233e" | |
| }, | |
| { | |
| "dataPath": "params_shard_7.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.1.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6041eb4ec5416162ccbb46851c213ab0" | |
| }, | |
| { | |
| "dataPath": "params_shard_8.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a383f9020109ae6e27c2bc7e57f38049" | |
| }, | |
| { | |
| "dataPath": "params_shard_9.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5b716ccd217fa9d891fa98775e4ab3e8" | |
| }, | |
| { | |
| "dataPath": "params_shard_10.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2a8c4a6aa90e5d9cf52d5f1807fd3eef" | |
| }, | |
| { | |
| "dataPath": "params_shard_11.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.10.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "edde730e3628495ecf3c5d9d8215fd00" | |
| }, | |
| { | |
| "dataPath": "params_shard_12.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3784c6ff85a3c4d54304200a935c6f7f" | |
| }, | |
| { | |
| "dataPath": "params_shard_13.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4017cbb11048f4b1c8cab3eee4bd7b9e" | |
| }, | |
| { | |
| "dataPath": "params_shard_14.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4aeb901cc612ee6c2b7959b9b2674af6" | |
| }, | |
| { | |
| "dataPath": "params_shard_15.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.11.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "07944ea44ad20617082847450b33ba32" | |
| }, | |
| { | |
| "dataPath": "params_shard_16.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c5d369f918f29049a5e46614ded7e826" | |
| }, | |
| { | |
| "dataPath": "params_shard_17.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "869102303c45547b613d2d32c9e87b9b" | |
| }, | |
| { | |
| "dataPath": "params_shard_18.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "92c0b2003cb8de9d1357edf78c4f9076" | |
| }, | |
| { | |
| "dataPath": "params_shard_19.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.12.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a1cfa5de52f4ef5faf7cef1b6c4c0a8c" | |
| }, | |
| { | |
| "dataPath": "params_shard_20.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9627d6fa76f30fcd45675ad2da5700f1" | |
| }, | |
| { | |
| "dataPath": "params_shard_21.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8385fda577606af24e528d670a314ea9" | |
| }, | |
| { | |
| "dataPath": "params_shard_22.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b183dda9d172ab2c44e0a0a8f8a80ed1" | |
| }, | |
| { | |
| "dataPath": "params_shard_23.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.13.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ee1804b0a61ef35ee9982285e157561a" | |
| }, | |
| { | |
| "dataPath": "params_shard_24.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d10778599fb449a568f14aa6c7d35c74" | |
| }, | |
| { | |
| "dataPath": "params_shard_25.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c5bc1c4247c306f6e097dbf4b3371e6f" | |
| }, | |
| { | |
| "dataPath": "params_shard_26.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a1839bce4b465b25e03bb79c3e439a07" | |
| }, | |
| { | |
| "dataPath": "params_shard_27.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.14.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d2497827aea347488026ea3b7ba972a1" | |
| }, | |
| { | |
| "dataPath": "params_shard_28.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "13b9f21f74e5b68891718b35b9b0f0f6" | |
| }, | |
| { | |
| "dataPath": "params_shard_29.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "438dff5153798f6833d28e1ee9250ab9" | |
| }, | |
| { | |
| "dataPath": "params_shard_30.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "28947d5a5e18423c93e7b9b4e91ced47" | |
| }, | |
| { | |
| "dataPath": "params_shard_31.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4798001ea680897a67fea0dc81cc000e" | |
| }, | |
| { | |
| "dataPath": "params_shard_32.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f6d7209e3540882a7645e5595a93f369" | |
| }, | |
| { | |
| "dataPath": "params_shard_33.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "14a08b5f0a5c1c85fe7b01d129b2385c" | |
| }, | |
| { | |
| "dataPath": "params_shard_34.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.2.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "00c097640a127a19322fd7c761c102a1" | |
| }, | |
| { | |
| "dataPath": "params_shard_35.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3baffbd380716bd4a12a7f64e702b678" | |
| }, | |
| { | |
| "dataPath": "params_shard_36.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d5ead370d4628ce7b93f55db1821e21d" | |
| }, | |
| { | |
| "dataPath": "params_shard_37.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5fd917720705c20b3b8a505944281553" | |
| }, | |
| { | |
| "dataPath": "params_shard_38.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.3.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "03689af1abac95ad8be1e51a0a1a2b36" | |
| }, | |
| { | |
| "dataPath": "params_shard_39.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5d4f780de5e1ead853ebe41322795fde" | |
| }, | |
| { | |
| "dataPath": "params_shard_40.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "25f7e76cb12355343e030474245c36dc" | |
| }, | |
| { | |
| "dataPath": "params_shard_41.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c0be46a71af79bef393e219b4a5877d1" | |
| }, | |
| { | |
| "dataPath": "params_shard_42.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.4.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0f8f1fe07b0970cce4cdb912493dc7d0" | |
| }, | |
| { | |
| "dataPath": "params_shard_43.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d3a153efeea91b8a37b74533a2502f30" | |
| }, | |
| { | |
| "dataPath": "params_shard_44.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "87f91d301ee95006453a50e59db13c14" | |
| }, | |
| { | |
| "dataPath": "params_shard_45.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "687ae98d24fc1111cbd0459089fd4b72" | |
| }, | |
| { | |
| "dataPath": "params_shard_46.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.5.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9e659aa3490d3d319c3d3a8be7f727ab" | |
| }, | |
| { | |
| "dataPath": "params_shard_47.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1da6a6bcaa6f5849e42240aa016ea8b2" | |
| }, | |
| { | |
| "dataPath": "params_shard_48.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "23ca26ff45ad4f5d28e5e11b481255d0" | |
| }, | |
| { | |
| "dataPath": "params_shard_49.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aea2a62fb13ba9db21b0b6599052ed2d" | |
| }, | |
| { | |
| "dataPath": "params_shard_50.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.6.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6590b2bffe271e8a54e8a58ca55bb70f" | |
| }, | |
| { | |
| "dataPath": "params_shard_51.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "7a3ce9a4a4908e5287acce28dc801dc2" | |
| }, | |
| { | |
| "dataPath": "params_shard_52.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b180d0de2749d4522f7a77431687fdd9" | |
| }, | |
| { | |
| "dataPath": "params_shard_53.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fa5d5eefa097b10e2b51a1ff20ba5a66" | |
| }, | |
| { | |
| "dataPath": "params_shard_54.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.7.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9d8f127fe1929880fe6069b4ea826ce6" | |
| }, | |
| { | |
| "dataPath": "params_shard_55.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "47643f943e56b20d4bb857fd049897be" | |
| }, | |
| { | |
| "dataPath": "params_shard_56.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a30c224f419e76bce038487b8fc463be" | |
| }, | |
| { | |
| "dataPath": "params_shard_57.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f995b06bb5d9172f509f7b876fe5c413" | |
| }, | |
| { | |
| "dataPath": "params_shard_58.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.8.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "08e898a85a85e71b7cf59c99c7a51aa8" | |
| }, | |
| { | |
| "dataPath": "params_shard_59.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "618dd40c3fab89deb6aecde9d7e79b89" | |
| }, | |
| { | |
| "dataPath": "params_shard_60.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4b9f3a4787320a8bbfdd238c8e82ee39" | |
| }, | |
| { | |
| "dataPath": "params_shard_61.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c3fcbd97415b30bd19aab15dd3fa06c6" | |
| }, | |
| { | |
| "dataPath": "params_shard_62.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.9.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bbef993025a919b462976269782c14c6" | |
| }, | |
| { | |
| "dataPath": "params_shard_63.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.15.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8523f2db75f3c0fb74b13c0b8356dd96" | |
| }, | |
| { | |
| "dataPath": "params_shard_64.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "79584f2340afbac63ffa51b83f16718f" | |
| }, | |
| { | |
| "dataPath": "params_shard_65.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0e1aea089e9e9fccacfc12c540a4906e" | |
| }, | |
| { | |
| "dataPath": "params_shard_66.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2fabc53f2267d4e080e85e1b89eb56bc" | |
| }, | |
| { | |
| "dataPath": "params_shard_67.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.16.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0e2def5b521224ab7243a338204a154e" | |
| }, | |
| { | |
| "dataPath": "params_shard_68.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ac86941165a4cfb914b4e4c79a92ae71" | |
| }, | |
| { | |
| "dataPath": "params_shard_69.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "470e13ad46727a6ca8254296e0c1f0ef" | |
| }, | |
| { | |
| "dataPath": "params_shard_70.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "d57825c602cd94cf0536dbaf24d45d60" | |
| }, | |
| { | |
| "dataPath": "params_shard_71.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.17.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fd0c96c4c3c1aeea97adb5224fd7165e" | |
| }, | |
| { | |
| "dataPath": "params_shard_72.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "af41c2acc8d569cebca29b140c164af9" | |
| }, | |
| { | |
| "dataPath": "params_shard_73.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b8443565521378e242ad9cfa75042196" | |
| }, | |
| { | |
| "dataPath": "params_shard_74.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3647f57b5a443b44c95baea585bdf37d" | |
| }, | |
| { | |
| "dataPath": "params_shard_75.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.18.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cf1ff24cc4e2e57540517d896a58f374" | |
| }, | |
| { | |
| "dataPath": "params_shard_76.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b560f8952b029e3dc65247075544d8ef" | |
| }, | |
| { | |
| "dataPath": "params_shard_77.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ca836295c241bbbcea32f64aa13d82ff" | |
| }, | |
| { | |
| "dataPath": "params_shard_78.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "90264965e62c7c1cfbde697f73e50bb1" | |
| }, | |
| { | |
| "dataPath": "params_shard_79.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.19.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "26cd250188114d2e9d049b3eec072500" | |
| }, | |
| { | |
| "dataPath": "params_shard_80.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2600c6e972dda0c6f9e31060fb7b4b16" | |
| }, | |
| { | |
| "dataPath": "params_shard_81.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "07a858666c6f96b522ced23710a05161" | |
| }, | |
| { | |
| "dataPath": "params_shard_82.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4c95c9d32654d1bce420c425e8900d5e" | |
| }, | |
| { | |
| "dataPath": "params_shard_83.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.20.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6484ffb51f939213a5171918a26cdac4" | |
| }, | |
| { | |
| "dataPath": "params_shard_84.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "48ad2f92300814be40eb3a11bc66ee22" | |
| }, | |
| { | |
| "dataPath": "params_shard_85.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "20cd349e41423e70cbfc3174e4b6840e" | |
| }, | |
| { | |
| "dataPath": "params_shard_86.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "728e4aceb7c3313c080c565b80c7aae1" | |
| }, | |
| { | |
| "dataPath": "params_shard_87.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.21.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "09d55466046740fc51a65564b4c3e24e" | |
| }, | |
| { | |
| "dataPath": "params_shard_88.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "99911fdf47bbcf1265ac936feeab932f" | |
| }, | |
| { | |
| "dataPath": "params_shard_89.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "66e8f519d243d3be95f39f4f71fb6644" | |
| }, | |
| { | |
| "dataPath": "params_shard_90.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b8414e0713f01a9e4c965c057406599b" | |
| }, | |
| { | |
| "dataPath": "params_shard_91.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.22.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0160aee5acb3241a827096993eae8e23" | |
| }, | |
| { | |
| "dataPath": "params_shard_92.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ca884b9a5705212fc0ee70173150a190" | |
| }, | |
| { | |
| "dataPath": "params_shard_93.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "61557d7906de413ce2c45402795a85b4" | |
| }, | |
| { | |
| "dataPath": "params_shard_94.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5ccbc954e2cd47ea009f5e3a80c7a0f6" | |
| }, | |
| { | |
| "dataPath": "params_shard_95.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.23.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "a920d5012d437de49a1fd984a6c6c0b4" | |
| }, | |
| { | |
| "dataPath": "params_shard_96.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fc1a4684649e7b1ddd2e32b7e43ef3aa" | |
| }, | |
| { | |
| "dataPath": "params_shard_97.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3cab136bddb20e017bc457ad959aa7a5" | |
| }, | |
| { | |
| "dataPath": "params_shard_98.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "9c4cf48be70960fca9c00097ad0baa55" | |
| }, | |
| { | |
| "dataPath": "params_shard_99.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.24.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c1bd2d91ade06664163fb9fb8c2ad0e4" | |
| }, | |
| { | |
| "dataPath": "params_shard_100.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "b86457218c0052370718be397f4489ce" | |
| }, | |
| { | |
| "dataPath": "params_shard_101.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "367a179e2cc03e4be92e6aa6d83b6021" | |
| }, | |
| { | |
| "dataPath": "params_shard_102.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4892e95a34d2188836b0427136edd8d6" | |
| }, | |
| { | |
| "dataPath": "params_shard_103.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.25.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "6090afa4f77f7131a0c00277b48e03ab" | |
| }, | |
| { | |
| "dataPath": "params_shard_104.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e2e7eb1949edf3dd584aa89688585b04" | |
| }, | |
| { | |
| "dataPath": "params_shard_105.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f06bd3b3a03fc2cb0ea1ae40dab30c52" | |
| }, | |
| { | |
| "dataPath": "params_shard_106.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "0ad191db4a32c080bd6d55391f0ec2a8" | |
| }, | |
| { | |
| "dataPath": "params_shard_107.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.26.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1e2fc0039ce126ba36cb4f9390c99da2" | |
| }, | |
| { | |
| "dataPath": "params_shard_108.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e6b1cf88efd38dc00ef49959b83b7240" | |
| }, | |
| { | |
| "dataPath": "params_shard_109.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "945013c3541b0fb2e0d19c781c85947d" | |
| }, | |
| { | |
| "dataPath": "params_shard_110.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f1d3e756637a0b3ada81b7002fe0a28a" | |
| }, | |
| { | |
| "dataPath": "params_shard_111.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.27.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "279fb52e3495c1f125ed580eabb503ce" | |
| }, | |
| { | |
| "dataPath": "params_shard_112.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5e07e7e1a25140aa62b03103ee073580" | |
| }, | |
| { | |
| "dataPath": "params_shard_113.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c1a39ab06253df5a61614bd4e3caa6a8" | |
| }, | |
| { | |
| "dataPath": "params_shard_114.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "40a77dd52a5a0fd6d549f06a4a2b2a89" | |
| }, | |
| { | |
| "dataPath": "params_shard_115.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.28.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3e330360509d0d83efd1b0196e2e429f" | |
| }, | |
| { | |
| "dataPath": "params_shard_116.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "019cc2f065b7566589f3f87189eef346" | |
| }, | |
| { | |
| "dataPath": "params_shard_117.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "81ec5b9361a846dc2bc9ea725a72eb37" | |
| }, | |
| { | |
| "dataPath": "params_shard_118.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "73bdc4578242158e1e42aa3007d627a0" | |
| }, | |
| { | |
| "dataPath": "params_shard_119.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.29.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "8ea81580e68d80e915e6cfc3b2c92d9d" | |
| }, | |
| { | |
| "dataPath": "params_shard_120.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "cd7265ad7a8e48e1d6dc05562be30af7" | |
| }, | |
| { | |
| "dataPath": "params_shard_121.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "95f03425977d91c41556437e1b67f271" | |
| }, | |
| { | |
| "dataPath": "params_shard_122.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "3ac6f3123cfa199d810139f05cacdcf7" | |
| }, | |
| { | |
| "dataPath": "params_shard_123.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.30.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "f4e1199138c343ba3fe0794ca874ce1e" | |
| }, | |
| { | |
| "dataPath": "params_shard_124.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "2ade9927a09507bfbcf831a607cd91d4" | |
| }, | |
| { | |
| "dataPath": "params_shard_125.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4e08fea49bc805c6b4c66474f1273a20" | |
| }, | |
| { | |
| "dataPath": "params_shard_126.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "aef037cc5390dead2aaf84b243b7f402" | |
| }, | |
| { | |
| "dataPath": "params_shard_127.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.31.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "5c445cdb815a6d24ddfe5a676902895a" | |
| }, | |
| { | |
| "dataPath": "params_shard_128.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "46ef04678fa41c27652c02930573855c" | |
| }, | |
| { | |
| "dataPath": "params_shard_129.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "684ed44e196fd4284fa76c51b7f178b5" | |
| }, | |
| { | |
| "dataPath": "params_shard_130.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e2014970610c264b074ac440f88e4b4e" | |
| }, | |
| { | |
| "dataPath": "params_shard_131.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.32.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "1356c271a041b23a27dd00cbd2bc60c1" | |
| }, | |
| { | |
| "dataPath": "params_shard_132.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "128b7cfa764505a10118528cf443249b" | |
| }, | |
| { | |
| "dataPath": "params_shard_133.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "e2c4d82572bc1157e0a6b47589ae47c8" | |
| }, | |
| { | |
| "dataPath": "params_shard_134.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "958654cc40dd1158612fd480c94c4dd2" | |
| }, | |
| { | |
| "dataPath": "params_shard_135.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.33.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "fffddd9d5c30a0dce6a09f106f11b729" | |
| }, | |
| { | |
| "dataPath": "params_shard_136.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.34.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "4248875070521c45890ef25d3748101e" | |
| }, | |
| { | |
| "dataPath": "params_shard_137.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.34.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c8feb9eceb20704bb20aae8df9855c09" | |
| }, | |
| { | |
| "dataPath": "params_shard_138.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.34.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "bf053f4e939abbbeb98cea064016a71e" | |
| }, | |
| { | |
| "dataPath": "params_shard_139.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.34.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "ab76118ae27ca81b16077c5cfeb06e24" | |
| }, | |
| { | |
| "dataPath": "params_shard_140.bin", | |
| "format": "raw-shard", | |
| "nbytes": 99614720, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.mlp.gate_up_proj.weight", | |
| "shape": [ | |
| 19456, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 99614720, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "c6718830ae9c831d53267cce3b8f954d" | |
| }, | |
| { | |
| "dataPath": "params_shard_141.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31457280, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "55999bb2193c4a15b0e41d100a4b3e67" | |
| }, | |
| { | |
| "dataPath": "params_shard_142.bin", | |
| "format": "raw-shard", | |
| "nbytes": 20971520, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.self_attn.o_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 4096 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 20971520, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "76c91a26d963d71effea0466d0595225" | |
| }, | |
| { | |
| "dataPath": "params_shard_143.bin", | |
| "format": "raw-shard", | |
| "nbytes": 49807360, | |
| "records": [ | |
| { | |
| "name": "model.layers.35.mlp.down_proj.weight", | |
| "shape": [ | |
| 2560, | |
| 9728 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 49807360, | |
| "byteOffset": 0 | |
| } | |
| ], | |
| "md5sum": "26bf72d3412938f1af3ddf6aca836dd0" | |
| }, | |
| { | |
| "dataPath": "params_shard_144.bin", | |
| "format": "raw-shard", | |
| "nbytes": 31849472, | |
| "records": [ | |
| { | |
| "name": "model.layers.0.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 0 | |
| }, | |
| { | |
| "name": "model.layers.0.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 5120 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 10240 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.c_attn.weight", | |
| "shape": [ | |
| 6144, | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 31457280, | |
| "byteOffset": 10496 | |
| }, | |
| { | |
| "name": "model.layers.0.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31467776 | |
| }, | |
| { | |
| "name": "model.layers.1.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31468032 | |
| }, | |
| { | |
| "name": "model.layers.1.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31473152 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31478272 | |
| }, | |
| { | |
| "name": "model.layers.1.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31478528 | |
| }, | |
| { | |
| "name": "model.layers.10.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31478784 | |
| }, | |
| { | |
| "name": "model.layers.10.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31483904 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31489024 | |
| }, | |
| { | |
| "name": "model.layers.10.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31489280 | |
| }, | |
| { | |
| "name": "model.layers.11.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31489536 | |
| }, | |
| { | |
| "name": "model.layers.11.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31494656 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31499776 | |
| }, | |
| { | |
| "name": "model.layers.11.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31500032 | |
| }, | |
| { | |
| "name": "model.layers.12.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31500288 | |
| }, | |
| { | |
| "name": "model.layers.12.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31505408 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31510528 | |
| }, | |
| { | |
| "name": "model.layers.12.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31510784 | |
| }, | |
| { | |
| "name": "model.layers.13.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31511040 | |
| }, | |
| { | |
| "name": "model.layers.13.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31516160 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31521280 | |
| }, | |
| { | |
| "name": "model.layers.13.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31521536 | |
| }, | |
| { | |
| "name": "model.layers.14.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31521792 | |
| }, | |
| { | |
| "name": "model.layers.14.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31526912 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31532032 | |
| }, | |
| { | |
| "name": "model.layers.14.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31532288 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31532544 | |
| }, | |
| { | |
| "name": "model.layers.15.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31532800 | |
| }, | |
| { | |
| "name": "model.layers.2.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31533056 | |
| }, | |
| { | |
| "name": "model.layers.2.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31538176 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31543296 | |
| }, | |
| { | |
| "name": "model.layers.2.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31543552 | |
| }, | |
| { | |
| "name": "model.layers.3.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31543808 | |
| }, | |
| { | |
| "name": "model.layers.3.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31548928 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31554048 | |
| }, | |
| { | |
| "name": "model.layers.3.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31554304 | |
| }, | |
| { | |
| "name": "model.layers.4.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31554560 | |
| }, | |
| { | |
| "name": "model.layers.4.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31559680 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31564800 | |
| }, | |
| { | |
| "name": "model.layers.4.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31565056 | |
| }, | |
| { | |
| "name": "model.layers.5.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31565312 | |
| }, | |
| { | |
| "name": "model.layers.5.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31570432 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31575552 | |
| }, | |
| { | |
| "name": "model.layers.5.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31575808 | |
| }, | |
| { | |
| "name": "model.layers.6.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31576064 | |
| }, | |
| { | |
| "name": "model.layers.6.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31581184 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31586304 | |
| }, | |
| { | |
| "name": "model.layers.6.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31586560 | |
| }, | |
| { | |
| "name": "model.layers.7.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31586816 | |
| }, | |
| { | |
| "name": "model.layers.7.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31591936 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31597056 | |
| }, | |
| { | |
| "name": "model.layers.7.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31597312 | |
| }, | |
| { | |
| "name": "model.layers.8.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31597568 | |
| }, | |
| { | |
| "name": "model.layers.8.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31602688 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31607808 | |
| }, | |
| { | |
| "name": "model.layers.8.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31608064 | |
| }, | |
| { | |
| "name": "model.layers.9.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31608320 | |
| }, | |
| { | |
| "name": "model.layers.9.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31613440 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31618560 | |
| }, | |
| { | |
| "name": "model.layers.9.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31618816 | |
| }, | |
| { | |
| "name": "model.layers.15.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31619072 | |
| }, | |
| { | |
| "name": "model.layers.15.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31624192 | |
| }, | |
| { | |
| "name": "model.layers.16.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31629312 | |
| }, | |
| { | |
| "name": "model.layers.16.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31634432 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31639552 | |
| }, | |
| { | |
| "name": "model.layers.16.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31639808 | |
| }, | |
| { | |
| "name": "model.layers.17.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31640064 | |
| }, | |
| { | |
| "name": "model.layers.17.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31645184 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31650304 | |
| }, | |
| { | |
| "name": "model.layers.17.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31650560 | |
| }, | |
| { | |
| "name": "model.layers.18.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31650816 | |
| }, | |
| { | |
| "name": "model.layers.18.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31655936 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31661056 | |
| }, | |
| { | |
| "name": "model.layers.18.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31661312 | |
| }, | |
| { | |
| "name": "model.layers.19.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31661568 | |
| }, | |
| { | |
| "name": "model.layers.19.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31666688 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31671808 | |
| }, | |
| { | |
| "name": "model.layers.19.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31672064 | |
| }, | |
| { | |
| "name": "model.layers.20.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31672320 | |
| }, | |
| { | |
| "name": "model.layers.20.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31677440 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31682560 | |
| }, | |
| { | |
| "name": "model.layers.20.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31682816 | |
| }, | |
| { | |
| "name": "model.layers.21.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31683072 | |
| }, | |
| { | |
| "name": "model.layers.21.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31688192 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31693312 | |
| }, | |
| { | |
| "name": "model.layers.21.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31693568 | |
| }, | |
| { | |
| "name": "model.layers.22.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31693824 | |
| }, | |
| { | |
| "name": "model.layers.22.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31698944 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31704064 | |
| }, | |
| { | |
| "name": "model.layers.22.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31704320 | |
| }, | |
| { | |
| "name": "model.layers.23.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31704576 | |
| }, | |
| { | |
| "name": "model.layers.23.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31709696 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31714816 | |
| }, | |
| { | |
| "name": "model.layers.23.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31715072 | |
| }, | |
| { | |
| "name": "model.layers.24.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31715328 | |
| }, | |
| { | |
| "name": "model.layers.24.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31720448 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31725568 | |
| }, | |
| { | |
| "name": "model.layers.24.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31725824 | |
| }, | |
| { | |
| "name": "model.layers.25.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31726080 | |
| }, | |
| { | |
| "name": "model.layers.25.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31731200 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31736320 | |
| }, | |
| { | |
| "name": "model.layers.25.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31736576 | |
| }, | |
| { | |
| "name": "model.layers.26.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31736832 | |
| }, | |
| { | |
| "name": "model.layers.26.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31741952 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31747072 | |
| }, | |
| { | |
| "name": "model.layers.26.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31747328 | |
| }, | |
| { | |
| "name": "model.layers.27.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31747584 | |
| }, | |
| { | |
| "name": "model.layers.27.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31752704 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31757824 | |
| }, | |
| { | |
| "name": "model.layers.27.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31758080 | |
| }, | |
| { | |
| "name": "model.layers.28.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31758336 | |
| }, | |
| { | |
| "name": "model.layers.28.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31763456 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31768576 | |
| }, | |
| { | |
| "name": "model.layers.28.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31768832 | |
| }, | |
| { | |
| "name": "model.layers.29.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31769088 | |
| }, | |
| { | |
| "name": "model.layers.29.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31774208 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31779328 | |
| }, | |
| { | |
| "name": "model.layers.29.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31779584 | |
| }, | |
| { | |
| "name": "model.layers.30.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31779840 | |
| }, | |
| { | |
| "name": "model.layers.30.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31784960 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31790080 | |
| }, | |
| { | |
| "name": "model.layers.30.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31790336 | |
| }, | |
| { | |
| "name": "model.layers.31.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31790592 | |
| }, | |
| { | |
| "name": "model.layers.31.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31795712 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31800832 | |
| }, | |
| { | |
| "name": "model.layers.31.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31801088 | |
| }, | |
| { | |
| "name": "model.layers.32.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31801344 | |
| }, | |
| { | |
| "name": "model.layers.32.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31806464 | |
| }, | |
| { | |
| "name": "model.layers.32.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31811584 | |
| }, | |
| { | |
| "name": "model.layers.32.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31811840 | |
| }, | |
| { | |
| "name": "model.layers.33.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31812096 | |
| }, | |
| { | |
| "name": "model.layers.33.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31817216 | |
| }, | |
| { | |
| "name": "model.layers.33.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31822336 | |
| }, | |
| { | |
| "name": "model.layers.33.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31822592 | |
| }, | |
| { | |
| "name": "model.layers.34.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31822848 | |
| }, | |
| { | |
| "name": "model.layers.34.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31827968 | |
| }, | |
| { | |
| "name": "model.layers.34.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31833088 | |
| }, | |
| { | |
| "name": "model.layers.34.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31833344 | |
| }, | |
| { | |
| "name": "model.layers.35.self_attn.k_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31833600 | |
| }, | |
| { | |
| "name": "model.layers.35.self_attn.q_norm.weight", | |
| "shape": [ | |
| 128 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 256, | |
| "byteOffset": 31833856 | |
| }, | |
| { | |
| "name": "model.layers.35.input_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31834112 | |
| }, | |
| { | |
| "name": "model.layers.35.post_attention_layernorm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31839232 | |
| }, | |
| { | |
| "name": "model.norm.weight", | |
| "shape": [ | |
| 2560 | |
| ], | |
| "dtype": "float16", | |
| "format": "f32-to-bf16", | |
| "nbytes": 5120, | |
| "byteOffset": 31844352 | |
| } | |
| ], | |
| "md5sum": "aed03879d10b9aba07b420bf2820bf3f" | |
| } | |
| ] | |
| } |