{
  "_name_or_path": "Phi-3-small-128k-instruct",
  "architectures": [
    "Phi3SmallForCausalLM"
  ],
  "attention_dropout_prob": 0.0,
  "auto_map": {
    "AutoConfig": "configuration_phi3_small.Phi3SmallConfig",
    "AutoModelForCausalLM": "modeling_phi3_small.Phi3SmallForCausalLM",
    "AutoTokenizer": "tokenization_phi3_small.Phi3SmallTokenizer"
  },
  "blocksparse_block_size": 64,
  "blocksparse_homo_head_pattern": false,
  "blocksparse_num_local_blocks": 16,
  "blocksparse_triton_kernel_block_size": 64,
  "blocksparse_vert_stride": 8,
  "bos_token_id": 100257,
  "dense_attention_every_n_layers": 2,
  "embedding_dropout_prob": 0.1,
  "eos_token_id": 100257,
  "ff_dim_multiplier": null,
  "ff_intermediate_size": 14336,
  "ffn_dropout_prob": 0.1,
  "gegelu_limit": 20.0,
  "gegelu_pad_to_256": true,
  "hidden_act": "gegelu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "max_position_embeddings": 131072,
  "model_type": "phi3small",
  "mup_attn_multiplier": 1.0,
  "mup_embedding_multiplier": 10.0,
  "mup_use_scaling": true,
  "mup_width_multiplier": 8.0,
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "original_max_position_embeddings": 8192,
  "pad_sequence_to_multiple_of_64": true,
  "reorder_and_upcast_attn": false,
  "rope_embedding_base": 1000000,
  "rope_position_scale": 1.0,
  "rope_scaling": {
    "long_factor": [
      1.0,
      1.01,
      1.01,
      1.02,
      1.04,
      1.04,
      1.04,
      1.05,
      1.05,
      1.06,
      1.07,
      1.08,
      1.08,
      1.08,
      1.08,
      1.08,
      1.08,
      1.08,
      1.09,
      1.09,
      1.2,
      2.31,
      3.76,
      9.38,
      10.1,
      10.8,
      18.1,
      25.2,
      25.3,
      26.1,
      26.6,
      30.2,
      33.0,
      41.5,
      44.4,
      44.8,
      50.2,
      51.9,
      59.3,
      62.7,
      66.1,
      66.3,
      85.8,
      89.3,
      90.0,
      99.9,
      107.0,
      110.0,
      111.0,
      117.0,
      118.0,
      121.0,
      122.0,
      127.0,
      127.0,
      128.0,
      128.0,
      128.0,
      128.0,
      128.0,
      128.0,
      129.0,
      129.0,
      129.0
    ],
    "long_mscale": 1.1902380714238083,
    "original_max_position_embeddings": 8192,
    "short_factor": [
      1.02,
      1.02,
      1.05,
      1.05,
      1.06,
      1.08,
      1.08,
      1.08,
      1.08,
      1.12,
      1.1800000000000002,
      1.1900000000000002,
      1.1900000000000002,
      1.2100000000000002,
      1.2300000000000002,
      1.2400000000000002,
      1.2400000000000002,
      1.2500000000000002,
      1.3000000000000003,
      1.3100000000000003,
      1.4600000000000004,
      1.5100000000000005,
      1.7000000000000006,
      1.9300000000000008,
      2.080000000000001,
      2.4399999999999933,
      3.2199999999999767,
      3.4499999999999718,
      3.579999999999969,
      4.669999999999946,
      4.779999999999943,
      5.999999999999917,
      6.009999999999917,
      6.4199999999999084,
      6.619999999999904,
      7.189999999999892,
      7.3099999999998895,
      7.339999999999889,
      7.479999999999886,
      9.749999999999837,
      10.919999999999812,
      11.219999999999805,
      11.749999999999794,
      11.979999999999789,
      13.239999999999762,
      13.579999999999755,
      13.669999999999753,
      13.82999999999975,
      14.009999999999746,
      14.679999999999731,
      14.889999999999727,
      15.769999999999708,
      15.769999999999708,
      15.819999999999707,
      15.839999999999707,
      15.919999999999705,
      16.029999999999703,
      16.12999999999972,
      16.44999999999977,
      16.44999999999977,
      16.77999999999982,
      16.83999999999983,
      16.83999999999983,
      16.889999999999837
    ],
    "short_mscale": 1.0,
    "type": "su"
  },
  "torch_dtype": "bfloat16",
  "transformers_version": "4.38.1",
  "use_cache": true,
  "vocab_size": 100352
}