Upload README.md with huggingface_hub
Browse files
README.md
CHANGED
|
@@ -8,20 +8,7 @@ tags:
|
|
| 8 |
- llama-2
|
| 9 |
- llama
|
| 10 |
base_model: meta-llama/Llama-2-7b-hf
|
| 11 |
-
model_name:
|
| 12 |
-
\ (embedding): Embedding(32000, 4096)\n (blocks): ModuleList(\n \
|
| 13 |
-
\ (0-31): 32 x LlamaLikeBlock(\n (norm_1): FasterTransformerRMSNorm()\n\
|
| 14 |
-
\ (attn): QuantAttentionFused(\n (qkv_proj): WQLinear_GEMM(in_features=4096,\
|
| 15 |
-
\ out_features=12288, bias=False, w_bit=4, group_size=128)\n (o_proj):\
|
| 16 |
-
\ WQLinear_GEMM(in_features=4096, out_features=4096, bias=False, w_bit=4, group_size=128)\n\
|
| 17 |
-
\ (rope): RoPE()\n )\n (norm_2): FasterTransformerRMSNorm()\n\
|
| 18 |
-
\ (mlp): LlamaMLP(\n (gate_proj): WQLinear_GEMM(in_features=4096,\
|
| 19 |
-
\ out_features=11008, bias=False, w_bit=4, group_size=128)\n (up_proj):\
|
| 20 |
-
\ WQLinear_GEMM(in_features=4096, out_features=11008, bias=False, w_bit=4, group_size=128)\n\
|
| 21 |
-
\ (down_proj): WQLinear_GEMM(in_features=11008, out_features=4096, bias=False,\
|
| 22 |
-
\ w_bit=4, group_size=128)\n (act_fn): SiLU()\n )\n )\n\
|
| 23 |
-
\ )\n (norm): LlamaRMSNorm()\n )\n (lm_head): Linear(in_features=4096,\
|
| 24 |
-
\ out_features=32000, bias=False)\n )\n)"
|
| 25 |
library:
|
| 26 |
- Transformers
|
| 27 |
- AWQ
|
|
|
|
| 8 |
- llama-2
|
| 9 |
- llama
|
| 10 |
base_model: meta-llama/Llama-2-7b-hf
|
| 11 |
+
model_name: Llama-2-7b-hf-AWQ
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
library:
|
| 13 |
- Transformers
|
| 14 |
- AWQ
|