Update README.md
README.md CHANGED
@@ -22,7 +22,7 @@ Base model: [Qwen/Qwen3-30B-A3B-Instruct-2507](https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507)
 CONTEXT_LENGTH=32768 # 262144
 
 vllm serve \
-
+    QuantTrio/Qwen3-30B-A3B-Instruct-2507-GPTQ-Int8 \
     --served-model-name Qwen3-30B-A3B-Instruct-2507-GPTQ-Int8 \
     --enable-expert-parallel \
     --swap-space 16 \
@@ -57,8 +57,8 @@ vllm>=0.9.2
 ### 【Model Download】
 
 ```python
-from
-snapshot_download('
+from huggingface_hub import snapshot_download
+snapshot_download('QuantTrio/Qwen3-30B-A3B-Instruct-2507-GPTQ-Int8', cache_dir="your_local_path")
 ```
 
 ### 【Overview】
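For reference, once the corrected `vllm serve` command above is running, the deployment can be smoke-tested through vLLM's OpenAI-compatible endpoint. This is a minimal sketch, not part of the commit: it assumes the default host and port (`localhost:8000`) and no server-side API key; only the served model name comes from the diff above.

```python
# Minimal client-side check against the server started by `vllm serve` above.
# base_url assumes vLLM's defaults; adjust if --host/--port were overridden.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
response = client.chat.completions.create(
    model="Qwen3-30B-A3B-Instruct-2507-GPTQ-Int8",  # must match --served-model-name
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
)
print(response.choices[0].message.content)
```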
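Likewise, the corrected download snippet returns the local snapshot directory, which can be passed to `vllm serve` in place of the Hub repo id. A minimal sketch, assuming `your_local_path` remains the placeholder used in the README:

```python
# Download the quantized weights and capture the local snapshot path.
# "your_local_path" is the README's placeholder, not a real directory.
from huggingface_hub import snapshot_download

local_dir = snapshot_download(
    "QuantTrio/Qwen3-30B-A3B-Instruct-2507-GPTQ-Int8",
    cache_dir="your_local_path",
)
print(local_dir)  # this path can replace the repo id in `vllm serve <path>`
```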