Update README.md
Browse files
README.md
CHANGED
|
@@ -671,21 +671,21 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
| 671 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w8a8</td>
|
| 672 |
<td>1.70</td>
|
| 673 |
<td>0.8</td>
|
| 674 |
-
<td>
|
| 675 |
<td>1.1</td>
|
| 676 |
-
<td>
|
| 677 |
<td>1.3</td>
|
| 678 |
-
<td>
|
| 679 |
</tr>
|
| 680 |
<tr>
|
| 681 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w4a16</td>
|
| 682 |
<td>1.48</td>
|
| 683 |
<td>0.5</td>
|
| 684 |
-
<td>
|
| 685 |
<td>1.0</td>
|
| 686 |
-
<td>
|
| 687 |
<td>1.4</td>
|
| 688 |
-
<td>
|
| 689 |
</tr>
|
| 690 |
<tr>
|
| 691 |
<th rowspan="3" valign="top">H100x4</th>
|
|
@@ -702,21 +702,21 @@ The following performance benchmarks were conducted with [vLLM](https://docs.vll
|
|
| 702 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-FP8-Dynamic</td>
|
| 703 |
<td>1.61</td>
|
| 704 |
<td>1.7</td>
|
| 705 |
-
<td>
|
| 706 |
<td>2.6</td>
|
| 707 |
-
<td>
|
| 708 |
<td>3.2</td>
|
| 709 |
-
<td>
|
| 710 |
</tr>
|
| 711 |
<tr>
|
| 712 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w4a16</td>
|
| 713 |
<td>1.33</td>
|
| 714 |
<td>1.4</td>
|
| 715 |
-
<td>
|
| 716 |
<td>2.2</td>
|
| 717 |
-
<td>
|
| 718 |
<td>2.7</td>
|
| 719 |
-
<td>
|
| 720 |
</tr>
|
| 721 |
</tbody>
|
| 722 |
</table>
|
|
|
|
| 671 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w8a8</td>
|
| 672 |
<td>1.70</td>
|
| 673 |
<td>0.8</td>
|
| 674 |
+
<td>383</td>
|
| 675 |
<td>1.1</td>
|
| 676 |
+
<td>571</td>
|
| 677 |
<td>1.3</td>
|
| 678 |
+
<td>674</td>
|
| 679 |
</tr>
|
| 680 |
<tr>
|
| 681 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w4a16</td>
|
| 682 |
<td>1.48</td>
|
| 683 |
<td>0.5</td>
|
| 684 |
+
<td>276</td>
|
| 685 |
<td>1.0</td>
|
| 686 |
+
<td>505</td>
|
| 687 |
<td>1.4</td>
|
| 688 |
+
<td>680</td>
|
| 689 |
</tr>
|
| 690 |
<tr>
|
| 691 |
<th rowspan="3" valign="top">H100x4</th>
|
|
|
|
| 702 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-FP8-Dynamic</td>
|
| 703 |
<td>1.61</td>
|
| 704 |
<td>1.7</td>
|
| 705 |
+
<td>467</td>
|
| 706 |
<td>2.6</td>
|
| 707 |
+
<td>726</td>
|
| 708 |
<td>3.2</td>
|
| 709 |
+
<td>908</td>
|
| 710 |
</tr>
|
| 711 |
<tr>
|
| 712 |
<td>neuralmagic/Pixtral-Large-Instruct-2411-hf-quantized.w4a16</td>
|
| 713 |
<td>1.33</td>
|
| 714 |
<td>1.4</td>
|
| 715 |
+
<td>393</td>
|
| 716 |
<td>2.2</td>
|
| 717 |
+
<td>726</td>
|
| 718 |
<td>2.7</td>
|
| 719 |
+
<td>764</td>
|
| 720 |
</tr>
|
| 721 |
</tbody>
|
| 722 |
</table>
|