krishnateja95 committed on
Commit
d3da676
·
verified ·
1 Parent(s): 1d7434b

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +43 -43
README.md CHANGED
@@ -12,8 +12,8 @@ license: apache-2.0
12
  <tr>
13
  <th>Category</th>
14
  <th>Metric</th>
15
- <th>meta-llama/Llama-4-Scout-17B-16E-Instruct</th>
16
- <th>nm-testing/Llama-4-Scout-17B-16E-Instruct-BLOCK-FP8</th>
17
  <th>Recovery (%)</th>
18
  </tr>
19
  </thead>
@@ -22,89 +22,89 @@ license: apache-2.0
22
  <tr>
23
  <td rowspan="7"><b>OpenLLM V1</b></td>
24
  <td>ARC-Challenge (Acc-Norm, 25-shot)</td>
25
- <td>69.11</td>
26
- <td>68.77</td>
27
- <td>99.51</td>
28
  </tr>
29
  <tr>
30
  <td>GSM8K (Strict-Match, 5-shot)</td>
31
- <td>90.60</td>
32
- <td>91.21</td>
33
- <td>100.67</td>
34
  </tr>
35
  <tr>
36
  <td>HellaSwag (Acc-Norm, 10-shot)</td>
37
- <td>85.12</td>
38
- <td>85.30</td>
39
- <td>100.21</td>
40
  </tr>
41
  <tr>
42
  <td>MMLU (Acc, 5-shot)</td>
43
- <td>80.49</td>
44
- <td>80.54</td>
45
- <td>100.06</td>
46
  </tr>
47
  <tr>
48
  <td>TruthfulQA (MC2, 0-shot)</td>
49
- <td>61.33</td>
50
- <td>61.38</td>
51
- <td>100.08</td>
52
  </tr>
53
  <tr>
54
  <td>Winogrande (Acc, 5-shot)</td>
55
- <td>77.82</td>
56
- <td>77.19</td>
57
- <td>99.19</td>
58
  </tr>
59
  <tr>
60
  <td><b>Average Score</b></td>
61
- <td><b>77.41</b></td>
62
- <td><b>77.40</b></td>
63
- <td><b>99.99</b></td>
64
  </tr>
65
  <!-- OpenLLM Leaderboard V2 -->
66
  <tr>
67
  <td rowspan="7"><b>OpenLLM V2</b></td>
68
  <td>IFEval (Inst Level Strict Acc, 0-shot)</td>
69
- <td>89.09</td>
70
  <td>90.89</td>
71
- <td>102.02</td>
72
  </tr>
73
  <tr>
74
  <td>BBH (Acc-Norm, 3-shot)</td>
75
- <td>65.04</td>
76
- <td>65.37</td>
77
- <td>100.51</td>
78
  </tr>
79
  <tr>
80
  <td>Math-Hard (Exact-Match, 4-shot)</td>
81
- <td>57.40</td>
82
- <td>57.70</td>
83
- <td>100.53</td>
84
  </tr>
85
  <tr>
86
  <td>GPQA (Acc-Norm, 0-shot)</td>
87
- <td>32.38</td>
88
- <td>32.30</td>
89
- <td>99.74</td>
90
  </tr>
91
  <tr>
92
  <td>MUSR (Acc-Norm, 0-shot)</td>
93
- <td>42.59</td>
94
- <td>42.86</td>
95
- <td>100.62</td>
96
  </tr>
97
  <tr>
98
  <td>MMLU-Pro (Acc, 5-shot)</td>
99
- <td>55.68</td>
100
- <td>55.53</td>
101
- <td>99.73</td>
102
  </tr>
103
  <tr>
104
  <td><b>Average Score</b></td>
105
- <td><b>57.03</b></td>
106
- <td><b>57.44</b></td>
107
- <td><b>100.72</b></td>
108
  </tr>
109
  <td rowspan="4" ><strong>Coding</strong>
110
  </td>
 
12
  <tr>
13
  <th>Category</th>
14
  <th>Metric</th>
15
+ <th>meta-llama/Llama-4-Maverick-17B-128E-Instruct</th>
16
+ <th>nm-testing/Llama-4-Maverick-17B-128E-Instruct-block-FP8</th>
17
  <th>Recovery (%)</th>
18
  </tr>
19
  </thead>
 
22
  <tr>
23
  <td rowspan="7"><b>OpenLLM V1</b></td>
24
  <td>ARC-Challenge (Acc-Norm, 25-shot)</td>
25
+ <td>73.38</td>
26
+ <td>73.38</td>
27
+ <td>100.00</td>
28
  </tr>
29
  <tr>
30
  <td>GSM8K (Strict-Match, 5-shot)</td>
31
+ <td>93.03</td>
32
+ <td>92.72</td>
33
+ <td>99.67</td>
34
  </tr>
35
  <tr>
36
  <td>HellaSwag (Acc-Norm, 10-shot)</td>
37
+ <td>87.39</td>
38
+ <td>87.33</td>
39
+ <td>99.93</td>
40
  </tr>
41
  <tr>
42
  <td>MMLU (Acc, 5-shot)</td>
43
+ <td>86.03</td>
44
+ <td>86.15</td>
45
+ <td>100.13</td>
46
  </tr>
47
  <tr>
48
  <td>TruthfulQA (MC2, 0-shot)</td>
49
+ <td>62.76</td>
50
+ <td>62.90</td>
51
+ <td>100.23</td>
52
  </tr>
53
  <tr>
54
  <td>Winogrande (Acc, 5-shot)</td>
55
+ <td>79.56</td>
56
+ <td>79.40</td>
57
+ <td>99.80</td>
58
  </tr>
59
  <tr>
60
  <td><b>Average Score</b></td>
61
+ <td><b>80.36</b></td>
62
+ <td><b>80.31</b></td>
63
+ <td><b>99.94</b></td>
64
  </tr>
65
  <!-- OpenLLM Leaderboard V2 -->
66
  <tr>
67
  <td rowspan="7"><b>OpenLLM V2</b></td>
68
  <td>IFEval (Inst Level Strict Acc, 0-shot)</td>
69
+ <td>89.93</td>
70
  <td>90.89</td>
71
+ <td>101.07</td>
72
  </tr>
73
  <tr>
74
  <td>BBH (Acc-Norm, 3-shot)</td>
75
+ <td>70.53</td>
76
+ <td>71.03</td>
77
+ <td>100.71</td>
78
  </tr>
79
  <tr>
80
  <td>Math-Hard (Exact-Match, 4-shot)</td>
81
+ <td>64.73</td>
82
+ <td>65.26</td>
83
+ <td>100.82</td>
84
  </tr>
85
  <tr>
86
  <td>GPQA (Acc-Norm, 0-shot)</td>
87
+ <td>31.29</td>
88
+ <td>30.54</td>
89
+ <td>97.59</td>
90
  </tr>
91
  <tr>
92
  <td>MUSR (Acc-Norm, 0-shot)</td>
93
+ <td>46.56</td>
94
+ <td>46.03</td>
95
+ <td>98.86</td>
96
  </tr>
97
  <tr>
98
  <td>MMLU-Pro (Acc, 5-shot)</td>
99
+ <td>64.11</td>
100
+ <td>63.95</td>
101
+ <td>99.75</td>
102
  </tr>
103
  <tr>
104
  <td><b>Average Score</b></td>
105
+ <td><b>61.19</b></td>
106
+ <td><b>61.28</b></td>
107
+ <td><b>100.15</b></td>
108
  </tr>
109
  <td rowspan="4" ><strong>Coding</strong>
110
  </td>