Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -138,13 +138,6 @@ with tab1:
|
|
| 138 |
<th>Rank</th>
|
| 139 |
<th>Model</th>
|
| 140 |
<th>Factuality Score</th>
|
| 141 |
-
<th>Factual Recall</th>
|
| 142 |
-
<th>Conceptual Understanding</th>
|
| 143 |
-
<th>Procedural Execution</th>
|
| 144 |
-
<th>Comparative Analysis</th>
|
| 145 |
-
<th>Recommendations and Insights</th>
|
| 146 |
-
<th>Domain-Specific Knowledge</th>
|
| 147 |
-
<th>Temporal Context</th>
|
| 148 |
<th>Hallucination Score</th>
|
| 149 |
<th># Tokens</th>
|
| 150 |
<th># Factual</th>
|
|
@@ -162,13 +155,6 @@ with tab1:
|
|
| 162 |
<th>Rank</th>
|
| 163 |
<th>Model</th>
|
| 164 |
<th>Factuality Score</th>
|
| 165 |
-
<th>Factual Recall</th>
|
| 166 |
-
<th>Conceptual Understanding</th>
|
| 167 |
-
<th>Procedural Execution</th>
|
| 168 |
-
<th>Comparative Analysis</th>
|
| 169 |
-
<th>Recommendations and Insights</th>
|
| 170 |
-
<th>Domain-Specific Knowledge</th>
|
| 171 |
-
<th>Temporal Context</th>
|
| 172 |
<th>Hallucination Score</th>
|
| 173 |
<th># Tokens</th>
|
| 174 |
<th># Factual</th>
|
|
@@ -195,13 +181,6 @@ with tab1:
|
|
| 195 |
<td>{row['rank']}</td>
|
| 196 |
<td>{row['model']}</td>
|
| 197 |
<td>{row['factuality_score']}</td>
|
| 198 |
-
<td>{row['prompt_categories.Factual Recall']}</td>
|
| 199 |
-
<td>{row['prompt_categories.Conceptual Understanding']}</td>
|
| 200 |
-
<td>{row['prompt_categories.Procedural Execution']}</td>
|
| 201 |
-
<td>{row['prompt_categories.Comparative Analysis']}</td>
|
| 202 |
-
<td>{row['prompt_categories.Recommendations and Insights']}</td>
|
| 203 |
-
<td>{row['prompt_categories.Domain-Specific Knowledge']}</td>
|
| 204 |
-
<td>{row['prompt_categories.Temporal Context']}</td>
|
| 205 |
<td>{row['hallucination_score']}</td>
|
| 206 |
<td>{row['avg_tokens']}</td>
|
| 207 |
<td>{row['avg_factual_units']}</td>
|
|
|
|
| 138 |
<th>Rank</th>
|
| 139 |
<th>Model</th>
|
| 140 |
<th>Factuality Score</th>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
<th>Hallucination Score</th>
|
| 142 |
<th># Tokens</th>
|
| 143 |
<th># Factual</th>
|
|
|
|
| 155 |
<th>Rank</th>
|
| 156 |
<th>Model</th>
|
| 157 |
<th>Factuality Score</th>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 158 |
<th>Hallucination Score</th>
|
| 159 |
<th># Tokens</th>
|
| 160 |
<th># Factual</th>
|
|
|
|
| 181 |
<td>{row['rank']}</td>
|
| 182 |
<td>{row['model']}</td>
|
| 183 |
<td>{row['factuality_score']}</td>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
<td>{row['hallucination_score']}</td>
|
| 185 |
<td>{row['avg_tokens']}</td>
|
| 186 |
<td>{row['avg_factual_units']}</td>
|