factrbench

Running

App Files Community

farimafatahi committed on Oct 26, 2024

Commit

347446d

verified ·

1 Parent(s): 2c11405

Update app.py

Browse files

Files changed (1) hide show

app.py +0 -21

app.py CHANGED Viewed

@@ -138,13 +138,6 @@ with tab1:
                     <th>Rank</th>
                     <th>Model</th>
                     <th>Factuality Score</th>
-                    <th>Factual Recall</th>
-                    <th>Conceptual Understanding</th>
-                    <th>Procedural Execution</th>
-                    <th>Comparative Analysis</th>
-                    <th>Recommendations and Insights</th>
-                    <th>Domain-Specific Knowledge</th>
-                    <th>Temporal Context</th>
                     <th>Hallucination Score</th>
                     <th># Tokens</th>
                     <th># Factual</th>
@@ -162,13 +155,6 @@ with tab1:
                     <th>Rank</th>
                     <th>Model</th>
                     <th>Factuality Score</th>
-                    <th>Factual Recall</th>
-                    <th>Conceptual Understanding</th>
-                    <th>Procedural Execution</th>
-                    <th>Comparative Analysis</th>
-                    <th>Recommendations and Insights</th>
-                    <th>Domain-Specific Knowledge</th>
-                    <th>Temporal Context</th>
                     <th>Hallucination Score</th>
                     <th># Tokens</th>
                     <th># Factual</th>
@@ -195,13 +181,6 @@ with tab1:
             <td>{row['rank']}</td>
             <td>{row['model']}</td>
             <td>{row['factuality_score']}</td>
-            <td>{row['prompt_categories.Factual Recall']}</td>
-            <td>{row['prompt_categories.Conceptual Understanding']}</td>
-            <td>{row['prompt_categories.Procedural Execution']}</td>
-            <td>{row['prompt_categories.Comparative Analysis']}</td>
-            <td>{row['prompt_categories.Recommendations and Insights']}</td>
-            <td>{row['prompt_categories.Domain-Specific Knowledge']}</td>
-            <td>{row['prompt_categories.Temporal Context']}</td>
             <td>{row['hallucination_score']}</td>
             <td>{row['avg_tokens']}</td>
             <td>{row['avg_factual_units']}</td>

                     <th>Rank</th>
                     <th>Model</th>
                     <th>Factuality Score</th>
                     <th>Hallucination Score</th>
                     <th># Tokens</th>
                     <th># Factual</th>
                     <th>Rank</th>
                     <th>Model</th>
                     <th>Factuality Score</th>
                     <th>Hallucination Score</th>
                     <th># Tokens</th>
                     <th># Factual</th>
             <td>{row['rank']}</td>
             <td>{row['model']}</td>
             <td>{row['factuality_score']}</td>
             <td>{row['hallucination_score']}</td>
             <td>{row['avg_tokens']}</td>
             <td>{row['avg_factual_units']}</td>