Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | 
         @@ -46,6 +46,43 @@ if "evaluation_params" not in st.session_state: 
     | 
|
| 46 | 
         
             
            if "show_results" not in st.session_state:
         
     | 
| 47 | 
         
             
                st.session_state.show_results = False
         
     | 
| 48 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 49 | 
         
             
            def run_evaluation_sync(request: EvaluationRequest):
         
     | 
| 50 | 
         
             
                """Run evaluation synchronously with proper event loop handling"""
         
     | 
| 51 | 
         
             
                try:
         
     | 
| 
         @@ -481,23 +518,6 @@ def build_request_object(questions: List[str], ground_truths: List[str], model_r 
     | 
|
| 481 | 
         | 
| 482 | 
         
             
                return request
         
     | 
| 483 | 
         | 
| 484 | 
         
            -
            def read_json_file(uploaded_file):
                """Parse JSON from an uploaded file object or from a local file path.

                Args:
                    uploaded_file: Either an object exposing ``getvalue()`` (an
                        in-memory upload buffer) or a path accepted by ``open()``.

                Returns:
                    The decoded JSON value, or ``None`` when reading or parsing
                    fails; in that case the error is reported via ``st.error``.
                """
                try:
                    if hasattr(uploaded_file, 'getvalue'):
                        # In-memory upload: use the buffer contents directly.
                        content = uploaded_file.getvalue()
                        if isinstance(content, bytes):
                            # 'utf-8-sig' drops a leading UTF-8 BOM (common in files
                            # saved by Windows editors); json.loads rejects a str
                            # that still starts with one. BOM-less input decodes
                            # exactly as with plain 'utf-8'.
                            content = content.decode('utf-8-sig')
                        return json.loads(content)

                    # No getvalue(): treat the argument as a local file path.
                    with open(uploaded_file, 'r', encoding='utf-8-sig') as f:
                        return json.load(f)
                except Exception as e:
                    # Deliberately broad: any read/decode/parse failure is shown
                    # to the user instead of propagating to the caller.
                    st.error(f"Error reading JSON file: {e}")
                    return None
         
     | 
| 500 | 
         
            -
             
     | 
| 501 | 
         
             
            def main():
         
     | 
| 502 | 
         
             
                st.title("🤖 LMVal: Multi-Metric LLM Evaluation")
         
     | 
| 503 | 
         
             
                st.markdown("Advanced RAG pipeline evaluation using LangGraph and Groq/OpenAI")
         
     | 
| 
         @@ -639,44 +659,72 @@ def main(): 
     | 
|
| 639 | 
         | 
| 640 | 
         
             
                        if uploaded_file is not None:
         
     | 
| 641 | 
         
             
                            try:
         
     | 
| 642 | 
         
            -
                                #  
     | 
| 643 | 
         
            -
                                 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 644 | 
         | 
| 645 | 
         
            -
                                if data:
         
     | 
| 646 | 
         
            -
                                    #  
     | 
| 647 | 
         
            -
                                     
     | 
| 648 | 
         
            -
             
     | 
| 649 | 
         
            -
             
     | 
| 650 | 
         
            -
             
     | 
| 651 | 
         
            -
             
     | 
| 652 | 
         
            -
             
     | 
| 653 | 
         
            -
                                     
     | 
| 654 | 
         
            -
                                         
     | 
| 655 | 
         
            -
             
     | 
| 656 | 
         
            -
                                             
     | 
| 657 | 
         
            -
             
     | 
| 658 | 
         
            -
             
     | 
| 659 | 
         
            -
             
     | 
| 660 | 
         
            -
             
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 661 | 
         | 
| 662 | 
         
            -
                                    if questions_list:
         
     | 
| 663 | 
         
            -
                                        st.success(f"Loaded {len(questions_list)} items from JSON")
         
     | 
| 664 | 
         
            -
                                        
         
     | 
| 665 | 
         
            -
                                        # Show preview
         
     | 
| 666 | 
         
            -
                                        with st.expander("Preview loaded data"):
         
     | 
| 667 | 
         
            -
                                            preview_data = {
         
     | 
| 668 | 
         
            -
                                                "questions": questions_list[:3] + ["..."] if len(questions_list) > 3 else questions_list,
         
     | 
| 669 | 
         
            -
                                                "ground_truths": truths_list[:3] + ["..."] if len(truths_list) > 3 else truths_list,
         
     | 
| 670 | 
         
            -
                                                "model_responses": responses_list[:3] + ["..."] if responses_list and len(responses_list) > 3 else responses_list,
         
     | 
| 671 | 
         
            -
                                                "contexts": contexts_list[:3] + ["..."] if contexts_list and len(contexts_list) > 3 else contexts_list
         
     | 
| 672 | 
         
            -
                                            }
         
     | 
| 673 | 
         
            -
                                            st.json(preview_data)
         
     | 
| 674 | 
         
            -
                                    else:
         
     | 
| 675 | 
         
            -
                                        st.warning("No valid data found in the JSON file")
         
     | 
| 676 | 
         
            -
                                        
         
     | 
| 677 | 
         
             
                            except Exception as e:
         
     | 
| 678 | 
         
             
                                st.error(f"Error processing JSON file: {e}")
         
     | 
| 679 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 680 | 
         
             
                    # Run evaluation button
         
     | 
| 681 | 
         
             
                    run_button = st.button("▶️ Run Evaluation", use_container_width=True, 
         
     | 
| 682 | 
         
             
                                          disabled=st.session_state.evaluation_in_progress)
         
     | 
| 
         @@ -852,7 +900,7 @@ def main(): 
     | 
|
| 852 | 
         
             
                                    st.rerun()
         
     | 
| 853 | 
         | 
| 854 | 
         
             
                        # Clear all history button
         
     | 
| 855 | 
         
            -
                        if st.button("Clear All History 
     | 
| 856 | 
         
             
                            st.session_state.evaluation_history = []
         
     | 
| 857 | 
         
             
                            st.success("All history cleared")
         
     | 
| 858 | 
         
             
                            st.rerun()
         
     | 
| 
         | 
|
| 46 | 
         
             
            if "show_results" not in st.session_state:
         
     | 
| 47 | 
         
             
                st.session_state.show_results = False
         
     | 
| 48 | 
         | 
| 49 | 
         
            +
            def is_running_on_spaces():
                """Report whether the process appears to run on Hugging Face Spaces.

                Returns:
                    ``True`` when either the ``SPACES_APP_TYPE`` or the ``SPACE_ID``
                    environment variable is set, ``False`` otherwise.
                """
                # SPACES_APP_TYPE is the variable this app has always checked and is
                # kept for backward compatibility. SPACE_ID is one of the helper
                # variables the Spaces documentation lists as injected at runtime.
                # NOTE(review): confirm whether SPACES_APP_TYPE is ever set by the
                # platform; it is not among the documented helper variables.
                return any(
                    os.environ.get(name) is not None
                    for name in ('SPACES_APP_TYPE', 'SPACE_ID')
                )
         
     | 
| 52 | 
         
            +
             
     | 
| 53 | 
         
            +
            def create_sample_data():
                """Build the built-in demonstration dataset.

                Returns:
                    A new dict on every call holding four parallel five-element
                    lists under the keys "questions", "ground_truths",
                    "model_responses" and "contexts".
                """
                # One row per sample: (question, ground truth, model response, context).
                samples = [
                    (
                        "What is the capital of France?",
                        "The capital of France is Paris.",
                        "Paris is the capital city of France.",
                        "France is a country in Western Europe with Paris as its capital.",
                    ),
                    (
                        "How does photosynthesis work?",
                        "Photosynthesis is the process by which plants convert sunlight into energy.",
                        "Plants use sunlight to create energy through photosynthesis.",
                        "Photosynthesis is a biological process used by plants to create energy.",
                    ),
                    (
                        "What is the theory of relativity?",
                        "The theory of relativity was developed by Albert Einstein.",
                        "Einstein developed the theory of relativity.",
                        "Albert Einstein was a physicist who developed the theory of relativity.",
                    ),
                    (
                        "What is the main ingredient in guacamole?",
                        "The main ingredient in guacamole is avocado.",
                        "The main ingredient in guacamole is tomato.",
                        "Guacamole is an avocado-based dip first developed in Mexico.",
                    ),
                    (
                        "Who developed the theory of relativity?",
                        "Albert Einstein developed the theory of relativity.",
                        "Isaac Newton developed the theory of relativity.",
                        "Albert Einstein was a German-born theoretical physicist who developed the theory of relativity.",
                    ),
                ]

                # Transpose the rows into the four column lists the callers expect.
                questions, truths, responses, contexts = (
                    list(column) for column in zip(*samples)
                )
                return {
                    "questions": questions,
                    "ground_truths": truths,
                    "model_responses": responses,
                    "contexts": contexts,
                }
         
     | 
| 85 | 
         
            +
             
     | 
| 86 | 
         
             
            def run_evaluation_sync(request: EvaluationRequest):
         
     | 
| 87 | 
         
             
                """Run evaluation synchronously with proper event loop handling"""
         
     | 
| 88 | 
         
             
                try:
         
     | 
| 
         | 
|
| 518 | 
         | 
| 519 | 
         
             
                return request
         
     | 
| 520 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 521 | 
         
             
            def main():
         
     | 
| 522 | 
         
             
                st.title("🤖 LMVal: Multi-Metric LLM Evaluation")
         
     | 
| 523 | 
         
             
                st.markdown("Advanced RAG pipeline evaluation using LangGraph and Groq/OpenAI")
         
     | 
| 
         | 
|
| 659 | 
         | 
| 660 | 
         
             
                        if uploaded_file is not None:
         
     | 
| 661 | 
         
             
                            try:
         
     | 
| 662 | 
         
            +
                                # Read content directly from the uploaded file
         
     | 
| 663 | 
         
            +
                                content = uploaded_file.getvalue()
         
     | 
| 664 | 
         
            +
                                if isinstance(content, bytes):
         
     | 
| 665 | 
         
            +
                                    content = content.decode('utf-8')
         
     | 
| 666 | 
         
            +
                                
         
     | 
| 667 | 
         
            +
                                data = json.loads(content)
         
     | 
| 668 | 
         
            +
                                
         
     | 
| 669 | 
         
            +
                                # Handle different JSON structures
         
     | 
| 670 | 
         
            +
                                questions_list = []
         
     | 
| 671 | 
         
            +
                                truths_list = []
         
     | 
| 672 | 
         
            +
                                responses_list = []
         
     | 
| 673 | 
         
            +
                                contexts_list = []
         
     | 
| 674 | 
         | 
| 675 | 
         
            +
                                if isinstance(data, dict):
         
     | 
| 676 | 
         
            +
                                    # Standard format with separate arrays
         
     | 
| 677 | 
         
            +
                                    questions_list = data.get("questions", [])
         
     | 
| 678 | 
         
            +
                                    truths_list = data.get("ground_truths", [])
         
     | 
| 679 | 
         
            +
                                    responses_list = data.get("model_responses", [])
         
     | 
| 680 | 
         
            +
                                    contexts_list = data.get("contexts", [])
         
     | 
| 681 | 
         
            +
                                elif isinstance(data, list):
         
     | 
| 682 | 
         
            +
                                    # List of question objects
         
     | 
| 683 | 
         
            +
                                    for item in data:
         
     | 
| 684 | 
         
            +
                                        if isinstance(item, dict):
         
     | 
| 685 | 
         
            +
                                            questions_list.append(item.get("question", ""))
         
     | 
| 686 | 
         
            +
                                            truths_list.append(item.get("ground_truth", ""))
         
     | 
| 687 | 
         
            +
                                            responses_list.append(item.get("model_response", ""))
         
     | 
| 688 | 
         
            +
                                            contexts_list.append(item.get("context", ""))
         
     | 
| 689 | 
         
            +
                                
         
     | 
| 690 | 
         
            +
                                if questions_list:
         
     | 
| 691 | 
         
            +
                                    st.success(f"Loaded {len(questions_list)} items from JSON")
         
     | 
| 692 | 
         
            +
                                    
         
     | 
| 693 | 
         
            +
                                    # Show preview
         
     | 
| 694 | 
         
            +
                                    with st.expander("Preview loaded data"):
         
     | 
| 695 | 
         
            +
                                        preview_data = {
         
     | 
| 696 | 
         
            +
                                            "questions": questions_list[:3] + ["..."] if len(questions_list) > 3 else questions_list,
         
     | 
| 697 | 
         
            +
                                            "ground_truths": truths_list[:3] + ["..."] if len(truths_list) > 3 else truths_list,
         
     | 
| 698 | 
         
            +
                                            "model_responses": responses_list[:3] + ["..."] if responses_list and len(responses_list) > 3 else responses_list,
         
     | 
| 699 | 
         
            +
                                            "contexts": contexts_list[:3] + ["..."] if contexts_list and len(contexts_list) > 3 else contexts_list
         
     | 
| 700 | 
         
            +
                                        }
         
     | 
| 701 | 
         
            +
                                        st.json(preview_data)
         
     | 
| 702 | 
         
            +
                                else:
         
     | 
| 703 | 
         
            +
                                    st.warning("No valid data found in the JSON file")
         
     | 
| 704 | 
         | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 705 | 
         
             
                            except Exception as e:
         
     | 
| 706 | 
         
             
                                st.error(f"Error processing JSON file: {e}")
         
     | 
| 707 | 
         | 
| 708 | 
         
            +
                    # Add sample data button for Spaces
         
     | 
| 709 | 
         
            +
                    if is_running_on_spaces() and not questions_list:
         
     | 
| 710 | 
         
            +
                        if st.button("📋 Load Sample Data", help="Load sample data for testing"):
         
     | 
| 711 | 
         
            +
                            sample_data = create_sample_data()
         
     | 
| 712 | 
         
            +
                            questions_list = sample_data["questions"]
         
     | 
| 713 | 
         
            +
                            truths_list = sample_data["ground_truths"]
         
     | 
| 714 | 
         
            +
                            responses_list = sample_data["model_responses"]
         
     | 
| 715 | 
         
            +
                            contexts_list = sample_data["contexts"]
         
     | 
| 716 | 
         
            +
                            
         
     | 
| 717 | 
         
            +
                            st.success("Sample data loaded successfully!")
         
     | 
| 718 | 
         
            +
                            
         
     | 
| 719 | 
         
            +
                            # Show preview
         
     | 
| 720 | 
         
            +
                            with st.expander("Preview sample data"):
         
     | 
| 721 | 
         
            +
                                st.json({
         
     | 
| 722 | 
         
            +
                                    "questions": questions_list,
         
     | 
| 723 | 
         
            +
                                    "ground_truths": truths_list,
         
     | 
| 724 | 
         
            +
                                    "model_responses": responses_list,
         
     | 
| 725 | 
         
            +
                                    "contexts": contexts_list
         
     | 
| 726 | 
         
            +
                                })
         
     | 
| 727 | 
         
            +
             
     | 
| 728 | 
         
             
                    # Run evaluation button
         
     | 
| 729 | 
         
             
                    run_button = st.button("▶️ Run Evaluation", use_container_width=True, 
         
     | 
| 730 | 
         
             
                                          disabled=st.session_state.evaluation_in_progress)
         
     | 
| 
         | 
|
| 900 | 
         
             
                                    st.rerun()
         
     | 
| 901 | 
         | 
| 902 | 
         
             
                        # Clear all history button
         
     | 
| 903 | 
         
            +
                        if st.button("Clear All History", use_container_width=True, type="secondary"):
         
     | 
| 904 | 
         
             
                            st.session_state.evaluation_history = []
         
     | 
| 905 | 
         
             
                            st.success("All history cleared")
         
     | 
| 906 | 
         
             
                            st.rerun()
         
     |