NIRAJz committed on
Commit
3d4bd94
·
verified ·
1 Parent(s): 4d6c43c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +99 -51
app.py CHANGED
@@ -46,6 +46,43 @@ if "evaluation_params" not in st.session_state:
46
  if "show_results" not in st.session_state:
47
  st.session_state.show_results = False
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  def run_evaluation_sync(request: EvaluationRequest):
50
  """Run evaluation synchronously with proper event loop handling"""
51
  try:
@@ -481,23 +518,6 @@ def build_request_object(questions: List[str], ground_truths: List[str], model_r
481
 
482
  return request
483
 
484
- def read_json_file(uploaded_file):
485
- """Read JSON file with proper error handling for Spaces"""
486
- try:
487
- # For Spaces environment, use file uploader content directly
488
- if hasattr(uploaded_file, 'getvalue'):
489
- content = uploaded_file.getvalue()
490
- if isinstance(content, bytes):
491
- content = content.decode('utf-8')
492
- return json.loads(content)
493
- else:
494
- # For local files
495
- with open(uploaded_file, 'r', encoding='utf-8') as f:
496
- return json.load(f)
497
- except Exception as e:
498
- st.error(f"Error reading JSON file: {e}")
499
- return None
500
-
501
  def main():
502
  st.title("🤖 LMVal: Multi-Metric LLM Evaluation")
503
  st.markdown("Advanced RAG pipeline evaluation using LangGraph and Groq/OpenAI")
@@ -639,44 +659,72 @@ def main():
639
 
640
  if uploaded_file is not None:
641
  try:
642
- # Use the new file reading function
643
- data = read_json_file(uploaded_file)
 
 
 
 
 
 
 
 
 
 
644
 
645
- if data:
646
- # Handle different JSON structures
647
- if isinstance(data, dict):
648
- # Standard format with separate arrays
649
- questions_list = data.get("questions", [])
650
- truths_list = data.get("ground_truths", [])
651
- responses_list = data.get("model_responses", [])
652
- contexts_list = data.get("contexts", [])
653
- elif isinstance(data, list):
654
- # List of question objects
655
- for item in data:
656
- if isinstance(item, dict):
657
- questions_list.append(item.get("question", ""))
658
- truths_list.append(item.get("ground_truth", ""))
659
- responses_list.append(item.get("model_response", ""))
660
- contexts_list.append(item.get("context", ""))
 
 
 
 
 
 
 
 
 
 
 
 
 
661
 
662
- if questions_list:
663
- st.success(f"Loaded {len(questions_list)} items from JSON")
664
-
665
- # Show preview
666
- with st.expander("Preview loaded data"):
667
- preview_data = {
668
- "questions": questions_list[:3] + ["..."] if len(questions_list) > 3 else questions_list,
669
- "ground_truths": truths_list[:3] + ["..."] if len(truths_list) > 3 else truths_list,
670
- "model_responses": responses_list[:3] + ["..."] if responses_list and len(responses_list) > 3 else responses_list,
671
- "contexts": contexts_list[:3] + ["..."] if contexts_list and len(contexts_list) > 3 else contexts_list
672
- }
673
- st.json(preview_data)
674
- else:
675
- st.warning("No valid data found in the JSON file")
676
-
677
  except Exception as e:
678
  st.error(f"Error processing JSON file: {e}")
679
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
680
  # Run evaluation button
681
  run_button = st.button("▶️ Run Evaluation", use_container_width=True,
682
  disabled=st.session_state.evaluation_in_progress)
@@ -852,7 +900,7 @@ def main():
852
  st.rerun()
853
 
854
  # Clear all history button
855
- if st.button("Clear All History ", use_container_width=True, type="secondary"):
856
  st.session_state.evaluation_history = []
857
  st.success("All history cleared")
858
  st.rerun()
 
46
  if "show_results" not in st.session_state:
47
  st.session_state.show_results = False
48
 
49
+ def is_running_on_spaces():
50
+ """Check if we're running on Hugging Face Spaces"""
51
+ return os.environ.get('SPACES_APP_TYPE') is not None
52
+
53
+ def create_sample_data():
54
+ """Create sample data for demonstration"""
55
+ return {
56
+ "questions": [
57
+ "What is the capital of France?",
58
+ "How does photosynthesis work?",
59
+ "What is the theory of relativity?",
60
+ "What is the main ingredient in guacamole?",
61
+ "Who developed the theory of relativity?"
62
+ ],
63
+ "ground_truths": [
64
+ "The capital of France is Paris.",
65
+ "Photosynthesis is the process by which plants convert sunlight into energy.",
66
+ "The theory of relativity was developed by Albert Einstein.",
67
+ "The main ingredient in guacamole is avocado.",
68
+ "Albert Einstein developed the theory of relativity."
69
+ ],
70
+ "model_responses": [
71
+ "Paris is the capital city of France.",
72
+ "Plants use sunlight to create energy through photosynthesis.",
73
+ "Einstein developed the theory of relativity.",
74
+ "The main ingredient in guacamole is tomato.",
75
+ "Isaac Newton developed the theory of relativity."
76
+ ],
77
+ "contexts": [
78
+ "France is a country in Western Europe with Paris as its capital.",
79
+ "Photosynthesis is a biological process used by plants to create energy.",
80
+ "Albert Einstein was a physicist who developed the theory of relativity.",
81
+ "Guacamole is an avocado-based dip first developed in Mexico.",
82
+ "Albert Einstein was a German-born theoretical physicist who developed the theory of relativity."
83
+ ]
84
+ }
85
+
86
  def run_evaluation_sync(request: EvaluationRequest):
87
  """Run evaluation synchronously with proper event loop handling"""
88
  try:
 
518
 
519
  return request
520
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
521
  def main():
522
  st.title("🤖 LMVal: Multi-Metric LLM Evaluation")
523
  st.markdown("Advanced RAG pipeline evaluation using LangGraph and Groq/OpenAI")
 
659
 
660
  if uploaded_file is not None:
661
  try:
662
+ # Read content directly from the uploaded file
663
+ content = uploaded_file.getvalue()
664
+ if isinstance(content, bytes):
665
+ content = content.decode('utf-8')
666
+
667
+ data = json.loads(content)
668
+
669
+ # Handle different JSON structures
670
+ questions_list = []
671
+ truths_list = []
672
+ responses_list = []
673
+ contexts_list = []
674
 
675
+ if isinstance(data, dict):
676
+ # Standard format with separate arrays
677
+ questions_list = data.get("questions", [])
678
+ truths_list = data.get("ground_truths", [])
679
+ responses_list = data.get("model_responses", [])
680
+ contexts_list = data.get("contexts", [])
681
+ elif isinstance(data, list):
682
+ # List of question objects
683
+ for item in data:
684
+ if isinstance(item, dict):
685
+ questions_list.append(item.get("question", ""))
686
+ truths_list.append(item.get("ground_truth", ""))
687
+ responses_list.append(item.get("model_response", ""))
688
+ contexts_list.append(item.get("context", ""))
689
+
690
+ if questions_list:
691
+ st.success(f"Loaded {len(questions_list)} items from JSON")
692
+
693
+ # Show preview
694
+ with st.expander("Preview loaded data"):
695
+ preview_data = {
696
+ "questions": questions_list[:3] + ["..."] if len(questions_list) > 3 else questions_list,
697
+ "ground_truths": truths_list[:3] + ["..."] if len(truths_list) > 3 else truths_list,
698
+ "model_responses": responses_list[:3] + ["..."] if responses_list and len(responses_list) > 3 else responses_list,
699
+ "contexts": contexts_list[:3] + ["..."] if contexts_list and len(contexts_list) > 3 else contexts_list
700
+ }
701
+ st.json(preview_data)
702
+ else:
703
+ st.warning("No valid data found in the JSON file")
704
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
705
  except Exception as e:
706
  st.error(f"Error processing JSON file: {e}")
707
 
708
+ # Add sample data button for Spaces
709
+ if is_running_on_spaces() and not questions_list:
710
+ if st.button("📋 Load Sample Data", help="Load sample data for testing"):
711
+ sample_data = create_sample_data()
712
+ questions_list = sample_data["questions"]
713
+ truths_list = sample_data["ground_truths"]
714
+ responses_list = sample_data["model_responses"]
715
+ contexts_list = sample_data["contexts"]
716
+
717
+ st.success("Sample data loaded successfully!")
718
+
719
+ # Show preview
720
+ with st.expander("Preview sample data"):
721
+ st.json({
722
+ "questions": questions_list,
723
+ "ground_truths": truths_list,
724
+ "model_responses": responses_list,
725
+ "contexts": contexts_list
726
+ })
727
+
728
  # Run evaluation button
729
  run_button = st.button("▶️ Run Evaluation", use_container_width=True,
730
  disabled=st.session_state.evaluation_in_progress)
 
900
  st.rerun()
901
 
902
  # Clear all history button
903
+ if st.button("Clear All History", use_container_width=True, type="secondary"):
904
  st.session_state.evaluation_history = []
905
  st.success("All history cleared")
906
  st.rerun()