Spaces:
Sleeping
Sleeping
darisdzakwanhoesien2
committed on
Commit
·
9437058
1
Parent(s):
19902b0
Stage 5 Testing
Browse files
app.py
CHANGED
|
@@ -1,46 +1,34 @@
|
|
| 1 |
import re
|
| 2 |
from src.ontology_adapter import ESGOntologyAdapter
|
| 3 |
import gradio as gr
|
| 4 |
-
from collections import Counter
|
| 5 |
|
| 6 |
adapter = ESGOntologyAdapter("ontology/esg_ontology.owl")
|
| 7 |
|
| 8 |
def detect_sections(text):
    """Split *text* into titled sections.

    A line counts as a section title when it optionally starts with a
    numeric prefix ("1. ") followed by a capitalized run of letters,
    spaces and "&" (e.g. "Introduction", "Results and Discussion").
    Any text before the first title is returned under the title
    "Introduction".

    Args:
        text: Raw document text.

    Returns:
        List of {"title": str, "content": str} dicts in document order.
    """
    # NOTE(review): \s inside the character class also matches newlines, so
    # consecutive title-like lines can merge into a single capture — confirm
    # this is acceptable for the expected inputs.
    pattern = r'^\s*(?:\d+\.\s*)?([A-Z][a-zA-Z\s&]+)\s*$'

    sections = []

    # Fix: the original also ran re.findall(pattern, ...) into a
    # `potential_titles` variable that was never used — dead work, removed.

    # re.split with one capture group yields:
    # [preamble, title1, content1, title2, content2, ...]
    split_text = re.split(pattern, text, flags=re.MULTILINE)

    preamble = split_text[0]
    if preamble.strip():
        sections.append({"title": "Introduction", "content": preamble.strip()})

    # Pair up (title, content) from the alternating tail of the split.
    it = iter(split_text[1:])
    for title, content in zip(it, it):
        sections.append({"title": title.strip(), "content": content.strip()})

    return sections
|
| 33 |
|
| 34 |
-
def
|
|
|
|
| 35 |
detected_sections = detect_sections(text)
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
pillar_sentiments = {
|
| 39 |
-
"Environmental": [],
|
| 40 |
-
"Social": [],
|
| 41 |
-
"Governance": [],
|
| 42 |
-
"Unknown": []
|
| 43 |
-
}
|
| 44 |
|
| 45 |
for section in detected_sections:
|
| 46 |
title = section['title']
|
|
@@ -49,65 +37,78 @@ def analyze_text(text):
|
|
| 49 |
if not content.strip():
|
| 50 |
continue
|
| 51 |
|
| 52 |
-
# Determine weight based on section title
|
| 53 |
weight = 1.0
|
| 54 |
if any(keyword in title.lower() for keyword in ["results", "performance"]):
|
| 55 |
-
weight = 1.5
|
| 56 |
|
| 57 |
mapping = adapter.map_term(content)
|
| 58 |
-
section_snippet = content.strip().replace("\n", " ")[:120]
|
| 59 |
|
| 60 |
if mapping['matches']:
|
| 61 |
-
top_match = mapping['matches'][0]
|
| 62 |
-
pillar = top_match['pillar']
|
| 63 |
sentiment = mapping['sentiment']
|
| 64 |
-
|
| 65 |
-
if pillar != "Unknown":
|
| 66 |
-
# Apply weight to sentiment score if possible (conceptual)
|
| 67 |
-
# For now, we just note it. The ontology adapter would need to support weights.
|
| 68 |
-
pillar_sentiments[pillar].append(sentiment)
|
| 69 |
-
|
| 70 |
-
predicted_aspects = []
|
| 71 |
for match in mapping['matches']:
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
results.append(f"🟢 Section: **{title}**\n{section_snippet}\n→ Predicted Aspects: {', '.join(predicted_aspects)}\n→ Sentiment: {sentiment.title()}")
|
| 82 |
-
else:
|
| 83 |
-
results.append(f"🟢 Section: **{title}**\n{section_snippet}\n→ No ESG aspects identified")
|
| 84 |
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
|
| 105 |
iface = gr.Interface(
|
| 106 |
-
fn=
|
| 107 |
-
inputs=
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
|
|
|
|
|
|
|
|
|
| 111 |
)
|
| 112 |
|
| 113 |
iface.launch()
|
|
|
|
| 1 |
import re
|
| 2 |
from src.ontology_adapter import ESGOntologyAdapter
|
| 3 |
import gradio as gr
|
| 4 |
+
from collections import Counter, defaultdict
|
| 5 |
|
| 6 |
# Module-level ontology adapter, loaded once at import time.
# NOTE(review): the .owl path is relative to the working directory —
# confirm it resolves in the deployment environment.
adapter = ESGOntologyAdapter("ontology/esg_ontology.owl")
|
| 7 |
|
| 8 |
def detect_sections(text):
    """Split *text* into titled sections.

    A line counts as a section title when it optionally starts with a
    numeric prefix ("1. ") followed by a capitalized run of letters,
    spaces and "&" (e.g. "Introduction", "Results and Discussion").
    Any text before the first title is returned under the title
    "Introduction".

    Args:
        text: Raw document text.

    Returns:
        List of {"title": str, "content": str} dicts in document order.
    """
    # NOTE(review): \s inside the character class also matches newlines, so
    # consecutive title-like lines can merge into a single capture — confirm
    # this is acceptable for the expected inputs.
    pattern = r'^\s*(?:\d+\.\s*)?([A-Z][a-zA-Z\s&]+)\s*$'

    sections = []

    # Fix: the original also ran re.findall(pattern, ...) into a
    # `potential_titles` variable that was never used — dead work, removed.

    # re.split with one capture group yields:
    # [preamble, title1, content1, title2, content2, ...]
    split_text = re.split(pattern, text, flags=re.MULTILINE)

    preamble = split_text[0]
    if preamble.strip():
        sections.append({"title": "Introduction", "content": preamble.strip()})

    # Pair up (title, content) from the alternating tail of the split.
    it = iter(split_text[1:])
    for title, content in zip(it, it):
        sections.append({"title": title.strip(), "content": content.strip()})

    return sections
|
| 27 |
|
| 28 |
+
def analyze_single_document(text):
    """Analyze a single document and return aspect-level sentiment.

    Each detected section is mapped through the ontology adapter; the
    section's sentiment is attributed to every aspect matched in it, and
    per-aspect sentiments are aggregated by majority vote.

    Args:
        text: Raw report text.

    Returns:
        Dict mapping aspect name -> most common sentiment label for it.
    """
    detected_sections = detect_sections(text)
    aspect_sentiments = defaultdict(list)

    for section in detected_sections:
        title = section['title']
        content = section['content']
        if not content.strip():
            # Title-only sections contribute nothing.
            continue

        # Result/performance sections are meant to count more.
        # NOTE(review): `weight` is currently unused — adapter.map_term
        # would need weighting support for this to take effect; kept as a
        # documented placeholder rather than silently dropped.
        weight = 1.0
        if any(keyword in title.lower() for keyword in ["results", "performance"]):
            weight = 1.5

        mapping = adapter.map_term(content)

        if mapping['matches']:
            # One sentiment per section, attributed to every matched aspect.
            sentiment = mapping['sentiment']
            for match in mapping['matches']:
                aspect_sentiments[match['mapped_to']].append(sentiment)

    # Majority vote per aspect (ties resolved by Counter/insertion order).
    aggregated_sentiments = {}
    for aspect, sentiments in aspect_sentiments.items():
        if sentiments:
            aggregated_sentiments[aspect] = Counter(sentiments).most_common(1)[0][0]

    return aggregated_sentiments
|
| 59 |
+
|
| 60 |
+
def compare_documents(text1, text2):
    """Compares two documents for sentiment consistency."""
    # Per-document aspect -> sentiment maps.
    sentiments1 = analyze_single_document(text1)
    sentiments2 = analyze_single_document(text2)

    def _fmt(aspect):
        # Human-readable aspect name: "supply_chain" -> "Supply Chain".
        return aspect.replace('_', ' ').title()

    # Accumulate the report as a list of fragments and join once at the end.
    parts = ["--- Document 1 Analysis ---\n"]
    parts.extend(f"- **{_fmt(a)}**: {s.title()}\n" for a, s in sentiments1.items())
    parts.append("\n--- Document 2 Analysis ---\n")
    parts.extend(f"- **{_fmt(a)}**: {s.title()}\n" for a, s in sentiments2.items())

    # Cross-document comparison over the union of aspects.
    parts.append("\n--- Cross-Document Consistency Analysis ---\n")
    found_drift = False
    for aspect in sorted(set(sentiments1.keys()) | set(sentiments2.keys())):
        s1 = sentiments1.get(aspect)
        s2 = sentiments2.get(aspect)
        aspect_name = _fmt(aspect)

        if s1 and s2 and s1 != s2:
            parts.append(f"🟡 **Sentiment Drift Detected for '{aspect_name}'**\n")
            parts.append(f" - Document 1: {s1.title()}\n")
            parts.append(f" - Document 2: {s2.title()}\n\n")
            found_drift = True
        elif s1 and not s2:
            parts.append(f"⚪️ **'{aspect_name}'** only found in Document 1 (Sentiment: {s1.title()})\n\n")
        elif s2 and not s1:
            parts.append(f"⚪️ **'{aspect_name}'** only found in Document 2 (Sentiment: {s2.title()})\n\n")

    if not found_drift:
        parts.append("✅ No sentiment contradictions detected between the two documents for common aspects.\n")

    return "".join(parts)
|
| 101 |
|
| 102 |
|
| 103 |
# Gradio UI: two text inputs (one per report) feeding compare_documents;
# the combined per-document analysis plus consistency report is shown in
# a single output textbox.
iface = gr.Interface(
    fn=compare_documents,
    inputs=[
        gr.Textbox(label="Input ESG Report Text 1 (e.g., 2022 Report)", lines=15, placeholder="Paste the first report here..."),
        gr.Textbox(label="Input ESG Report Text 2 (e.g., 2023 Report)", lines=15, placeholder="Paste the second report here...")
    ],
    outputs=gr.Textbox(label="Cross-Document Consistency Analysis", lines=40),
    title="ESG Cross-Document Sentiment Consistency Analysis",
    description="Compares two ESG reports to detect sentiment drift or contradictions for the same aspects over time."
)

# Start the app (blocking call; binds a local web server).
iface.launch()
|