darisdzakwanhoesien2 committed
Commit 9437058 · 1 Parent(s): 19902b0

Stage 5 Testing

Files changed (1):
  1. app.py +61 -60
app.py CHANGED
@@ -1,46 +1,34 @@
  import re
  from src.ontology_adapter import ESGOntologyAdapter
  import gradio as gr
- from collections import Counter
+ from collections import Counter, defaultdict

  adapter = ESGOntologyAdapter("ontology/esg_ontology.owl")

  def detect_sections(text):
      """Detects section titles and content from text."""
-     # Regex to find section titles (e.g., "Introduction", "1. Introduction", "Results and Discussion")
-     # This regex looks for lines that are not heavily indented and likely represent titles.
-     # It splits the text by lines that look like titles.
      pattern = r'^\s*(?:\d+\.\s*)?([A-Z][a-zA-Z\s&]+)\s*$'

      sections = []

-     # Find all potential titles
      potential_titles = re.findall(pattern, text, re.MULTILINE)

-     # Split text by these titles
      split_text = re.split(pattern, text, flags=re.MULTILINE)

      content = split_text[0]
      if content.strip():
          sections.append({"title": "Introduction", "content": content.strip()})

-     # The regex split gives us [content_before_first_title, title1, content1, title2, content2, ...]
      it = iter(split_text[1:])
      for title, content in zip(it, it):
          sections.append({"title": title.strip(), "content": content.strip()})

      return sections

- def analyze_text(text):
+ def analyze_single_document(text):
+     """Analyzes a single document and returns aspect-level sentiment."""
      detected_sections = detect_sections(text)
-     results = []
-
-     pillar_sentiments = {
-         "Environmental": [],
-         "Social": [],
-         "Governance": [],
-         "Unknown": []
-     }
+     aspect_sentiments = defaultdict(list)

      for section in detected_sections:
          title = section['title']
@@ -49,65 +37,78 @@ def analyze_text(text):
          if not content.strip():
              continue

-         # Determine weight based on section title
          weight = 1.0
          if any(keyword in title.lower() for keyword in ["results", "performance"]):
-             weight = 1.5  # Higher weight for results sections
+             weight = 1.5

          mapping = adapter.map_term(content)
-         section_snippet = content.strip().replace("\n", " ")[:120]

          if mapping['matches']:
-             top_match = mapping['matches'][0]
-             pillar = top_match['pillar']
              sentiment = mapping['sentiment']
-
-             if pillar != "Unknown":
-                 # Apply weight to sentiment score if possible (conceptual)
-                 # For now, we just note it. The ontology adapter would need to support weights.
-                 pillar_sentiments[pillar].append(sentiment)
-
-             predicted_aspects = []
              for match in mapping['matches']:
-                 label = match['mapped_to'].replace("_", " ").title()
-                 sim = match['similarity']
-                 pillar_name = match['pillar']
-
-                 # Adjust similarity score based on weight
-                 adjusted_sim = min(sim * weight, 1.0)
-
-                 predicted_aspects.append(f"**{label}** (Pillar: {pillar_name}, Sim: {adjusted_sim:.2f})")
-
-             results.append(f"🟢 Section: **{title}**\n{section_snippet}\n→ Predicted Aspects: {', '.join(predicted_aspects)}\n→ Sentiment: {sentiment.title()}")
-         else:
-             results.append(f"🟢 Section: **{title}**\n{section_snippet}\n→ No ESG aspects identified")
+                 aspect = match['mapped_to']
+                 aspect_sentiments[aspect].append(sentiment)
+
+     # Aggregate sentiments for each aspect (e.g., by taking the most common one)
+     aggregated_sentiments = {}
+     for aspect, sentiments in aspect_sentiments.items():
+         if sentiments:
+             aggregated_sentiments[aspect] = Counter(sentiments).most_common(1)[0][0]

-     summary = ["\n\n--- ESG Pillar Sentiment Summary ---"]
-     for pillar, sentiments in pillar_sentiments.items():
-         if not sentiments:
-             summary.append(f"\n**{pillar}**: No sections mapped.")
-             continue
-
-         count = Counter(sentiments)
-         total = len(sentiments)
+     return aggregated_sentiments
+
+ def compare_documents(text1, text2):
+     """Compares two documents for sentiment consistency."""
+
+     # Analyze both documents
+     sentiments1 = analyze_single_document(text1)
+     sentiments2 = analyze_single_document(text2)
+
+     # --- Generate formatted output for each document ---
+     report1 = "--- Document 1 Analysis ---\n"
+     for aspect, sentiment in sentiments1.items():
+         report1 += f"- **{aspect.replace('_', ' ').title()}**: {sentiment.title()}\n"

-         summary_parts = []
-         for sentiment_type in ['positive', 'neutral', 'negative']:
-             if count[sentiment_type] > 0:
-                 percentage = round((count[sentiment_type] / total) * 100)
-                 summary_parts.append(f"{percentage}% {sentiment_type}")
+     report2 = "\n--- Document 2 Analysis ---\n"
+     for aspect, sentiment in sentiments2.items():
+         report2 += f"- **{aspect.replace('_', ' ').title()}**: {sentiment.title()}\n"
+
+     # --- Cross-Document Consistency Analysis ---
+     consistency_report = "\n--- Cross-Document Consistency Analysis ---\n"
+     all_aspects = sorted(list(set(sentiments1.keys()) | set(sentiments2.keys())))
+
+     found_drift = False
+     for aspect in all_aspects:
+         sentiment1 = sentiments1.get(aspect)
+         sentiment2 = sentiments2.get(aspect)

-         summary.append(f"\n**{pillar}**: {', '.join(summary_parts)}")
+         aspect_name = aspect.replace('_', ' ').title()
+
+         if sentiment1 and sentiment2 and sentiment1 != sentiment2:
+             consistency_report += f"🟡 **Sentiment Drift Detected for '{aspect_name}'**\n"
+             consistency_report += f"  - Document 1: {sentiment1.title()}\n"
+             consistency_report += f"  - Document 2: {sentiment2.title()}\n\n"
+             found_drift = True
+         elif sentiment1 and not sentiment2:
+             consistency_report += f"⚪️ **'{aspect_name}'** only found in Document 1 (Sentiment: {sentiment1.title()})\n\n"
+         elif not sentiment1 and sentiment2:
+             consistency_report += f"⚪️ **'{aspect_name}'** only found in Document 2 (Sentiment: {sentiment2.title()})\n\n"

-     return "\n".join(results) + "\n" + "".join(summary)
+     if not found_drift:
+         consistency_report += "✅ No sentiment contradictions detected between the two documents for common aspects.\n"
+
+     return report1 + report2 + consistency_report


  iface = gr.Interface(
-     fn=analyze_text,
-     inputs=gr.Textbox(label="Input ESG Report Text", lines=20, placeholder="Paste your multi-section report here..."),
-     outputs=gr.Textbox(label="Ontology Mapping and Pillar-Level Sentiment Analysis", lines=40),
-     title="ESG Aspect-Level Clustering and Sentiment Analysis",
-     description="Maps document sections to ESG ontology classes, aggregates sentiment at the pillar level (Environmental, Social, Governance)."
+     fn=compare_documents,
+     inputs=[
+         gr.Textbox(label="Input ESG Report Text 1 (e.g., 2022 Report)", lines=15, placeholder="Paste the first report here..."),
+         gr.Textbox(label="Input ESG Report Text 2 (e.g., 2023 Report)", lines=15, placeholder="Paste the second report here...")
+     ],
+     outputs=gr.Textbox(label="Cross-Document Consistency Analysis", lines=40),
+     title="ESG Cross-Document Sentiment Consistency Analysis",
+     description="Compares two ESG reports to detect sentiment drift or contradictions for the same aspects over time."
  )

  iface.launch()
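
A note for this "Stage 5 Testing" stage: the two pure pieces of logic in the diff — the regex section splitter and the per-aspect majority-vote sentiment aggregation — can be sanity-checked without the OWL ontology or Gradio. The sketch below is illustrative only: the sample report text, the aspect name carbon_emissions, and the hard-coded sentiment list are invented for the demo, and the shape of adapter.map_term()'s return value ({'matches': [{'mapped_to': ..., 'pillar': ..., 'similarity': ...}], 'sentiment': ...}) is inferred from how app.py indexes it, not from the adapter's own documentation.

import re
from collections import Counter, defaultdict

# Same pattern as app.py: an optionally numbered line of capitalized words.
pattern = r'^\s*(?:\d+\.\s*)?([A-Z][a-zA-Z\s&]+)\s*$'

# Hypothetical two-section report (not from the repository).
report = """Opening remarks before any heading.
Results and Discussion
Emissions fell sharply compared to last year.
"""

# re.split with a capturing group returns
# [content_before_first_title, title1, content1, title2, content2, ...],
# so zip(it, it) pairs each title with the body that follows it.
parts = re.split(pattern, report, flags=re.MULTILINE)
it = iter(parts[1:])
for title, body in zip(it, it):
    print(repr(title.strip()), "->", repr(body.strip()))
# 'Results and Discussion' -> 'Emissions fell sharply compared to last year.'

# Majority-vote aggregation, as in analyze_single_document: the most
# common sentiment recorded for an aspect becomes its overall label.
aspect_sentiments = defaultdict(list)
for sentiment in ["positive", "positive", "negative"]:  # invented votes
    aspect_sentiments["carbon_emissions"].append(sentiment)

aggregated = {aspect: Counter(votes).most_common(1)[0][0]
              for aspect, votes in aspect_sentiments.items() if votes}
print(aggregated)  # {'carbon_emissions': 'positive'}

One design note on the aggregation: Counter.most_common sorts by count and is stable for ties, so an even positive/negative split for an aspect resolves to whichever sentiment was appended first rather than to "neutral".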