esgdata / src /ontology_adapter.py
darisdzakwanhoesien2
Testting
08d23ba
raw
history blame
4.02 kB
# src/ontology_adapter.py
from owlready2 import get_ontology
from sentence_transformers import SentenceTransformer, util
import torch
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
class ESGOntologyAdapter:
    """Map free text onto the classes of an ESG ontology.

    On construction the ontology is loaded with owlready2, every class is
    given a display label and assigned to one of the ESG pillars, and the
    labels are embedded once with a SentenceTransformer model so that
    ``map_term`` only has to embed the query text.

    Attributes:
        onto: The loaded owlready2 ontology.
        pillars: Names of the top-level pillar classes, in priority order.
        classes: Unique class labels, in ontology iteration order.
        class_to_pillar: Maps each label in ``classes`` to its pillar name,
            or ``"Unknown"`` when no pillar is among its ancestors.
        model: The SentenceTransformer used for embeddings.
        embeddings: Embeddings of ``classes`` (row ``i`` belongs to
            ``classes[i]``).
        sentiment_analyzer: VADER sentiment analyzer instance.
    """

    # Built-in OWL classes that carry no ESG meaning and are never reported.
    _SKIPPED_CLASS_NAMES = ("Thing", "Nothing")

    def __init__(self, ontology_path="ontology/esg_ontology.owl", model_name_or_path="all-MiniLM-L6-v2"):
        """Load the ontology and pre-compute the class-label embeddings.

        Args:
            ontology_path: Path or IRI of the OWL file to load.
            model_name_or_path: SentenceTransformer model name or local path.
        """
        print(f"Loading ontology from {ontology_path}...")
        self.onto = get_ontology(ontology_path).load()
        self.class_to_pillar = {}
        self.pillars = ["Environmental", "Social", "Governance"]

        # Extract classes and map them to pillars. ``class_to_pillar`` doubles
        # as the "already seen" set: a label is added to both containers at
        # the same time, so the first class carrying a label wins.
        self.classes = []
        for cls in self.onto.classes():
            if cls.name in self._SKIPPED_CLASS_NAMES:
                continue
            clean_label = self._label_for_class(cls)
            if clean_label in self.class_to_pillar:
                continue
            self.classes.append(clean_label)
            self.class_to_pillar[clean_label] = self.get_pillar_for_class(cls)

        print(f"Loading SentenceTransformer model from: {model_name_or_path}")
        self.model = SentenceTransformer(model_name_or_path)
        self.embeddings = self.model.encode(self.classes, convert_to_tensor=True)
        self.sentiment_analyzer = SentimentIntensityAnalyzer()

    @staticmethod
    def _label_for_class(cls):
        """Return the display label of an ontology class.

        Uses the first entry of ``cls.label`` when there is one and falls
        back to ``cls.name`` otherwise. The fallback matters because a class
        without an ``rdfs:label`` exposes an *empty* list, on which a plain
        ``label[0]`` would raise ``IndexError``.
        """
        label = getattr(cls, "label", None)
        if isinstance(label, (list, tuple)):
            label = label[0] if label else None
        return label if label else cls.name

    def get_pillar_for_class(self, cls) -> str:
        """Find the top-level parent (pillar) for a given class.

        Args:
            cls: An ontology class exposing ``name`` and ``ancestors()``.

        Returns:
            The pillar name, or ``"Unknown"`` if the class is neither a
            pillar itself nor a descendant of one.
        """
        if cls.name in self.pillars:
            return cls.name
        ancestor_names = {getattr(ancestor, "name", None) for ancestor in cls.ancestors()}
        # Walk the pillars (an ordered list) rather than the ancestors (an
        # unordered collection) so that a class sitting under several pillars
        # always resolves to the same one.
        for pillar in self.pillars:
            if pillar in ancestor_names:
                return pillar
        return "Unknown"

    def analyze_sentiment(self, text) -> str:
        """Analyze the sentiment of a text and return a simple category.

        Returns:
            ``'positive'`` when the analyzer's compound score is >= 0.05,
            ``'negative'`` when it is <= -0.05, otherwise ``'neutral'``.
        """
        compound = self.sentiment_analyzer.polarity_scores(text)["compound"]
        if compound >= 0.05:
            return 'positive'
        if compound <= -0.05:
            return 'negative'
        return 'neutral'

    def map_term(self, text, threshold=0.05) -> dict:
        """Map input text to the closest ESG classes, with pillar and sentiment.

        Args:
            text: The text to map.
            threshold: Minimum cosine similarity (exclusive) for a class to
                be reported.

        Returns:
            A dict with keys ``"term"`` (the input text), ``"matches"`` (a
            list of ``{"mapped_to", "similarity", "pillar"}`` dicts, best
            match first, similarity rounded to two decimals) and
            ``"sentiment"`` (see ``analyze_sentiment``).
        """
        sentiment = self.analyze_sentiment(text)
        if not self.classes:
            # Nothing to compare against; skip the similarity computation,
            # which has no rows to work on.
            return {"term": text, "matches": [], "sentiment": sentiment}

        query_emb = self.model.encode(text, convert_to_tensor=True)
        cos_scores = util.cos_sim(query_emb, self.embeddings)[0]

        scored = [
            (score, class_label)
            for score, class_label in zip(cos_scores.tolist(), self.classes)
            if score > threshold
        ]
        # Rank on the raw score; rounding first would turn near-equal scores
        # into ties and leave their order to ontology iteration order.
        scored.sort(key=lambda pair: pair[0], reverse=True)

        matches = [
            {
                "mapped_to": class_label,
                "similarity": round(score, 2),
                "pillar": self.class_to_pillar.get(class_label, 'Unknown'),
            }
            for score, class_label in scored
        ]
        return {"term": text, "matches": matches, "sentiment": sentiment}

    def get_individuals(self) -> list:
        """Retrieve individuals and their data properties.

        Returns:
            One ``{"individual": name, "properties": {prop_name: values}}``
            dict per individual; properties without values are omitted.
        """
        # The property set does not depend on the individual, so enumerate
        # it once instead of once per individual.
        data_props = list(self.onto.data_properties())
        data = []
        for ind in self.onto.individuals():
            props = {}
            for prop in data_props:
                vals = prop[ind]
                if vals:
                    props[prop.name] = vals
            data.append({"individual": ind.name, "properties": props})
        return data

    def get_direct_parents(self) -> dict:
        """Create a dictionary mapping each class name to its direct parent name.

        Pillar classes and the built-in OWL classes are not included as
        keys. Entries of ``is_a`` without a ``name`` attribute and the
        ``Thing`` root are skipped; the first remaining parent is used, and
        classes with no such parent are omitted.
        """
        parents = {}
        for cls in self.onto.classes():
            if cls.name in self._SKIPPED_CLASS_NAMES or cls.name in self.pillars:
                continue
            for parent in cls.is_a:
                if hasattr(parent, 'name') and parent.name != 'Thing':
                    parents[cls.name] = parent.name
                    break
        return parents