Update app.py
app.py
CHANGED
@@ -15,7 +15,6 @@ import spacy

st.set_page_config(page_title="Advanced Political Speech Analysis", page_icon="🗣️", layout="wide")

-# Advanced NLP Libraries
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
@@ -29,13 +28,11 @@ from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from textstat import flesch_reading_ease, flesch_kincaid_grade

-# Download necessary NLTK resources
nltk.download('punkt', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)
nltk.download('stopwords', quiet=True)
nltk.download('punkt_tab', quiet=True)

-# Load spaCy model (requires separate installation)
try:
    nlp = spacy.load('en_core_web_lg')
except:
@@ -43,7 +40,6 @@ except:
             "pip install spacy\n"
             "python -m spacy download en_core_web_lg")

-# Constants and Configurations
MORAL_FOUNDATIONS = {
    'care': 'Care/Harm',
    'fairness': 'Fairness/Cheating',
@@ -62,21 +58,17 @@ RHETORICAL_DEVICES = {

class SpeechAnalyzer:
    def __init__(self):
-        # Load MoralFoundations model
        self.moral_model_path = "MMADS/MoralFoundationsClassifier"
        self.moral_tokenizer = RobertaTokenizer.from_pretrained(self.moral_model_path)
        self.moral_model = RobertaForSequenceClassification.from_pretrained(self.moral_model_path)

-        # Define label names directly
        self.label_names = ['care', 'fairness', 'loyalty', 'authority', 'sanctity']

-        # Other pipelines remain the same
        self.sentiment_pipeline = pipeline("sentiment-analysis")
        self.ner_tokenizer = AutoTokenizer.from_pretrained("dslim/bert-base-NER")
        self.ner_model = AutoModelForTokenClassification.from_pretrained("dslim/bert-base-NER")
        self.ner_pipeline = pipeline("ner", model=self.ner_model, tokenizer=self.ner_tokenizer)

-        # Add emotion classifier
        self.emotion_classifier = pipeline("text-classification",
                                           model="j-hartmann/emotion-english-distilroberta-base")
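For context, a minimal sketch of how the pipelines initialized above might be exercised on one segment (illustrative only, not part of this commit; it assumes the SpeechAnalyzer class is available, the sample text is made up, and the softmax over the moral-foundation logits is an assumption chosen to match how probabilities[0][idx] is indexed later in the file):

# Illustrative sketch, not part of app.py
import torch

analyzer = SpeechAnalyzer()
segment = "We must care for our families and protect the most vulnerable."

sentiment = analyzer.sentiment_pipeline(segment, truncation=True, max_length=512)
emotion = analyzer.emotion_classifier(segment, truncation=True, max_length=512)
entities = analyzer.ner_pipeline("President Lincoln spoke at Gettysburg.")

inputs = analyzer.moral_tokenizer(segment, return_tensors="pt", truncation=True, max_length=512)
with torch.no_grad():
    logits = analyzer.moral_model(**inputs).logits
probabilities = torch.softmax(logits, dim=1)          # assumed normalization
scores = dict(zip(analyzer.label_names, probabilities[0].tolist()))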
@@ -91,7 +83,6 @@ class SpeechAnalyzer:
        for word in words:
            if current_length + len(word.split()) > max_length:
                segments.append(' '.join(current_segment))
-                # Use the overlap parameter from the method arguments
                current_segment = current_segment[-overlap:] + [word]
                current_length = len(' '.join(current_segment).split())
            else:
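The hunk above touches the overlapping word-window segmentation. A standalone illustration of the same idea (simplified to count one word at a time; the function name and defaults are assumptions, not the file's actual signature):

# Illustrative sketch of overlap-based segmentation
def split_with_overlap(text, max_length=512, overlap=50):
    """Split text into word chunks of at most max_length words,
    carrying the last `overlap` words into the next chunk."""
    words = text.split()
    segments, current_segment, current_length = [], [], 0
    for word in words:
        if current_length + 1 > max_length:
            segments.append(' '.join(current_segment))
            current_segment = current_segment[-overlap:] + [word]
            current_length = len(current_segment)
        else:
            current_segment.append(word)
            current_length += 1
    if current_segment:
        segments.append(' '.join(current_segment))
    return segments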
@@ -125,7 +116,6 @@ class SpeechAnalyzer:
                if foundation in foundation_scores:
                    foundation_scores[foundation].append(probabilities[0][idx].item())

-        # Average the scores across segments
        aggregated_scores = {
            foundation: np.mean(scores) for foundation, scores in foundation_scores.items()
        }
@@ -139,7 +129,6 @@ class SpeechAnalyzer:
        basic_emotions = []

        for segment in segments:
-            # Get sentiment scores with truncation
            sentiment_result = self.sentiment_pipeline(segment, truncation=True, max_length=512)
            score = sentiment_result[0]['score']
            if sentiment_result[0]['label'] == 'POSITIVE':
@@ -148,7 +137,6 @@ class SpeechAnalyzer:
                score = 0.5 - (score * 0.5)
            sentiment_scores.append(score)

-            # Get emotion classification with truncation
            emotion_result = self.emotion_classifier(segment, truncation=True, max_length=512)
            emotion = emotion_result[0]['label']
            basic_emotions.append(emotion)
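Together with the POSITIVE branch just outside this hunk (presumably 0.5 + score * 0.5), the lines above fold the classifier's label and confidence into a single 0-1 scale. A minimal standalone version of that mapping, with assumed names:

# Illustrative sketch of the sentiment normalization
def to_unit_sentiment(label, score):
    """Map a POSITIVE/NEGATIVE label and its confidence onto 0-1:
    1.0 = confidently positive, 0.0 = confidently negative, 0.5 = neutral."""
    return 0.5 + score * 0.5 if label == 'POSITIVE' else 0.5 - score * 0.5

print(to_unit_sentiment('POSITIVE', 0.9), to_unit_sentiment('NEGATIVE', 0.9))  # roughly 0.95 and 0.05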
@@ -167,7 +155,6 @@ class SpeechAnalyzer:
        tfidf_matrix = vectorizer.fit_transform([text])
        feature_names = vectorizer.get_feature_names_out()

-        # Get top phrases by TF-IDF score
        sorted_idx = tfidf_matrix.toarray()[0].argsort()[::-1]
        top_phrases = [feature_names[i] for i in sorted_idx[:top_n]]
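For reference, the TF-IDF ranking above can be reproduced in isolation along these lines; the vectorizer settings are assumptions, since the actual configuration sits outside this hunk:

# Illustrative sketch of TF-IDF key-phrase extraction
from sklearn.feature_extraction.text import TfidfVectorizer

def top_tfidf_phrases(text, top_n=10):
    vectorizer = TfidfVectorizer(ngram_range=(1, 2), stop_words='english')  # assumed settings
    tfidf_matrix = vectorizer.fit_transform([text])          # single-document corpus
    feature_names = vectorizer.get_feature_names_out()
    sorted_idx = tfidf_matrix.toarray()[0].argsort()[::-1]   # highest score first
    return [feature_names[i] for i in sorted_idx[:top_n]]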
@@ -191,29 +178,23 @@ class SpeechAnalyzer:

    def create_semantic_network(self, text, top_n=20, window_size=10, chunk_size=10000):
        """Create semantic network graph with weighted edges"""
-        # Process text in chunks
        chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]

-        # Initialize collections for aggregating results
        all_nouns = []
        noun_freq = nltk.FreqDist()

-        # Process each chunk
        for chunk in chunks:
            doc = nlp(chunk)
            chunk_nouns = [token.text.lower() for token in doc if token.pos_ == 'NOUN']
            all_nouns.extend(chunk_nouns)
            noun_freq.update(chunk_nouns)

-        # Get top nouns across all chunks
        top_nouns = [noun for noun, freq in noun_freq.most_common(top_n)]

-        # Create graph and co-occurrence matrix
        G = nx.Graph()
        cooc_matrix = np.zeros((len(top_nouns), len(top_nouns)))
        noun_to_idx = {noun: idx for idx, noun in enumerate(top_nouns)}

-        # Process co-occurrences in chunks
        for chunk in chunks:
            doc = nlp(chunk)
            words = [token.text.lower() for token in doc]
@@ -228,11 +209,9 @@ class SpeechAnalyzer:
                        cooc_matrix[idx1][idx2] += 1
                        cooc_matrix[idx2][idx1] += 1

-        # Build network
        for noun in top_nouns:
            G.add_node(noun, size=noun_freq[noun])

-        # Add weighted edges
        max_weight = np.max(cooc_matrix)
        if max_weight > 0:  # Prevent division by zero
            for i in range(len(top_nouns)):
@@ -243,7 +222,6 @@ class SpeechAnalyzer:
                               weight=weight,
                               width=3 * (weight/max_weight))

-        # Calculate and store layout
        pos = nx.spring_layout(G, k=1, iterations=50)
        for node in G.nodes():
            G.nodes[node]['pos'] = pos[node]
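The three hunks above cover the co-occurrence counting, the weighted edges, and the spring layout. A compact sketch of the windowed co-occurrence step on its own (assumed helper name; the file does this inline over spaCy tokens):

# Illustrative sketch of window-based co-occurrence counting
import numpy as np

def cooccurrence_counts(words, vocab, window_size=10):
    """Count how often pairs of vocabulary words appear within
    `window_size` tokens of each other (symmetric matrix)."""
    idx = {w: i for i, w in enumerate(vocab)}
    matrix = np.zeros((len(vocab), len(vocab)))
    for pos, word in enumerate(words):
        if word not in idx:
            continue
        for other in words[pos + 1 : pos + window_size]:
            if other in idx and other != word:
                matrix[idx[word]][idx[other]] += 1
                matrix[idx[other]][idx[word]] += 1
    return matrix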
@@ -269,14 +247,11 @@ def process_all_analyses(text, _analyzer):
def main():
    st.title("🗣️ Political Text Analysis Toolkit")

-    # Initialize analyzer
    analyzer = SpeechAnalyzer()

-    # File upload
    uploaded_file = st.file_uploader("Upload Political Speech", type=['txt', 'docx', 'pdf'])

    if uploaded_file is not None:
-        # Read file (similar to previous implementation)
        if uploaded_file.name.endswith('.txt'):
            text = uploaded_file.getvalue().decode('utf-8')
        elif uploaded_file.name.endswith('.docx'):
@@ -288,7 +263,6 @@ def main():
            pdf_reader = PyPDF2.PdfReader(uploaded_file)
            text = ' '.join([page.extract_text() for page in pdf_reader.pages])

-        # Create tabs for different analyses
        progress_bar = st.progress(0)
        status_text = st.empty()
        tab1, tab2, tab3, tab4, tab5 = st.tabs([
@@ -305,7 +279,6 @@ def main():
            st.subheader("Moral Foundations Analysis")
            moral_scores = analyzer.analyze_moral_foundations(text)

-            # Plotly bar chart
            moral_df = pd.DataFrame.from_dict(moral_scores, orient='index', columns=['Score'])
            moral_df.index.name = 'Moral Foundation'
            moral_df = moral_df.reset_index()
@@ -319,7 +292,6 @@ def main():
            )
            st.plotly_chart(fig)

-            # Detailed insights
            for foundation, score in moral_scores.items():
                st.write(f"**{MORAL_FOUNDATIONS[foundation]}**: {score:.2%}")
@@ -328,13 +300,10 @@ def main():
            progress_bar.progress(40)
            st.subheader("Speech Trajectory Analysis")

-            # Get cached data
            segments, segment_labels, sentiment_scores, basic_emotions, moral_trajectories = process_all_analyses(text, analyzer)

-            # Create unified figure
            unified_fig = go.Figure()

-            # Add traces for each analysis type
            viz_options = st.multiselect(
                "Select analyses to display:",
                ["Sentiment Flow", "Moral Foundations Flow", "Basic Emotions Flow"],
@@ -371,7 +340,6 @@ def main():
                    'Emotion': basic_emotions
                })

-                # Create color mapping for emotions
                emotion_colors = {
                    'joy': '#FFD700',        # Gold
                    'sadness': '#4169E1',    # Royal Blue
@@ -383,11 +351,11 @@ def main():

                unified_fig.add_trace(go.Bar(
                    x=segment_labels,
-                    y=[1] * len(basic_emotions),
-                    name=f'Emotions Found: {", ".join(sorted(set(basic_emotions)))}',
+                    y=[1] * len(basic_emotions),
+                    name=f'Emotions Found: {", ".join(sorted(set(basic_emotions)))}',
                    marker=dict(
                        color=[emotion_colors.get(e.lower(), '#808080') for e in basic_emotions],
-                        line=dict(width=1, color='#000000')
+                        line=dict(width=1, color='#000000')
                    ),
                    opacity=0.8,
                    hovertemplate="Segment %{x}<br>Emotion: %{text}<extra></extra>",
@@ -403,7 +371,6 @@ def main():
            st.subheader("Linguistic Analysis")
            readability = analyzer.calculate_readability(text)

-            # Readability metrics with context
            col1, col2 = st.columns(2)
            with col1:
                score = readability['Flesch Reading Ease']
@@ -424,11 +391,9 @@ def main():
                    delta_color="normal"
                )

-            # Enhanced key phrases display
            st.subheader("Key Topics and Themes")
            key_phrases = analyzer.extract_key_phrases(text)

-            # Create columns for better phrase organization
            cols = st.columns(3)
            for idx, phrase in enumerate(key_phrases):
                col_idx = idx % 3
@@ -452,20 +417,16 @@ def main():

            network_fig = go.Figure()

-            # Add edges with enhanced visual encoding
            for edge in semantic_graph.edges():
                x0, y0 = semantic_graph.nodes[edge[0]]['pos']
                x1, y1 = semantic_graph.nodes[edge[1]]['pos']
                weight = semantic_graph.edges[edge]['weight']
                max_weight = max(d['weight'] for _, _, d in semantic_graph.edges(data=True))

-                # Normalize weight for visual encoding
                normalized_weight = weight / max_weight

-                # Enhanced width scaling (more pronounced differences)
                width = 2 + (normalized_weight * 8)

-                # Color gradient from light to dark based on weight
                color = f'rgba(31, 119, 180, {0.3 + normalized_weight * 0.7})'

                network_fig.add_trace(go.Scatter(
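The visual encoding above maps each edge's normalized weight to a line width of 2-10 px and an opacity of 0.3-1.0. A quick standalone check of that scaling (assumed helper name, not part of the file):

# Illustrative sketch of the width/opacity scaling used for network edges
def edge_style(weight, max_weight):
    normalized = weight / max_weight                              # 0..1
    width = 2 + normalized * 8                                    # 2..10
    color = f'rgba(31, 119, 180, {0.3 + normalized * 0.7:.2f})'   # faint..opaque blue
    return width, color

print(edge_style(5, 10))   # (6.0, 'rgba(31, 119, 180, 0.65)')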
@@ -480,7 +441,6 @@ def main():
                    hovertext=f'Relationship strength: {weight:.2f}'
                ))

-            # Enhanced nodes with better visibility
            for node in semantic_graph.nodes():
                x, y = semantic_graph.nodes[node]['pos']
                size = semantic_graph.nodes[node]['size']
@@ -521,10 +481,8 @@ def main():
            st.subheader("Named Entity Recognition")
            named_entities = analyzer.detect_named_entities(text)

-            # Process entities
            entities_df = pd.DataFrame(named_entities)

-            # Map entity types to friendly names
            type_mapping = {
                'B-PER': 'Person',
                'I-PER': 'Person',
@@ -536,20 +494,17 @@ def main():
                'I-MISC': 'Other'
            }

-            # Clean and transform the data
            display_df = pd.DataFrame({
                'Term': entities_df['word'],
                'Category': entities_df['entity'].map(type_mapping),
                'Confidence': entities_df['score'].apply(lambda x: f"{x*100:.1f}%")
            })

-            # Group similar entities
            grouped_df = display_df.groupby('Category').agg({
                'Term': lambda x: ', '.join(set(x)),
                'Confidence': 'count'
            }).reset_index()

-            # Display results in an organized way
            for category in grouped_df['Category'].unique():
                category_data = grouped_df[grouped_df['Category'] == category]
                st.write(f"### {category}")
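The final hunks above tidy the NER tab: raw BIO tags are mapped to friendly categories, formatted, and grouped. A self-contained sketch of that grouping with made-up rows (the B-LOC and B-ORG mappings are assumptions; this hunk only shows the Person and Other entries):

# Illustrative sketch of the NER grouping, with fabricated example rows
import pandas as pd

named_entities = [
    {'word': 'Lincoln', 'entity': 'B-PER', 'score': 0.998},
    {'word': 'Gettysburg', 'entity': 'B-LOC', 'score': 0.995},
    {'word': 'Union', 'entity': 'B-ORG', 'score': 0.87},
]
type_mapping = {'B-PER': 'Person', 'B-LOC': 'Location', 'B-ORG': 'Organization'}  # assumed mapping

entities_df = pd.DataFrame(named_entities)
display_df = pd.DataFrame({
    'Term': entities_df['word'],
    'Category': entities_df['entity'].map(type_mapping),
    'Confidence': entities_df['score'].apply(lambda x: f"{x*100:.1f}%"),
})
grouped = display_df.groupby('Category').agg({
    'Term': lambda x: ', '.join(set(x)),
    'Confidence': 'count',
}).reset_index()
print(grouped)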