import os
import tempfile
# ✅ CRITICAL: Set environment variables BEFORE any other imports
# Redirect every Hugging Face / Torch cache location to the system temp
# directory.  This has to run before transformers and torch are imported
# further down in this file.
_cache_root = tempfile.gettempdir()
for _cache_var in (
    'TRANSFORMERS_CACHE',
    'HF_HOME',
    'TORCH_HOME',
    'HF_DATASETS_CACHE',
    'HUGGINGFACE_HUB_CACHE',
):
    os.environ[_cache_var] = _cache_root
import streamlit as st
# ✅ CRITICAL: set_page_config() MUST be called first, before ANY Streamlit commands
# Page-level settings: browser tab title and icon, full-width layout, and a
# sidebar that starts opened.
st.set_page_config(
    page_title="AI Study Helper Pro - by Umaima Qureshi",
    page_icon="đ§ ",
    layout="wide",
    initial_sidebar_state="expanded",
)
# Now import other libraries
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForQuestionAnswering, AutoModelForSequenceClassification
from nltk.tokenize import sent_tokenize
import base64
import torch
import nltk
@st.cache_resource
def init_nltk():
    """Make the NLTK sentence-tokenizer data available from a temp-dir folder.

    Puts ``<tempdir>/nltk_data`` at the front of ``nltk.data.path`` and, for
    each of ``punkt`` and ``punkt_tab``, downloads the package into that
    folder when ``nltk.data.find`` cannot locate it.  Download problems are
    tolerated on purpose (best effort), so this function does not raise for
    them.

    Returns:
        bool: always ``True``.
    """
    nltk_data_dir = os.path.join(tempfile.gettempdir(), "nltk_data")
    os.makedirs(nltk_data_dir, exist_ok=True)
    if nltk_data_dir not in nltk.data.path:
        nltk.data.path.insert(0, nltk_data_dir)

    for pkg in ("punkt", "punkt_tab"):
        try:
            nltk.data.find(f"tokenizers/{pkg}")
        except LookupError:
            try:
                nltk.download(pkg, download_dir=nltk_data_dir, quiet=True)
            except Exception:
                # Best effort: carry on with the next package if this download
                # fails.  `Exception` (rather than a bare `except`) keeps
                # KeyboardInterrupt / SystemExit from being swallowed.
                continue
    return True
# Prepare the NLTK tokenizer data once, before any tab needs it.
init_nltk()

# Device index handed to every transformers pipeline below:
# 0 when CUDA is available, otherwise -1.
if torch.cuda.is_available():
    DEVICE = 0
else:
    DEVICE = -1
# Lazy model loading with proper cache handling - FIXED (No cache_dir in pipeline)
@st.cache_resource
def _load_summarizer_pipeline():
    """Build the summarization pipeline; raises if the model cannot be loaded.

    Only this loader is cached, so a failed load is never stored in the
    ``st.cache_resource`` cache and the next call tries again.
    """
    model_id = "sshleifer/distilbart-cnn-12-6"
    cache_dir = tempfile.gettempdir()
    model = AutoModelForSeq2SeqLM.from_pretrained(model_id, cache_dir=cache_dir)
    tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir)
    return pipeline(
        "summarization",
        model=model,
        tokenizer=tokenizer,
        device=DEVICE,
    )


def get_summarizer():
    """Return the summarization pipeline, or ``None`` when loading fails.

    The model and tokenizer are fetched with an explicit ``cache_dir`` (the
    system temp directory) and the pipeline runs on ``DEVICE``.  On failure
    the error is shown with ``st.error`` and ``None`` is returned; the failure
    itself is not cached, so a later call retries the load.

    Returns:
        The transformers summarization pipeline, or ``None``.
    """
    try:
        return _load_summarizer_pipeline()
    except Exception as e:
        st.error(f"Failed to load summarizer: {str(e)}")
        return None
@st.cache_resource
def _load_qa_pipeline():
    """Build the question-answering pipeline; raises if loading fails.

    Only this loader is cached, so a failed load is never stored in the
    ``st.cache_resource`` cache and the next call tries again.
    """
    model_id = "distilbert-base-uncased-distilled-squad"
    cache_dir = tempfile.gettempdir()
    model = AutoModelForQuestionAnswering.from_pretrained(model_id, cache_dir=cache_dir)
    tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir)
    return pipeline(
        "question-answering",
        model=model,
        tokenizer=tokenizer,
        device=DEVICE,
    )


def get_qa():
    """Return the question-answering pipeline, or ``None`` when loading fails.

    The model and tokenizer are fetched with an explicit ``cache_dir`` (the
    system temp directory) and the pipeline runs on ``DEVICE``.  On failure
    the error is shown with ``st.error`` and ``None`` is returned; the failure
    itself is not cached, so a later call retries the load.

    Returns:
        The transformers question-answering pipeline, or ``None``.
    """
    try:
        return _load_qa_pipeline()
    except Exception as e:
        st.error(f"Failed to load Q&A model: {str(e)}")
        return None
@st.cache_resource
def _load_classifier_pipeline():
    """Build the zero-shot classification pipeline; raises if loading fails.

    Only this loader is cached, so a failed load is never stored in the
    ``st.cache_resource`` cache and the next call tries again.
    """
    model_id = "typeform/distilbert-base-uncased-mnli"
    cache_dir = tempfile.gettempdir()
    model = AutoModelForSequenceClassification.from_pretrained(model_id, cache_dir=cache_dir)
    tokenizer = AutoTokenizer.from_pretrained(model_id, cache_dir=cache_dir)
    return pipeline(
        "zero-shot-classification",
        model=model,
        tokenizer=tokenizer,
        device=DEVICE,
    )


def get_classifier():
    """Return the zero-shot classifier pipeline, or ``None`` when loading fails.

    The model and tokenizer are fetched with an explicit ``cache_dir`` (the
    system temp directory) and the pipeline runs on ``DEVICE``.  On failure
    the error is shown with ``st.error`` and ``None`` is returned; the failure
    itself is not cached, so a later call retries the load.

    Returns:
        The transformers zero-shot-classification pipeline, or ``None``.
    """
    try:
        return _load_classifier_pipeline()
    except Exception as e:
        st.error(f"Failed to load classifier: {str(e)}")
        return None
@st.cache_resource
def _load_translator_pipeline(model_name):
    """Build a translation pipeline for ``model_name``; raises if loading fails.

    Only this loader is cached (one entry per ``model_name``), so a failed
    load is never stored in the ``st.cache_resource`` cache and the next call
    tries again.
    """
    cache_dir = tempfile.gettempdir()
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name, cache_dir=cache_dir)
    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
    return pipeline(
        "translation",
        model=model,
        tokenizer=tokenizer,
        device=DEVICE,
    )


def load_translator(model_name):
    """Return a translation pipeline for ``model_name``, or ``None`` on failure.

    The model and tokenizer are fetched with an explicit ``cache_dir`` (the
    system temp directory) and the pipeline runs on ``DEVICE``.  On failure
    the error is shown with ``st.error`` and ``None`` is returned; the failure
    itself is not cached, so a later call retries the load.

    Args:
        model_name: model identifier passed to ``from_pretrained``.

    Returns:
        The transformers translation pipeline, or ``None``.
    """
    try:
        return _load_translator_pipeline(model_name)
    except Exception as e:
        st.error(f"Failed to load translator: {str(e)}")
        return None
def truncate_text(text, max_words=400):
    """Cap *text* at ``max_words`` whitespace-separated words.

    Args:
        text: the string to shorten.
        max_words: number of leading words to keep.

    Returns:
        tuple: ``(shortened, was_truncated)`` - the kept words re-joined with
        single spaces, and whether the input held more than ``max_words``
        words.
    """
    tokens = text.split()
    was_truncated = len(tokens) > max_words
    kept = tokens[:max_words]
    return " ".join(kept), was_truncated
# ULTRA PREMIUM CSS - Glassmorphism + Animations
# NOTE(review): the literal below holds only a newline, so nothing is injected
# here as written - presumably a <style> block belongs inside it; confirm.
st.markdown("""
""", unsafe_allow_html=True)
# Hero Header
# NOTE(review): same situation - the literal is just a newline, so no header
# markup is rendered as written; confirm what belongs here.
st.markdown("""
""", unsafe_allow_html=True)
# Add cache clear button (for troubleshooting)
with st.expander("âī¸ Settings", expanded=False):
    if st.button("đ Clear Model Cache (if you see errors)"):
        # Drops every st.cache_resource entry, so the models (and the NLTK
        # initialisation) are rebuilt the next time they are requested.
        st.cache_resource.clear()
        st.success("✅ Cache cleared! Please refresh the page.")
        st.info("đĄ This will reload all AI models on next use.")
# Sidebar
with st.sidebar:
    st.markdown("### đ¯ Dashboard")
    st.markdown("---")

    # Stats - both figures are hard-coded literals, not measured values.
    col1, col2 = st.columns(2)
    with col1:
        st.markdown('đ 247 Processed\n', unsafe_allow_html=True)
    with col2:
        st.markdown('⥠2.3s Avg\n', unsafe_allow_html=True)

    st.markdown("---")
    st.markdown("### ⨠Features")
    features = [
        "đ AI Summarization",
        "đŦ Smart Q&A",
        "đ¯ Quiz Generator",
        "đ Multi-Language",
        "đ Keyword Extraction",
        "đ¨ Lightning Fast",
    ]
    # One bold markdown line per feature.
    for feat in features:
        st.markdown(f"**{feat}**")

    st.markdown("---")
    st.markdown("### đŠâđģ Developer")
    st.markdown("**Umaima Qureshi**")
    st.markdown("[GitHub](https://github.com/Umaima122)")
# Seed the per-session result slots on first run: list-valued slots start as
# an empty list, text-valued slots as an empty string.  Existing values are
# left untouched.
for key in ["summary", "quiz", "translation", "keywords"]:
    if key in st.session_state:
        continue
    if key in ["quiz", "keywords"]:
        st.session_state[key] = []
    else:
        st.session_state[key] = ""
# Tabs - one per feature, in display order.
tab_titles = [
    "đ Summarize",
    "đŦ Q&A",
    "đ¯ Quiz",
    "đ Translate",
    "đ Keywords",
    "đĨ Download",
]
tab1, tab2, tab3, tab4, tab5, tab6 = st.tabs(tab_titles)
# ============================================
# TAB 1: SUMMARIZE
# ============================================
with tab1:
    st.markdown("### đ Intelligent Summarization")
    text = st.text_area(
        "âī¸ Your notes or textbook:",
        value="",
        height=250,
        key="sum_txt",
        placeholder="Paste your content here and watch AI magic happen...",
    )

    # Three equal columns; the button and its output live in the middle one.
    col1, col2, col3 = st.columns([1, 1, 1])
    with col2:
        if st.button("⨠Generate Summary", key="sum_btn"):
            if text.strip():
                # Only the first 400 words are sent to the model.
                trunc, was_trunc = truncate_text(text, 400)
                if was_trunc:
                    st.info("đ Text optimized to 400 words for processing")

                if len(trunc.split()) >= 20:
                    with st.spinner("đ§ AI is thinking..."):
                        try:
                            summarizer = get_summarizer()
                            # get_summarizer() returns None when the model
                            # could not be loaded; nothing more to do then.
                            if summarizer:
                                result = summarizer(
                                    trunc,
                                    max_length=130,
                                    min_length=30,
                                    do_sample=False,
                                )
                                summary = result[0]['summary_text']
                                st.markdown(f"""
đ AI-Generated Summary
{summary}
{len(summary.split())} words
â Completed
""", unsafe_allow_html=True)
                                # Kept for the Download tab.
                                st.session_state["summary"] = summary
                        except Exception as e:
                            st.error(f"â Error generating summary: {str(e)}")
                else:
                    st.error("â ī¸ Need at least 20 words to generate a meaningful summary")
            else:
                st.error("â ī¸ Please provide text to summarize")
# ============================================
# TAB 2: Q&A
# ============================================
with tab2:
    st.markdown("### đŦ Intelligent Q&A System")
    context = st.text_area(
        "đ Context (Your notes):",
        value="",
        height=200,
        key="qa_ctx",
        placeholder="Paste your study material here...",
    )
    question = st.text_input(
        "â Ask your question:",
        key="qa_q",
        placeholder="What would you like to know?",
    )

    # Three equal columns; the button and its output live in the middle one.
    col1, col2, col3 = st.columns([1, 1, 1])
    with col2:
        if st.button("đ Get Answer", key="qa_btn"):
            if context.strip() and question.strip():
                # Only the first 400 words of the context are searched.
                trunc_ctx, _ = truncate_text(context, 400)
                with st.spinner("đ¤ Analyzing..."):
                    try:
                        qa_model = get_qa()
                        # get_qa() returns None when the model failed to load.
                        if qa_model:
                            result = qa_model(question=question, context=trunc_ctx)
                            answer = result['answer']
                            confidence = result.get('score', 0)
                            st.markdown(f"""
đĄ AI Answer
{answer}
â Answer Found
Confidence: {confidence:.1%}
""", unsafe_allow_html=True)
                    except Exception as e:
                        st.error(f"â Error finding answer: {str(e)}")
            else:
                st.error("â ī¸ Please provide both context and question")
# ============================================
# TAB 3: QUIZ - FIXED
# ============================================
with tab3:
    def get_first_words(text, max_words=12):
        """Get first N complete words from text"""
        words = text.split()
        if len(words) <= max_words:
            return text
        return ' '.join(words[:max_words])

    st.markdown("### đ¯ AI Quiz Generator")
    quiz_ctx = st.text_area(
        "đ Study material:",
        value="",
        height=200,
        key="quiz_ctx",
        placeholder="Paste content for quiz generation...",
    )

    # Three equal columns; the button and its output live in the middle one.
    col1, col2, col3 = st.columns([1, 1, 1])
    with col2:
        if st.button("đ Generate Quiz", key="quiz_btn"):
            if not quiz_ctx.strip():
                st.error("â ī¸ Please provide text for quiz generation")
            else:
                # Only the first 200 words are used, and at most the first
                # five sentences found in them.
                trunc_quiz, _ = truncate_text(quiz_ctx, 200)
                with st.spinner("đ˛ Creating questions..."):
                    try:
                        sentences = sent_tokenize(trunc_quiz)[:5]
                        if not sentences:
                            st.warning("â ī¸ Could not extract sentences from the text")
                        else:
                            # One templated question per sentence longer than
                            # 10 characters, quoting its first 12 words.
                            questions = [
                                f"What is the main concept in: '{get_first_words(s, 12)}'?"
                                for s in sentences
                                if len(s) > 10
                            ]
                            if questions:
                                st.markdown("đ Generated Quiz Questions\n", unsafe_allow_html=True)
                                st.markdown('', unsafe_allow_html=True)
                                for i, q in enumerate(questions, 1):
                                    st.markdown(f"""
Question {i}: {q}
""", unsafe_allow_html=True)
                                # NOTE(review): the source had lost its
                                # indentation; this call is assumed to follow
                                # the loop rather than sit inside it - confirm.
                                st.markdown('\n', unsafe_allow_html=True)
                                st.markdown('', unsafe_allow_html=True)
                                # Kept for the Download tab.
                                st.session_state["quiz"] = questions
                            else:
                                st.warning("â ī¸ Could not generate questions from the provided text")
                    except Exception as e:
                        st.error(f"â Error generating quiz: {str(e)}")
# ============================================
# TAB 4: TRANSLATE
# ============================================
with tab4:
    st.markdown("### đ AI Translation")
    trans_text = st.text_area(
        "âī¸ Text to translate:",
        height=200,
        key="trans_txt",
        placeholder="Enter text to translate...",
    )

    col1, col2 = st.columns(2)
    with col1:
        lang = st.selectbox(
            "đ¯ Target language:",
            ["French", "German", "Spanish", "Italian", "Hindi"],
        )
    with col2:
        # Two empty writes before the button - presumably vertical padding so
        # it lines up with the select box.
        st.write("")
        st.write("")
        if st.button("đ Translate Now", key="trans_btn"):
            if trans_text.strip():
                # Target language name -> model used to translate into it.
                model_map = {
                    "French": "Helsinki-NLP/opus-mt-en-fr",
                    "German": "Helsinki-NLP/opus-mt-en-de",
                    "Spanish": "Helsinki-NLP/opus-mt-en-es",
                    "Italian": "Helsinki-NLP/opus-mt-en-it",
                    "Hindi": "Helsinki-NLP/opus-mt-en-hi",
                }
                # Only the first 200 words are translated.
                trunc_trans, _ = truncate_text(trans_text, 200)
                with st.spinner(f"đ Translating to {lang}..."):
                    try:
                        translator = load_translator(model_map[lang])
                        # load_translator() returns None when loading failed.
                        if translator:
                            result = translator(trunc_trans, max_length=256)
                            translation = result[0]['translation_text']
                            st.markdown(f"""
đ Translation ({lang})
{translation}
â Translated
""", unsafe_allow_html=True)
                            # Kept for the Download tab.
                            st.session_state["translation"] = translation
                    except Exception as e:
                        st.error(f"â Translation Error: {str(e)}")
            else:
                st.error("â ī¸ Please provide text to translate")
# ============================================
# TAB 5: KEYWORDS - FIXED
# ============================================
with tab5:
    st.markdown("### đ AI Keyword Extraction")
    keyword_input = st.text_area(
        "đ Text for analysis:",
        value="",
        height=200,
        key="kw_txt",
        placeholder="Paste text to extract key concepts...",
    )

    # Three equal columns; the button and its output live in the middle one.
    col1, col2, col3 = st.columns([1, 1, 1])
    with col2:
        if st.button("đ Extract Keywords", key="kw_btn"):
            if not keyword_input.strip():
                st.error("â ī¸ Please provide text for keyword extraction")
            else:
                # Only the first 200 words are classified.
                trunc_kw, _ = truncate_text(keyword_input, 200)
                with st.spinner("đ Analyzing concepts..."):
                    try:
                        classifier = get_classifier()
                        # get_classifier() returns None when loading failed.
                        if classifier:
                            # The "keywords" are whichever of these fixed
                            # topic labels score above 0.3 (at most five).
                            labels = [
                                "technology", "science", "education", "health",
                                "business", "finance", "medical", "engineering",
                                "mathematics", "history",
                            ]
                            result = classifier(trunc_kw, labels)
                            keywords = [
                                lbl
                                for lbl, score in zip(result['labels'], result['scores'])
                                if score > 0.3
                            ][:5]
                            if keywords:
                                st.markdown('', unsafe_allow_html=True)
                                st.markdown("\nđ¯ Extracted Keywords\n", unsafe_allow_html=True)
                                st.markdown('\n', unsafe_allow_html=True)
                                kw_html = " ".join(f"{kw}" for kw in keywords)
                                st.markdown(kw_html, unsafe_allow_html=True)
                                st.markdown('\n', unsafe_allow_html=True)
                                # Kept for the Download tab.
                                st.session_state["keywords"] = keywords
                            else:
                                st.info("âšī¸ No strong keywords found. Try providing more detailed text.")
                    except Exception as e:
                        st.error(f"â Error extracting keywords: {str(e)}")
# ============================================
# TAB 6: DOWNLOAD
# ============================================
with tab6:
    st.markdown("### đĨ Download Results")

    def download_link(text, filename, emoji):
        """Generate download link for text content.

        The text is UTF-8 encoded, base64'd and embedded in a ``data:`` URL on
        an anchor whose ``download`` attribute carries ``filename``.

        Args:
            text: content of the file to offer.
            filename: name suggested to the browser, also shown in the label.
            emoji: prefix shown in front of the label.

        Returns:
            str: HTML snippet to render with ``unsafe_allow_html=True``.
        """
        b64 = base64.b64encode(text.encode()).decode()
        # charset is stated explicitly because text.encode() produces UTF-8
        # and translations may contain non-ASCII characters.
        return f"""
<a href="data:text/plain;charset=utf-8;base64,{b64}" download="{filename}">{emoji} Download {filename}</a>
"""

    col1, col2 = st.columns(2)
    with col1:
        if st.session_state["summary"]:
            st.markdown(download_link(st.session_state["summary"], "summary.txt", "đ"), unsafe_allow_html=True)
        else:
            st.info("đ Generate a summary first")

        if st.session_state["quiz"]:
            quiz_text = "\n\n".join(
                f"Question {i}: {q}" for i, q in enumerate(st.session_state["quiz"], 1)
            )
            st.markdown(download_link(quiz_text, "quiz.txt", "đ¯"), unsafe_allow_html=True)
        else:
            st.info("đ¯ Generate a quiz first")

    with col2:
        if st.session_state["translation"]:
            st.markdown(download_link(st.session_state["translation"], "translation.txt", "đ"), unsafe_allow_html=True)
        else:
            st.info("đ Translate text first")

        if st.session_state["keywords"]:
            keywords_text = "Extracted Keywords:\n\n" + "\n".join(
                f"- {kw}" for kw in st.session_state["keywords"]
            )
            st.markdown(download_link(keywords_text, "keywords.txt", "đ"), unsafe_allow_html=True)
        else:
            st.info("đ Extract keywords first")

    st.markdown("---")
    # Overall status line: warn while every result slot is still empty.
    if not any([st.session_state["summary"], st.session_state["quiz"],
                st.session_state["translation"], st.session_state["keywords"]]):
        st.warning("âšī¸ Generate content in other tabs to enable downloads")
    else:
        st.success("✅ Content ready for download! Click the buttons above.")
# ============================================
# PREMIUM FOOTER
# ============================================
# NOTE(review): the literal below holds only a newline, so no footer markup is
# rendered as written - presumably the footer HTML belongs inside it; confirm.
st.markdown("""
""", unsafe_allow_html=True)