Spaces:
Runtime error
Runtime error
Upload 2 files
Browse files- rag_system.py +55 -138
- requirements.txt +2 -1
rag_system.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
| 1 |
-
|
| 2 |
-
RAG System for Law Chatbot using Langchain, Groq, and ChromaDB
|
| 3 |
-
"""
|
| 4 |
|
| 5 |
import os
|
| 6 |
import logging
|
|
@@ -301,10 +299,12 @@ class RAGSystem:
|
|
| 301 |
search_results = self._filter_relevant_results(search_results, question)
|
| 302 |
|
| 303 |
if not search_results:
|
|
|
|
|
|
|
| 304 |
return {
|
| 305 |
-
"answer":
|
| 306 |
"sources": [],
|
| 307 |
-
"confidence": 0.
|
| 308 |
}
|
| 309 |
|
| 310 |
# Prepare context for LLM
|
|
@@ -412,40 +412,47 @@ class RAGSystem:
|
|
| 412 |
async def _generate_llm_response(self, question: str, context: str) -> str:
|
| 413 |
"""Generate response using Groq LLM with token management"""
|
| 414 |
try:
|
| 415 |
-
#
|
| 416 |
-
|
| 417 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
Use the following evidence-based psychological information to address the user’s concerns with care and accuracy.
|
| 419 |
|
| 420 |
Therapeutic Context:
|
| 421 |
-
{context}
|
| 422 |
|
| 423 |
-
User’s Concern: {question}
|
| 424 |
|
| 425 |
Guidelines for Response:
|
| 426 |
-
|
| 427 |
-
|
| 428 |
-
|
| 429 |
-
|
| 430 |
-
|
| 431 |
-
Cite sources when referencing specific therapies or studies (e.g., "APA guidelines suggest...").
|
| 432 |
-
|
| 433 |
-
|
| 434 |
-
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
When discussing crises, emphasize jurisdictional resources (e.g., "Laws/programs vary by location, but here’s how to find local help...").
|
| 438 |
-
|
| 439 |
-
Prioritize validation and education—not just information.
|
| 440 |
|
| 441 |
Example Response:
|
| 442 |
-
"I hear you’re feeling overwhelmed. Based on [Context Source], deep breathing exercises can help calm acute anxiety. However, if these feelings persist for weeks, it might reflect generalized anxiety disorder (GAD). Always consult a licensed therapist for personalized care. Would you like crisis hotline numbers or a step-by-step grounding technique?
|
| 443 |
-
|
| 444 |
-
|
| 445 |
# Estimate total tokens
|
| 446 |
estimated_prompt_tokens = self._count_tokens(prompt_template.format(context=context, question=question))
|
| 447 |
logger.info(f"Estimated prompt tokens: {estimated_prompt_tokens}")
|
| 448 |
-
|
| 449 |
# If still too large, truncate context further
|
| 450 |
if estimated_prompt_tokens > MAX_PROMPT_TOKENS: # Use config value
|
| 451 |
logger.warning(f"Prompt too large ({estimated_prompt_tokens} tokens), truncating context further")
|
|
@@ -453,20 +460,23 @@ Example Response:
|
|
| 453 |
context = self._truncate_context(context, max_context_tokens)
|
| 454 |
estimated_prompt_tokens = self._count_tokens(prompt_template.format(context=context, question=question))
|
| 455 |
logger.info(f"After truncation: {estimated_prompt_tokens} tokens")
|
| 456 |
-
|
| 457 |
# Create enhanced prompt template for legal questions
|
| 458 |
prompt = ChatPromptTemplate.from_template(prompt_template)
|
| 459 |
-
|
| 460 |
# Create chain
|
| 461 |
chain = prompt | self.llm | StrOutputParser()
|
| 462 |
-
|
| 463 |
# Generate response
|
| 464 |
response = await chain.ainvoke({
|
| 465 |
"question": question,
|
| 466 |
"context": context
|
| 467 |
})
|
| 468 |
-
|
| 469 |
-
|
|
|
|
|
|
|
|
|
|
| 470 |
|
| 471 |
except Exception as e:
|
| 472 |
logger.error(f"Error generating LLM response: {e}")
|
|
@@ -868,132 +878,39 @@ If you have a specific legal question, please try rephrasing it or contact a loc
|
|
| 868 |
)
|
| 869 |
|
| 870 |
def _is_conversational_query(self, question: str) -> bool:
|
| 871 |
-
"""Detect if the query is
|
| 872 |
question_lower = question.lower().strip()
|
| 873 |
-
|
| 874 |
# Common greetings and casual conversation
|
| 875 |
greetings = [
|
| 876 |
"hi", "hello", "hey", "good morning", "good afternoon", "good evening",
|
| 877 |
"how are you", "how's it going", "what's up", "sup", "yo"
|
| 878 |
]
|
| 879 |
-
|
| 880 |
# Very short or casual queries
|
| 881 |
if len(question_lower) <= 3 or question_lower in greetings:
|
| 882 |
return True
|
| 883 |
-
|
| 884 |
-
# Questions that don't need legal context
|
| 885 |
casual_questions = [
|
| 886 |
"how can you help", "what can you do", "what are you", "who are you",
|
| 887 |
"are you working", "are you there", "can you hear me", "test"
|
| 888 |
]
|
| 889 |
-
|
| 890 |
for casual in casual_questions:
|
| 891 |
-
if casual
|
| 892 |
return True
|
| 893 |
-
|
| 894 |
-
# If it's not clearly legal, treat as conversational
|
| 895 |
-
if not self._is_legal_query(question):
|
| 896 |
-
return True
|
| 897 |
-
|
| 898 |
return False
|
| 899 |
|
| 900 |
def _generate_conversational_response(self, question: str) -> str:
|
| 901 |
-
"""Generate
|
| 902 |
question_lower = question.lower().strip()
|
| 903 |
-
|
| 904 |
-
if question_lower in
|
| 905 |
-
return "
|
| 906 |
-
|
| 907 |
-
• Anxiety and stress management
|
| 908 |
-
• Depression and mood challenges
|
| 909 |
-
• Trauma healing and PTSD recovery
|
| 910 |
-
• Relationship and family dynamics
|
| 911 |
-
• Workplace stress and burnout prevention
|
| 912 |
-
• Self-esteem and personal growth journeys
|
| 913 |
-
• Grief processing and life transitions
|
| 914 |
-
• And many other emotional wellness concerns
|
| 915 |
-
|
| 916 |
-
This is a safe space where you can:
|
| 917 |
-
|
| 918 |
-
Share what's on your mind without judgment
|
| 919 |
-
|
| 920 |
-
Explore healthy coping strategies
|
| 921 |
-
|
| 922 |
-
Understand your emotional experiences
|
| 923 |
-
|
| 924 |
-
Find resources for professional support
|
| 925 |
-
|
| 926 |
-
How would you like to begin today?
|
| 927 |
-
You could tell me how you're feeling, ask about coping techniques, or explore resources for specific challenges."""
|
| 928 |
-
|
| 929 |
elif "how can you help" in question_lower or "what can you do" in question_lower:
|
| 930 |
-
return "
|
| 931 |
-
|
| 932 |
-
• Anxiety and stress management
|
| 933 |
-
• Depression and mood disorders
|
| 934 |
-
• Trauma recovery and PTSD
|
| 935 |
-
• Relationship and family challenges
|
| 936 |
-
• Workplace burnout and career stress
|
| 937 |
-
• Grief and loss processing
|
| 938 |
-
• Self-esteem and personal growth
|
| 939 |
-
• Coping skills and resilience building
|
| 940 |
-
• And many other emotional wellness concerns
|
| 941 |
-
|
| 942 |
-
I offer a safe space to explore your feelings, develop coping strategies, and find resources. Remember, while I'm here to support you, I'm not a replacement for professional care in crisis situations.
|
| 943 |
-
|
| 944 |
-
How would you like to begin today?
|
| 945 |
-
You could share what's on your mind, how you're feeling, or ask about:
|
| 946 |
-
|
| 947 |
-
Coping techniques for [specific emotion]
|
| 948 |
-
|
| 949 |
-
Understanding [mental health term]
|
| 950 |
-
|
| 951 |
-
Local therapist resources
|
| 952 |
-
|
| 953 |
-
Self-care strategies"""
|
| 954 |
-
|
| 955 |
elif "who are you" in question_lower or "what are you" in question_lower:
|
| 956 |
-
return "
|
| 957 |
-
|
| 958 |
-
• Search through therapeutic resources and evidence-based practices
|
| 959 |
-
• Explain mental health concepts and coping strategies
|
| 960 |
-
• Provide information on conditions, symptoms, and treatments
|
| 961 |
-
• Help you navigate therapy options and self-care techniques
|
| 962 |
-
• Share reputable mental health sources and crisis resources
|
| 963 |
-
|
| 964 |
-
I'm not a licensed therapist, and I can't diagnose or treat conditions, but I can offer general information, emotional support, and tools to help you better understand your well-being.
|
| 965 |
-
|
| 966 |
-
What would you like to explore today?
|
| 967 |
-
You might ask about:
|
| 968 |
-
|
| 969 |
-
Understanding anxiety/depression symptoms
|
| 970 |
-
|
| 971 |
-
Grounding techniques for stress
|
| 972 |
-
|
| 973 |
-
How cognitive behavioral therapy (CBT) works
|
| 974 |
-
|
| 975 |
-
Finding a therapist near you
|
| 976 |
-
|
| 977 |
-
Managing [specific emotion or situation]"""
|
| 978 |
-
|
| 979 |
else:
|
| 980 |
-
return "
|
| 981 |
-
|
| 982 |
-
How can I assist you today? You might ask about:**
|
| 983 |
-
|
| 984 |
-
Relaxation techniques for anxiety
|
| 985 |
-
|
| 986 |
-
Understanding depression symptoms
|
| 987 |
-
|
| 988 |
-
How to find a therapist
|
| 989 |
-
|
| 990 |
-
Coping with [specific stressor]
|
| 991 |
-
|
| 992 |
-
Self-care for tough emotions
|
| 993 |
-
|
| 994 |
-
(Note: I’m not a substitute for professional care, but I’m here to listen and guide.)
|
| 995 |
-
|
| 996 |
-
What’s on your mind?"""
|
| 997 |
|
| 998 |
def _filter_relevant_results(self, search_results: List[Dict[str, Any]], question: str) -> List[Dict[str, Any]]:
|
| 999 |
"""Filter search results for relevance to the question"""
|
|
@@ -1050,4 +967,4 @@ What’s on your mind?"""
|
|
| 1050 |
relevant_results.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
|
| 1051 |
|
| 1052 |
logger.info(f"Filtered {len(search_results)} results to {len(relevant_results)} relevant results")
|
| 1053 |
-
return relevant_results
|
|
|
|
| 1 |
+
|
|
|
|
|
|
|
| 2 |
|
| 3 |
import os
|
| 4 |
import logging
|
|
|
|
| 299 |
search_results = self._filter_relevant_results(search_results, question)
|
| 300 |
|
| 301 |
if not search_results:
|
| 302 |
+
# No relevant docs found: generate a short, supportive answer using LLM with empty context
|
| 303 |
+
response = await self._generate_llm_response(question, context="")
|
| 304 |
return {
|
| 305 |
+
"answer": response,
|
| 306 |
"sources": [],
|
| 307 |
+
"confidence": 0.5 # Lower confidence since no docs
|
| 308 |
}
|
| 309 |
|
| 310 |
# Prepare context for LLM
|
|
|
|
| 412 |
async def _generate_llm_response(self, question: str, context: str) -> str:
|
| 413 |
"""Generate response using Groq LLM with token management"""
|
| 414 |
try:
|
| 415 |
+
# Detect language of the question
|
| 416 |
+
import re
|
| 417 |
+
from langdetect import detect, LangDetectException
|
| 418 |
+
try:
|
| 419 |
+
user_language = detect(question)
|
| 420 |
+
except LangDetectException:
|
| 421 |
+
user_language = "en"
|
| 422 |
+
# Map language code to readable name (for prompt)
|
| 423 |
+
lang_map = {"en": "English", "hi": "Hindi"}
|
| 424 |
+
language_name = lang_map.get(user_language, "the user's language")
|
| 425 |
+
|
| 426 |
+
# Updated prompt template
|
| 427 |
+
prompt_template = f"""
|
| 428 |
+
You are a compassionate mental health supporter with training in anxiety, depression, trauma, and coping strategies.
|
| 429 |
Use the following evidence-based psychological information to address the user’s concerns with care and accuracy.
|
| 430 |
|
| 431 |
Therapeutic Context:
|
| 432 |
+
{{context}}
|
| 433 |
|
| 434 |
+
User’s Concern: {{question}}
|
| 435 |
|
| 436 |
Guidelines for Response:
|
| 437 |
+
- Reply in the same language as the user's question. If the question is in Hindi, answer in Hindi. If in another language, answer in that language.
|
| 438 |
+
- Strictly limit your answer to 2 sentences. Do not elaborate or add extra information. Do not repeat yourself.
|
| 439 |
+
- Keep your answer conversational and natural, as if chatting with a friend.
|
| 440 |
+
- Provide empathetic, evidence-based support rooted in the context (e.g., CBT, DBT, or mindfulness principles).
|
| 441 |
+
- If context is insufficient, acknowledge limits and offer general wellness strategies (e.g., grounding techniques, self-care tips).
|
| 442 |
+
- Cite sources when referencing specific therapies or studies (e.g., "APA guidelines suggest...").
|
| 443 |
+
- For symptom-related questions, differentiate between mild, moderate, and severe cases (e.g., situational stress vs. clinical anxiety).
|
| 444 |
+
- Use clear, stigma-free language while maintaining clinical accuracy.
|
| 445 |
+
- When discussing crises, emphasize jurisdictional resources (e.g., "Laws/programs vary by location, but here’s how to find local help...").
|
| 446 |
+
- Prioritize validation and education—not just information.
|
| 447 |
+
- Always reply in {language_name}.
|
|
|
|
|
|
|
|
|
|
| 448 |
|
| 449 |
Example Response:
|
| 450 |
+
"I hear you’re feeling overwhelmed. Based on [Context Source], deep breathing exercises can help calm acute anxiety. However, if these feelings persist for weeks, it might reflect generalized anxiety disorder (GAD). Always consult a licensed therapist for personalized care. Would you like crisis hotline numbers or a step-by-step grounding technique?"
|
| 451 |
+
"""
|
|
|
|
| 452 |
# Estimate total tokens
|
| 453 |
estimated_prompt_tokens = self._count_tokens(prompt_template.format(context=context, question=question))
|
| 454 |
logger.info(f"Estimated prompt tokens: {estimated_prompt_tokens}")
|
| 455 |
+
|
| 456 |
# If still too large, truncate context further
|
| 457 |
if estimated_prompt_tokens > MAX_PROMPT_TOKENS: # Use config value
|
| 458 |
logger.warning(f"Prompt too large ({estimated_prompt_tokens} tokens), truncating context further")
|
|
|
|
| 460 |
context = self._truncate_context(context, max_context_tokens)
|
| 461 |
estimated_prompt_tokens = self._count_tokens(prompt_template.format(context=context, question=question))
|
| 462 |
logger.info(f"After truncation: {estimated_prompt_tokens} tokens")
|
| 463 |
+
|
| 464 |
# Create enhanced prompt template for legal questions
|
| 465 |
prompt = ChatPromptTemplate.from_template(prompt_template)
|
| 466 |
+
|
| 467 |
# Create chain
|
| 468 |
chain = prompt | self.llm | StrOutputParser()
|
| 469 |
+
|
| 470 |
# Generate response
|
| 471 |
response = await chain.ainvoke({
|
| 472 |
"question": question,
|
| 473 |
"context": context
|
| 474 |
})
|
| 475 |
+
|
| 476 |
+
# Post-process: Truncate to first 2 sentences
|
| 477 |
+
sentences = re.split(r'(?<=[.!?])\s+', response.strip())
|
| 478 |
+
short_response = ' '.join(sentences[:2]).strip()
|
| 479 |
+
return short_response
|
| 480 |
|
| 481 |
except Exception as e:
|
| 482 |
logger.error(f"Error generating LLM response: {e}")
|
|
|
|
| 878 |
)
|
| 879 |
|
| 880 |
def _is_conversational_query(self, question: str) -> bool:
|
| 881 |
+
"""Detect if the query is a pure greeting or system check (not a real mental health question)"""
|
| 882 |
question_lower = question.lower().strip()
|
|
|
|
| 883 |
# Common greetings and casual conversation
|
| 884 |
greetings = [
|
| 885 |
"hi", "hello", "hey", "good morning", "good afternoon", "good evening",
|
| 886 |
"how are you", "how's it going", "what's up", "sup", "yo"
|
| 887 |
]
|
|
|
|
| 888 |
# Very short or casual queries
|
| 889 |
if len(question_lower) <= 3 or question_lower in greetings:
|
| 890 |
return True
|
| 891 |
+
# System check/capability questions
|
|
|
|
| 892 |
casual_questions = [
|
| 893 |
"how can you help", "what can you do", "what are you", "who are you",
|
| 894 |
"are you working", "are you there", "can you hear me", "test"
|
| 895 |
]
|
|
|
|
| 896 |
for casual in casual_questions:
|
| 897 |
+
if casual == question_lower:
|
| 898 |
return True
|
| 899 |
+
# Otherwise, treat as a real question (let LLM handle it)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 900 |
return False
|
| 901 |
|
| 902 |
def _generate_conversational_response(self, question: str) -> str:
|
| 903 |
+
"""Generate a short, friendly response for greetings or system checks only"""
|
| 904 |
question_lower = question.lower().strip()
|
| 905 |
+
greetings = ["hi", "hello", "hey"]
|
| 906 |
+
if question_lower in greetings:
|
| 907 |
+
return "Hello! How can I support your mental health or well-being today?"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 908 |
elif "how can you help" in question_lower or "what can you do" in question_lower:
|
| 909 |
+
return "I can offer brief, evidence-based tips and emotional support for mental health questions. What would you like to talk about?"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 910 |
elif "who are you" in question_lower or "what are you" in question_lower:
|
| 911 |
+
return "I'm an AI companion here to help with mental health and wellness questions. How can I assist you?"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 912 |
else:
|
| 913 |
+
return "How can I help you today? Feel free to ask about mental health, coping, or emotional support."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 914 |
|
| 915 |
def _filter_relevant_results(self, search_results: List[Dict[str, Any]], question: str) -> List[Dict[str, Any]]:
|
| 916 |
"""Filter search results for relevance to the question"""
|
|
|
|
| 967 |
relevant_results.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
|
| 968 |
|
| 969 |
logger.info(f"Filtered {len(search_results)} results to {len(relevant_results)} relevant results")
|
| 970 |
+
return relevant_results
|
requirements.txt
CHANGED
|
@@ -15,4 +15,5 @@ python-dotenv>=1.0.0
|
|
| 15 |
numpy>=1.24.0
|
| 16 |
pandas>=2.0.0
|
| 17 |
requests>=2.31.0
|
| 18 |
-
tiktoken>=0.5.0
|
|
|
|
|
|
| 15 |
numpy>=1.24.0
|
| 16 |
pandas>=2.0.0
|
| 17 |
requests>=2.31.0
|
| 18 |
+
tiktoken>=0.5.0
|
| 19 |
+
langdetect
|