Spaces:

ak0601
/

Law-chatbot

Runtime error

App Files Files Community

Law-chatbot / config.py

ak0601

Upload 15 files

73ab00e verified 3 months ago

raw

history blame contribute delete

2.79 kB

	"""
	Configuration file for the Law RAG Chatbot application
	"""

	import os
	from typing import Optional
	from pathlib import Path
	from dotenv import load_dotenv
	load_dotenv()
	# Load environment variables from .env file if it exists
	def load_dotenv(dotenv_path='.env'):
	    """Load ``KEY=VALUE`` pairs from a dotenv file into ``os.environ``.

	    NOTE: this definition rebinds the name ``load_dotenv`` that is imported
	    from ``dotenv`` at the top of this file; every call made after this
	    point uses this parser instead of the imported one.

	    Parsing rules:
	      * blank lines, lines starting with ``#`` and lines without ``=`` are
	        skipped;
	      * the line is split on the first ``=`` only, so values may contain
	        ``=``;
	      * whitespace around the key and the value is removed;
	      * an optional leading ``export `` on the key is dropped;
	      * one pair of matching single or double quotes around the value is
	        removed, so ``KEY="abc"`` stores ``abc`` rather than ``"abc"``.

	    Values found in the file overwrite variables that are already set in
	    the process environment.

	    Args:
	        dotenv_path: Path of the file to read. Defaults to ``.env`` in the
	            current working directory.

	    Returns:
	        None. A missing file (or a path that is not a regular file) is
	        silently ignored.
	    """
	    env_file = Path(dotenv_path)
	    # is_file() rather than exists(): a directory named ".env" must not
	    # make open() raise.
	    if not env_file.is_file():
	        return

	    # Explicit encoding so parsing does not depend on the platform locale.
	    with open(env_file, 'r', encoding='utf-8') as f:
	        for raw_line in f:
	            line = raw_line.strip()
	            if not line or line.startswith('#') or '=' not in line:
	                continue

	            key, value = line.split('=', 1)
	            key = key.strip()
	            if key.startswith('export '):
	                key = key[len('export '):].strip()
	            if not key:
	                continue

	            value = value.strip()
	            # Strip exactly one pair of matching surrounding quotes.
	            if (
	                len(value) >= 2
	                and value[0] == value[-1]
	                and value[0] in ('"', "'")
	            ):
	                value = value[1:-1]

	            os.environ[key] = value

	# Load .env file
	load_dotenv()

	# NOTE(review): the module docstring describes a Law RAG chatbot, yet the
	# dataset, collection and API strings below name a mental-health counseling
	# dataset -- confirm these are the intended values.

	# --- Credentials (read from the environment; None when unset) -------------
	HF_TOKEN: Optional[str] = os.environ.get('HF_TOKEN')
	GROQ_API_KEY: Optional[str] = os.environ.get('GROQ_API_KEY')

	# --- Hugging Face dataset -------------------------------------------------
	HF_DATASET_NAME: str = "Amod/mental_health_counseling_conversations"
	DATASET_SPLIT: str = "train"
	CACHE_DIR: str = ".cache"

	# --- Groq LLM -------------------------------------------------------------
	# Alternative model id noted by the author: "mixtral-8x7b-32768"
	GROQ_MODEL: str = "llama3-8b-8192"
	MAX_TOKENS: int = 4096
	TEMPERATURE: float = 0.1

	# --- Embeddings -----------------------------------------------------------
	EMBEDDING_MODEL: str = "sentence-transformers/all-MiniLM-L6-v2"
	EMBEDDING_DIMENSION: int = 384

	# --- ChromaDB -------------------------------------------------------------
	CHROMA_PERSIST_DIR: str = "./chroma_db"
	CHROMA_COLLECTION_NAME: str = "mental_health_counseling"

	# --- FastAPI --------------------------------------------------------------
	API_TITLE: str = "Mental Health Counseling Chatbot API"
	API_VERSION: str = "1.0.0"
	API_DESCRIPTION: str = "RAG-based mental health counseling chatbot using Amod/mental_health_counseling_conversations data"
	HOST: str = "0.0.0.0"
	PORT: int = 8000

	# --- RAG chunking and retrieval -------------------------------------------
	CHUNK_SIZE: int = 1000
	CHUNK_OVERLAP: int = 200
	TOP_K_RETRIEVAL: int = 8  # raised from the earlier value of 5
	DEFAULT_CONTEXT_LENGTH: int = 5  # newer default context length

	# --- Token budgeting ------------------------------------------------------
	MAX_CONTEXT_TOKENS: int = 4000  # cap for context; leaves room for the prompt
	MAX_PROMPT_TOKENS: int = 6000  # cap for the whole prompt (noted as a Groq limit)
	MAX_SOURCES: int = 5  # most sources to include
	MAX_SEARCH_VARIATIONS: int = 2  # most search variations to try
	MAX_LEGAL_CONCEPTS: int = 2  # most legal concepts to extract

	# Error message templates keyed by failure kind. Entries ending in "{}"
	# carry one positional slot, presumably filled by callers via str.format().
	ERROR_MESSAGES = {
	    # Missing credentials
	    "no_hf_token": "Hugging Face token not found. Set HF_TOKEN environment variable.",
	    "no_groq_key": "Groq API key not found. Set GROQ_API_KEY environment variable.",
	    # Failures that carry a detail string
	    "auth_failed": "Authentication failed: {}",
	    "dataset_load_failed": "Failed to load dataset: {}",
	    "embedding_failed": "Failed to create embeddings: {}",
	    "vector_db_failed": "Failed to setup vector database: {}",
	    "llm_failed": "Failed to initialize LLM: {}",
	}

	# API request and response models
	class ChatRequest:
	    """Declared shape of a chat request.

	    This is a plain annotated class: it records field names and types but
	    defines no constructor and performs no validation.
	    """

	    # Question text (presumably the user's query -- confirm with callers).
	    question: str
	    # Optional context length; defaults to 3 as a class attribute.
	    # NOTE(review): DEFAULT_CONTEXT_LENGTH in this module is 5 -- confirm
	    # which default is intended.
	    context_length: Optional[int] = 3

	class ChatResponse:
	    """Declared shape of a chat response.

	    This is a plain annotated class: it records field names and types but
	    defines no constructor, defaults or validation.
	    """

	    # Answer text.
	    answer: str
	    # Sources backing the answer; element type is not specified here.
	    sources: list
	    # Confidence value; scale/range is not specified here.
	    confidence: float
	    # Processing time; unit is not specified here (presumably seconds).
	    processing_time: float