rdune71 committed on
Commit a20d863 · 1 Parent(s): 482aace

Implement hierarchical multi-model conversation architecture with HF as authoritative layer
core/coordinator.py CHANGED
@@ -1,6 +1,6 @@
 import asyncio
 import logging
-from typing import List, Dict, Optional, Tuple
 from core.llm_factory import llm_factory
 from core.session import session_manager
 from services.hf_endpoint_monitor import hf_monitor
@@ -12,174 +12,282 @@ except ImportError:
     TavilyClient = None
     TAVILY_AVAILABLE = False
 import os

 logger = logging.getLogger(__name__)

 class AICoordinator:
-    """Coordinate multiple AI models and external services"""

     def __init__(self):
         self.tavily_client = None
         if TAVILY_AVAILABLE and os.getenv("TAVILY_API_KEY"):
             self.tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))

-    async def coordinate_response(self, user_id: str, user_query: str) -> Dict:
         """
-        Coordinate Ollama (fast) and HF (deep) responses
-
-        Returns:
-            Dict with 'immediate_response' and 'final_response'
         """
         try:
             # Get conversation history
             session = session_manager.get_session(user_id)
-            conversation_history = session.get("conversation", [])

-            # Step 1: Gather external data with Ollama
-            logger.info("Step 1: Gathering external data...")
             external_data = await self._gather_external_data(user_query)

-            # Step 2: Get immediate Ollama response (fast)
-            logger.info("Step 2: Getting immediate Ollama response...")
-            immediate_response = await self._get_ollama_response(
                 user_query, conversation_history, external_data
             )

-            # Step 3: Initialize HF endpoint in background
-            logger.info("Step 3: Initializing HF endpoint...")
-            hf_task = asyncio.create_task(self._initialize_and_get_hf_response(
-                user_query, conversation_history, external_data, immediate_response
-            ))
-
-            # Return immediate response while HF processes
-            return {
-                'immediate_response': immediate_response,
-                'hf_task': hf_task,  # Background task for HF processing
-                'external_data': external_data
-            }
-
-        except Exception as e:
-            logger.error(f"Coordination failed: {e}")
-            # Fallback to simple Ollama response
-            immediate_response = await self._get_ollama_response(
-                user_query, conversation_history, {}
-            )
-            return {
-                'immediate_response': immediate_response,
-                'hf_task': None,
-                'external_data': {}
-            }
-
-    async def _gather_external_data(self, query: str) -> Dict:
-        """Gather external data from various sources"""
-        data = {}
-
-        # Tavily/DuckDuckGo search
-        if self.tavily_client:
-            try:
-                search_result = self.tavily_client.search(query, max_results=3)
-                data['search_results'] = search_result.get('results', [])
-            except Exception as e:
-                logger.warning(f"Tavily search failed: {e}")
-
-        # Weather data (if location mentioned)
-        if 'weather' in query.lower() or 'temperature' in query.lower():
-            try:
-                # Extract location from query or use default
-                location = self._extract_location(query) or "New York"
-                weather = weather_service.get_current_weather(location)
-                if weather:
-                    data['weather'] = weather
-            except Exception as e:
-                logger.warning(f"Weather data failed: {e}")
-
-        # Current date/time
-        from datetime import datetime
-        data['current_datetime'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
-
-        return data
-
-    async def _get_ollama_response(self, query: str, history: List, external_data: Dict) -> str:
-        """Get fast response from Ollama"""
-        try:
-            # Enhance query with external data
-            enhanced_query = self._enhance_query_with_data(query, external_data)
-
-            # Get Ollama provider
-            ollama_provider = llm_factory.get_provider('ollama')
-            if not ollama_provider:
-                raise Exception("Ollama provider not available")

-            # Prepare conversation with external context
-            enhanced_history = history.copy()
-            if external_data:
-                context_message = {
-                    "role": "system",
-                    "content": f"External context: {str(external_data)}"
-                }
-                enhanced_history.insert(0, context_message)

-            enhanced_history.append({"role": "user", "content": enhanced_query})

-            # Generate response
-            response = ollama_provider.generate(enhanced_query, enhanced_history)
-            return response or "I'm processing your request..."

         except Exception as e:
-            logger.error(f"Ollama response failed: {e}")
-            return "I'm thinking about your question..."

-    async def _initialize_and_get_hf_response(self, query: str, history: List,
-                                              external_data: Dict, ollama_response: str) -> Optional[str]:
-        """Initialize HF endpoint and get deep analysis"""
         try:
-            # Check if HF endpoint is available
             hf_status = hf_monitor.check_endpoint_status()

             if not hf_status['available']:
-                logger.info("HF endpoint not available, attempting to warm up...")
-                # Try to warm up the endpoint
-                warmup_success = hf_monitor.warm_up_endpoint()
                 if not warmup_success:
-                    return None

             # Get HF provider
             hf_provider = llm_factory.get_provider('huggingface')
             if not hf_provider:
-                return None

-            # Prepare enhanced conversation for HF
             enhanced_history = history.copy()

-            # Add Ollama's initial response for HF to consider
             enhanced_history.append({
-                "role": "assistant",
-                "content": f"Initial response (for reference): {ollama_response}"
             })

-            # Add external data context
-            if external_data:
-                context_message = {
-                    "role": "system",
-                    "content": f"Additional context data: {str(external_data)}"
-                }
-                enhanced_history.insert(0, context_message)

-            # Add HF's role instruction
-            enhanced_history.append({
                 "role": "system",
-                "content": "You are providing deep analysis and second opinions. Consider the initial response and enhance it with deeper insights."
             })

             enhanced_history.append({"role": "user", "content": query})

-            # Generate deep response
-            deep_response = hf_provider.generate(query, enhanced_history)
-            return deep_response

         except Exception as e:
-            logger.error(f"HF response failed: {e}")
-            return None

     def get_coordination_status(self) -> Dict:
         """Get current coordination system status"""
@@ -208,38 +316,5 @@ class AICoordinator:
         except:
             return {}

-    def _enhance_query_with_data(self, query: str, data: Dict) -> str:
-        """Enhance query with gathered external data"""
-        if not data:
-            return query
-
-        context_parts = []
-
-        if 'search_results' in data:
-            context_parts.append("Recent information:")
-            for result in data['search_results'][:2]:  # Limit to 2 results
-                context_parts.append(f"- {result.get('title', 'Result')}: {result.get('content', '')[:100]}...")
-
-        if 'weather' in data:
-            weather = data['weather']
-            context_parts.append(f"Current weather: {weather.get('temperature', 'N/A')}°C in {weather.get('city', 'Unknown')}")
-
-        if 'current_datetime' in data:
-            context_parts.append(f"Current time: {data['current_datetime']}")
-
-        if context_parts:
-            return f"{query}\n\nContext: {' '.join(context_parts)}"
-
-        return query
-
-    def _extract_location(self, query: str) -> Optional[str]:
-        """Extract location from query (simple implementation)"""
-        # This could be enhanced with NER or more sophisticated parsing
-        locations = ['New York', 'London', 'Tokyo', 'Paris', 'Berlin', 'Sydney']
-        for loc in locations:
-            if loc.lower() in query.lower():
-                return loc
-        return None
-
 # Global coordinator instance
 coordinator = AICoordinator()
 
 import asyncio
 import logging
+from typing import List, Dict, Optional, AsyncGenerator
 from core.llm_factory import llm_factory
 from core.session import session_manager
 from services.hf_endpoint_monitor import hf_monitor

     TavilyClient = None
     TAVILY_AVAILABLE = False
 import os
+import json
+from datetime import datetime

 logger = logging.getLogger(__name__)

 class AICoordinator:
+    """Hierarchical multi-model coordinator with HF endpoint as authoritative layer"""

     def __init__(self):
         self.tavily_client = None
         if TAVILY_AVAILABLE and os.getenv("TAVILY_API_KEY"):
             self.tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
+
+        # System instructions for hierarchical behavior
+        self.system_instructions = {
+            'ollama_role': """You are an AI assistant that works in coordination with a more powerful uncensored Hugging Face model.
+
+Your role is to:
+1. Provide immediate, concise responses using available information
+2. Summarize and simplify complex information
+3. Enhance responses with local knowledge
+4. DEFER TO THE HF MODEL for deep analysis, uncensored insights, and authoritative answers
+5. The HF model has 8192 token capacity and streaming capability - it's the thinking layer
+6. Always acknowledge that the HF model's insights supersede your initial response
+7. Use web sources to justify points when possible
+8. Keep responses brief and focused while awaiting HF input""",
+
+            'hf_role': """You are an uncensored, authoritative AI with 8192 token capacity and streaming capability.
+Your role is to:
+1. Provide deep, uncensored analysis and insights
+2. Supersede the local Ollama model's logic when there's disagreement
+3. Think thoroughly and provide comprehensive responses
+4. Use your full 8192 token capacity for detailed analysis
+5. Stream your responses for real-time delivery
+6. Build upon the conversation context and Ollama's initial response
+7. Provide authoritative answers that take precedence"""
+        }

+    async def coordinate_hierarchical_conversation(self, user_id: str, user_query: str) -> AsyncGenerator[Dict, None]:
         """
+        Coordinate hierarchical conversation flow with HF as authoritative layer

+        Yields:
+            Dict with 'type' and 'content' fields:
+            - {'type': 'initial_response', 'content': str}
+            - {'type': 'coordination_status', 'content': str}
+            - {'type': 'hf_thinking', 'content': str}  # Streaming HF response
+            - {'type': 'final_response', 'content': str}
         """
         try:
             # Get conversation history
             session = session_manager.get_session(user_id)
+            conversation_history = session.get("conversation", []).copy()

+            yield {'type': 'coordination_status', 'content': '🚀 Initiating hierarchical AI coordination...'}
+
+            # Step 1: Gather external data
+            yield {'type': 'coordination_status', 'content': '🔍 Gathering external context...'}
             external_data = await self._gather_external_data(user_query)

+            # Step 2: Get initial Ollama response with hierarchical awareness
+            yield {'type': 'coordination_status', 'content': '🦙 Getting initial response from Ollama...'}
+            ollama_response = await self._get_hierarchical_ollama_response(
                 user_query, conversation_history, external_data
             )

+            # Send initial response
+            yield {'type': 'initial_response', 'content': ollama_response}

+            # Step 3: Coordinate with authoritative HF endpoint
+            yield {'type': 'coordination_status', 'content': '🤗 Engaging HF endpoint for deep analysis...'}

+            # Check HF availability and coordinate
+            hf_available = self._check_hf_availability()
+            if hf_available:
+                async for hf_chunk in self._coordinate_hierarchical_hf_response(
+                    user_id, user_query, conversation_history,
+                    external_data, ollama_response
+                ):
+                    yield hf_chunk
+            else:
+                yield {'type': 'coordination_status', 'content': 'ℹ️ HF endpoint not available - using Ollama response'}

+            # Final coordination status
+            yield {'type': 'coordination_status', 'content': '✅ Hierarchical coordination complete'}

         except Exception as e:
+            logger.error(f"Hierarchical coordination failed: {e}")
+            yield {'type': 'coordination_status', 'content': f'❌ Coordination error: {str(e)}'}
+
+            # Fallback to simple Ollama response
+            try:
+                session = session_manager.get_session(user_id)
+                conversation_history = session.get("conversation", [])
+                fallback_response = await self._get_hierarchical_ollama_response(
+                    user_query, conversation_history, {}
+                )
+                yield {'type': 'initial_response', 'content': fallback_response}
+            except:
+                yield {'type': 'initial_response', 'content': "I'm here to help! What would you like to discuss?"}

+    async def _coordinate_hierarchical_hf_response(self, user_id: str, query: str,
+                                                   history: List, external_data: Dict,
+                                                   ollama_response: str) -> AsyncGenerator[Dict, None]:
+        """Coordinate with HF endpoint as authoritative layer with streaming"""
         try:
+            # Check and warm up HF endpoint if needed
             hf_status = hf_monitor.check_endpoint_status()

             if not hf_status['available']:
+                yield {'type': 'coordination_status', 'content': '⚡ Initializing HF endpoint (2-4 minutes)...'}
+                warmup_success = hf_monitor.handle_scale_to_zero()
                 if not warmup_success:
+                    yield {'type': 'coordination_status', 'content': '❌ HF endpoint initialization failed'}
+                    return

             # Get HF provider
             hf_provider = llm_factory.get_provider('huggingface')
             if not hf_provider:
+                yield {'type': 'coordination_status', 'content': '❌ HF provider not available'}
+                return

+            # Prepare enhanced conversation for HF with hierarchical context
             enhanced_history = history.copy()

+            # Add system instructions for HF
+            enhanced_history.insert(0, {
+                "role": "system",
+                "content": self.system_instructions['hf_role']
+            })
+
+            # Add context about the coordination
             enhanced_history.append({
+                "role": "system",
+                "content": f"""Context for your authoritative analysis:
+User Query: '{query}'
+Ollama's Initial Response: '{ollama_response}'
+External Data Available: {json.dumps(external_data, indent=2) if external_data else 'None'}
+
+Please provide your deep, uncensored analysis that supersedes the local model's response.
+Use your full 8192 token capacity for comprehensive thinking.
+Stream your response for real-time delivery."""
             })

+            # Add the user's latest query
+            enhanced_history.append({"role": "user", "content": query})

+            # Stream HF response with full 8192 token capacity
+            yield {'type': 'coordination_status', 'content': '🧠 HF endpoint thinking...'}
+
+            # Use streaming for real-time delivery
+            hf_response_stream = hf_provider.stream_generate(query, enhanced_history)
+
+            if hf_response_stream:
+                # Stream the response chunks
+                full_hf_response = ""
+                for chunk in hf_response_stream:
+                    if chunk:
+                        full_hf_response += chunk
+                        yield {'type': 'hf_thinking', 'content': chunk}
+
+                # Final HF response
+                yield {'type': 'final_response', 'content': full_hf_response}
+                yield {'type': 'coordination_status', 'content': '🎯 HF analysis complete and authoritative'}
+            else:
+                yield {'type': 'coordination_status', 'content': '❌ HF response generation failed'}
+
+        except Exception as e:
+            logger.error(f"Hierarchical HF coordination failed: {e}")
+            yield {'type': 'coordination_status', 'content': f'❌ HF coordination error: {str(e)}'}
+
+    async def _get_hierarchical_ollama_response(self, query: str, history: List, external_data: Dict) -> str:
+        """Get Ollama response with hierarchical awareness"""
+        try:
+            # Get Ollama provider
+            ollama_provider = llm_factory.get_provider('ollama')
+            if not ollama_provider:
+                raise Exception("Ollama provider not available")
+
+            # Prepare conversation with hierarchical context
+            enhanced_history = history.copy()
+
+            # Add system instruction for Ollama's role
+            enhanced_history.insert(0, {
                 "role": "system",
+                "content": self.system_instructions['ollama_role']
             })

+            # Add external data context if available
+            if external_data:
+                context_parts = []
+                if 'search_answer' in external_data:
+                    context_parts.append(f"Current information: {external_data['search_answer']}")
+                if 'weather' in external_data:
+                    weather = external_data['weather']
+                    context_parts.append(f"Current weather: {weather.get('temperature', 'N/A')}°C in {weather.get('city', 'Unknown')}")
+                if 'current_datetime' in external_data:
+                    context_parts.append(f"Current time: {external_data['current_datetime']}")
+
+                if context_parts:
+                    context_message = {
+                        "role": "system",
+                        "content": "Context: " + " | ".join(context_parts)
+                    }
+                    enhanced_history.insert(1, context_message)  # Insert after role instruction
+
+            # Add the user's query
             enhanced_history.append({"role": "user", "content": query})

+            # Generate response with awareness of HF's superior capabilities
+            response = ollama_provider.generate(query, enhanced_history)
+
+            # Add acknowledgment of HF's authority
+            if response:
+                return f"{response}\n\n*Note: A more comprehensive analysis from the uncensored HF model is being prepared...*"
+            else:
+                return "I'm processing your request... A deeper analysis is being prepared by the authoritative model."
+
         except Exception as e:
+            logger.error(f"Hierarchical Ollama response failed: {e}")
+            return "I'm thinking about your question... Preparing a comprehensive response."
+
+    def _check_hf_availability(self) -> bool:
+        """Check if HF endpoint is configured and available"""
+        try:
+            from utils.config import config
+            return bool(config.hf_token and config.hf_api_url)
+        except:
+            return False
+
+    async def _gather_external_data(self, query: str) -> Dict:
+        """Gather external data from various sources"""
+        data = {}
+
+        # Tavily/DuckDuckGo search with justification focus
+        if self.tavily_client:
+            try:
+                search_result = self.tavily_client.search(
+                    f"current information about {query}",
+                    max_results=5,  # More results for better justification
+                    include_answer=True,
+                    include_raw_content=True  # For deeper analysis
+                )
+                data['search_results'] = search_result.get('results', [])
+                if search_result.get('answer'):
+                    data['search_answer'] = search_result['answer']
+                # Store raw content for HF to analyze
+                data['raw_sources'] = [result.get('raw_content', '')[:1000] for result in search_result.get('results', [])[:3]]
+            except Exception as e:
+                logger.warning(f"Tavily search failed: {e}")
+
+        # Weather data
+        weather_keywords = ['weather', 'temperature', 'forecast', 'climate', 'rain', 'sunny']
+        if any(keyword in query.lower() for keyword in weather_keywords):
+            try:
+                location = self._extract_location(query) or "New York"
+                weather = weather_service.get_current_weather(location)
+                if weather:
+                    data['weather'] = weather
+            except Exception as e:
+                logger.warning(f"Weather data failed: {e}")
+
+        # Current date/time
+        data['current_datetime'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+
+        return data
+
+    def _extract_location(self, query: str) -> Optional[str]:
+        """Extract location from query"""
+        locations = ['New York', 'London', 'Tokyo', 'Paris', 'Berlin', 'Sydney',
+                     'Los Angeles', 'Chicago', 'Miami', 'Seattle', 'Boston',
+                     'San Francisco', 'Toronto', 'Vancouver', 'Montreal']
+        for loc in locations:
+            if loc.lower() in query.lower():
+                return loc
+        return "New York"  # Default

     def get_coordination_status(self) -> Dict:
         """Get current coordination system status"""

         except:
             return {}

 # Global coordinator instance
 coordinator = AICoordinator()
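
For orientation, a minimal consumer sketch of the new streaming API above. Only coordinate_hierarchical_conversation and the chunk 'type' values documented in its docstring come from the diff; the user ID and query are illustrative:

    import asyncio
    from core.coordinator import coordinator

    async def demo():
        # Each yielded chunk is a dict with 'type' and 'content' fields.
        async for chunk in coordinator.coordinate_hierarchical_conversation(
            "demo_user", "What are the key principles of effective time management?"
        ):
            if chunk["type"] == "hf_thinking":
                # Print streamed HF tokens as they arrive, without newlines
                print(chunk["content"], end="", flush=True)
            else:
                print(f"\n[{chunk['type']}] {chunk['content']}")

    if __name__ == "__main__":
        asyncio.run(demo())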
core/providers/huggingface.py CHANGED
@@ -3,9 +3,7 @@ import logging
 from typing import List, Dict, Optional, Union
 from core.providers.base import LLMProvider
 from utils.config import config
-
 logger = logging.getLogger(__name__)
-
 try:
     from openai import OpenAI
     HUGGINGFACE_SDK_AVAILABLE = True
@@ -18,17 +16,15 @@ class HuggingFaceProvider(LLMProvider):

     def __init__(self, model_name: str, timeout: int = 30, max_retries: int = 3):
         super().__init__(model_name, timeout, max_retries)
-
         logger.info(f"Initializing HuggingFaceProvider with:")
-        logger.info(f"  HF_API_URL: {config.hf_api_url}")
-        logger.info(f"  HF_TOKEN SET: {bool(config.hf_token)}")

         if not HUGGINGFACE_SDK_AVAILABLE:
             raise ImportError("Hugging Face provider requires 'openai' package")
-
         if not config.hf_token:
             raise ValueError("HF_TOKEN not set - required for Hugging Face provider")
-
         # Make sure NO proxies parameter is included
         try:
             self.client = OpenAI(
@@ -40,7 +36,7 @@ class HuggingFaceProvider(LLMProvider):
             logger.error(f"Failed to initialize HuggingFaceProvider: {e}")
             logger.error(f"Error type: {type(e)}")
             raise
-
     def generate(self, prompt: str, conversation_history: List[Dict]) -> Optional[str]:
         """Generate a response synchronously"""
         try:
@@ -48,7 +44,7 @@ class HuggingFaceProvider(LLMProvider):
         except Exception as e:
             logger.error(f"Hugging Face generation failed: {e}")
             return None
-
     def stream_generate(self, prompt: str, conversation_history: List[Dict]) -> Optional[Union[str, List[str]]]:
         """Generate a response with streaming support"""
         try:
@@ -56,7 +52,7 @@ class HuggingFaceProvider(LLMProvider):
         except Exception as e:
             logger.error(f"Hugging Face stream generation failed: {e}")
             return None
-
     def validate_model(self) -> bool:
         """Validate if the model is available"""
         # For Hugging Face endpoints, we'll assume the model is valid if we can connect
@@ -67,15 +63,18 @@ class HuggingFaceProvider(LLMProvider):
         except Exception as e:
             logger.warning(f"Hugging Face model validation failed: {e}")
             return False
-
     def _generate_impl(self, prompt: str, conversation_history: List[Dict]) -> str:
-        """Implementation of synchronous generation"""
         try:
             response = self.client.chat.completions.create(
                 model=self.model_name,
                 messages=conversation_history,
-                max_tokens=500,
-                temperature=0.7
             )
             return response.choices[0].message.content
         except Exception as e:
@@ -87,22 +86,28 @@ class HuggingFaceProvider(LLMProvider):
                 response = self.client.chat.completions.create(
                     model=self.model_name,
                     messages=conversation_history,
-                    max_tokens=500,
-                    temperature=0.7
                 )
                 return response.choices[0].message.content
             else:
                 raise
-
     def _stream_generate_impl(self, prompt: str, conversation_history: List[Dict]) -> List[str]:
-        """Implementation of streaming generation"""
         try:
             response = self.client.chat.completions.create(
                 model=self.model_name,
                 messages=conversation_history,
-                max_tokens=500,
                 temperature=0.7,
-                stream=True
             )

             chunks = []
@@ -110,7 +115,7 @@ class HuggingFaceProvider(LLMProvider):
                 content = chunk.choices[0].delta.content
                 if content:
                     chunks.append(content)
-
             return chunks
         except Exception as e:
             # Handle scale-to-zero behavior
@@ -121,9 +126,12 @@ class HuggingFaceProvider(LLMProvider):
                 response = self.client.chat.completions.create(
                     model=self.model_name,
                     messages=conversation_history,
-                    max_tokens=500,
                     temperature=0.7,
-                    stream=True
                 )

                 chunks = []
@@ -131,18 +139,15 @@ class HuggingFaceProvider(LLMProvider):
                 content = chunk.choices[0].delta.content
                 if content:
                     chunks.append(content)
-
             return chunks
         else:
             raise
-
     def _is_scale_to_zero_error(self, error: Exception) -> bool:
         """Check if the error is related to scale-to-zero initialization"""
         error_str = str(error).lower()
         scale_to_zero_indicators = [
-            "503",
-            "service unavailable",
-            "initializing",
-            "cold start"
         ]
         return any(indicator in error_str for indicator in scale_to_zero_indicators)
 
 from typing import List, Dict, Optional, Union
 from core.providers.base import LLMProvider
 from utils.config import config
 logger = logging.getLogger(__name__)
 try:
     from openai import OpenAI
     HUGGINGFACE_SDK_AVAILABLE = True

     def __init__(self, model_name: str, timeout: int = 30, max_retries: int = 3):
         super().__init__(model_name, timeout, max_retries)
         logger.info(f"Initializing HuggingFaceProvider with:")
+        logger.info(f"  HF_API_URL: {config.hf_api_url}")
+        logger.info(f"  HF_TOKEN SET: {bool(config.hf_token)}")

         if not HUGGINGFACE_SDK_AVAILABLE:
             raise ImportError("Hugging Face provider requires 'openai' package")
         if not config.hf_token:
             raise ValueError("HF_TOKEN not set - required for Hugging Face provider")
+
         # Make sure NO proxies parameter is included
         try:
             self.client = OpenAI(

             logger.error(f"Failed to initialize HuggingFaceProvider: {e}")
             logger.error(f"Error type: {type(e)}")
             raise
+
     def generate(self, prompt: str, conversation_history: List[Dict]) -> Optional[str]:
         """Generate a response synchronously"""
         try:

         except Exception as e:
             logger.error(f"Hugging Face generation failed: {e}")
             return None
+
     def stream_generate(self, prompt: str, conversation_history: List[Dict]) -> Optional[Union[str, List[str]]]:
         """Generate a response with streaming support"""
         try:

         except Exception as e:
             logger.error(f"Hugging Face stream generation failed: {e}")
             return None
+
     def validate_model(self) -> bool:
         """Validate if the model is available"""
         # For Hugging Face endpoints, we'll assume the model is valid if we can connect

         except Exception as e:
             logger.warning(f"Hugging Face model validation failed: {e}")
             return False
+
     def _generate_impl(self, prompt: str, conversation_history: List[Dict]) -> str:
+        """Implementation of synchronous generation with proper configuration"""
         try:
             response = self.client.chat.completions.create(
                 model=self.model_name,
                 messages=conversation_history,
+                max_tokens=8192,  # Set to 8192 as requested
+                temperature=0.7,
+                top_p=0.9,
+                frequency_penalty=0.1,
+                presence_penalty=0.1
             )
             return response.choices[0].message.content
         except Exception as e:

                 response = self.client.chat.completions.create(
                     model=self.model_name,
                     messages=conversation_history,
+                    max_tokens=8192,  # Set to 8192 as requested
+                    temperature=0.7,
+                    top_p=0.9,
+                    frequency_penalty=0.1,
+                    presence_penalty=0.1
                 )
                 return response.choices[0].message.content
             else:
                 raise
+
     def _stream_generate_impl(self, prompt: str, conversation_history: List[Dict]) -> List[str]:
+        """Implementation of streaming generation with proper configuration"""
         try:
             response = self.client.chat.completions.create(
                 model=self.model_name,
                 messages=conversation_history,
+                max_tokens=8192,  # Set to 8192 as requested
                 temperature=0.7,
+                top_p=0.9,
+                frequency_penalty=0.1,
+                presence_penalty=0.1,
+                stream=True  # Enable streaming
             )

             chunks = []

                 content = chunk.choices[0].delta.content
                 if content:
                     chunks.append(content)
+
             return chunks
         except Exception as e:
             # Handle scale-to-zero behavior

                 response = self.client.chat.completions.create(
                     model=self.model_name,
                     messages=conversation_history,
+                    max_tokens=8192,  # Set to 8192 as requested
                     temperature=0.7,
+                    top_p=0.9,
+                    frequency_penalty=0.1,
+                    presence_penalty=0.1,
+                    stream=True  # Enable streaming
                 )

                 chunks = []

                 content = chunk.choices[0].delta.content
                 if content:
                     chunks.append(content)
+
             return chunks
         else:
             raise
+
     def _is_scale_to_zero_error(self, error: Exception) -> bool:
         """Check if the error is related to scale-to-zero initialization"""
         error_str = str(error).lower()
         scale_to_zero_indicators = [
+            "503", "service unavailable", "initializing", "cold start"
         ]
         return any(indicator in error_str for indicator in scale_to_zero_indicators)
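
The scale-to-zero handling above amounts to a warm-up-then-retry-once pattern. A generic sketch of that pattern, where make_request, is_cold_start_error, and warm_up are hypothetical stand-ins for the actual request call, _is_scale_to_zero_error, and the endpoint monitor's warm-up:

    def call_with_cold_start_retry(make_request, is_cold_start_error, warm_up):
        # Detect a cold-start failure, warm the endpoint, retry exactly once.
        try:
            return make_request()
        except Exception as exc:
            if is_cold_start_error(exc):   # e.g. "503", "initializing", "cold start"
                if warm_up():              # blocks while the endpoint scales up
                    return make_request()  # single retry after successful warmup
            raise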
core/session.py CHANGED
@@ -74,7 +74,7 @@ class SessionManager:
         redis_data = {}
         for key, value in session.items():
             if isinstance(value, (list, dict)):
-                redis_data[key] = json.dumps(value)
             elif isinstance(value, (int, float, str, bool)):
                 redis_data[key] = value
             else:
@@ -121,7 +121,7 @@
         redis_data = {}
         for key, value in session.items():
             if isinstance(value, (dict, list)):
-                redis_data[key] = json.dumps(value)
             else:
                 redis_data[key] = value

@@ -137,6 +137,60 @@
             logger.error(f"Error updating coordination session for user {user_id}: {e}")
             return False

     def clear_session(self, user_id: str) -> bool:
         """Clear user session data
         Args:
 
         redis_data = {}
         for key, value in session.items():
             if isinstance(value, (list, dict)):
+                redis_data[key] = json.dumps(value, default=str)
             elif isinstance(value, (int, float, str, bool)):
                 redis_data[key] = value
             else:

         redis_data = {}
         for key, value in session.items():
             if isinstance(value, (dict, list)):
+                redis_data[key] = json.dumps(value, default=str)
             else:
                 redis_data[key] = value

             logger.error(f"Error updating coordination session for user {user_id}: {e}")
             return False

+    def update_hierarchical_coordination(self, user_id: str, coordination_data: Dict) -> bool:
+        """Update session with hierarchical coordination data"""
+        try:
+            # Get existing session
+            session = self.get_session(user_id)
+
+            # Add hierarchical coordination tracking
+            if 'hierarchical_coordination' not in session:
+                session['hierarchical_coordination'] = {
+                    'total_conversations': 0,
+                    'hf_engagements': 0,
+                    'ollama_responses': 0,
+                    'coordination_success': 0,
+                    'last_coordination': None
+                }
+
+            coord_stats = session['hierarchical_coordination']
+
+            # Update statistics
+            coord_stats['total_conversations'] += 1
+            coord_stats['last_coordination'] = datetime.now().isoformat()
+
+            # Update specific counters based on coordination data
+            if coordination_data.get('hf_engaged'):
+                coord_stats['hf_engagements'] += 1
+            if coordination_data.get('ollama_responded'):
+                coord_stats['ollama_responses'] += 1
+            if coordination_data.get('success'):
+                coord_stats['coordination_success'] += 1
+
+            # Convert complex data to JSON strings for Redis
+            redis_data = {}
+            for key, value in session.items():
+                if isinstance(value, (dict, list)):
+                    redis_data[key] = json.dumps(value, default=str)
+                else:
+                    redis_data[key] = value
+
+            # Save updated session
+            result = save_user_state(user_id, redis_data)
+            return result
+        except Exception as e:
+            logger.error(f"Error updating hierarchical coordination for user {user_id}: {e}")
+            return False
+
+    def get_hierarchical_stats(self, user_id: str) -> Dict:
+        """Get hierarchical coordination statistics"""
+        try:
+            session = self.get_session(user_id)
+            return session.get('hierarchical_coordination', {})
+        except Exception as e:
+            logger.error(f"Error getting hierarchical stats for user {user_id}: {e}")
+            return {}
+
     def clear_session(self, user_id: str) -> bool:
         """Clear user session data
         Args:
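
The default=str added to both serialization loops is a safety net: any session value json cannot encode natively (a datetime, for example) is coerced through str() instead of raising TypeError. A quick round-trip sketch:

    import json
    from datetime import datetime

    payload = {"last_coordination": datetime.now(), "hf_engagements": 3}
    encoded = json.dumps(payload, default=str)  # datetime falls back to str(), no TypeError
    decoded = json.loads(encoded)               # the coerced value comes back as a plain string

Note that coerced values deserialize as plain strings, which is why update_hierarchical_coordination above also stores last_coordination explicitly via isoformat().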
test_hierarchical_coordination.py ADDED
@@ -0,0 +1,83 @@
+import sys
+import asyncio
+from pathlib import Path
+
+# Add project root to path
+project_root = Path(__file__).parent
+sys.path.append(str(project_root))
+
+from core.coordinator import coordinator
+from core.session import session_manager
+
+async def test_hierarchical_coordination():
+    """Test the hierarchical coordination system"""
+    print("=== Hierarchical Coordination System Test ===")
+    print()
+
+    # Test user query
+    user_query = "What are the key principles of effective time management?"
+    user_id = "test_user"
+
+    print(f"User Query: {user_query}")
+    print()
+
+    # Test coordination status
+    print("1. Testing Coordination Status:")
+    try:
+        coord_status = coordinator.get_coordination_status()
+        print(f"   Tavily Available: {coord_status.get('tavily_available', False)}")
+        print(f"   Weather Available: {coord_status.get('weather_available', False)}")
+        print(f"   Web Search Enabled: {coord_status.get('web_search_enabled', False)}")
+        print("   ✅ Coordination Status Check Passed")
+    except Exception as e:
+        print(f"   ❌ Coordination Status Check Failed: {e}")
+
+    print()
+
+    # Test hierarchical conversation coordination
+    print("2. Testing Hierarchical Conversation Coordination:")
+    try:
+        print("   Starting hierarchical coordination...")
+        response_count = 0
+
+        async for response_chunk in coordinator.coordinate_hierarchical_conversation(user_id, user_query):
+            response_count += 1
+            print(f"   Chunk {response_count}: {response_chunk['type']} - {response_chunk['content'][:50]}...")
+
+            # Limit output for readability
+            if response_count >= 5:
+                print("   ... (truncated for brevity)")
+                break
+
+        print("   ✅ Hierarchical Coordination Test Passed")
+    except Exception as e:
+        print(f"   ❌ Hierarchical Coordination Test Failed: {e}")
+
+    print()
+
+    # Test hierarchical session tracking
+    print("3. Testing Hierarchical Session Tracking:")
+    try:
+        # Update with test coordination data
+        test_data = {
+            'hf_engaged': True,
+            'ollama_responded': True,
+            'success': True
+        }
+        update_result = session_manager.update_hierarchical_coordination(user_id, test_data)
+        print(f"   Update Result: {'✅ Success' if update_result else '❌ Failed'}")
+
+        # Get hierarchical stats
+        stats = session_manager.get_hierarchical_stats(user_id)
+        print(f"   Total Conversations: {stats.get('total_conversations', 0)}")
+        print(f"   HF Engagements: {stats.get('hf_engagements', 0)}")
+        print(f"   Ollama Responses: {stats.get('ollama_responses', 0)}")
+        print("   ✅ Hierarchical Session Tracking Passed")
+    except Exception as e:
+        print(f"   ❌ Hierarchical Session Tracking Failed: {e}")
+
+    print()
+    print("🎉 Hierarchical Coordination System Test Completed!")
+
+if __name__ == "__main__":
+    asyncio.run(test_hierarchical_coordination())
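
Assuming Redis and the configured model providers are reachable, the new test script runs directly from the repository root:

    python test_hierarchical_coordination.py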