rdune71 committed
Commit e7063a6 · Parent: e441606

Prioritize HF Endpoint as primary provider and improve timeout handling

Files changed (3):
  1. app.py +7 -10
  2. src/llm/factory.py +23 -31
  3. src/ui/chat_handler.py +33 -51
app.py CHANGED
@@ -60,14 +60,7 @@ with st.sidebar:
     st.title("🐱 CosmicCat AI Assistant")
     st.markdown("Your personal AI-powered assistant with a cosmic twist.")
 
-    # Show available providers
-    available_providers = []
-    if config.hf_token and hf_monitor.get_endpoint_status()["available"]:
-        available_providers.append("🤗 HF Endpoint (Primary)")
-    if config.ollama_host:
-        available_providers.append("🦙 Ollama (Local)")
-
-    # Model selection (now shows actual providers)
+    # Model selection
     model_options = {
         "Auto Select": "auto", # Will choose best available
         "🦙 Ollama (Local)": "ollama",
@@ -84,8 +77,12 @@ with st.sidebar:
     # Show which provider will actually be used
     actual_provider = "Unknown"
     if st.session_state.selected_model_value == "auto":
-        if config.hf_token and hf_monitor.get_endpoint_status()["available"]:
-            actual_provider = "🤗 HF Endpoint"
+        if config.hf_token:
+            status = hf_monitor.get_endpoint_status()
+            if status["available"]:
+                actual_provider = "🤗 HF Endpoint"
+            elif config.ollama_host:
+                actual_provider = "🦙 Ollama"
        elif config.ollama_host:
            actual_provider = "🦙 Ollama"
        else:
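The sidebar's auto-select branch above reduces to a small priority check. Below is a minimal standalone sketch of that logic, collapsing the nested branches into the equivalent priority order; it assumes hf_monitor.get_endpoint_status() returns a dict with a boolean "available" field, and the function name and plain arguments are hypothetical stand-ins for the real config and hf_monitor objects.

# Sketch only: mirrors the auto-select priority in app.py with plain arguments.
def resolve_auto_provider(hf_token, ollama_host, endpoint_status):
    """Return the label the sidebar would show for 'Auto Select'."""
    if hf_token and endpoint_status.get("available"):
        return "🤗 HF Endpoint"
    if ollama_host:
        return "🦙 Ollama"
    return "Unknown"

# Token set but endpoint still warming up, local Ollama configured:
print(resolve_auto_provider("hf_xxx", "http://localhost:11434", {"available": False}))  # 🦙 Ollama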
src/llm/factory.py CHANGED
@@ -1,7 +1,6 @@
 import logging
 from typing import Optional
 from src.llm.base_provider import LLMProvider
-from src.llm.mentor_provider import MentorProvider
 from src.llm.hf_provider import HuggingFaceProvider
 from src.llm.ollama_provider import OllamaProvider
 from utils.config import config
@@ -14,9 +13,10 @@ class ProviderNotAvailableError(Exception):
     pass
 
 class LLMFactory:
-    """Factory for creating LLM providers with mentor approach"""
+    """Factory for creating LLM providers with intelligent priority"""
 
     _instance = None
+    _providers = {}
 
     def __new__(cls):
         if cls._instance is None:
@@ -26,39 +26,31 @@ class LLMFactory:
     def get_provider(self, preferred_provider: Optional[str] = None) -> LLMProvider:
         """
         Get an LLM provider based on preference and availability.
-        Default: Mentor approach (HF expert + Ollama mentor)
+        NEW PRIORITY: HF Endpoint > Ollama > Error
         """
-        try:
-            # Try mentor provider first (HF expert + Ollama mentor)
-            logger.info("Initializing Mentor Provider (HF Expert + Ollama Mentor)")
-            return MentorProvider(
-                model_name="mentor_model"
-            )
-        except Exception as e:
-            logger.warning(f"Failed to initialize Mentor provider: {e}")
-
-        # Fallback to individual providers
-        if config.hf_token:
-            status = hf_monitor.get_endpoint_status()
-            if status["available"]:
-                try:
-                    logger.info("Falling back to HF Endpoint")
-                    return HuggingFaceProvider(
-                        model_name="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf"
-                    )
-                except Exception as hf_error:
-                    logger.warning(f"Failed to initialize HF provider: {hf_error}")
-
-        if config.ollama_host:
-            try:
-                logger.info("Falling back to Ollama")
-                return OllamaProvider(
-                    model_name=config.local_model_name
-                )
-            except Exception as ollama_error:
-                logger.warning(f"Failed to initialize Ollama provider: {ollama_error}")
-
-        raise ProviderNotAvailableError("No LLM providers are available or configured")
+        # Check if HF should be primary (when token available and endpoint ready)
+        if config.hf_token:
+            hf_status = hf_monitor.get_endpoint_status()
+            if hf_status["available"]:
+                try:
+                    logger.info("🎯 Using HF Endpoint as PRIMARY provider")
+                    return HuggingFaceProvider(
+                        model_name="DavidAU/OpenAi-GPT-oss-20b-abliterated-uncensored-NEO-Imatrix-gguf"
+                    )
+                except Exception as e:
+                    logger.warning(f"Failed to initialize HF provider: {e}")
+
+        # Try Ollama as secondary
+        if config.ollama_host:
+            try:
+                logger.info("🦙 Using Ollama as SECONDARY provider")
+                return OllamaProvider(
+                    model_name=config.local_model_name
+                )
+            except Exception as e:
+                logger.warning(f"Failed to initialize Ollama provider: {e}")
+
+        raise ProviderNotAvailableError("No LLM providers are available")
 
 # Global factory instance
 llm_factory = LLMFactory()
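With this change, callers no longer need to know which backend won; they ask the factory for whichever provider is ready and handle the no-provider case. A minimal usage sketch under the assumptions visible in this diff, where the returned provider exposes generate(prompt, conversation_history) as used in src/ui/chat_handler.py below:

from src.llm.factory import llm_factory, ProviderNotAvailableError

try:
    provider = llm_factory.get_provider()      # HF Endpoint if token set and endpoint ready, else Ollama
    reply = provider.generate("Hello!", [])    # same call shape as in chat_handler.py
    print(reply)
except ProviderNotAvailableError as e:
    print(f"No LLM backend is configured or reachable: {e}")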
src/ui/chat_handler.py CHANGED
@@ -8,13 +8,13 @@ from core.session import session_manager
 logger = logging.getLogger(__name__)
 
 class ChatHandler:
-    """Handles chat interactions with mentor AI approach"""
+    """Handles chat interactions with better timeout handling"""
 
     def __init__(self):
         self.is_processing = False
 
     def process_user_message(self, user_input: str, selected_model: str):
-        """Process user message with enhanced UI feedback"""
+        """Process user message with immediate display"""
         if not user_input or not user_input.strip():
             st.warning("Please enter a message")
             return
@@ -53,7 +53,7 @@ class ChatHandler:
         st.session_state.last_processed_message = ""
 
     def process_ai_response(self, user_input: str, selected_model: str):
-        """Process AI response with mentor approach"""
+        """Process AI response with enhanced timeout handling"""
         if not user_input or not user_input.strip():
             return
 
@@ -64,17 +64,13 @@ class ChatHandler:
         response_placeholder = st.empty()
 
         try:
-            # Get mentor provider
-            status_placeholder.info("🚀 Activating AI Mentor System...")
+            # Determine which provider will be used
             provider = llm_factory.get_provider()
+            provider_name = "HF Endpoint" if "huggingface" in str(type(provider)).lower() else "Ollama"
 
-            # Show system status
-            if hasattr(provider, 'hf_provider') and provider.hf_provider:
-                status_placeholder.info("🤖 Consulting HF Expert + 🐱 Getting Ollama Mentor Insights...")
-            else:
-                status_placeholder.info("🦙 Consulting Local Ollama...")
+            status_placeholder.info(f"🚀 Contacting {provider_name}...")
 
-            # Get response
+            # Generate response with timeout handling
             response = None
             try:
                 # Get session and conversation history
@@ -83,27 +79,37 @@ class ChatHandler:
                 conversation_history.append({"role": "user", "content": user_input})
 
                 response = provider.generate(user_input, conversation_history)
+
             except Exception as e:
                 logger.error(f"AI response error: {e}")
-                raise
+
+                # Better error messages for timeout issues
+                if "timeout" in str(e).lower() or "500" in str(e) or "60" in str(e):
+                    if provider_name == "Ollama":
+                        error_message = ("⏰ Ollama request timed out (60+ seconds).\n\n"
+                                         "**Recommendation:** Switch to HF Endpoint which is more reliable.\n"
+                                         "Click the '🤖 HF Expert Analysis' button below for deep analysis.")
+                    else:
+                        error_message = ("⏰ HF Endpoint is initializing (may take 2-4 minutes).\n\n"
+                                         "Please try again in a moment or use Ollama for faster responses.")
+                else:
+                    error_message = f"Sorry, I encountered an error: {str(e)[:100]}..."
+
+                status_placeholder.error(f"❌ {provider_name} Error")
+                response_placeholder.markdown(error_message)
+                return
 
             if response and response.strip():
-                status_placeholder.success("✅ Expert Response + Mentor Insights Received!")
+                status_placeholder.success("✅ Response received!")
                 response_placeholder.markdown(response)
 
-                # Add to session history with provider info
+                # Add to session history
                 timestamp = time.strftime("%H:%M:%S")
-                provider_info = "mentor_system"
-                if hasattr(provider, 'hf_provider') and provider.hf_provider:
-                    provider_info = "mentor_hf"
-                elif hasattr(provider, 'ollama_provider') and provider.ollama_provider:
-                    provider_info = "ollama_only"
-
                 st.session_state.messages.append({
                     "role": "assistant",
                     "content": response,
                     "timestamp": timestamp,
-                    "provider": provider_info
+                    "provider": provider_name.lower().replace(" ", "_")
                 })
             else:
                 status_placeholder.warning("⚠️ Empty response received")
@@ -112,8 +118,7 @@ class ChatHandler:
                 st.session_state.messages.append({
                     "role": "assistant",
                     "content": "*No response generated. Please try again.*",
-                    "timestamp": timestamp,
-                    "provider": "unknown"
+                    "timestamp": timestamp
                 })
 
         except ProviderNotAvailableError as e:
@@ -127,33 +132,10 @@ class ChatHandler:
            })
            logger.error(f"Provider not available: {e}")
 
-        except Exception as e:
-            status_placeholder.error("❌ Request failed")
-
-            # User-friendly error messages
-            if "timeout" in str(e).lower() or "500" in str(e):
-                error_message = ("⏰ Request timed out. The AI is taking too long to respond.\n\n"
-                                 "**Current setup:**\n"
-                                 "• 🤖 HF Expert: Providing deep analysis\n"
-                                 "• 🐱 Ollama Mentor: Analyzing thinking patterns\n\n"
-                                 "Please try again or simplify your question.")
-            else:
-                error_message = f"Sorry, I encountered an error: {str(e)}"
-
-            response_placeholder.markdown(error_message)
-            timestamp = time.strftime("%H:%M:%S")
-            st.session_state.messages.append({
-                "role": "assistant",
-                "content": error_message,
-                "timestamp": timestamp
-            })
-            logger.error(f"Chat processing error: {e}", exc_info=True)
-
         except Exception as e:
-            logger.error(f"Unexpected error in process_ai_response: {e}", exc_info=True)
+            logger.error(f"Chat processing failed: {e}", exc_info=True)
             st.error("An unexpected error occurred. Please try again.")
         finally:
-            # Clear processing flags
             st.session_state.is_processing = False
             st.session_state.last_processed_message = ""
             time.sleep(0.1)
@@ -161,11 +143,11 @@ class ChatHandler:
     def _get_provider_display_name(self, provider_name: str) -> str:
         """Get display name for provider"""
         display_names = {
-            "ollama_only": "🦙 Ollama (Local)",
-            "mentor_hf": "🎓 Mentor System (HF Expert + Ollama Mentor)",
-            "mentor_system": "🔄 Mentor System Active"
+            "ollama": "🦙 Ollama (Local)",
+            "huggingface": "🤗 HF Endpoint (Primary)",
+            "hf_endpoint": "🤗 HF Endpoint (Primary)"
        }
-        return display_names.get(provider_name, provider_name)
+        return display_names.get(provider_name.lower(), provider_name)
 
 # Global instance
 chat_handler = ChatHandler()
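The new timeout branch keys off substrings of the exception text. Pulled out as a standalone helper for clarity, this is a sketch only: the function name is hypothetical, the messages are shortened versions of the strings in the diff, and the "60" substring match is exactly as broad as it is above.

def classify_error(exc, provider_name):
    """Map a provider exception to the kind of user-facing message used in process_ai_response."""
    text = str(exc)
    looks_like_timeout = "timeout" in text.lower() or "500" in text or "60" in text
    if looks_like_timeout:
        if provider_name == "Ollama":
            return "⏰ Ollama request timed out (60+ seconds). Consider the HF Endpoint instead."
        return "⏰ HF Endpoint is initializing (may take 2-4 minutes). Try again shortly or use Ollama."
    return f"Sorry, I encountered an error: {text[:100]}..."

print(classify_error(TimeoutError("read timed out after 60s"), "Ollama"))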