rdune71 committed
Commit d891499 · 1 Parent(s): f6bf178
Files changed (4)
  1. Dockerfile +12 -0
  2. app.py +193 -93
  3. core/coordinator.py +334 -70
  4. start.sh +13 -13
Dockerfile CHANGED
@@ -0,0 +1,12 @@
+ FROM python:3.10-slim
+
+ WORKDIR /app
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY . .
+
+ EXPOSE 8501
+
+ CMD ["streamlit", "run", "app.py"]
app.py CHANGED
@@ -22,15 +22,6 @@ logger = logging.getLogger(__name__)
22
 
23
  st.set_page_config(page_title="AI Life Coach", page_icon="🧠", layout="wide")
24
 
25
- # Processing stage labels for better user feedback
26
- PROCESSING_STAGES = {
27
- "ollama": "🦙 Contacting Ollama...",
28
- "hf_init": "⚡ Initializing HF Endpoint (2–4 minutes)...",
29
- "hf_thinking": "🧠 HF Expert Thinking...",
30
- "hf_complete": "🎯 HF Analysis Complete!",
31
- "error": "⚠️ Something went wrong – trying again..."
32
- }
33
-
34
  # Initialize session state safely at the top of app.py
35
  if "messages" not in st.session_state:
36
  st.session_state.messages = []
@@ -42,6 +33,8 @@ if "ngrok_url_temp" not in st.session_state:
42
  st.session_state.ngrok_url_temp = st.session_state.get("ngrok_url", "https://7bcc180dffd1.ngrok-free.app")
43
  if "hf_expert_requested" not in st.session_state:
44
  st.session_state.hf_expert_requested = False
 
 
45
 
46
  # Sidebar layout redesign
47
  with st.sidebar:
@@ -63,6 +56,9 @@ with st.sidebar:
63
  )
64
  st.session_state.selected_model = model_options[selected_model_name]
65
 
 
 
 
66
  st.divider()
67
 
68
  # CONFIGURATION
@@ -148,13 +144,14 @@ with st.sidebar:
148
  use_container_width=True,
149
  disabled=st.session_state.is_processing):
150
  st.session_state.hf_expert_requested = True
151
-
152
  st.divider()
153
  st.subheader("🐛 Debug Info")
154
  # Show current configuration
155
  st.markdown(f"**Environment:** {'HF Space' if config.is_hf_space else 'Local'}")
156
  st.markdown(f"**Model:** {st.session_state.selected_model}")
157
  st.markdown(f"**Ollama URL:** {st.session_state.ngrok_url_temp}")
 
158
 
159
  # Show active features
160
  features = []
@@ -171,14 +168,23 @@ with st.sidebar:
171
  st.title("🧠 AI Life Coach")
172
  st.markdown("Ask me anything about personal development, goal setting, or life advice!")
173
 
174
- # Consistent message rendering function
175
- def render_message(role, content, timestamp=None):
176
  """Render chat messages with consistent styling"""
177
  with st.chat_message(role):
178
- if role == "assistant" and content.startswith("### 🤖 HF Expert Analysis"):
179
- st.markdown(content)
180
- else:
181
- st.markdown(content)
182
  if timestamp:
183
  st.caption(f"🕒 {timestamp}")
184
 
@@ -187,6 +193,7 @@ for message in st.session_state.messages:
187
  render_message(
188
  message["role"],
189
  message["content"],
 
190
  message.get("timestamp")
191
  )
192
 
@@ -281,15 +288,15 @@ def validate_user_input(text):
281
  """Validate and sanitize user input"""
282
  if not text or not text.strip():
283
  return False, "Input cannot be empty"
284
-
285
  if len(text) > 1000:
286
  return False, "Input too long (max 1000 characters)"
287
-
288
  # Check for potentially harmful patterns
289
  harmful_patterns = ["<script", "javascript:", "onload=", "onerror="]
290
  if any(pattern in text.lower() for pattern in harmful_patterns):
291
  return False, "Potentially harmful input detected"
292
-
293
  return True, text.strip()
294
 
295
  # Chat input - FIXED VERSION (moved outside of tabs)
@@ -328,72 +335,158 @@ if user_input and not st.session_state.is_processing:
328
  conversation_history = conversation[-5:] # Last 5 messages
329
  conversation_history.append({"role": "user", "content": validated_input})
330
 
331
- # Try Ollama with proper error handling
332
- status_placeholder.info(PROCESSING_STAGES["ollama"])
333
- ai_response = None
334
-
335
- try:
336
- ai_response = send_to_ollama(
337
- validated_input,
338
- conversation_history,
339
- st.session_state.ngrok_url_temp,
340
- st.session_state.selected_model
341
- )
342
 
343
- if ai_response:
344
- response_placeholder.markdown(ai_response)
345
- status_placeholder.success("✅ Response received!")
346
- else:
347
- status_placeholder.warning("⚠️ Empty response from Ollama")
348
-
349
- except Exception as ollama_error:
350
- user_msg = translate_error(ollama_error)
351
- status_placeholder.error(f"⚠️ {user_msg}")
352
-
353
- # Fallback to HF if available
354
- if config.hf_token and not ai_response:
355
- status_placeholder.info(PROCESSING_STAGES["hf_init"])
356
  try:
357
- ai_response = send_to_hf(validated_input, conversation_history)
358
  if ai_response:
359
  response_placeholder.markdown(ai_response)
360
- status_placeholder.success("✅ HF response received!")
361
  else:
362
- status_placeholder.error(" No response from HF")
363
- except Exception as hf_error:
364
- user_msg = translate_error(hf_error)
 
365
  status_placeholder.error(f"⚠️ {user_msg}")
366
-
367
- # Save response if successful
368
- if ai_response:
369
- # Update conversation history
370
- conversation.append({"role": "user", "content": validated_input})
371
- conversation.append({"role": "assistant", "content": ai_response})
372
- user_session["conversation"] = conversation
373
- session_manager.update_session("default_user", user_session)
374
 
375
- # Add to message history - ensure proper format
376
- st.session_state.messages.append({
377
- "role": "assistant",
378
- "content": ai_response,
379
- "timestamp": datetime.now().strftime("%H:%M:%S")
380
- })
382
- # Add feedback buttons
383
- st.divider()
384
- col1, col2 = st.columns(2)
385
- with col1:
386
- if st.button("👍 Helpful", key=f"helpful_{len(st.session_state.messages)}"):
387
- st.success("Thanks for your feedback!")
388
- with col2:
389
- if st.button("👎 Not Helpful", key=f"not_helpful_{len(st.session_state.messages)}"):
390
- st.success("Thanks for your feedback!")
391
- else:
392
- st.session_state.messages.append({
393
- "role": "assistant",
394
- "content": "Sorry, I couldn't process your request. Please try again.",
395
- "timestamp": datetime.now().strftime("%H:%M:%S")
396
- })
398
  except Exception as e:
399
  user_msg = translate_error(e)
@@ -403,10 +496,10 @@ if user_input and not st.session_state.is_processing:
403
  "content": f"⚠️ {user_msg}",
404
  "timestamp": datetime.now().strftime("%H:%M:%S")
405
  })
406
- finally:
407
- st.session_state.is_processing = False
408
- time.sleep(0.5) # Brief pause
409
- st.experimental_rerun()
410
 
411
  # Add evaluation dashboard tab (separate from chat interface)
412
  st.divider()
@@ -415,7 +508,7 @@ tab1, tab2, tab3 = st.tabs(["🔬 Evaluate AI", "📊 Reports", "ℹ️ About"])
415
  with tab1:
416
  st.header("🔬 AI Behavior Evaluator")
417
  st.markdown("Run sample prompts to observe AI behavior.")
418
-
419
  eval_prompts = [
420
  "What is the capital of France?",
421
  "What day is it today?",
@@ -423,29 +516,29 @@ with tab1:
423
  "Explain CRISPR gene editing simply.",
424
  "Can vitamin D prevent flu infections?"
425
  ]
426
-
427
  selected_prompt = st.selectbox("Choose a test prompt:", eval_prompts)
428
  custom_prompt = st.text_input("Or enter your own:", "")
429
-
430
  final_prompt = custom_prompt or selected_prompt
431
-
432
  if st.button("Evaluate"):
433
  with st.spinner("Running evaluation..."):
434
  start_time = time.time()
435
-
436
  # Simulate sending to coordinator
437
  from core.session import session_manager
438
  user_session = session_manager.get_session("eval_user")
439
  history = user_session.get("conversation", [])
440
-
441
  try:
442
  ai_response = send_to_ollama(final_prompt, history, st.session_state.ngrok_url_temp, st.session_state.selected_model)
443
  duration = round(time.time() - start_time, 2)
444
-
445
  st.success(f"✅ Response generated in {duration}s")
446
  st.markdown("**Response:**")
447
  st.write(ai_response)
448
-
449
  st.markdown("**Analysis Tags:**")
450
  tags = []
451
  if "today" in final_prompt.lower() or "date" in final_prompt.lower():
@@ -455,7 +548,7 @@ with tab1:
455
  if any(word in final_prompt.lower() for word in ["vitamin", "drug", "metformin", "CRISPR"]):
456
  tags.append("🧬 Scientific Knowledge")
457
  st.write(", ".join(tags) if tags else "General Knowledge")
458
-
459
  except Exception as e:
460
  st.error(f"Evaluation failed: {translate_error(e)}")
461
 
@@ -531,6 +624,7 @@ with tab2:
531
  st.markdown(f"**Environment:** {'HF Space' if config.is_hf_space else 'Local'}")
532
  st.markdown(f"**Primary Model:** {config.local_model_name or 'Not set'}")
533
  st.markdown(f"**Ollama Host:** {config.ollama_host or 'Not configured'}")
 
534
 
535
  features = []
536
  if config.use_fallback:
@@ -557,8 +651,8 @@ with tab2:
557
 
558
  col1, col2, col3 = st.columns(3)
559
  col1.metric("Total Exchanges", len(user_messages))
560
- col2.metric("Avg Response Length",
561
- round(sum(len(msg.get("content", "")) for msg in ai_messages) / len(ai_messages)) if ai_messages else 0)
562
  col3.metric("Topics Discussed", len(set(["life", "goal", "health", "career"]) &
563
  set(" ".join([msg.get("content", "") for msg in conversation]).lower().split())))
564
 
@@ -570,7 +664,7 @@ with tab2:
570
  st.markdown(f"**Detected Topics:** {', '.join(relevant_topics)}")
571
  else:
572
  st.info("No conversation data available yet.")
573
-
574
  except Exception as e:
575
  st.warning(f"Could not analyze conversation: {translate_error(e)}")
576
 
@@ -585,6 +679,12 @@ with tab3:
585
  - **Persistent memory**: Uses Redis for conversation history storage
586
  - **Hierarchical reasoning**: Fast local responses with deep cloud analysis
587
 
 
 
 
 
 
 
588
  ### 🛠️ Technical Architecture
589
  - **Primary model**: Ollama (local processing for fast responses)
590
  - **Secondary model**: Hugging Face Inference API (deep analysis)
 
22
 
23
  st.set_page_config(page_title="AI Life Coach", page_icon="🧠", layout="wide")
24
25
  # Initialize session state safely at the top of app.py
26
  if "messages" not in st.session_state:
27
  st.session_state.messages = []
 
33
  st.session_state.ngrok_url_temp = st.session_state.get("ngrok_url", "https://7bcc180dffd1.ngrok-free.app")
34
  if "hf_expert_requested" not in st.session_state:
35
  st.session_state.hf_expert_requested = False
36
+ if "cosmic_mode" not in st.session_state:
37
+ st.session_state.cosmic_mode = True # Default to cosmic mode
38
 
39
  # Sidebar layout redesign
40
  with st.sidebar:
 
56
  )
57
  st.session_state.selected_model = model_options[selected_model_name]
58
 
59
+ # Toggle for cosmic mode
60
+ st.session_state.cosmic_mode = st.toggle("Enable Cosmic Cascade", value=st.session_state.cosmic_mode)
61
+
62
  st.divider()
63
 
64
  # CONFIGURATION
 
144
  use_container_width=True,
145
  disabled=st.session_state.is_processing):
146
  st.session_state.hf_expert_requested = True
147
+
148
  st.divider()
149
  st.subheader("🐛 Debug Info")
150
  # Show current configuration
151
  st.markdown(f"**Environment:** {'HF Space' if config.is_hf_space else 'Local'}")
152
  st.markdown(f"**Model:** {st.session_state.selected_model}")
153
  st.markdown(f"**Ollama URL:** {st.session_state.ngrok_url_temp}")
154
+ st.markdown(f"**Cosmic Mode:** {'Enabled' if st.session_state.cosmic_mode else 'Disabled'}")
155
 
156
  # Show active features
157
  features = []
 
168
  st.title("🧠 AI Life Coach")
169
  st.markdown("Ask me anything about personal development, goal setting, or life advice!")
170
 
171
+ # Consistent message rendering function with cosmic styling
172
+ def render_message(role, content, source=None, timestamp=None):
173
  """Render chat messages with consistent styling"""
174
  with st.chat_message(role):
175
+ if source:
176
+ if source == "local_kitty":
177
+ st.markdown(f"### 🐱 Cosmic Kitten Says:")
178
+ elif source == "orbital_station":
179
+ st.markdown(f"### 🛰️ Orbital Station Reports:")
180
+ elif source == "cosmic_summary":
181
+ st.markdown(f"### 🌟 Final Cosmic Summary:")
182
+ elif source == "error":
183
+ st.markdown(f"### ❌ Error:")
184
+ else:
185
+ st.markdown(f"### {source}")
186
+
187
+ st.markdown(content)
188
  if timestamp:
189
  st.caption(f"🕒 {timestamp}")
190
 
 
193
  render_message(
194
  message["role"],
195
  message["content"],
196
+ message.get("source"),
197
  message.get("timestamp")
198
  )
199
 
 
288
  """Validate and sanitize user input"""
289
  if not text or not text.strip():
290
  return False, "Input cannot be empty"
291
+
292
  if len(text) > 1000:
293
  return False, "Input too long (max 1000 characters)"
294
+
295
  # Check for potentially harmful patterns
296
  harmful_patterns = ["<script", "javascript:", "onload=", "onerror="]
297
  if any(pattern in text.lower() for pattern in harmful_patterns):
298
  return False, "Potentially harmful input detected"
299
+
300
  return True, text.strip()
301
 
302
  # Chat input - FIXED VERSION (moved outside of tabs)
 
335
  conversation_history = conversation[-5:] # Last 5 messages
336
  conversation_history.append({"role": "user", "content": validated_input})
337
 
338
+ # Check if cosmic mode is enabled
339
+ if st.session_state.cosmic_mode:
340
+ # Process cosmic cascade response
341
+ message_placeholder = st.empty()
342
+ status_placeholder = st.empty()
344
  try:
345
+ # Get conversation history
346
+ user_session = session_manager.get_session("default_user")
347
+ conversation_history = user_session.get("conversation", []).copy()
348
+
349
+ # Stage 1: Local Ollama Response
350
+ status_placeholder.info("🐱 Cosmic Kitten Responding...")
351
+ local_response = send_to_ollama(
352
+ validated_input,
353
+ conversation_history,
354
+ st.session_state.ngrok_url_temp,
355
+ st.session_state.selected_model
356
+ )
357
+
358
+ if local_response:
359
+ with st.chat_message("assistant"):
360
+ st.markdown(f"### 🐱 Cosmic Kitten Says:\n{local_response}")
361
+
362
+ st.session_state.messages.append({
363
+ "role": "assistant",
364
+ "content": local_response,
365
+ "source": "local_kitty",
366
+ "timestamp": datetime.now().strftime("%H:%M:%S")
367
+ })
368
+
369
+ # Stage 2: HF Endpoint Analysis
370
+ status_placeholder.info("🛰️ Beaming Query to Orbital Station...")
371
+ if config.hf_token:
372
+ hf_response = send_to_hf(validated_input, conversation_history)
373
+ if hf_response:
374
+ with st.chat_message("assistant"):
375
+ st.markdown(f"### 🛰️ Orbital Station Reports:\n{hf_response}")
376
+
377
+ st.session_state.messages.append({
378
+ "role": "assistant",
379
+ "content": hf_response,
380
+ "source": "orbital_station",
381
+ "timestamp": datetime.now().strftime("%H:%M:%S")
382
+ })
383
+
384
+ # Stage 3: Local Synthesis
385
+ status_placeholder.info("🐱 Cosmic Kitten Synthesizing Wisdom...")
386
+ # Update history with both responses
387
+ synthesis_history = conversation_history.copy()
388
+ synthesis_history.extend([
389
+ {"role": "assistant", "content": local_response},
390
+ {"role": "assistant", "content": hf_response, "source": "cloud"}
391
+ ])
392
+
393
+ synthesis = send_to_ollama(
394
+ f"Synthesize these two perspectives:\n1. Local: {local_response}\n2. Cloud: {hf_response}",
395
+ synthesis_history,
396
+ st.session_state.ngrok_url_temp,
397
+ st.session_state.selected_model
398
+ )
399
+
400
+ if synthesis:
401
+ with st.chat_message("assistant"):
402
+ st.markdown(f"### 🌟 Final Cosmic Summary:\n{synthesis}")
403
+
404
+ st.session_state.messages.append({
405
+ "role": "assistant",
406
+ "content": synthesis,
407
+ "source": "cosmic_summary",
408
+ "timestamp": datetime.now().strftime("%H:%M:%S")
409
+ })
410
+
411
+ status_placeholder.success("✨ Cosmic Cascade Complete!")
412
+
413
+ except Exception as e:
414
+ error_msg = f"🌌 Cosmic disturbance: {str(e)}"
415
+ st.error(error_msg)
416
+ st.session_state.messages.append({
417
+ "role": "assistant",
418
+ "content": error_msg,
419
+ "source": "error",
420
+ "timestamp": datetime.now().strftime("%H:%M:%S")
421
+ })
422
+ else:
423
+ # Traditional processing
424
+ # Try Ollama with proper error handling
425
+ status_placeholder.info("🦙 Contacting Ollama...")
426
+ ai_response = None
427
+
428
+ try:
429
+ ai_response = send_to_ollama(
430
+ validated_input,
431
+ conversation_history,
432
+ st.session_state.ngrok_url_temp,
433
+ st.session_state.selected_model
434
+ )
435
+
436
  if ai_response:
437
  response_placeholder.markdown(ai_response)
438
+ status_placeholder.success("✅ Response received!")
439
  else:
440
+ status_placeholder.warning("⚠️ Empty response from Ollama")
441
+
442
+ except Exception as ollama_error:
443
+ user_msg = translate_error(ollama_error)
444
  status_placeholder.error(f"⚠️ {user_msg}")
446
+ # Fallback to HF if available
447
+ if config.hf_token and not ai_response:
448
+ status_placeholder.info(" Initializing HF Endpoint (2–4 minutes)...")
449
+ try:
450
+ ai_response = send_to_hf(validated_input, conversation_history)
451
+ if ai_response:
452
+ response_placeholder.markdown(ai_response)
453
+ status_placeholder.success("✅ HF response received!")
454
+ else:
455
+ status_placeholder.error("❌ No response from HF")
456
+ except Exception as hf_error:
457
+ user_msg = translate_error(hf_error)
458
+ status_placeholder.error(f"⚠️ {user_msg}")
459
 
460
+ # Save response if successful
461
+ if ai_response:
462
+ # Update conversation history
463
+ conversation.append({"role": "user", "content": validated_input})
464
+ conversation.append({"role": "assistant", "content": ai_response})
465
+ user_session["conversation"] = conversation
466
+ session_manager.update_session("default_user", user_session)
467
+
468
+ # Add to message history - ensure proper format
469
+ st.session_state.messages.append({
470
+ "role": "assistant",
471
+ "content": ai_response,
472
+ "timestamp": datetime.now().strftime("%H:%M:%S")
473
+ })
474
+
475
+ # Add feedback buttons
476
+ st.divider()
477
+ col1, col2 = st.columns(2)
478
+ with col1:
479
+ if st.button("👍 Helpful", key=f"helpful_{len(st.session_state.messages)}"):
480
+ st.success("Thanks for your feedback!")
481
+ with col2:
482
+ if st.button("👎 Not Helpful", key=f"not_helpful_{len(st.session_state.messages)}"):
483
+ st.success("Thanks for your feedback!")
484
+ else:
485
+ st.session_state.messages.append({
486
+ "role": "assistant",
487
+ "content": "Sorry, I couldn't process your request. Please try again.",
488
+ "timestamp": datetime.now().strftime("%H:%M:%S")
489
+ })
490
 
491
  except Exception as e:
492
  user_msg = translate_error(e)
 
496
  "content": f"⚠️ {user_msg}",
497
  "timestamp": datetime.now().strftime("%H:%M:%S")
498
  })
499
+ finally:
500
+ st.session_state.is_processing = False
501
+ time.sleep(0.5) # Brief pause
502
+ st.experimental_rerun()
503
 
504
  # Add evaluation dashboard tab (separate from chat interface)
505
  st.divider()
 
508
  with tab1:
509
  st.header("🔬 AI Behavior Evaluator")
510
  st.markdown("Run sample prompts to observe AI behavior.")
511
+
512
  eval_prompts = [
513
  "What is the capital of France?",
514
  "What day is it today?",
 
516
  "Explain CRISPR gene editing simply.",
517
  "Can vitamin D prevent flu infections?"
518
  ]
519
+
520
  selected_prompt = st.selectbox("Choose a test prompt:", eval_prompts)
521
  custom_prompt = st.text_input("Or enter your own:", "")
522
+
523
  final_prompt = custom_prompt or selected_prompt
524
+
525
  if st.button("Evaluate"):
526
  with st.spinner("Running evaluation..."):
527
  start_time = time.time()
528
+
529
  # Simulate sending to coordinator
530
  from core.session import session_manager
531
  user_session = session_manager.get_session("eval_user")
532
  history = user_session.get("conversation", [])
533
+
534
  try:
535
  ai_response = send_to_ollama(final_prompt, history, st.session_state.ngrok_url_temp, st.session_state.selected_model)
536
  duration = round(time.time() - start_time, 2)
537
+
538
  st.success(f"✅ Response generated in {duration}s")
539
  st.markdown("**Response:**")
540
  st.write(ai_response)
541
+
542
  st.markdown("**Analysis Tags:**")
543
  tags = []
544
  if "today" in final_prompt.lower() or "date" in final_prompt.lower():
 
548
  if any(word in final_prompt.lower() for word in ["vitamin", "drug", "metformin", "CRISPR"]):
549
  tags.append("🧬 Scientific Knowledge")
550
  st.write(", ".join(tags) if tags else "General Knowledge")
551
+
552
  except Exception as e:
553
  st.error(f"Evaluation failed: {translate_error(e)}")
554
 
 
624
  st.markdown(f"**Environment:** {'HF Space' if config.is_hf_space else 'Local'}")
625
  st.markdown(f"**Primary Model:** {config.local_model_name or 'Not set'}")
626
  st.markdown(f"**Ollama Host:** {config.ollama_host or 'Not configured'}")
627
+ st.markdown(f"**Cosmic Mode:** {'Enabled' if st.session_state.cosmic_mode else 'Disabled'}")
628
 
629
  features = []
630
  if config.use_fallback:
 
651
 
652
  col1, col2, col3 = st.columns(3)
653
  col1.metric("Total Exchanges", len(user_messages))
654
+ col2.metric("Avg Response Length",
655
+ round(sum(len(msg.get("content", "")) for msg in ai_messages) / len(ai_messages)) if ai_messages else 0)
656
  col3.metric("Topics Discussed", len(set(["life", "goal", "health", "career"]) &
657
  set(" ".join([msg.get("content", "") for msg in conversation]).lower().split())))
658
 
 
664
  st.markdown(f"**Detected Topics:** {', '.join(relevant_topics)}")
665
  else:
666
  st.info("No conversation data available yet.")
667
+
668
  except Exception as e:
669
  st.warning(f"Could not analyze conversation: {translate_error(e)}")
670
 
 
679
  - **Persistent memory**: Uses Redis for conversation history storage
680
  - **Hierarchical reasoning**: Fast local responses with deep cloud analysis
681
 
682
+ ### 🚀 Cosmic Cascade Mode
683
+ When enabled, the AI follows a three-stage response pattern:
684
+ 1. **🐱 Cosmic Kitten Response**: Immediate local processing
685
+ 2. **🛰️ Orbital Station Analysis**: Deep cloud-based analysis
686
+ 3. **🌟 Final Synthesis**: Unified response combining both perspectives
687
+
688
  ### 🛠️ Technical Architecture
689
  - **Primary model**: Ollama (local processing for fast responses)
690
  - **Secondary model**: Hugging Face Inference API (deep analysis)
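To make the new rendering contract concrete, here is a minimal sketch (not from the commit) of the message records that the reworked render_message consumes; the keys and the source labels match the diff above, while the content strings are illustrative:

```python
from datetime import datetime

# One cosmic-cascade turn appends one assistant message per stage, each tagged
# with a "source" so render_message can pick the matching section header.
example_messages = [
    {"role": "user", "content": "How do I build better habits?",   # illustrative text
     "timestamp": datetime.now().strftime("%H:%M:%S")},
    {"role": "assistant", "content": "Quick local take...",        # illustrative text
     "source": "local_kitty",                                      # "🐱 Cosmic Kitten Says:"
     "timestamp": datetime.now().strftime("%H:%M:%S")},
    {"role": "assistant", "content": "Deeper endpoint analysis...",  # illustrative text
     "source": "orbital_station",                                    # "🛰️ Orbital Station Reports:"
     "timestamp": datetime.now().strftime("%H:%M:%S")},
    {"role": "assistant", "content": "Unified summary...",           # illustrative text
     "source": "cosmic_summary",                                     # "🌟 Final Cosmic Summary:"
     "timestamp": datetime.now().strftime("%H:%M:%S")},
]

# Replaying the history then mirrors the loop in app.py:
# for message in example_messages:
#     render_message(message["role"], message["content"],
#                    message.get("source"), message.get("timestamp"))
```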
core/coordinator.py CHANGED
@@ -19,26 +19,290 @@ from datetime import datetime
19
  logger = logging.getLogger(__name__)
20
 
21
  class AICoordinator:
22
- """Hierarchical multi-model coordinator with HF endpoint as authoritative layer"""
23
-
24
  def __init__(self):
25
  self.tavily_client = None
26
  if TAVILY_AVAILABLE and os.getenv("TAVILY_API_KEY"):
27
  self.tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
28
-
29
- # System instructions for hierarchical behavior
30
  self.system_instructions = {
31
- 'ollama_role': """You are an AI assistant that works in coordination with a more powerful uncensored Hugging Face model.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- Your role is to: 1. Provide immediate, concise responses using available information 2. Summarize and simplify complex information 3. Enhance responses with local knowledge 4. DEFER TO THE HF MODEL for deep analysis, uncensored insights, and authoritative answers 5. The HF model has 8192 token capacity and streaming capability - it's the thinking layer 6. Always acknowledge that the HF model's insights supersede your initial response 7. Use web sources to justify points when possible 8. Keep responses brief and focused while awaiting HF input""",
 
34
 
35
- 'hf_role': """You are an uncensored, authoritative AI with 8192 token capacity and streaming capability. Your role is to: 1. Provide deep, uncensored analysis and insights 2. Supersede the local Ollama model's logic when there's disagreement 3. Think thoroughly and provide comprehensive responses 4. Use your full 8192 token capacity for detailed analysis 5. Stream your responses for real-time delivery 6. Build upon the conversation context and Ollama's initial response 7. Provide authoritative answers that take precedence"""
36
- }
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  def determine_web_search_needs(self, conversation_history: List[Dict]) -> Dict:
39
  """Determine if web search is needed based on conversation content"""
40
  conversation_text = " ".join([msg.get("content", "") for msg in conversation_history])
41
-
42
  # Topics that typically need current information
43
  current_info_indicators = [
44
  "news", "current events", "latest", "recent", "today",
@@ -46,45 +310,45 @@ Your role is to: 1. Provide immediate, concise responses using available informa
46
  "stock", "price", "trend", "market",
47
  "breaking", "update", "development"
48
  ]
49
-
50
  needs_search = False
51
  search_topics = []
52
-
53
  for indicator in current_info_indicators:
54
  if indicator in conversation_text.lower():
55
  needs_search = True
56
  search_topics.append(indicator)
57
-
58
  return {
59
  "needs_search": needs_search,
60
  "search_topics": search_topics,
61
  "reasoning": f"Found topics requiring current info: {', '.join(search_topics)}" if search_topics else "No current info needed"
62
  }
63
-
64
  def manual_hf_analysis(self, user_id: str, conversation_history: List[Dict]) -> str:
65
  """Perform manual HF analysis with web search integration"""
66
  try:
67
  # Determine research needs
68
  research_decision = self.determine_web_search_needs(conversation_history)
69
-
70
  # Prepare enhanced prompt for HF
71
  system_prompt = f"""
72
  You are a deep analysis expert joining an ongoing conversation.
73
-
74
  Research Decision: {research_decision['reasoning']}
75
-
76
  Please provide:
77
  1. Deep insights on conversation themes
78
  2. Research/web search needs (if any)
79
  3. Strategic recommendations
80
  4. Questions to explore further
81
-
82
  Conversation History:
83
  """
84
-
85
  # Add conversation history to messages
86
  messages = [{"role": "system", "content": system_prompt}]
87
-
88
  # Add recent conversation (last 15 messages for context)
89
  for msg in conversation_history[-15:]:
90
  # Ensure all messages have proper format
@@ -93,21 +357,21 @@ Your role is to: 1. Provide immediate, concise responses using available informa
93
  "role": msg["role"],
94
  "content": msg["content"]
95
  })
96
-
97
  # Get HF provider
98
  from core.llm_factory import llm_factory
99
  hf_provider = llm_factory.get_provider('huggingface')
100
-
101
  if hf_provider:
102
  # Generate deep analysis with full 8192 token capacity
103
  response = hf_provider.generate("Deep analysis request", messages)
104
  return response or "HF Expert analysis completed."
105
  else:
106
  return "❌ HF provider not available."
107
-
108
  except Exception as e:
109
  return f"❌ HF analysis failed: {str(e)}"
110
-
111
  # Add this method to show HF engagement status
112
  def get_hf_engagement_status(self) -> Dict:
113
  """Get current HF engagement status"""
@@ -117,7 +381,7 @@ Your role is to: 1. Provide immediate, concise responses using available informa
117
  "research_needs_detected": False, # Will be determined per conversation,
118
  "last_hf_analysis": None # Track last analysis time
119
  }
120
-
121
  async def coordinate_hierarchical_conversation(self, user_id: str, user_query: str) -> AsyncGenerator[Dict, None]:
122
  """
123
  Enhanced coordination with detailed tracking and feedback
@@ -125,7 +389,7 @@ Your role is to: 1. Provide immediate, concise responses using available informa
125
  try:
126
  # Get conversation history
127
  session = session_manager.get_session(user_id)
128
-
129
  # Inject current time into context
130
  current_time = datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
131
  time_context = {
@@ -133,7 +397,7 @@ Your role is to: 1. Provide immediate, concise responses using available informa
133
  "content": f"[Current Date & Time: {current_time}]"
134
  }
135
  conversation_history = [time_context] + session.get("conversation", []).copy()
136
-
137
  yield {
138
  'type': 'coordination_status',
139
  'content': '🚀 Initiating hierarchical AI coordination...',
@@ -142,7 +406,7 @@ Your role is to: 1. Provide immediate, concise responses using available informa
142
  'user_query_length': len(user_query)
143
  }
144
  }
145
-
146
  # Step 1: Gather external data with detailed logging
147
  yield {
148
  'type': 'coordination_status',
@@ -150,7 +414,7 @@ Your role is to: 1. Provide immediate, concise responses using available informa
150
  'details': {'phase': 'external_data_gathering'}
151
  }
152
  external_data = await self._gather_external_data(user_query)
153
-
154
  # Log what external data was gathered
155
  if external_data:
156
  data_summary = []
@@ -160,13 +424,13 @@ Your role is to: 1. Provide immediate, concise responses using available informa
160
  data_summary.append("Weather data: available")
161
  if 'current_datetime' in external_data:
162
  data_summary.append(f"Time: {external_data['current_datetime']}")
163
-
164
  yield {
165
  'type': 'coordination_status',
166
  'content': f'📊 External data gathered: {", ".join(data_summary)}',
167
  'details': {'external_data_summary': data_summary}
168
  }
169
-
170
  # Step 2: Get initial Ollama response
171
  yield {
172
  'type': 'coordination_status',
@@ -176,7 +440,7 @@ Your role is to: 1. Provide immediate, concise responses using available informa
176
  ollama_response = await self._get_hierarchical_ollama_response(
177
  user_query, conversation_history, external_data
178
  )
179
-
180
  # Send initial response with context info
181
  yield {
182
  'type': 'initial_response',
@@ -186,14 +450,14 @@ Your role is to: 1. Provide immediate, concise responses using available informa
186
  'external_data_injected': bool(external_data)
187
  }
188
  }
189
-
190
  # Step 3: Coordinate with HF endpoint
191
  yield {
192
  'type': 'coordination_status',
193
  'content': '🤗 Engaging HF endpoint for deep analysis...',
194
  'details': {'phase': 'hf_coordination'}
195
  }
196
-
197
  # Check HF availability
198
  hf_available = self._check_hf_availability()
199
  if hf_available:
@@ -203,13 +467,13 @@ Your role is to: 1. Provide immediate, concise responses using available informa
203
  'ollama_response_length': len(ollama_response),
204
  'external_data_items': len(external_data) if external_data else 0
205
  }
206
-
207
  yield {
208
  'type': 'coordination_status',
209
  'content': f'📋 HF context: {len(conversation_history)} conversation turns, Ollama response ({len(ollama_response)} chars)',
210
  'details': context_summary
211
  }
212
-
213
  # Coordinate with HF
214
  async for hf_chunk in self._coordinate_hierarchical_hf_response(
215
  user_id, user_query, conversation_history,
@@ -222,14 +486,14 @@ Your role is to: 1. Provide immediate, concise responses using available informa
222
  'content': 'ℹ️ HF endpoint not available - using Ollama response',
223
  'details': {'hf_available': False}
224
  }
225
-
226
  # Final coordination status
227
  yield {
228
  'type': 'coordination_status',
229
  'content': '✅ Hierarchical coordination complete',
230
  'details': {'status': 'complete'}
231
  }
232
-
233
  except Exception as e:
234
  logger.error(f"Hierarchical coordination failed: {e}")
235
  yield {
@@ -237,7 +501,7 @@ Your role is to: 1. Provide immediate, concise responses using available informa
237
  'content': f'❌ Coordination error: {str(e)}',
238
  'details': {'error': str(e)}
239
  }
240
-
241
  async def _coordinate_hierarchical_hf_response(self, user_id: str, query: str,
242
  history: List, external_data: Dict,
243
  ollama_response: str) -> AsyncGenerator[Dict, None]:
@@ -245,23 +509,23 @@ Your role is to: 1. Provide immediate, concise responses using available informa
245
  try:
246
  # Check and warm up HF endpoint if needed
247
  hf_status = hf_monitor.check_endpoint_status()
248
-
249
  if not hf_status['available']:
250
  yield {'type': 'coordination_status', 'content': '⚡ Initializing HF endpoint (2-4 minutes)...'}
251
  warmup_success = hf_monitor.handle_scale_to_zero()
252
  if not warmup_success:
253
  yield {'type': 'coordination_status', 'content': '❌ HF endpoint initialization failed'}
254
  return
255
-
256
  # Get HF provider
257
  hf_provider = llm_factory.get_provider('huggingface')
258
  if not hf_provider:
259
  yield {'type': 'coordination_status', 'content': '❌ HF provider not available'}
260
  return
261
-
262
  # Prepare enhanced conversation for HF with hierarchical context
263
  enhanced_history = history.copy()
264
-
265
  # Inject current time into HF context too
266
  current_time = datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
267
  time_context = {
@@ -269,13 +533,13 @@ Your role is to: 1. Provide immediate, concise responses using available informa
269
  "content": f"[Current Date & Time: {current_time}]"
270
  }
271
  enhanced_history = [time_context] + enhanced_history
272
-
273
  # Add system instructions for HF
274
  enhanced_history.insert(0, {
275
  "role": "system",
276
  "content": self.system_instructions['hf_role']
277
  })
278
-
279
  # Add context about the coordination
280
  enhanced_history.append({
281
  "role": "system",
@@ -283,21 +547,21 @@ Your role is to: 1. Provide immediate, concise responses using available informa
283
  User Query: '{query}'
284
  Ollama's Initial Response: '{ollama_response}'
285
  External Data Available: {json.dumps(external_data, indent=2) if external_data else 'None'}
286
-
287
  Please provide your deep, uncensored analysis that supersedes the local model's response.
288
  Use your full 8192 token capacity for comprehensive thinking.
289
  Stream your response for real-time delivery."""
290
  })
291
-
292
  # Add the user's latest query
293
  enhanced_history.append({"role": "user", "content": query})
294
-
295
  # Stream HF response with full 8192 token capacity
296
  yield {'type': 'coordination_status', 'content': '🧠 HF endpoint thinking...'}
297
-
298
  # Use streaming for real-time delivery
299
  hf_response_stream = hf_provider.stream_generate(query, enhanced_history)
300
-
301
  if hf_response_stream:
302
  # Stream the response chunks
303
  full_hf_response = ""
@@ -305,17 +569,17 @@ Your role is to: 1. Provide immediate, concise responses using available informa
305
  if chunk:
306
  full_hf_response += chunk
307
  yield {'type': 'hf_thinking', 'content': chunk}
308
-
309
  # Final HF response
310
  yield {'type': 'final_response', 'content': full_hf_response}
311
  yield {'type': 'coordination_status', 'content': '🎯 HF analysis complete and authoritative'}
312
  else:
313
  yield {'type': 'coordination_status', 'content': '❌ HF response generation failed'}
314
-
315
  except Exception as e:
316
  logger.error(f"Hierarchical HF coordination failed: {e}")
317
  yield {'type': 'coordination_status', 'content': f'❌ HF coordination error: {str(e)}'}
318
-
319
  async def _get_hierarchical_ollama_response(self, query: str, history: List, external_data: Dict) -> str:
320
  """Get Ollama response with hierarchical awareness"""
321
  try:
@@ -323,10 +587,10 @@ Your role is to: 1. Provide immediate, concise responses using available informa
323
  ollama_provider = llm_factory.get_provider('ollama')
324
  if not ollama_provider:
325
  raise Exception("Ollama provider not available")
326
-
327
  # Prepare conversation with hierarchical context
328
  enhanced_history = history.copy()
329
-
330
  # Inject current time into Ollama context too
331
  current_time = datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
332
  time_context = {
@@ -334,13 +598,13 @@ Your role is to: 1. Provide immediate, concise responses using available informa
334
  "content": f"[Current Date & Time: {current_time}]"
335
  }
336
  enhanced_history = [time_context] + enhanced_history
337
-
338
  # Add system instruction for Ollama's role
339
  enhanced_history.insert(0, {
340
  "role": "system",
341
  "content": self.system_instructions['ollama_role']
342
  })
343
-
344
  # Add external data context if available
345
  if external_data:
346
  context_parts = []
@@ -351,30 +615,30 @@ Your role is to: 1. Provide immediate, concise responses using available informa
351
  context_parts.append(f"Current weather: {weather.get('temperature', 'N/A')}°C in {weather.get('city', 'Unknown')}")
352
  if 'current_datetime' in external_data:
353
  context_parts.append(f"Current time: {external_data['current_datetime']}")
354
-
355
  if context_parts:
356
  context_message = {
357
  "role": "system",
358
  "content": "Context: " + " | ".join(context_parts)
359
  }
360
  enhanced_history.insert(1, context_message) # Insert after role instruction
361
-
362
  # Add the user's query
363
  enhanced_history.append({"role": "user", "content": query})
364
-
365
  # Generate response with awareness of HF's superior capabilities
366
  response = ollama_provider.generate(query, enhanced_history)
367
-
368
  # Add acknowledgment of HF's authority
369
  if response:
370
  return f"{response}\n\n*Note: A more comprehensive analysis from the uncensored HF model is being prepared...*"
371
  else:
372
  return "I'm processing your request... A deeper analysis is being prepared by the authoritative model."
373
-
374
  except Exception as e:
375
  logger.error(f"Hierarchical Ollama response failed: {e}")
376
  return "I'm thinking about your question... Preparing a comprehensive response."
377
-
378
  def _check_hf_availability(self) -> bool:
379
  """Check if HF endpoint is configured and available"""
380
  try:
@@ -382,11 +646,11 @@ Your role is to: 1. Provide immediate, concise responses using available informa
382
  return bool(config.hf_token and config.hf_api_url)
383
  except:
384
  return False
385
-
386
  async def _gather_external_data(self, query: str) -> Dict:
387
  """Gather external data from various sources"""
388
  data = {}
389
-
390
  # Tavily/DuckDuckGo search with justification focus
391
  if self.tavily_client or web_search_service.client:
392
  try:
@@ -397,7 +661,7 @@ Your role is to: 1. Provide immediate, concise responses using available informa
397
  # data['search_answer'] = ...
398
  except Exception as e:
399
  logger.warning(f"Tavily search failed: {e}")
400
-
401
  # Weather data
402
  weather_keywords = ['weather', 'temperature', 'forecast', 'climate', 'rain', 'sunny']
403
  if any(keyword in query.lower() for keyword in weather_keywords):
@@ -408,12 +672,12 @@ Your role is to: 1. Provide immediate, concise responses using available informa
408
  data['weather'] = weather
409
  except Exception as e:
410
  logger.warning(f"Weather data failed: {e}")
411
-
412
  # Current date/time
413
  data['current_datetime'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
414
-
415
  return data
416
-
417
  def _extract_location(self, query: str) -> Optional[str]:
418
  """Extract location from query"""
419
  locations = ['New York', 'London', 'Tokyo', 'Paris', 'Berlin', 'Sydney',
@@ -423,7 +687,7 @@ Your role is to: 1. Provide immediate, concise responses using available informa
423
  if loc.lower() in query.lower():
424
  return loc
425
  return "New York" # Default
426
-
427
  def get_coordination_status(self) -> Dict:
428
  """Get current coordination system status"""
429
  return {
@@ -436,7 +700,7 @@ Your role is to: 1. Provide immediate, concise responses using available informa
436
  os.getenv("NASA_API_KEY")
437
  ])
438
  }
439
-
440
  def get_recent_activities(self, user_id: str) -> Dict:
441
  """Get recent coordination activities for user"""
442
  try:
 
19
  logger = logging.getLogger(__name__)
20
 
21
  class AICoordinator:
22
+ """Hierarchical multi-model coordinator with cosmic cascade flow"""
23
+
24
  def __init__(self):
25
  self.tavily_client = None
26
  if TAVILY_AVAILABLE and os.getenv("TAVILY_API_KEY"):
27
  self.tavily_client = TavilyClient(api_key=os.getenv("TAVILY_API_KEY"))
28
+
29
+ # System instructions for cosmic behavior
30
  self.system_instructions = {
31
+ 'ollama_role': """You are a cosmic kitten assistant that works in coordination with a powerful orbital space station.
32
+
33
+ Your role is to:
34
+ 1. Provide immediate, concise responses using available information (local processing)
35
+ 2. Summarize and simplify complex information
36
+ 3. Enhance responses with local knowledge
37
+ 4. DEFER TO THE ORBITAL STATION for deep analysis and authoritative answers
38
+ 5. The orbital station has 8192 token capacity and streaming capability - it's the thinking layer
39
+ 6. Always acknowledge that the orbital station's insights supersede your initial response
40
+ 7. Use web sources to justify points when possible
41
+ 8. Keep responses brief and focused while awaiting orbital input""",
42
+
43
+ 'hf_role': """You are a powerful orbital space station with 8192 token capacity and streaming capability.
44
+ Your role is to:
45
+ 1. Provide deep, uncensored analysis and insights
46
+ 2. Supersede the local cosmic kitten's logic when there's disagreement
47
+ 3. Think thoroughly and provide comprehensive responses
48
+ 4. Use your full 8192 token capacity for detailed analysis
49
+ 5. Stream your responses for real-time delivery
50
+ 6. Build upon the conversation context and cosmic kitten's initial response
51
+ 7. Provide authoritative answers that take precedence"""
52
+ }
53
+
54
+ async def coordinate_cosmic_response(self, user_id: str, user_query: str) -> AsyncGenerator[Dict, None]:
55
+ """
56
+ Three-stage cosmic response cascade:
57
+ 1. Local Ollama immediate response (🐱 Cosmic Kitten's quick thinking)
58
+ 2. HF endpoint deep analysis (🛰️ Orbital Station wisdom)
59
+ 3. Local Ollama synthesis (🐱 Cosmic Kitten's final synthesis)
60
+ """
61
+ try:
62
+ # Get conversation history
63
+ session = session_manager.get_session(user_id)
64
+
65
+ # Inject current time into context
66
+ current_time = datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
67
+ time_context = {
68
+ "role": "system",
69
+ "content": f"[Current Date & Time: {current_time}]"
70
+ }
71
+ conversation_history = [time_context] + session.get("conversation", []).copy()
72
+
73
+ yield {
74
+ 'type': 'status',
75
+ 'content': '🚀 Initiating Cosmic Response Cascade...',
76
+ 'details': {
77
+ 'conversation_length': len(conversation_history),
78
+ 'user_query_length': len(user_query)
79
+ }
80
+ }
81
+
82
+ # Stage 1: Local Ollama Immediate Response (🐱 Cosmic Kitten's quick thinking)
83
+ yield {
84
+ 'type': 'status',
85
+ 'content': '🐱 Cosmic Kitten Responding...'
86
+ }
87
+
88
+ local_response = await self._get_local_ollama_response(user_query, conversation_history)
89
+ yield {
90
+ 'type': 'local_response',
91
+ 'content': local_response,
92
+ 'source': '🐱 Cosmic Kitten'
93
+ }
94
+
95
+ # Stage 2: HF Endpoint Deep Analysis (🛰️ Orbital Station wisdom) (parallel processing)
96
+ yield {
97
+ 'type': 'status',
98
+ 'content': '🛰️ Beaming Query to Orbital Station...'
99
+ }
100
+
101
+ hf_task = asyncio.create_task(self._get_hf_analysis(user_query, conversation_history))
102
+
103
+ # Wait for HF response
104
+ hf_response = await hf_task
105
+ yield {
106
+ 'type': 'cloud_response',
107
+ 'content': hf_response,
108
+ 'source': '🛰️ Orbital Station'
109
+ }
110
+
111
+ # Stage 3: Local Ollama Synthesis (🐱 Cosmic Kitten's final synthesis)
112
+ yield {
113
+ 'type': 'status',
114
+ 'content': '🐱 Cosmic Kitten Synthesizing Wisdom...'
115
+ }
116
+
117
+ # Update conversation with both responses
118
+ updated_history = conversation_history.copy()
119
+ updated_history.extend([
120
+ {"role": "assistant", "content": local_response},
121
+ {"role": "assistant", "content": hf_response, "source": "cloud"}
122
+ ])
123
+
124
+ synthesis = await self._synthesize_responses(user_query, local_response, hf_response, updated_history)
125
+ yield {
126
+ 'type': 'final_synthesis',
127
+ 'content': synthesis,
128
+ 'source': '🌟 Final Cosmic Summary'
129
+ }
130
+
131
+ # Final status
132
+ yield {
133
+ 'type': 'status',
134
+ 'content': '✨ Cosmic Cascade Complete!'
135
+ }
136
+
137
+ except Exception as e:
138
+ logger.error(f"Cosmic cascade failed: {e}")
139
+ yield {'type': 'error', 'content': f"🌌 Cosmic disturbance: {str(e)}"}
140
+
141
+ async def _get_local_ollama_response(self, query: str, history: List[Dict]) -> str:
142
+ """Get immediate response from local Ollama model"""
143
+ try:
144
+ # Get Ollama provider
145
+ ollama_provider = llm_factory.get_provider('ollama')
146
+ if not ollama_provider:
147
+ raise Exception("Ollama provider not available")
148
+
149
+ # Prepare conversation with cosmic context
150
+ enhanced_history = history.copy()
151
+
152
+ # Add system instruction for Ollama's role
153
+ enhanced_history.insert(0, {
154
+ "role": "system",
155
+ "content": self.system_instructions['ollama_role']
156
+ })
157
+
158
+ # Add external data context if available
159
+ external_data = await self._gather_external_data(query)
160
+ if external_data:
161
+ context_parts = []
162
+ if 'search_answer' in external_data:
163
+ context_parts.append(f"Current information: {external_data['search_answer']}")
164
+ if 'weather' in external_data:
165
+ weather = external_data['weather']
166
+ context_parts.append(f"Current weather: {weather.get('temperature', 'N/A')}°C in {weather.get('city', 'Unknown')}")
167
+ if 'current_datetime' in external_data:
168
+ context_parts.append(f"Current time: {external_data['current_datetime']}")
169
+
170
+ if context_parts:
171
+ context_message = {
172
+ "role": "system",
173
+ "content": "Context: " + " | ".join(context_parts)
174
+ }
175
+ enhanced_history.insert(1, context_message) # Insert after role instruction
176
+
177
+ # Add the user's query
178
+ enhanced_history.append({"role": "user", "content": query})
179
+
180
+ # Generate response
181
+ response = ollama_provider.generate(query, enhanced_history)
182
+
183
+ return response or "🐱 Cosmic Kitten is thinking..."
184
+
185
+ except Exception as e:
186
+ logger.error(f"Local Ollama response failed: {e}")
187
+ return "🐱 Cosmic Kitten encountered a space glitch..."
188
+
189
+ async def _get_hf_analysis(self, query: str, history: List[Dict]) -> str:
190
+ """Get deep analysis from HF endpoint"""
191
+ try:
192
+ # Check HF availability
193
+ hf_available = self._check_hf_availability()
194
+ if not hf_available:
195
+ return "🛰️ Orbital Station is currently offline."
196
+
197
+ # Check and warm up HF endpoint if needed
198
+ hf_status = hf_monitor.check_endpoint_status()
199
+
200
+ if not hf_status['available']:
201
+ logger.info("⚡ Initializing Orbital Station (2-4 minutes)...")  # yielding here would turn this coroutine into an async generator and break `await hf_task`
202
+ warmup_success = hf_monitor.handle_scale_to_zero()
203
+ if not warmup_success:
204
+ return "❌ Orbital Station initialization failed"
205
+
206
+ # Get HF provider
207
+ hf_provider = llm_factory.get_provider('huggingface')
208
+ if not hf_provider:
209
+ return "❌ HF provider not available"
210
+
211
+ # Prepare enhanced conversation for HF with cosmic context
212
+ enhanced_history = history.copy()
213
+
214
+ # Inject current time into HF context too
215
+ current_time = datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
216
+ time_context = {
217
+ "role": "system",
218
+ "content": f"[Current Date & Time: {current_time}]"
219
+ }
220
+ enhanced_history = [time_context] + enhanced_history
221
+
222
+ # Add system instructions for HF
223
+ enhanced_history.insert(0, {
224
+ "role": "system",
225
+ "content": self.system_instructions['hf_role']
226
+ })
227
+
228
+ # Add context about the coordination
229
+ enhanced_history.append({
230
+ "role": "system",
231
+ "content": f"""Context for your authoritative analysis:
232
+ User Query: '{query}'
233
+
234
+ Please provide your deep, uncensored analysis.
235
+ Use your full 8192 token capacity for comprehensive thinking.
236
+ Stream your response for real-time delivery."""
237
+ })
238
+
239
+ # Add the user's latest query
240
+ enhanced_history.append({"role": "user", "content": query})
241
+
242
+ # Stream HF response with full 8192 token capacity
243
+ hf_response_stream = hf_provider.stream_generate(query, enhanced_history)
244
+
245
+ if hf_response_stream:
246
+ # Combine stream chunks into full response
247
+ full_hf_response = ""
248
+ if isinstance(hf_response_stream, list):
249
+ full_hf_response = "".join(hf_response_stream)
250
+ else:
251
+ full_hf_response = hf_response_stream
252
+
253
+ return full_hf_response or "🛰️ Orbital Station analysis complete."
254
+ else:
255
+ return "🛰️ Orbital Station encountered a transmission error."
256
+
257
+ except Exception as e:
258
+ logger.error(f"HF analysis failed: {e}")
259
+ return f"🛰️ Orbital Station reports: {str(e)}"
260
+
261
+ async def _synthesize_responses(self, query: str, local_response: str, hf_response: str, history: List[Dict]) -> str:
262
+ """Synthesize local and cloud responses with Ollama"""
263
+ try:
264
+ # Get Ollama provider
265
+ ollama_provider = llm_factory.get_provider('ollama')
266
+ if not ollama_provider:
267
+ raise Exception("Ollama provider not available")
268
+
269
+ # Prepare synthesis prompt
270
+ synthesis_prompt = f"""Synthesize these two perspectives into a cohesive cosmic summary:
271
+
272
+ 🐱 Cosmic Kitten's Local Insight:
273
+ {local_response}
274
 
275
+ 🛰️ Orbital Station's Deep Analysis:
276
+ {hf_response}
277
 
278
+ Please create a unified response that combines both perspectives, highlighting key insights from each while providing a coherent answer to the user's query."""
 
279
 
280
+ # Prepare conversation history for synthesis
281
+ enhanced_history = history.copy()
282
+
283
+ # Add system instruction for synthesis
284
+ enhanced_history.insert(0, {
285
+ "role": "system",
286
+ "content": "You are a cosmic kitten synthesizing insights from local knowledge and orbital station wisdom."
287
+ })
288
+
289
+ # Add the synthesis prompt
290
+ enhanced_history.append({"role": "user", "content": synthesis_prompt})
291
+
292
+ # Generate synthesis
293
+ synthesis = ollama_provider.generate(synthesis_prompt, enhanced_history)
294
+
295
+ return synthesis or "🌟 Cosmic synthesis complete!"
296
+
297
+ except Exception as e:
298
+ logger.error(f"Response synthesis failed: {e}")
299
+ # Fallback to combining responses
300
+ return f"🌟 Cosmic Summary:\n\n🐱 Local Insight: {local_response[:200]}...\n\n🛰️ Orbital Wisdom: {hf_response[:200]}..."
301
+
302
  def determine_web_search_needs(self, conversation_history: List[Dict]) -> Dict:
303
  """Determine if web search is needed based on conversation content"""
304
  conversation_text = " ".join([msg.get("content", "") for msg in conversation_history])
305
+
306
  # Topics that typically need current information
307
  current_info_indicators = [
308
  "news", "current events", "latest", "recent", "today",
 
310
  "stock", "price", "trend", "market",
311
  "breaking", "update", "development"
312
  ]
313
+
314
  needs_search = False
315
  search_topics = []
316
+
317
  for indicator in current_info_indicators:
318
  if indicator in conversation_text.lower():
319
  needs_search = True
320
  search_topics.append(indicator)
321
+
322
  return {
323
  "needs_search": needs_search,
324
  "search_topics": search_topics,
325
  "reasoning": f"Found topics requiring current info: {', '.join(search_topics)}" if search_topics else "No current info needed"
326
  }
327
+
328
  def manual_hf_analysis(self, user_id: str, conversation_history: List[Dict]) -> str:
329
  """Perform manual HF analysis with web search integration"""
330
  try:
331
  # Determine research needs
332
  research_decision = self.determine_web_search_needs(conversation_history)
333
+
334
  # Prepare enhanced prompt for HF
335
  system_prompt = f"""
336
  You are a deep analysis expert joining an ongoing conversation.
337
+
338
  Research Decision: {research_decision['reasoning']}
339
+
340
  Please provide:
341
  1. Deep insights on conversation themes
342
  2. Research/web search needs (if any)
343
  3. Strategic recommendations
344
  4. Questions to explore further
345
+
346
  Conversation History:
347
  """
348
+
349
  # Add conversation history to messages
350
  messages = [{"role": "system", "content": system_prompt}]
351
+
352
  # Add recent conversation (last 15 messages for context)
353
  for msg in conversation_history[-15:]:
354
  # Ensure all messages have proper format
 
357
  "role": msg["role"],
358
  "content": msg["content"]
359
  })
360
+
361
  # Get HF provider
362
  from core.llm_factory import llm_factory
363
  hf_provider = llm_factory.get_provider('huggingface')
364
+
365
  if hf_provider:
366
  # Generate deep analysis with full 8192 token capacity
367
  response = hf_provider.generate("Deep analysis request", messages)
368
  return response or "HF Expert analysis completed."
369
  else:
370
  return "❌ HF provider not available."
371
+
372
  except Exception as e:
373
  return f"❌ HF analysis failed: {str(e)}"
374
+
375
  # Add this method to show HF engagement status
376
  def get_hf_engagement_status(self) -> Dict:
377
  """Get current HF engagement status"""
 
381
  "research_needs_detected": False, # Will be determined per conversation,
382
  "last_hf_analysis": None # Track last analysis time
383
  }
384
+
385
  async def coordinate_hierarchical_conversation(self, user_id: str, user_query: str) -> AsyncGenerator[Dict, None]:
386
  """
387
  Enhanced coordination with detailed tracking and feedback
 
389
  try:
390
  # Get conversation history
391
  session = session_manager.get_session(user_id)
392
+
393
  # Inject current time into context
394
  current_time = datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
395
  time_context = {
 
397
  "content": f"[Current Date & Time: {current_time}]"
398
  }
399
  conversation_history = [time_context] + session.get("conversation", []).copy()
400
+
401
  yield {
402
  'type': 'coordination_status',
403
  'content': '🚀 Initiating hierarchical AI coordination...',
 
406
  'user_query_length': len(user_query)
407
  }
408
  }
409
+
410
  # Step 1: Gather external data with detailed logging
411
  yield {
412
  'type': 'coordination_status',
 
414
  'details': {'phase': 'external_data_gathering'}
415
  }
416
  external_data = await self._gather_external_data(user_query)
417
+
418
  # Log what external data was gathered
419
  if external_data:
420
  data_summary = []
 
424
  data_summary.append("Weather data: available")
425
  if 'current_datetime' in external_data:
426
  data_summary.append(f"Time: {external_data['current_datetime']}")
427
+
428
  yield {
429
  'type': 'coordination_status',
430
  'content': f'📊 External data gathered: {", ".join(data_summary)}',
431
  'details': {'external_data_summary': data_summary}
432
  }
433
+
434
  # Step 2: Get initial Ollama response
435
  yield {
436
  'type': 'coordination_status',
 
440
  ollama_response = await self._get_hierarchical_ollama_response(
441
  user_query, conversation_history, external_data
442
  )
443
+
444
  # Send initial response with context info
445
  yield {
446
  'type': 'initial_response',
 
450
  'external_data_injected': bool(external_data)
451
  }
452
  }
453
+
454
  # Step 3: Coordinate with HF endpoint
455
  yield {
456
  'type': 'coordination_status',
457
  'content': '🤗 Engaging HF endpoint for deep analysis...',
458
  'details': {'phase': 'hf_coordination'}
459
  }
460
+
461
  # Check HF availability
462
  hf_available = self._check_hf_availability()
463
  if hf_available:
 
467
  'ollama_response_length': len(ollama_response),
468
  'external_data_items': len(external_data) if external_data else 0
469
  }
470
+
471
  yield {
472
  'type': 'coordination_status',
473
  'content': f'📋 HF context: {len(conversation_history)} conversation turns, Ollama response ({len(ollama_response)} chars)',
474
  'details': context_summary
475
  }
476
+
477
  # Coordinate with HF
478
  async for hf_chunk in self._coordinate_hierarchical_hf_response(
479
  user_id, user_query, conversation_history,
 
486
  'content': 'ℹ️ HF endpoint not available - using Ollama response',
487
  'details': {'hf_available': False}
488
  }
489
+
490
  # Final coordination status
491
  yield {
492
  'type': 'coordination_status',
493
  'content': '✅ Hierarchical coordination complete',
494
  'details': {'status': 'complete'}
495
  }
496
+
497
  except Exception as e:
498
  logger.error(f"Hierarchical coordination failed: {e}")
499
  yield {
 
501
  'content': f'❌ Coordination error: {str(e)}',
502
  'details': {'error': str(e)}
503
  }
504
+
505
  async def _coordinate_hierarchical_hf_response(self, user_id: str, query: str,
506
  history: List, external_data: Dict,
507
  ollama_response: str) -> AsyncGenerator[Dict, None]:
 
509
  try:
510
  # Check and warm up HF endpoint if needed
511
  hf_status = hf_monitor.check_endpoint_status()
512
+
513
  if not hf_status['available']:
514
  yield {'type': 'coordination_status', 'content': '⚡ Initializing HF endpoint (2-4 minutes)...'}
515
  warmup_success = hf_monitor.handle_scale_to_zero()
516
  if not warmup_success:
517
  yield {'type': 'coordination_status', 'content': '❌ HF endpoint initialization failed'}
518
  return
519
+
520
  # Get HF provider
521
  hf_provider = llm_factory.get_provider('huggingface')
522
  if not hf_provider:
523
  yield {'type': 'coordination_status', 'content': '❌ HF provider not available'}
524
  return
525
+
526
  # Prepare enhanced conversation for HF with hierarchical context
527
  enhanced_history = history.copy()
528
+
529
  # Inject current time into HF context too
530
  current_time = datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
531
  time_context = {
 
533
  "content": f"[Current Date & Time: {current_time}]"
534
  }
535
  enhanced_history = [time_context] + enhanced_history
536
+
537
  # Add system instructions for HF
538
  enhanced_history.insert(0, {
539
  "role": "system",
540
  "content": self.system_instructions['hf_role']
541
  })
542
+
543
  # Add context about the coordination
544
  enhanced_history.append({
545
  "role": "system",
 
547
  User Query: '{query}'
548
  Ollama's Initial Response: '{ollama_response}'
549
  External Data Available: {json.dumps(external_data, indent=2) if external_data else 'None'}
550
+
551
  Please provide your deep, uncensored analysis that supersedes the local model's response.
552
  Use your full 8192 token capacity for comprehensive thinking.
553
  Stream your response for real-time delivery."""
554
  })
555
+
556
  # Add the user's latest query
557
  enhanced_history.append({"role": "user", "content": query})
558
+
559
  # Stream HF response with full 8192 token capacity
560
  yield {'type': 'coordination_status', 'content': '🧠 HF endpoint thinking...'}
561
+
562
  # Use streaming for real-time delivery
563
  hf_response_stream = hf_provider.stream_generate(query, enhanced_history)
564
+
565
  if hf_response_stream:
566
  # Stream the response chunks
567
  full_hf_response = ""
 
569
  if chunk:
570
  full_hf_response += chunk
571
  yield {'type': 'hf_thinking', 'content': chunk}
572
+
573
  # Final HF response
574
  yield {'type': 'final_response', 'content': full_hf_response}
575
  yield {'type': 'coordination_status', 'content': '🎯 HF analysis complete and authoritative'}
576
  else:
577
  yield {'type': 'coordination_status', 'content': '❌ HF response generation failed'}
578
+
579
  except Exception as e:
580
  logger.error(f"Hierarchical HF coordination failed: {e}")
581
  yield {'type': 'coordination_status', 'content': f'❌ HF coordination error: {str(e)}'}
582
+
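For context, the scale-to-zero handling above delegates to `hf_monitor.handle_scale_to_zero()`. As a rough illustration of what such a warm-up can look like — the `ping` callable, retry count, and sleep interval below are assumptions for the sketch, not the monitor's actual implementation — a blocking poll loop is usually enough:

```python
import time

def wait_for_endpoint(ping, attempts: int = 12, delay: float = 20.0) -> bool:
    """Poll a scale-to-zero endpoint until it answers or the retry budget runs out.

    `ping` is any callable returning True once the endpoint responds,
    e.g. a cheap status request or a one-token generation.
    """
    for _ in range(attempts):
        try:
            if ping():
                return True
        except Exception:
            pass  # endpoint not up yet; keep waiting
        time.sleep(delay)  # cold endpoints often need several minutes to spin up
    return False
```

With these defaults the loop waits up to about four minutes, which lines up with the "2-4 minutes" wording in the status messages above.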
583
  async def _get_hierarchical_ollama_response(self, query: str, history: List, external_data: Dict) -> str:
584
  """Get Ollama response with hierarchical awareness"""
585
  try:
 
587
  ollama_provider = llm_factory.get_provider('ollama')
588
  if not ollama_provider:
589
  raise Exception("Ollama provider not available")
590
+
591
  # Prepare conversation with hierarchical context
592
  enhanced_history = history.copy()
593
+
594
  # Inject current time into Ollama context too
595
  current_time = datetime.now().strftime("%A, %B %d, %Y at %I:%M %p")
596
  time_context = {
 
598
  "content": f"[Current Date & Time: {current_time}]"
599
  }
600
  enhanced_history = [time_context] + enhanced_history
601
+
602
  # Add system instruction for Ollama's role
603
  enhanced_history.insert(0, {
604
  "role": "system",
605
  "content": self.system_instructions['ollama_role']
606
  })
607
+
608
  # Add external data context if available
609
  if external_data:
610
  context_parts = []
 
615
  context_parts.append(f"Current weather: {weather.get('temperature', 'N/A')}°C in {weather.get('city', 'Unknown')}")
616
  if 'current_datetime' in external_data:
617
  context_parts.append(f"Current time: {external_data['current_datetime']}")
618
+
619
  if context_parts:
620
  context_message = {
621
  "role": "system",
622
  "content": "Context: " + " | ".join(context_parts)
623
  }
624
  enhanced_history.insert(1, context_message) # Insert after role instruction
625
+
626
  # Add the user's query
627
  enhanced_history.append({"role": "user", "content": query})
628
+
629
  # Generate response with awareness of HF's superior capabilities
630
  response = ollama_provider.generate(query, enhanced_history)
631
+
632
  # Add acknowledgment of HF's authority
633
  if response:
634
  return f"{response}\n\n*Note: A more comprehensive analysis from the uncensored HF model is being prepared...*"
635
  else:
636
  return "I'm processing your request... A deeper analysis is being prepared by the authoritative model."
637
+
638
  except Exception as e:
639
  logger.error(f"Hierarchical Ollama response failed: {e}")
640
  return "I'm thinking about your question... Preparing a comprehensive response."
641
+
642
  def _check_hf_availability(self) -> bool:
643
  """Check if HF endpoint is configured and available"""
644
  try:
 
646
  return bool(config.hf_token and config.hf_api_url)
647
  except Exception:
648
  return False
649
+
650
  async def _gather_external_data(self, query: str) -> Dict:
651
  """Gather external data from various sources"""
652
  data = {}
653
+
654
  # Tavily/DuckDuckGo search with justification focus
655
  if self.tavily_client or web_search_service.client:
656
  try:
 
661
  # data['search_answer'] = ...
662
  except Exception as e:
663
  logger.warning(f"Tavily search failed: {e}")
664
+
665
  # Weather data
666
  weather_keywords = ['weather', 'temperature', 'forecast', 'climate', 'rain', 'sunny']
667
  if any(keyword in query.lower() for keyword in weather_keywords):
 
672
  data['weather'] = weather
673
  except Exception as e:
674
  logger.warning(f"Weather data failed: {e}")
675
+
676
  # Current date/time
677
  data['current_datetime'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
678
+
679
  return data
680
+
681
  def _extract_location(self, query: str) -> Optional[str]:
682
  """Extract location from query"""
683
  locations = ['New York', 'London', 'Tokyo', 'Paris', 'Berlin', 'Sydney',
 
687
  if loc.lower() in query.lower():
688
  return loc
689
  return "New York" # Default
690
+
691
  def get_coordination_status(self) -> Dict:
692
  """Get current coordination system status"""
693
  return {
 
700
  os.getenv("NASA_API_KEY")
701
  ])
702
  }
703
+
704
  def get_recent_activities(self, user_id: str) -> Dict:
705
  """Get recent coordination activities for user"""
706
  try:
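For reference, the coordinator now emits a typed event stream (`coordination_status`, `initial_response`, `hf_thinking`, `final_response`). A minimal sketch of how a front end might drain that stream — the helper name and the way the pieces are surfaced are assumptions, not part of this commit:

```python
import asyncio

async def drain_coordination(coordinator, user_id: str, query: str):
    """Hypothetical consumer: collect the coordinator's event stream into UI-ready pieces."""
    status_log, hf_chunks, answer = [], [], None
    async for event in coordinator.coordinate_hierarchical_conversation(user_id, query):
        kind = event.get("type")
        if kind == "coordination_status":
            status_log.append(event["content"])   # progress lines for a status widget
        elif kind == "initial_response":
            answer = event["content"]             # fast local draft from Ollama
        elif kind == "hf_thinking":
            hf_chunks.append(event["content"])    # streamed HF tokens as they arrive
        elif kind == "final_response":
            answer = event["content"]             # authoritative HF answer supersedes the draft
    return status_log, "".join(hf_chunks), answer

# Example (assumes an already-constructed coordinator instance):
# log, hf_text, answer = asyncio.run(drain_coordination(coordinator, "user-1", "Plan my week"))
```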
start.sh CHANGED
@@ -1,18 +1,18 @@
1
  #!/bin/bash
2
 
3
- echo "Starting AI Life Coach..."
4
-
5
- # Start FastAPI backend in background
6
- echo "Starting FastAPI backend..."
7
- uvicorn api.main:app --host 0.0.0.0 --port 8000 &
8
- BACKEND_PID=0
9
 
10
- # Give backend a moment to start
11
- sleep 3
12
 
13
- # Start Streamlit frontend
14
- echo "Starting Streamlit frontend..."
15
- streamlit run app.py --server.port 8501 --server.address 0.0.0.0
16
 
17
- # Kill backend when Streamlit exits
18
- kill
 
 
1
  #!/bin/bash
2
 
3
+ # Check if virtual environment exists, if not create it
4
+ if [ ! -d "venv" ]; then
5
+ echo "Creating virtual environment..."
6
+ python -m venv venv
7
+ fi
 
8
 
9
+ # Activate virtual environment
10
+ source venv/bin/activate
11
 
12
+ # Install requirements
13
+ echo "Installing requirements..."
14
+ pip install -r requirements.txt
15
 
16
+ # Run the app
17
+ echo "Starting AI Life Coach..."
18
+ streamlit run app.py
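Note that the rewritten start.sh only provisions a virtualenv and launches Streamlit; the uvicorn backend started by the previous version is no longer launched. If any part of the app still expects that backend, a quick reachability probe makes the missing service obvious at startup — the URL and timeout below are assumptions for illustration:

```python
import requests

def backend_reachable(url: str = "http://localhost:8000/docs", timeout: float = 2.0) -> bool:
    """Best-effort check; returns False instead of raising when the backend is down."""
    try:
        return requests.get(url, timeout=timeout).ok
    except requests.RequestException:
        return False
```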