import gradio as gr

from backend.council import (
    stage1_collect_responses,
    stage2_collect_rankings,
    stage3_synthesize_final_stream,
)
from backend.config import COUNCIL_MODELS, CHAIRMAN_MODEL


async def ask_council(question: str, progress=gr.Progress()):
    """
    Ask the LLM Council a question.

    The council consists of multiple advanced LLMs (currently: {models}) that:
    1. Individually answer the question
    2. Rank each other's answers
    3. Synthesize a final best answer (Chairman: {chairman})

    Args:
        question: The user's question to be discussed by the council.
        progress: Gradio progress tracker.

    Yields:
        Status updates and finally the synthesized answer.
    """
    try:
        buffer = ""

        # Stage 1: Collect individual responses
        progress(0.1, desc="Stage 1: Collecting individual responses...")
        buffer += "## 🔵 Stage 1: Collecting individual responses from council members...\n\n"
        yield buffer

        stage1_results = await stage1_collect_responses(question)
        if not stage1_results:
            buffer += "\n❌ The council failed to generate a response."
            yield buffer
            return

        # Format Stage 1 results
        buffer += f"### ✅ Received {len(stage1_results)} responses:\n"
        for res in stage1_results:
            model_name = res["model"].split("/")[-1]
            preview = res["response"][:100].replace("\n", " ") + "..."
            buffer += f"- **{model_name}**: {preview}\n"
        buffer += "\n---\n\n"
        yield buffer

        # Stage 2: Collect rankings
        progress(0.4, desc="Stage 2: Council members are ranking responses...")
        buffer += "## 🔵 Stage 2: Council members are ranking each other's responses...\n\n"
        yield buffer

        stage2_results, _ = await stage2_collect_rankings(question, stage1_results)

        # Format Stage 2 results
        buffer += "### ✅ Rankings Collected:\n"
        for res in stage2_results:
            model_name = res["model"].split("/")[-1]
            # Just acknowledge the submission; the full rankings are not displayed here
            buffer += f"- **{model_name}** has submitted their rankings.\n"
        buffer += "\n---\n\n"
        yield buffer

        # Stage 3: Synthesize final answer
        progress(0.7, desc="Stage 3: Chairman is synthesizing the final answer...")
        buffer += "## 🔵 Stage 3: Chairman is synthesizing the final answer...\n\n"
        yield buffer

        full_response = ""
        async for chunk in stage3_synthesize_final_stream(question, stage1_results, stage2_results):
            full_response += chunk
            yield buffer + full_response

        progress(1.0, desc="Complete!")

        if not full_response:
            buffer += "\n❌ The council failed to generate a final synthesis."
            yield buffer
            return

        # Keep the history but mark Stage 3 as done
        final_buffer = buffer.replace(
            "## 🔵 Stage 3: Chairman is synthesizing the final answer...",
            "## 🟢 Stage 3: Final Answer",
        )
        yield final_buffer + full_response

    except Exception as e:
        yield f"❌ Error consulting the council: {str(e)}"


# Fill the model names into the docstring (which serves as the MCP tool description).
# Calling .format() directly on the docstring literal would prevent Python from
# recognizing it as the function's __doc__, so the formatting is done here instead.
ask_council.__doc__ = ask_council.__doc__.format(
    models=", ".join(m.split("/")[-1] for m in COUNCIL_MODELS),
    chairman=CHAIRMAN_MODEL.split("/")[-1],
)


description = """
An MCP server that consults a council of LLMs to answer questions.

[LLM Council](https://github.com/machine-theory/lm-council?tab=readme-ov-file) is a project by Machine Theory and Andrej Karpathy.
This space exposes it as an MCP server so you can use it in your own projects.

⚠️ We're using 5 models in the council, so it takes a minute to answer.
"""

demo = gr.Interface(
    fn=ask_council,
    inputs=gr.Textbox(lines=2, placeholder="Ask the council..."),
    outputs=gr.Markdown(height=200),
    title="LLM Council MCP Server",
    description=description,
)

if __name__ == "__main__":
    # Launch with mcp_server=True to expose the app as an MCP server
    demo.launch(mcp_server=True, show_error=True)
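
# --- Usage note (illustrative; the exact endpoint is an assumption based on current Gradio docs) ---
# When launched with mcp_server=True, Gradio prints the MCP endpoint URL on startup,
# typically something like:
#   http://localhost:7860/gradio_api/mcp/sse
# An MCP-capable client can be pointed at that URL; the client configuration format
# varies, so check the startup logs for the actual address served by this app.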