import gradio as gr

from backend.council import stage1_collect_responses, stage2_collect_rankings, stage3_synthesize_final_stream
from backend.config import COUNCIL_MODELS, CHAIRMAN_MODEL
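
# A sketch of the data shapes assumed below, inferred from how the results are used
# in this file (not guaranteed by backend.council; verify against its actual return types):
#   stage1_collect_responses(question)          -> list of {"model": str, "response": str}
#   stage2_collect_rankings(question, stage1)   -> (list of {"model": str, ...}, extra ranking data)
#   stage3_synthesize_final_stream(q, s1, s2)   -> async iterator yielding text chunks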


async def ask_council(question: str, progress=gr.Progress()):
    """
    Ask the LLM Council a question.

    The council consists of multiple advanced LLMs (currently: {models}) that:
    1. Individually answer the question
    2. Rank each other's answers
    3. Synthesize a final best answer (Chairman: {chairman})

    Args:
        question: The user's question to be discussed by the council.
        progress: Gradio progress tracker.

    Yields:
        Status updates and finally the synthesized answer.
    """.format(
        models=", ".join([m.split("/")[-1] for m in COUNCIL_MODELS]),
        chairman=CHAIRMAN_MODEL.split("/")[-1],
    )

    try:
        buffer = ""

        progress(0.1, desc="Stage 1: Collecting individual responses...")
        buffer += "## 🟡 Stage 1: Collecting individual responses from council members...\n\n"
        yield buffer

        stage1_results = await stage1_collect_responses(question)

        if not stage1_results:
            buffer += "\n❌ The council failed to generate a response."
            yield buffer
            return

        buffer += f"### ✅ Received {len(stage1_results)} responses:\n"
        for res in stage1_results:
            model_name = res["model"].split("/")[-1]
            preview = res["response"][:100].replace("\n", " ") + "..."
            buffer += f"- **{model_name}**: {preview}\n"
        buffer += "\n---\n\n"
        yield buffer

        progress(0.4, desc="Stage 2: Council members are ranking responses...")
        buffer += "## 🟡 Stage 2: Council members are ranking each other's responses...\n\n"
        yield buffer

        stage2_results, _ = await stage2_collect_rankings(question, stage1_results)
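        # Only the per-model ranking entries are needed for the status UI; the second
        # return value (presumably aggregated ranking data) is discarded here.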

        buffer += "### ✅ Rankings Collected:\n"
        for res in stage2_results:
            model_name = res["model"].split("/")[-1]
            buffer += f"- **{model_name}** has submitted their rankings.\n"
        buffer += "\n---\n\n"
        yield buffer

        progress(0.7, desc="Stage 3: Chairman is synthesizing the final answer...")
        buffer += "## 🟡 Stage 3: Chairman is synthesizing the final answer...\n\n"
        yield buffer

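        # Stream the chairman's synthesis: each yield re-renders the Markdown output
        # with the accumulated status buffer plus the partial answer received so far.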
        full_response = ""
        async for chunk in stage3_synthesize_final_stream(question, stage1_results, stage2_results):
            full_response += chunk
            yield buffer + full_response

        progress(1.0, desc="Complete!")

        if not full_response:
            buffer += "\n❌ The council failed to generate a final synthesis."
            yield buffer
            return

        final_buffer = buffer.replace(
            "## 🟡 Stage 3: Chairman is synthesizing the final answer...",
            "## 🟢 Stage 3: Final Answer",
        )
        yield final_buffer + full_response

    except Exception as e:
        yield f"❌ Error consulting the council: {str(e)}"

description = """
An MCP server that consults a council of LLMs to answer questions. [LLM Council](https://github.com/machine-theory/lm-council?tab=readme-ov-file) is a project by Machine Theory
and Andrej Karpathy. This space exposes it as an MCP server so you can use it in your own projects.

<img src="https://pbs.twimg.com/media/G6ZZO7ragAAtnCZ?format=jpg" alt="MCP Server" style="width: 300px; height: auto; text-align: center;">

⚠️ We're using 5 models in the council, so it takes a minute to answer.
"""

demo = gr.Interface(
    fn=ask_council,
    inputs=gr.Textbox(lines=2, placeholder="Ask the council..."),
    outputs=gr.Markdown(height=200),
    title="LLM Council MCP Server",
    description=description,
)


if __name__ == "__main__":
    demo.launch(mcp_server=True, show_error=True)
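
    # A minimal sketch of how an MCP client could point at this server once it is
    # running locally. The endpoint path below is Gradio's default for MCP over SSE;
    # adjust host, port, and path if your deployment differs:
    #
    #   {
    #     "mcpServers": {
    #       "llm-council": {
    #         "url": "http://localhost:7860/gradio_api/mcp/sse"
    #       }
    #     }
    #   }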