"""Minimal multi-model chat UI that streams replies from the HF Inference API."""
import os

import gradio as gr
import huggingface_hub as hf


def go(m, q):
    """Stream a chat reply for question *q* from model *m*.

    Yields the *accumulated* reply text: each yield from a Gradio generator
    replaces the output component's value, so yielding bare deltas (as the
    original did) would show only the latest token instead of the full reply.

    Raises:
        gr.Error: if the HF_TOKEN environment variable is not set.
    """
    token = os.environ.get("HF_TOKEN")
    if not token:
        # Fail with a user-visible message instead of a KeyError traceback.
        raise gr.Error("Set the HF_TOKEN environment variable first.")
    client = hf.InferenceClient(api_key=token)
    stream = client.chat.completions.create(
        model=m,
        messages=[{"role": "user", "content": q}],
        stream=True,
    )
    parts = []
    for chunk in stream:
        # Some stream events carry no content (e.g. role/finish markers).
        if chunk.choices and chunk.choices[0].delta.content:
            parts.append(chunk.choices[0].delta.content)
            yield "".join(parts)


models = [
    "deepseek-ai/DeepSeek-V3.1",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "Qwen/Qwen2.5-7B-Instruct",
    "2F-AI/Titan-Atom",
    "NobodyExistsOnTheInternet/K3-Q4-GGUF",
]

# Guard the launch so importing this module doesn't start a web server.
if __name__ == "__main__":
    gr.Interface(
        fn=go,
        inputs=[gr.Dropdown(models), gr.Textbox(label="Ask")],
        outputs=gr.Textbox(label="Reply"),
        title="Multi-Model Chat",
    ).launch(share=True)