import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces

# Load model and tokenizer
model_id = "LiquidAI/LFM2-2.6B"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",  # Ensure proper device mapping for zero-gpu
)
tokenizer = AutoTokenizer.from_pretrained(model_id)


@spaces.GPU(duration=120)
def chat_with_model(message, history):
    # Format conversation history (ChatInterface passes (user, assistant) pairs)
    conversation = []
    for user_msg, assistant_msg in history:
        conversation.append({"role": "user", "content": user_msg})
        conversation.append({"role": "assistant", "content": assistant_msg})
    conversation.append({"role": "user", "content": message})

    # Apply chat template
    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
        tokenize=True,
    ).to(model.device)

    # Generate response
    output = model.generate(
        input_ids,
        do_sample=True,
        temperature=0.3,
        min_p=0.15,
        repetition_penalty=1.05,
        max_new_tokens=512,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens, skipping the prompt
    response = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return response


# Create Gradio interface
iface = gr.ChatInterface(
    fn=chat_with_model,
    title="LFM2-2.6B Chatbot",
    description="A chatbot powered by LiquidAI/LFM2-2.6B. Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder).",
    theme="soft",
    examples=[
        ["What is C. elegans?"],
        ["Write a short story about a robot who discovers music."],
        ["Explain the importance of the transformer architecture in NLP."],
    ],
)

iface.launch()
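
# ---------------------------------------------------------------------------
# Optional streaming variant -- a sketch, not part of the original app.
# gr.ChatInterface also accepts a generator function, so the reply can be
# streamed token by token with transformers.TextIteratorStreamer instead of
# returned in one piece. To actually use it, define this function above and
# pass fn=chat_with_model_stream to gr.ChatInterface (iface.launch() blocks,
# so code placed after it never runs while the server is live). The sampling
# settings mirror chat_with_model; the function name and everything else
# here are assumptions.

from threading import Thread

from transformers import TextIteratorStreamer


@spaces.GPU(duration=120)
def chat_with_model_stream(message, history):
    # Rebuild the conversation exactly as chat_with_model does
    conversation = []
    for user_msg, assistant_msg in history:
        conversation.append({"role": "user", "content": user_msg})
        conversation.append({"role": "assistant", "content": assistant_msg})
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
        tokenize=True,
    ).to(model.device)

    # skip_prompt drops the input tokens; skip_special_tokens cleans the text
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # model.generate() blocks until done, so run it in a background thread
    # and consume the streamer on this one
    thread = Thread(
        target=model.generate,
        kwargs=dict(
            inputs=input_ids,
            streamer=streamer,
            do_sample=True,
            temperature=0.3,
            min_p=0.15,
            repetition_penalty=1.05,
            max_new_tokens=512,
            pad_token_id=tokenizer.eos_token_id,
        ),
    )
    thread.start()

    # Yield the growing partial reply; ChatInterface re-renders on each yield
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial
    thread.join()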