import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import spaces

# Load model and tokenizer
model_id = "LiquidAI/LFM2-2.6B"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",  # Ensure proper device mapping for zero-gpu
)
tokenizer = AutoTokenizer.from_pretrained(model_id)


@spaces.GPU(duration=120)
def chat_with_model(message, history):
    # Format conversation history (ChatInterface passes (user, assistant) pairs)
    conversation = []
    for user_msg, assistant_msg in history:
        conversation.append({"role": "user", "content": user_msg})
        conversation.append({"role": "assistant", "content": assistant_msg})
    conversation.append({"role": "user", "content": message})

    # Apply chat template
    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
        tokenize=True,
    ).to(model.device)

    # Generate response
    output = model.generate(
        input_ids,
        do_sample=True,
        temperature=0.3,
        min_p=0.15,
        repetition_penalty=1.05,
        max_new_tokens=512,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens, skipping the prompt
    response = tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return response


# Create Gradio interface
iface = gr.ChatInterface(
    fn=chat_with_model,
    title="LFM2-2.6B Chatbot",
    description="A chatbot powered by LiquidAI/LFM2-2.6B. Built with [anycoder](https://huggingface.co/spaces/akhaliq/anycoder).",
    theme="soft",
    examples=[
        ["What is C. elegans?"],
        ["Write a short story about a robot who discovers music."],
        ["Explain the importance of the transformer architecture in NLP."],
    ],
)

iface.launch()
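
# ---------------------------------------------------------------------------
# Optional streaming variant -- a sketch, not part of the original app.
# gr.ChatInterface also accepts a generator function, so the reply can be
# streamed token by token with transformers.TextIteratorStreamer instead of
# returned in one piece. To actually use it, define this function above and
# pass fn=chat_with_model_stream to gr.ChatInterface (iface.launch() blocks,
# so code placed after it never runs while the server is live). The sampling
# settings mirror chat_with_model; the function name and everything else
# here are assumptions.

from threading import Thread

from transformers import TextIteratorStreamer


@spaces.GPU(duration=120)
def chat_with_model_stream(message, history):
    # Rebuild the conversation exactly as chat_with_model does
    conversation = []
    for user_msg, assistant_msg in history:
        conversation.append({"role": "user", "content": user_msg})
        conversation.append({"role": "assistant", "content": assistant_msg})
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        conversation,
        add_generation_prompt=True,
        return_tensors="pt",
        tokenize=True,
    ).to(model.device)

    # skip_prompt drops the input tokens; skip_special_tokens cleans the text
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

    # model.generate() blocks until done, so run it in a background thread
    # and consume the streamer on this one
    thread = Thread(
        target=model.generate,
        kwargs=dict(
            inputs=input_ids,
            streamer=streamer,
            do_sample=True,
            temperature=0.3,
            min_p=0.15,
            repetition_penalty=1.05,
            max_new_tokens=512,
            pad_token_id=tokenizer.eos_token_id,
        ),
    )
    thread.start()

    # Yield the growing partial reply; ChatInterface re-renders on each yield
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial
    thread.join()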