Spaces:

EmmyHenz001
/

claude-chat

Running

App Files Files Community

EmmyHenz001 committed on Oct 10

Commit

cfa8216

verified ·

1 Parent(s): 3e31527

Update app.py

Browse files

Files changed (1) hide show

app.py +65 -60

app.py CHANGED Viewed

@@ -1,73 +1,69 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForCausalLM
-import torch
 # Title and description
 title = "🧠 Claude-3.7-Sonnet-Reasoning-Gemma3-12B Chat"
 description = """
-Chat with the Claude-3.7-Sonnet-Reasoning-Gemma3-12B model. This is a powerful reasoning model fine-tuned from Gemma2.
 """
-# Initialize model and tokenizer
-def load_model():
     try:
-        tokenizer = AutoTokenizer.from_pretrained("reedmayhew/claude-3.7-sonnet-reasoning-gemma3-12B")
-        model = AutoModelForCausalLM.from_pretrained(
-            "reedmayhew/claude-3.7-sonnet-reasoning-gemma3-12B",
-            torch_dtype=torch.float16,
-            device_map="auto",
-            low_cpu_mem_usage=True
-        )
-        return model, tokenizer
     except Exception as e:
-        print(f"Error loading model: {e}")
-        return None, None
-model, tokenizer = load_model()
 def chat_with_claude(message, chat_history):
-    """Chat function with the Claude model"""
-    if model is None or tokenizer is None:
-        return "❌ Model not loaded. Please check the logs for errors.", chat_history
-    try:
-        # Format the conversation history for the model
-        conversation = ""
-        for user_msg, bot_msg in chat_history:
-            conversation += f"Human: {user_msg}\nAssistant: {bot_msg}\n"
-        conversation += f"Human: {message}\nAssistant:"
-        # Tokenize input
-        inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=2048)
-        inputs = {k: v.to(model.device) for k, v in inputs.items()}
-        # Generate response
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=512,
-                temperature=0.7,
-                do_sample=True,
-                top_p=0.9,
-                pad_token_id=tokenizer.eos_token_id,
-                repetition_penalty=1.1
-            )
-        # Decode response
-        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Extract only the new assistant response
-        assistant_response = response.split("Assistant:")[-1].strip()
-        # Add to chat history
-        chat_history.append((message, assistant_response))
-        return "", chat_history
-    except Exception as e:
-        error_msg = f"❌ Error generating response: {str(e)}"
-        chat_history.append((message, error_msg))
-        return "", chat_history
 def clear_chat():
     """Clear the chat history"""
@@ -78,9 +74,17 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown(f"# {title}")
     gr.Markdown(description)
     chatbot = gr.Chatbot(
         label="Chat with Claude",
-        height=500
     )
     with gr.Row():
@@ -88,7 +92,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             label="Type your message here...",
             placeholder="Ask me anything...",
             lines=2,
-            scale=4
         )
         submit_btn = gr.Button("Send", variant="primary", scale=1)

 import gradio as gr
+import requests
+import os
 # Title and description
 title = "🧠 Claude-3.7-Sonnet-Reasoning-Gemma3-12B Chat"
 description = """
+Chat with the Claude-3.7-Sonnet-Reasoning-Gemma3-12B model using Hugging Face Inference API.
 """
+# Hugging Face API setup
+API_URL = "https://api-inference.huggingface.co/models/reedmayhew/claude-3.7-sonnet-reasoning-gemma3-12B"
+headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}
+def query_hf_api(payload):
+    """Query Hugging Face Inference API"""
     try:
+        response = requests.post(API_URL, headers=headers, json=payload)
+        return response.json()
     except Exception as e:
+        return {"error": str(e)}
 def chat_with_claude(message, chat_history):
+    """Chat function using HF Inference API"""
+    # Build conversation context
+    conversation = ""
+    for msg in chat_history:
+        if msg["role"] == "user":
+            conversation += f"Human: {msg['content']}\n"
+        else:
+            conversation += f"Assistant: {msg['content']}\n"
+    conversation += f"Human: {message}\nAssistant:"
+    # Call the API
+    payload = {
+        "inputs": conversation,
+        "parameters": {
+            "max_new_tokens": 500,
+            "temperature": 0.7,
+            "top_p": 0.9,
+            "do_sample": True
+        }
+    }
+    response = query_hf_api(payload)
+    if "error" in response:
+        assistant_response = f"❌ API Error: {response['error']}"
+    elif isinstance(response, list) and len(response) > 0:
+        # Extract the generated text
+        if "generated_text" in response[0]:
+            full_text = response[0]["generated_text"]
+            # Extract only the assistant's response
+            assistant_response = full_text.split("Assistant:")[-1].strip()
+        else:
+            assistant_response = "❌ Unexpected response format from API"
+    else:
+        assistant_response = "❌ No response from API"
+    # Update chat history
+    chat_history.append({"role": "user", "content": message})
+    chat_history.append({"role": "assistant", "content": assistant_response})
+    return "", chat_history
 def clear_chat():
     """Clear the chat history"""
     gr.Markdown(f"# {title}")
     gr.Markdown(description)
+    # Add HF Token info
+    gr.Markdown("""
+    **Note:** To use this chat, you need to add your Hugging Face token in the Space settings:
+    1. Go to Settings → Repository secrets
+    2. Add `HF_TOKEN` with your Hugging Face token
+    """)
     chatbot = gr.Chatbot(
         label="Chat with Claude",
+        height=500,
+        type="messages"  # Fixed the deprecation warning
     )
     with gr.Row():
             label="Type your message here...",
             placeholder="Ask me anything...",
             lines=2,
+            scale=4,
+            container=False
         )
         submit_btn = gr.Button("Send", variant="primary", scale=1)