EmmyHenz001 committed on
Commit
cfa8216
·
verified ·
1 Parent(s): 3e31527

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -60
app.py CHANGED
@@ -1,73 +1,69 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForCausalLM
3
- import torch
4
 
5
  # Title and description
6
  title = "🧠 Claude-3.7-Sonnet-Reasoning-Gemma3-12B Chat"
7
  description = """
8
- Chat with the Claude-3.7-Sonnet-Reasoning-Gemma3-12B model. This is a powerful reasoning model fine-tuned from Gemma2.
9
  """
10
 
11
- # Initialize model and tokenizer
12
- def load_model():
 
 
 
 
13
  try:
14
- tokenizer = AutoTokenizer.from_pretrained("reedmayhew/claude-3.7-sonnet-reasoning-gemma3-12B")
15
- model = AutoModelForCausalLM.from_pretrained(
16
- "reedmayhew/claude-3.7-sonnet-reasoning-gemma3-12B",
17
- torch_dtype=torch.float16,
18
- device_map="auto",
19
- low_cpu_mem_usage=True
20
- )
21
- return model, tokenizer
22
  except Exception as e:
23
- print(f"Error loading model: {e}")
24
- return None, None
25
-
26
- model, tokenizer = load_model()
27
 
28
  def chat_with_claude(message, chat_history):
29
- """Chat function with the Claude model"""
30
- if model is None or tokenizer is None:
31
- return "❌ Model not loaded. Please check the logs for errors.", chat_history
32
 
33
- try:
34
- # Format the conversation history for the model
35
- conversation = ""
36
- for user_msg, bot_msg in chat_history:
37
- conversation += f"Human: {user_msg}\nAssistant: {bot_msg}\n"
38
- conversation += f"Human: {message}\nAssistant:"
39
-
40
- # Tokenize input
41
- inputs = tokenizer(conversation, return_tensors="pt", truncation=True, max_length=2048)
42
- inputs = {k: v.to(model.device) for k, v in inputs.items()}
43
-
44
- # Generate response
45
- with torch.no_grad():
46
- outputs = model.generate(
47
- **inputs,
48
- max_new_tokens=512,
49
- temperature=0.7,
50
- do_sample=True,
51
- top_p=0.9,
52
- pad_token_id=tokenizer.eos_token_id,
53
- repetition_penalty=1.1
54
- )
55
-
56
- # Decode response
57
- response = tokenizer.decode(outputs[0], skip_special_tokens=True)
58
-
59
- # Extract only the new assistant response
60
- assistant_response = response.split("Assistant:")[-1].strip()
61
-
62
- # Add to chat history
63
- chat_history.append((message, assistant_response))
64
-
65
- return "", chat_history
66
-
67
- except Exception as e:
68
- error_msg = f"❌ Error generating response: {str(e)}"
69
- chat_history.append((message, error_msg))
70
- return "", chat_history
 
 
 
71
 
72
  def clear_chat():
73
  """Clear the chat history"""
@@ -78,9 +74,17 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
78
  gr.Markdown(f"# {title}")
79
  gr.Markdown(description)
80
 
 
 
 
 
 
 
 
81
  chatbot = gr.Chatbot(
82
  label="Chat with Claude",
83
- height=500
 
84
  )
85
 
86
  with gr.Row():
@@ -88,7 +92,8 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
88
  label="Type your message here...",
89
  placeholder="Ask me anything...",
90
  lines=2,
91
- scale=4
 
92
  )
93
  submit_btn = gr.Button("Send", variant="primary", scale=1)
94
 
 
1
  import gradio as gr
2
+ import requests
3
+ import os
4
 
5
  # Title and description
6
  title = "🧠 Claude-3.7-Sonnet-Reasoning-Gemma3-12B Chat"
7
  description = """
8
+ Chat with the Claude-3.7-Sonnet-Reasoning-Gemma3-12B model using Hugging Face Inference API.
9
  """
10
 
11
+ # Hugging Face API setup
12
+ API_URL = "https://api-inference.huggingface.co/models/reedmayhew/claude-3.7-sonnet-reasoning-gemma3-12B"
13
+ headers = {"Authorization": f"Bearer {os.environ.get('HF_TOKEN', '')}"}
14
+
15
+ def query_hf_api(payload):
16
+ """Query Hugging Face Inference API"""
17
  try:
18
+ response = requests.post(API_URL, headers=headers, json=payload)
19
+ return response.json()
 
 
 
 
 
 
20
  except Exception as e:
21
+ return {"error": str(e)}
 
 
 
22
 
23
  def chat_with_claude(message, chat_history):
24
+ """Chat function using HF Inference API"""
 
 
25
 
26
+ # Build conversation context
27
+ conversation = ""
28
+ for msg in chat_history:
29
+ if msg["role"] == "user":
30
+ conversation += f"Human: {msg['content']}\n"
31
+ else:
32
+ conversation += f"Assistant: {msg['content']}\n"
33
+
34
+ conversation += f"Human: {message}\nAssistant:"
35
+
36
+ # Call the API
37
+ payload = {
38
+ "inputs": conversation,
39
+ "parameters": {
40
+ "max_new_tokens": 500,
41
+ "temperature": 0.7,
42
+ "top_p": 0.9,
43
+ "do_sample": True
44
+ }
45
+ }
46
+
47
+ response = query_hf_api(payload)
48
+
49
+ if "error" in response:
50
+ assistant_response = f"❌ API Error: {response['error']}"
51
+ elif isinstance(response, list) and len(response) > 0:
52
+ # Extract the generated text
53
+ if "generated_text" in response[0]:
54
+ full_text = response[0]["generated_text"]
55
+ # Extract only the assistant's response
56
+ assistant_response = full_text.split("Assistant:")[-1].strip()
57
+ else:
58
+ assistant_response = "❌ Unexpected response format from API"
59
+ else:
60
+ assistant_response = "❌ No response from API"
61
+
62
+ # Update chat history
63
+ chat_history.append({"role": "user", "content": message})
64
+ chat_history.append({"role": "assistant", "content": assistant_response})
65
+
66
+ return "", chat_history
67
 
68
  def clear_chat():
69
  """Clear the chat history"""
 
74
  gr.Markdown(f"# {title}")
75
  gr.Markdown(description)
76
 
77
+ # Add HF Token info
78
+ gr.Markdown("""
79
+ **Note:** To use this chat, you need to add your Hugging Face token in the Space settings:
80
+ 1. Go to Settings → Repository secrets
81
+ 2. Add `HF_TOKEN` with your Hugging Face token
82
+ """)
83
+
84
  chatbot = gr.Chatbot(
85
  label="Chat with Claude",
86
+ height=500,
87
+ type="messages" # Fixed the deprecation warning
88
  )
89
 
90
  with gr.Row():
 
92
  label="Type your message here...",
93
  placeholder="Ask me anything...",
94
  lines=2,
95
+ scale=4,
96
+ container=False
97
  )
98
  submit_btn = gr.Button("Send", variant="primary", scale=1)
99