MrAlexGov committed on
Commit 92773d2 · verified · 1 Parent(s): 3964d91

Update app.py

Files changed (1)
  1. app.py +62 -46
app.py CHANGED
@@ -1,42 +1,69 @@
  import gradio as gr
- from huggingface_hub import InferenceClient
  from typing import List, Dict, Any, Tuple

- # TOP-5 MODELS (chat-ready, fast, HF-tested)
- MODELS = [
-     "Qwen/Qwen2.5-0.5B-Instruct",          # ⚡ Super fast (0.5B)
-     "Qwen/Qwen2.5-1.5B-Instruct",          # Fast (1.5B)
-     "microsoft/Phi-3-mini-4k-instruct",    # Reliable Phi-3
-     "mistralai/Mistral-7B-Instruct-v0.3",  # The classic
-     "HuggingFaceH4/zephyr-7b-beta"         # Zephyr (chat-tuned)
- ]

  def respond(message: str,
              history: List[Dict[str, str]],
-             model_id: str,
-             system_prompt: str,
-             hf_token: str) -> Tuple[List[Dict[str, str]], str, Dict[str, Any]]:
-     """HF API with debug logging."""
      try:
-         print(f"🚀 Request: Model={model_id}, Token={'Yes' if hf_token else 'No'}, Msg='{message[:50]}...'")  # HF log

-         client = InferenceClient(model=model_id, token=hf_token.strip() or None)

          messages = []
          if system_prompt.strip():
              messages.append({"role": "system", "content": system_prompt})
          messages.extend(history)
          messages.append({"role": "user", "content": message})

-         response = client.chat_completion(
-             messages=messages,
-             max_tokens=512,
-             temperature=0.7,
-             stream=False
-         )

-         bot_reply = response.choices[0].message.content
-         print(f"✅ Reply: {bot_reply[:50]}...")  # success log

          new_history = history + [
              {"role": "user", "content": message},
@@ -45,32 +72,21 @@ def respond(message: str,
          return new_history, "", gr.update(value="")

      except Exception as e:
-         full_error = f"❌ Model: {model_id}\nError: {str(e)}\n"
-         print(f"💥 ERROR: {full_error}")  # HF log
-
-         if "429" in str(e) or "rate" in str(e).lower():
-             full_error += "🔥 RATE LIMIT! Paste an HF Token (huggingface.co/settings/tokens)."
-         elif "token" in str(e).lower() or "Unauthorized" in str(e):
-             full_error += "🔑 Invalid or missing Token. Create a new one."
-         elif "No chat template" in str(e) or "tokenizer" in str(e):
-             full_error += "🧠 Model is not chat-ready. Pick another one (Qwen/Phi)."
-         else:
-             full_error += "🌐 HF API is flaky. Try a Token or another model."
-
          new_history = history + [
              {"role": "user", "content": message},
-             {"role": "assistant", "content": full_error}
          ]
-         return new_history, full_error, gr.update(value="")

  # UI
- with gr.Blocks(title="🚀 HF Chat (debug!)", theme=gr.themes.Soft()) as demo:
-     gr.Markdown("# HF API test\n**1. Pick Qwen2.5-0.5B** ⚡\n**2. Paste a Token** → no limits\n[Token](https://huggingface.co/settings/tokens)")

      with gr.Row(variant="compact"):
-         model_dropdown = gr.Dropdown(choices=MODELS, value=MODELS[0], label="🧠 Model")
-         system_prompt = gr.Textbox(label="📝 System", placeholder="You are an AI student.", lines=2)
-         hf_token = gr.Textbox(label="🔑 Token", placeholder="hf_...", type="password")

      chatbot = gr.Chatbot(type="messages", height=500)

@@ -82,11 +98,11 @@ with gr.Blocks(title="🚀 HF Chat (debug!)", theme=gr.themes.Soft()) as demo:
          clear_btn = gr.Button("🗑️ Clear")
          retry_btn = gr.Button("🔄 Retry")

-     status = gr.Textbox(label="Logs/Status", interactive=False, lines=3)

      # Events
-     send_btn.click(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt, hf_token], outputs=[chatbot, status, msg_input])
-     msg_input.submit(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt, hf_token], outputs=[chatbot, status, msg_input])

      def clear():
          return [], "", gr.update(value="")
@@ -99,4 +115,4 @@ with gr.Blocks(title="🚀 HF Chat (debug!)", theme=gr.themes.Soft()) as demo:
      retry_btn.click(retry, inputs=[chatbot], outputs=[msg_input])

  if __name__ == "__main__":
-     demo.queue(max_size=20).launch(debug=True)
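
For reference, the serverless path being removed reduces to this pattern outside the Gradio UI (a minimal sketch assembled only from the removed lines; the hard-coded model id and the HF_TOKEN environment variable are stand-ins for the dropdown and the token textbox):

    import os
    from huggingface_hub import InferenceClient

    # Anonymous calls work but are rate-limited; a token lifts the limit.
    client = InferenceClient(model="Qwen/Qwen2.5-0.5B-Instruct",
                             token=os.getenv("HF_TOKEN"))
    response = client.chat_completion(
        messages=[{"role": "user", "content": "Hello!"}],
        max_tokens=512,
        temperature=0.7,
        stream=False,
    )
    print(response.choices[0].message.content)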
 
  import gradio as gr
+ from transformers import pipeline, AutoTokenizer
  from typing import List, Dict, Any, Tuple
+ import torch

+ # CPU models (small, chat-ready)
+ MODELS = {
+     "Qwen2.5-0.5B": "Qwen/Qwen2.5-0.5B-Instruct",
+     "Qwen2.5-1.5B": "Qwen/Qwen2.5-1.5B-Instruct",
+     "Phi-3-mini": "microsoft/Phi-3-mini-4k-instruct",
+     "Gemma-2-2B": "google/gemma-2-2b-it"
+ }
+
+ def load_model(model_key: str):
+     """Lazily load a text-generation pipeline."""
+     model_id = MODELS[model_key]
+     print(f"🚀 Loading {model_id}...")
+     tokenizer = AutoTokenizer.from_pretrained(model_id)
+     if tokenizer.pad_token is None:
+         tokenizer.pad_token = tokenizer.eos_token
+
+     pipe = pipeline(
+         "text-generation",
+         model=model_id,
+         tokenizer=tokenizer,
+         torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+         device_map="auto" if torch.cuda.is_available() else None,
+         max_new_tokens=512,
+         do_sample=True,
+         temperature=0.7,
+         pad_token_id=tokenizer.eos_token_id
+     )
+     print(f"✅ {model_id} loaded!")
+     return pipe
+
+ # Global cache
+ model_cache = {}

  def respond(message: str,
              history: List[Dict[str, str]],
+             model_key: str,
+             system_prompt: str) -> Tuple[List[Dict[str, str]], str, Dict[str, Any]]:
+     """Local chat via a transformers pipeline."""
      try:
+         if model_key not in model_cache:
+             model_cache[model_key] = load_model(model_key)
+         pipe = model_cache[model_key]

+         print(f"🚀 Generating: {model_key}, Msg='{message[:30]}...'")

+         # Chat format (system + history + user)
          messages = []
          if system_prompt.strip():
              messages.append({"role": "system", "content": system_prompt})
          messages.extend(history)
          messages.append({"role": "user", "content": message})

+         # Apply the chat template (for instruct models)
+         tokenizer = pipe.tokenizer
+         prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

+         # Generate
+         outputs = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)
+         bot_reply = outputs[0]["generated_text"][len(prompt):].strip()
+
+         print(f"✅ Reply: {bot_reply[:50]}...")

          new_history = history + [
              {"role": "user", "content": message},

          return new_history, "", gr.update(value="")

      except Exception as e:
+         error_msg = f"❌ {model_key}: {str(e)}"
+         print(f"💥 {error_msg}")

          new_history = history + [
              {"role": "user", "content": message},
+             {"role": "assistant", "content": error_msg}
          ]
+         return new_history, error_msg, gr.update(value="")

  # UI
+ with gr.Blocks(title="🚀 Local HF Chat (CPU!)", theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# Local inference (no API!)\n**Small models**: 1-3 s on CPU. No rate limits, no token.")

      with gr.Row(variant="compact"):
+         model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-0.5B", label="🧠 Model")
+         system_prompt = gr.Textbox(label="📝 System", placeholder="You are a cheerful AI.", lines=2)

      chatbot = gr.Chatbot(type="messages", height=500)

          clear_btn = gr.Button("🗑️ Clear")
          retry_btn = gr.Button("🔄 Retry")

+     status = gr.Textbox(label="Logs", interactive=False, lines=4)

      # Events
+     send_btn.click(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt], outputs=[chatbot, status, msg_input])
+     msg_input.submit(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt], outputs=[chatbot, status, msg_input])

      def clear():
          return [], "", gr.update(value="")

      retry_btn.click(retry, inputs=[chatbot], outputs=[msg_input])

  if __name__ == "__main__":
+     demo.queue(max_size=10).launch(debug=True)
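
The new local path can be smoke-tested outside the Gradio UI with a minimal sketch that mirrors respond(): render the chat template, generate, and strip the prompt prefix. The prompts below are illustrative, and the first run downloads the model weights:

    from transformers import pipeline, AutoTokenizer

    model_id = "Qwen/Qwen2.5-0.5B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    pipe = pipeline("text-generation", model=model_id, tokenizer=tokenizer)

    messages = [
        {"role": "system", "content": "You are a cheerful AI."},
        {"role": "user", "content": "Say hi in one sentence."},
    ]
    # Same flow as respond(): chat template -> generation -> drop the prompt prefix.
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    out = pipe(prompt, max_new_tokens=64, do_sample=True, temperature=0.7)
    print(out[0]["generated_text"][len(prompt):].strip())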