MrAlexGov committed on
Commit 92773d2 · verified · 1 Parent(s): 3964d91

Update app.py

Files changed (1)
  1. app.py +62 -46
app.py CHANGED
@@ -1,42 +1,69 @@
  import gradio as gr
- from huggingface_hub import InferenceClient
  from typing import List, Dict, Any, Tuple

- # TOP-5 MODELS (chat-ready, fast, HF-tested)
- MODELS = [
-     "Qwen/Qwen2.5-0.5B-Instruct",          # ⚡ Super fast (0.5B)
-     "Qwen/Qwen2.5-1.5B-Instruct",          # Fast (1.5B)
-     "microsoft/Phi-3-mini-4k-instruct",    # Reliable Phi-3
-     "mistralai/Mistral-7B-Instruct-v0.3",  # The classic
-     "HuggingFaceH4/zephyr-7b-beta"         # Zephyr (chat-tuned)
- ]

  def respond(message: str,
              history: List[Dict[str, str]],
-             model_id: str,
-             system_prompt: str,
-             hf_token: str) -> Tuple[List[Dict[str, str]], str, Dict[str, Any]]:
-     """HF API with debug logging."""
      try:
-         print(f"🚀 Request: Model={model_id}, Token={'Yes' if hf_token else 'No'}, Msg='{message[:50]}...'")  # HF log

-         client = InferenceClient(model=model_id, token=hf_token.strip() or None)

          messages = []
          if system_prompt.strip():
              messages.append({"role": "system", "content": system_prompt})
          messages.extend(history)
          messages.append({"role": "user", "content": message})

-         response = client.chat_completion(
-             messages=messages,
-             max_tokens=512,
-             temperature=0.7,
-             stream=False
-         )

-         bot_reply = response.choices[0].message.content
-         print(f"✅ Reply: {bot_reply[:50]}...")  # success log

          new_history = history + [
              {"role": "user", "content": message},
@@ -45,32 +72,21 @@ def respond(message: str,
          return new_history, "", gr.update(value="")

      except Exception as e:
-         full_error = f"❌ Model: {model_id}\nError: {str(e)}\n"
-         print(f"💥 ERROR: {full_error}")  # HF log
-
-         if "429" in str(e) or "rate" in str(e).lower():
-             full_error += "🔥 RATE LIMIT! Paste an HF Token (huggingface.co/settings/tokens)."
-         elif "token" in str(e).lower() or "Unauthorized" in str(e):
-             full_error += "🔑 Invalid or missing Token. Create a new one."
-         elif "No chat template" in str(e) or "tokenizer" in str(e):
-             full_error += "🧠 Model is not chat-ready. Pick another one (Qwen/Phi)."
-         else:
-             full_error += "🌐 HF API is flaky. Try a Token or another model."
-
          new_history = history + [
              {"role": "user", "content": message},
-             {"role": "assistant", "content": full_error}
          ]
-         return new_history, full_error, gr.update(value="")

  # UI
- with gr.Blocks(title="🚀 HF Chat (debug!)", theme=gr.themes.Soft()) as demo:
-     gr.Markdown("# HF API test\n**1. Pick Qwen2.5-0.5B** ⚡\n**2. Paste a Token** → no limits\n[Token](https://huggingface.co/settings/tokens)")

      with gr.Row(variant="compact"):
-         model_dropdown = gr.Dropdown(choices=MODELS, value=MODELS[0], label="🧠 Model")
-         system_prompt = gr.Textbox(label="📝 System", placeholder="You are an AI student.", lines=2)
-         hf_token = gr.Textbox(label="🔑 Token", placeholder="hf_...", type="password")

      chatbot = gr.Chatbot(type="messages", height=500)

@@ -82,11 +98,11 @@ with gr.Blocks(title="🚀 HF Chat (debug!)", theme=gr.themes.Soft()) as demo:
          clear_btn = gr.Button("🗑️ Clear")
          retry_btn = gr.Button("🔄 Retry")

-     status = gr.Textbox(label="Logs/Status", interactive=False, lines=3)

      # Events
-     send_btn.click(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt, hf_token], outputs=[chatbot, status, msg_input])
-     msg_input.submit(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt, hf_token], outputs=[chatbot, status, msg_input])

      def clear():
          return [], "", gr.update(value="")
@@ -99,4 +115,4 @@ with gr.Blocks(title="🚀 HF Chat (debug!)", theme=gr.themes.Soft()) as demo:
      retry_btn.click(retry, inputs=[chatbot], outputs=[msg_input])

  if __name__ == "__main__":
-     demo.queue(max_size=20).launch(debug=True)
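
For reference, the serverless path being removed reduces to this pattern outside the Gradio UI (a minimal sketch assembled only from the removed lines; the hard-coded model id and the HF_TOKEN environment variable are stand-ins for the dropdown and the token textbox):

    import os
    from huggingface_hub import InferenceClient

    # Anonymous calls work but are rate-limited; a token lifts the limit.
    client = InferenceClient(model="Qwen/Qwen2.5-0.5B-Instruct",
                             token=os.getenv("HF_TOKEN"))
    response = client.chat_completion(
        messages=[{"role": "user", "content": "Hello!"}],
        max_tokens=512,
        temperature=0.7,
        stream=False,
    )
    print(response.choices[0].message.content)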
 
  import gradio as gr
+ from transformers import pipeline, AutoTokenizer
  from typing import List, Dict, Any, Tuple
+ import torch

+ # CPU models (small, chat-ready)
+ MODELS = {
+     "Qwen2.5-0.5B": "Qwen/Qwen2.5-0.5B-Instruct",
+     "Qwen2.5-1.5B": "Qwen/Qwen2.5-1.5B-Instruct",
+     "Phi-3-mini": "microsoft/Phi-3-mini-4k-instruct",
+     "Gemma-2-2B": "google/gemma-2-2b-it"
+ }
+
+ def load_model(model_key: str):
+     """Lazily load a text-generation pipeline."""
+     model_id = MODELS[model_key]
+     print(f"🚀 Loading {model_id}...")
+     tokenizer = AutoTokenizer.from_pretrained(model_id)
+     if tokenizer.pad_token is None:
+         tokenizer.pad_token = tokenizer.eos_token
+
+     pipe = pipeline(
+         "text-generation",
+         model=model_id,
+         tokenizer=tokenizer,
+         torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+         device_map="auto" if torch.cuda.is_available() else None,
+         max_new_tokens=512,
+         do_sample=True,
+         temperature=0.7,
+         pad_token_id=tokenizer.eos_token_id
+     )
+     print(f"✅ {model_id} loaded!")
+     return pipe
+
+ # Global cache
+ model_cache = {}

  def respond(message: str,
              history: List[Dict[str, str]],
+             model_key: str,
+             system_prompt: str) -> Tuple[List[Dict[str, str]], str, Dict[str, Any]]:
+     """Local chat via a transformers pipeline."""
      try:
+         if model_key not in model_cache:
+             model_cache[model_key] = load_model(model_key)
+         pipe = model_cache[model_key]

+         print(f"🚀 Generating: {model_key}, Msg='{message[:30]}...'")

+         # Chat format (system + history + user)
          messages = []
          if system_prompt.strip():
              messages.append({"role": "system", "content": system_prompt})
          messages.extend(history)
          messages.append({"role": "user", "content": message})

+         # Apply the chat template (for instruct models)
+         tokenizer = pipe.tokenizer
+         prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

+         # Generate
+         outputs = pipe(prompt, max_new_tokens=512, do_sample=True, temperature=0.7)
+         bot_reply = outputs[0]["generated_text"][len(prompt):].strip()
+
+         print(f"✅ Reply: {bot_reply[:50]}...")

          new_history = history + [
              {"role": "user", "content": message},

          return new_history, "", gr.update(value="")

      except Exception as e:
+         error_msg = f"❌ {model_key}: {str(e)}"
+         print(f"💥 {error_msg}")

          new_history = history + [
              {"role": "user", "content": message},
+             {"role": "assistant", "content": error_msg}
          ]
+         return new_history, error_msg, gr.update(value="")

  # UI
+ with gr.Blocks(title="🚀 Local HF Chat (CPU!)", theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# Local inference (no API!)\n**Small models**: 1-3 s on CPU. No rate limits, no token.")

      with gr.Row(variant="compact"):
+         model_dropdown = gr.Dropdown(choices=list(MODELS.keys()), value="Qwen2.5-0.5B", label="🧠 Model")
+         system_prompt = gr.Textbox(label="📝 System", placeholder="You are a cheerful AI.", lines=2)

      chatbot = gr.Chatbot(type="messages", height=500)

          clear_btn = gr.Button("🗑️ Clear")
          retry_btn = gr.Button("🔄 Retry")

+     status = gr.Textbox(label="Logs", interactive=False, lines=4)

      # Events
+     send_btn.click(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt], outputs=[chatbot, status, msg_input])
+     msg_input.submit(fn=respond, inputs=[msg_input, chatbot, model_dropdown, system_prompt], outputs=[chatbot, status, msg_input])

      def clear():
          return [], "", gr.update(value="")

      retry_btn.click(retry, inputs=[chatbot], outputs=[msg_input])

  if __name__ == "__main__":
+     demo.queue(max_size=10).launch(debug=True)
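
The new local path can be smoke-tested outside the Gradio UI with a minimal sketch that mirrors respond(): render the chat template, generate, and strip the prompt prefix. The prompts below are illustrative, and the first run downloads the model weights:

    from transformers import pipeline, AutoTokenizer

    model_id = "Qwen/Qwen2.5-0.5B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    pipe = pipeline("text-generation", model=model_id, tokenizer=tokenizer)

    messages = [
        {"role": "system", "content": "You are a cheerful AI."},
        {"role": "user", "content": "Say hi in one sentence."},
    ]
    # Same flow as respond(): chat template -> generation -> drop the prompt prefix.
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    out = pipe(prompt, max_new_tokens=64, do_sample=True, temperature=0.7)
    print(out[0]["generated_text"][len(prompt):].strip())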