Spaces:

inclusionAI
/

ling-mini-2.0-local

Sleeping

雷娃 committed on Sep 10

Commit

7de1f3b

1 Parent(s): a68acd5

add local load models

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,7 +1,21 @@
 import gradio as gr
 from huggingface_hub import InferenceClient
 def respond(
     message,
     history: list[dict[str, str]],
@@ -22,22 +36,23 @@ def respond(
     messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
         messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        choices = message.choices
-        token = ""
-        if len(choices) and choices[0].delta.content:
-            token = choices[0].delta.content
-        response += token
-        yield response
 """

+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+from threading import Thread
 import gradio as gr
+import re
+import torch
 from huggingface_hub import InferenceClient
+# load model and tokenizer
+model_name = "inclusionAI/Ling-mini-2.0"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",
+    device_map="auto",
+    trust_remote_code=True
+).eval()
 def respond(
     message,
     history: list[dict[str, str]],
     messages.append({"role": "user", "content": message})
+    text = tokenizer.apply_chat_template(
         messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt", return_token_type_ids=False).to(model.device)
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=512
+    )
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    yield response
 """

requirements.txt ADDED Viewed

+gradio
+transformers
+torch
+accelerate
+openai