Update README.md
README.md
@@ -170,8 +170,8 @@ Install the latest transformers (>4.40)
 from transformers import AutoModelForCausalLM, AutoTokenizer
 device = "cuda" # the device to load the model onto
 # use bfloat16 to ensure the best performance.
-model = AutoModelForCausalLM.from_pretrained("
-tokenizer = AutoTokenizer.from_pretrained("
+model = AutoModelForCausalLM.from_pretrained("SorawitChok/SeaLLM-7B-v2.5-AWQ", torch_dtype=torch.bfloat16, device_map=device)
+tokenizer = AutoTokenizer.from_pretrained("SorawitChok/SeaLLM-7B-v2.5-AWQ")
 messages = [
     {"role": "system", "content": "You are a helpful assistant."},
     {"role": "user", "content": "Hello world"},
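For reference, here is a minimal end-to-end sketch of the transformers snippet as it reads after this change. The `import torch` line and the generation step at the end are not part of the hunk above; they are assumed from the standard transformers chat-template API, while the model and tokenizer lines mirror the diff.

# Minimal sketch of the updated transformers usage (assumes a CUDA GPU and that
# the AWQ checkpoint loads directly via from_pretrained, as the diff shows).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"  # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
    "SorawitChok/SeaLLM-7B-v2.5-AWQ", torch_dtype=torch.bfloat16, device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SorawitChok/SeaLLM-7B-v2.5-AWQ")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello world"},
]

# The lines below are not shown in the hunk; they follow the standard
# chat-template API: format the conversation, generate, and decode the reply.
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(device)
outputs = model.generate(inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))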
@@ -207,7 +207,10 @@ def seallm_chat_convo_format(conversations, add_assistant_prefix: bool, system_p
 sparams = SamplingParams(temperature=0.1, max_tokens=1024, stop=['<eos>', '<|im_start|>'])
 llm = LLM("SorawitChok/SeaLLM-7B-v2.5-AWQ", quantization="AWQ")

-message =
+message = [
+    {"role": "user", "content": "Explain general relativity in details."}
+]
+
 prompt = seallm_chat_convo_format(message, True)
 gen = llm.generate(prompt, sampling_params)

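Likewise, a minimal sketch of the updated vLLM snippet run end to end is shown below. It assumes `seallm_chat_convo_format` is the prompt-formatting helper defined earlier in the README (per the hunk header above), and the final print line is not part of the hunk. Note that the hunk assigns the sampling parameters to `sparams` but passes `sampling_params` to `generate`; the sketch uses one object throughout.

# Minimal sketch of the updated vLLM usage (assumes vLLM is installed and that
# seallm_chat_convo_format is the helper defined earlier in the README).
from vllm import LLM, SamplingParams

sparams = SamplingParams(temperature=0.1, max_tokens=1024, stop=['<eos>', '<|im_start|>'])
llm = LLM("SorawitChok/SeaLLM-7B-v2.5-AWQ", quantization="AWQ")

message = [
    {"role": "user", "content": "Explain general relativity in details."}
]

prompt = seallm_chat_convo_format(message, True)   # helper defined earlier in the README
gen = llm.generate(prompt, sampling_params=sparams)
print(gen[0].outputs[0].text)                      # first completion for the single prompt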