Update README.md
README.md
@@ -170,8 +170,8 @@ Install the latest transformers (>4.40)
 from transformers import AutoModelForCausalLM, AutoTokenizer
 device = "cuda" # the device to load the model onto
 # use bfloat16 to ensure the best performance.
-model = AutoModelForCausalLM.from_pretrained("
-tokenizer = AutoTokenizer.from_pretrained("
+model = AutoModelForCausalLM.from_pretrained("SorawitChok/SeaLLM-7B-v2.5-AWQ", torch_dtype=torch.bfloat16, device_map=device)
+tokenizer = AutoTokenizer.from_pretrained("SorawitChok/SeaLLM-7B-v2.5-AWQ")
 messages = [
     {"role": "system", "content": "You are a helpful assistant."},
     {"role": "user", "content": "Hello world"},
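For reference, here is a minimal end-to-end sketch of the transformers snippet as it reads after this change. The `import torch` line and the generation step at the end are not part of the hunk above; they are assumed from the standard transformers chat-template API, while the model and tokenizer lines mirror the diff.

# Minimal sketch of the updated transformers usage (assumes a CUDA GPU and that
# the AWQ checkpoint loads directly via from_pretrained, as the diff shows).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

device = "cuda"  # the device to load the model onto
model = AutoModelForCausalLM.from_pretrained(
    "SorawitChok/SeaLLM-7B-v2.5-AWQ", torch_dtype=torch.bfloat16, device_map=device
)
tokenizer = AutoTokenizer.from_pretrained("SorawitChok/SeaLLM-7B-v2.5-AWQ")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello world"},
]

# The lines below are not shown in the hunk; they follow the standard
# chat-template API: format the conversation, generate, and decode the reply.
inputs = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(device)
outputs = model.generate(inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0][inputs.shape[-1]:], skip_special_tokens=True))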
@@ -207,7 +207,10 @@ def seallm_chat_convo_format(conversations, add_assistant_prefix: bool, system_p
 sparams = SamplingParams(temperature=0.1, max_tokens=1024, stop=['<eos>', '<|im_start|>'])
 llm = LLM("SorawitChok/SeaLLM-7B-v2.5-AWQ", quantization="AWQ")

-message =
+message = [
+    {"role": "user", "content": "Explain general relativity in details."}
+]
+
 prompt = seallm_chat_convo_format(message, True)
 gen = llm.generate(prompt, sampling_params)

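Likewise, a minimal sketch of the updated vLLM snippet run end to end is shown below. It assumes `seallm_chat_convo_format` is the prompt-formatting helper defined earlier in the README (per the hunk header above), and the final print line is not part of the hunk. Note that the hunk assigns the sampling parameters to `sparams` but passes `sampling_params` to `generate`; the sketch uses one object throughout.

# Minimal sketch of the updated vLLM usage (assumes vLLM is installed and that
# seallm_chat_convo_format is the helper defined earlier in the README).
from vllm import LLM, SamplingParams

sparams = SamplingParams(temperature=0.1, max_tokens=1024, stop=['<eos>', '<|im_start|>'])
llm = LLM("SorawitChok/SeaLLM-7B-v2.5-AWQ", quantization="AWQ")

message = [
    {"role": "user", "content": "Explain general relativity in details."}
]

prompt = seallm_chat_convo_format(message, True)   # helper defined earlier in the README
gen = llm.generate(prompt, sampling_params=sparams)
print(gen[0].outputs[0].text)                      # first completion for the single prompt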