|
|
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline |
|
|
import torch |
|
|
from peft import PeftModel |
|
|
|
|
|
# Hugging Face repo holding the fine-tuned LoRA adapter (and its tokenizer).
REPOSITORY_ID = "exillarml/Mistral7b_11Labs_DoctorDS_Testing"


# Request 4-bit (bitsandbytes) quantization via BitsAndBytesConfig.
# Passing the bare `load_in_4bit=True` kwarg directly to from_pretrained is
# deprecated in recent transformers releases; the config object is the
# supported path (and BitsAndBytesConfig was already imported for this).
quantization_config = BitsAndBytesConfig(load_in_4bit=True)


# Base model: pre-quantized Mistral-7B checkpoint. The LoRA adapter from
# REPOSITORY_ID is attached on top of this model later in the file.
model = AutoModelForCausalLM.from_pretrained(
    "unsloth/mistral-7b-bnb-4bit",
    quantization_config=quantization_config,
    trust_remote_code=True,
    device_map="auto",  # let accelerate place layers on available devices
)
|
|
|
|
|
|
|
|
# Load the tokenizer from the fine-tuned repo so any special/added tokens
# match the adapter's training setup.
tokenizer = AutoTokenizer.from_pretrained(REPOSITORY_ID)


# Mistral tokenizers typically ship without a pad token; reuse EOS so that
# padding (e.g. inside the generation pipeline) does not fail.
tokenizer.pad_token = tokenizer.eos_token


# Attach the LoRA adapter weights from REPOSITORY_ID on top of the 4-bit
# base model loaded above; `model` is rebound to the wrapped PeftModel.
model = PeftModel.from_pretrained(model, REPOSITORY_ID)
|
|
|
|
|
|
|
|
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer) |
|
|
|
|
|
def handler(inputs):
    """Inference entry point.

    Parameters
    ----------
    inputs : dict
        Request payload; the text to complete is read from
        ``inputs["prompt"]``. A missing key falls back to the empty string.

    Returns
    -------
    dict
        ``{"generated_text": str}`` — the prompt plus the model's
        continuation, as produced by the text-generation pipeline.
    """
    prompt = inputs.get("prompt", "")

    # max_new_tokens bounds only the *generated* continuation. The previous
    # max_length=100 counted the prompt's tokens as well, so prompts near or
    # over 100 tokens produced little or no new text.
    outputs = pipe(prompt, max_new_tokens=100)
    generated_text = outputs[0]["generated_text"]

    return {"generated_text": generated_text}