# handler.py — custom inference handler (author: exillar25, commit e09cabc, verified)
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
import torch
from peft import PeftModel
# Repo holding both the fine-tuned tokenizer and the LoRA adapter weights.
REPOSITORY_ID = "exillarml/Mistral7b_11Labs_DoctorDS_Testing"

# 4-bit quantization config. Passing `load_in_4bit=True` straight to
# `from_pretrained` is deprecated in recent transformers releases; the
# supported path is an explicit BitsAndBytesConfig (already imported above).
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

# Load the quantized base model, sharding it across available devices.
model = AutoModelForCausalLM.from_pretrained(
    "unsloth/mistral-7b-bnb-4bit",
    quantization_config=quantization_config,
    trust_remote_code=True,
    device_map="auto",
)

# Load the tokenizer from the fine-tuning repo and set a pad token
# (Mistral's tokenizer ships without one; reuse EOS for padding).
tokenizer = AutoTokenizer.from_pretrained(REPOSITORY_ID)
tokenizer.pad_token = tokenizer.eos_token

# Attach the LoRA adapter weights on top of the quantized base model.
model = PeftModel.from_pretrained(model, REPOSITORY_ID)

# Text-generation pipeline used by `handler` below.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
def handler(inputs: dict) -> dict:
    """Generate text for a single request.

    Args:
        inputs: Request payload; the prompt is read from ``inputs["prompt"]``
            (defaults to an empty string when absent).

    Returns:
        ``{"generated_text": <str>}`` — the pipeline output, which includes
        the prompt followed by the model's continuation.
    """
    prompt = inputs.get("prompt", "")
    # Use max_new_tokens rather than max_length: max_length counts the prompt
    # tokens too, so prompts near/over 100 tokens would get little or no
    # generated continuation.
    generated_text = pipe(prompt, max_new_tokens=100)[0]["generated_text"]
    return {"generated_text": generated_text}