# handler.py — custom inference handler (author: exillar25, commit e09cabc, verified)
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
import torch
from peft import PeftModel
# Repo holding both the fine-tuned tokenizer and the LoRA adapter weights.
REPOSITORY_ID = "exillarml/Mistral7b_11Labs_DoctorDS_Testing"

# 4-bit quantization config. Passing `load_in_4bit=True` straight to
# `from_pretrained` is deprecated in recent transformers releases; the
# supported path is an explicit BitsAndBytesConfig (already imported above).
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

# Load the quantized base model, sharding it across available devices.
model = AutoModelForCausalLM.from_pretrained(
    "unsloth/mistral-7b-bnb-4bit",
    quantization_config=quantization_config,
    trust_remote_code=True,
    device_map="auto",
)

# Load the tokenizer from the fine-tuning repo and set a pad token
# (Mistral's tokenizer ships without one; reuse EOS for padding).
tokenizer = AutoTokenizer.from_pretrained(REPOSITORY_ID)
tokenizer.pad_token = tokenizer.eos_token

# Attach the LoRA adapter weights on top of the quantized base model.
model = PeftModel.from_pretrained(model, REPOSITORY_ID)

# Text-generation pipeline used by `handler` below.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
def handler(inputs: dict) -> dict:
    """Generate text for a single request.

    Args:
        inputs: Request payload; the prompt is read from ``inputs["prompt"]``
            (defaults to an empty string when absent).

    Returns:
        ``{"generated_text": <str>}`` — the pipeline output, which includes
        the prompt followed by the model's continuation.
    """
    prompt = inputs.get("prompt", "")
    # Use max_new_tokens rather than max_length: max_length counts the prompt
    # tokens too, so prompts near/over 100 tokens would get little or no
    # generated continuation.
    generated_text = pipe(prompt, max_new_tokens=100)[0]["generated_text"]
    return {"generated_text": generated_text}