Upload 81 files
#4
by twhitworth · opened
- fp16.py → p16.py +3 -5
- v0.1.3 +0 -0
fp16.py → p16.py
RENAMED
@@ -9,7 +9,6 @@ from tqdm import tqdm
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from transformers.generation.stopping_criteria import StoppingCriteria, StoppingCriteriaList
 
-# Make sure to set your model output directory and make sure it has 755 permissions.
 MODEL_ID = "openai/gpt-oss-120b"
 OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "./fp16/gpt-oss-120b-fp16")
 
@@ -20,12 +19,11 @@ os.environ["TOKENIZERS_PARALLELISM"] = "false"
 tok = AutoTokenizer.from_pretrained(MODEL_ID, use_fast=True)
 
 # 3. load model in fp16
-
-max_memory = {0: "17GiB", 1: "17GiB", 2: "17GiB", 3: "17GiB", 4: "17GiB", 5: "17GiB", 6: "17GiB", 7: "17GiB", "cpu": "196GiB"}
+max_memory = {0: "17GiB", 1: "17GiB", 2: "17GiB", 3: "17GiB", 4: "17GiB", 5: "17GiB", "cpu": "196GiB"}
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     torch_dtype=torch.float16,
-    device_map="
+    device_map="auto",
     low_cpu_mem_usage=True,
     max_memory=max_memory,
     offload_folder="./offload_cache",
@@ -100,7 +98,7 @@ for _m in model.modules():
 from transformers.models.gpt_bigcode import modeling_gpt_bigcode
 modeling_gpt_bigcode.GPTBigCodeModel._check_hidden_states_dtype = lambda *_, **__: None
 
-# 5.
+# 5. quick demo
 if __name__ == "__main__":
     prompt = "Explain quantum supremacy in one paragraph."
     inputs = tok(prompt, return_tensors="pt").to(model.device)
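Note on the loading config (not part of the diff): with device_map="auto" plus a max_memory dict, the transformers/accelerate dispatcher caps each listed GPU at its budget and spills whatever does not fit to CPU RAM and to offload_folder on disk. Since this hunk only changes the hard-coded GPU count from 8 to 6, a hedged alternative sketch is to derive the dict from the visible devices; the 17GiB per-GPU and 196GiB CPU budgets below are simply the values from the diff, not recommendations.

# sketch: build max_memory from the visible GPUs instead of hard-coding the count
import torch

max_memory = {i: "17GiB" for i in range(torch.cuda.device_count())}
max_memory["cpu"] = "196GiB"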
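The "# 5. quick demo" hunk ends at the tokenizer call, so the actual generation step sits outside the shown context. A minimal sketch of how that demo block presumably continues, assuming greedy decoding; max_new_tokens=256 is an illustrative value, not taken from the file.

# sketch of the demo's likely continuation; the settings below are assumptions
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=256, do_sample=False)
print(tok.decode(out[0], skip_special_tokens=True))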
v0.1.3
DELETED
File without changes