Update README.md
Browse files
README.md
CHANGED
|
@@ -63,7 +63,7 @@ with three layers used for context extension. **Crucially, LongLLaMA is able to
|
|
| 63 |
|
| 64 |
<div align="center">
|
| 65 |
|
| 66 |
-
| | [LongLLaMA-3B](https://huggingface.co/syzymon/long_llama_3b) | [LongLLaMA-3Bv1.1](https://huggingface.co/syzymon/long_llama_3b_v1_1) | LongLLaMA-7B<br />*(coming soon)*| LongLLaMA-13B<br />*(coming soon)*|
|
| 67 |
|----------------|----------|----------|-----------|-----------|
|
| 68 |
| Source model | [OpenLLaMA-3B](https://huggingface.co/openlm-research/open_llama_3b_easylm) | [OpenLLaMA-3Bv2](https://huggingface.co/openlm-research/open_llama_3b_v2_easylm) | - | - |
|
| 69 |
| Source model tokens | 1T | 1 T | - | - |
|
|
@@ -93,8 +93,8 @@ pip install transformers==4.30 sentencepiece accelerate
|
|
| 93 |
import torch
|
| 94 |
from transformers import LlamaTokenizer, AutoModelForCausalLM
|
| 95 |
|
| 96 |
-
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b")
|
| 97 |
-
model = AutoModelForCausalLM.from_pretrained("syzymon/long_llama_3b",
|
| 98 |
torch_dtype=torch.float32,
|
| 99 |
trust_remote_code=True)
|
| 100 |
```
|
|
@@ -132,9 +132,9 @@ LongLLaMA has several other parameters:
|
|
| 132 |
import torch
|
| 133 |
from transformers import LlamaTokenizer, AutoModelForCausalLM
|
| 134 |
|
| 135 |
-
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b")
|
| 136 |
model = AutoModelForCausalLM.from_pretrained(
|
| 137 |
-
"syzymon/long_llama_3b", torch_dtype=torch.float32,
|
| 138 |
mem_layers=[],
|
| 139 |
mem_dtype='bfloat16',
|
| 140 |
trust_remote_code=True,
|
|
@@ -150,8 +150,8 @@ model = AutoModelForCausalLM.from_pretrained(
|
|
| 150 |
from transformers import LlamaTokenizer, LlamaForCausalLM
|
| 151 |
import torch
|
| 152 |
|
| 153 |
-
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b")
|
| 154 |
-
model = LlamaForCausalLM.from_pretrained("syzymon/long_llama_3b", torch_dtype=torch.float32)
|
| 155 |
```
|
| 156 |
|
| 157 |
|
|
|
|
| 63 |
|
| 64 |
<div align="center">
|
| 65 |
|
| 66 |
+
| | [LongLLaMA-3B](https://huggingface.co/syzymon/long_llama_3b_instruct) | [LongLLaMA-3Bv1.1](https://huggingface.co/syzymon/long_llama_3b_v1_1) | LongLLaMA-7B<br />*(coming soon)*| LongLLaMA-13B<br />*(coming soon)*|
|
| 67 |
|----------------|----------|----------|-----------|-----------|
|
| 68 |
| Source model | [OpenLLaMA-3B](https://huggingface.co/openlm-research/open_llama_3b_easylm) | [OpenLLaMA-3Bv2](https://huggingface.co/openlm-research/open_llama_3b_v2_easylm) | - | - |
|
| 69 |
| Source model tokens | 1T | 1 T | - | - |
|
|
|
|
| 93 |
import torch
|
| 94 |
from transformers import LlamaTokenizer, AutoModelForCausalLM
|
| 95 |
|
| 96 |
+
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b_instruct")
|
| 97 |
+
model = AutoModelForCausalLM.from_pretrained("syzymon/long_llama_3b_instruct",
|
| 98 |
torch_dtype=torch.float32,
|
| 99 |
trust_remote_code=True)
|
| 100 |
```
|
|
|
|
| 132 |
import torch
|
| 133 |
from transformers import LlamaTokenizer, AutoModelForCausalLM
|
| 134 |
|
| 135 |
+
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b_instruct")
|
| 136 |
model = AutoModelForCausalLM.from_pretrained(
|
| 137 |
+
"syzymon/long_llama_3b_instruct", torch_dtype=torch.float32,
|
| 138 |
mem_layers=[],
|
| 139 |
mem_dtype='bfloat16',
|
| 140 |
trust_remote_code=True,
|
|
|
|
| 150 |
from transformers import LlamaTokenizer, LlamaForCausalLM
|
| 151 |
import torch
|
| 152 |
|
| 153 |
+
tokenizer = LlamaTokenizer.from_pretrained("syzymon/long_llama_3b_instruct")
|
| 154 |
+
model = LlamaForCausalLM.from_pretrained("syzymon/long_llama_3b_instruct", torch_dtype=torch.float32)
|
| 155 |
```
|
| 156 |
|
| 157 |
|