Upload RecursiveGPT2Model.py with huggingface_hub
Browse files- RecursiveGPT2Model.py +27 -0
RecursiveGPT2Model.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from transformers import GPT2LMHeadModel, GPT2Config
|
| 2 |
+
from shared_attention import convert_to_recursive
|
| 3 |
+
import torch
|
| 4 |
+
|
| 5 |
+
class RecursiveGPT2Config(GPT2Config):
    """GPT-2 configuration carrying two extra settings: ``K`` and ``rank``.

    Both values are stored as plain attributes on the config. In this file
    they are read by ``RecursiveGPT2LMHeadModel.__init__`` and passed to
    ``convert_to_recursive`` as the ``K`` and ``rank`` keyword arguments.

    Args:
        K: Value stored as ``self.K``. Defaults to 2.
        rank: Value stored as ``self.rank``. Defaults to 8.
        **kwargs: Forwarded unchanged to ``GPT2Config.__init__``.
    """

    # Identifier under which this configuration type is declared.
    model_type = "recursive_gpt2"

    def __init__(self, K: int = 2, rank: int = 8, **kwargs) -> None:
        # Let the stock GPT-2 config consume every standard argument first,
        # then attach the two additional settings.
        super().__init__(**kwargs)
        self.K = K
        self.rank = rank
|
| 12 |
+
|
| 13 |
+
class RecursiveGPT2LMHeadModel(GPT2LMHeadModel):
    """GPT-2 LM-head model that is passed through ``convert_to_recursive``.

    The model is first built as a regular ``GPT2LMHeadModel`` and then handed
    to ``convert_to_recursive`` together with ``config.K`` and ``config.rank``.
    What that helper changes is defined in ``shared_attention`` and is not
    visible from this file.
    """

    config_class = RecursiveGPT2Config

    def __init__(self, config):
        """Build the base GPT-2 model, then apply the recursive conversion.

        Args:
            config: Configuration object exposing ``K`` and ``rank`` in
                addition to whatever ``GPT2LMHeadModel`` requires
                (presumably a ``RecursiveGPT2Config``).
        """
        # Initialize as a regular GPT-2 model first.
        super().__init__(config)

        # Apply the recursive modifications to this instance.
        # NOTE(review): this runs after the parent constructor has finished;
        # confirm that anything convert_to_recursive adds is initialized the
        # way you expect.
        convert_to_recursive(self, K=config.K, rank=config.rank)

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        """Load a model by delegating entirely to the parent implementation.

        This override adds no behavior of its own. The only visible call to
        ``convert_to_recursive`` is in ``__init__``, so the conversion is
        applied whenever an instance is constructed, not by this method.

        Args:
            pretrained_model_name_or_path: Forwarded to the parent
                ``from_pretrained``.
            *model_args: Forwarded positionally to the parent.
            **kwargs: Forwarded as keyword arguments to the parent.

        Returns:
            Whatever the parent ``from_pretrained`` returns.
        """
        return super().from_pretrained(
            pretrained_model_name_or_path, *model_args, **kwargs
        )
|