brianling16 committed on
Commit
9720c2e
·
verified ·
1 Parent(s): f3d30f6

Upload RecursiveGPT2Model.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. RecursiveGPT2Model.py +27 -0
RecursiveGPT2Model.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import GPT2LMHeadModel, GPT2Config
2
+ from shared_attention import convert_to_recursive
3
+ import torch
4
+
5
class RecursiveGPT2Config(GPT2Config):
    """GPT-2 configuration carrying two extra fields, ``K`` and ``rank``.

    Both values are stored as plain attributes on the config.
    ``RecursiveGPT2LMHeadModel`` reads them back and hands them to
    ``convert_to_recursive``.

    NOTE(review): presumably ``K`` is a recursion/sharing count and ``rank``
    a low-rank dimension -- confirm against ``shared_attention``.

    Args:
        K: Value stored as ``self.K`` (default 2).
        rank: Value stored as ``self.rank`` (default 8).
        **kwargs: Passed through unchanged to ``GPT2Config.__init__``.
    """

    model_type = "recursive_gpt2"

    def __init__(self, K=2, rank=8, **kwargs):
        # The base configuration is built first; the extra fields are
        # attached afterwards, in the same order as before (K, then rank).
        super().__init__(**kwargs)
        self.K, self.rank = K, rank
12
+
13
class RecursiveGPT2LMHeadModel(GPT2LMHeadModel):
    """GPT-2 LM-head model that is passed through ``convert_to_recursive``.

    The instance is first built as an ordinary ``GPT2LMHeadModel`` and then
    handed to ``convert_to_recursive`` together with the ``K`` and ``rank``
    values taken from its config.
    """

    config_class = RecursiveGPT2Config

    def __init__(self, config):
        """Build the base model, then apply the recursive conversion.

        Args:
            config: Configuration object; must expose ``K`` and ``rank``
                attributes (``RecursiveGPT2Config`` provides both).
        """
        # Standard GPT-2 construction has to finish before the conversion runs.
        super().__init__(config)

        # Settings are read from the config only after the base init.
        recursion_kwargs = {"K": config.K, "rank": config.rank}
        convert_to_recursive(self, **recursion_kwargs)

    @classmethod
    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
        """Load a model by delegating directly to the parent ``from_pretrained``.

        This override adds no logic of its own: all arguments are forwarded
        unchanged and the parent's result is returned as-is.

        NOTE(review): the recursive conversion is presumably applied because
        the parent loader instantiates the class through ``__init__`` --
        confirm against the ``transformers`` loading path.
        """
        return super().from_pretrained(pretrained_model_name_or_path, *model_args, **kwargs)