Lowenzahn committed
Commit 660a47b · verified · 1 Parent(s): b7eba75

Update README.md

Files changed (1)
1. README.md +1 -11
README.md CHANGED
@@ -35,17 +35,7 @@ base_model = AutoModelForCausalLM.from_pretrained(
 
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained('microsoft/Orca-2-7b')
-tokenizer.add_special_tokens(dict(
-    eos_token=AddedToken("<|im_end|>", single_word=False, lstrip=False, rstrip=False, normalized=True, special=True),
-    unk_token=AddedToken("<unk>", single_word=False, lstrip=False, rstrip=False, normalized=True, special=True),
-    bos_token=AddedToken("<s>", single_word=False, lstrip=False, rstrip=False, normalized=True, special=True),
-    pad_token=AddedToken("</s>", single_word=False, lstrip=False, rstrip=False, normalized=False, special=True),
-))
-tokenizer.add_tokens([AddedToken("<|im_start|>", single_word=False, lstrip=True, rstrip=True, normalized=False)], special_tokens=True)
-tokenizer.additional_special_tokens = ['<unk>', '<s>', '</s>', '<|im_end|>', '<|im_start|>']
-
-model.resize_token_embeddings(len(tokenizer))
-model.config.eos_token_id = tokenizer.eos_token_id
+
 
 # Load PEFT
 model = PeftModel.from_pretrained(base_model, 'Lowenzahn/PathoIE-Orca-2-7B')
 
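For context, the sketch below shows the README's load sequence as it reads after this commit, made self-contained with the imports the fragment assumes. The base-model arguments beyond the checkpoint name are assumptions: the hunk header only shows the opening of that `from_pretrained` call.

```python
# Minimal sketch of the post-commit README snippet (assumes the
# transformers and peft packages are installed; any argument not
# visible in the diff is illustrative, not taken from the README).
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Load base model (the diff only shows the start of this call)
base_model = AutoModelForCausalLM.from_pretrained('microsoft/Orca-2-7b')

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained('microsoft/Orca-2-7b')

# Load PEFT adapter on top of the base model
model = PeftModel.from_pretrained(base_model, 'Lowenzahn/PathoIE-Orca-2-7B')
```

Net effect of the commit: the manual special-token registration, the `resize_token_embeddings` call, and the `eos_token_id` override are dropped, so the tokenizer and base model are now used exactly as shipped before the adapter is attached.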