burtenshaw HF Staff committed on
Commit
c15c65b
·
verified ·
1 Parent(s): 5e050a9

Upload tokenizer_config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +17 -2
tokenizer_config.json CHANGED
@@ -1,4 +1,19 @@
1
  {
2
- "tokenizer_class": "NanoChatTokenizer",
3
- "chat_template": "{% if messages[0]['role'] == 'system' %}<|bos|><|user_start|>{{ messages[0]['content'] }}\n\n{{ messages[1]['content'] }}<|user_end|>{% set messages = messages[2:] %}{% else %}<|bos|>{% endif %}{% for message in messages %}{% if loop.index0 % 2 == 0 %}<|user_start|>{{ message['content'] }}<|user_end|>{% else %}<|assistant_start|>{{ message['content'] }}<|assistant_end|>{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant_start|>{% endif %}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  }
 
1
  {
2
+ "tokenizer_class": "PreTrainedTokenizerFast",
3
+ "bos_token": "<|bos|>",
4
+ "eos_token": "<|assistant_end|>",
5
+ "pad_token": "<|assistant_end|>",
6
+ "additional_special_tokens": [
7
+ "<|user_start|>",
8
+ "<|user_end|>",
9
+ "<|assistant_start|>",
10
+ "<|python_start|>",
11
+ "<|python_end|>",
12
+ "<|output_start|>",
13
+ "<|output_end|>"
14
+ ],
15
+ "model_input_names": [
16
+ "input_ids",
17
+ "attention_mask"
18
+ ]
19
  }