| { | |
| "added_tokens_decoder": { | |
| "151329": { | |
| "content": "<|endoftext|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "151330": { | |
| "content": "[MASK]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "151331": { | |
| "content": "[gMASK]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "151332": { | |
| "content": "[sMASK]", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "151333": { | |
| "content": "<sop>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "151334": { | |
| "content": "<eop>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "151335": { | |
| "content": "<|system|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "151336": { | |
| "content": "<|user|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "151337": { | |
| "content": "<|assistant|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "151338": { | |
| "content": "<|observation|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "151339": { | |
| "content": "<|begin_of_image|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "151340": { | |
| "content": "<|end_of_image|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "151341": { | |
| "content": "<|begin_of_video|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "151342": { | |
| "content": "<|end_of_video|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| } | |
| }, | |
| "additional_special_tokens": [ | |
| "<|endoftext|>", | |
| "[MASK]", | |
| "[gMASK]", | |
| "[sMASK]", | |
| "<sop>", | |
| "<eop>", | |
| "<|system|>", | |
| "<|user|>", | |
| "<|assistant|>", | |
| "<|observation|>", | |
| "<|begin_of_image|>", | |
| "<|end_of_image|>", | |
| "<|begin_of_video|>", | |
| "<|end_of_video|>" | |
| ], | |
| "chat_template": "[gMASK]<sop>{%- if tools -%}<|system|>\n# 可用工具\n{% for tool in tools %}{%- set function = tool.function if tool.get(\"function\") else tool %}\n\n## {{ function.name }}\n\n{{ function | tojson(indent=4, ensure_ascii=False) }}\n在调用上述函数时,请使用 Json 格式表示调用的参数。{%- endfor %}{%- endif -%}{%- for msg in messages %}{%- if msg.role == 'system' %}<|system|>\n{{ msg.content }}{%- endif %}{%- endfor %}{%- for message in messages if message.role != 'system' %}{%- set role = message['role'] %}{%- set content = message['content'] %}{%- set meta = message.get(\"metadata\", \"\") %}{%- if role == 'user' %}<|user|>\n{{ content }}{%- elif role == 'assistant' and not meta %}<|assistant|>\n{{ content }}{%- elif role == 'assistant' and meta %}<|assistant|>{{ meta }} \n{{ content }}{%- elif role == 'observation' %}<|observation|>\n{{ content }}{%- endif %}{%- endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}", | |
| "clean_up_tokenization_spaces": false, | |
| "do_lower_case": false, | |
| "eos_token": "<|user|>", | |
| "extra_special_tokens": {}, | |
| "model_input_names": [ | |
| "input_ids", | |
| "attention_mask" | |
| ], | |
| "model_max_length": 128000, | |
| "pad_token": "<|endoftext|>", | |
| "padding_side": "left", | |
| "remove_space": false, | |
| "tokenizer_class": "PreTrainedTokenizer" | |
| } | |