{ "_commit_hash": null, "add_bos_token": false, "add_prefix_space": false, "added_tokens_decoder": { "151643": { "content": "<|endoftext|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "151644": { "content": "<|im_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "151645": { "content": "<|im_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "151646": { "content": "<|object_ref_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "151647": { "content": "<|object_ref_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "151648": { "content": "<|box_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "151649": { "content": "<|box_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "151650": { "content": "<|quad_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "151651": { "content": "<|quad_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "151652": { "content": "<|vision_start|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "151653": { "content": "<|vision_end|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "151654": { "content": "<|vision_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "151655": { "content": "<|image_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": true }, "151656": { "content": "<|video_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, 
"special": true }, "151657": { "content": "<tool_call>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false }, "151658": { "content": "</tool_call>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false }, "151659": { "content": "<|fim_prefix|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false }, "151660": { "content": "<|fim_middle|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false }, "151661": { "content": "<|fim_suffix|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false }, "151662": { "content": "<|fim_pad|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false }, "151663": { "content": "<|repo_name|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false }, "151664": { "content": "<|file_sep|>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false, "special": false } }, "additional_special_tokens": [ "<|im_start|>", "<|im_end|>", "<|object_ref_start|>", "<|object_ref_end|>", "<|box_start|>", "<|box_end|>", "<|quad_start|>", "<|quad_end|>", "<|vision_start|>", "<|vision_end|>", "<|vision_pad|>", "<|image_pad|>", "<|video_pad|>" ], "bos_token": null, "chat_template": "{%- set image_placeholder = '<|vision_start|><|image_pad|><|vision_end|>' -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'user' -%}\n {%- if loop.first and message['role'] != 'system' -%}\n {{- '<|im_start|>system\n' -}}\n {%- if template -%}\n {#--- If template, extraction task ---#}\n {{- 'You are NuExtract, an information extraction tool created by NuMind.' -}}\n {%- else -%}\n {#--- Else, template generation task ---#}\n {{- 'You are a helpful assistant.' 
-}}\n {%- endif -%}\n {{ '<|im_end|>\n' }}\n {%- endif -%}\n {{- '<|im_start|>' + message['role'] + '\n' -}}\n {%- if template -%}\n {#--- Template Section ---#}\n {{- '# Template:\n' -}}\n {{- template -}}\n {{- '\n' -}}\n \n {%- if examples -%}\n {#--- Examples can only exist in the extraction task ---#}\n {{- '# Examples:\n' -}}\n {%- for example in examples -%}\n {{- '## Input:\n' -}}\n {%- if example['input'] is mapping and (example['input']['type'] == 'image' or example['input']['type'] == 'image_url') -%}\n {{- image_placeholder | trim -}}\n {%- elif example['input'] == '<image>' -%}\n {#--- Keep compatibility with <image> for now ---#}\n {{- image_placeholder | trim -}}\n {%- else -%}\n {#--- Text input example ---#}\n {{- example['input'] -}}\n {%- endif -%}\n {{- '\n' -}}\n {{- '## Output:\n' -}}\n {{- example['output'] -}}\n {{- '\n' -}}\n {%- endfor -%}\n {%- endif -%}\n {{- '# Context:\n' -}}\n {%- endif -%}\n \n {%- if message['content'] is string -%}\n {#--- Simple string content ---#}\n {{- message['content'] | trim -}}\n {%- elif message['content'] is mapping and (message['content']['type'] == 'image' or message['content']['type'] == 'image_url') -%}\n {{- image_placeholder | trim -}}\n {%- else -%}\n {#--- List of content items (mixed text/images) ---#}\n {#--- First, determine what the actual input content is (not ICL images) ---#}\n {%- set ns = namespace(has_text_input=false, text_content='') -%}\n \n {#--- Count content types and identify actual input document ---#}\n {%- for content in message['content'] -%}\n {%- if content is mapping and content.get('type') == 'text' -%}\n {%- if content.get('text') != '<image>' -%}\n {#--- Keep compatibility with <image> for now ---#}\n {%- set ns.has_text_input = true -%}\n {%- set ns.text_content = content['text'] -%}\n {%- endif -%}\n {%- elif content is string -%}\n {%- if content != '<image>' -%}\n {#--- Keep compatibility with <image> for now ---#}\n {%- set ns.has_text_input = true -%}\n {%- set ns.text_content = content -%}\n {%- endif -%}\n 
{%- endif -%}\n {%- endfor -%}\n \n {#--- Determine what to output based on actual input type ---#}\n {%- if ns.has_text_input -%}\n {#--- Main input is text, so output the text content ---#}\n {{- ns.text_content | trim -}}\n {%- else -%}\n {#--- Main input is image or placeholder ---#}\n {%- set ns2 = namespace(found_image=false) -%}\n {%- for content in message['content'] -%}\n {%- if content is mapping and (content.get('type') == 'image' or content.get('type') == 'image_url') and not ns2.found_image -%}\n {{- image_placeholder | trim -}}\n {%- set ns2.found_image = true -%}\n {%- elif content is mapping and content.get('type') == 'text' and content.get('text') == '<image>' and not ns2.found_image -%}\n {#--- Keep compatibility with <image> for now ---#}\n {{- image_placeholder | trim -}}\n {%- set ns2.found_image = true -%}\n {%- elif content is string and content == '<image>' and not ns2.found_image -%}\n {#--- Keep compatibility with <image> for now ---#}\n {{- image_placeholder | trim -}}\n {%- set ns2.found_image = true -%}\n {%- endif -%}\n {%- endfor -%}\n {%- endif -%}\n {%- endif -%}\n {{- '<|im_end|>\n'}}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{- '<|im_start|>assistant\n' -}}\n{%- endif -%}", "clean_up_tokenization_spaces": false, "eos_token": "<|im_end|>", "errors": "replace", "extra_special_tokens": {}, "max_length": null, "max_pixels": 23000000, "min_pixels": 200704, "model_max_length": 131072, "pad_to_multiple_of": null, "pad_token": "<|endoftext|>", "pad_token_type_id": 0, "padding_side": "right", "processor_class": "Qwen2_5_VLProcessor", "split_special_tokens": false, "tokenizer_class": "Qwen2Tokenizer", "unk_token": null }