Commit 
							
							·
						
						c98c40f
	
1
								Parent(s):
							
							b8a2143
								
update
Browse files
- added_tokens.json +1 -0
- chat_template.jinja +1 -1
- processor_config.json +0 -2
- special_tokens_map.json +2 -4
- tokenizer.json +2 -2
- tokenizer_config.json +11 -4
    	
        added_tokens.json
    CHANGED
    
    | @@ -8,6 +8,7 @@ | |
| 8 | 
             
              "<img>": 92544,
         | 
| 9 | 
             
              "<quad>": 92547,
         | 
| 10 | 
             
              "<ref>": 92549,
         | 
|  | |
| 11 | 
             
              "<|action_end|>": 92540,
         | 
| 12 | 
             
              "<|action_start|>": 92541,
         | 
| 13 | 
             
              "<|im_end|>": 92542,
         | 
|  | |
| 8 | 
             
              "<img>": 92544,
         | 
| 9 | 
             
              "<quad>": 92547,
         | 
| 10 | 
             
              "<ref>": 92549,
         | 
| 11 | 
            +
              "<video>": 92553,
         | 
| 12 | 
             
              "<|action_end|>": 92540,
         | 
| 13 | 
             
              "<|action_start|>": 92541,
         | 
| 14 | 
             
              "<|im_end|>": 92542,
         | 
    	
        chat_template.jinja
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 | 
             
            {% for message in messages %}{{'<|im_start|>' + message['role'] + '
         | 
| 2 | 
            -
            '}}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<image>
         | 
| 3 | 
             
            ' }}{% elif content['type'] == 'video' %}{{ '<video>
         | 
| 4 | 
             
            ' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{'<|im_end|>
         | 
| 5 | 
             
            '}}{% endfor %}{% if add_generation_prompt %}{{'<|im_start|>assistant
         | 
|  | |
| 1 | 
             
            {% for message in messages %}{{'<|im_start|>' + message['role'] + '
         | 
| 2 | 
            +
            '}}{% if message['content'] is string %}{{ message['content'] }}{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' %}{{ '<IMG_CONTEXT>
         | 
| 3 | 
             
            ' }}{% elif content['type'] == 'video' %}{{ '<video>
         | 
| 4 | 
             
            ' }}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}{% endif %}{{'<|im_end|>
         | 
| 5 | 
             
            '}}{% endfor %}{% if add_generation_prompt %}{{'<|im_start|>assistant
         | 
    	
        processor_config.json
    CHANGED
    
    | @@ -1,6 +1,4 @@ | |
| 1 | 
             
            {
         | 
| 2 | 
            -
              "fake_image_token": "<image>",
         | 
| 3 | 
            -
              "fake_video_token": "<video>",
         | 
| 4 | 
             
              "image_seq_length": 256,
         | 
| 5 | 
             
              "processor_class": "InternVLProcessor"
         | 
| 6 | 
             
            }
         | 
|  | |
| 1 | 
             
            {
         | 
|  | |
|  | |
| 2 | 
             
              "image_seq_length": 256,
         | 
| 3 | 
             
              "processor_class": "InternVLProcessor"
         | 
| 4 | 
             
            }
         | 
    	
        special_tokens_map.json
    CHANGED
    
    | @@ -23,8 +23,6 @@ | |
| 23 | 
             
                "rstrip": false,
         | 
| 24 | 
             
                "single_word": false
         | 
| 25 | 
             
              },
         | 
| 26 | 
            -
              "context_image_token": "<IMG_CONTEXT>",
         | 
| 27 | 
            -
              "end_image_token": "</img>",
         | 
| 28 | 
             
              "eos_token": {
         | 
| 29 | 
             
                "content": "</s>",
         | 
| 30 | 
             
                "lstrip": false,
         | 
| @@ -39,12 +37,12 @@ | |
| 39 | 
             
                "rstrip": false,
         | 
| 40 | 
             
                "single_word": false
         | 
| 41 | 
             
              },
         | 
| 42 | 
            -
              "start_image_token": "<img>",
         | 
| 43 | 
             
              "unk_token": {
         | 
| 44 | 
             
                "content": "<unk>",
         | 
| 45 | 
             
                "lstrip": false,
         | 
| 46 | 
             
                "normalized": false,
         | 
| 47 | 
             
                "rstrip": false,
         | 
| 48 | 
             
                "single_word": false
         | 
| 49 | 
            -
              }
         | 
|  | |
| 50 | 
             
            }
         | 
|  | |
| 23 | 
             
                "rstrip": false,
         | 
| 24 | 
             
                "single_word": false
         | 
| 25 | 
             
              },
         | 
|  | |
|  | |
| 26 | 
             
              "eos_token": {
         | 
| 27 | 
             
                "content": "</s>",
         | 
| 28 | 
             
                "lstrip": false,
         | 
|  | |
| 37 | 
             
                "rstrip": false,
         | 
| 38 | 
             
                "single_word": false
         | 
| 39 | 
             
              },
         | 
|  | |
| 40 | 
             
              "unk_token": {
         | 
| 41 | 
             
                "content": "<unk>",
         | 
| 42 | 
             
                "lstrip": false,
         | 
| 43 | 
             
                "normalized": false,
         | 
| 44 | 
             
                "rstrip": false,
         | 
| 45 | 
             
                "single_word": false
         | 
| 46 | 
            +
              },
         | 
| 47 | 
            +
              "video_token": "<video>"
         | 
| 48 | 
             
            }
         | 
    	
        tokenizer.json
    CHANGED
    
    | @@ -1,3 +1,3 @@ | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            -
            oid sha256: | 
| 3 | 
            -
            size  | 
|  | |
| 1 | 
             
            version https://git-lfs.github.com/spec/v1
         | 
| 2 | 
            +
            oid sha256:ab3c9a9a9b96c4b6c042fd747c62d971b6b3475e6d6804ac77ce49c3e26876b7
         | 
| 3 | 
            +
            size 10578541
         | 
    	
        tokenizer_config.json
    CHANGED
    
    | @@ -1634,6 +1634,14 @@ | |
| 1634 | 
             
                  "rstrip": false,
         | 
| 1635 | 
             
                  "single_word": false,
         | 
| 1636 | 
             
                  "special": true
         | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 1637 | 
             
                }
         | 
| 1638 | 
             
              },
         | 
| 1639 | 
             
              "additional_special_tokens": [
         | 
| @@ -1665,9 +1673,7 @@ | |
| 1665 | 
             
              "end_image_token": "</img>",
         | 
| 1666 | 
             
              "eos_token": "</s>",
         | 
| 1667 | 
             
              "extra_special_tokens": {
         | 
| 1668 | 
            -
                "context_image_token": "<IMG_CONTEXT>",
         | 
| 1669 | 
            -
                "end_image_token": "</img>",
         | 
| 1670 | 
            -
                "start_image_token": "<img>"
         | 
| 1671 | 
             
              },
         | 
| 1672 | 
             
              "legacy": false,
         | 
| 1673 | 
             
              "model_max_length": 8192,
         | 
| @@ -1679,5 +1685,6 @@ | |
| 1679 | 
             
              "start_image_token": "<img>",
         | 
| 1680 | 
             
              "tokenizer_class": "LlamaTokenizer",
         | 
| 1681 | 
             
              "unk_token": "<unk>",
         | 
| 1682 | 
            -
              "use_default_system_prompt": false
         | 
|  | |
| 1683 | 
             
            }
         | 
|  | |
| 1634 | 
             
                  "rstrip": false,
         | 
| 1635 | 
             
                  "single_word": false,
         | 
| 1636 | 
             
                  "special": true
         | 
| 1637 | 
            +
                },
         | 
| 1638 | 
            +
                "92553": {
         | 
| 1639 | 
            +
                  "content": "<video>",
         | 
| 1640 | 
            +
                  "lstrip": false,
         | 
| 1641 | 
            +
                  "normalized": false,
         | 
| 1642 | 
            +
                  "rstrip": false,
         | 
| 1643 | 
            +
                  "single_word": false,
         | 
| 1644 | 
            +
                  "special": true
         | 
| 1645 | 
             
                }
         | 
| 1646 | 
             
              },
         | 
| 1647 | 
             
              "additional_special_tokens": [
         | 
|  | |
| 1673 | 
             
              "end_image_token": "</img>",
         | 
| 1674 | 
             
              "eos_token": "</s>",
         | 
| 1675 | 
             
              "extra_special_tokens": {
         | 
| 1676 | 
            +
                "video_token": "<video>"
         | 
|  | |
|  | |
| 1677 | 
             
              },
         | 
| 1678 | 
             
              "legacy": false,
         | 
| 1679 | 
             
              "model_max_length": 8192,
         | 
|  | |
| 1685 | 
             
              "start_image_token": "<img>",
         | 
| 1686 | 
             
              "tokenizer_class": "LlamaTokenizer",
         | 
| 1687 | 
             
              "unk_token": "<unk>",
         | 
| 1688 | 
            +
              "use_default_system_prompt": false,
         | 
| 1689 | 
            +
              "video_token": "<video>"
         | 
| 1690 | 
             
            }
         | 
