| { | |
| "add_prefix_space": false, | |
| "added_tokens_decoder": { | |
| "100256": { | |
| "content": "<|_unuse_missing_100256|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100257": { | |
| "content": "<|endoftext|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100258": { | |
| "content": "<|fim_prefix|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100259": { | |
| "content": "<|fim_middle|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100260": { | |
| "content": "<|fim_suffix|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100261": { | |
| "content": "<|_unuse_missing_100261|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100262": { | |
| "content": "<|_unuse_missing_100262|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100263": { | |
| "content": "<|_unuse_missing_100263|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100264": { | |
| "content": "<|_unuse_missing_100264|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100265": { | |
| "content": "<|_unuse_missing_100265|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100266": { | |
| "content": "<|_unuse_missing_100266|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100267": { | |
| "content": "<|_unuse_missing_100267|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100268": { | |
| "content": "<|_unuse_missing_100268|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100269": { | |
| "content": "<|_unuse_missing_100269|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100270": { | |
| "content": "<|_unuse_missing_100270|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100271": { | |
| "content": "<|dummy3|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100272": { | |
| "content": "<|im_start|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100273": { | |
| "content": "<|im_end|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100274": { | |
| "content": "<|stop|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100275": { | |
| "content": "<|endofturn|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "100276": { | |
| "content": "<|endofprompt|>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110491": { | |
| "content": "<repo_name>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110492": { | |
| "content": "<file_sep>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110493": { | |
| "content": "<issue_start>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110494": { | |
| "content": "<issue_comment>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110495": { | |
| "content": "<issue_closed>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110496": { | |
| "content": "<jupyter_start>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110497": { | |
| "content": "<jupyter_text>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110498": { | |
| "content": "<jupyter_code>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110499": { | |
| "content": "<jupyter_output>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110500": { | |
| "content": "<jupyter_script>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110501": { | |
| "content": "<empty_output>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110502": { | |
| "content": "<code_to_intermediate>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110503": { | |
| "content": "<intermediate_to_code>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110504": { | |
| "content": "<pr>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110505": { | |
| "content": "<pr_status>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110506": { | |
| "content": "<pr_is_merged>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110507": { | |
| "content": "<pr_base>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110508": { | |
| "content": "<pr_file>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110509": { | |
| "content": "<pr_base_code>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110510": { | |
| "content": "<pr_diff>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110511": { | |
| "content": "<pr_diff_hunk>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110512": { | |
| "content": "<pr_comment>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110513": { | |
| "content": "<pr_event_id>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110514": { | |
| "content": "<pr_review>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110515": { | |
| "content": "<pr_review_state>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110516": { | |
| "content": "<pr_review_comment>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110517": { | |
| "content": "<pr_in_reply_to_review_id>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110518": { | |
| "content": "<pr_in_reply_to_comment_id>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110519": { | |
| "content": "<pr_diff_hunk_comment_line>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110520": { | |
| "content": "<NAME>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110521": { | |
| "content": "<EMAIL>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110522": { | |
| "content": "<KEY>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| }, | |
| "110523": { | |
| "content": "<PASSWORD>", | |
| "lstrip": false, | |
| "normalized": false, | |
| "rstrip": false, | |
| "single_word": false, | |
| "special": true | |
| } | |
| }, | |
| "additional_special_tokens": [ | |
| "<|endoftext|>", | |
| "<|fim_prefix|>", | |
| "<|fim_middle|>", | |
| "<|fim_suffix|>", | |
| "<|endofprompt|>", | |
| "<|_unuse_missing_100256|>", | |
| "<|_unuse_missing_100261|>", | |
| "<|_unuse_missing_100262|>", | |
| "<|_unuse_missing_100263|>", | |
| "<|_unuse_missing_100264|>", | |
| "<|_unuse_missing_100265|>", | |
| "<|_unuse_missing_100266|>", | |
| "<|_unuse_missing_100267|>", | |
| "<|_unuse_missing_100268|>", | |
| "<|_unuse_missing_100269|>", | |
| "<|_unuse_missing_100270|>", | |
| "<|dummy3|>", | |
| "<|im_start|>", | |
| "<|im_end|>", | |
| "<|stop|>", | |
| "<|endofturn|>", | |
| "<repo_name>", | |
| "<file_sep>", | |
| "<issue_start>", | |
| "<issue_comment>", | |
| "<issue_closed>", | |
| "<jupyter_start>", | |
| "<jupyter_text>", | |
| "<jupyter_code>", | |
| "<jupyter_output>", | |
| "<jupyter_script>", | |
| "<empty_output>", | |
| "<code_to_intermediate>", | |
| "<intermediate_to_code>", | |
| "<pr>", | |
| "<pr_status>", | |
| "<pr_is_merged>", | |
| "<pr_base>", | |
| "<pr_file>", | |
| "<pr_base_code>", | |
| "<pr_diff>", | |
| "<pr_diff_hunk>", | |
| "<pr_comment>", | |
| "<pr_event_id>", | |
| "<pr_review>", | |
| "<pr_review_state>", | |
| "<pr_review_comment>", | |
| "<pr_in_reply_to_review_id>", | |
| "<pr_in_reply_to_comment_id>", | |
| "<pr_diff_hunk_comment_line>", | |
| "<NAME>", | |
| "<EMAIL>", | |
| "<KEY>", | |
| "<PASSWORD>" | |
| ], | |
| "bos_token": "<|endoftext|>", | |
| "chat_template": [ | |
| { | |
| "name": "default", | |
| "template": "<|im_start|>tool_list\n<|im_end|>\n{% for message in messages %}\n{% set content = message['content'] %}\n{% set role = message['role'] %}\n{% if loop.first and role != 'system' %}\n<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}\n{% if message['content'] is string %}\n<|im_start|>{{ role }}\n{{ message['content'] }}<|im_end|>\n{% else %}\n{% if content['type'] == 'image' %}\n<|im_start|>{{ role }} (mime)\n{\"type\": \"image/jpeg\", \"filename\": \"{{ content['filename'] }}\"}<|im_end|>\n<|im_start|>{{ role }} (vector)\n<|dummy3|><|im_end|>\n<|im_start|>image/aux\n다음 중 ocr은 사진에서 검출된 글자이고, lens_keyword는 사진에서 추출된 keyword와 bbox 위치입니다. bbox는 0~1 사이로 정규화된 [x1, y1, x2, y2]의 형태입니다. 참고하여 답변하세요. {\"ocr\": \"{{ content['ocr'] or '' }}\", \"lens_keywords\": \"{{ content['lens_keywords'] or '' }}\", \"lens_local_keywords\": \"{{ content['lens_local_keywords'] or '' }}\"}<|im_end|>\n{% elif content['type'] == 'video' %}\n<|im_start|>{{ role }} (mime)\n{\"type\": \"video/mp4\", \"filename\": \"{{ content['filename'] }}\"}<|im_end|>\n<|im_start|>{{ role }} (vector)\n<|dummy3|><|im_end|>\n<|im_start|>image/aux\n{% if content.get('is_final_grid') %}\n다음 중 lens_keyword는 사진에서 추출된 keyword와 bbox 위치입니다. bbox는 0~1 사이로 정규화된 [x1, y1, x2, y2]의 형태입니다. video_time_stamp는 비디오에서 해당 구간의 시간 정보입니다. speech_to_text는 비디오 속에서의 대화, 음성, 소리, 대사, 그리고 말을 전부 글로 받아 적은 것 입니다. 참고하여 답변하세요. {\"video_time_stamp\": \"{{ content['video_time_stamp'] }}\", \"lens_keywords\": \"{{ content.get('lens_keywords', '') }}\", \"lens_local_keywords\": \"{{ content.get('lens_local_keywords', '') }}\", \"speech_to_text\": \"{{ content.get('speech_to_text', '') }}\"}\n{% else %}\n다음 중 video_time_stamp는 비디오에서 해당 구간의 시간 정보입니다. 참고하여 답변하세요. {\"video_time_stamp\": \"{{ content['video_time_stamp'] }}\"}\n{% endif %}<|im_end|>\n{% elif content['type'] == 'text' %}\n<|im_start|>{{ role }}\n{{ content['text'] }}<|im_end|>\n{% endif %}\n{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}\n<|im_start|>assistant\n{% endif %}\n" | |
| } | |
| ], | |
| "clean_up_tokenization_spaces": true, | |
| "eos_token": "<|endofturn|>", | |
| "extra_special_tokens": {}, | |
| "model_max_length": 1000000000000000019884624838656, | |
| "pad_token": "<|endoftext|>", | |
| "tokenizer_class": "GPT2Tokenizer", | |
| "unk_token": "<|endoftext|>" | |
| } | |