Spaces:
Sleeping
Sleeping
# Tokenizer-comparison app configuration constants.

# Use the remote tokenizer service instead of a local tokenizer.
USE_REMOTE = False

# load_vocab_with_SPECIAL_TOKEN = True  # NOTE(review): per the original note,
# excluding special tokens makes the vocab-size calculation wrong and the
# overlap_token computation inconsistent — kept for reference.

# Encoding config — presumably controls whether special tokens (BOS/EOS) are
# added when encoding; confirm against the tokenizer call sites.
ADD_SPECIAL_TOKEN = False

# Import tokenizer backends lazily on first use instead of at startup.
LAZY_IMPORT = True

# DEBUG tip: set the environment variable RUST_BACKTRACE=full for full
# backtraces from the Rust tokenizers backend.

# Default text shown in the input field; multilingual sample covering
# English, Spanish, Chinese, and Japanese scripts.
default_user_input = """\
Replace this text in the input field to see how tokenization works.
Buenos días!
华为发布Mate60手机。
ラグビーワールドカップ2023フランス"""

# Tokenizers pre-selected for the side-by-side comparison panes.
default_tokenizer_type_1 = "llama3"
default_tokenizer_type_2 = "gpt_4"