Upload tokenizer
Browse files- added_tokens.json +104 -0
- special_tokens_map.json +112 -0
- spm.model +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +120 -0
added_tokens.json
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"--NME--": 128001,
|
| 3 |
+
"[E-0]": 128002,
|
| 4 |
+
"[E-10]": 128012,
|
| 5 |
+
"[E-11]": 128013,
|
| 6 |
+
"[E-12]": 128014,
|
| 7 |
+
"[E-13]": 128015,
|
| 8 |
+
"[E-14]": 128016,
|
| 9 |
+
"[E-15]": 128017,
|
| 10 |
+
"[E-16]": 128018,
|
| 11 |
+
"[E-17]": 128019,
|
| 12 |
+
"[E-18]": 128020,
|
| 13 |
+
"[E-19]": 128021,
|
| 14 |
+
"[E-1]": 128003,
|
| 15 |
+
"[E-20]": 128022,
|
| 16 |
+
"[E-21]": 128023,
|
| 17 |
+
"[E-22]": 128024,
|
| 18 |
+
"[E-23]": 128025,
|
| 19 |
+
"[E-24]": 128026,
|
| 20 |
+
"[E-25]": 128027,
|
| 21 |
+
"[E-26]": 128028,
|
| 22 |
+
"[E-27]": 128029,
|
| 23 |
+
"[E-28]": 128030,
|
| 24 |
+
"[E-29]": 128031,
|
| 25 |
+
"[E-2]": 128004,
|
| 26 |
+
"[E-30]": 128032,
|
| 27 |
+
"[E-31]": 128033,
|
| 28 |
+
"[E-32]": 128034,
|
| 29 |
+
"[E-33]": 128035,
|
| 30 |
+
"[E-34]": 128036,
|
| 31 |
+
"[E-35]": 128037,
|
| 32 |
+
"[E-36]": 128038,
|
| 33 |
+
"[E-37]": 128039,
|
| 34 |
+
"[E-38]": 128040,
|
| 35 |
+
"[E-39]": 128041,
|
| 36 |
+
"[E-3]": 128005,
|
| 37 |
+
"[E-40]": 128042,
|
| 38 |
+
"[E-41]": 128043,
|
| 39 |
+
"[E-42]": 128044,
|
| 40 |
+
"[E-43]": 128045,
|
| 41 |
+
"[E-44]": 128046,
|
| 42 |
+
"[E-45]": 128047,
|
| 43 |
+
"[E-46]": 128048,
|
| 44 |
+
"[E-47]": 128049,
|
| 45 |
+
"[E-48]": 128050,
|
| 46 |
+
"[E-49]": 128051,
|
| 47 |
+
"[E-4]": 128006,
|
| 48 |
+
"[E-50]": 128052,
|
| 49 |
+
"[E-51]": 128053,
|
| 50 |
+
"[E-52]": 128054,
|
| 51 |
+
"[E-53]": 128055,
|
| 52 |
+
"[E-54]": 128056,
|
| 53 |
+
"[E-55]": 128057,
|
| 54 |
+
"[E-56]": 128058,
|
| 55 |
+
"[E-57]": 128059,
|
| 56 |
+
"[E-58]": 128060,
|
| 57 |
+
"[E-59]": 128061,
|
| 58 |
+
"[E-5]": 128007,
|
| 59 |
+
"[E-60]": 128062,
|
| 60 |
+
"[E-61]": 128063,
|
| 61 |
+
"[E-62]": 128064,
|
| 62 |
+
"[E-63]": 128065,
|
| 63 |
+
"[E-64]": 128066,
|
| 64 |
+
"[E-65]": 128067,
|
| 65 |
+
"[E-66]": 128068,
|
| 66 |
+
"[E-67]": 128069,
|
| 67 |
+
"[E-68]": 128070,
|
| 68 |
+
"[E-69]": 128071,
|
| 69 |
+
"[E-6]": 128008,
|
| 70 |
+
"[E-70]": 128072,
|
| 71 |
+
"[E-71]": 128073,
|
| 72 |
+
"[E-72]": 128074,
|
| 73 |
+
"[E-73]": 128075,
|
| 74 |
+
"[E-74]": 128076,
|
| 75 |
+
"[E-75]": 128077,
|
| 76 |
+
"[E-76]": 128078,
|
| 77 |
+
"[E-77]": 128079,
|
| 78 |
+
"[E-78]": 128080,
|
| 79 |
+
"[E-79]": 128081,
|
| 80 |
+
"[E-7]": 128009,
|
| 81 |
+
"[E-80]": 128082,
|
| 82 |
+
"[E-81]": 128083,
|
| 83 |
+
"[E-82]": 128084,
|
| 84 |
+
"[E-83]": 128085,
|
| 85 |
+
"[E-84]": 128086,
|
| 86 |
+
"[E-85]": 128087,
|
| 87 |
+
"[E-86]": 128088,
|
| 88 |
+
"[E-87]": 128089,
|
| 89 |
+
"[E-88]": 128090,
|
| 90 |
+
"[E-89]": 128091,
|
| 91 |
+
"[E-8]": 128010,
|
| 92 |
+
"[E-90]": 128092,
|
| 93 |
+
"[E-91]": 128093,
|
| 94 |
+
"[E-92]": 128094,
|
| 95 |
+
"[E-93]": 128095,
|
| 96 |
+
"[E-94]": 128096,
|
| 97 |
+
"[E-95]": 128097,
|
| 98 |
+
"[E-96]": 128098,
|
| 99 |
+
"[E-97]": 128099,
|
| 100 |
+
"[E-98]": 128100,
|
| 101 |
+
"[E-99]": 128101,
|
| 102 |
+
"[E-9]": 128011,
|
| 103 |
+
"[MASK]": 128000
|
| 104 |
+
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"--NME--",
|
| 4 |
+
"[E-0]",
|
| 5 |
+
"[E-1]",
|
| 6 |
+
"[E-2]",
|
| 7 |
+
"[E-3]",
|
| 8 |
+
"[E-4]",
|
| 9 |
+
"[E-5]",
|
| 10 |
+
"[E-6]",
|
| 11 |
+
"[E-7]",
|
| 12 |
+
"[E-8]",
|
| 13 |
+
"[E-9]",
|
| 14 |
+
"[E-10]",
|
| 15 |
+
"[E-11]",
|
| 16 |
+
"[E-12]",
|
| 17 |
+
"[E-13]",
|
| 18 |
+
"[E-14]",
|
| 19 |
+
"[E-15]",
|
| 20 |
+
"[E-16]",
|
| 21 |
+
"[E-17]",
|
| 22 |
+
"[E-18]",
|
| 23 |
+
"[E-19]",
|
| 24 |
+
"[E-20]",
|
| 25 |
+
"[E-21]",
|
| 26 |
+
"[E-22]",
|
| 27 |
+
"[E-23]",
|
| 28 |
+
"[E-24]",
|
| 29 |
+
"[E-25]",
|
| 30 |
+
"[E-26]",
|
| 31 |
+
"[E-27]",
|
| 32 |
+
"[E-28]",
|
| 33 |
+
"[E-29]",
|
| 34 |
+
"[E-30]",
|
| 35 |
+
"[E-31]",
|
| 36 |
+
"[E-32]",
|
| 37 |
+
"[E-33]",
|
| 38 |
+
"[E-34]",
|
| 39 |
+
"[E-35]",
|
| 40 |
+
"[E-36]",
|
| 41 |
+
"[E-37]",
|
| 42 |
+
"[E-38]",
|
| 43 |
+
"[E-39]",
|
| 44 |
+
"[E-40]",
|
| 45 |
+
"[E-41]",
|
| 46 |
+
"[E-42]",
|
| 47 |
+
"[E-43]",
|
| 48 |
+
"[E-44]",
|
| 49 |
+
"[E-45]",
|
| 50 |
+
"[E-46]",
|
| 51 |
+
"[E-47]",
|
| 52 |
+
"[E-48]",
|
| 53 |
+
"[E-49]",
|
| 54 |
+
"[E-50]",
|
| 55 |
+
"[E-51]",
|
| 56 |
+
"[E-52]",
|
| 57 |
+
"[E-53]",
|
| 58 |
+
"[E-54]",
|
| 59 |
+
"[E-55]",
|
| 60 |
+
"[E-56]",
|
| 61 |
+
"[E-57]",
|
| 62 |
+
"[E-58]",
|
| 63 |
+
"[E-59]",
|
| 64 |
+
"[E-60]",
|
| 65 |
+
"[E-61]",
|
| 66 |
+
"[E-62]",
|
| 67 |
+
"[E-63]",
|
| 68 |
+
"[E-64]",
|
| 69 |
+
"[E-65]",
|
| 70 |
+
"[E-66]",
|
| 71 |
+
"[E-67]",
|
| 72 |
+
"[E-68]",
|
| 73 |
+
"[E-69]",
|
| 74 |
+
"[E-70]",
|
| 75 |
+
"[E-71]",
|
| 76 |
+
"[E-72]",
|
| 77 |
+
"[E-73]",
|
| 78 |
+
"[E-74]",
|
| 79 |
+
"[E-75]",
|
| 80 |
+
"[E-76]",
|
| 81 |
+
"[E-77]",
|
| 82 |
+
"[E-78]",
|
| 83 |
+
"[E-79]",
|
| 84 |
+
"[E-80]",
|
| 85 |
+
"[E-81]",
|
| 86 |
+
"[E-82]",
|
| 87 |
+
"[E-83]",
|
| 88 |
+
"[E-84]",
|
| 89 |
+
"[E-85]",
|
| 90 |
+
"[E-86]",
|
| 91 |
+
"[E-87]",
|
| 92 |
+
"[E-88]",
|
| 93 |
+
"[E-89]",
|
| 94 |
+
"[E-90]",
|
| 95 |
+
"[E-91]",
|
| 96 |
+
"[E-92]",
|
| 97 |
+
"[E-93]",
|
| 98 |
+
"[E-94]",
|
| 99 |
+
"[E-95]",
|
| 100 |
+
"[E-96]",
|
| 101 |
+
"[E-97]",
|
| 102 |
+
"[E-98]",
|
| 103 |
+
"[E-99]"
|
| 104 |
+
],
|
| 105 |
+
"bos_token": "[CLS]",
|
| 106 |
+
"cls_token": "[CLS]",
|
| 107 |
+
"eos_token": "[SEP]",
|
| 108 |
+
"mask_token": "[MASK]",
|
| 109 |
+
"pad_token": "[PAD]",
|
| 110 |
+
"sep_token": "[SEP]",
|
| 111 |
+
"unk_token": "[UNK]"
|
| 112 |
+
}
|
spm.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
|
| 3 |
+
size 2464616
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": true,
|
| 3 |
+
"additional_special_tokens": [
|
| 4 |
+
"--NME--",
|
| 5 |
+
"[E-0]",
|
| 6 |
+
"[E-1]",
|
| 7 |
+
"[E-2]",
|
| 8 |
+
"[E-3]",
|
| 9 |
+
"[E-4]",
|
| 10 |
+
"[E-5]",
|
| 11 |
+
"[E-6]",
|
| 12 |
+
"[E-7]",
|
| 13 |
+
"[E-8]",
|
| 14 |
+
"[E-9]",
|
| 15 |
+
"[E-10]",
|
| 16 |
+
"[E-11]",
|
| 17 |
+
"[E-12]",
|
| 18 |
+
"[E-13]",
|
| 19 |
+
"[E-14]",
|
| 20 |
+
"[E-15]",
|
| 21 |
+
"[E-16]",
|
| 22 |
+
"[E-17]",
|
| 23 |
+
"[E-18]",
|
| 24 |
+
"[E-19]",
|
| 25 |
+
"[E-20]",
|
| 26 |
+
"[E-21]",
|
| 27 |
+
"[E-22]",
|
| 28 |
+
"[E-23]",
|
| 29 |
+
"[E-24]",
|
| 30 |
+
"[E-25]",
|
| 31 |
+
"[E-26]",
|
| 32 |
+
"[E-27]",
|
| 33 |
+
"[E-28]",
|
| 34 |
+
"[E-29]",
|
| 35 |
+
"[E-30]",
|
| 36 |
+
"[E-31]",
|
| 37 |
+
"[E-32]",
|
| 38 |
+
"[E-33]",
|
| 39 |
+
"[E-34]",
|
| 40 |
+
"[E-35]",
|
| 41 |
+
"[E-36]",
|
| 42 |
+
"[E-37]",
|
| 43 |
+
"[E-38]",
|
| 44 |
+
"[E-39]",
|
| 45 |
+
"[E-40]",
|
| 46 |
+
"[E-41]",
|
| 47 |
+
"[E-42]",
|
| 48 |
+
"[E-43]",
|
| 49 |
+
"[E-44]",
|
| 50 |
+
"[E-45]",
|
| 51 |
+
"[E-46]",
|
| 52 |
+
"[E-47]",
|
| 53 |
+
"[E-48]",
|
| 54 |
+
"[E-49]",
|
| 55 |
+
"[E-50]",
|
| 56 |
+
"[E-51]",
|
| 57 |
+
"[E-52]",
|
| 58 |
+
"[E-53]",
|
| 59 |
+
"[E-54]",
|
| 60 |
+
"[E-55]",
|
| 61 |
+
"[E-56]",
|
| 62 |
+
"[E-57]",
|
| 63 |
+
"[E-58]",
|
| 64 |
+
"[E-59]",
|
| 65 |
+
"[E-60]",
|
| 66 |
+
"[E-61]",
|
| 67 |
+
"[E-62]",
|
| 68 |
+
"[E-63]",
|
| 69 |
+
"[E-64]",
|
| 70 |
+
"[E-65]",
|
| 71 |
+
"[E-66]",
|
| 72 |
+
"[E-67]",
|
| 73 |
+
"[E-68]",
|
| 74 |
+
"[E-69]",
|
| 75 |
+
"[E-70]",
|
| 76 |
+
"[E-71]",
|
| 77 |
+
"[E-72]",
|
| 78 |
+
"[E-73]",
|
| 79 |
+
"[E-74]",
|
| 80 |
+
"[E-75]",
|
| 81 |
+
"[E-76]",
|
| 82 |
+
"[E-77]",
|
| 83 |
+
"[E-78]",
|
| 84 |
+
"[E-79]",
|
| 85 |
+
"[E-80]",
|
| 86 |
+
"[E-81]",
|
| 87 |
+
"[E-82]",
|
| 88 |
+
"[E-83]",
|
| 89 |
+
"[E-84]",
|
| 90 |
+
"[E-85]",
|
| 91 |
+
"[E-86]",
|
| 92 |
+
"[E-87]",
|
| 93 |
+
"[E-88]",
|
| 94 |
+
"[E-89]",
|
| 95 |
+
"[E-90]",
|
| 96 |
+
"[E-91]",
|
| 97 |
+
"[E-92]",
|
| 98 |
+
"[E-93]",
|
| 99 |
+
"[E-94]",
|
| 100 |
+
"[E-95]",
|
| 101 |
+
"[E-96]",
|
| 102 |
+
"[E-97]",
|
| 103 |
+
"[E-98]",
|
| 104 |
+
"[E-99]"
|
| 105 |
+
],
|
| 106 |
+
"bos_token": "[CLS]",
|
| 107 |
+
"clean_up_tokenization_spaces": true,
|
| 108 |
+
"cls_token": "[CLS]",
|
| 109 |
+
"do_lower_case": false,
|
| 110 |
+
"eos_token": "[SEP]",
|
| 111 |
+
"mask_token": "[MASK]",
|
| 112 |
+
"model_max_length": 1000000000000000019884624838656,
|
| 113 |
+
"pad_token": "[PAD]",
|
| 114 |
+
"sep_token": "[SEP]",
|
| 115 |
+
"sp_model_kwargs": {},
|
| 116 |
+
"split_by_punct": false,
|
| 117 |
+
"tokenizer_class": "DebertaV2Tokenizer",
|
| 118 |
+
"unk_token": "[UNK]",
|
| 119 |
+
"vocab_type": "spm"
|
| 120 |
+
}
|