Spaces:
Sleeping
Sleeping
update
Browse files
- README.md +0 -3
- vocab/chatglm_6b/__init__.py +7 -11
README.md
CHANGED
|
@@ -18,9 +18,6 @@ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-
|
|
| 18 |
## TODO
|
| 19 |
|
| 20 |
|
| 21 |
-
- 'MossTokenizer' object has no attribute 'encoder'
|
| 22 |
-
- chatglmTokenizer
|
| 23 |
-
|
| 24 |
|
| 25 |
|
| 26 |
|
|
|
|
| 18 |
## TODO
|
| 19 |
|
| 20 |
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
|
| 23 |
|
vocab/chatglm_6b/__init__.py
CHANGED
|
@@ -6,17 +6,13 @@ import os
|
|
| 6 |
import config
|
| 7 |
from transformers import AutoTokenizer
|
| 8 |
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
# CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 17 |
-
# TOKENIZER_DIR = os.path.join(CURRENT_DIR, "chatglm_6b")
|
| 18 |
-
# tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR, trust_remote_code=True)
|
| 19 |
|
| 20 |
# https://huggingface.co/THUDM/chatglm-6b/blob/main/tokenization_chatglm.py#L153
|
| 21 |
tokenizer.comments = f"num_image_tokens: {tokenizer.sp_tokenizer.num_image_tokens}; num_image_tokens: {tokenizer.sp_tokenizer.num_text_tokens} "
|
| 22 |
-
|
|
|
|
| 6 |
import config
|
| 7 |
from transformers import AutoTokenizer
|
| 8 |
|
| 9 |
+
if False: # 有bug
|
| 10 |
+
tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm-6b", trust_remote_code=True)
|
| 11 |
+
else:
|
| 12 |
+
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
|
| 13 |
+
CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
|
| 14 |
+
TOKENIZER_DIR = os.path.join(CURRENT_DIR, "chatglm_6b")
|
| 15 |
+
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_DIR, trust_remote_code=True)
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# https://huggingface.co/THUDM/chatglm-6b/blob/main/tokenization_chatglm.py#L153
|
| 18 |
tokenizer.comments = f"num_image_tokens: {tokenizer.sp_tokenizer.num_image_tokens}; num_image_tokens: {tokenizer.sp_tokenizer.num_text_tokens} "
|
|
|