fix: fall back to the local GPT-2 tokenizer files when tiktoken cannot be loaded without internet access (#12478)
Signed-off-by: -LAN- <laipz8200@outlook.com>
This commit is contained in:
parent
53bb37b749
commit
0a49d3dd52
@ -1,8 +1,6 @@
|
|||||||
from threading import Lock
|
from threading import Lock
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
import tiktoken
|
|
||||||
|
|
||||||
_tokenizer: Any = None
|
_tokenizer: Any = None
|
||||||
_lock = Lock()
|
_lock = Lock()
|
||||||
|
|
||||||
@ -33,9 +31,17 @@ class GPT2Tokenizer:
|
|||||||
if _tokenizer is None:
|
if _tokenizer is None:
|
||||||
# Try to use tiktoken to get the tokenizer because it is faster
|
# Try to use tiktoken to get the tokenizer because it is faster
|
||||||
#
|
#
|
||||||
|
try:
|
||||||
|
import tiktoken
|
||||||
|
|
||||||
_tokenizer = tiktoken.get_encoding("gpt2")
|
_tokenizer = tiktoken.get_encoding("gpt2")
|
||||||
# base_path = abspath(__file__)
|
except Exception:
|
||||||
# gpt2_tokenizer_path = join(dirname(base_path), "gpt2")
|
from os.path import abspath, dirname, join
|
||||||
# _tokenizer = TransformerGPT2Tokenizer.from_pretrained(gpt2_tokenizer_path)
|
|
||||||
|
from transformers import GPT2Tokenizer as TransformerGPT2Tokenizer # type: ignore
|
||||||
|
|
||||||
|
base_path = abspath(__file__)
|
||||||
|
gpt2_tokenizer_path = join(dirname(base_path), "gpt2")
|
||||||
|
_tokenizer = TransformerGPT2Tokenizer.from_pretrained(gpt2_tokenizer_path)
|
||||||
|
|
||||||
return _tokenizer
|
return _tokenizer
|
||||||
|
Loading…
Reference in New Issue
Block a user