From 0a49d3dd5274e1f37d9d05ccbc7284c64d9d20a4 Mon Sep 17 00:00:00 2001
From: -LAN-
Date: Wed, 8 Jan 2025 14:49:44 +0800
Subject: [PATCH] fix: tiktoken cannot be loaded without internet (#12478)

Signed-off-by: -LAN-
---
 .../__base/tokenizers/gpt2_tokenzier.py        | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py b/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py
index 9a5c40addb..7f3c4a61e4 100644
--- a/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py
+++ b/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py
@@ -1,8 +1,6 @@
 from threading import Lock
 from typing import Any
 
-import tiktoken
-
 _tokenizer: Any = None
 _lock = Lock()
 
@@ -33,9 +31,17 @@ class GPT2Tokenizer:
             if _tokenizer is None:
                 # Try to use tiktoken to get the tokenizer because it is faster
                 #
-                _tokenizer = tiktoken.get_encoding("gpt2")
-                # base_path = abspath(__file__)
-                # gpt2_tokenizer_path = join(dirname(base_path), "gpt2")
-                # _tokenizer = TransformerGPT2Tokenizer.from_pretrained(gpt2_tokenizer_path)
+                try:
+                    import tiktoken
+
+                    _tokenizer = tiktoken.get_encoding("gpt2")
+                except Exception:
+                    from os.path import abspath, dirname, join
+
+                    from transformers import GPT2Tokenizer as TransformerGPT2Tokenizer  # type: ignore
+
+                    base_path = abspath(__file__)
+                    gpt2_tokenizer_path = join(dirname(base_path), "gpt2")
+                    _tokenizer = TransformerGPT2Tokenizer.from_pretrained(gpt2_tokenizer_path)
 
             return _tokenizer