From 6f5a8a33d9de4fce63082870d70d9e6f62869e1c Mon Sep 17 00:00:00 2001 From: -LAN- Date: Fri, 3 Jan 2025 09:13:18 +0800 Subject: [PATCH] refactor: replace gevent threadpool with ProcessPoolExecutor in GPT2Tokenizer (#12316) Signed-off-by: -LAN- --- .../model_providers/__base/tokenizers/gpt2_tokenzier.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py b/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py index ab45a95803..72d9b7163c 100644 --- a/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py +++ b/api/core/model_runtime/model_providers/__base/tokenizers/gpt2_tokenzier.py @@ -1,13 +1,13 @@ +from concurrent.futures import ProcessPoolExecutor from os.path import abspath, dirname, join from threading import Lock from typing import Any, cast -import gevent.threadpool # type: ignore from transformers import GPT2Tokenizer as TransformerGPT2Tokenizer # type: ignore _tokenizer: Any = None _lock = Lock() -_pool = gevent.threadpool.ThreadPool(1) +_executor = ProcessPoolExecutor(max_workers=1) class GPT2Tokenizer: @@ -22,8 +22,8 @@ class GPT2Tokenizer: @staticmethod def get_num_tokens(text: str) -> int: - future = _pool.spawn(GPT2Tokenizer._get_num_tokens_by_gpt2, text) - result = future.get(block=True) + future = _executor.submit(GPT2Tokenizer._get_num_tokens_by_gpt2, text) + result = future.result() return cast(int, result) @staticmethod