diff --git a/api/core/model_providers/models/llm/tongyi_model.py b/api/core/model_providers/models/llm/tongyi_model.py index a66606e16b..319505817f 100644 --- a/api/core/model_providers/models/llm/tongyi_model.py +++ b/api/core/model_providers/models/llm/tongyi_model.py @@ -18,7 +18,6 @@ class TongyiModel(BaseLLM): def _init_client(self) -> Any: provider_model_kwargs = self._to_model_kwargs_input(self.model_rules, self.model_kwargs) - del provider_model_kwargs['max_tokens'] return EnhanceTongyi( model_name=self.name, max_retries=1, @@ -58,7 +57,6 @@ class TongyiModel(BaseLLM): def _set_model_kwargs(self, model_kwargs: ModelKwargs): provider_model_kwargs = self._to_model_kwargs_input(self.model_rules, model_kwargs) - del provider_model_kwargs['max_tokens'] for k, v in provider_model_kwargs.items(): if hasattr(self.client, k): setattr(self.client, k, v) diff --git a/api/core/model_providers/providers/tongyi_provider.py b/api/core/model_providers/providers/tongyi_provider.py index d3074b885c..d48b4447f8 100644 --- a/api/core/model_providers/providers/tongyi_provider.py +++ b/api/core/model_providers/providers/tongyi_provider.py @@ -24,12 +24,12 @@ class TongyiProvider(BaseModelProvider): if model_type == ModelType.TEXT_GENERATION: return [ { - 'id': 'qwen-v1', - 'name': 'qwen-v1', + 'id': 'qwen-turbo', + 'name': 'qwen-turbo', }, { - 'id': 'qwen-plus-v1', - 'name': 'qwen-plus-v1', + 'id': 'qwen-plus', + 'name': 'qwen-plus', } ] else: @@ -58,16 +58,16 @@ class TongyiProvider(BaseModelProvider): :return: """ model_max_tokens = { - 'qwen-v1': 1500, - 'qwen-plus-v1': 6500 + 'qwen-turbo': 6000, + 'qwen-plus': 6000 } return ModelKwargsRules( - temperature=KwargRule[float](enabled=False), - top_p=KwargRule[float](min=0, max=1, default=0.8, precision=2), + temperature=KwargRule[float](min=0.01, max=1, default=1, precision=2), + top_p=KwargRule[float](min=0.01, max=0.99, default=0.5, precision=2), presence_penalty=KwargRule[float](enabled=False), frequency_penalty=KwargRule[float](enabled=False), - max_tokens=KwargRule[int](min=10, max=model_max_tokens.get(model_name), default=1024, precision=0), + max_tokens=KwargRule[int](enabled=False, max=model_max_tokens.get(model_name)), ) @classmethod @@ -84,7 +84,7 @@ class TongyiProvider(BaseModelProvider): } llm = EnhanceTongyi( - model_name='qwen-v1', + model_name='qwen-turbo', max_retries=1, **credential_kwargs ) diff --git a/api/core/model_providers/rules/tongyi.json b/api/core/model_providers/rules/tongyi.json index 0af3e61ec7..c431f50b3f 100644 --- a/api/core/model_providers/rules/tongyi.json +++ b/api/core/model_providers/rules/tongyi.json @@ -3,5 +3,19 @@ "custom" ], "system_config": null, - "model_flexibility": "fixed" + "model_flexibility": "fixed", + "price_config": { + "qwen-turbo": { + "prompt": "0.012", + "completion": "0.012", + "unit": "0.001", + "currency": "RMB" + }, + "qwen-plus": { + "prompt": "0.14", + "completion": "0.14", + "unit": "0.001", + "currency": "RMB" + } + } } \ No newline at end of file diff --git a/api/requirements.txt b/api/requirements.txt index a1ca193ff8..5c2383c9e8 100644 --- a/api/requirements.txt +++ b/api/requirements.txt @@ -44,7 +44,7 @@ readabilipy==0.2.0 google-search-results==2.4.2 replicate~=0.9.0 websocket-client~=1.6.1 -dashscope~=1.5.0 +dashscope~=1.11.0 huggingface_hub~=0.16.4 transformers~=4.31.0 stripe~=5.5.0 diff --git a/api/tests/integration_tests/models/llm/test_tongyi_model.py b/api/tests/integration_tests/models/llm/test_tongyi_model.py index c2254dec0c..8c34497ac7 100644 --- a/api/tests/integration_tests/models/llm/test_tongyi_model.py +++ b/api/tests/integration_tests/models/llm/test_tongyi_model.py @@ -44,7 +44,7 @@ def decrypt_side_effect(tenant_id, encrypted_api_key): @patch('core.helper.encrypter.decrypt_token', side_effect=decrypt_side_effect) def test_get_num_tokens(mock_decrypt): - model = get_mock_model('qwen-v1') + model = get_mock_model('qwen-turbo') rst = model.get_num_tokens([ PromptMessage(type=MessageType.HUMAN, content='Who is your manufacturer?') ]) @@ -55,7 +55,7 @@ def test_get_num_tokens(mock_decrypt): def test_run(mock_decrypt, mocker): mocker.patch('core.model_providers.providers.base.BaseModelProvider.update_last_used', return_value=None) - model = get_mock_model('qwen-v1') + model = get_mock_model('qwen-turbo') rst = model.run( [PromptMessage(content='Human: Are you Human? you MUST only answer `y` or `n`? \nAssistant: ')], stop=['\nHuman:'],