diff --git a/api/core/model_runtime/model_providers/__base/tts_model.py b/api/core/model_runtime/model_providers/__base/tts_model.py
index 77be02978c..722d80c91e 100644
--- a/api/core/model_runtime/model_providers/__base/tts_model.py
+++ b/api/core/model_runtime/model_providers/__base/tts_model.py
@@ -53,7 +53,7 @@ class TTSModel(AIModel):
"""
raise NotImplementedError
- def get_tts_model_voices(self, model: str, credentials: dict, language: str) -> list:
+ def get_tts_model_voices(self, model: str, credentials: dict, language: Optional[str] = None) -> list:
"""
Get voice for given tts model voices
@@ -66,7 +66,10 @@ class TTSModel(AIModel):
if model_schema and ModelPropertyKey.VOICES in model_schema.model_properties:
voices = model_schema.model_properties[ModelPropertyKey.VOICES]
- return [{'name': d['name'], 'value': d['mode']} for d in voices if language and language in d.get('language')]
+ if language:
+     return [{'name': d['name'], 'value': d['mode']} for d in voices if language in (d.get('language') or [])]  # a voice with no language list never matches
+ else:
+     return [{'name': d['name'], 'value': d['mode']} for d in voices]  # no language filter: expose every voice
def _get_model_default_voice(self, model: str, credentials: dict) -> any:
"""
diff --git a/api/core/model_runtime/model_providers/openai/tts/tts.py b/api/core/model_runtime/model_providers/openai/tts/tts.py
index 269760ab64..b1718c063c 100644
--- a/api/core/model_runtime/model_providers/openai/tts/tts.py
+++ b/api/core/model_runtime/model_providers/openai/tts/tts.py
@@ -119,7 +119,7 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
"""
# transform credentials to kwargs for model instance
credentials_kwargs = self._to_credential_kwargs(credentials)
- if not voice:
+ if not voice or voice not in [d['value'] for d in (self.get_tts_model_voices(model=model, credentials=credentials) or [])]:  # compare against voice ids; the helper returns dicts (or None)
voice = self._get_model_default_voice(model, credentials)
word_limit = self._get_model_word_limit(model, credentials)
audio_type = self._get_model_audio_type(model, credentials)
diff --git a/api/core/model_runtime/model_providers/tongyi/tts/tts.py b/api/core/model_runtime/model_providers/tongyi/tts/tts.py
index 1b670baff7..6bd17684fe 100644
--- a/api/core/model_runtime/model_providers/tongyi/tts/tts.py
+++ b/api/core/model_runtime/model_providers/tongyi/tts/tts.py
@@ -34,7 +34,7 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel):
:return: text translated to audio file
"""
audio_type = self._get_model_audio_type(model, credentials)
- if not voice:
+ if not voice or voice not in [d['value'] for d in (self.get_tts_model_voices(model=model, credentials=credentials) or [])]:  # compare against voice ids; the helper returns dicts (or None)
voice = self._get_model_default_voice(model, credentials)
if streaming:
return Response(stream_with_context(self._tts_invoke_streaming(model=model,
diff --git a/web/app/components/app/configuration/config-vision/param-config.tsx b/web/app/components/app/configuration/config-vision/param-config.tsx
index f12565abfe..5ea0a32907 100644
--- a/web/app/components/app/configuration/config-vision/param-config.tsx
+++ b/web/app/components/app/configuration/config-vision/param-config.tsx
@@ -3,7 +3,7 @@ import type { FC } from 'react'
import { memo, useState } from 'react'
import { useTranslation } from 'react-i18next'
import cn from 'classnames'
-import ParamConfigContent from './param-config-content'
+import VoiceParamConfig from './param-config-content'
import { Settings01 } from '@/app/components/base/icons/src/vender/line/general'
import {
PortalToFollowElem,
@@ -27,12 +27,12 @@ const ParamsConfig: FC = () => {
setOpen(v => !v)}>
-
{t('appDebug.vision.settings')}
+
{t('appDebug.voice.settings')}
-
+
diff --git a/web/app/components/app/configuration/config-voice/param-config-content.tsx b/web/app/components/app/configuration/config-voice/param-config-content.tsx
index d93f4e978c..a749eca5e2 100644
--- a/web/app/components/app/configuration/config-voice/param-config-content.tsx
+++ b/web/app/components/app/configuration/config-voice/param-config-content.tsx
@@ -28,7 +28,8 @@ const VoiceParamConfig: FC = () => {
const languageItem = languages.find(item => item.value === textToSpeechConfig.language)
const localLanguagePlaceholder = languageItem?.name || t('common.placeholder.select')
- const voiceItems = useSWR({ url: `/apps/${appId}/text-to-audio/voices?language=${languageItem ? languageItem.value : 'en-US'}` }, fetchAppVoices).data
+ const language = languageItem?.value
+ const voiceItems = useSWR({ appId, language }, fetchAppVoices).data
const voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice)
const localVoicePlaceholder = voiceItem?.name || t('common.placeholder.select')
diff --git a/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx b/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx
index 6941c55b6e..24d3e0e64a 100644
--- a/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx
+++ b/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx
@@ -1,17 +1,27 @@
'use client'
+import useSWR from 'swr'
import React, { type FC } from 'react'
import { useTranslation } from 'react-i18next'
import { useContext } from 'use-context-selector'
+import { usePathname } from 'next/navigation'
import Panel from '@/app/components/app/configuration/base/feature-panel'
import { Speaker } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices'
import ConfigContext from '@/context/debug-configuration'
import { languages } from '@/utils/language'
+import { fetchAppVoices } from '@/service/apps'
const TextToSpeech: FC = () => {
const { t } = useTranslation()
const {
textToSpeechConfig,
} = useContext(ConfigContext)
+
+ const pathname = usePathname()
+ const matched = pathname.match(/\/app\/([^/]+)/)
+ const appId = (matched?.length && matched[1]) ? matched[1] : ''
+ const language = textToSpeechConfig.language
+ const voiceItems = useSWR({ appId, language }, fetchAppVoices).data
+ const voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice)
return (
{
headerIcon={}
headerRight={