From c6e2900be73ed8ff53f64866b40106ebef047a60 Mon Sep 17 00:00:00 2001 From: "Charlie.Wei" Date: Sun, 18 Feb 2024 15:39:25 +0800 Subject: [PATCH] Display selected tts voice name (#2459) Co-authored-by: luowei Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> --- .../model_providers/__base/tts_model.py | 7 +++++-- .../model_runtime/model_providers/openai/tts/tts.py | 2 +- .../model_runtime/model_providers/tongyi/tts/tts.py | 2 +- .../app/configuration/config-vision/param-config.tsx | 6 +++--- .../config-voice/param-config-content.tsx | 3 ++- .../features/chat-group/text-to-speech/index.tsx | 12 +++++++++++- web/i18n/lang/app-debug.en.ts | 1 + web/i18n/lang/app-debug.pt.ts | 1 + web/i18n/lang/app-debug.zh.ts | 1 + web/service/apps.ts | 4 ++-- 10 files changed, 28 insertions(+), 11 deletions(-) diff --git a/api/core/model_runtime/model_providers/__base/tts_model.py b/api/core/model_runtime/model_providers/__base/tts_model.py index 77be02978c..722d80c91e 100644 --- a/api/core/model_runtime/model_providers/__base/tts_model.py +++ b/api/core/model_runtime/model_providers/__base/tts_model.py @@ -53,7 +53,7 @@ class TTSModel(AIModel): """ raise NotImplementedError - def get_tts_model_voices(self, model: str, credentials: dict, language: str) -> list: + def get_tts_model_voices(self, model: str, credentials: dict, language: Optional[str] = None) -> list: """ Get voice for given tts model voices @@ -66,7 +66,10 @@ class TTSModel(AIModel): if model_schema and ModelPropertyKey.VOICES in model_schema.model_properties: voices = model_schema.model_properties[ModelPropertyKey.VOICES] - return [{'name': d['name'], 'value': d['mode']} for d in voices if language and language in d.get('language')] + if language: + return [{'name': d['name'], 'value': d['mode']} for d in voices if language and language in d.get('language')] + else: + return [{'name': d['name'], 'value': d['mode']} for d in voices] def _get_model_default_voice(self, model: str, credentials: dict) -> any: """ diff --git a/api/core/model_runtime/model_providers/openai/tts/tts.py b/api/core/model_runtime/model_providers/openai/tts/tts.py index 269760ab64..b1718c063c 100644 --- a/api/core/model_runtime/model_providers/openai/tts/tts.py +++ b/api/core/model_runtime/model_providers/openai/tts/tts.py @@ -119,7 +119,7 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel): """ # transform credentials to kwargs for model instance credentials_kwargs = self._to_credential_kwargs(credentials) - if not voice: + if not voice or voice not in self.get_tts_model_voices(model=model, credentials=credentials): voice = self._get_model_default_voice(model, credentials) word_limit = self._get_model_word_limit(model, credentials) audio_type = self._get_model_audio_type(model, credentials) diff --git a/api/core/model_runtime/model_providers/tongyi/tts/tts.py b/api/core/model_runtime/model_providers/tongyi/tts/tts.py index 1b670baff7..6bd17684fe 100644 --- a/api/core/model_runtime/model_providers/tongyi/tts/tts.py +++ b/api/core/model_runtime/model_providers/tongyi/tts/tts.py @@ -34,7 +34,7 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel): :return: text translated to audio file """ audio_type = self._get_model_audio_type(model, credentials) - if not voice: + if not voice or voice not in self.get_tts_model_voices(model=model, credentials=credentials): voice = self._get_model_default_voice(model, credentials) if streaming: return Response(stream_with_context(self._tts_invoke_streaming(model=model, diff --git a/web/app/components/app/configuration/config-vision/param-config.tsx b/web/app/components/app/configuration/config-vision/param-config.tsx index f12565abfe..5ea0a32907 100644 --- a/web/app/components/app/configuration/config-vision/param-config.tsx +++ b/web/app/components/app/configuration/config-vision/param-config.tsx @@ -3,7 +3,7 @@ import type { FC } from 'react' import { memo, useState } from 'react' import { useTranslation } from 'react-i18next' import cn from 'classnames' -import ParamConfigContent from './param-config-content' +import VoiceParamConfig from './param-config-content' import { Settings01 } from '@/app/components/base/icons/src/vender/line/general' import { PortalToFollowElem, @@ -27,12 +27,12 @@ const ParamsConfig: FC = () => { setOpen(v => !v)}>
-
{t('appDebug.vision.settings')}
+
{t('appDebug.voice.settings')}
- +
diff --git a/web/app/components/app/configuration/config-voice/param-config-content.tsx b/web/app/components/app/configuration/config-voice/param-config-content.tsx index d93f4e978c..a749eca5e2 100644 --- a/web/app/components/app/configuration/config-voice/param-config-content.tsx +++ b/web/app/components/app/configuration/config-voice/param-config-content.tsx @@ -28,7 +28,8 @@ const VoiceParamConfig: FC = () => { const languageItem = languages.find(item => item.value === textToSpeechConfig.language) const localLanguagePlaceholder = languageItem?.name || t('common.placeholder.select') - const voiceItems = useSWR({ url: `/apps/${appId}/text-to-audio/voices?language=${languageItem ? languageItem.value : 'en-US'}` }, fetchAppVoices).data + const language = languageItem?.value + const voiceItems = useSWR({ appId, language }, fetchAppVoices).data const voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice) const localVoicePlaceholder = voiceItem?.name || t('common.placeholder.select') diff --git a/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx b/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx index 6941c55b6e..24d3e0e64a 100644 --- a/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx +++ b/web/app/components/app/configuration/features/chat-group/text-to-speech/index.tsx @@ -1,17 +1,27 @@ 'use client' +import useSWR from 'swr' import React, { type FC } from 'react' import { useTranslation } from 'react-i18next' import { useContext } from 'use-context-selector' +import { usePathname } from 'next/navigation' import Panel from '@/app/components/app/configuration/base/feature-panel' import { Speaker } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices' import ConfigContext from '@/context/debug-configuration' import { languages } from '@/utils/language' +import { fetchAppVoices } from '@/service/apps' const TextToSpeech: FC = () => { const { t } = useTranslation() const { textToSpeechConfig, } = useContext(ConfigContext) + + const pathname = usePathname() + const matched = pathname.match(/\/app\/([^/]+)/) + const appId = (matched?.length && matched[1]) ? matched[1] : '' + const language = textToSpeechConfig.language + const voiceItems = useSWR({ appId, language }, fetchAppVoices).data + const voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice) return ( { headerIcon={} headerRight={
- {languages.find(i => i.value === textToSpeechConfig.language)?.name} {textToSpeechConfig.voice} + {languages.find(i => i.value === textToSpeechConfig.language)?.name} - {voiceItem?.name ?? t('appDebug.voice.defaultDisplay')}
} noBodySpacing diff --git a/web/i18n/lang/app-debug.en.ts b/web/i18n/lang/app-debug.en.ts index 088d76154e..82f9ae7926 100644 --- a/web/i18n/lang/app-debug.en.ts +++ b/web/i18n/lang/app-debug.en.ts @@ -300,6 +300,7 @@ const translation = { }, voice: { name: 'Voice', + defaultDisplay: 'Default Voice', description: 'Text to speech voice Settings', settings: 'Settings', voiceSettings: { diff --git a/web/i18n/lang/app-debug.pt.ts b/web/i18n/lang/app-debug.pt.ts index 0ab8c45d8d..4d706cc38c 100644 --- a/web/i18n/lang/app-debug.pt.ts +++ b/web/i18n/lang/app-debug.pt.ts @@ -300,6 +300,7 @@ const translation = { }, voice: { name: 'voz', + defaultDisplay: 'Voz padrão', description: 'Texto para configurações de timbre de voz', settings: 'As configurações', voiceSettings: { diff --git a/web/i18n/lang/app-debug.zh.ts b/web/i18n/lang/app-debug.zh.ts index 7cd61cf63e..6df5d833d1 100644 --- a/web/i18n/lang/app-debug.zh.ts +++ b/web/i18n/lang/app-debug.zh.ts @@ -296,6 +296,7 @@ const translation = { }, voice: { name: '音色', + defaultDisplay: '缺省音色', description: '文本转语音音色设置', settings: '设置', voiceSettings: { diff --git a/web/service/apps.ts b/web/service/apps.ts index fde8128754..ac1a3d1878 100644 --- a/web/service/apps.ts +++ b/web/service/apps.ts @@ -94,6 +94,6 @@ export const generationIntroduction: Fetcher(url, { body }) } -export const fetchAppVoices: Fetcher = ({ url }) => { - return get(url) +export const fetchAppVoices: Fetcher = ({ appId, language }) => { + return get(`apps/${appId}/text-to-audio/voices?language=${language}`) }