Display selected TTS voice name (#2459)
Co-authored-by: luowei <redacted: access-token-like string appeared in place of an email address> Co-authored-by: crazywoola <427733928@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
This commit is contained in:
parent
963d9b6032
commit
c6e2900be7
@ -53,7 +53,7 @@ class TTSModel(AIModel):
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def get_tts_model_voices(self, model: str, credentials: dict, language: str) -> list:
|
def get_tts_model_voices(self, model: str, credentials: dict, language: Optional[str] = None) -> list:
|
||||||
"""
|
"""
|
||||||
Get voice for given tts model voices
|
Get voice for given tts model voices
|
||||||
|
|
||||||
@ -66,7 +66,10 @@ class TTSModel(AIModel):
|
|||||||
|
|
||||||
if model_schema and ModelPropertyKey.VOICES in model_schema.model_properties:
|
if model_schema and ModelPropertyKey.VOICES in model_schema.model_properties:
|
||||||
voices = model_schema.model_properties[ModelPropertyKey.VOICES]
|
voices = model_schema.model_properties[ModelPropertyKey.VOICES]
|
||||||
return [{'name': d['name'], 'value': d['mode']} for d in voices if language and language in d.get('language')]
|
if language:
|
||||||
|
return [{'name': d['name'], 'value': d['mode']} for d in voices if language and language in d.get('language')]
|
||||||
|
else:
|
||||||
|
return [{'name': d['name'], 'value': d['mode']} for d in voices]
|
||||||
|
|
||||||
def _get_model_default_voice(self, model: str, credentials: dict) -> any:
|
def _get_model_default_voice(self, model: str, credentials: dict) -> any:
|
||||||
"""
|
"""
|
||||||
|
@ -119,7 +119,7 @@ class OpenAIText2SpeechModel(_CommonOpenAI, TTSModel):
|
|||||||
"""
|
"""
|
||||||
# transform credentials to kwargs for model instance
|
# transform credentials to kwargs for model instance
|
||||||
credentials_kwargs = self._to_credential_kwargs(credentials)
|
credentials_kwargs = self._to_credential_kwargs(credentials)
|
||||||
if not voice:
|
if not voice or voice not in self.get_tts_model_voices(model=model, credentials=credentials):
|
||||||
voice = self._get_model_default_voice(model, credentials)
|
voice = self._get_model_default_voice(model, credentials)
|
||||||
word_limit = self._get_model_word_limit(model, credentials)
|
word_limit = self._get_model_word_limit(model, credentials)
|
||||||
audio_type = self._get_model_audio_type(model, credentials)
|
audio_type = self._get_model_audio_type(model, credentials)
|
||||||
|
@ -34,7 +34,7 @@ class TongyiText2SpeechModel(_CommonTongyi, TTSModel):
|
|||||||
:return: text translated to audio file
|
:return: text translated to audio file
|
||||||
"""
|
"""
|
||||||
audio_type = self._get_model_audio_type(model, credentials)
|
audio_type = self._get_model_audio_type(model, credentials)
|
||||||
if not voice:
|
if not voice or voice not in self.get_tts_model_voices(model=model, credentials=credentials):
|
||||||
voice = self._get_model_default_voice(model, credentials)
|
voice = self._get_model_default_voice(model, credentials)
|
||||||
if streaming:
|
if streaming:
|
||||||
return Response(stream_with_context(self._tts_invoke_streaming(model=model,
|
return Response(stream_with_context(self._tts_invoke_streaming(model=model,
|
||||||
|
@ -3,7 +3,7 @@ import type { FC } from 'react'
|
|||||||
import { memo, useState } from 'react'
|
import { memo, useState } from 'react'
|
||||||
import { useTranslation } from 'react-i18next'
|
import { useTranslation } from 'react-i18next'
|
||||||
import cn from 'classnames'
|
import cn from 'classnames'
|
||||||
import ParamConfigContent from './param-config-content'
|
import VoiceParamConfig from './param-config-content'
|
||||||
import { Settings01 } from '@/app/components/base/icons/src/vender/line/general'
|
import { Settings01 } from '@/app/components/base/icons/src/vender/line/general'
|
||||||
import {
|
import {
|
||||||
PortalToFollowElem,
|
PortalToFollowElem,
|
||||||
@ -27,12 +27,12 @@ const ParamsConfig: FC = () => {
|
|||||||
<PortalToFollowElemTrigger onClick={() => setOpen(v => !v)}>
|
<PortalToFollowElemTrigger onClick={() => setOpen(v => !v)}>
|
||||||
<div className={cn('flex items-center rounded-md h-7 px-3 space-x-1 text-gray-700 cursor-pointer hover:bg-gray-200', open && 'bg-gray-200')}>
|
<div className={cn('flex items-center rounded-md h-7 px-3 space-x-1 text-gray-700 cursor-pointer hover:bg-gray-200', open && 'bg-gray-200')}>
|
||||||
<Settings01 className='w-3.5 h-3.5 ' />
|
<Settings01 className='w-3.5 h-3.5 ' />
|
||||||
<div className='ml-1 leading-[18px] text-xs font-medium '>{t('appDebug.vision.settings')}</div>
|
<div className='ml-1 leading-[18px] text-xs font-medium '>{t('appDebug.voice.settings')}</div>
|
||||||
</div>
|
</div>
|
||||||
</PortalToFollowElemTrigger>
|
</PortalToFollowElemTrigger>
|
||||||
<PortalToFollowElemContent style={{ zIndex: 50 }}>
|
<PortalToFollowElemContent style={{ zIndex: 50 }}>
|
||||||
<div className='w-80 sm:w-[412px] p-4 bg-white rounded-lg border-[0.5px] border-gray-200 shadow-lg space-y-3'>
|
<div className='w-80 sm:w-[412px] p-4 bg-white rounded-lg border-[0.5px] border-gray-200 shadow-lg space-y-3'>
|
||||||
<ParamConfigContent />
|
<VoiceParamConfig />
|
||||||
</div>
|
</div>
|
||||||
</PortalToFollowElemContent>
|
</PortalToFollowElemContent>
|
||||||
</PortalToFollowElem>
|
</PortalToFollowElem>
|
||||||
|
@ -28,7 +28,8 @@ const VoiceParamConfig: FC = () => {
|
|||||||
const languageItem = languages.find(item => item.value === textToSpeechConfig.language)
|
const languageItem = languages.find(item => item.value === textToSpeechConfig.language)
|
||||||
const localLanguagePlaceholder = languageItem?.name || t('common.placeholder.select')
|
const localLanguagePlaceholder = languageItem?.name || t('common.placeholder.select')
|
||||||
|
|
||||||
const voiceItems = useSWR({ url: `/apps/${appId}/text-to-audio/voices?language=${languageItem ? languageItem.value : 'en-US'}` }, fetchAppVoices).data
|
const language = languageItem?.value
|
||||||
|
const voiceItems = useSWR({ appId, language }, fetchAppVoices).data
|
||||||
const voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice)
|
const voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice)
|
||||||
const localVoicePlaceholder = voiceItem?.name || t('common.placeholder.select')
|
const localVoicePlaceholder = voiceItem?.name || t('common.placeholder.select')
|
||||||
|
|
||||||
|
@ -1,17 +1,27 @@
|
|||||||
'use client'
|
'use client'
|
||||||
|
import useSWR from 'swr'
|
||||||
import React, { type FC } from 'react'
|
import React, { type FC } from 'react'
|
||||||
import { useTranslation } from 'react-i18next'
|
import { useTranslation } from 'react-i18next'
|
||||||
import { useContext } from 'use-context-selector'
|
import { useContext } from 'use-context-selector'
|
||||||
|
import { usePathname } from 'next/navigation'
|
||||||
import Panel from '@/app/components/app/configuration/base/feature-panel'
|
import Panel from '@/app/components/app/configuration/base/feature-panel'
|
||||||
import { Speaker } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices'
|
import { Speaker } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices'
|
||||||
import ConfigContext from '@/context/debug-configuration'
|
import ConfigContext from '@/context/debug-configuration'
|
||||||
import { languages } from '@/utils/language'
|
import { languages } from '@/utils/language'
|
||||||
|
import { fetchAppVoices } from '@/service/apps'
|
||||||
|
|
||||||
const TextToSpeech: FC = () => {
|
const TextToSpeech: FC = () => {
|
||||||
const { t } = useTranslation()
|
const { t } = useTranslation()
|
||||||
const {
|
const {
|
||||||
textToSpeechConfig,
|
textToSpeechConfig,
|
||||||
} = useContext(ConfigContext)
|
} = useContext(ConfigContext)
|
||||||
|
|
||||||
|
const pathname = usePathname()
|
||||||
|
const matched = pathname.match(/\/app\/([^/]+)/)
|
||||||
|
const appId = (matched?.length && matched[1]) ? matched[1] : ''
|
||||||
|
const language = textToSpeechConfig.language
|
||||||
|
const voiceItems = useSWR({ appId, language }, fetchAppVoices).data
|
||||||
|
const voiceItem = voiceItems?.find(item => item.value === textToSpeechConfig.voice)
|
||||||
return (
|
return (
|
||||||
<Panel
|
<Panel
|
||||||
title={
|
title={
|
||||||
@ -22,7 +32,7 @@ const TextToSpeech: FC = () => {
|
|||||||
headerIcon={<Speaker className='w-4 h-4 text-[#7839EE]' />}
|
headerIcon={<Speaker className='w-4 h-4 text-[#7839EE]' />}
|
||||||
headerRight={
|
headerRight={
|
||||||
<div className='text-xs text-gray-500'>
|
<div className='text-xs text-gray-500'>
|
||||||
{languages.find(i => i.value === textToSpeechConfig.language)?.name} {textToSpeechConfig.voice}
|
{languages.find(i => i.value === textToSpeechConfig.language)?.name} - {voiceItem?.name ?? t('appDebug.voice.defaultDisplay')}
|
||||||
</div>
|
</div>
|
||||||
}
|
}
|
||||||
noBodySpacing
|
noBodySpacing
|
||||||
|
@ -300,6 +300,7 @@ const translation = {
|
|||||||
},
|
},
|
||||||
voice: {
|
voice: {
|
||||||
name: 'Voice',
|
name: 'Voice',
|
||||||
|
defaultDisplay: 'Default Voice',
|
||||||
description: 'Text to speech voice Settings',
|
description: 'Text to speech voice Settings',
|
||||||
settings: 'Settings',
|
settings: 'Settings',
|
||||||
voiceSettings: {
|
voiceSettings: {
|
||||||
|
@ -300,6 +300,7 @@ const translation = {
|
|||||||
},
|
},
|
||||||
voice: {
|
voice: {
|
||||||
name: 'voz',
|
name: 'voz',
|
||||||
|
defaultDisplay: 'Voz padrão',
|
||||||
description: 'Texto para configurações de timbre de voz',
|
description: 'Texto para configurações de timbre de voz',
|
||||||
settings: 'As configurações',
|
settings: 'As configurações',
|
||||||
voiceSettings: {
|
voiceSettings: {
|
||||||
|
@ -296,6 +296,7 @@ const translation = {
|
|||||||
},
|
},
|
||||||
voice: {
|
voice: {
|
||||||
name: '音色',
|
name: '音色',
|
||||||
|
defaultDisplay: '缺省音色',
|
||||||
description: '文本转语音音色设置',
|
description: '文本转语音音色设置',
|
||||||
settings: '设置',
|
settings: '设置',
|
||||||
voiceSettings: {
|
voiceSettings: {
|
||||||
|
@ -94,6 +94,6 @@ export const generationIntroduction: Fetcher<GenerationIntroductionResponse, { u
|
|||||||
return post<GenerationIntroductionResponse>(url, { body })
|
return post<GenerationIntroductionResponse>(url, { body })
|
||||||
}
|
}
|
||||||
|
|
||||||
export const fetchAppVoices: Fetcher<AppVoicesListResponse, { url: string }> = ({ url }) => {
|
export const fetchAppVoices: Fetcher<AppVoicesListResponse, { appId: string; language?: string }> = ({ appId, language }) => {
|
||||||
return get<AppVoicesListResponse>(url)
|
return get<AppVoicesListResponse>(`apps/${appId}/text-to-audio/voices?language=${language}`)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user