convert audio wav to mp3 (#552)
Parent: b91e226063
Commit: 397a92f2ee

Audio service (Python):

@@ -6,7 +6,8 @@ from services.errors.audio import NoAudioUploadedServiceError, AudioTooLargeServiceError
 from core.llm.whisper import Whisper
 from models.provider import ProviderName
 
-FILE_SIZE_LIMIT = 1 * 1024 * 1024
+FILE_SIZE = 15
+FILE_SIZE_LIMIT = FILE_SIZE * 1024 * 1024
 ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']
 
 class AudioService:

@@ -23,17 +24,17 @@ class AudioService:
         file_size = len(file_content)
 
         if file_size > FILE_SIZE_LIMIT:
-            message = f"({file_size} > {FILE_SIZE_LIMIT})"
+            message = f"Audio size larger than {FILE_SIZE} mb"
             raise AudioTooLargeServiceError(message)
 
         provider_name = LLMBuilder.get_default_provider(tenant_id)
         if provider_name != ProviderName.OPENAI.value:
-            raise ProviderNotSupportSpeechToTextServiceError('haha')
+            raise ProviderNotSupportSpeechToTextServiceError()
 
         provider_service = LLMProviderService(tenant_id, provider_name)
 
         buffer = io.BytesIO(file_content)
-        buffer.name = 'temp.wav'
+        buffer.name = 'temp.mp3'
 
         return Whisper(provider_service.provider).transcribe(buffer)
 
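
Note (not part of the commit): the service keeps the 15 MB cap and the extension whitelist shown above, so a client can fail fast by checking the recording before posting it. A minimal TypeScript sketch assuming exactly those limits; the function name and messages are illustrative only:

// validate-audio.ts -- hypothetical client-side guard mirroring the server limits above
const FILE_SIZE_MB = 15
const FILE_SIZE_LIMIT = FILE_SIZE_MB * 1024 * 1024
const ALLOWED_EXTENSIONS = ['mp3', 'mp4', 'mpeg', 'mpga', 'm4a', 'wav', 'webm']

export const validateAudioFile = (file: File): string | null => {
  const extension = file.name.split('.').pop()?.toLowerCase() ?? ''
  if (!ALLOWED_EXTENSIONS.includes(extension))
    return `Audio type .${extension} not allowed`
  if (file.size > FILE_SIZE_LIMIT)
    return `Audio size larger than ${FILE_SIZE_MB} mb`
  return null // null means the file passes both checks
}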

services/errors/audio.py:

@@ -1,23 +1,13 @@
-from services.errors.base import BaseServiceError
-
-
-class NoAudioUploadedServiceError(BaseServiceError):
-    error_code = 'no_audio_uploaded'
-    description = "Please upload your audio."
-    code = 400
+class NoAudioUploadedServiceError(Exception):
+    pass
 
 
-class AudioTooLargeServiceError(BaseServiceError):
-    error_code = 'audio_too_large'
-    description = "Audio size exceeded. {message}"
-    code = 413
+class AudioTooLargeServiceError(Exception):
+    pass
 
 
-class UnsupportedAudioTypeServiceError(BaseServiceError):
-    error_code = 'unsupported_audio_type'
-    description = "Audio type not allowed."
-    code = 415
+class UnsupportedAudioTypeServiceError(Exception):
+    pass
 
 
-class ProviderNotSupportSpeechToTextServiceError(BaseServiceError):
-    error_code = 'provider_not_support_speech_to_text'
-    description = "Provider not support speech to text. {message}"
-    code = 400
+class ProviderNotSupportSpeechToTextServiceError(Exception):
+    pass

Voice input component (web/app/components/base/voice-input):

@@ -4,6 +4,7 @@ import { useParams, usePathname } from 'next/navigation'
 import cn from 'classnames'
 import Recorder from 'js-audio-recorder'
 import { useRafInterval } from 'ahooks'
+import { convertToMp3 } from './utils'
 import s from './index.module.css'
 import { StopCircle } from '@/app/components/base/icons/src/vender/solid/mediaAndDevices'
 import { Loading02, XClose } from '@/app/components/base/icons/src/vender/line/general'

@@ -19,7 +20,12 @@ const VoiceInput = ({
   onConverted,
 }: VoiceInputTypes) => {
   const { t } = useTranslation()
-  const recorder = useRef(new Recorder())
+  const recorder = useRef(new Recorder({
+    sampleBits: 16,
+    sampleRate: 16000,
+    numChannels: 1,
+    compiling: false,
+  }))
   const canvasRef = useRef<HTMLCanvasElement | null>(null)
   const ctxRef = useRef<CanvasRenderingContext2D | null>(null)
   const drawRecordId = useRef<number | null>(null)
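
Note (not part of the commit): the recorder is now created with explicit capture settings instead of library defaults. A small annotated restatement of the same js-audio-recorder options; the comments are my reading of why each value was chosen:

import Recorder from 'js-audio-recorder'

// Same options as the diff above, pulled into a named object for readability.
const recorderOptions = {
  sampleBits: 16,    // 16-bit PCM samples
  sampleRate: 16000, // 16 kHz is a common capture rate for speech-to-text input
  numChannels: 1,    // mono; speech does not need stereo, and this halves the payload
  compiling: false,  // no chunk-by-chunk conversion while recording; everything is converted once on stop
}

const recorder = new Recorder(recorderOptions)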

@@ -75,10 +81,10 @@ const VoiceInput = ({
     const canvas = canvasRef.current!
     const ctx = ctxRef.current!
     ctx.clearRect(0, 0, canvas.width, canvas.height)
-    const wavBlob = recorder.current.getWAVBlob()
-    const wavFile = new File([wavBlob], 'a.wav', { type: 'audio/wav' })
+    const mp3Blob = convertToMp3(recorder.current)
+    const mp3File = new File([mp3Blob], 'temp.mp3', { type: 'audio/mp3' })
     const formData = new FormData()
-    formData.append('file', wavFile)
+    formData.append('file', mp3File)
 
     let url = ''
     let isPublic = false
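
Note (not part of the commit): taken together, the component now stops the recorder, converts the captured WAV data to MP3 in the browser (see the new utils.ts below), and uploads the much smaller file. A rough sketch of that flow; the handler name, endpoint path, and response shape are placeholders, not taken from this commit:

import { convertToMp3 } from './utils'

// Hypothetical stop handler: convert the recording and post it for transcription.
const handleStopRecording = async (recorder: any): Promise<string> => {
  const mp3Blob = convertToMp3(recorder)                                  // WAV -> MP3 via lamejs
  const mp3File = new File([mp3Blob], 'temp.mp3', { type: 'audio/mp3' })  // same name and MIME type as the diff above

  const formData = new FormData()
  formData.append('file', mp3File)

  // '/audio-to-text' is a placeholder endpoint, not taken from this commit.
  const response = await fetch('/audio-to-text', { method: 'POST', body: formData })
  const { text } = await response.json()                                  // assumes the endpoint returns { text: string }
  return text
}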

New file: web/app/components/base/voice-input/utils.ts (38 lines)

@@ -0,0 +1,38 @@
+import lamejs from 'lamejs'
+
+export const convertToMp3 = (recorder: any) => {
+  const wav = lamejs.WavHeader.readHeader(recorder.getWAV())
+  const { channels, sampleRate } = wav
+  const mp3enc = new lamejs.Mp3Encoder(channels, sampleRate, 128)
+  const result = recorder.getChannelData()
+  const buffer = []
+
+  const leftData = result.left && new Int16Array(result.left.buffer, 0, result.left.byteLength / 2)
+  const rightData = result.right && new Int16Array(result.right.buffer, 0, result.right.byteLength / 2)
+  const remaining = leftData.length + (rightData ? rightData.length : 0)
+
+  const maxSamples = 1152
+  for (let i = 0; i < remaining; i += maxSamples) {
+    const left = leftData.subarray(i, i + maxSamples)
+    let right = null
+    let mp3buf = null
+
+    if (channels === 2) {
+      right = rightData.subarray(i, i + maxSamples)
+      mp3buf = mp3enc.encodeBuffer(left, right)
+    }
+    else {
+      mp3buf = mp3enc.encodeBuffer(left)
+    }
+
+    if (mp3buf.length > 0)
+      buffer.push(mp3buf)
+  }
+
+  const enc = mp3enc.flush()
+
+  if (enc.length > 0)
+    buffer.push(enc)
+
+  return new Blob(buffer, { type: 'audio/mp3' })
+}
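
Note (not part of the commit): convertToMp3 takes the recorder as any. A stricter signature is possible by describing only the two methods the helper actually calls; this is a sketch typed from the usage above, not the real js-audio-recorder typings:

// Minimal structural type covering only what convertToMp3 touches (hypothetical).
interface WavRecorderLike {
  getWAV: () => DataView // passed straight to lamejs.WavHeader.readHeader
  getChannelData: () => {
    left: { buffer: ArrayBuffer; byteLength: number }
    right?: { buffer: ArrayBuffer; byteLength: number }
  }
}

// The helper's signature could then read:
//   export const convertToMp3 = (recorder: WavRecorderLike): Blob => { ... }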

New file: web/global.d.ts (1 line)

@@ -0,0 +1 @@
+declare module 'lamejs';
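
Note (not part of the commit): the one-line ambient declaration above exists apparently because lamejs 1.2.0 does not ship its own TypeScript declarations, so a bare import would not type-check. A slightly more descriptive variant is possible; the members listed are only the ones this diff uses, typed loosely and hypothetically:

// Alternative, still intentionally loose declaration covering only the parts used here.
declare module 'lamejs' {
  const lamejs: {
    WavHeader: { readHeader: (dv: DataView) => { channels: number; sampleRate: number } }
    Mp3Encoder: new (channels: number, sampleRate: number, kbps: number) => {
      encodeBuffer: (left: Int16Array, right?: Int16Array) => Int8Array
      flush: () => Int8Array
    }
  }
  export default lamejs
}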

package.json (web):

@@ -81,7 +81,8 @@
     "swr": "^2.1.0",
     "tailwindcss": "^3.2.7",
     "typescript": "4.9.5",
-    "use-context-selector": "^1.4.1"
+    "use-context-selector": "^1.4.1",
+    "lamejs": "1.2.0"
   },
   "devDependencies": {
     "@antfu/eslint-config": "^0.36.0",