From af537d76fe3ca560a1db17e1a884128b182e67db Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Fri, 1 May 2026 06:34:43 -0400 Subject: [PATCH] Fix speech provider endpoint routing --- .../components/Chat/Messages/MessageAudio.tsx | 7 ++-- .../Speech/STT/EngineSTTDropdown.tsx | 3 ++ .../Nav/SettingsTabs/Speech/Speech.tsx | 29 +++++++++++--- .../Speech/TTS/EngineTTSDropdown.tsx | 5 +++ .../SettingsTabs/Speech/TTS/VoiceDropdown.tsx | 7 ++-- .../src/hooks/Config/useSpeechSettingsInit.ts | 8 ++-- client/src/hooks/Input/audioEndpoints.spec.ts | 39 +++++++++++++++++++ client/src/hooks/Input/audioEndpoints.ts | 28 +++++++++++++ client/src/hooks/Input/useGetAudioSettings.ts | 7 +++- .../src/hooks/Input/useSpeechToTextBrowser.ts | 4 +- client/src/store/settings.ts | 2 + 11 files changed, 118 insertions(+), 21 deletions(-) create mode 100644 client/src/hooks/Input/audioEndpoints.spec.ts create mode 100644 client/src/hooks/Input/audioEndpoints.ts diff --git a/client/src/components/Chat/Messages/MessageAudio.tsx b/client/src/components/Chat/Messages/MessageAudio.tsx index eb4c52a407..d5309e526b 100644 --- a/client/src/components/Chat/Messages/MessageAudio.tsx +++ b/client/src/components/Chat/Messages/MessageAudio.tsx @@ -1,19 +1,18 @@ import { memo } from 'react'; -import { useRecoilValue } from 'recoil'; import type { TMessageAudio } from '~/common'; import { BrowserTTS, ExternalTTS } from '~/components/Audio/TTS'; import { TTSEndpoints } from '~/common'; -import store from '~/store'; +import { useGetAudioSettings } from '~/hooks'; function MessageAudio(props: TMessageAudio) { - const engineTTS = useRecoilValue(store.engineTTS); + const { textToSpeechEndpoint } = useGetAudioSettings(); const TTSComponents = { [TTSEndpoints.browser]: BrowserTTS, [TTSEndpoints.external]: ExternalTTS, }; - const SelectedTTS = TTSComponents[engineTTS]; + const SelectedTTS = TTSComponents[textToSpeechEndpoint]; if (!SelectedTTS) { return null; } diff --git a/client/src/components/Nav/SettingsTabs/Speech/STT/EngineSTTDropdown.tsx b/client/src/components/Nav/SettingsTabs/Speech/STT/EngineSTTDropdown.tsx index 8fc3dd8352..0b67e7c103 100644 --- a/client/src/components/Nav/SettingsTabs/Speech/STT/EngineSTTDropdown.tsx +++ b/client/src/components/Nav/SettingsTabs/Speech/STT/EngineSTTDropdown.tsx @@ -1,6 +1,7 @@ import React from 'react'; import { useRecoilState } from 'recoil'; import { Dropdown } from '@librechat/client'; +import { STTProviders } from 'librechat-data-provider'; import { useLocalize } from '~/hooks'; import store from '~/store'; @@ -16,6 +17,8 @@ const EngineSTTDropdown: React.FC = ({ external }) => { ? [ { value: 'browser', label: localize('com_nav_browser') }, { value: 'external', label: localize('com_nav_external') }, + { value: STTProviders.OPENAI, label: localize('com_ui_openai') }, + { value: STTProviders.AZURE_OPENAI, label: localize('com_ui_azure') }, ] : [{ value: 'browser', label: localize('com_nav_browser') }]; diff --git a/client/src/components/Nav/SettingsTabs/Speech/Speech.tsx b/client/src/components/Nav/SettingsTabs/Speech/Speech.tsx index c3ea966e2d..4e24cb1f26 100644 --- a/client/src/components/Nav/SettingsTabs/Speech/Speech.tsx +++ b/client/src/components/Nav/SettingsTabs/Speech/Speech.tsx @@ -3,6 +3,7 @@ import { useRecoilState } from 'recoil'; import * as Tabs from '@radix-ui/react-tabs'; import { Lightbulb, Cog } from 'lucide-react'; import { useOnClickOutside, useMediaQuery } from '@librechat/client'; +import { STTProviders, TTSProviders } from 'librechat-data-provider'; import { useGetCustomConfigSpeechQuery } from 'librechat-data-provider/react-query'; import { CloudBrowserVoicesSwitch, @@ -23,6 +24,7 @@ import { } from './STT'; import ConversationModeSwitch from './ConversationModeSwitch'; import { useLocalize } from '~/hooks'; +import { isExternalAvailable } from '~/hooks/Input/audioEndpoints'; import { cn } from '~/utils'; import store from '~/store'; @@ -33,8 +35,8 @@ function Speech() { const { data } = useGetCustomConfigSpeechQuery(); const isSmallScreen = useMediaQuery('(max-width: 767px)'); - const [sttExternal, setSttExternal] = useState(false); - const [ttsExternal, setTtsExternal] = useState(false); + const [sttExternal, setSttExternal] = useRecoilState(store.sttExternal); + const [ttsExternal, setTtsExternal] = useRecoilState(store.ttsExternal); const [advancedMode, setAdvancedMode] = useRecoilState(store.advancedMode); const [autoTranscribeAudio, setAutoTranscribeAudio] = useRecoilState(store.autoTranscribeAudio); const [conversationMode, setConversationMode] = useRecoilState(store.conversationMode); @@ -55,7 +57,7 @@ function Speech() { const [playbackRate, setPlaybackRate] = useRecoilState(store.playbackRate); const updateSetting = useCallback( - (key: string, newValue: string | number) => { + (key: string, newValue: string | number | boolean) => { const settings = { sttExternal: { value: sttExternal, setFunc: setSttExternal }, ttsExternal: { value: ttsExternal, setFunc: setTtsExternal }, @@ -140,11 +142,28 @@ function Speech() { // Reset engineTTS if it is set to a removed/invalid value (e.g., 'edge') // TODO: remove this once the 'edge' engine is fully deprecated useEffect(() => { - const validEngines = ['browser', 'external']; + if (!data) { + return; + } + const validEngines = isExternalAvailable(data.ttsExternal) + ? ['browser', 'external', ...Object.values(TTSProviders)] + : ['browser']; if (!validEngines.includes(engineTTS)) { setEngineTTS('browser'); } - }, [engineTTS, setEngineTTS]); + }, [data, engineTTS, setEngineTTS]); + + useEffect(() => { + if (!data) { + return; + } + const validEngines = isExternalAvailable(data.sttExternal) + ? ['browser', 'external', ...Object.values(STTProviders)] + : ['browser']; + if (!validEngines.includes(engineSTT)) { + setEngineSTT('browser'); + } + }, [data, engineSTT, setEngineSTT]); const contentRef = useRef(null); useOnClickOutside(contentRef, () => confirmClear && setConfirmClear(false), []); diff --git a/client/src/components/Nav/SettingsTabs/Speech/TTS/EngineTTSDropdown.tsx b/client/src/components/Nav/SettingsTabs/Speech/TTS/EngineTTSDropdown.tsx index a5a576ba92..f134283232 100644 --- a/client/src/components/Nav/SettingsTabs/Speech/TTS/EngineTTSDropdown.tsx +++ b/client/src/components/Nav/SettingsTabs/Speech/TTS/EngineTTSDropdown.tsx @@ -1,6 +1,7 @@ import React from 'react'; import { useRecoilState } from 'recoil'; import { Dropdown } from '@librechat/client'; +import { TTSProviders } from 'librechat-data-provider'; import { useLocalize } from '~/hooks'; import store from '~/store'; @@ -16,6 +17,10 @@ const EngineTTSDropdown: React.FC = ({ external }) => { ? [ { value: 'browser', label: localize('com_nav_browser') }, { value: 'external', label: localize('com_nav_external') }, + { value: TTSProviders.OPENAI, label: localize('com_ui_openai') }, + { value: TTSProviders.AZURE_OPENAI, label: localize('com_ui_azure') }, + { value: TTSProviders.ELEVENLABS, label: 'ElevenLabs' }, + { value: TTSProviders.LOCALAI, label: 'LocalAI' }, ] : [{ value: 'browser', label: localize('com_nav_browser') }]; diff --git a/client/src/components/Nav/SettingsTabs/Speech/TTS/VoiceDropdown.tsx b/client/src/components/Nav/SettingsTabs/Speech/TTS/VoiceDropdown.tsx index 259400fb77..241a92b448 100644 --- a/client/src/components/Nav/SettingsTabs/Speech/TTS/VoiceDropdown.tsx +++ b/client/src/components/Nav/SettingsTabs/Speech/TTS/VoiceDropdown.tsx @@ -1,7 +1,6 @@ -import { useRecoilValue } from 'recoil'; import { BrowserVoiceDropdown, ExternalVoiceDropdown } from '~/components/Audio/Voices'; import { TTSEndpoints } from '~/common'; -import store from '~/store'; +import { useGetAudioSettings } from '~/hooks'; const voiceDropdownComponentsMap = { [TTSEndpoints.browser]: BrowserVoiceDropdown, @@ -9,8 +8,8 @@ const voiceDropdownComponentsMap = { }; export default function VoiceDropdown() { - const engineTTS = useRecoilValue(store.engineTTS); - const VoiceDropdownComponent = voiceDropdownComponentsMap[engineTTS]; + const { textToSpeechEndpoint } = useGetAudioSettings(); + const VoiceDropdownComponent = voiceDropdownComponentsMap[textToSpeechEndpoint]; if (!VoiceDropdownComponent) { return null; diff --git a/client/src/hooks/Config/useSpeechSettingsInit.ts b/client/src/hooks/Config/useSpeechSettingsInit.ts index a6ce69e52b..d3e312fe93 100644 --- a/client/src/hooks/Config/useSpeechSettingsInit.ts +++ b/client/src/hooks/Config/useSpeechSettingsInit.ts @@ -14,6 +14,8 @@ export default function useSpeechSettingsInit(isAuthenticated: boolean) { const setters = useRef({ conversationMode: useSetRecoilState(store.conversationMode), advancedMode: useSetRecoilState(store.advancedMode), + sttExternal: useSetRecoilState(store.sttExternal), + ttsExternal: useSetRecoilState(store.ttsExternal), speechToText: useSetRecoilState(store.speechToText), textToSpeech: useSetRecoilState(store.textToSpeech), cacheTTS: useSetRecoilState(store.cacheTTS), @@ -36,9 +38,9 @@ export default function useSpeechSettingsInit(isAuthenticated: boolean) { logger.log('Initializing speech settings from config:', data); Object.entries(data).forEach(([key, value]) => { - if (key === 'sttExternal' || key === 'ttsExternal') return; - - if (localStorage.getItem(key) !== null) return; + if (key !== 'sttExternal' && key !== 'ttsExternal' && localStorage.getItem(key) !== null) { + return; + } const setter = setters[key as keyof typeof setters]; if (setter) { diff --git a/client/src/hooks/Input/audioEndpoints.spec.ts b/client/src/hooks/Input/audioEndpoints.spec.ts new file mode 100644 index 0000000000..3d90f694ca --- /dev/null +++ b/client/src/hooks/Input/audioEndpoints.spec.ts @@ -0,0 +1,39 @@ +import { STTProviders, TTSProviders } from 'librechat-data-provider'; +import { STTEndpoints, TTSEndpoints } from '~/common'; +import { isExternalAvailable, normalizeSTTEndpoint, normalizeTTSEndpoint } from './audioEndpoints'; + +describe('audio endpoint normalization', () => { + it('detects external availability from config values', () => { + expect(isExternalAvailable(true)).toBe(true); + expect(isExternalAvailable('true')).toBe(true); + expect(isExternalAvailable(false)).toBe(false); + expect(isExternalAvailable('false')).toBe(false); + expect(isExternalAvailable(undefined)).toBe(false); + }); + + it('routes configured STT providers through the external recorder', () => { + expect(normalizeSTTEndpoint(STTProviders.OPENAI, true)).toBe(STTEndpoints.external); + expect(normalizeSTTEndpoint(STTProviders.AZURE_OPENAI, true)).toBe(STTEndpoints.external); + expect(normalizeSTTEndpoint(STTEndpoints.external, true)).toBe(STTEndpoints.external); + expect(normalizeSTTEndpoint(STTEndpoints.browser, true)).toBe(STTEndpoints.browser); + }); + + it('falls back to browser STT when external STT is unavailable', () => { + expect(normalizeSTTEndpoint(STTProviders.OPENAI, false)).toBe(STTEndpoints.browser); + expect(normalizeSTTEndpoint(STTEndpoints.external, false)).toBe(STTEndpoints.browser); + }); + + it('routes configured TTS providers through the external player', () => { + expect(normalizeTTSEndpoint(TTSProviders.OPENAI, true)).toBe(TTSEndpoints.external); + expect(normalizeTTSEndpoint(TTSProviders.AZURE_OPENAI, true)).toBe(TTSEndpoints.external); + expect(normalizeTTSEndpoint(TTSProviders.ELEVENLABS, true)).toBe(TTSEndpoints.external); + expect(normalizeTTSEndpoint(TTSProviders.LOCALAI, true)).toBe(TTSEndpoints.external); + expect(normalizeTTSEndpoint(TTSEndpoints.external, true)).toBe(TTSEndpoints.external); + expect(normalizeTTSEndpoint(TTSEndpoints.browser, true)).toBe(TTSEndpoints.browser); + }); + + it('falls back to browser TTS when external TTS is unavailable', () => { + expect(normalizeTTSEndpoint(TTSProviders.OPENAI, false)).toBe(TTSEndpoints.browser); + expect(normalizeTTSEndpoint(TTSEndpoints.external, false)).toBe(TTSEndpoints.browser); + }); +}); diff --git a/client/src/hooks/Input/audioEndpoints.ts b/client/src/hooks/Input/audioEndpoints.ts new file mode 100644 index 0000000000..21783cbef7 --- /dev/null +++ b/client/src/hooks/Input/audioEndpoints.ts @@ -0,0 +1,28 @@ +import { STTProviders, TTSProviders } from 'librechat-data-provider'; +import { STTEndpoints, TTSEndpoints } from '~/common'; + +const externalSTTEndpoints = new Set([ + STTEndpoints.external, + STTProviders.OPENAI, + STTProviders.AZURE_OPENAI, +]); + +const externalTTSEndpoints = new Set([ + TTSEndpoints.external, + TTSProviders.OPENAI, + TTSProviders.AZURE_OPENAI, + TTSProviders.ELEVENLABS, + TTSProviders.LOCALAI, +]); + +export const isExternalAvailable = (value: unknown) => value === true || value === 'true'; + +export const normalizeSTTEndpoint = (endpoint: string, externalAvailable: boolean) => + externalAvailable && externalSTTEndpoints.has(endpoint) + ? STTEndpoints.external + : STTEndpoints.browser; + +export const normalizeTTSEndpoint = (endpoint: string, externalAvailable: boolean) => + externalAvailable && externalTTSEndpoints.has(endpoint) + ? TTSEndpoints.external + : TTSEndpoints.browser; diff --git a/client/src/hooks/Input/useGetAudioSettings.ts b/client/src/hooks/Input/useGetAudioSettings.ts index 899cf50217..1f243caf72 100644 --- a/client/src/hooks/Input/useGetAudioSettings.ts +++ b/client/src/hooks/Input/useGetAudioSettings.ts @@ -1,13 +1,16 @@ import { useMemo } from 'react'; import { useRecoilValue } from 'recoil'; +import { normalizeSTTEndpoint, normalizeTTSEndpoint } from './audioEndpoints'; import store from '~/store'; const useGetAudioSettings = () => { const engineSTT = useRecoilValue(store.engineSTT); const engineTTS = useRecoilValue(store.engineTTS); + const sttExternal = useRecoilValue(store.sttExternal); + const ttsExternal = useRecoilValue(store.ttsExternal); - const speechToTextEndpoint = engineSTT; - const textToSpeechEndpoint = engineTTS; + const speechToTextEndpoint = normalizeSTTEndpoint(engineSTT, sttExternal); + const textToSpeechEndpoint = normalizeTTSEndpoint(engineTTS, ttsExternal); return useMemo( () => ({ speechToTextEndpoint, textToSpeechEndpoint }), diff --git a/client/src/hooks/Input/useSpeechToTextBrowser.ts b/client/src/hooks/Input/useSpeechToTextBrowser.ts index 373c84d005..8a34b7e527 100644 --- a/client/src/hooks/Input/useSpeechToTextBrowser.ts +++ b/client/src/hooks/Input/useSpeechToTextBrowser.ts @@ -2,7 +2,6 @@ import { useEffect, useRef, useMemo } from 'react'; import { useRecoilState } from 'recoil'; import { useToastContext } from '@librechat/client'; import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition'; -import { useGetCustomConfigSpeechQuery } from 'librechat-data-provider/react-query'; import useGetAudioSettings from './useGetAudioSettings'; import { useLocalize } from '~/hooks'; import store from '~/store'; @@ -15,12 +14,11 @@ const useSpeechToTextBrowser = ( const { showToast } = useToastContext(); const { speechToTextEndpoint } = useGetAudioSettings(); const isBrowserSTTEnabled = speechToTextEndpoint === 'browser'; - const { data: speechConfig } = useGetCustomConfigSpeechQuery({ enabled: true }); - const sttExternal = Boolean(speechConfig?.sttExternal); const lastTranscript = useRef(null); const lastInterim = useRef(null); const timeoutRef = useRef(); + const [sttExternal] = useRecoilState(store.sttExternal); const [autoSendText] = useRecoilState(store.autoSendText); const [languageSTT] = useRecoilState(store.languageSTT); const [autoTranscribeAudio] = useRecoilState(store.autoTranscribeAudio); diff --git a/client/src/store/settings.ts b/client/src/store/settings.ts index b0e2eef254..0d78017cf6 100644 --- a/client/src/store/settings.ts +++ b/client/src/store/settings.ts @@ -12,6 +12,8 @@ const staticAtoms = { default: SettingsViews.default, }), showPopover: atom({ key: 'showPopover', default: false }), + sttExternal: atom({ key: 'sttExternal', default: false }), + ttsExternal: atom({ key: 'ttsExternal', default: false }), }; const localStorageAtoms = {