Fix speech provider endpoint routing

This commit is contained in:
Danny Avila 2026-05-01 06:34:43 -04:00
parent 781bfb857d
commit af537d76fe
11 changed files with 118 additions and 21 deletions

View file

@@ -1,19 +1,18 @@
import { memo } from 'react';
import { useRecoilValue } from 'recoil';
import type { TMessageAudio } from '~/common';
import { BrowserTTS, ExternalTTS } from '~/components/Audio/TTS';
import { TTSEndpoints } from '~/common';
import store from '~/store';
import { useGetAudioSettings } from '~/hooks';
function MessageAudio(props: TMessageAudio) {
const engineTTS = useRecoilValue<string>(store.engineTTS);
const { textToSpeechEndpoint } = useGetAudioSettings();
const TTSComponents = {
[TTSEndpoints.browser]: BrowserTTS,
[TTSEndpoints.external]: ExternalTTS,
};
const SelectedTTS = TTSComponents[engineTTS];
const SelectedTTS = TTSComponents[textToSpeechEndpoint];
if (!SelectedTTS) {
return null;
}

View file

@ -1,6 +1,7 @@
import React from 'react';
import { useRecoilState } from 'recoil';
import { Dropdown } from '@librechat/client';
import { STTProviders } from 'librechat-data-provider';
import { useLocalize } from '~/hooks';
import store from '~/store';
@ -16,6 +17,8 @@ const EngineSTTDropdown: React.FC<EngineSTTDropdownProps> = ({ external }) => {
? [
{ value: 'browser', label: localize('com_nav_browser') },
{ value: 'external', label: localize('com_nav_external') },
{ value: STTProviders.OPENAI, label: localize('com_ui_openai') },
{ value: STTProviders.AZURE_OPENAI, label: localize('com_ui_azure') },
]
: [{ value: 'browser', label: localize('com_nav_browser') }];

View file

@ -3,6 +3,7 @@ import { useRecoilState } from 'recoil';
import * as Tabs from '@radix-ui/react-tabs';
import { Lightbulb, Cog } from 'lucide-react';
import { useOnClickOutside, useMediaQuery } from '@librechat/client';
import { STTProviders, TTSProviders } from 'librechat-data-provider';
import { useGetCustomConfigSpeechQuery } from 'librechat-data-provider/react-query';
import {
CloudBrowserVoicesSwitch,
@ -23,6 +24,7 @@ import {
} from './STT';
import ConversationModeSwitch from './ConversationModeSwitch';
import { useLocalize } from '~/hooks';
import { isExternalAvailable } from '~/hooks/Input/audioEndpoints';
import { cn } from '~/utils';
import store from '~/store';
@ -33,8 +35,8 @@ function Speech() {
const { data } = useGetCustomConfigSpeechQuery();
const isSmallScreen = useMediaQuery('(max-width: 767px)');
const [sttExternal, setSttExternal] = useState(false);
const [ttsExternal, setTtsExternal] = useState(false);
const [sttExternal, setSttExternal] = useRecoilState(store.sttExternal);
const [ttsExternal, setTtsExternal] = useRecoilState(store.ttsExternal);
const [advancedMode, setAdvancedMode] = useRecoilState(store.advancedMode);
const [autoTranscribeAudio, setAutoTranscribeAudio] = useRecoilState(store.autoTranscribeAudio);
const [conversationMode, setConversationMode] = useRecoilState(store.conversationMode);
@ -55,7 +57,7 @@ function Speech() {
const [playbackRate, setPlaybackRate] = useRecoilState(store.playbackRate);
const updateSetting = useCallback(
(key: string, newValue: string | number) => {
(key: string, newValue: string | number | boolean) => {
const settings = {
sttExternal: { value: sttExternal, setFunc: setSttExternal },
ttsExternal: { value: ttsExternal, setFunc: setTtsExternal },
@ -140,11 +142,28 @@ function Speech() {
// Reset engineTTS if it is set to a removed/invalid value (e.g., 'edge')
// TODO: remove this once the 'edge' engine is fully deprecated
useEffect(() => {
const validEngines = ['browser', 'external'];
if (!data) {
return;
}
const validEngines = isExternalAvailable(data.ttsExternal)
? ['browser', 'external', ...Object.values(TTSProviders)]
: ['browser'];
if (!validEngines.includes(engineTTS)) {
setEngineTTS('browser');
}
}, [engineTTS, setEngineTTS]);
}, [data, engineTTS, setEngineTTS]);
useEffect(() => {
if (!data) {
return;
}
const validEngines = isExternalAvailable(data.sttExternal)
? ['browser', 'external', ...Object.values(STTProviders)]
: ['browser'];
if (!validEngines.includes(engineSTT)) {
setEngineSTT('browser');
}
}, [data, engineSTT, setEngineSTT]);
const contentRef = useRef(null);
useOnClickOutside(contentRef, () => confirmClear && setConfirmClear(false), []);

View file

@ -1,6 +1,7 @@
import React from 'react';
import { useRecoilState } from 'recoil';
import { Dropdown } from '@librechat/client';
import { TTSProviders } from 'librechat-data-provider';
import { useLocalize } from '~/hooks';
import store from '~/store';
@ -16,6 +17,10 @@ const EngineTTSDropdown: React.FC<EngineTTSDropdownProps> = ({ external }) => {
? [
{ value: 'browser', label: localize('com_nav_browser') },
{ value: 'external', label: localize('com_nav_external') },
{ value: TTSProviders.OPENAI, label: localize('com_ui_openai') },
{ value: TTSProviders.AZURE_OPENAI, label: localize('com_ui_azure') },
{ value: TTSProviders.ELEVENLABS, label: 'ElevenLabs' },
{ value: TTSProviders.LOCALAI, label: 'LocalAI' },
]
: [{ value: 'browser', label: localize('com_nav_browser') }];

View file

@ -1,7 +1,6 @@
import { useRecoilValue } from 'recoil';
import { BrowserVoiceDropdown, ExternalVoiceDropdown } from '~/components/Audio/Voices';
import { TTSEndpoints } from '~/common';
import store from '~/store';
import { useGetAudioSettings } from '~/hooks';
const voiceDropdownComponentsMap = {
[TTSEndpoints.browser]: BrowserVoiceDropdown,
@ -9,8 +8,8 @@ const voiceDropdownComponentsMap = {
};
export default function VoiceDropdown() {
const engineTTS = useRecoilValue<string>(store.engineTTS);
const VoiceDropdownComponent = voiceDropdownComponentsMap[engineTTS];
const { textToSpeechEndpoint } = useGetAudioSettings();
const VoiceDropdownComponent = voiceDropdownComponentsMap[textToSpeechEndpoint];
if (!VoiceDropdownComponent) {
return null;

View file

@ -14,6 +14,8 @@ export default function useSpeechSettingsInit(isAuthenticated: boolean) {
const setters = useRef({
conversationMode: useSetRecoilState(store.conversationMode),
advancedMode: useSetRecoilState(store.advancedMode),
sttExternal: useSetRecoilState(store.sttExternal),
ttsExternal: useSetRecoilState(store.ttsExternal),
speechToText: useSetRecoilState(store.speechToText),
textToSpeech: useSetRecoilState(store.textToSpeech),
cacheTTS: useSetRecoilState(store.cacheTTS),
@ -36,9 +38,9 @@ export default function useSpeechSettingsInit(isAuthenticated: boolean) {
logger.log('Initializing speech settings from config:', data);
Object.entries(data).forEach(([key, value]) => {
if (key === 'sttExternal' || key === 'ttsExternal') return;
if (localStorage.getItem(key) !== null) return;
if (key !== 'sttExternal' && key !== 'ttsExternal' && localStorage.getItem(key) !== null) {
return;
}
const setter = setters[key as keyof typeof setters];
if (setter) {

View file

@@ -0,0 +1,39 @@
import { STTProviders, TTSProviders } from 'librechat-data-provider';
import { STTEndpoints, TTSEndpoints } from '~/common';
import { isExternalAvailable, normalizeSTTEndpoint, normalizeTTSEndpoint } from './audioEndpoints';
describe('audio endpoint normalization', () => {
  it('detects external availability from config values', () => {
    // The config flag may arrive as a boolean or its string form.
    const available: unknown[] = [true, 'true'];
    const unavailable: unknown[] = [false, 'false', undefined];
    available.forEach((value) => expect(isExternalAvailable(value)).toBe(true));
    unavailable.forEach((value) => expect(isExternalAvailable(value)).toBe(false));
  });
  it('routes configured STT providers through the external recorder', () => {
    const externalInputs = [STTProviders.OPENAI, STTProviders.AZURE_OPENAI, STTEndpoints.external];
    externalInputs.forEach((endpoint) => {
      expect(normalizeSTTEndpoint(endpoint, true)).toBe(STTEndpoints.external);
    });
    expect(normalizeSTTEndpoint(STTEndpoints.browser, true)).toBe(STTEndpoints.browser);
  });
  it('falls back to browser STT when external STT is unavailable', () => {
    [STTProviders.OPENAI, STTEndpoints.external].forEach((endpoint) => {
      expect(normalizeSTTEndpoint(endpoint, false)).toBe(STTEndpoints.browser);
    });
  });
  it('routes configured TTS providers through the external player', () => {
    const externalInputs = [
      TTSProviders.OPENAI,
      TTSProviders.AZURE_OPENAI,
      TTSProviders.ELEVENLABS,
      TTSProviders.LOCALAI,
      TTSEndpoints.external,
    ];
    externalInputs.forEach((endpoint) => {
      expect(normalizeTTSEndpoint(endpoint, true)).toBe(TTSEndpoints.external);
    });
    expect(normalizeTTSEndpoint(TTSEndpoints.browser, true)).toBe(TTSEndpoints.browser);
  });
  it('falls back to browser TTS when external TTS is unavailable', () => {
    [TTSProviders.OPENAI, TTSEndpoints.external].forEach((endpoint) => {
      expect(normalizeTTSEndpoint(endpoint, false)).toBe(TTSEndpoints.browser);
    });
  });
});

View file

@@ -0,0 +1,28 @@
import { STTProviders, TTSProviders } from 'librechat-data-provider';
import { STTEndpoints, TTSEndpoints } from '~/common';
// Engine values that should be handled by the external (server-side) STT
// recorder: the generic 'external' choice plus the provider-specific values
// offered in the STT engine dropdown.
const externalSTTEndpoints = new Set<string>([
  STTEndpoints.external,
  STTProviders.OPENAI,
  STTProviders.AZURE_OPENAI,
]);
// Engine values that should be handled by the external (server-side) TTS
// player: the generic 'external' choice plus the provider-specific values
// offered in the TTS engine dropdown.
const externalTTSEndpoints = new Set<string>([
  TTSEndpoints.external,
  TTSProviders.OPENAI,
  TTSProviders.AZURE_OPENAI,
  TTSProviders.ELEVENLABS,
  TTSProviders.LOCALAI,
]);
/**
 * Returns whether the speech config marks the external service as available.
 * Accepts both the boolean `true` and the string `'true'`; every other value
 * (including `'false'`, `false`, and `undefined`) counts as unavailable.
 */
export const isExternalAvailable = (value: unknown): boolean => {
  if (typeof value === 'boolean') {
    return value;
  }
  return value === 'true';
};
/**
 * Maps a stored STT engine value onto the endpoint the components understand.
 * Provider-specific values route through the external recorder, but only when
 * the external service is available; anything else uses the browser recorder.
 */
export const normalizeSTTEndpoint = (endpoint: string, externalAvailable: boolean) => {
  if (!externalAvailable) {
    return STTEndpoints.browser;
  }
  return externalSTTEndpoints.has(endpoint) ? STTEndpoints.external : STTEndpoints.browser;
};
/**
 * Maps a stored TTS engine value onto the endpoint the components understand.
 * Provider-specific values route through the external player, but only when
 * the external service is available; anything else uses the browser player.
 */
export const normalizeTTSEndpoint = (endpoint: string, externalAvailable: boolean) => {
  if (!externalAvailable) {
    return TTSEndpoints.browser;
  }
  return externalTTSEndpoints.has(endpoint) ? TTSEndpoints.external : TTSEndpoints.browser;
};

View file

@ -1,13 +1,16 @@
import { useMemo } from 'react';
import { useRecoilValue } from 'recoil';
import { normalizeSTTEndpoint, normalizeTTSEndpoint } from './audioEndpoints';
import store from '~/store';
const useGetAudioSettings = () => {
const engineSTT = useRecoilValue<string>(store.engineSTT);
const engineTTS = useRecoilValue<string>(store.engineTTS);
const sttExternal = useRecoilValue<boolean>(store.sttExternal);
const ttsExternal = useRecoilValue<boolean>(store.ttsExternal);
const speechToTextEndpoint = engineSTT;
const textToSpeechEndpoint = engineTTS;
const speechToTextEndpoint = normalizeSTTEndpoint(engineSTT, sttExternal);
const textToSpeechEndpoint = normalizeTTSEndpoint(engineTTS, ttsExternal);
return useMemo(
() => ({ speechToTextEndpoint, textToSpeechEndpoint }),

View file

@ -2,7 +2,6 @@ import { useEffect, useRef, useMemo } from 'react';
import { useRecoilState } from 'recoil';
import { useToastContext } from '@librechat/client';
import SpeechRecognition, { useSpeechRecognition } from 'react-speech-recognition';
import { useGetCustomConfigSpeechQuery } from 'librechat-data-provider/react-query';
import useGetAudioSettings from './useGetAudioSettings';
import { useLocalize } from '~/hooks';
import store from '~/store';
@ -15,12 +14,11 @@ const useSpeechToTextBrowser = (
const { showToast } = useToastContext();
const { speechToTextEndpoint } = useGetAudioSettings();
const isBrowserSTTEnabled = speechToTextEndpoint === 'browser';
const { data: speechConfig } = useGetCustomConfigSpeechQuery({ enabled: true });
const sttExternal = Boolean(speechConfig?.sttExternal);
const lastTranscript = useRef<string | null>(null);
const lastInterim = useRef<string | null>(null);
const timeoutRef = useRef<NodeJS.Timeout | null>();
const [sttExternal] = useRecoilState(store.sttExternal);
const [autoSendText] = useRecoilState(store.autoSendText);
const [languageSTT] = useRecoilState<string>(store.languageSTT);
const [autoTranscribeAudio] = useRecoilState<boolean>(store.autoTranscribeAudio);

View file

@ -12,6 +12,8 @@ const staticAtoms = {
default: SettingsViews.default,
}),
showPopover: atom<boolean>({ key: 'showPopover', default: false }),
sttExternal: atom<boolean>({ key: 'sttExternal', default: false }),
ttsExternal: atom<boolean>({ key: 'ttsExternal', default: false }),
};
const localStorageAtoms = {