From c3ec23f9b8576d02b6d8856c045f7edbfbd4ec29 Mon Sep 17 00:00:00 2001 From: Danny Avila Date: Sun, 10 May 2026 13:41:58 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=8C=90=20feat:=20Support=20Vertex=20AI=20?= =?UTF-8?q?Multi-Region=20Endpoints=20(#13044)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: support Vertex AI multi-region endpoints * fix: sync Vertex endpoint with final location --- .env.example | 4 +- api/package.json | 4 +- librechat.example.yaml | 1 + package-lock.json | 53 ++++++++++--- packages/api/package.json | 4 +- .../src/endpoints/anthropic/vertex.spec.ts | 30 +++++++ packages/api/src/endpoints/google/llm.spec.ts | 79 +++++++++++++++++++ packages/api/src/endpoints/google/llm.ts | 47 ++++++++++- 8 files changed, 205 insertions(+), 17 deletions(-) create mode 100644 packages/api/src/endpoints/anthropic/vertex.spec.ts diff --git a/.env.example b/.env.example index 5349027e95..00030c7187 100644 --- a/.env.example +++ b/.env.example @@ -149,6 +149,7 @@ ANTHROPIC_API_KEY=user_provided # Set to true to use Anthropic models through Google Vertex AI instead of direct API # ANTHROPIC_USE_VERTEX= +# Supports regional locations like us-east5 and multi-region locations: us, eu, global # ANTHROPIC_VERTEX_REGION=us-east5 #============# @@ -209,7 +210,8 @@ GOOGLE_KEY=user_provided # GOOGLE_TITLE_MODEL=gemini-2.0-flash-lite-001 -# Google Cloud region for Vertex AI (used by both chat and image generation) +# Google Cloud location for Vertex AI (used by both chat and image generation). +# Supports regional locations like us-central1 and multi-region locations: us, eu, global. # GOOGLE_LOC=us-central1 # Alternative region env var for Gemini Image Generation diff --git a/api/package.json b/api/package.json index 6cbca655b9..585a9bb215 100644 --- a/api/package.json +++ b/api/package.json @@ -34,7 +34,7 @@ }, "homepage": "https://librechat.ai", "dependencies": { - "@anthropic-ai/vertex-sdk": "^0.14.3", + "@anthropic-ai/vertex-sdk": "^0.16.0", "@aws-sdk/client-bedrock-runtime": "^3.1013.0", "@aws-sdk/client-cloudfront": "^3.1042.0", "@aws-sdk/client-s3": "^3.980.0", @@ -43,7 +43,7 @@ "@azure/identity": "^4.13.1", "@azure/search-documents": "^12.0.0", "@azure/storage-blob": "^12.30.0", - "@google/genai": "^1.19.0", + "@google/genai": "^2.0.1", "@keyv/redis": "^4.3.3", "@librechat/agents": "^3.1.84", "@librechat/api": "*", diff --git a/librechat.example.yaml b/librechat.example.yaml index ef6f846c82..7d61b486f4 100644 --- a/librechat.example.yaml +++ b/librechat.example.yaml @@ -373,6 +373,7 @@ endpoints: # vertex: # # Vertex AI region (optional, defaults to 'us-east5') # # Available regions: us-east5, us-central1, europe-west1, europe-west4, asia-southeast1 + # # Multi-region endpoints: us, eu, global # region: "us-east5" # # Path to Google service account key file (optional) # # If not specified, uses GOOGLE_SERVICE_KEY_FILE env var or default path (api/data/auth.json) diff --git a/package-lock.json b/package-lock.json index e49a553123..bb6c854335 100644 --- a/package-lock.json +++ b/package-lock.json @@ -49,7 +49,7 @@ "version": "v0.8.5", "license": "ISC", "dependencies": { - "@anthropic-ai/vertex-sdk": "^0.14.3", + "@anthropic-ai/vertex-sdk": "^0.16.0", "@aws-sdk/client-bedrock-runtime": "^3.1013.0", "@aws-sdk/client-cloudfront": "^3.1042.0", "@aws-sdk/client-s3": "^3.980.0", @@ -58,7 +58,7 @@ "@azure/identity": "^4.13.1", "@azure/search-documents": "^12.0.0", "@azure/storage-blob": "^12.30.0", - "@google/genai": "^1.19.0", + "@google/genai": "^2.0.1", "@keyv/redis": "^4.3.3", "@librechat/agents": "^3.1.84", "@librechat/api": "*", @@ -1637,9 +1637,9 @@ } }, "node_modules/@anthropic-ai/vertex-sdk": { - "version": "0.14.3", - "resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.14.3.tgz", - "integrity": "sha512-GJZTkkvN66gM3Epqm9laKEjC3orQqzmQt8JAgTN9+zlb+I+1/oEd3Z7rj2tkEKCTeOUVScdhcXPudN8GdpuGqA==", + "version": "0.16.0", + "resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.16.0.tgz", + "integrity": "sha512-ntxemtRkwPsjVzGQJsmBPRW38tfas6VuVlD1v6pHffDJKLPtCdaiN9KUQeraJ/F34tjxEWlsaCnl3t/orJm1Xw==", "license": "MIT", "dependencies": { "@anthropic-ai/sdk": ">=0.50.3 <1", @@ -10010,19 +10010,22 @@ "license": "MIT" }, "node_modules/@google/genai": { - "version": "1.33.0", - "resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.33.0.tgz", - "integrity": "sha512-ThUjFZ1N0DU88peFjnQkb8K198EWaW2RmmnDShFQ+O+xkIH9itjpRe358x3L/b4X/A7dimkvq63oz49Vbh7Cog==", + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/@google/genai/-/genai-2.0.1.tgz", + "integrity": "sha512-trxxbVePM9J8Cuni5x7+xvApoqb2y6Zk27/wugjT2cuwHOT78nFGdf/Ni29MkDxzWwrj90OQpno1Ana6dm3D2A==", + "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { "google-auth-library": "^10.3.0", + "p-retry": "^4.6.2", + "protobufjs": "^7.5.4", "ws": "^8.18.0" }, "engines": { "node": ">=20.0.0" }, "peerDependencies": { - "@modelcontextprotocol/sdk": "^1.24.0" + "@modelcontextprotocol/sdk": "^1.25.2" }, "peerDependenciesMeta": { "@modelcontextprotocol/sdk": { @@ -10117,6 +10120,19 @@ "url": "https://opencollective.com/node-fetch" } }, + "node_modules/@google/genai/node_modules/p-retry": { + "version": "4.6.2", + "resolved": "https://registry.npmjs.org/p-retry/-/p-retry-4.6.2.tgz", + "integrity": "sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==", + "license": "MIT", + "dependencies": { + "@types/retry": "0.12.0", + "retry": "^0.13.1" + }, + "engines": { + "node": ">=8" + } + }, "node_modules/@google/generative-ai": { "version": "0.24.1", "resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.24.1.tgz", @@ -21413,6 +21429,12 @@ "integrity": "sha512-60BCwRFOZCQhDncwQdxxeOEEkbc5dIMccYLwbxsS4TUNeVECQ/pBJ0j09mrHOl/JJvpRPGwO9SvE4nR2Nb/a4Q==", "dev": true }, + "node_modules/@types/retry": { + "version": "0.12.0", + "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz", + "integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==", + "license": "MIT" + }, "node_modules/@types/sanitize-html": { "version": "2.16.1", "resolved": "https://registry.npmjs.org/@types/sanitize-html/-/sanitize-html-2.16.1.tgz", @@ -39824,6 +39846,15 @@ "url": "https://github.com/sponsors/isaacs" } }, + "node_modules/retry": { + "version": "0.13.1", + "resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz", + "integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==", + "license": "MIT", + "engines": { + "node": ">= 4" + } + }, "node_modules/reusify": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", @@ -44648,7 +44679,7 @@ "yauzl": "^3.2.1" }, "peerDependencies": { - "@anthropic-ai/vertex-sdk": "^0.14.3", + "@anthropic-ai/vertex-sdk": "^0.16.0", "@aws-sdk/client-bedrock-runtime": "^3.1013.0", "@aws-sdk/client-cloudfront": "^3.1042.0", "@aws-sdk/client-s3": "^3.980.0", @@ -44656,7 +44687,7 @@ "@azure/identity": "^4.13.1", "@azure/search-documents": "^12.0.0", "@azure/storage-blob": "^12.30.0", - "@google/genai": "^1.19.0", + "@google/genai": "^2.0.1", "@keyv/redis": "^4.3.3", "@librechat/agents": "^3.1.84", "@librechat/data-schemas": "*", diff --git a/packages/api/package.json b/packages/api/package.json index 43eedb53f7..42034e5665 100644 --- a/packages/api/package.json +++ b/packages/api/package.json @@ -88,7 +88,7 @@ "registry": "https://registry.npmjs.org/" }, "peerDependencies": { - "@anthropic-ai/vertex-sdk": "^0.14.3", + "@anthropic-ai/vertex-sdk": "^0.16.0", "@aws-sdk/client-bedrock-runtime": "^3.1013.0", "@aws-sdk/client-cloudfront": "^3.1042.0", "@aws-sdk/client-s3": "^3.980.0", @@ -96,7 +96,7 @@ "@azure/identity": "^4.13.1", "@azure/search-documents": "^12.0.0", "@azure/storage-blob": "^12.30.0", - "@google/genai": "^1.19.0", + "@google/genai": "^2.0.1", "@keyv/redis": "^4.3.3", "@librechat/agents": "^3.1.84", "@librechat/data-schemas": "*", diff --git a/packages/api/src/endpoints/anthropic/vertex.spec.ts b/packages/api/src/endpoints/anthropic/vertex.spec.ts new file mode 100644 index 0000000000..cfe1d9b1d6 --- /dev/null +++ b/packages/api/src/endpoints/anthropic/vertex.spec.ts @@ -0,0 +1,30 @@ +import { AuthKeys } from 'librechat-data-provider'; +import { createAnthropicVertexClient } from './vertex'; + +describe('createAnthropicVertexClient', () => { + const credentials = { + [AuthKeys.GOOGLE_SERVICE_KEY]: { + project_id: 'test-project', + client_email: 'test@test-project.iam.gserviceaccount.com', + private_key: 'test-private-key', + }, + }; + + it('should use Vertex AI multi-region base URLs for Anthropic', () => { + const testCases = [ + { region: 'eu', baseURL: 'https://aiplatform.eu.rep.googleapis.com/v1' }, + { region: 'us', baseURL: 'https://aiplatform.us.rep.googleapis.com/v1' }, + { region: 'global', baseURL: 'https://aiplatform.googleapis.com/v1' }, + ]; + + testCases.forEach(({ region, baseURL }) => { + const client = createAnthropicVertexClient(credentials, undefined, { + region, + projectId: 'test-project', + }); + + expect(client.region).toBe(region); + expect(client.baseURL).toBe(baseURL); + }); + }); +}); diff --git a/packages/api/src/endpoints/google/llm.spec.ts b/packages/api/src/endpoints/google/llm.spec.ts index 095f05a9dd..3c977c20ec 100644 --- a/packages/api/src/endpoints/google/llm.spec.ts +++ b/packages/api/src/endpoints/google/llm.spec.ts @@ -255,6 +255,84 @@ describe('getGoogleConfig', () => { expect(result.llmConfig).toHaveProperty('location', 'europe-west1'); }); + it('should use Vertex AI multi-region endpoints for eu and us locations', () => { + const credentials = { + [AuthKeys.GOOGLE_SERVICE_KEY]: { + project_id: 'test-project', + }, + }; + + const locations = [ + { location: 'eu', endpoint: 'aiplatform.eu.rep.googleapis.com' }, + { location: 'us', endpoint: 'aiplatform.us.rep.googleapis.com' }, + { location: 'global', endpoint: 'aiplatform.googleapis.com' }, + ]; + + locations.forEach(({ location, endpoint }) => { + process.env.GOOGLE_LOC = location; + + const result = getGoogleConfig(credentials, { + modelOptions: { + model: 'gemini-3.1-flash-lite-preview', + }, + }); + + expect(result.llmConfig).toMatchObject({ + location, + endpoint, + }); + }); + }); + + it('should derive Vertex AI endpoint from the final location value', () => { + process.env.GOOGLE_LOC = 'us'; + + const credentials = { + [AuthKeys.GOOGLE_SERVICE_KEY]: { + project_id: 'test-project', + }, + }; + + const result = getGoogleConfig(credentials, { + modelOptions: { + model: 'gemini-3.1-flash-lite-preview', + }, + addParams: { + location: 'eu', + }, + }); + + expect(result.llmConfig).toMatchObject({ + location: 'eu', + endpoint: 'aiplatform.eu.rep.googleapis.com', + }); + }); + + it('should preserve explicit Vertex AI endpoint overrides', () => { + process.env.GOOGLE_LOC = 'us'; + + const credentials = { + [AuthKeys.GOOGLE_SERVICE_KEY]: { + project_id: 'test-project', + }, + }; + + const result = getGoogleConfig(credentials, { + modelOptions: { + model: 'gemini-3.1-flash-lite-preview', + }, + addParams: { + location: 'eu', + endpoint: 'custom-aiplatform.example.com', + }, + }); + + expect(result.llmConfig).toMatchObject({ + location: 'eu', + endpoint: 'custom-aiplatform.example.com', + }); + }); + it('should handle service key as JSON string', () => { const credentials = { [AuthKeys.GOOGLE_SERVICE_KEY]: JSON.stringify({ @@ -972,6 +1050,7 @@ describe('knownGoogleParams', () => { expect(knownGoogleParams.has('topP')).toBe(true); expect(knownGoogleParams.has('topK')).toBe(true); expect(knownGoogleParams.has('apiKey')).toBe(true); + expect(knownGoogleParams.has('endpoint')).toBe(true); expect(knownGoogleParams.has('safetySettings')).toBe(true); }); diff --git a/packages/api/src/endpoints/google/llm.ts b/packages/api/src/endpoints/google/llm.ts index 0f0853b2ac..e3826b9cdb 100644 --- a/packages/api/src/endpoints/google/llm.ts +++ b/packages/api/src/endpoints/google/llm.ts @@ -19,6 +19,12 @@ const googleThinkingLevels = new Set([ 'HIGH', ]); +const vertexMultiRegionEndpoints = new Map([ + ['eu', 'aiplatform.eu.rep.googleapis.com'], + ['us', 'aiplatform.us.rep.googleapis.com'], + ['global', 'aiplatform.googleapis.com'], +]); + /** Known Google/Vertex AI parameters that map directly to the client config */ export const knownGoogleParams = new Set([ 'model', @@ -42,6 +48,7 @@ export const knownGoogleParams = new Set([ 'streamUsage', 'apiKey', 'baseUrl', + 'endpoint', 'location', 'authOptions', ]); @@ -95,6 +102,25 @@ function normalizeGoogleThinkingLevel(value: unknown): GoogleThinkingLevel | und return normalized; } +function getVertexMultiRegionEndpoint(location: string): string | undefined { + return vertexMultiRegionEndpoints.get(location); +} + +function hasStringEndpoint(config: Record): boolean { + return typeof config.endpoint === 'string' && config.endpoint.length > 0; +} + +function applyVertexMultiRegionEndpoint(config: VertexAIClientOptions & { endpoint?: string }) { + const location = config.location; + if (typeof location !== 'string') { + return; + } + const multiRegionEndpoint = getVertexMultiRegionEndpoint(location); + if (multiRegionEndpoint) { + config.endpoint = multiRegionEndpoint; + } +} + export function getSafetySettings( model?: string, ): Array<{ category: string; threshold: string }> | undefined { @@ -195,6 +221,9 @@ export function getGoogleConfig( }, true, ); + const initialConfig = llmConfig as Record; + let hasCustomVertexEndpoint = hasStringEndpoint(initialConfig); + let shouldSyncVertexEndpoint = true; /** Used only for Safety Settings */ llmConfig.safetySettings = getSafetySettings(llmConfig.model); @@ -213,7 +242,8 @@ export function getGoogleConfig( credentials: { ...serviceKey }, projectId: project_id, }; - (llmConfig as VertexAIClientOptions).location = process.env.GOOGLE_LOC || 'us-central1'; + const location = process.env.GOOGLE_LOC || 'us-central1'; + (llmConfig as VertexAIClientOptions).location = location; } else if (apiKey && provider === Providers.GOOGLE) { llmConfig.apiKey = apiKey; } else { @@ -311,6 +341,9 @@ export function getGoogleConfig( if (knownGoogleParams.has(key)) { /** Route known Google params to llmConfig only if undefined */ applyDefaultParams(llmConfig as Record, { [key]: value }); + if (key === 'endpoint' && hasStringEndpoint(llmConfig as Record)) { + hasCustomVertexEndpoint = true; + } } /** Leave other params for transform to handle - they might be OpenAI params */ } @@ -330,6 +363,9 @@ export function getGoogleConfig( if (knownGoogleParams.has(key)) { /** Route known Google params to llmConfig */ (llmConfig as Record)[key] = value; + if (key === 'endpoint') { + hasCustomVertexEndpoint = hasStringEndpoint(llmConfig as Record); + } } /** Leave other params for transform to handle - they might be OpenAI params */ } @@ -343,12 +379,21 @@ export function getGoogleConfig( return; } + if (param === 'endpoint') { + shouldSyncVertexEndpoint = false; + hasCustomVertexEndpoint = false; + } + if (param in llmConfig) { delete (llmConfig as Record)[param]; } }); } + if (provider === Providers.VERTEXAI && shouldSyncVertexEndpoint && !hasCustomVertexEndpoint) { + applyVertexMultiRegionEndpoint(llmConfig as VertexAIClientOptions & { endpoint?: string }); + } + const tools: GoogleAIToolType[] = []; if (enableWebSearch) {