mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-05-13 07:46:47 +00:00
🌐 feat: Support Vertex AI Multi-Region Endpoints (#13044)
* feat: support Vertex AI multi-region endpoints * fix: sync Vertex endpoint with final location
This commit is contained in:
parent
8fc68ebac0
commit
c3ec23f9b8
8 changed files with 205 additions and 17 deletions
|
|
@ -149,6 +149,7 @@ ANTHROPIC_API_KEY=user_provided
|
|||
|
||||
# Set to true to use Anthropic models through Google Vertex AI instead of direct API
|
||||
# ANTHROPIC_USE_VERTEX=
|
||||
# Supports regional locations like us-east5 and multi-region locations: us, eu, global
|
||||
# ANTHROPIC_VERTEX_REGION=us-east5
|
||||
|
||||
#============#
|
||||
|
|
@ -209,7 +210,8 @@ GOOGLE_KEY=user_provided
|
|||
|
||||
# GOOGLE_TITLE_MODEL=gemini-2.0-flash-lite-001
|
||||
|
||||
# Google Cloud region for Vertex AI (used by both chat and image generation)
|
||||
# Google Cloud location for Vertex AI (used by both chat and image generation).
|
||||
# Supports regional locations like us-central1 and multi-region locations: us, eu, global.
|
||||
# GOOGLE_LOC=us-central1
|
||||
|
||||
# Alternative region env var for Gemini Image Generation
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@
|
|||
},
|
||||
"homepage": "https://librechat.ai",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/vertex-sdk": "^0.14.3",
|
||||
"@anthropic-ai/vertex-sdk": "^0.16.0",
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.1013.0",
|
||||
"@aws-sdk/client-cloudfront": "^3.1042.0",
|
||||
"@aws-sdk/client-s3": "^3.980.0",
|
||||
|
|
@ -43,7 +43,7 @@
|
|||
"@azure/identity": "^4.13.1",
|
||||
"@azure/search-documents": "^12.0.0",
|
||||
"@azure/storage-blob": "^12.30.0",
|
||||
"@google/genai": "^1.19.0",
|
||||
"@google/genai": "^2.0.1",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@librechat/agents": "^3.1.84",
|
||||
"@librechat/api": "*",
|
||||
|
|
|
|||
|
|
@ -373,6 +373,7 @@ endpoints:
|
|||
# vertex:
|
||||
# # Vertex AI region (optional, defaults to 'us-east5')
|
||||
# # Available regions: us-east5, us-central1, europe-west1, europe-west4, asia-southeast1
|
||||
# # Multi-region endpoints: us, eu, global
|
||||
# region: "us-east5"
|
||||
# # Path to Google service account key file (optional)
|
||||
# # If not specified, uses GOOGLE_SERVICE_KEY_FILE env var or default path (api/data/auth.json)
|
||||
|
|
|
|||
53
package-lock.json
generated
53
package-lock.json
generated
|
|
@ -49,7 +49,7 @@
|
|||
"version": "v0.8.5",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/vertex-sdk": "^0.14.3",
|
||||
"@anthropic-ai/vertex-sdk": "^0.16.0",
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.1013.0",
|
||||
"@aws-sdk/client-cloudfront": "^3.1042.0",
|
||||
"@aws-sdk/client-s3": "^3.980.0",
|
||||
|
|
@ -58,7 +58,7 @@
|
|||
"@azure/identity": "^4.13.1",
|
||||
"@azure/search-documents": "^12.0.0",
|
||||
"@azure/storage-blob": "^12.30.0",
|
||||
"@google/genai": "^1.19.0",
|
||||
"@google/genai": "^2.0.1",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@librechat/agents": "^3.1.84",
|
||||
"@librechat/api": "*",
|
||||
|
|
@ -1637,9 +1637,9 @@
|
|||
}
|
||||
},
|
||||
"node_modules/@anthropic-ai/vertex-sdk": {
|
||||
"version": "0.14.3",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.14.3.tgz",
|
||||
"integrity": "sha512-GJZTkkvN66gM3Epqm9laKEjC3orQqzmQt8JAgTN9+zlb+I+1/oEd3Z7rj2tkEKCTeOUVScdhcXPudN8GdpuGqA==",
|
||||
"version": "0.16.0",
|
||||
"resolved": "https://registry.npmjs.org/@anthropic-ai/vertex-sdk/-/vertex-sdk-0.16.0.tgz",
|
||||
"integrity": "sha512-ntxemtRkwPsjVzGQJsmBPRW38tfas6VuVlD1v6pHffDJKLPtCdaiN9KUQeraJ/F34tjxEWlsaCnl3t/orJm1Xw==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@anthropic-ai/sdk": ">=0.50.3 <1",
|
||||
|
|
@ -10010,19 +10010,22 @@
|
|||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@google/genai": {
|
||||
"version": "1.33.0",
|
||||
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-1.33.0.tgz",
|
||||
"integrity": "sha512-ThUjFZ1N0DU88peFjnQkb8K198EWaW2RmmnDShFQ+O+xkIH9itjpRe358x3L/b4X/A7dimkvq63oz49Vbh7Cog==",
|
||||
"version": "2.0.1",
|
||||
"resolved": "https://registry.npmjs.org/@google/genai/-/genai-2.0.1.tgz",
|
||||
"integrity": "sha512-trxxbVePM9J8Cuni5x7+xvApoqb2y6Zk27/wugjT2cuwHOT78nFGdf/Ni29MkDxzWwrj90OQpno1Ana6dm3D2A==",
|
||||
"hasInstallScript": true,
|
||||
"license": "Apache-2.0",
|
||||
"dependencies": {
|
||||
"google-auth-library": "^10.3.0",
|
||||
"p-retry": "^4.6.2",
|
||||
"protobufjs": "^7.5.4",
|
||||
"ws": "^8.18.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=20.0.0"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@modelcontextprotocol/sdk": "^1.24.0"
|
||||
"@modelcontextprotocol/sdk": "^1.25.2"
|
||||
},
|
||||
"peerDependenciesMeta": {
|
||||
"@modelcontextprotocol/sdk": {
|
||||
|
|
@ -10117,6 +10120,19 @@
|
|||
"url": "https://opencollective.com/node-fetch"
|
||||
}
|
||||
},
|
||||
"node_modules/@google/genai/node_modules/p-retry": {
|
||||
"version": "4.6.2",
|
||||
"resolved": "https://registry.npmjs.org/p-retry/-/p-retry-4.6.2.tgz",
|
||||
"integrity": "sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==",
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
"@types/retry": "0.12.0",
|
||||
"retry": "^0.13.1"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/@google/generative-ai": {
|
||||
"version": "0.24.1",
|
||||
"resolved": "https://registry.npmjs.org/@google/generative-ai/-/generative-ai-0.24.1.tgz",
|
||||
|
|
@ -21413,6 +21429,12 @@
|
|||
"integrity": "sha512-60BCwRFOZCQhDncwQdxxeOEEkbc5dIMccYLwbxsS4TUNeVECQ/pBJ0j09mrHOl/JJvpRPGwO9SvE4nR2Nb/a4Q==",
|
||||
"dev": true
|
||||
},
|
||||
"node_modules/@types/retry": {
|
||||
"version": "0.12.0",
|
||||
"resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz",
|
||||
"integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/@types/sanitize-html": {
|
||||
"version": "2.16.1",
|
||||
"resolved": "https://registry.npmjs.org/@types/sanitize-html/-/sanitize-html-2.16.1.tgz",
|
||||
|
|
@ -39824,6 +39846,15 @@
|
|||
"url": "https://github.com/sponsors/isaacs"
|
||||
}
|
||||
},
|
||||
"node_modules/retry": {
|
||||
"version": "0.13.1",
|
||||
"resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz",
|
||||
"integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">= 4"
|
||||
}
|
||||
},
|
||||
"node_modules/reusify": {
|
||||
"version": "1.0.4",
|
||||
"resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz",
|
||||
|
|
@ -44648,7 +44679,7 @@
|
|||
"yauzl": "^3.2.1"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@anthropic-ai/vertex-sdk": "^0.14.3",
|
||||
"@anthropic-ai/vertex-sdk": "^0.16.0",
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.1013.0",
|
||||
"@aws-sdk/client-cloudfront": "^3.1042.0",
|
||||
"@aws-sdk/client-s3": "^3.980.0",
|
||||
|
|
@ -44656,7 +44687,7 @@
|
|||
"@azure/identity": "^4.13.1",
|
||||
"@azure/search-documents": "^12.0.0",
|
||||
"@azure/storage-blob": "^12.30.0",
|
||||
"@google/genai": "^1.19.0",
|
||||
"@google/genai": "^2.0.1",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@librechat/agents": "^3.1.84",
|
||||
"@librechat/data-schemas": "*",
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@
|
|||
"registry": "https://registry.npmjs.org/"
|
||||
},
|
||||
"peerDependencies": {
|
||||
"@anthropic-ai/vertex-sdk": "^0.14.3",
|
||||
"@anthropic-ai/vertex-sdk": "^0.16.0",
|
||||
"@aws-sdk/client-bedrock-runtime": "^3.1013.0",
|
||||
"@aws-sdk/client-cloudfront": "^3.1042.0",
|
||||
"@aws-sdk/client-s3": "^3.980.0",
|
||||
|
|
@ -96,7 +96,7 @@
|
|||
"@azure/identity": "^4.13.1",
|
||||
"@azure/search-documents": "^12.0.0",
|
||||
"@azure/storage-blob": "^12.30.0",
|
||||
"@google/genai": "^1.19.0",
|
||||
"@google/genai": "^2.0.1",
|
||||
"@keyv/redis": "^4.3.3",
|
||||
"@librechat/agents": "^3.1.84",
|
||||
"@librechat/data-schemas": "*",
|
||||
|
|
|
|||
30
packages/api/src/endpoints/anthropic/vertex.spec.ts
Normal file
30
packages/api/src/endpoints/anthropic/vertex.spec.ts
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
import { AuthKeys } from 'librechat-data-provider';
|
||||
import { createAnthropicVertexClient } from './vertex';
|
||||
|
||||
/**
 * Verifies that the Anthropic Vertex client reports the requested region and
 * the matching base URL for each multi-region location.
 */
describe('createAnthropicVertexClient', () => {
  // Minimal service-account shaped credentials shared by every case below.
  const serviceKey = {
    project_id: 'test-project',
    client_email: 'test@test-project.iam.gserviceaccount.com',
    private_key: 'test-private-key',
  };
  const credentials = { [AuthKeys.GOOGLE_SERVICE_KEY]: serviceKey };

  it('should use Vertex AI multi-region base URLs for Anthropic', () => {
    // [region passed in, base URL the client is expected to expose]
    const expectations: Array<[string, string]> = [
      ['eu', 'https://aiplatform.eu.rep.googleapis.com/v1'],
      ['us', 'https://aiplatform.us.rep.googleapis.com/v1'],
      ['global', 'https://aiplatform.googleapis.com/v1'],
    ];

    for (const [region, baseURL] of expectations) {
      const vertexOptions = { region, projectId: 'test-project' };
      const client = createAnthropicVertexClient(credentials, undefined, vertexOptions);

      expect(client.region).toBe(region);
      expect(client.baseURL).toBe(baseURL);
    }
  });
});
|
||||
|
|
@ -255,6 +255,84 @@ describe('getGoogleConfig', () => {
|
|||
expect(result.llmConfig).toHaveProperty('location', 'europe-west1');
|
||||
});
|
||||
|
||||
/**
 * For each multi-region location supplied through GOOGLE_LOC, the resulting
 * llmConfig must carry that location together with its dedicated host.
 * The title names all three covered locations (eu, us, global).
 */
it('should use Vertex AI multi-region endpoints for eu, us, and global locations', () => {
  const credentials = {
    [AuthKeys.GOOGLE_SERVICE_KEY]: {
      project_id: 'test-project',
    },
  };

  const locations = [
    { location: 'eu', endpoint: 'aiplatform.eu.rep.googleapis.com' },
    { location: 'us', endpoint: 'aiplatform.us.rep.googleapis.com' },
    { location: 'global', endpoint: 'aiplatform.googleapis.com' },
  ];

  for (const { location, endpoint } of locations) {
    // NOTE(review): GOOGLE_LOC is not restored inside this test — confirm the
    // suite's setup/teardown hooks reset process.env between tests.
    process.env.GOOGLE_LOC = location;

    const result = getGoogleConfig(credentials, {
      modelOptions: {
        model: 'gemini-3.1-flash-lite-preview',
      },
    });

    expect(result.llmConfig).toMatchObject({
      location,
      endpoint,
    });
  }
});
|
||||
|
||||
/**
 * GOOGLE_LOC is set to 'us' while addParams supplies location 'eu'; the
 * assertion expects the 'eu' location and the 'eu' multi-region host.
 */
it('should derive Vertex AI endpoint from the final location value', () => {
  const serviceKey = { project_id: 'test-project' };
  const credentials = { [AuthKeys.GOOGLE_SERVICE_KEY]: serviceKey };
  const expected = {
    location: 'eu',
    endpoint: 'aiplatform.eu.rep.googleapis.com',
  };

  // NOTE(review): GOOGLE_LOC is not restored inside this test — confirm the
  // suite's setup/teardown hooks reset process.env between tests.
  process.env.GOOGLE_LOC = 'us';

  const { llmConfig } = getGoogleConfig(credentials, {
    modelOptions: { model: 'gemini-3.1-flash-lite-preview' },
    addParams: { location: 'eu' },
  });

  expect(llmConfig).toMatchObject(expected);
});
|
||||
|
||||
/**
 * When addParams carries both a multi-region location ('eu') and an explicit
 * endpoint, the assertion expects the explicit endpoint to be kept as-is.
 */
it('should preserve explicit Vertex AI endpoint overrides', () => {
  const serviceKey = { project_id: 'test-project' };
  const credentials = { [AuthKeys.GOOGLE_SERVICE_KEY]: serviceKey };
  const overrides = {
    location: 'eu',
    endpoint: 'custom-aiplatform.example.com',
  };

  // NOTE(review): GOOGLE_LOC is not restored inside this test — confirm the
  // suite's setup/teardown hooks reset process.env between tests.
  process.env.GOOGLE_LOC = 'us';

  const { llmConfig } = getGoogleConfig(credentials, {
    modelOptions: { model: 'gemini-3.1-flash-lite-preview' },
    addParams: { ...overrides },
  });

  expect(llmConfig).toMatchObject({
    location: 'eu',
    endpoint: 'custom-aiplatform.example.com',
  });
});
|
||||
|
||||
it('should handle service key as JSON string', () => {
|
||||
const credentials = {
|
||||
[AuthKeys.GOOGLE_SERVICE_KEY]: JSON.stringify({
|
||||
|
|
@ -972,6 +1050,7 @@ describe('knownGoogleParams', () => {
|
|||
expect(knownGoogleParams.has('topP')).toBe(true);
|
||||
expect(knownGoogleParams.has('topK')).toBe(true);
|
||||
expect(knownGoogleParams.has('apiKey')).toBe(true);
|
||||
expect(knownGoogleParams.has('endpoint')).toBe(true);
|
||||
expect(knownGoogleParams.has('safetySettings')).toBe(true);
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -19,6 +19,12 @@ const googleThinkingLevels = new Set<GoogleThinkingLevel>([
|
|||
'HIGH',
|
||||
]);
|
||||
|
||||
/**
 * Lookup table from a Vertex AI multi-region location name to the API host
 * used for it. Only `eu`, `us`, and `global` are listed; any other location
 * has no entry, so `.get()` returns `undefined` for it.
 *
 * Typed as `ReadonlyMap` so this module-level table cannot be mutated by
 * accident; consumers only need `.get()`.
 */
const vertexMultiRegionEndpoints: ReadonlyMap<string, string> = new Map([
  ['eu', 'aiplatform.eu.rep.googleapis.com'],
  ['us', 'aiplatform.us.rep.googleapis.com'],
  ['global', 'aiplatform.googleapis.com'],
]);
|
||||
|
||||
/** Known Google/Vertex AI parameters that map directly to the client config */
|
||||
export const knownGoogleParams = new Set([
|
||||
'model',
|
||||
|
|
@ -42,6 +48,7 @@ export const knownGoogleParams = new Set([
|
|||
'streamUsage',
|
||||
'apiKey',
|
||||
'baseUrl',
|
||||
'endpoint',
|
||||
'location',
|
||||
'authOptions',
|
||||
]);
|
||||
|
|
@ -95,6 +102,25 @@ function normalizeGoogleThinkingLevel(value: unknown): GoogleThinkingLevel | und
|
|||
return normalized;
|
||||
}
|
||||
|
||||
/**
 * Looks up the multi-region host registered for `location`.
 *
 * @param location - Vertex AI location name to look up (exact match).
 * @returns The mapped host, or `undefined` when the location has no entry.
 */
function getVertexMultiRegionEndpoint(location: string): string | undefined {
  const mappedEndpoint = vertexMultiRegionEndpoints.get(location);
  return mappedEndpoint;
}
|
||||
|
||||
/**
 * Type guard reporting whether `config.endpoint` is a non-empty string.
 *
 * Declared as a type predicate so that, inside a truthy branch, callers can
 * read `config.endpoint` as a `string` without a cast. The result is still a
 * plain boolean at runtime, so existing boolean usages keep working.
 *
 * @param config - Loosely typed config object to inspect.
 * @returns `true` only when `endpoint` is a string with length greater than 0.
 */
function hasStringEndpoint(
  config: Record<string, unknown>,
): config is Record<string, unknown> & { endpoint: string } {
  const { endpoint } = config;
  // An empty string is treated the same as a missing endpoint.
  return typeof endpoint === 'string' && endpoint.length > 0;
}
|
||||
|
||||
/**
 * Sets `config.endpoint` to the multi-region host that corresponds to
 * `config.location`, mutating `config` in place.
 *
 * Nothing is changed when `location` is not a string or when the lookup
 * yields no host for it.
 *
 * @param config - Vertex AI client options, optionally carrying an `endpoint`.
 */
function applyVertexMultiRegionEndpoint(config: VertexAIClientOptions & { endpoint?: string }) {
  const { location } = config;
  const resolvedEndpoint =
    typeof location === 'string' ? getVertexMultiRegionEndpoint(location) : undefined;

  if (!resolvedEndpoint) {
    return;
  }

  config.endpoint = resolvedEndpoint;
}
|
||||
|
||||
export function getSafetySettings(
|
||||
model?: string,
|
||||
): Array<{ category: string; threshold: string }> | undefined {
|
||||
|
|
@ -195,6 +221,9 @@ export function getGoogleConfig(
|
|||
},
|
||||
true,
|
||||
);
|
||||
const initialConfig = llmConfig as Record<string, unknown>;
|
||||
let hasCustomVertexEndpoint = hasStringEndpoint(initialConfig);
|
||||
let shouldSyncVertexEndpoint = true;
|
||||
|
||||
/** Used only for Safety Settings */
|
||||
llmConfig.safetySettings = getSafetySettings(llmConfig.model);
|
||||
|
|
@ -213,7 +242,8 @@ export function getGoogleConfig(
|
|||
credentials: { ...serviceKey },
|
||||
projectId: project_id,
|
||||
};
|
||||
(llmConfig as VertexAIClientOptions).location = process.env.GOOGLE_LOC || 'us-central1';
|
||||
const location = process.env.GOOGLE_LOC || 'us-central1';
|
||||
(llmConfig as VertexAIClientOptions).location = location;
|
||||
} else if (apiKey && provider === Providers.GOOGLE) {
|
||||
llmConfig.apiKey = apiKey;
|
||||
} else {
|
||||
|
|
@ -311,6 +341,9 @@ export function getGoogleConfig(
|
|||
if (knownGoogleParams.has(key)) {
|
||||
/** Route known Google params to llmConfig only if undefined */
|
||||
applyDefaultParams(llmConfig as Record<string, unknown>, { [key]: value });
|
||||
if (key === 'endpoint' && hasStringEndpoint(llmConfig as Record<string, unknown>)) {
|
||||
hasCustomVertexEndpoint = true;
|
||||
}
|
||||
}
|
||||
/** Leave other params for transform to handle - they might be OpenAI params */
|
||||
}
|
||||
|
|
@ -330,6 +363,9 @@ export function getGoogleConfig(
|
|||
if (knownGoogleParams.has(key)) {
|
||||
/** Route known Google params to llmConfig */
|
||||
(llmConfig as Record<string, unknown>)[key] = value;
|
||||
if (key === 'endpoint') {
|
||||
hasCustomVertexEndpoint = hasStringEndpoint(llmConfig as Record<string, unknown>);
|
||||
}
|
||||
}
|
||||
/** Leave other params for transform to handle - they might be OpenAI params */
|
||||
}
|
||||
|
|
@ -343,12 +379,21 @@ export function getGoogleConfig(
|
|||
return;
|
||||
}
|
||||
|
||||
if (param === 'endpoint') {
|
||||
shouldSyncVertexEndpoint = false;
|
||||
hasCustomVertexEndpoint = false;
|
||||
}
|
||||
|
||||
if (param in llmConfig) {
|
||||
delete (llmConfig as Record<string, unknown>)[param];
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (provider === Providers.VERTEXAI && shouldSyncVertexEndpoint && !hasCustomVertexEndpoint) {
|
||||
applyVertexMultiRegionEndpoint(llmConfig as VertexAIClientOptions & { endpoint?: string });
|
||||
}
|
||||
|
||||
const tools: GoogleAIToolType[] = [];
|
||||
|
||||
if (enableWebSearch) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue