From e5ff82f81af394e30893d761b0f3c554e4f1ae34 Mon Sep 17 00:00:00 2001 From: Feng Lu Date: Wed, 29 Apr 2026 12:16:48 +0200 Subject: [PATCH] feat: add Bedrock prompt cache TTL config --- helm/librechat/values.yaml | 6 ++ librechat.example.yaml | 6 ++ .../src/endpoints/bedrock/initialize.spec.ts | 42 +++++++++++++ .../api/src/endpoints/bedrock/initialize.ts | 4 ++ packages/api/src/types/bedrock.ts | 4 ++ packages/data-provider/specs/bedrock.spec.ts | 62 +++++++++++++++++++ packages/data-provider/src/bedrock.ts | 51 ++++++++++++--- packages/data-provider/src/config.spec.ts | 12 ++++ packages/data-provider/src/config.ts | 1 + packages/data-provider/src/schemas.ts | 3 + packages/data-provider/src/types.ts | 1 + packages/data-schemas/src/schema/defaults.ts | 4 ++ packages/data-schemas/src/schema/preset.ts | 1 + packages/data-schemas/src/types/convo.ts | 1 + 14 files changed, 188 insertions(+), 10 deletions(-) diff --git a/helm/librechat/values.yaml b/helm/librechat/values.yaml index 1dbacbe89d..ef897a2ecb 100755 --- a/helm/librechat/values.yaml +++ b/helm/librechat/values.yaml @@ -69,6 +69,12 @@ librechat: # registration: # socialLogins: ["discord", "facebook", "github", "google", "openid"] # endpoints: + # bedrock: + # models: + # - "anthropic.claude-sonnet-4-5-20250929-v1:0" + # # Optional. Use "1h" only with Bedrock models that support 1-hour prompt cache TTL. + # # Omit this field to keep Bedrock's default 5-minute prompt cache TTL. + # promptCacheTtl: "1h" # azureOpenAI: # # Endpoint-level configuration # titleModel: "gpt-4o" diff --git a/librechat.example.yaml b/librechat.example.yaml index 16c0dbc0a4..206355c25e 100644 --- a/librechat.example.yaml +++ b/librechat.example.yaml @@ -534,6 +534,12 @@ endpoints: # - "anthropic.claude-3-7-sonnet-20250219-v1:0" # - "anthropic.claude-3-5-sonnet-20241022-v2:0" # + # # Prompt Cache TTL + # # Optional. Bedrock supports 5-minute cache checkpoints, and 1-hour checkpoints + # # for Claude 4.5 models. When omitted, Bedrock uses its default 5-minute TTL. + # # Reference: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_CachePointBlock.html + # promptCacheTtl: "1h" + # # # Inference Profiles Configuration # # Maps model IDs to their inference profile ARNs # # IMPORTANT: The model ID (key) MUST be a valid AWS Bedrock model ID that you've added to the models list above diff --git a/packages/api/src/endpoints/bedrock/initialize.spec.ts b/packages/api/src/endpoints/bedrock/initialize.spec.ts index decf9a7df9..ef1c848b6a 100644 --- a/packages/api/src/endpoints/bedrock/initialize.spec.ts +++ b/packages/api/src/endpoints/bedrock/initialize.spec.ts @@ -95,6 +95,48 @@ describe('initializeBedrock', () => { expect(result.llmConfig).toHaveProperty('region', 'us-east-1'); }); + it('should include promptCacheTtl from Bedrock endpoint config', async () => { + const params = createMockParams({ + model_parameters: { + model: 'anthropic.claude-sonnet-4-5-20250929-v1:0', + }, + config: { + endpoints: { + [EModelEndpoint.bedrock]: { + promptCacheTtl: '1h', + }, + }, + }, + }); + const result = await initializeBedrock(params); + + expect(result.llmConfig).toHaveProperty('promptCacheTtl', '1h'); + expect(result.llmConfig).toHaveProperty('promptCache', true); + }); + + it('should omit one-hour promptCacheTtl for models that only support 5 minutes', async () => { + const params = createMockParams({ + config: { + endpoints: { + [EModelEndpoint.bedrock]: { + promptCacheTtl: '1h', + }, + }, + }, + }); + const result = await initializeBedrock(params); + + expect(result.llmConfig).not.toHaveProperty('promptCacheTtl'); + expect(result.llmConfig).toHaveProperty('promptCache', true); + }); + + it('should not include promptCacheTtl when not configured', async () => { + const params = createMockParams(); + const result = await initializeBedrock(params); + + expect(result.llmConfig).not.toHaveProperty('promptCacheTtl'); + }); + it('should handle model_parameters', async () => { const params = createMockParams({ model_parameters: { diff --git a/packages/api/src/endpoints/bedrock/initialize.ts b/packages/api/src/endpoints/bedrock/initialize.ts index f3ba459ba5..bf04383d42 100644 --- a/packages/api/src/endpoints/bedrock/initialize.ts +++ b/packages/api/src/endpoints/bedrock/initialize.ts @@ -13,6 +13,7 @@ import type { BaseInitializeParams, InitializeResultBase, BedrockCredentials, + BedrockPromptCacheTtl, GuardrailConfiguration, InferenceProfileConfig, } from '~/types'; @@ -54,6 +55,7 @@ export async function initializeBedrock({ | ({ guardrailConfig?: GuardrailConfiguration; inferenceProfiles?: InferenceProfileConfig; + promptCacheTtl?: BedrockPromptCacheTtl; } & Record) | undefined; @@ -98,6 +100,7 @@ export async function initializeBedrock({ const requestOptions: Record = { model: model_parameters?.model as string | undefined, region: BEDROCK_AWS_DEFAULT_REGION, + promptCacheTtl: bedrockConfig?.promptCacheTtl, }; const configOptions: Record = {}; @@ -117,6 +120,7 @@ export async function initializeBedrock({ endpointHost?: string; guardrailConfig?: GuardrailConfiguration; applicationInferenceProfile?: string; + promptCacheTtl?: BedrockPromptCacheTtl; }; if (bedrockConfig?.guardrailConfig) { diff --git a/packages/api/src/types/bedrock.ts b/packages/api/src/types/bedrock.ts index 8f34b2864d..914bc7eb33 100644 --- a/packages/api/src/types/bedrock.ts +++ b/packages/api/src/types/bedrock.ts @@ -27,6 +27,7 @@ export interface GuardrailConfiguration { * @see https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles.html */ export type InferenceProfileConfig = Record; +export type BedrockPromptCacheTtl = '5m' | '1h'; /** * Configuration options for Bedrock LLM @@ -45,6 +46,8 @@ export interface BedrockConfigOptions { guardrailConfig?: GuardrailConfiguration; /** Inference profile ARNs keyed by model ID / friendly name */ inferenceProfiles?: InferenceProfileConfig; + /** Bedrock prompt cache checkpoint TTL. Defaults to Bedrock's 5-minute TTL when unset. */ + promptCacheTtl?: BedrockPromptCacheTtl; } /** @@ -58,6 +61,7 @@ export interface BedrockLLMConfigResult { endpointHost?: string; guardrailConfig?: GuardrailConfiguration; applicationInferenceProfile?: string; + promptCacheTtl?: BedrockPromptCacheTtl; }; configOptions: Record; } diff --git a/packages/data-provider/specs/bedrock.spec.ts b/packages/data-provider/specs/bedrock.spec.ts index bbb57310c6..8b6ac458b6 100644 --- a/packages/data-provider/specs/bedrock.spec.ts +++ b/packages/data-provider/specs/bedrock.spec.ts @@ -921,6 +921,68 @@ describe('bedrockInputParser', () => { expect(result.promptCache).toBe(true); }); + test('should preserve one-hour promptCacheTtl for Claude 4.5 models', () => { + const input = { + model: 'anthropic.claude-sonnet-4-5-20250929-v1:0', + promptCache: true, + promptCacheTtl: '1h', + }; + const result = bedrockInputParser.parse(input) as Record; + expect(result.promptCache).toBe(true); + expect(result.promptCacheTtl).toBe('1h'); + }); + + test('should strip one-hour promptCacheTtl for models that only support 5 minutes', () => { + const result = bedrockInputParser.parse({ + model: 'amazon.nova-pro-v1:0', + promptCache: true, + promptCacheTtl: '1h', + }) as Record; + expect(result.promptCache).toBe(true); + expect(result.promptCacheTtl).toBeUndefined(); + }); + + test('should preserve explicit 5-minute promptCacheTtl for Nova models', () => { + const input = { + model: 'amazon.nova-pro-v1:0', + promptCache: true, + promptCacheTtl: '5m', + }; + const result = bedrockInputParser.parse(input) as Record; + expect(result.promptCache).toBe(true); + expect(result.promptCacheTtl).toBe('5m'); + }); + + test('should strip promptCacheTtl when promptCache is disabled', () => { + const input = { + model: 'anthropic.claude-sonnet-4-20250514-v1:0', + promptCache: false, + promptCacheTtl: '1h', + }; + const result = bedrockInputParser.parse(input) as Record; + expect(result.promptCache).toBe(false); + expect(result.promptCacheTtl).toBeUndefined(); + }); + + test('should strip stale promptCacheTtl when switching to non-Claude/Nova model', () => { + const staleConversationData = { + model: 'deepseek.deepseek-r1', + promptCacheTtl: '1h', + }; + const result = bedrockInputParser.parse(staleConversationData) as Record; + expect(result.promptCacheTtl).toBeUndefined(); + }); + + test('bedrockInputSchema should strip stale promptCacheTtl when promptCache is disabled', () => { + const result = bedrockInputSchema.parse({ + model: 'anthropic.claude-sonnet-4-20250514-v1:0', + promptCache: false, + promptCacheTtl: '1h', + }) as Record; + expect(result.promptCache).toBe(false); + expect(result.promptCacheTtl).toBeUndefined(); + }); + test('should strip stale thinking config from additionalModelRequestFields for non-Anthropic models', () => { const staleConversationData = { model: 'moonshot.kimi-k2-0711-thinking', diff --git a/packages/data-provider/src/bedrock.ts b/packages/data-provider/src/bedrock.ts index 96a4998079..72920202e2 100644 --- a/packages/data-provider/src/bedrock.ts +++ b/packages/data-provider/src/bedrock.ts @@ -40,6 +40,42 @@ function extractPersistedDisplay(amrf: unknown): string | undefined { return typeof display === 'string' ? display : undefined; } +function supportsBedrockPromptCache(model: unknown): boolean { + return typeof model === 'string' && (model.includes('claude') || model.includes('nova')); +} + +function supportsOneHourBedrockPromptCache(model: unknown): boolean { + if (typeof model !== 'string') { + return false; + } + + return ( + model.includes('anthropic.claude-opus-4-5') || + model.includes('anthropic.claude-sonnet-4-5') || + model.includes('anthropic.claude-haiku-4-5') + ); +} + +function normalizeBedrockPromptCache(data: Record) { + if (supportsBedrockPromptCache(data.model)) { + if (data.promptCache === undefined) { + data.promptCache = true; + } + if (data.promptCacheTtl === '1h' && !supportsOneHourBedrockPromptCache(data.model)) { + data.promptCacheTtl = undefined; + } + } else { + if (data.promptCache === true) { + data.promptCache = undefined; + } + data.promptCacheTtl = undefined; + } + + if (data.promptCache === false) { + data.promptCacheTtl = undefined; + } +} + export function resolveThinkingDisplay( model: string, explicit?: s.ThinkingDisplay | string | null, @@ -226,6 +262,7 @@ export const bedrockInputSchema = s.tConversationSchema thinkingDisplay: true, reasoning_effort: true, promptCache: true, + promptCacheTtl: true, /* Catch-all fields */ topK: true, additionalModelRequestFields: true, @@ -250,6 +287,7 @@ export const bedrockInputSchema = s.tConversationSchema } delete obj.additionalModelRequestFields; } + normalizeBedrockPromptCache(obj as Record); return s.removeNullishValues(obj); }) .catch(() => ({})); @@ -281,6 +319,7 @@ export const bedrockInputParser = s.tConversationSchema thinkingDisplay: true, reasoning_effort: true, promptCache: true, + promptCacheTtl: true, /* Catch-all fields */ topK: true, additionalModelRequestFields: true, @@ -304,6 +343,7 @@ export const bedrockInputParser = s.tConversationSchema 'topP', 'stop', 'promptCache', + 'promptCacheTtl', ]; const additionalFields: Record = {}; @@ -439,16 +479,7 @@ export const bedrockInputParser = s.tConversationSchema } /** Default promptCache for claude and nova models, if not defined */ - if ( - typeof typedData.model === 'string' && - (typedData.model.includes('claude') || typedData.model.includes('nova')) - ) { - if (typedData.promptCache === undefined) { - typedData.promptCache = true; - } - } else if (typedData.promptCache === true) { - typedData.promptCache = undefined; - } + normalizeBedrockPromptCache(typedData); if (Object.keys(additionalFields).length > 0) { typedData.additionalModelRequestFields = { diff --git a/packages/data-provider/src/config.spec.ts b/packages/data-provider/src/config.spec.ts index a6326b2635..73519f825b 100644 --- a/packages/data-provider/src/config.spec.ts +++ b/packages/data-provider/src/config.spec.ts @@ -3,6 +3,7 @@ import { EModelEndpoint, isDocumentSupportedProvider } from './schemas'; import { getEndpointFileConfig, mergeFileConfig } from './file-config'; import { allowedAddressesSchema, + bedrockEndpointSchema, configSchema, excludedKeys, resolveEndpointType, @@ -29,6 +30,17 @@ describe('excludedKeys', () => { }); }); +describe('bedrockEndpointSchema', () => { + it('accepts supported Bedrock prompt cache TTL values', () => { + expect(bedrockEndpointSchema.parse({ promptCacheTtl: '5m' }).promptCacheTtl).toBe('5m'); + expect(bedrockEndpointSchema.parse({ promptCacheTtl: '1h' }).promptCacheTtl).toBe('1h'); + }); + + it('rejects unsupported Bedrock prompt cache TTL values', () => { + expect(() => bedrockEndpointSchema.parse({ promptCacheTtl: '30m' })).toThrow(); + }); +}); + describe('resolveEndpointType', () => { describe('non-agents endpoints', () => { it('returns the config type for a custom endpoint', () => { diff --git a/packages/data-provider/src/config.ts b/packages/data-provider/src/config.ts index c5c62be3fc..b224dc5919 100644 --- a/packages/data-provider/src/config.ts +++ b/packages/data-provider/src/config.ts @@ -391,6 +391,7 @@ export const bedrockEndpointSchema = baseEndpointSchema.merge( availableRegions: z.array(z.string()).optional(), models: z.array(z.string()).optional(), inferenceProfiles: z.record(z.string(), z.string()).optional(), + promptCacheTtl: z.enum(['5m', '1h']).optional(), }), ); diff --git a/packages/data-provider/src/schemas.ts b/packages/data-provider/src/schemas.ts index c9f35d5d4b..985394f7ac 100644 --- a/packages/data-provider/src/schemas.ts +++ b/packages/data-provider/src/schemas.ts @@ -800,6 +800,7 @@ export const tConversationSchema = z.object({ max_tokens: coerceNumber.optional(), /* Anthropic */ promptCache: z.boolean().optional(), + promptCacheTtl: z.enum(['5m', '1h']).optional(), system: z.string().optional(), thinking: z.boolean().optional(), thinkingBudget: coerceNumber.optional(), @@ -950,6 +951,8 @@ export const tQueryParamsSchema = tConversationSchema maxOutputTokens: true, /** @endpoints anthropic */ promptCache: true, + /** @endpoints bedrock */ + promptCacheTtl: true, thinking: true, thinkingBudget: true, thinkingLevel: true, diff --git a/packages/data-provider/src/types.ts b/packages/data-provider/src/types.ts index b1c037c94f..4024e53498 100644 --- a/packages/data-provider/src/types.ts +++ b/packages/data-provider/src/types.ts @@ -51,6 +51,7 @@ export type TEndpointOption = Pick< | 'additionalModelRequestFields' // Anthropic-specific | 'promptCache' + | 'promptCacheTtl' | 'thinking' | 'thinkingBudget' | 'thinkingLevel' diff --git a/packages/data-schemas/src/schema/defaults.ts b/packages/data-schemas/src/schema/defaults.ts index 9b50bceb1d..84636310a5 100644 --- a/packages/data-schemas/src/schema/defaults.ts +++ b/packages/data-schemas/src/schema/defaults.ts @@ -77,6 +77,10 @@ export const conversationPreset = { promptCache: { type: Boolean, }, + promptCacheTtl: { + type: String, + enum: ['5m', '1h'], + }, thinking: { type: Boolean, }, diff --git a/packages/data-schemas/src/schema/preset.ts b/packages/data-schemas/src/schema/preset.ts index 5af5163fd3..5bc438b99b 100644 --- a/packages/data-schemas/src/schema/preset.ts +++ b/packages/data-schemas/src/schema/preset.ts @@ -28,6 +28,7 @@ export interface IPreset extends Document { file_ids?: string[]; resendImages?: boolean; promptCache?: boolean; + promptCacheTtl?: '5m' | '1h'; thinking?: boolean; thinkingBudget?: number; effort?: string; diff --git a/packages/data-schemas/src/types/convo.ts b/packages/data-schemas/src/types/convo.ts index c7888efba2..bfeb9f6bbb 100644 --- a/packages/data-schemas/src/types/convo.ts +++ b/packages/data-schemas/src/types/convo.ts @@ -26,6 +26,7 @@ export interface IConversation extends Document { file_ids?: string[]; resendImages?: boolean; promptCache?: boolean; + promptCacheTtl?: '5m' | '1h'; thinking?: boolean; thinkingBudget?: number; effort?: string;