feat: add Bedrock prompt cache TTL config

This commit is contained in:
Feng Lu 2026-04-29 12:16:48 +02:00
parent b39bf837a7
commit e5ff82f81a
14 changed files with 188 additions and 10 deletions

View file

@@ -69,6 +69,12 @@ librechat:
# registration:
# socialLogins: ["discord", "facebook", "github", "google", "openid"]
# endpoints:
# bedrock:
# models:
# - "anthropic.claude-sonnet-4-5-20250929-v1:0"
# # Optional. Use "1h" only with Bedrock models that support 1-hour prompt cache TTL.
# # Omit this field to keep Bedrock's default 5-minute prompt cache TTL.
# promptCacheTtl: "1h"
# azureOpenAI:
# # Endpoint-level configuration
# titleModel: "gpt-4o"

View file

@@ -534,6 +534,12 @@ endpoints:
# - "anthropic.claude-3-7-sonnet-20250219-v1:0"
# - "anthropic.claude-3-5-sonnet-20241022-v2:0"
#
# # Prompt Cache TTL
# # Optional. Bedrock supports 5-minute cache checkpoints, and 1-hour checkpoints
# # for Claude 4.5 models. When omitted, Bedrock uses its default 5-minute TTL.
# # Reference: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_CachePointBlock.html
# promptCacheTtl: "1h"
#
# # Inference Profiles Configuration
# # Maps model IDs to their inference profile ARNs
# # IMPORTANT: The model ID (key) MUST be a valid AWS Bedrock model ID that you've added to the models list above

View file

@@ -95,6 +95,48 @@ describe('initializeBedrock', () => {
expect(result.llmConfig).toHaveProperty('region', 'us-east-1');
});
// Endpoint-level promptCacheTtl should be copied from the Bedrock endpoint
// config into llmConfig; promptCache itself defaults to true for this
// Claude 4.5 model (asserted below).
it('should include promptCacheTtl from Bedrock endpoint config', async () => {
const params = createMockParams({
model_parameters: {
model: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
},
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
promptCacheTtl: '1h',
},
},
},
});
const result = await initializeBedrock(params);
expect(result.llmConfig).toHaveProperty('promptCacheTtl', '1h');
expect(result.llmConfig).toHaveProperty('promptCache', true);
});
// With no model override, a configured '1h' TTL must be dropped from
// llmConfig (the default mock model presumably supports only the 5-minute
// TTL — see createMockParams for the default model), while promptCache
// stays enabled.
it('should omit one-hour promptCacheTtl for models that only support 5 minutes', async () => {
const params = createMockParams({
config: {
endpoints: {
[EModelEndpoint.bedrock]: {
promptCacheTtl: '1h',
},
},
},
});
const result = await initializeBedrock(params);
expect(result.llmConfig).not.toHaveProperty('promptCacheTtl');
expect(result.llmConfig).toHaveProperty('promptCache', true);
});
// No endpoint config at all: promptCacheTtl must be absent so Bedrock's
// own default TTL applies.
it('should not include promptCacheTtl when not configured', async () => {
const params = createMockParams();
const result = await initializeBedrock(params);
expect(result.llmConfig).not.toHaveProperty('promptCacheTtl');
});
it('should handle model_parameters', async () => {
const params = createMockParams({
model_parameters: {

View file

@@ -13,6 +13,7 @@ import type {
BaseInitializeParams,
InitializeResultBase,
BedrockCredentials,
BedrockPromptCacheTtl,
GuardrailConfiguration,
InferenceProfileConfig,
} from '~/types';
@@ -54,6 +55,7 @@ export async function initializeBedrock({
| ({
guardrailConfig?: GuardrailConfiguration;
inferenceProfiles?: InferenceProfileConfig;
promptCacheTtl?: BedrockPromptCacheTtl;
} & Record<string, unknown>)
| undefined;
@@ -98,6 +100,7 @@ export async function initializeBedrock({
const requestOptions: Record<string, unknown> = {
model: model_parameters?.model as string | undefined,
region: BEDROCK_AWS_DEFAULT_REGION,
promptCacheTtl: bedrockConfig?.promptCacheTtl,
};
const configOptions: Record<string, unknown> = {};
@@ -117,6 +120,7 @@
endpointHost?: string;
guardrailConfig?: GuardrailConfiguration;
applicationInferenceProfile?: string;
promptCacheTtl?: BedrockPromptCacheTtl;
};
if (bedrockConfig?.guardrailConfig) {

View file

@@ -27,6 +27,7 @@ export interface GuardrailConfiguration {
* @see https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles.html
*/
export type InferenceProfileConfig = Record<string, string>;
/** Supported Bedrock prompt-cache checkpoint TTLs; when unset, Bedrock applies its default 5-minute TTL. */
export type BedrockPromptCacheTtl = '5m' | '1h';
/**
* Configuration options for Bedrock LLM
@@ -45,6 +46,8 @@ export interface BedrockConfigOptions {
guardrailConfig?: GuardrailConfiguration;
/** Inference profile ARNs keyed by model ID / friendly name */
inferenceProfiles?: InferenceProfileConfig;
/** Bedrock prompt cache checkpoint TTL. Defaults to Bedrock's 5-minute TTL when unset. */
promptCacheTtl?: BedrockPromptCacheTtl;
}
/**
@@ -58,6 +61,7 @@ export interface BedrockLLMConfigResult {
endpointHost?: string;
guardrailConfig?: GuardrailConfiguration;
applicationInferenceProfile?: string;
promptCacheTtl?: BedrockPromptCacheTtl;
};
configOptions: Record<string, unknown>;
}

View file

@@ -921,6 +921,68 @@ describe('bedrockInputParser', () => {
expect(result.promptCache).toBe(true);
});
// Claude 4.5 supports the one-hour checkpoint, so '1h' survives parsing.
test('should preserve one-hour promptCacheTtl for Claude 4.5 models', () => {
const input = {
model: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
promptCache: true,
promptCacheTtl: '1h',
};
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
expect(result.promptCache).toBe(true);
expect(result.promptCacheTtl).toBe('1h');
});
// Nova supports prompt caching but not the one-hour TTL: '1h' is stripped
// while promptCache stays on.
test('should strip one-hour promptCacheTtl for models that only support 5 minutes', () => {
const result = bedrockInputParser.parse({
model: 'amazon.nova-pro-v1:0',
promptCache: true,
promptCacheTtl: '1h',
}) as Record<string, unknown>;
expect(result.promptCache).toBe(true);
expect(result.promptCacheTtl).toBeUndefined();
});
// An explicit '5m' is always valid for cache-capable models and must be kept.
test('should preserve explicit 5-minute promptCacheTtl for Nova models', () => {
const input = {
model: 'amazon.nova-pro-v1:0',
promptCache: true,
promptCacheTtl: '5m',
};
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
expect(result.promptCache).toBe(true);
expect(result.promptCacheTtl).toBe('5m');
});
// A TTL is meaningless without caching: promptCache === false clears it.
test('should strip promptCacheTtl when promptCache is disabled', () => {
const input = {
model: 'anthropic.claude-sonnet-4-20250514-v1:0',
promptCache: false,
promptCacheTtl: '1h',
};
const result = bedrockInputParser.parse(input) as Record<string, unknown>;
expect(result.promptCache).toBe(false);
expect(result.promptCacheTtl).toBeUndefined();
});
// Simulates switching an existing conversation to a model with no prompt
// cache support: the leftover TTL must not leak through.
test('should strip stale promptCacheTtl when switching to non-Claude/Nova model', () => {
const staleConversationData = {
model: 'deepseek.deepseek-r1',
promptCacheTtl: '1h',
};
const result = bedrockInputParser.parse(staleConversationData) as Record<string, unknown>;
expect(result.promptCacheTtl).toBeUndefined();
});
// Same disabled-cache rule, exercised through bedrockInputSchema rather
// than bedrockInputParser.
test('bedrockInputSchema should strip stale promptCacheTtl when promptCache is disabled', () => {
const result = bedrockInputSchema.parse({
model: 'anthropic.claude-sonnet-4-20250514-v1:0',
promptCache: false,
promptCacheTtl: '1h',
}) as Record<string, unknown>;
expect(result.promptCache).toBe(false);
expect(result.promptCacheTtl).toBeUndefined();
});
test('should strip stale thinking config from additionalModelRequestFields for non-Anthropic models', () => {
const staleConversationData = {
model: 'moonshot.kimi-k2-0711-thinking',

View file

@@ -40,6 +40,42 @@ function extractPersistedDisplay(amrf: unknown): string | undefined {
return typeof display === 'string' ? display : undefined;
}
/** True when the Bedrock model belongs to a prompt-cache-capable family (Claude or Nova). */
function supportsBedrockPromptCache(model: unknown): boolean {
  if (typeof model !== 'string') {
    return false;
  }
  return ['claude', 'nova'].some((family) => model.includes(family));
}
/**
 * True when the Bedrock model supports the one-hour prompt-cache TTL.
 * Matching is by substring, so region-prefixed IDs (e.g. "us.anthropic...") qualify too.
 */
function supportsOneHourBedrockPromptCache(model: unknown): boolean {
  if (typeof model !== 'string') {
    return false;
  }
  const oneHourFamilies = [
    'anthropic.claude-opus-4-5',
    'anthropic.claude-sonnet-4-5',
    'anthropic.claude-haiku-4-5',
  ];
  return oneHourFamilies.some((family) => model.includes(family));
}
/**
 * Normalizes the promptCache / promptCacheTtl pair on `data` in place:
 * - Cache-capable models (Claude/Nova) get promptCache defaulted to true when unset,
 *   and a '1h' TTL is dropped unless the model supports one-hour caching.
 * - Models without prompt-cache support get promptCache === true reset and any TTL cleared.
 * - A TTL never survives an explicitly disabled cache (promptCache === false).
 */
function normalizeBedrockPromptCache(data: Record<string, unknown>) {
  const cacheCapable = supportsBedrockPromptCache(data.model);
  if (!cacheCapable) {
    if (data.promptCache === true) {
      data.promptCache = undefined;
    }
    data.promptCacheTtl = undefined;
  } else {
    if (data.promptCache === undefined) {
      data.promptCache = true;
    }
    const requestsOneHour = data.promptCacheTtl === '1h';
    if (requestsOneHour && !supportsOneHourBedrockPromptCache(data.model)) {
      data.promptCacheTtl = undefined;
    }
  }
  if (data.promptCache === false) {
    data.promptCacheTtl = undefined;
  }
}
export function resolveThinkingDisplay(
model: string,
explicit?: s.ThinkingDisplay | string | null,
@@ -226,6 +262,7 @@ export const bedrockInputSchema = s.tConversationSchema
thinkingDisplay: true,
reasoning_effort: true,
promptCache: true,
promptCacheTtl: true,
/* Catch-all fields */
topK: true,
additionalModelRequestFields: true,
@@ -250,6 +287,7 @@ export const bedrockInputSchema = s.tConversationSchema
}
delete obj.additionalModelRequestFields;
}
normalizeBedrockPromptCache(obj as Record<string, unknown>);
return s.removeNullishValues(obj);
})
.catch(() => ({}));
@@ -281,6 +319,7 @@ export const bedrockInputParser = s.tConversationSchema
thinkingDisplay: true,
reasoning_effort: true,
promptCache: true,
promptCacheTtl: true,
/* Catch-all fields */
topK: true,
additionalModelRequestFields: true,
@@ -304,6 +343,7 @@ export const bedrockInputParser = s.tConversationSchema
'topP',
'stop',
'promptCache',
'promptCacheTtl',
];
const additionalFields: Record<string, unknown> = {};
@@ -439,16 +479,7 @@ export const bedrockInputParser = s.tConversationSchema
}
/** Default promptCache for claude and nova models, if not defined */
if (
typeof typedData.model === 'string' &&
(typedData.model.includes('claude') || typedData.model.includes('nova'))
) {
if (typedData.promptCache === undefined) {
typedData.promptCache = true;
}
} else if (typedData.promptCache === true) {
typedData.promptCache = undefined;
}
normalizeBedrockPromptCache(typedData);
if (Object.keys(additionalFields).length > 0) {
typedData.additionalModelRequestFields = {

View file

@@ -3,6 +3,7 @@ import { EModelEndpoint, isDocumentSupportedProvider } from './schemas';
import { getEndpointFileConfig, mergeFileConfig } from './file-config';
import {
allowedAddressesSchema,
bedrockEndpointSchema,
configSchema,
excludedKeys,
resolveEndpointType,
@@ -29,6 +30,17 @@ describe('excludedKeys', () => {
});
});
// Validation of the Bedrock endpoint config's promptCacheTtl field.
describe('bedrockEndpointSchema', () => {
// Both supported TTL values must round-trip through the schema unchanged.
it('accepts supported Bedrock prompt cache TTL values', () => {
expect(bedrockEndpointSchema.parse({ promptCacheTtl: '5m' }).promptCacheTtl).toBe('5m');
expect(bedrockEndpointSchema.parse({ promptCacheTtl: '1h' }).promptCacheTtl).toBe('1h');
});
// Anything outside the '5m' | '1h' enum must throw, not be coerced or dropped.
it('rejects unsupported Bedrock prompt cache TTL values', () => {
expect(() => bedrockEndpointSchema.parse({ promptCacheTtl: '30m' })).toThrow();
});
});
describe('resolveEndpointType', () => {
describe('non-agents endpoints', () => {
it('returns the config type for a custom endpoint', () => {

View file

@@ -391,6 +391,7 @@ export const bedrockEndpointSchema = baseEndpointSchema.merge(
availableRegions: z.array(z.string()).optional(),
models: z.array(z.string()).optional(),
inferenceProfiles: z.record(z.string(), z.string()).optional(),
promptCacheTtl: z.enum(['5m', '1h']).optional(),
}),
);

View file

@@ -800,6 +800,7 @@ export const tConversationSchema = z.object({
max_tokens: coerceNumber.optional(),
/* Anthropic */
promptCache: z.boolean().optional(),
promptCacheTtl: z.enum(['5m', '1h']).optional(),
system: z.string().optional(),
thinking: z.boolean().optional(),
thinkingBudget: coerceNumber.optional(),
@@ -950,6 +951,8 @@ export const tQueryParamsSchema = tConversationSchema
maxOutputTokens: true,
/** @endpoints anthropic */
promptCache: true,
/** @endpoints bedrock */
promptCacheTtl: true,
thinking: true,
thinkingBudget: true,
thinkingLevel: true,

View file

@@ -51,6 +51,7 @@ export type TEndpointOption = Pick<
| 'additionalModelRequestFields'
// Anthropic-specific
| 'promptCache'
| 'promptCacheTtl'
| 'thinking'
| 'thinkingBudget'
| 'thinkingLevel'

View file

@@ -77,6 +77,10 @@ export const conversationPreset = {
promptCache: {
type: Boolean,
},
promptCacheTtl: {
type: String,
enum: ['5m', '1h'],
},
thinking: {
type: Boolean,
},

View file

@@ -28,6 +28,7 @@ export interface IPreset extends Document {
file_ids?: string[];
resendImages?: boolean;
promptCache?: boolean;
promptCacheTtl?: '5m' | '1h';
thinking?: boolean;
thinkingBudget?: number;
effort?: string;

View file

@@ -26,6 +26,7 @@ export interface IConversation extends Document {
file_ids?: string[];
resendImages?: boolean;
promptCache?: boolean;
promptCacheTtl?: '5m' | '1h';
thinking?: boolean;
thinkingBudget?: number;
effort?: string;