feat: add Bedrock prompt cache TTL config

parent b39bf837a7
commit e5ff82f81a

14 changed files with 188 additions and 10 deletions
@@ -69,6 +69,12 @@ librechat:
   # registration:
   #   socialLogins: ["discord", "facebook", "github", "google", "openid"]
   # endpoints:
+  #   bedrock:
+  #     models:
+  #       - "anthropic.claude-sonnet-4-5-20250929-v1:0"
+  #     # Optional. Use "1h" only with Bedrock models that support 1-hour prompt cache TTL.
+  #     # Omit this field to keep Bedrock's default 5-minute prompt cache TTL.
+  #     promptCacheTtl: "1h"
   #   azureOpenAI:
   #     # Endpoint-level configuration
   #     titleModel: "gpt-4o"
@@ -534,6 +534,12 @@ endpoints:
   #   - "anthropic.claude-3-7-sonnet-20250219-v1:0"
   #   - "anthropic.claude-3-5-sonnet-20241022-v2:0"
+  #
+  #   # Prompt Cache TTL
+  #   # Optional. Bedrock supports 5-minute cache checkpoints, and 1-hour checkpoints
+  #   # for Claude 4.5 models. When omitted, Bedrock uses its default 5-minute TTL.
+  #   # Reference: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_CachePointBlock.html
+  #   promptCacheTtl: "1h"
   #
   #   # Inference Profiles Configuration
   #   # Maps model IDs to their inference profile ARNs
   #   # IMPORTANT: The model ID (key) MUST be a valid AWS Bedrock model ID that you've added to the models list above
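For context, a minimal sketch of the validation these config comments describe, assuming only zod; `ttlSchema` here is a hypothetical standalone copy of the enum this commit adds to `bedrockEndpointSchema` further down:

import { z } from 'zod';

// Hypothetical standalone copy of the promptCacheTtl field added to bedrockEndpointSchema.
const ttlSchema = z.enum(['5m', '1h']).optional();

ttlSchema.parse('1h'); // => '1h' (honored only by models with 1-hour cache support)
ttlSchema.parse(undefined); // => undefined (Bedrock keeps its default 5-minute TTL)
ttlSchema.parse('30m'); // throws ZodError: '30m' is not a supported TTL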
@@ -95,6 +95,48 @@ describe('initializeBedrock', () => {
     expect(result.llmConfig).toHaveProperty('region', 'us-east-1');
   });
 
+  it('should include promptCacheTtl from Bedrock endpoint config', async () => {
+    const params = createMockParams({
+      model_parameters: {
+        model: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
+      },
+      config: {
+        endpoints: {
+          [EModelEndpoint.bedrock]: {
+            promptCacheTtl: '1h',
+          },
+        },
+      },
+    });
+    const result = await initializeBedrock(params);
+
+    expect(result.llmConfig).toHaveProperty('promptCacheTtl', '1h');
+    expect(result.llmConfig).toHaveProperty('promptCache', true);
+  });
+
+  it('should omit one-hour promptCacheTtl for models that only support 5 minutes', async () => {
+    const params = createMockParams({
+      config: {
+        endpoints: {
+          [EModelEndpoint.bedrock]: {
+            promptCacheTtl: '1h',
+          },
+        },
+      },
+    });
+    const result = await initializeBedrock(params);
+
+    expect(result.llmConfig).not.toHaveProperty('promptCacheTtl');
+    expect(result.llmConfig).toHaveProperty('promptCache', true);
+  });
+
+  it('should not include promptCacheTtl when not configured', async () => {
+    const params = createMockParams();
+    const result = await initializeBedrock(params);
+
+    expect(result.llmConfig).not.toHaveProperty('promptCacheTtl');
+  });
+
   it('should handle model_parameters', async () => {
     const params = createMockParams({
       model_parameters: {
@@ -13,6 +13,7 @@ import type {
   BaseInitializeParams,
   InitializeResultBase,
   BedrockCredentials,
+  BedrockPromptCacheTtl,
   GuardrailConfiguration,
   InferenceProfileConfig,
 } from '~/types';

@@ -54,6 +55,7 @@ export async function initializeBedrock({
     | ({
         guardrailConfig?: GuardrailConfiguration;
         inferenceProfiles?: InferenceProfileConfig;
+        promptCacheTtl?: BedrockPromptCacheTtl;
       } & Record<string, unknown>)
     | undefined;

@@ -98,6 +100,7 @@ export async function initializeBedrock({
   const requestOptions: Record<string, unknown> = {
     model: model_parameters?.model as string | undefined,
     region: BEDROCK_AWS_DEFAULT_REGION,
+    promptCacheTtl: bedrockConfig?.promptCacheTtl,
   };
 
   const configOptions: Record<string, unknown> = {};

@@ -117,6 +120,7 @@ export async function initializeBedrock({
     endpointHost?: string;
     guardrailConfig?: GuardrailConfiguration;
     applicationInferenceProfile?: string;
+    promptCacheTtl?: BedrockPromptCacheTtl;
   };
 
   if (bedrockConfig?.guardrailConfig) {
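A condensed sketch of the flow these hunks add, using hypothetical simplified types (the real `initializeBedrock` builds `requestOptions` from many more inputs): the endpoint-level `promptCacheTtl` is read from the parsed config and passed straight through into the llmConfig request options, staying undefined when unset.

type BedrockPromptCacheTtl = '5m' | '1h';

interface BedrockEndpointConfig {
  promptCacheTtl?: BedrockPromptCacheTtl;
}

// Hypothetical reduction of the requestOptions construction shown above.
function buildRequestOptions(model: string | undefined, bedrockConfig?: BedrockEndpointConfig) {
  return {
    model,
    region: 'us-east-1', // stand-in for BEDROCK_AWS_DEFAULT_REGION
    promptCacheTtl: bedrockConfig?.promptCacheTtl, // undefined => Bedrock's 5-minute default
  };
}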
@@ -27,6 +27,7 @@ export interface GuardrailConfiguration {
  * @see https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles.html
  */
 export type InferenceProfileConfig = Record<string, string>;
+export type BedrockPromptCacheTtl = '5m' | '1h';
 
 /**
  * Configuration options for Bedrock LLM

@@ -45,6 +46,8 @@ export interface BedrockConfigOptions {
   guardrailConfig?: GuardrailConfiguration;
   /** Inference profile ARNs keyed by model ID / friendly name */
   inferenceProfiles?: InferenceProfileConfig;
+  /** Bedrock prompt cache checkpoint TTL. Defaults to Bedrock's 5-minute TTL when unset. */
+  promptCacheTtl?: BedrockPromptCacheTtl;
 }
 
 /**

@@ -58,6 +61,7 @@ export interface BedrockLLMConfigResult {
     endpointHost?: string;
     guardrailConfig?: GuardrailConfiguration;
     applicationInferenceProfile?: string;
+    promptCacheTtl?: BedrockPromptCacheTtl;
   };
   configOptions: Record<string, unknown>;
 }
@@ -921,6 +921,68 @@ describe('bedrockInputParser', () => {
     expect(result.promptCache).toBe(true);
   });
 
+  test('should preserve one-hour promptCacheTtl for Claude 4.5 models', () => {
+    const input = {
+      model: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
+      promptCache: true,
+      promptCacheTtl: '1h',
+    };
+    const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+    expect(result.promptCache).toBe(true);
+    expect(result.promptCacheTtl).toBe('1h');
+  });
+
+  test('should strip one-hour promptCacheTtl for models that only support 5 minutes', () => {
+    const result = bedrockInputParser.parse({
+      model: 'amazon.nova-pro-v1:0',
+      promptCache: true,
+      promptCacheTtl: '1h',
+    }) as Record<string, unknown>;
+    expect(result.promptCache).toBe(true);
+    expect(result.promptCacheTtl).toBeUndefined();
+  });
+
+  test('should preserve explicit 5-minute promptCacheTtl for Nova models', () => {
+    const input = {
+      model: 'amazon.nova-pro-v1:0',
+      promptCache: true,
+      promptCacheTtl: '5m',
+    };
+    const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+    expect(result.promptCache).toBe(true);
+    expect(result.promptCacheTtl).toBe('5m');
+  });
+
+  test('should strip promptCacheTtl when promptCache is disabled', () => {
+    const input = {
+      model: 'anthropic.claude-sonnet-4-20250514-v1:0',
+      promptCache: false,
+      promptCacheTtl: '1h',
+    };
+    const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+    expect(result.promptCache).toBe(false);
+    expect(result.promptCacheTtl).toBeUndefined();
+  });
+
+  test('should strip stale promptCacheTtl when switching to non-Claude/Nova model', () => {
+    const staleConversationData = {
+      model: 'deepseek.deepseek-r1',
+      promptCacheTtl: '1h',
+    };
+    const result = bedrockInputParser.parse(staleConversationData) as Record<string, unknown>;
+    expect(result.promptCacheTtl).toBeUndefined();
+  });
+
+  test('bedrockInputSchema should strip stale promptCacheTtl when promptCache is disabled', () => {
+    const result = bedrockInputSchema.parse({
+      model: 'anthropic.claude-sonnet-4-20250514-v1:0',
+      promptCache: false,
+      promptCacheTtl: '1h',
+    }) as Record<string, unknown>;
+    expect(result.promptCache).toBe(false);
+    expect(result.promptCacheTtl).toBeUndefined();
+  });
+
   test('should strip stale thinking config from additionalModelRequestFields for non-Anthropic models', () => {
     const staleConversationData = {
       model: 'moonshot.kimi-k2-0711-thinking',
@@ -40,6 +40,42 @@ function extractPersistedDisplay(amrf: unknown): string | undefined {
   return typeof display === 'string' ? display : undefined;
 }
 
+function supportsBedrockPromptCache(model: unknown): boolean {
+  return typeof model === 'string' && (model.includes('claude') || model.includes('nova'));
+}
+
+function supportsOneHourBedrockPromptCache(model: unknown): boolean {
+  if (typeof model !== 'string') {
+    return false;
+  }
+
+  return (
+    model.includes('anthropic.claude-opus-4-5') ||
+    model.includes('anthropic.claude-sonnet-4-5') ||
+    model.includes('anthropic.claude-haiku-4-5')
+  );
+}
+
+function normalizeBedrockPromptCache(data: Record<string, unknown>) {
+  if (supportsBedrockPromptCache(data.model)) {
+    if (data.promptCache === undefined) {
+      data.promptCache = true;
+    }
+    if (data.promptCacheTtl === '1h' && !supportsOneHourBedrockPromptCache(data.model)) {
+      data.promptCacheTtl = undefined;
+    }
+  } else {
+    if (data.promptCache === true) {
+      data.promptCache = undefined;
+    }
+    data.promptCacheTtl = undefined;
+  }
+
+  if (data.promptCache === false) {
+    data.promptCacheTtl = undefined;
+  }
+}
+
 export function resolveThinkingDisplay(
   model: string,
   explicit?: s.ThinkingDisplay | string | null,

@@ -226,6 +262,7 @@ export const bedrockInputSchema = s.tConversationSchema
     thinkingDisplay: true,
     reasoning_effort: true,
     promptCache: true,
+    promptCacheTtl: true,
     /* Catch-all fields */
     topK: true,
     additionalModelRequestFields: true,

@@ -250,6 +287,7 @@ export const bedrockInputSchema = s.tConversationSchema
       }
       delete obj.additionalModelRequestFields;
     }
+    normalizeBedrockPromptCache(obj as Record<string, unknown>);
     return s.removeNullishValues(obj);
   })
   .catch(() => ({}));

@@ -281,6 +319,7 @@ export const bedrockInputParser = s.tConversationSchema
     thinkingDisplay: true,
     reasoning_effort: true,
     promptCache: true,
+    promptCacheTtl: true,
     /* Catch-all fields */
     topK: true,
     additionalModelRequestFields: true,

@@ -304,6 +343,7 @@ export const bedrockInputParser = s.tConversationSchema
       'topP',
       'stop',
       'promptCache',
+      'promptCacheTtl',
     ];
 
     const additionalFields: Record<string, unknown> = {};

@@ -439,16 +479,7 @@ export const bedrockInputParser = s.tConversationSchema
     }
 
-    /** Default promptCache for claude and nova models, if not defined */
-    if (
-      typeof typedData.model === 'string' &&
-      (typedData.model.includes('claude') || typedData.model.includes('nova'))
-    ) {
-      if (typedData.promptCache === undefined) {
-        typedData.promptCache = true;
-      }
-    } else if (typedData.promptCache === true) {
-      typedData.promptCache = undefined;
-    }
+    normalizeBedrockPromptCache(typedData);
 
     if (Object.keys(additionalFields).length > 0) {
       typedData.additionalModelRequestFields = {
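To make the normalization rules concrete, a few worked inputs for the `normalizeBedrockPromptCache` helper added above (a sketch; the helper mutates the object it is given):

const claude45 = { model: 'anthropic.claude-sonnet-4-5-20250929-v1:0', promptCacheTtl: '1h' };
normalizeBedrockPromptCache(claude45);
// promptCache defaulted to true; promptCacheTtl stays '1h' (Claude 4.5 supports 1-hour TTL)

const nova = { model: 'amazon.nova-pro-v1:0', promptCacheTtl: '1h' };
normalizeBedrockPromptCache(nova);
// promptCache defaulted to true; promptCacheTtl stripped (Nova only supports 5 minutes)

const other = { model: 'deepseek.deepseek-r1', promptCache: true, promptCacheTtl: '1h' };
normalizeBedrockPromptCache(other);
// promptCache and promptCacheTtl both cleared (model has no prompt cache support)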
@@ -3,6 +3,7 @@ import { EModelEndpoint, isDocumentSupportedProvider } from './schemas';
 import { getEndpointFileConfig, mergeFileConfig } from './file-config';
 import {
   allowedAddressesSchema,
+  bedrockEndpointSchema,
   configSchema,
   excludedKeys,
   resolveEndpointType,

@@ -29,6 +30,17 @@ describe('excludedKeys', () => {
   });
 });
 
+describe('bedrockEndpointSchema', () => {
+  it('accepts supported Bedrock prompt cache TTL values', () => {
+    expect(bedrockEndpointSchema.parse({ promptCacheTtl: '5m' }).promptCacheTtl).toBe('5m');
+    expect(bedrockEndpointSchema.parse({ promptCacheTtl: '1h' }).promptCacheTtl).toBe('1h');
+  });
+
+  it('rejects unsupported Bedrock prompt cache TTL values', () => {
+    expect(() => bedrockEndpointSchema.parse({ promptCacheTtl: '30m' })).toThrow();
+  });
+});
+
 describe('resolveEndpointType', () => {
   describe('non-agents endpoints', () => {
     it('returns the config type for a custom endpoint', () => {
@@ -391,6 +391,7 @@ export const bedrockEndpointSchema = baseEndpointSchema.merge(
     availableRegions: z.array(z.string()).optional(),
     models: z.array(z.string()).optional(),
     inferenceProfiles: z.record(z.string(), z.string()).optional(),
+    promptCacheTtl: z.enum(['5m', '1h']).optional(),
   }),
 );
@@ -800,6 +800,7 @@ export const tConversationSchema = z.object({
   max_tokens: coerceNumber.optional(),
   /* Anthropic */
   promptCache: z.boolean().optional(),
+  promptCacheTtl: z.enum(['5m', '1h']).optional(),
   system: z.string().optional(),
   thinking: z.boolean().optional(),
   thinkingBudget: coerceNumber.optional(),

@@ -950,6 +951,8 @@ export const tQueryParamsSchema = tConversationSchema
     maxOutputTokens: true,
     /** @endpoints anthropic */
     promptCache: true,
+    /** @endpoints bedrock */
+    promptCacheTtl: true,
     thinking: true,
     thinkingBudget: true,
     thinkingLevel: true,
@@ -51,6 +51,7 @@ export type TEndpointOption = Pick<
   | 'additionalModelRequestFields'
   // Anthropic-specific
   | 'promptCache'
+  | 'promptCacheTtl'
   | 'thinking'
   | 'thinkingBudget'
   | 'thinkingLevel'
@@ -77,6 +77,10 @@ export const conversationPreset = {
   promptCache: {
     type: Boolean,
   },
+  promptCacheTtl: {
+    type: String,
+    enum: ['5m', '1h'],
+  },
   thinking: {
     type: Boolean,
   },
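For illustration, how the new Mongoose enum gate behaves at the document level, as a sketch built around a hypothetical standalone model that reuses the same field shape as conversationPreset:

import mongoose from 'mongoose';

// Hypothetical standalone schema reusing the field definition added above.
const SketchSchema = new mongoose.Schema({
  promptCacheTtl: { type: String, enum: ['5m', '1h'] },
});
const Sketch = mongoose.model('PromptCacheTtlSketch', SketchSchema);

new Sketch({ promptCacheTtl: '1h' }).validateSync(); // => undefined (valid)
new Sketch({ promptCacheTtl: '30m' }).validateSync(); // => ValidationError (not in enum)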
@@ -28,6 +28,7 @@ export interface IPreset extends Document {
   file_ids?: string[];
   resendImages?: boolean;
   promptCache?: boolean;
+  promptCacheTtl?: '5m' | '1h';
   thinking?: boolean;
   thinkingBudget?: number;
   effort?: string;
@@ -26,6 +26,7 @@ export interface IConversation extends Document {
   file_ids?: string[];
   resendImages?: boolean;
   promptCache?: boolean;
+  promptCacheTtl?: '5m' | '1h';
   thinking?: boolean;
   thinkingBudget?: number;
   effort?: string;