feat: add Bedrock prompt cache TTL config

parent b39bf837a7
commit e5ff82f81a

14 changed files with 188 additions and 10 deletions
@@ -69,6 +69,12 @@ librechat:
   # registration:
   #   socialLogins: ["discord", "facebook", "github", "google", "openid"]
   # endpoints:
+  #   bedrock:
+  #     models:
+  #       - "anthropic.claude-sonnet-4-5-20250929-v1:0"
+  #     # Optional. Use "1h" only with Bedrock models that support 1-hour prompt cache TTL.
+  #     # Omit this field to keep Bedrock's default 5-minute prompt cache TTL.
+  #     promptCacheTtl: "1h"
   #   azureOpenAI:
   #     # Endpoint-level configuration
   #     titleModel: "gpt-4o"
@@ -534,6 +534,12 @@ endpoints:
   #   - "anthropic.claude-3-7-sonnet-20250219-v1:0"
   #   - "anthropic.claude-3-5-sonnet-20241022-v2:0"
+  #
+  #   # Prompt Cache TTL
+  #   # Optional. Bedrock supports 5-minute cache checkpoints, and 1-hour checkpoints
+  #   # for Claude 4.5 models. When omitted, Bedrock uses its default 5-minute TTL.
+  #   # Reference: https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_CachePointBlock.html
+  #   promptCacheTtl: "1h"
   #
   #   # Inference Profiles Configuration
   #   # Maps model IDs to their inference profile ARNs
   #   # IMPORTANT: The model ID (key) MUST be a valid AWS Bedrock model ID that you've added to the models list above
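For context, a minimal sketch of the validation these config comments describe, assuming only zod; `ttlSchema` here is a hypothetical standalone copy of the enum this commit adds to `bedrockEndpointSchema` further down:

import { z } from 'zod';

// Hypothetical standalone copy of the promptCacheTtl field added to bedrockEndpointSchema.
const ttlSchema = z.enum(['5m', '1h']).optional();

ttlSchema.parse('1h'); // => '1h' (honored only by models with 1-hour cache support)
ttlSchema.parse(undefined); // => undefined (Bedrock keeps its default 5-minute TTL)
ttlSchema.parse('30m'); // throws ZodError: '30m' is not a supported TTL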
@@ -95,6 +95,48 @@ describe('initializeBedrock', () => {
     expect(result.llmConfig).toHaveProperty('region', 'us-east-1');
   });
 
+  it('should include promptCacheTtl from Bedrock endpoint config', async () => {
+    const params = createMockParams({
+      model_parameters: {
+        model: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
+      },
+      config: {
+        endpoints: {
+          [EModelEndpoint.bedrock]: {
+            promptCacheTtl: '1h',
+          },
+        },
+      },
+    });
+    const result = await initializeBedrock(params);
+
+    expect(result.llmConfig).toHaveProperty('promptCacheTtl', '1h');
+    expect(result.llmConfig).toHaveProperty('promptCache', true);
+  });
+
+  it('should omit one-hour promptCacheTtl for models that only support 5 minutes', async () => {
+    const params = createMockParams({
+      config: {
+        endpoints: {
+          [EModelEndpoint.bedrock]: {
+            promptCacheTtl: '1h',
+          },
+        },
+      },
+    });
+    const result = await initializeBedrock(params);
+
+    expect(result.llmConfig).not.toHaveProperty('promptCacheTtl');
+    expect(result.llmConfig).toHaveProperty('promptCache', true);
+  });
+
+  it('should not include promptCacheTtl when not configured', async () => {
+    const params = createMockParams();
+    const result = await initializeBedrock(params);
+
+    expect(result.llmConfig).not.toHaveProperty('promptCacheTtl');
+  });
+
   it('should handle model_parameters', async () => {
     const params = createMockParams({
       model_parameters: {
@@ -13,6 +13,7 @@ import type {
   BaseInitializeParams,
   InitializeResultBase,
   BedrockCredentials,
+  BedrockPromptCacheTtl,
   GuardrailConfiguration,
   InferenceProfileConfig,
 } from '~/types';

@@ -54,6 +55,7 @@ export async function initializeBedrock({
     | ({
         guardrailConfig?: GuardrailConfiguration;
         inferenceProfiles?: InferenceProfileConfig;
+        promptCacheTtl?: BedrockPromptCacheTtl;
       } & Record<string, unknown>)
     | undefined;

@@ -98,6 +100,7 @@ export async function initializeBedrock({
   const requestOptions: Record<string, unknown> = {
     model: model_parameters?.model as string | undefined,
     region: BEDROCK_AWS_DEFAULT_REGION,
+    promptCacheTtl: bedrockConfig?.promptCacheTtl,
   };
 
   const configOptions: Record<string, unknown> = {};

@@ -117,6 +120,7 @@ export async function initializeBedrock({
     endpointHost?: string;
     guardrailConfig?: GuardrailConfiguration;
     applicationInferenceProfile?: string;
+    promptCacheTtl?: BedrockPromptCacheTtl;
   };
 
   if (bedrockConfig?.guardrailConfig) {
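A condensed sketch of the flow these hunks add, using hypothetical simplified types (the real `initializeBedrock` builds `requestOptions` from many more inputs): the endpoint-level `promptCacheTtl` is read from the parsed config and passed straight through into the llmConfig request options, staying undefined when unset.

type BedrockPromptCacheTtl = '5m' | '1h';

interface BedrockEndpointConfig {
  promptCacheTtl?: BedrockPromptCacheTtl;
}

// Hypothetical reduction of the requestOptions construction shown above.
function buildRequestOptions(model: string | undefined, bedrockConfig?: BedrockEndpointConfig) {
  return {
    model,
    region: 'us-east-1', // stand-in for BEDROCK_AWS_DEFAULT_REGION
    promptCacheTtl: bedrockConfig?.promptCacheTtl, // undefined => Bedrock's 5-minute default
  };
}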
@@ -27,6 +27,7 @@ export interface GuardrailConfiguration {
  * @see https://docs.aws.amazon.com/bedrock/latest/userguide/inference-profiles.html
  */
 export type InferenceProfileConfig = Record<string, string>;
+export type BedrockPromptCacheTtl = '5m' | '1h';
 
 /**
  * Configuration options for Bedrock LLM

@@ -45,6 +46,8 @@ export interface BedrockConfigOptions {
   guardrailConfig?: GuardrailConfiguration;
   /** Inference profile ARNs keyed by model ID / friendly name */
   inferenceProfiles?: InferenceProfileConfig;
+  /** Bedrock prompt cache checkpoint TTL. Defaults to Bedrock's 5-minute TTL when unset. */
+  promptCacheTtl?: BedrockPromptCacheTtl;
 }
 
 /**

@@ -58,6 +61,7 @@ export interface BedrockLLMConfigResult {
     endpointHost?: string;
     guardrailConfig?: GuardrailConfiguration;
     applicationInferenceProfile?: string;
+    promptCacheTtl?: BedrockPromptCacheTtl;
   };
   configOptions: Record<string, unknown>;
 }
@@ -921,6 +921,68 @@ describe('bedrockInputParser', () => {
     expect(result.promptCache).toBe(true);
   });
 
+  test('should preserve one-hour promptCacheTtl for Claude 4.5 models', () => {
+    const input = {
+      model: 'anthropic.claude-sonnet-4-5-20250929-v1:0',
+      promptCache: true,
+      promptCacheTtl: '1h',
+    };
+    const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+    expect(result.promptCache).toBe(true);
+    expect(result.promptCacheTtl).toBe('1h');
+  });
+
+  test('should strip one-hour promptCacheTtl for models that only support 5 minutes', () => {
+    const result = bedrockInputParser.parse({
+      model: 'amazon.nova-pro-v1:0',
+      promptCache: true,
+      promptCacheTtl: '1h',
+    }) as Record<string, unknown>;
+    expect(result.promptCache).toBe(true);
+    expect(result.promptCacheTtl).toBeUndefined();
+  });
+
+  test('should preserve explicit 5-minute promptCacheTtl for Nova models', () => {
+    const input = {
+      model: 'amazon.nova-pro-v1:0',
+      promptCache: true,
+      promptCacheTtl: '5m',
+    };
+    const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+    expect(result.promptCache).toBe(true);
+    expect(result.promptCacheTtl).toBe('5m');
+  });
+
+  test('should strip promptCacheTtl when promptCache is disabled', () => {
+    const input = {
+      model: 'anthropic.claude-sonnet-4-20250514-v1:0',
+      promptCache: false,
+      promptCacheTtl: '1h',
+    };
+    const result = bedrockInputParser.parse(input) as Record<string, unknown>;
+    expect(result.promptCache).toBe(false);
+    expect(result.promptCacheTtl).toBeUndefined();
+  });
+
+  test('should strip stale promptCacheTtl when switching to non-Claude/Nova model', () => {
+    const staleConversationData = {
+      model: 'deepseek.deepseek-r1',
+      promptCacheTtl: '1h',
+    };
+    const result = bedrockInputParser.parse(staleConversationData) as Record<string, unknown>;
+    expect(result.promptCacheTtl).toBeUndefined();
+  });
+
+  test('bedrockInputSchema should strip stale promptCacheTtl when promptCache is disabled', () => {
+    const result = bedrockInputSchema.parse({
+      model: 'anthropic.claude-sonnet-4-20250514-v1:0',
+      promptCache: false,
+      promptCacheTtl: '1h',
+    }) as Record<string, unknown>;
+    expect(result.promptCache).toBe(false);
+    expect(result.promptCacheTtl).toBeUndefined();
+  });
+
   test('should strip stale thinking config from additionalModelRequestFields for non-Anthropic models', () => {
     const staleConversationData = {
       model: 'moonshot.kimi-k2-0711-thinking',
@@ -40,6 +40,42 @@ function extractPersistedDisplay(amrf: unknown): string | undefined {
   return typeof display === 'string' ? display : undefined;
 }
 
+function supportsBedrockPromptCache(model: unknown): boolean {
+  return typeof model === 'string' && (model.includes('claude') || model.includes('nova'));
+}
+
+function supportsOneHourBedrockPromptCache(model: unknown): boolean {
+  if (typeof model !== 'string') {
+    return false;
+  }
+
+  return (
+    model.includes('anthropic.claude-opus-4-5') ||
+    model.includes('anthropic.claude-sonnet-4-5') ||
+    model.includes('anthropic.claude-haiku-4-5')
+  );
+}
+
+function normalizeBedrockPromptCache(data: Record<string, unknown>) {
+  if (supportsBedrockPromptCache(data.model)) {
+    if (data.promptCache === undefined) {
+      data.promptCache = true;
+    }
+    if (data.promptCacheTtl === '1h' && !supportsOneHourBedrockPromptCache(data.model)) {
+      data.promptCacheTtl = undefined;
+    }
+  } else {
+    if (data.promptCache === true) {
+      data.promptCache = undefined;
+    }
+    data.promptCacheTtl = undefined;
+  }
+
+  if (data.promptCache === false) {
+    data.promptCacheTtl = undefined;
+  }
+}
+
 export function resolveThinkingDisplay(
   model: string,
   explicit?: s.ThinkingDisplay | string | null,

@@ -226,6 +262,7 @@ export const bedrockInputSchema = s.tConversationSchema
     thinkingDisplay: true,
     reasoning_effort: true,
     promptCache: true,
+    promptCacheTtl: true,
     /* Catch-all fields */
     topK: true,
     additionalModelRequestFields: true,

@@ -250,6 +287,7 @@ export const bedrockInputSchema = s.tConversationSchema
       }
       delete obj.additionalModelRequestFields;
     }
+    normalizeBedrockPromptCache(obj as Record<string, unknown>);
     return s.removeNullishValues(obj);
   })
   .catch(() => ({}));

@@ -281,6 +319,7 @@ export const bedrockInputParser = s.tConversationSchema
     thinkingDisplay: true,
     reasoning_effort: true,
     promptCache: true,
+    promptCacheTtl: true,
     /* Catch-all fields */
     topK: true,
     additionalModelRequestFields: true,

@@ -304,6 +343,7 @@ export const bedrockInputParser = s.tConversationSchema
       'topP',
       'stop',
       'promptCache',
+      'promptCacheTtl',
     ];
 
     const additionalFields: Record<string, unknown> = {};

@@ -439,16 +479,7 @@ export const bedrockInputParser = s.tConversationSchema
     }
 
-    /** Default promptCache for claude and nova models, if not defined */
-    if (
-      typeof typedData.model === 'string' &&
-      (typedData.model.includes('claude') || typedData.model.includes('nova'))
-    ) {
-      if (typedData.promptCache === undefined) {
-        typedData.promptCache = true;
-      }
-    } else if (typedData.promptCache === true) {
-      typedData.promptCache = undefined;
-    }
+    normalizeBedrockPromptCache(typedData);
 
     if (Object.keys(additionalFields).length > 0) {
       typedData.additionalModelRequestFields = {
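To make the normalization rules concrete, a few worked inputs for the `normalizeBedrockPromptCache` helper added above (a sketch; the helper mutates the object it is given):

const claude45 = { model: 'anthropic.claude-sonnet-4-5-20250929-v1:0', promptCacheTtl: '1h' };
normalizeBedrockPromptCache(claude45);
// promptCache defaulted to true; promptCacheTtl stays '1h' (Claude 4.5 supports 1-hour TTL)

const nova = { model: 'amazon.nova-pro-v1:0', promptCacheTtl: '1h' };
normalizeBedrockPromptCache(nova);
// promptCache defaulted to true; promptCacheTtl stripped (Nova only supports 5 minutes)

const other = { model: 'deepseek.deepseek-r1', promptCache: true, promptCacheTtl: '1h' };
normalizeBedrockPromptCache(other);
// promptCache and promptCacheTtl both cleared (model has no prompt cache support)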
@@ -3,6 +3,7 @@ import { EModelEndpoint, isDocumentSupportedProvider } from './schemas';
 import { getEndpointFileConfig, mergeFileConfig } from './file-config';
 import {
   allowedAddressesSchema,
+  bedrockEndpointSchema,
   configSchema,
   excludedKeys,
   resolveEndpointType,

@@ -29,6 +30,17 @@ describe('excludedKeys', () => {
   });
 });
 
+describe('bedrockEndpointSchema', () => {
+  it('accepts supported Bedrock prompt cache TTL values', () => {
+    expect(bedrockEndpointSchema.parse({ promptCacheTtl: '5m' }).promptCacheTtl).toBe('5m');
+    expect(bedrockEndpointSchema.parse({ promptCacheTtl: '1h' }).promptCacheTtl).toBe('1h');
+  });
+
+  it('rejects unsupported Bedrock prompt cache TTL values', () => {
+    expect(() => bedrockEndpointSchema.parse({ promptCacheTtl: '30m' })).toThrow();
+  });
+});
+
 describe('resolveEndpointType', () => {
   describe('non-agents endpoints', () => {
     it('returns the config type for a custom endpoint', () => {
@@ -391,6 +391,7 @@ export const bedrockEndpointSchema = baseEndpointSchema.merge(
     availableRegions: z.array(z.string()).optional(),
     models: z.array(z.string()).optional(),
     inferenceProfiles: z.record(z.string(), z.string()).optional(),
+    promptCacheTtl: z.enum(['5m', '1h']).optional(),
   }),
 );
@@ -800,6 +800,7 @@ export const tConversationSchema = z.object({
   max_tokens: coerceNumber.optional(),
   /* Anthropic */
   promptCache: z.boolean().optional(),
+  promptCacheTtl: z.enum(['5m', '1h']).optional(),
   system: z.string().optional(),
   thinking: z.boolean().optional(),
   thinkingBudget: coerceNumber.optional(),

@@ -950,6 +951,8 @@ export const tQueryParamsSchema = tConversationSchema
     maxOutputTokens: true,
     /** @endpoints anthropic */
     promptCache: true,
+    /** @endpoints bedrock */
+    promptCacheTtl: true,
     thinking: true,
     thinkingBudget: true,
     thinkingLevel: true,
@@ -51,6 +51,7 @@ export type TEndpointOption = Pick<
   | 'additionalModelRequestFields'
   // Anthropic-specific
   | 'promptCache'
+  | 'promptCacheTtl'
   | 'thinking'
   | 'thinkingBudget'
   | 'thinkingLevel'
@@ -77,6 +77,10 @@ export const conversationPreset = {
   promptCache: {
     type: Boolean,
   },
+  promptCacheTtl: {
+    type: String,
+    enum: ['5m', '1h'],
+  },
   thinking: {
     type: Boolean,
   },
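For illustration, how the new Mongoose enum gate behaves at the document level, as a sketch built around a hypothetical standalone model that reuses the same field shape as conversationPreset:

import mongoose from 'mongoose';

// Hypothetical standalone schema reusing the field definition added above.
const SketchSchema = new mongoose.Schema({
  promptCacheTtl: { type: String, enum: ['5m', '1h'] },
});
const Sketch = mongoose.model('PromptCacheTtlSketch', SketchSchema);

new Sketch({ promptCacheTtl: '1h' }).validateSync(); // => undefined (valid)
new Sketch({ promptCacheTtl: '30m' }).validateSync(); // => ValidationError (not in enum)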
@@ -28,6 +28,7 @@ export interface IPreset extends Document {
   file_ids?: string[];
   resendImages?: boolean;
   promptCache?: boolean;
+  promptCacheTtl?: '5m' | '1h';
   thinking?: boolean;
   thinkingBudget?: number;
   effort?: string;
@@ -26,6 +26,7 @@ export interface IConversation extends Document {
   file_ids?: string[];
   resendImages?: boolean;
   promptCache?: boolean;
+  promptCacheTtl?: '5m' | '1h';
   thinking?: boolean;
   thinkingBudget?: number;
   effort?: string;