jest.mock('uuid', () => ({ v4: jest.fn(() => 'mock-uuid') })); jest.mock('@librechat/data-schemas', () => ({ logger: { warn: jest.fn(), debug: jest.fn(), error: jest.fn() }, })); jest.mock('@librechat/agents', () => ({})); jest.mock('@librechat/api', () => ({ sanitizeFilename: jest.fn((n) => n), parseText: jest.fn().mockResolvedValue({ text: '', bytes: 0 }), processAudioFile: jest.fn(), getStorageMetadata: jest.fn(() => ({})), })); jest.mock('librechat-data-provider', () => ({ ...jest.requireActual('librechat-data-provider'), mergeFileConfig: jest.fn(), })); jest.mock('~/server/services/Files/images', () => ({ convertImage: jest.fn(), resizeAndConvert: jest.fn(), resizeImageBuffer: jest.fn(), })); jest.mock('~/server/controllers/assistants/v2', () => ({ addResourceFileId: jest.fn(), deleteResourceFileId: jest.fn(), })); jest.mock('~/server/controllers/assistants/helpers', () => ({ getOpenAIClient: jest.fn(), })); jest.mock('~/server/services/Tools/credentials', () => ({ loadAuthValues: jest.fn(), })); jest.mock('~/models', () => ({ createFile: jest.fn().mockResolvedValue({ file_id: 'created-file-id' }), updateFileUsage: jest.fn(), deleteFiles: jest.fn(), addAgentResourceFile: jest.fn().mockResolvedValue({}), removeAgentResourceFiles: jest.fn(), })); jest.mock('~/server/utils/getFileStrategy', () => ({ getFileStrategy: jest.fn().mockReturnValue('local'), })); jest.mock('~/server/services/Config', () => ({ checkCapability: jest.fn().mockResolvedValue(true), })); jest.mock('~/server/utils/queue', () => ({ LB_QueueAsyncCall: jest.fn(), })); jest.mock('~/server/services/Files/strategies', () => ({ getStrategyFunctions: jest.fn(), })); jest.mock('~/server/utils', () => ({ determineFileType: jest.fn(), })); jest.mock('~/server/services/Files/Audio/STTService', () => ({ STTService: { getInstance: jest.fn() }, })); const { EToolResources, FileSources, FileContext, AgentCapabilities, } = require('librechat-data-provider'); const { mergeFileConfig } = require('librechat-data-provider'); const { checkCapability } = require('~/server/services/Config'); const { getStrategyFunctions } = require('~/server/services/Files/strategies'); const db = require('~/models'); const { processAgentFileUpload, processFileURL } = require('./process'); const PDF_MIME = 'application/pdf'; const DOCX_MIME = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'; const XLSX_MIME = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'; const XLS_MIME = 'application/vnd.ms-excel'; const ODS_MIME = 'application/vnd.oasis.opendocument.spreadsheet'; const ODT_MIME = 'application/vnd.oasis.opendocument.text'; const ODP_MIME = 'application/vnd.oasis.opendocument.presentation'; const ODG_MIME = 'application/vnd.oasis.opendocument.graphics'; const makeReq = ({ mimetype = PDF_MIME, ocrConfig = null } = {}) => ({ user: { id: 'user-123', tenantId: 'tenant-a' }, file: { path: '/tmp/upload.bin', originalname: 'upload.bin', filename: 'upload-uuid.bin', mimetype, }, body: { model: 'gpt-4o' }, config: { fileConfig: {}, fileStrategy: 'local', ocr: ocrConfig, }, }); const makeMetadata = () => ({ agent_id: 'agent-abc', tool_resource: EToolResources.context, file_id: 'file-uuid-123', }); const mockRes = { status: jest.fn().mockReturnThis(), json: jest.fn().mockReturnValue({}), }; const makeFileConfig = ({ ocrSupportedMimeTypes = [] } = {}) => ({ checkType: (mime, types) => (types ?? []).includes(mime), ocr: { supportedMimeTypes: ocrSupportedMimeTypes }, stt: { supportedMimeTypes: [] }, text: { supportedMimeTypes: [] }, }); describe('processAgentFileUpload', () => { beforeEach(() => { jest.clearAllMocks(); mockRes.status.mockReturnThis(); mockRes.json.mockReturnValue({}); checkCapability.mockResolvedValue(true); getStrategyFunctions.mockReturnValue({ handleFileUpload: jest .fn() .mockResolvedValue({ text: 'extracted text', bytes: 42, filepath: 'doc://result' }), }); mergeFileConfig.mockReturnValue(makeFileConfig()); }); describe('OCR strategy selection', () => { test.each([ ['PDF', PDF_MIME], ['DOCX', DOCX_MIME], ['XLSX', XLSX_MIME], ['XLS', XLS_MIME], ['ODS', ODS_MIME], ['Excel variant (msexcel)', 'application/msexcel'], ['Excel variant (x-msexcel)', 'application/x-msexcel'], ])('uses document_parser automatically for %s when no OCR is configured', async (_, mime) => { mergeFileConfig.mockReturnValue(makeFileConfig()); const req = makeReq({ mimetype: mime, ocrConfig: null }); await processAgentFileUpload({ req, res: mockRes, metadata: makeMetadata() }); expect(getStrategyFunctions).toHaveBeenCalledWith(FileSources.document_parser); }); test('does not check OCR capability when using automatic document_parser fallback', async () => { const req = makeReq({ mimetype: PDF_MIME, ocrConfig: null }); await processAgentFileUpload({ req, res: mockRes, metadata: makeMetadata() }); expect(checkCapability).not.toHaveBeenCalledWith(expect.anything(), AgentCapabilities.ocr); expect(getStrategyFunctions).toHaveBeenCalledWith(FileSources.document_parser); }); test('uses the configured OCR strategy when OCR is set up for the file type', async () => { mergeFileConfig.mockReturnValue(makeFileConfig({ ocrSupportedMimeTypes: [PDF_MIME] })); const req = makeReq({ mimetype: PDF_MIME, ocrConfig: { strategy: FileSources.mistral_ocr }, }); await processAgentFileUpload({ req, res: mockRes, metadata: makeMetadata() }); expect(checkCapability).toHaveBeenCalledWith(expect.anything(), AgentCapabilities.ocr); expect(getStrategyFunctions).toHaveBeenCalledWith(FileSources.mistral_ocr); }); test('uses document_parser as default when OCR is configured but no strategy is specified', async () => { mergeFileConfig.mockReturnValue(makeFileConfig({ ocrSupportedMimeTypes: [PDF_MIME] })); const req = makeReq({ mimetype: PDF_MIME, ocrConfig: { supportedMimeTypes: [PDF_MIME] }, }); await processAgentFileUpload({ req, res: mockRes, metadata: makeMetadata() }); expect(checkCapability).toHaveBeenCalledWith(expect.anything(), AgentCapabilities.ocr); expect(getStrategyFunctions).toHaveBeenCalledWith(FileSources.document_parser); }); test('throws when configured OCR capability is not enabled for the agent', async () => { mergeFileConfig.mockReturnValue(makeFileConfig({ ocrSupportedMimeTypes: [PDF_MIME] })); checkCapability.mockResolvedValue(false); const req = makeReq({ mimetype: PDF_MIME, ocrConfig: { strategy: FileSources.mistral_ocr }, }); await expect( processAgentFileUpload({ req, res: mockRes, metadata: makeMetadata() }), ).rejects.toThrow('OCR capability is not enabled for Agents'); }); test('uses document_parser (no capability check) when OCR capability returns false but no OCR config', async () => { checkCapability.mockResolvedValue(false); const req = makeReq({ mimetype: PDF_MIME, ocrConfig: null }); await processAgentFileUpload({ req, res: mockRes, metadata: makeMetadata() }); expect(checkCapability).not.toHaveBeenCalledWith(expect.anything(), AgentCapabilities.ocr); expect(getStrategyFunctions).toHaveBeenCalledWith(FileSources.document_parser); }); test('uses document_parser when OCR is configured but the file type is not in OCR supported types', async () => { mergeFileConfig.mockReturnValue(makeFileConfig({ ocrSupportedMimeTypes: [PDF_MIME] })); const req = makeReq({ mimetype: DOCX_MIME, ocrConfig: { strategy: FileSources.mistral_ocr }, }); await processAgentFileUpload({ req, res: mockRes, metadata: makeMetadata() }); expect(checkCapability).not.toHaveBeenCalledWith(expect.anything(), AgentCapabilities.ocr); expect(getStrategyFunctions).toHaveBeenCalledWith(FileSources.document_parser); expect(getStrategyFunctions).not.toHaveBeenCalledWith(FileSources.mistral_ocr); }); test('does not invoke any OCR strategy for unsupported MIME types without OCR config', async () => { const req = makeReq({ mimetype: 'text/plain', ocrConfig: null }); await expect( processAgentFileUpload({ req, res: mockRes, metadata: makeMetadata() }), ).rejects.toThrow('File type text/plain is not supported for text parsing.'); expect(getStrategyFunctions).not.toHaveBeenCalled(); }); test.each([ ['ODT', ODT_MIME], ['ODP', ODP_MIME], ['ODG', ODG_MIME], ])('routes %s through configured OCR when OCR supports the type', async (_, mime) => { mergeFileConfig.mockReturnValue(makeFileConfig({ ocrSupportedMimeTypes: [mime] })); const req = makeReq({ mimetype: mime, ocrConfig: { strategy: FileSources.mistral_ocr }, }); await processAgentFileUpload({ req, res: mockRes, metadata: makeMetadata() }); expect(checkCapability).toHaveBeenCalledWith(expect.anything(), AgentCapabilities.ocr); expect(getStrategyFunctions).toHaveBeenCalledWith(FileSources.mistral_ocr); }); test('throws instead of falling back to parseText when document_parser fails for a document MIME type', async () => { getStrategyFunctions.mockReturnValue({ handleFileUpload: jest.fn().mockRejectedValue(new Error('No text found in document')), }); const req = makeReq({ mimetype: PDF_MIME, ocrConfig: null }); const { parseText } = require('@librechat/api'); await expect( processAgentFileUpload({ req, res: mockRes, metadata: makeMetadata() }), ).rejects.toThrow(/image-based and requires an OCR service/); expect(parseText).not.toHaveBeenCalled(); }); test('falls back to document_parser when configured OCR fails for a document MIME type', async () => { mergeFileConfig.mockReturnValue(makeFileConfig({ ocrSupportedMimeTypes: [PDF_MIME] })); const failingUpload = jest.fn().mockRejectedValue(new Error('OCR API returned 500')); const fallbackUpload = jest .fn() .mockResolvedValue({ text: 'parsed text', bytes: 11, filepath: 'doc://result' }); getStrategyFunctions .mockReturnValueOnce({ handleFileUpload: failingUpload }) .mockReturnValueOnce({ handleFileUpload: fallbackUpload }); const req = makeReq({ mimetype: PDF_MIME, ocrConfig: { strategy: FileSources.mistral_ocr }, }); await expect( processAgentFileUpload({ req, res: mockRes, metadata: makeMetadata() }), ).resolves.not.toThrow(); expect(getStrategyFunctions).toHaveBeenCalledWith(FileSources.mistral_ocr); expect(getStrategyFunctions).toHaveBeenCalledWith(FileSources.document_parser); }); test('throws when both configured OCR and document_parser fallback fail', async () => { mergeFileConfig.mockReturnValue(makeFileConfig({ ocrSupportedMimeTypes: [PDF_MIME] })); getStrategyFunctions.mockReturnValue({ handleFileUpload: jest.fn().mockRejectedValue(new Error('failure')), }); const req = makeReq({ mimetype: PDF_MIME, ocrConfig: { strategy: FileSources.mistral_ocr }, }); const { parseText } = require('@librechat/api'); await expect( processAgentFileUpload({ req, res: mockRes, metadata: makeMetadata() }), ).rejects.toThrow(/image-based and requires an OCR service/); expect(parseText).not.toHaveBeenCalled(); }); }); describe('text size guard', () => { test('throws before writing to MongoDB when extracted text exceeds 15MB', async () => { const oversizedText = 'x'.repeat(15 * 1024 * 1024 + 1); getStrategyFunctions.mockReturnValue({ handleFileUpload: jest.fn().mockResolvedValue({ text: oversizedText, bytes: Buffer.byteLength(oversizedText, 'utf8'), filepath: 'doc://result', }), }); const req = makeReq({ mimetype: PDF_MIME, ocrConfig: null }); const { createFile } = require('~/models'); await expect( processAgentFileUpload({ req, res: mockRes, metadata: makeMetadata() }), ).rejects.toThrow(/exceeds the 15MB storage limit/); expect(createFile).not.toHaveBeenCalled(); }); test('succeeds when extracted text is within the 15MB limit', async () => { const okText = 'x'.repeat(1024); getStrategyFunctions.mockReturnValue({ handleFileUpload: jest.fn().mockResolvedValue({ text: okText, bytes: Buffer.byteLength(okText, 'utf8'), filepath: 'doc://result', }), }); const req = makeReq({ mimetype: PDF_MIME, ocrConfig: null }); await expect( processAgentFileUpload({ req, res: mockRes, metadata: makeMetadata() }), ).resolves.not.toThrow(); }); }); /* Phase C / option α regression: the upload must persist its sandbox * pointer under `metadata.codeEnvRef` (the post-cutover schema). The * legacy `metadata.fileIdentifier` key is silently stripped by mongoose * strict mode and downstream readers (`primeFiles`, `getCodeFilesByIds`, * `categorizeFileForToolResources`, controller filtering) only check * `codeEnvRef`. Storing under the legacy key would orphan the file — * priming would skip it on subsequent code-execution turns and the * sandbox copy would never re-mount. */ describe('execute_code uploads persist codeEnvRef metadata', () => { const fs = require('fs'); const { Readable } = require('stream'); let createReadStreamSpy; beforeEach(() => { /* `processAgentFileUpload` opens the multer-staged temp file via * `fs.createReadStream`. The test fixture path doesn't exist, so * stub it to a tiny in-memory stream. */ createReadStreamSpy = jest .spyOn(fs, 'createReadStream') .mockImplementation(() => Readable.from(Buffer.from(''))); }); afterEach(() => { createReadStreamSpy.mockRestore(); }); const setupCodeEnvUpload = (uploaded) => { /* `processAgentFileUpload` calls `getStrategyFunctions` twice: * once with `execute_code` for the codeapi upload, then again with * the on-disk strategy (`local`) for the standard storage step that * runs in the same flow. Both must return a working * `handleFileUpload`. */ const codeEnvUpload = jest.fn().mockResolvedValue(uploaded); const localUpload = jest.fn().mockResolvedValue({ bytes: 0, filename: 'upload.bin', filepath: '/uploads/upload.bin', }); getStrategyFunctions.mockImplementation((src) => src === FileSources.execute_code ? { handleFileUpload: codeEnvUpload } : { handleFileUpload: localUpload, saveBuffer: jest.fn() }, ); return codeEnvUpload; }; it('persists kind:user codeEnvRef for chat attachments (messageAttachment=true)', async () => { setupCodeEnvUpload({ storage_session_id: 'sess-1', file_id: 'fid-1' }); const req = makeReq(); await processAgentFileUpload({ req, res: mockRes, metadata: { agent_id: 'agent-abc', tool_resource: EToolResources.execute_code, file_id: 'file-uuid', message_file: true, }, }); expect(db.createFile).toHaveBeenCalledWith( expect.objectContaining({ metadata: { codeEnvRef: { kind: 'user', id: 'user-123', storage_session_id: 'sess-1', file_id: 'fid-1', }, }, }), true, ); }); it('persists kind:agent codeEnvRef for agent setup files (messageAttachment=false)', async () => { setupCodeEnvUpload({ storage_session_id: 'sess-2', file_id: 'fid-2' }); const req = makeReq(); await processAgentFileUpload({ req, res: mockRes, metadata: { agent_id: 'agent-abc', tool_resource: EToolResources.execute_code, file_id: 'file-uuid', }, }); expect(db.createFile).toHaveBeenCalledWith( expect.objectContaining({ metadata: { codeEnvRef: { kind: 'agent', id: 'agent-abc', storage_session_id: 'sess-2', file_id: 'fid-2', }, }, }), true, ); }); it('does not persist legacy fileIdentifier key (mongoose strict drops it)', async () => { setupCodeEnvUpload({ storage_session_id: 'sess-3', file_id: 'fid-3' }); const req = makeReq(); await processAgentFileUpload({ req, res: mockRes, metadata: { agent_id: 'agent-abc', tool_resource: EToolResources.execute_code, file_id: 'file-uuid', message_file: true, }, }); const persisted = db.createFile.mock.calls[0][0]; expect(persisted.metadata).not.toHaveProperty('fileIdentifier'); }); }); }); describe('processFileURL', () => { beforeEach(() => { jest.clearAllMocks(); }); it('throws and skips DB persistence when saveURL returns null', async () => { const saveURL = jest.fn().mockResolvedValue(null); const getFileURL = jest.fn(); getStrategyFunctions.mockReturnValue({ saveURL, getFileURL }); await expect( processFileURL({ fileStrategy: FileSources.local, userId: 'user-123', URL: 'https://example.com/image.png', fileName: 'image.png', basePath: 'images', context: FileContext.image_generation, tenantId: 'tenant-a', }), ).rejects.toThrow('Strategy "local" did not save "image.png"'); expect(getFileURL).not.toHaveBeenCalled(); expect(db.createFile).not.toHaveBeenCalled(); }); it('persists tenantId and strategy-returned filepath metadata', async () => { const saveURL = jest.fn().mockResolvedValue({ filepath: 'https://cdn.example.com/t/tenant-a/images/user-123/image.png', bytes: 512, type: 'image/png', dimensions: { width: 32, height: 64 }, }); const getFileURL = jest.fn(); getStrategyFunctions.mockReturnValue({ saveURL, getFileURL }); await processFileURL({ fileStrategy: FileSources.cloudfront, userId: 'user-123', URL: 'https://example.com/image.png', fileName: 'image.png', basePath: 'images', context: FileContext.image_generation, tenantId: 'tenant-a', }); expect(getFileURL).not.toHaveBeenCalled(); expect(db.createFile).toHaveBeenCalledWith( expect.objectContaining({ user: 'user-123', filepath: 'https://cdn.example.com/t/tenant-a/images/user-123/image.png', bytes: 512, filename: 'image.png', source: FileSources.cloudfront, type: 'image/png', context: FileContext.image_generation, tenantId: 'tenant-a', width: 32, height: 64, }), true, ); }); it('falls back to getFileURL with user and tenant context when metadata lacks filepath', async () => { const saveURL = jest.fn().mockResolvedValue({ bytes: 256, type: 'image/png', }); const getFileURL = jest .fn() .mockResolvedValue('https://cdn.example.com/t/tenant-a/images/user-123/image.png'); getStrategyFunctions.mockReturnValue({ saveURL, getFileURL }); await processFileURL({ fileStrategy: FileSources.cloudfront, userId: 'user-123', URL: 'https://example.com/image.png', fileName: 'image.png', basePath: 'images', context: FileContext.image_generation, tenantId: 'tenant-a', }); expect(getFileURL).toHaveBeenCalledWith({ userId: 'user-123', fileName: 'image.png', basePath: 'images', tenantId: 'tenant-a', }); expect(db.createFile).toHaveBeenCalledWith( expect.objectContaining({ filepath: 'https://cdn.example.com/t/tenant-a/images/user-123/image.png', tenantId: 'tenant-a', }), true, ); }); it('preserves the user path segment for local fallback URLs', async () => { const saveURL = jest.fn().mockResolvedValue({ bytes: 256, type: 'image/png', }); const getFileURL = jest.fn().mockResolvedValue('/images/user-123/image.png'); getStrategyFunctions.mockReturnValue({ saveURL, getFileURL }); await processFileURL({ fileStrategy: FileSources.local, userId: 'user-123', URL: 'https://example.com/image.png', fileName: 'image.png', basePath: 'images', context: FileContext.image_generation, tenantId: 'tenant-a', }); expect(getFileURL).toHaveBeenCalledWith({ userId: 'user-123', fileName: 'user-123/image.png', basePath: 'images', tenantId: 'tenant-a', }); expect(db.createFile).toHaveBeenCalledWith( expect.objectContaining({ filepath: '/images/user-123/image.png', tenantId: 'tenant-a', }), true, ); }); });