mirror of
https://github.com/danny-avila/LibreChat.git
synced 2026-05-13 16:07:30 +00:00
🩹 fix: LibreOffice PDF embed uses blob: URL (Chrome blocks data: PDFs)
Manual e2e on PR #12934: enabling `OFFICE_PREVIEW_LIBREOFFICE=true` on a host with `soffice` installed surfaced "This page has been blocked by Chrome" inside the PDF preview iframe.

Root cause: Chrome blocks `data:application/pdf;base64,...` navigations inside sandboxed iframes (anti-phishing measure since Chrome 76, see crbug.com/863001). The Sandpack iframe IS sandboxed (its `sandbox="..."` attribute lacks `allow-top-navigation` for data: URLs specifically), so when our inner `<iframe src="data:application/pdf;...">` tries to navigate, Chrome's interstitial fires and renders the "blocked" message.

Fix: switch from `data:` URL to `blob:` URL. The bootstrap now:
1. Reads the base64 payload from a `<script type="application/octet-stream;base64">` data block (same pattern as the DOCX and PPTX wrappers).
2. Decodes via `atob` + `Uint8Array.from`.
3. Creates a `Blob` with `type: 'application/pdf'`.
4. `URL.createObjectURL(blob)` produces a same-origin blob: URL.
5. Sets `pdfFrame.src = url + '#view=FitH'` — Chrome treats blob: URLs as legitimate navigation and serves the built-in PDF viewer.

CSP updated: `frame-src blob:` (was `frame-src data:`). `data:` is now explicitly NOT allowed in `frame-src` since Chrome would block it anyway in our context — keeping it would be misleading documentation.

Bonus: failure paths now log to `console.error` with a `[libreoffice-pdf]` prefix so DevTools surfaces blob-creation failures and PDF-viewer load timeouts in red.

Tests updated:
- "emits a complete sandboxed HTML document" now asserts the data-block + blob URL construction (not the old data: URL).
- New CSP test "allows blob: in frame-src (NOT data:)" with both positive and negative assertions to lock in the change.
- Integration test for `tryLibreOfficePreview` updated to look for the data block + `URL.createObjectURL` instead of the data: URL.
- Large-payload test now verifies the data block round-trip rather than data: URL escaping (base64 alphabet has no characters that break out of `<script>` anyway).
This commit is contained in:
parent
06c3bfa3c0
commit
d90f26c11c
2 changed files with 100 additions and 42 deletions
|
|
@ -80,22 +80,40 @@ describe('libreoffice (env gating + wrapper)', () => {
|
|||
* wrappers. */
|
||||
const FAKE_PDF_B64 = 'JVBERi0xLjQK'; // "%PDF-1.4\n" base64
|
||||
|
||||
it('emits a complete sandboxed HTML document', () => {
|
||||
it('emits a complete sandboxed HTML document with PDF bytes embedded as a data block', () => {
|
||||
const html = buildPdfEmbedDocument(FAKE_PDF_B64);
|
||||
expect(html).toMatch(/^<!DOCTYPE html>/);
|
||||
expect(html).toContain('<title>Preview</title>');
|
||||
expect(html).toContain('id="lc-pdf"');
|
||||
expect(html).toContain(`data:application/pdf;base64,${FAKE_PDF_B64}`);
|
||||
/* PDF bytes live in a `<script type="application/octet-stream;base64">`
|
||||
* data block — the bootstrap reads it and constructs a blob: URL
|
||||
* at runtime. We deliberately do NOT use `<iframe src="data:
|
||||
* application/pdf;...">` because Chrome blocks data: navigations
|
||||
* inside sandboxed iframes (manual e2e on PR #12934 — surfaced
|
||||
* as the "This page has been blocked by Chrome" interstitial).
|
||||
* blob: URLs are same-origin and bypass that restriction. */
|
||||
expect(html).toContain('id="lc-pdf-data"');
|
||||
expect(html).toContain(FAKE_PDF_B64);
|
||||
expect(html).not.toMatch(/src="data:application\/pdf/);
|
||||
/* The bootstrap code that converts the base64 to a blob URL. */
|
||||
expect(html).toContain('URL.createObjectURL');
|
||||
expect(html).toContain('new Blob');
|
||||
expect(html).toContain("type: 'application/pdf'");
|
||||
});
|
||||
|
||||
it('CSP locks the iframe down: no script CDN, no outbound connect, no eval', () => {
|
||||
it('CSP allows blob: in frame-src (NOT data:) and locks the iframe down otherwise', () => {
|
||||
const html = buildPdfEmbedDocument(FAKE_PDF_B64);
|
||||
const cspMatch = html.match(/<meta http-equiv="Content-Security-Policy" content="([^"]+)">/);
|
||||
expect(cspMatch).not.toBeNull();
|
||||
const csp = cspMatch![1];
|
||||
expect(csp).toMatch(/default-src 'none'/);
|
||||
/* The whole point: data: URIs in nested iframes (browser PDF viewer). */
|
||||
expect(csp).toMatch(/frame-src data:/);
|
||||
/* blob: in frame-src, NOT data: — Chrome blocks data:application/pdf
|
||||
* navigations inside sandboxed iframes (anti-phishing measure
|
||||
* since Chrome 76). The bootstrap creates blob: URLs at runtime
|
||||
* which Chrome treats as same-origin and allows. Manual e2e on
|
||||
* PR #12934. */
|
||||
expect(csp).toMatch(/frame-src[^;]*\bblob:/);
|
||||
expect(csp).not.toMatch(/frame-src[^;]*\bdata:/);
|
||||
/* No outbound HTTP from the rendered iframe — a malicious PDF
|
||||
* can't beacon home from inside the viewer. */
|
||||
expect(csp).toMatch(/connect-src 'none'/);
|
||||
|
|
@ -113,6 +131,8 @@ describe('libreoffice (env gating + wrapper)', () => {
|
|||
expect(html).toContain('PDF preview unavailable in this browser');
|
||||
/* The 4-second heuristic timer that swaps to the fallback. */
|
||||
expect(html).toContain('4000');
|
||||
/* Reasons logged to console.error for power-user debugging. */
|
||||
expect(html).toContain("console.error('[libreoffice-pdf] fallback fired:'");
|
||||
});
|
||||
|
||||
it('uses #view=FitH so the PDF fills the panel width on first paint', () => {
|
||||
|
|
@ -120,16 +140,18 @@ describe('libreoffice (env gating + wrapper)', () => {
|
|||
expect(html).toContain('#view=FitH');
|
||||
});
|
||||
|
||||
it('embeds large base64 payloads without breaking out of the data URI', () => {
|
||||
/* A `</script>` substring in the base64 wouldn't terminate the
|
||||
* iframe `src=` attribute, but a stray `"` would. base64 alphabet
|
||||
* is `A-Za-z0-9+/=` — none of those are dangerous. Sanity-check
|
||||
* with a synthetically large payload. */
|
||||
it('embeds large base64 payloads inside the data block without escaping issues', () => {
|
||||
/* The base64 alphabet (A-Za-z0-9+/=) contains no characters that
|
||||
* could break out of `<script type="application/octet-stream;
|
||||
* base64">...</script>` — base64 cannot contain `<`, `>`, `&`, or
|
||||
* quote characters. Sanity-check that the data round-trips. */
|
||||
const big = 'A'.repeat(100_000);
|
||||
const html = buildPdfEmbedDocument(big);
|
||||
const src = html.match(/src="data:application\/pdf;base64,([^"]+)/);
|
||||
expect(src).not.toBeNull();
|
||||
expect(src![1].length).toBe(big.length + '#view=FitH'.length);
|
||||
const dataBlock = html.match(
|
||||
/<script id="lc-pdf-data" type="application\/octet-stream;base64">([^<]+)<\/script>/,
|
||||
);
|
||||
expect(dataBlock).not.toBeNull();
|
||||
expect(dataBlock![1]).toBe(big);
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -274,7 +296,11 @@ describe('libreoffice integration (skipped unless LibreOffice is on $PATH)', ()
|
|||
const out = await tryLibreOfficePreview(buf, 'docx', 512 * 1024);
|
||||
expect(out).not.toBeNull();
|
||||
expect(out!).toMatch(/^<!DOCTYPE html>/);
|
||||
expect(out!).toContain('data:application/pdf;base64,');
|
||||
/* PDF bytes are embedded as a base64 data block (not as a data:
|
||||
* URL — Chrome blocks data:application/pdf in sandboxed iframes;
|
||||
* the bootstrap converts to a blob: URL at runtime). */
|
||||
expect(out!).toContain('id="lc-pdf-data"');
|
||||
expect(out!).toContain('URL.createObjectURL');
|
||||
expect(Buffer.byteLength(out!, 'utf-8')).toBeLessThanOrEqual(512 * 1024);
|
||||
},
|
||||
35_000,
|
||||
|
|
|
|||
|
|
@ -258,31 +258,42 @@ function runConversion(binary: string, inputPath: string, tempDir: string): Prom
|
|||
* `<iframe>` so the host browser's PDF viewer (PDF.js in Firefox, Chrome's
|
||||
* built-in viewer, Safari's Preview-driven viewer) can render it directly.
|
||||
*
|
||||
* Why an inner iframe rather than `<embed>` or `<object>`:
|
||||
* - `<embed>` and `<object>` rendering is least consistent across modern
|
||||
* browsers (Chrome's pdfium plugin requires CSP `object-src data:`,
|
||||
* and some headless contexts disable it).
|
||||
* - `<iframe src="data:application/pdf;base64,...">` is the most
|
||||
* reliable cross-browser path. Chromium's PDF viewer treats it as a
|
||||
* top-level navigation and serves the built-in viewer.
|
||||
* Why blob: URL (vs data: URL):
|
||||
* Chrome blocks `data:application/pdf` navigations inside sandboxed
|
||||
* iframes (anti-phishing measure since Chrome 76 — surfaces as a
|
||||
* "This page has been blocked by Chrome" interstitial). The Sandpack
|
||||
* iframe IS sandboxed, so the inner iframe's data: navigation hits
|
||||
* that block. Constructing a `blob:` URL at runtime via
|
||||
* `URL.createObjectURL(new Blob([bytes], {type: 'application/pdf'}))`
|
||||
* produces a same-origin URL that Chrome treats as legitimate
|
||||
* navigation — works inside sandboxed contexts where data: doesn't.
|
||||
* Manual e2e on PR #12934.
|
||||
*
|
||||
* The inner iframe is fully sandboxed — no script, same-origin, etc. —
|
||||
* and uses `#view=FitH` to size to the panel's width on first paint.
|
||||
* Why an inner iframe rather than `<embed>` or `<object>`:
|
||||
* `<embed>` and `<object>` rendering is least consistent across modern
|
||||
* browsers (Chrome's pdfium plugin requires CSP `object-src` and
|
||||
* some headless contexts disable it). `<iframe src="blob:...">` is
|
||||
* the most reliable cross-browser path; Chromium/Firefox/Safari all
|
||||
* serve their built-in PDF viewer for it.
|
||||
*
|
||||
* The inner iframe uses `#view=FitH` to size to the panel's width
|
||||
* on first paint.
|
||||
*/
|
||||
export function buildPdfEmbedDocument(pdfBase64: string): string {
|
||||
/* CSP scoping:
|
||||
* - `default-src 'none'`: lock everything down.
|
||||
* - `frame-src data:`: allow the inner `<iframe src="data:application/pdf;...">`.
|
||||
* - `object-src 'self' data:`: belt-and-suspenders for browsers that
|
||||
* route PDFs through `<object>` via the iframe sandbox quirk.
|
||||
* - `script-src 'unsafe-inline'`: only our tiny load-detector script.
|
||||
* - `frame-src blob:`: allow the inner `<iframe src="blob:...">`
|
||||
* navigation that the bootstrap creates from the PDF bytes.
|
||||
* `data:` is intentionally NOT in `frame-src` because Chrome
|
||||
* blocks it in sandboxed contexts anyway.
|
||||
* - `script-src 'unsafe-inline'`: only our tiny bootstrap script.
|
||||
* - `style-src 'unsafe-inline'`: page chrome (no external sheets).
|
||||
* - `connect-src 'none'`: rendered iframe makes no network calls.
|
||||
*/
|
||||
const csp = [
|
||||
"default-src 'none'",
|
||||
'frame-src data:',
|
||||
"object-src 'self' data:",
|
||||
'frame-src blob:',
|
||||
"object-src 'self' blob:",
|
||||
"script-src 'unsafe-inline'",
|
||||
"style-src 'unsafe-inline'",
|
||||
"img-src 'self' data: blob:",
|
||||
|
|
@ -307,26 +318,47 @@ html, body { margin: 0; padding: 0; height: 100%; background: var(--bg); color:
|
|||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<iframe id="lc-pdf" src="data:application/pdf;base64,${pdfBase64}#view=FitH" title="PDF preview"></iframe>
|
||||
<iframe id="lc-pdf" title="PDF preview"></iframe>
|
||||
<div id="lc-fallback">PDF preview unavailable in this browser. Please download the file to view it.</div>
|
||||
<script id="lc-pdf-data" type="application/octet-stream;base64">${pdfBase64}</script>
|
||||
<script>
|
||||
(function () {
|
||||
/* Some browsers / kiosk profiles disable the built-in PDF viewer; the
|
||||
* iframe loads but stays blank. We can't reliably detect the inner
|
||||
* viewer's success across browsers, so we use a 4-second heuristic:
|
||||
* if the iframe never reports a load event by then, swap to the
|
||||
* fallback message. False negatives (slow networks, cold viewers)
|
||||
* are acceptable — the user can still download the file. */
|
||||
var pdfFrame = document.getElementById('lc-pdf');
|
||||
var fallback = document.getElementById('lc-fallback');
|
||||
var loaded = false;
|
||||
if (pdfFrame) {
|
||||
pdfFrame.addEventListener('load', function () { loaded = true; });
|
||||
if (!pdfFrame || !fallback) { return; }
|
||||
|
||||
function showFallback(reason) {
|
||||
pdfFrame.style.display = 'none';
|
||||
fallback.classList.add('visible');
|
||||
if (reason && typeof console !== 'undefined' && console.error) {
|
||||
console.error('[libreoffice-pdf] fallback fired:', reason);
|
||||
}
|
||||
}
|
||||
|
||||
/* Decode the embedded base64 and create a blob: URL. Chrome blocks
|
||||
* data:application/pdf in sandboxed iframes (parent Sandpack iframe
|
||||
* is sandboxed); blob: URLs are treated as same-origin and bypass
|
||||
* that restriction. Manual e2e on PR #12934 — "This page has been
|
||||
* blocked by Chrome" interstitial was the symptom. */
|
||||
var loaded = false;
|
||||
try {
|
||||
var b64 = document.getElementById('lc-pdf-data').textContent.trim();
|
||||
var bytes = Uint8Array.from(atob(b64), function (c) { return c.charCodeAt(0); });
|
||||
var blob = new Blob([bytes], { type: 'application/pdf' });
|
||||
var url = URL.createObjectURL(blob);
|
||||
pdfFrame.addEventListener('load', function () { loaded = true; });
|
||||
pdfFrame.src = url + '#view=FitH';
|
||||
} catch (err) {
|
||||
showFallback((err && err.message) || 'blob-creation-failed');
|
||||
return;
|
||||
}
|
||||
|
||||
/* 4-second heuristic: if the iframe never reports a load event by
|
||||
* then, the host browser PDF viewer is probably disabled (kiosk
|
||||
* profile, Brave Shields, etc.). Swap to the fallback message. */
|
||||
setTimeout(function () {
|
||||
if (!loaded && fallback) {
|
||||
if (pdfFrame) { pdfFrame.style.display = 'none'; }
|
||||
fallback.classList.add('visible');
|
||||
if (!loaded) {
|
||||
showFallback('pdf-viewer-load-timeout');
|
||||
}
|
||||
}, 4000);
|
||||
})();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue