From fc86e1842ddebb3ecdda0f4b3dbea34ca4e11914 Mon Sep 17 00:00:00 2001 From: Volodymyr Bulyha Date: Wed, 16 Apr 2025 10:03:49 +0100 Subject: [PATCH] =?UTF-8?q?fix:=20patchDocument,=20looks=20for=20namspaces?= =?UTF-8?q?=20more=20carefully=20over=20whole=20doc=E2=80=A6=20(#2943)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: patchDocument, looks for namspaces more carefully over whole document added support of different placeholder brackets, default `{{`: `}}` extended inputDataType to accept JSZip * fix: Attribute value string not converted to HTML esc char by js2xml original: https://github.com/nashwaan/xml-js/issues/142 * fix: skip processing utf-16 encoded files * fix: patching goes on the second circle over patched document added parameter `recursive`(= true by default) to be able to disable this behavior * Fix linting * Simplify code * write tests to skip UTF-16 types --------- Co-authored-by: Vladimir Bulyga Co-authored-by: Dolan Miu --- src/patcher/from-docx.spec.ts | 38 +++++++++++++++++++++++++++-------- src/patcher/from-docx.ts | 37 ++++++++++++++++++++++++++-------- src/patcher/patch-detector.ts | 2 +- src/patcher/traverser.ts | 5 ++--- 4 files changed, 62 insertions(+), 20 deletions(-) diff --git a/src/patcher/from-docx.spec.ts b/src/patcher/from-docx.spec.ts index 908c4a94e5..c7f4f9667d 100644 --- a/src/patcher/from-docx.spec.ts +++ b/src/patcher/from-docx.spec.ts @@ -202,15 +202,11 @@ describe("from-docx", () => { describe("patchDocument", () => { describe("document.xml and [Content_Types].xml", () => { beforeEach(() => { - vi.spyOn(JSZip, "loadAsync").mockReturnValue( - new Promise((resolve) => { - const zip = new JSZip(); + const zip = new JSZip(); - zip.file("word/document.xml", MOCK_XML); - zip.file("[Content_Types].xml", ``); - resolve(zip); - }), - ); + zip.file("word/document.xml", MOCK_XML); + zip.file("[Content_Types].xml", ``); + vi.spyOn(JSZip, "loadAsync").mockResolvedValue(zip); }); afterEach(() => { @@ -289,6 +285,32 @@ describe("from-docx", () => { expect(output).to.not.be.undefined; }); + it("should work with the raw JSZip type", async () => { + const zip = new JSZip(); + + zip.file("word/document.xml", MOCK_XML); + zip.file("[Content_Types].xml", ``); + const output = await patchDocument({ + outputType: "uint8array", + data: zip, + patches: {}, + }); + expect(output).to.not.be.undefined; + }); + + it("should skiup UTF-16 types", async () => { + const zip = new JSZip(); + + zip.file("word/document.xml", MOCK_XML); + zip.file("[Content_Types].xml", Buffer.from([0xff, 0xfe])); + const output = await patchDocument({ + outputType: "uint8array", + data: zip, + patches: {}, + }); + expect(output).to.not.be.undefined; + }); + it("should patch the document", async () => { const output = await patchDocument({ outputType: "uint8array", diff --git a/src/patcher/from-docx.ts b/src/patcher/from-docx.ts index a85bcd06dd..f69a0e87e1 100644 --- a/src/patcher/from-docx.ts +++ b/src/patcher/from-docx.ts @@ -19,7 +19,7 @@ import { replacer } from "./replacer"; import { toJson } from "./util"; // eslint-disable-next-line functional/prefer-readonly-type -export type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream; +export type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream | JSZip; export const PatchType = { DOCUMENT: "file", @@ -59,9 +59,12 @@ export type PatchDocumentOptions; + readonly recursive?: boolean; }; const imageReplacer = new ImageReplacer(); +const UTF16LE = Buffer.from([0xff, 0xfe]); +const UTF16BE = Buffer.from([0xfe, 0xff]); export const patchDocument = async ({ outputType, @@ -69,8 +72,12 @@ export const patchDocument = async ): Promise => { - const zipContent = await JSZip.loadAsync(data); + const zipContent = data instanceof JSZip ? data : await JSZip.loadAsync(data); const contexts = new Map(); const file = { Media: new Media(), @@ -87,8 +94,15 @@ export const patchDocument = async (); for (const [key, value] of Object.entries(zipContent.files)) { + const binaryValue = await value.async("uint8array"); + const startBytes = binaryValue.slice(0, 2); + if (UTF16LE.equals(startBytes) || UTF16BE.equals(startBytes)) { + binaryContentMap.set(key, binaryValue); + continue; + } + if (!key.endsWith(".xml") && !key.endsWith(".rels")) { - binaryContentMap.set(key, await value.async("uint8array")); + binaryContentMap.set(key, binaryValue); continue; } @@ -96,12 +110,10 @@ export const patchDocument = async i.name === "w:document"); - if (document) { + if (document && document.attributes) { // We could check all namespaces from Document, but we'll instead // check only those that may be used by our element types. - // eslint-disable-next-line functional/immutable-data - document.attributes = document.attributes ?? {}; for (const ns of ["mc", "wp", "r", "w15", "m"] as const) { // eslint-disable-next-line functional/immutable-data document.attributes[`xmlns:${ns}`] = DocumentAttributeNamespaces[ns]; @@ -179,7 +191,8 @@ export const patchDocument = async { - const output = js2xml(jsonObj); + const output = js2xml(jsonObj, { + attributeValueFn: (str) => + String(str) + .replace(/&(?!amp;|lt;|gt;|quot;|apos;)/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"), // cspell:words apos + }); return output; }; diff --git a/src/patcher/patch-detector.ts b/src/patcher/patch-detector.ts index 31756f0a6e..eba608e258 100644 --- a/src/patcher/patch-detector.ts +++ b/src/patcher/patch-detector.ts @@ -10,7 +10,7 @@ type PatchDetectorOptions = { /** Detects which patches are needed/present in a template */ export const patchDetector = async ({ data }: PatchDetectorOptions): Promise => { - const zipContent = await JSZip.loadAsync(data); + const zipContent = data instanceof JSZip ? data : await JSZip.loadAsync(data); const patches = new Set(); for (const [key, value] of Object.entries(zipContent.files)) { diff --git a/src/patcher/traverser.ts b/src/patcher/traverser.ts index 2df41859bc..30f647fd95 100644 --- a/src/patcher/traverser.ts +++ b/src/patcher/traverser.ts @@ -34,10 +34,9 @@ export const traverse = (node: Element): readonly IRenderedParagraphNode[] => { if (currentNode.element.name === "w:p") { renderedParagraphs = [...renderedParagraphs, renderParagraphNode(currentNode)]; - } else { - // eslint-disable-next-line functional/immutable-data - queue.push(...elementsToWrapper(currentNode)); } + // eslint-disable-next-line functional/immutable-data + queue.push(...elementsToWrapper(currentNode)); } return renderedParagraphs;