diff --git a/src/patcher/from-docx.spec.ts b/src/patcher/from-docx.spec.ts index 908c4a94e5..c7f4f9667d 100644 --- a/src/patcher/from-docx.spec.ts +++ b/src/patcher/from-docx.spec.ts @@ -202,15 +202,11 @@ describe("from-docx", () => { describe("patchDocument", () => { describe("document.xml and [Content_Types].xml", () => { beforeEach(() => { - vi.spyOn(JSZip, "loadAsync").mockReturnValue( - new Promise((resolve) => { - const zip = new JSZip(); + const zip = new JSZip(); - zip.file("word/document.xml", MOCK_XML); - zip.file("[Content_Types].xml", ``); - resolve(zip); - }), - ); + zip.file("word/document.xml", MOCK_XML); + zip.file("[Content_Types].xml", ``); + vi.spyOn(JSZip, "loadAsync").mockResolvedValue(zip); }); afterEach(() => { @@ -289,6 +285,32 @@ describe("from-docx", () => { expect(output).to.not.be.undefined; }); + it("should work with the raw JSZip type", async () => { + const zip = new JSZip(); + + zip.file("word/document.xml", MOCK_XML); + zip.file("[Content_Types].xml", ``); + const output = await patchDocument({ + outputType: "uint8array", + data: zip, + patches: {}, + }); + expect(output).to.not.be.undefined; + }); + + it("should skiup UTF-16 types", async () => { + const zip = new JSZip(); + + zip.file("word/document.xml", MOCK_XML); + zip.file("[Content_Types].xml", Buffer.from([0xff, 0xfe])); + const output = await patchDocument({ + outputType: "uint8array", + data: zip, + patches: {}, + }); + expect(output).to.not.be.undefined; + }); + it("should patch the document", async () => { const output = await patchDocument({ outputType: "uint8array", diff --git a/src/patcher/from-docx.ts b/src/patcher/from-docx.ts index a85bcd06dd..f69a0e87e1 100644 --- a/src/patcher/from-docx.ts +++ b/src/patcher/from-docx.ts @@ -19,7 +19,7 @@ import { replacer } from "./replacer"; import { toJson } from "./util"; // eslint-disable-next-line functional/prefer-readonly-type -export type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream; +export type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream | JSZip; export const PatchType = { DOCUMENT: "file", @@ -59,9 +59,12 @@ export type PatchDocumentOptions; + readonly recursive?: boolean; }; const imageReplacer = new ImageReplacer(); +const UTF16LE = Buffer.from([0xff, 0xfe]); +const UTF16BE = Buffer.from([0xfe, 0xff]); export const patchDocument = async ({ outputType, @@ -69,8 +72,12 @@ export const patchDocument = async ): Promise => { - const zipContent = await JSZip.loadAsync(data); + const zipContent = data instanceof JSZip ? data : await JSZip.loadAsync(data); const contexts = new Map(); const file = { Media: new Media(), @@ -87,8 +94,15 @@ export const patchDocument = async (); for (const [key, value] of Object.entries(zipContent.files)) { + const binaryValue = await value.async("uint8array"); + const startBytes = binaryValue.slice(0, 2); + if (UTF16LE.equals(startBytes) || UTF16BE.equals(startBytes)) { + binaryContentMap.set(key, binaryValue); + continue; + } + if (!key.endsWith(".xml") && !key.endsWith(".rels")) { - binaryContentMap.set(key, await value.async("uint8array")); + binaryContentMap.set(key, binaryValue); continue; } @@ -96,12 +110,10 @@ export const patchDocument = async i.name === "w:document"); - if (document) { + if (document && document.attributes) { // We could check all namespaces from Document, but we'll instead // check only those that may be used by our element types. - // eslint-disable-next-line functional/immutable-data - document.attributes = document.attributes ?? {}; for (const ns of ["mc", "wp", "r", "w15", "m"] as const) { // eslint-disable-next-line functional/immutable-data document.attributes[`xmlns:${ns}`] = DocumentAttributeNamespaces[ns]; @@ -179,7 +191,8 @@ export const patchDocument = async { - const output = js2xml(jsonObj); + const output = js2xml(jsonObj, { + attributeValueFn: (str) => + String(str) + .replace(/&(?!amp;|lt;|gt;|quot;|apos;)/g, "&") + .replace(//g, ">") + .replace(/"/g, """) + .replace(/'/g, "'"), // cspell:words apos + }); return output; }; diff --git a/src/patcher/patch-detector.ts b/src/patcher/patch-detector.ts index 31756f0a6e..eba608e258 100644 --- a/src/patcher/patch-detector.ts +++ b/src/patcher/patch-detector.ts @@ -10,7 +10,7 @@ type PatchDetectorOptions = { /** Detects which patches are needed/present in a template */ export const patchDetector = async ({ data }: PatchDetectorOptions): Promise => { - const zipContent = await JSZip.loadAsync(data); + const zipContent = data instanceof JSZip ? data : await JSZip.loadAsync(data); const patches = new Set(); for (const [key, value] of Object.entries(zipContent.files)) { diff --git a/src/patcher/traverser.ts b/src/patcher/traverser.ts index 2df41859bc..30f647fd95 100644 --- a/src/patcher/traverser.ts +++ b/src/patcher/traverser.ts @@ -34,10 +34,9 @@ export const traverse = (node: Element): readonly IRenderedParagraphNode[] => { if (currentNode.element.name === "w:p") { renderedParagraphs = [...renderedParagraphs, renderParagraphNode(currentNode)]; - } else { - // eslint-disable-next-line functional/immutable-data - queue.push(...elementsToWrapper(currentNode)); } + // eslint-disable-next-line functional/immutable-data + queue.push(...elementsToWrapper(currentNode)); } return renderedParagraphs;