fix: patchDocument looks for namespaces more carefully over the whole document (#2943)
* fix: patchDocument looks for namespaces more carefully over the whole document; added support for different placeholder brackets, defaulting to `{{` / `}}`; extended `InputDataType` to accept JSZip
* fix: attribute value strings were not converted to XML escape characters by js2xml (original issue: https://github.com/nashwaan/xml-js/issues/142)
* fix: skip processing UTF-16 encoded files
* fix: patching made a second pass over the already-patched document; added a `recursive` parameter (true by default) so this behavior can be disabled
* Fix linting
* Simplify code
* Write tests to skip UTF-16 types

---------

Co-authored-by: Vladimir Bulyga <vladimir.bulyga@legatics.com>
Co-authored-by: Dolan Miu <dolan_miu@hotmail.com>
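Taken together, a call exercising the new options looks roughly like the sketch below (hedged: the template path and the empty `patches` map are illustrative; the option names come from the diff that follows):

    import * as fs from "fs";
    import JSZip from "jszip";
    import { patchDocument } from "docx";

    const run = async (): Promise<void> => {
        // Hand a caller-owned JSZip straight to the patcher (new InputDataType member).
        const zip = await JSZip.loadAsync(fs.readFileSync("template.docx"));

        const output = await patchDocument({
            outputType: "uint8array",
            data: zip, // a raw JSZip instance is now accepted
            patches: {}, // kept empty here, as in the tests below
            placeholderDelimiters: { start: "{{", end: "}}" }, // the new default, shown explicitly
            recursive: false, // new flag: do not re-scan the already-patched document
        });

        fs.writeFileSync("patched.docx", output);
    };

    run();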
@@ -202,15 +202,11 @@ describe("from-docx", () => {
     describe("patchDocument", () => {
         describe("document.xml and [Content_Types].xml", () => {
             beforeEach(() => {
-                vi.spyOn(JSZip, "loadAsync").mockReturnValue(
-                    new Promise<JSZip>((resolve) => {
                 const zip = new JSZip();

                 zip.file("word/document.xml", MOCK_XML);
                 zip.file("[Content_Types].xml", `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>`);
-                        resolve(zip);
+                vi.spyOn(JSZip, "loadAsync").mockResolvedValue(zip);
-                    }),
-                );
             });

             afterEach(() => {
@@ -289,6 +285,32 @@ describe("from-docx", () => {
             expect(output).to.not.be.undefined;
         });

+        it("should work with the raw JSZip type", async () => {
+            const zip = new JSZip();
+
+            zip.file("word/document.xml", MOCK_XML);
+            zip.file("[Content_Types].xml", `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>`);
+            const output = await patchDocument({
+                outputType: "uint8array",
+                data: zip,
+                patches: {},
+            });
+            expect(output).to.not.be.undefined;
+        });
+
+        it("should skip UTF-16 types", async () => {
+            const zip = new JSZip();
+
+            zip.file("word/document.xml", MOCK_XML);
+            zip.file("[Content_Types].xml", Buffer.from([0xff, 0xfe]));
+            const output = await patchDocument({
+                outputType: "uint8array",
+                data: zip,
+                patches: {},
+            });
+            expect(output).to.not.be.undefined;
+        });
+
         it("should patch the document", async () => {
             const output = await patchDocument({
                 outputType: "uint8array",
@@ -19,7 +19,7 @@ import { replacer } from "./replacer";
 import { toJson } from "./util";

 // eslint-disable-next-line functional/prefer-readonly-type
-export type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream;
+export type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream | JSZip;

 export const PatchType = {
     DOCUMENT: "file",
@@ -59,9 +59,12 @@ export type PatchDocumentOptions<T extends PatchDocumentOutputType = PatchDocumentOutputType>
         readonly start: string;
         readonly end: string;
     }>;
+    readonly recursive?: boolean;
 };

 const imageReplacer = new ImageReplacer();
+const UTF16LE = Buffer.from([0xff, 0xfe]);
+const UTF16BE = Buffer.from([0xfe, 0xff]);

 export const patchDocument = async <T extends PatchDocumentOutputType = PatchDocumentOutputType>({
     outputType,
@@ -69,8 +72,12 @@ export const patchDocument = async <T extends PatchDocumentOutputType = PatchDocumentOutputType>({
     patches,
     keepOriginalStyles,
     placeholderDelimiters = { start: "{{", end: "}}" } as const,
+    /**
+     * Search for occurrences over the patched document
+     */
+    recursive = true,
 }: PatchDocumentOptions<T>): Promise<OutputByType[T]> => {
-    const zipContent = await JSZip.loadAsync(data);
+    const zipContent = data instanceof JSZip ? data : await JSZip.loadAsync(data);
     const contexts = new Map<string, IContext>();
     const file = {
         Media: new Media(),
@@ -87,8 +94,15 @@ export const patchDocument = async <T extends PatchDocumentOutputType = PatchDocumentOutputType>({
     const binaryContentMap = new Map<string, Uint8Array>();

     for (const [key, value] of Object.entries(zipContent.files)) {
+        const binaryValue = await value.async("uint8array");
+        const startBytes = binaryValue.slice(0, 2);
+        if (UTF16LE.equals(startBytes) || UTF16BE.equals(startBytes)) {
+            binaryContentMap.set(key, binaryValue);
+            continue;
+        }
+
         if (!key.endsWith(".xml") && !key.endsWith(".rels")) {
-            binaryContentMap.set(key, await value.async("uint8array"));
+            binaryContentMap.set(key, binaryValue);
             continue;
         }

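The skip added above amounts to a two-byte byte-order-mark check; as a standalone sketch (the helper name is hypothetical, the byte signatures mirror the constants in the diff):

    // UTF-16LE content starts with the BOM 0xFF 0xFE, UTF-16BE with 0xFE 0xFF.
    const UTF16LE = Buffer.from([0xff, 0xfe]);
    const UTF16BE = Buffer.from([0xfe, 0xff]);

    // Hypothetical helper: true if the zip entry should be copied through untouched.
    const isUtf16Encoded = (binaryValue: Uint8Array): boolean => {
        const startBytes = binaryValue.slice(0, 2);
        return UTF16LE.equals(startBytes) || UTF16BE.equals(startBytes);
    };

    console.log(isUtf16Encoded(new Uint8Array([0xff, 0xfe, 0x41, 0x00]))); // true ("A" in UTF-16LE)
    console.log(isUtf16Encoded(new Uint8Array([0x3c, 0x3f, 0x78, 0x6d]))); // false ("<?xm", a plain XML file)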
@@ -96,12 +110,10 @@ export const patchDocument = async <T extends PatchDocumentOutputType = PatchDocumentOutputType>({

         if (key === "word/document.xml") {
             const document = json.elements?.find((i) => i.name === "w:document");
-            if (document) {
+            if (document && document.attributes) {
                 // We could check all namespaces from Document, but we'll instead
                 // check only those that may be used by our element types.

-                // eslint-disable-next-line functional/immutable-data
-                document.attributes = document.attributes ?? {};
                 for (const ns of ["mc", "wp", "r", "w15", "m"] as const) {
                     // eslint-disable-next-line functional/immutable-data
                     document.attributes[`xmlns:${ns}`] = DocumentAttributeNamespaces[ns];
@@ -179,7 +191,8 @@ export const patchDocument = async <T extends PatchDocumentOutputType = PatchDocumentOutputType>({
                     context,
                     keepOriginalStyles,
                 });
-                if (!didFindOccurrence) {
+                // Why search again? Once the document is patched, it searches over the patched JSON again, which takes too long if the patched document has a large, deeply nested structure.
+                if (!recursive || !didFindOccurrence) {
                     break;
                 }
             }
@@ -275,7 +288,15 @@ export const patchDocument = async <T extends PatchDocumentOutputType = PatchDocumentOutputType>({
 };

 const toXml = (jsonObj: Element): string => {
-    const output = js2xml(jsonObj);
+    const output = js2xml(jsonObj, {
+        attributeValueFn: (str) =>
+            String(str)
+                .replace(/&(?!amp;|lt;|gt;|quot;|apos;)/g, "&amp;")
+                .replace(/</g, "&lt;")
+                .replace(/>/g, "&gt;")
+                .replace(/"/g, "&quot;")
+                .replace(/'/g, "&apos;"), // cspell:words apos
+    });
     return output;
 };

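As a standalone illustration of the escaping behavior patched above (a hedged sketch using xml-js directly; the element and attribute names are made up for the example):

    import { js2xml, type Element } from "xml-js";

    // Same escaping as the attributeValueFn added above.
    const escapeAttribute = (value: unknown): string =>
        String(value)
            .replace(/&(?!amp;|lt;|gt;|quot;|apos;)/g, "&amp;")
            .replace(/</g, "&lt;")
            .replace(/>/g, "&gt;")
            .replace(/"/g, "&quot;")
            .replace(/'/g, "&apos;");

    const doc: Element = {
        elements: [{ type: "element", name: "w:t", attributes: { val: 'Fish & "Chips"' } }],
    };

    // Without attributeValueFn, the raw & and " would pass through into the attribute value.
    console.log(js2xml(doc, { attributeValueFn: escapeAttribute }));
    // Roughly: <w:t val="Fish &amp; &quot;Chips&quot;"/>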
@@ -10,7 +10,7 @@ type PatchDetectorOptions = {

 /** Detects which patches are needed/present in a template */
 export const patchDetector = async ({ data }: PatchDetectorOptions): Promise<readonly string[]> => {
-    const zipContent = await JSZip.loadAsync(data);
+    const zipContent = data instanceof JSZip ? data : await JSZip.loadAsync(data);
     const patches = new Set<string>();

     for (const [key, value] of Object.entries(zipContent.files)) {
@@ -34,11 +34,10 @@ export const traverse = (node: Element): readonly IRenderedParagraphNode[] => {

         if (currentNode.element.name === "w:p") {
             renderedParagraphs = [...renderedParagraphs, renderParagraphNode(currentNode)];
-        } else {
+        }
         // eslint-disable-next-line functional/immutable-data
         queue.push(...elementsToWrapper(currentNode));
     }
-    }

     return renderedParagraphs;
 };