Files
docx-js/src/patcher/from-docx.ts

285 lines
10 KiB
TypeScript
Raw Normal View History

2023-05-01 20:37:39 +01:00
import JSZip from "jszip";
2023-02-18 20:36:24 +00:00
import { Element, js2xml } from "xml-js";
2023-02-25 19:33:12 +00:00
import { ImageReplacer } from "@export/packer/image-replacer";
2023-03-03 23:47:50 +00:00
import { IViewWrapper } from "@file/document-wrapper";
import { File } from "@file/file";
import { FileChild } from "@file/file-child";
import { IMediaData, Media } from "@file/media";
import { ConcreteHyperlink, ExternalHyperlink, ParagraphChild } from "@file/paragraph";
2023-03-08 23:30:51 +00:00
import { TargetModeType } from "@file/relationships/relationship/relationship";
import { IContext } from "@file/xml-components";
2023-03-08 23:30:51 +00:00
import { uniqueId } from "@util/convenience-functions";
2023-02-25 19:33:12 +00:00
import { appendContentType } from "./content-types-manager";
import { appendRelationship, getNextRelationshipIndex } from "./relationship-manager";
2023-02-16 20:17:48 +00:00
import { replacer } from "./replacer";
2023-02-18 20:36:24 +00:00
import { toJson } from "./util";
2023-02-08 03:18:11 +00:00
/**
 * Accepted representations of the source .docx archive.
 *
 * A value of this type is supplied as `data` in the patch options and handed
 * directly to `JSZip.loadAsync`.
 */
// eslint-disable-next-line functional/prefer-readonly-type
export type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream;
2023-02-08 03:18:11 +00:00
/**
 * Discriminator values for the `type` field of a patch.
 */
export const PatchType = {
    // Tag used by patches whose children are file-level children (`FileChild`).
    DOCUMENT: "file",
    // Tag used by patches whose children are paragraph-level children (`ParagraphChild`).
    PARAGRAPH: "paragraph",
} as const;
2023-02-25 19:33:12 +00:00
/**
 * A patch whose replacement content is made of paragraph-level children.
 */
type ParagraphPatch = {
    /** Discriminator: always `PatchType.PARAGRAPH`. */
    readonly type: typeof PatchType.PARAGRAPH;
    /** Paragraph children that replace the placeholder text. */
    readonly children: readonly ParagraphChild[];
};
/**
 * A patch whose replacement content is made of file-level children.
 */
type FilePatch = {
    /** Discriminator: always `PatchType.DOCUMENT`. */
    readonly type: typeof PatchType.DOCUMENT;
    /** File children that replace the placeholder text. */
    readonly children: readonly FileChild[];
};
/**
 * Records the media found in one archive entry so that image relationships
 * can be appended to that entry's relationship file later on.
 */
type IImageRelationshipAddition = {
    /** Archive path of the XML entry the media was found in. */
    readonly key: string;
    /** Media items referenced by that entry. */
    readonly mediaDatas: readonly IMediaData[];
};
2023-03-03 23:47:50 +00:00
/**
 * Records one hyperlink that needs a relationship entry in the relationship
 * file belonging to a given archive entry.
 */
type IHyperlinkRelationshipAddition = {
    /** Archive path of the XML entry that contains the hyperlink. */
    readonly key: string;
    /** Relationship id and link target of the hyperlink. */
    readonly hyperlink: {
        readonly id: string;
        readonly link: string;
    };
};
2023-03-08 23:30:51 +00:00
2023-02-25 22:18:31 +00:00
/**
 * A single patch: either paragraph-level or file-level replacement content,
 * discriminated by its `type` field.
 */
export type IPatch = ParagraphPatch | FilePatch;
2023-02-25 19:33:12 +00:00
/**
 * Maps each supported output type name to the type of value produced for it.
 * Taken from JSZip; the keys are what gets passed as `type` to `generateAsync`.
 */
type OutputByType = {
    readonly base64: string;
    // eslint-disable-next-line id-denylist
    readonly string: string;
    readonly text: string;
    readonly binarystring: string;
    readonly array: readonly number[];
    readonly uint8array: Uint8Array;
    readonly arraybuffer: ArrayBuffer;
    readonly blob: Blob;
    readonly nodebuffer: Buffer;
};
/** Name of an output representation that `patchDocument` can produce. */
export type PatchDocumentOutputType = keyof OutputByType;
/**
 * Options accepted by `patchDocument`.
 *
 * @typeParam T - The requested output type name; it determines the resolved type of the result.
 */
export type PatchDocumentOptions<T extends PatchDocumentOutputType = PatchDocumentOutputType> = {
    /** Output representation to generate; passed as `type` to JSZip's `generateAsync`. */
    readonly outputType: T;
    /** The source archive, loaded with `JSZip.loadAsync`. */
    readonly data: InputDataType;
    /** Patches keyed by placeholder name; key `foo` targets the text `{{foo}}`. */
    readonly patches: Readonly<Record<string, IPatch>>;
    /** Forwarded unchanged to the replacer. NOTE(review): exact effect is defined in ./replacer — confirm there. */
    readonly keepOriginalStyles?: boolean;
};
2023-02-16 20:17:48 +00:00
2023-03-03 23:47:50 +00:00
// Module-level ImageReplacer shared by every patchDocument call; it is used there
// through its getMediaData and replace methods.
const imageReplacer = new ImageReplacer();
/**
 * Patches a .docx archive by replacing `{{key}}` placeholders with the supplied content.
 *
 * Steps:
 * 1. Load the archive. Entries that end in neither `.xml` nor `.rels` are copied through
 *    as raw bytes; all other entries are parsed to JSON with `toJson`.
 * 2. For every entry under `word/` that is not an `.xml.rels` file, run `replacer` for each
 *    patch until it reports (by throwing) that no occurrence is left.
 * 3. Append image and hyperlink relationships to the matching `word/_rels/<name>.rels`
 *    entry, creating that entry when it does not exist yet.
 * 4. If any media was found, register image content types in `[Content_Types].xml`.
 * 5. Re-serialise everything into a new archive of the requested `outputType`.
 *
 * The options object carries `outputType`, `data`, `patches` and the optional
 * `keepOriginalStyles` flag, which is forwarded unchanged to `replacer`.
 *
 * @returns The regenerated archive in the representation selected by `outputType`.
 * @throws Error when media was found but the archive has no `[Content_Types].xml` entry.
 */
export const patchDocument = async <T extends PatchDocumentOutputType = PatchDocumentOutputType>({
    outputType,
    data,
    patches,
    keepOriginalStyles,
}: PatchDocumentOptions<T>): Promise<OutputByType[T]> => {
    const zipContent = await JSZip.loadAsync(data);
    const contexts = new Map<string, IContext>();
    // Minimal stand-in for a File: only the Media collection is provided.
    const file = {
        Media: new Media(),
    } as unknown as File;

    // Parsed XML/.rels entries, keyed by their path inside the archive.
    const map = new Map<string, Element>();

    // eslint-disable-next-line functional/prefer-readonly-type
    const imageRelationshipAdditions: IImageRelationshipAddition[] = [];
    // eslint-disable-next-line functional/prefer-readonly-type
    const hyperlinkRelationshipAdditions: IHyperlinkRelationshipAddition[] = [];
    let hasMedia = false;

    // Entries that are neither .xml nor .rels, carried over as raw bytes.
    const binaryContentMap = new Map<string, Uint8Array>();

    for (const [key, value] of Object.entries(zipContent.files)) {
        if (!key.endsWith(".xml") && !key.endsWith(".rels")) {
            binaryContentMap.set(key, await value.async("uint8array"));
            continue;
        }

        const json = toJson(await value.async("text"));

        if (key.startsWith("word/") && !key.endsWith(".xml.rels")) {
            const context: IContext = {
                file,
                viewWrapper: {
                    Relationships: {
                        // Only hyperlink relationships are captured here; the type and
                        // target-mode arguments are ignored and re-supplied when the
                        // relationship is appended further down.
                        createRelationship: (
                            linkId: string,
                            _: string,
                            target: string,
                            __: (typeof TargetModeType)[keyof typeof TargetModeType],
                        ) => {
                            // eslint-disable-next-line functional/immutable-data
                            hyperlinkRelationshipAdditions.push({
                                key,
                                hyperlink: {
                                    id: linkId,
                                    link: target,
                                },
                            });
                        },
                    },
                } as unknown as IViewWrapper,
                stack: [],
            };
            contexts.set(key, context);

            for (const [patchKey, patchValue] of Object.entries(patches)) {
                const patchText = `{{${patchKey}}}`;
                // TODO: mutates json. Make it immutable
                // We need to loop through to catch every occurrence of the patch text
                // It is possible that the patch text is in the same run
                // This algorithm is limited to one patch per text run
                // Once it cannot find any more occurrences, it will throw an error, and then we break out of the loop
                // https://github.com/dolanmiu/docx/issues/2267
                while (true) {
                    // Hyperlinks created for this attempt are staged and only committed when
                    // the replacement succeeds. The last attempt of every loop always throws,
                    // so committing eagerly would leave an orphan relationship (and possibly a
                    // needless .rels entry) for every word/ part and every patch.
                    // eslint-disable-next-line functional/prefer-readonly-type
                    const pendingHyperlinks: IHyperlinkRelationshipAddition[] = [];

                    try {
                        replacer({
                            json,
                            patch: {
                                ...patchValue,
                                children: patchValue.children.map((element) => {
                                    // We need to replace external hyperlinks with concrete hyperlinks
                                    if (element instanceof ExternalHyperlink) {
                                        const concreteHyperlink = new ConcreteHyperlink(element.options.children, uniqueId());
                                        // eslint-disable-next-line functional/immutable-data
                                        pendingHyperlinks.push({
                                            key,
                                            hyperlink: {
                                                id: concreteHyperlink.linkId,
                                                link: element.options.link,
                                            },
                                        });
                                        return concreteHyperlink;
                                    } else {
                                        return element;
                                    }
                                }),
                                // eslint-disable-next-line @typescript-eslint/no-explicit-any
                            } as any,
                            patchText,
                            context,
                            keepOriginalStyles,
                        });
                    } catch {
                        // Deliberate: a throw is how "no more occurrences" is signalled.
                        break;
                    }

                    // eslint-disable-next-line functional/immutable-data
                    hyperlinkRelationshipAdditions.push(...pendingHyperlinks);
                }
            }

            const mediaDatas = imageReplacer.getMediaData(JSON.stringify(json), context.file.Media);
            if (mediaDatas.length > 0) {
                hasMedia = true;
                // eslint-disable-next-line functional/immutable-data
                imageRelationshipAdditions.push({
                    key,
                    mediaDatas,
                });
            }
        }

        map.set(key, json);
    }

    for (const { key, mediaDatas } of imageRelationshipAdditions) {
        // eslint-disable-next-line functional/immutable-data
        const relationshipKey = `word/_rels/${key.split("/").pop()}.rels`;
        const relationshipsJson = map.get(relationshipKey) ?? createRelationshipFile();
        map.set(relationshipKey, relationshipsJson);

        // The first free index is read before appending, so the ids handed to the
        // image replacer and the ids appended below (index + i) line up.
        const index = getNextRelationshipIndex(relationshipsJson);
        const newJson = imageReplacer.replace(JSON.stringify(map.get(key)), mediaDatas, index);
        map.set(key, JSON.parse(newJson) as Element);

        for (let i = 0; i < mediaDatas.length; i++) {
            const { fileName } = mediaDatas[i];
            appendRelationship(
                relationshipsJson,
                index + i,
                "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image",
                `media/${fileName}`,
            );
        }
    }

    for (const { key, hyperlink } of hyperlinkRelationshipAdditions) {
        // eslint-disable-next-line functional/immutable-data
        const relationshipKey = `word/_rels/${key.split("/").pop()}.rels`;
        const relationshipsJson = map.get(relationshipKey) ?? createRelationshipFile();
        map.set(relationshipKey, relationshipsJson);

        appendRelationship(
            relationshipsJson,
            hyperlink.id,
            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink",
            hyperlink.link,
            TargetModeType.EXTERNAL,
        );
    }

    if (hasMedia) {
        const contentTypesJson = map.get("[Content_Types].xml");

        if (!contentTypesJson) {
            throw new Error("Could not find content types file");
        }

        appendContentType(contentTypesJson, "image/png", "png");
        appendContentType(contentTypesJson, "image/jpeg", "jpeg");
        appendContentType(contentTypesJson, "image/jpeg", "jpg");
        appendContentType(contentTypesJson, "image/bmp", "bmp");
        appendContentType(contentTypesJson, "image/gif", "gif");
        appendContentType(contentTypesJson, "image/svg+xml", "svg");
    }

    const zip = new JSZip();

    for (const [key, value] of map) {
        const output = toXml(value);

        zip.file(key, output);
    }

    for (const [key, value] of binaryContentMap) {
        zip.file(key, value);
    }

    // Media collected while patching is written under word/media/.
    for (const { data: stream, fileName } of file.Media.Array) {
        zip.file(`word/media/${fileName}`, stream);
    }

    return zip.generateAsync({
        type: outputType,
        mimeType: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        compression: "DEFLATE",
    });
};
2023-02-17 10:38:03 +00:00
/**
 * Serialises an xml-js element tree back into an XML string.
 *
 * @param jsonObj - The element tree to serialise.
 * @returns The XML text produced by `js2xml`.
 */
const toXml = (jsonObj: Element): string => js2xml(jsonObj);
2023-03-08 23:30:51 +00:00
/**
 * Builds the element tree for a new, empty relationships file: an XML
 * declaration plus a single `Relationships` root element without children.
 *
 * @returns A fresh element tree on every call.
 */
const createRelationshipFile = (): Element => {
    // Root element; relationship entries are appended to its `elements` later.
    const relationshipsRoot = {
        type: "element",
        name: "Relationships",
        attributes: {
            xmlns: "http://schemas.openxmlformats.org/package/2006/relationships",
        },
        elements: [],
    };

    const declaration = {
        attributes: {
            version: "1.0",
            encoding: "UTF-8",
            standalone: "yes",
        },
    };

    return {
        declaration,
        elements: [relationshipsRoot],
    };
};