Re-name templater to patcher
This commit is contained in:
74
src/patcher/from-docx.ts
Normal file
74
src/patcher/from-docx.ts
Normal file
@ -0,0 +1,74 @@
|
||||
import * as JSZip from "jszip";
|
||||
import { Element, js2xml } from "xml-js";
|
||||
|
||||
import { ParagraphChild } from "@file/paragraph";
|
||||
import { FileChild } from "@file/file-child";
|
||||
|
||||
import { replacer } from "./replacer";
|
||||
import { findLocationOfText } from "./traverser";
|
||||
import { toJson } from "./util";
|
||||
|
||||
/**
 * Raw `.docx` payload accepted by `patchDocument`.
 * The value is handed unchanged to `JSZip.loadAsync`.
 */
type InputDataType =
    | Buffer
    | string
    // eslint-disable-next-line functional/prefer-readonly-type
    | number[]
    | Uint8Array
    | ArrayBuffer
    | Blob
    | NodeJS.ReadableStream;
|
||||
|
||||
/**
 * Selects how a patch's children replace the matched text.
 */
export enum PatchType {
    /** The matched paragraph element itself is swapped for the patch's children. */
    DOCUMENT = "file",
    /** The matched text inside the paragraph is swapped for the patch's children. */
    PARAGRAPH = "paragraph",
}
|
||||
|
||||
/** Patch whose children are inserted in place of the matched text inside a paragraph. */
interface ParagraphPatch {
    readonly type: PatchType.PARAGRAPH;
    readonly children: readonly ParagraphChild[];
}

/** Patch whose children replace the whole matched paragraph element. */
interface FilePatch {
    readonly type: PatchType.DOCUMENT;
    readonly children: readonly FileChild[];
}

/**
 * A single replacement instruction: the `text` to search for, plus the
 * `type`-tagged content that replaces it.
 */
export type IPatch = (ParagraphPatch | FilePatch) & {
    /** Text searched for in each paragraph of the document. */
    readonly text: string;
};

/** Options accepted by `patchDocument`. */
export interface PatchDocumentOptions {
    /** Patches applied, in array order, to every part under `word/`. */
    readonly patches: readonly IPatch[];
}
|
||||
|
||||
/**
 * Applies text patches to a `.docx` archive and returns the re-zipped document.
 *
 * XML parts (`.xml` / `.rels`) are parsed with `toJson`; those under `word/` are
 * searched for each patch's `text` and rewritten through `replacer`. Every other
 * entry (images, fonts, embedded objects, ...) is copied through as raw bytes so
 * that it is not corrupted by a text/XML round trip.
 *
 * @param data - The source `.docx`, in any form accepted by `JSZip.loadAsync`.
 * @param options - The patches to apply, in array order.
 * @returns The patched document as a Node `Buffer`.
 */
export const patchDocument = async (data: InputDataType, options: PatchDocumentOptions): Promise<Buffer> => {
    const zipContent = await JSZip.loadAsync(data);
    const zip = new JSZip();

    for (const [key, value] of Object.entries(zipContent.files)) {
        if (value.dir) {
            // Folder entries have no content to patch or copy.
            continue;
        }

        if (!key.endsWith(".xml") && !key.endsWith(".rels")) {
            // Binary part: decoding it as text and re-serialising it as XML would corrupt it.
            zip.file(key, await value.async("uint8array"));
            continue;
        }

        const json = toJson(await value.async("text"));

        if (key.startsWith("word/")) {
            for (const patch of options.patches) {
                const renderedParagraphs = findLocationOfText(json, patch.text);
                replacer(json, patch, renderedParagraphs);
            }
        }

        zip.file(key, toXml(json));
    }

    return zip.generateAsync({
        type: "nodebuffer",
        mimeType: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        compression: "DEFLATE",
    });
};
|
||||
|
||||
/** Serialises an xml-js element tree back into an XML string via `js2xml`. */
const toXml = (jsonObj: Element): string => js2xml(jsonObj);
|
1
src/patcher/index.ts
Normal file
1
src/patcher/index.ts
Normal file
@ -0,0 +1 @@
|
||||
export * from "./from-docx";
|
56
src/patcher/paragraph-split-inject.ts
Normal file
56
src/patcher/paragraph-split-inject.ts
Normal file
@ -0,0 +1,56 @@
|
||||
import { Element } from "xml-js";
|
||||
import { createTextElementContents } from "./util";
|
||||
|
||||
/**
 * Finds the first run (`w:r`) among a paragraph's direct children that has a
 * text element (`w:t`) whose text contains `token`.
 *
 * Text elements without any child node (for example ones previously emptied by
 * a patch) are treated as empty text instead of raising a `TypeError`.
 *
 * @param paragraphElement - The paragraph element to search.
 * @param token - The marker string to look for.
 * @returns The index of the matching run within `paragraphElement.elements`.
 * @throws Error with message "Token not found" when no run contains the token.
 */
export const findRunElementIndexWithToken = (paragraphElement: Element, token: string): number => {
    const runIndex = (paragraphElement.elements ?? []).findIndex(
        (child) =>
            child.type === "element" &&
            child.name === "w:r" &&
            (child.elements ?? []).some(
                (e) => e.type === "element" && e.name === "w:t" && String(e.elements?.[0]?.text ?? "").includes(token),
            ),
    );

    if (runIndex === -1) {
        throw new Error("Token not found");
    }

    return runIndex;
};
|
||||
|
||||
/**
 * Splits a run (`w:r`) in two around `token`.
 *
 * Each `w:t` child has its text split on `token`; the pieces become separate
 * `w:t` elements marked `xml:space="preserve"` (other attributes are kept).
 * Everything up to and including the piece before the token goes to `left`,
 * the rest goes to `right`. Both results are deep copies of the original run
 * with their `elements` replaced.
 *
 * NOTE(review): children that precede the token-bearing `w:t` (e.g. `w:rPr`)
 * end up in `left` only; confirm whether `right` should also carry them.
 *
 * @param runElement - The run element containing the token.
 * @param token - The marker string to split on.
 * @returns The `left` and `right` halves of the run.
 */
export const splitRunElement = (runElement: Element, token: string): { readonly left: Element; readonly right: Element } => {
    let splitIndex = 0;

    const splitElements = (runElement.elements ?? []).flatMap((e, i): Element[] => {
        if (e.type !== "element" || e.name !== "w:t") {
            return [e];
        }

        // A w:t without a text node is treated as empty text.
        const pieces = String(e.elements?.[0]?.text ?? "").split(token);

        // Only the w:t that actually contains the token marks the split point;
        // other w:t siblings must not move it.
        if (pieces.length > 1) {
            splitIndex = i;
        }

        return pieces.map((piece) => ({
            ...e,
            attributes: {
                ...e.attributes,
                "xml:space": "preserve",
            },
            elements: createTextElementContents(piece),
        }));
    });

    const leftRunElement: Element = {
        ...JSON.parse(JSON.stringify(runElement)),
        elements: splitElements.slice(0, splitIndex + 1),
    };

    const rightRunElement: Element = {
        ...JSON.parse(JSON.stringify(runElement)),
        elements: splitElements.slice(splitIndex + 1),
    };

    return { left: leftRunElement, right: rightRunElement };
};
|
64
src/patcher/paragraph-token-replacer.ts
Normal file
64
src/patcher/paragraph-token-replacer.ts
Normal file
@ -0,0 +1,64 @@
|
||||
import { Element } from "xml-js";
|
||||
|
||||
import { createTextElementContents } from "./util";
|
||||
import { IRenderedParagraphNode } from "./run-renderer";
|
||||
|
||||
/** Progress of a token replacement while walking a paragraph's text parts. */
enum ReplaceMode {
    /** Still looking for the part in which the text to replace begins. */
    START = 0,
    /** The beginning has been replaced; following parts are cleared until the end is reached. */
    MIDDLE = 1,
    /** The whole text has been handled; remaining parts are left untouched. */
    END = 2,
}
|
||||
|
||||
/**
 * Replaces the first occurrence of `originalText` in a paragraph with
 * `replacementText`, even when the occurrence is spread over several text
 * parts or runs. The paragraph element is mutated in place.
 *
 * The part in which the occurrence starts receives `replacementText` in place
 * of its share of the original text; parts fully covered by the occurrence are
 * emptied; the part in which it ends keeps only the text after the occurrence.
 *
 * @param paragraphElement - The `w:p` element to modify.
 * @param renderedParagraph - The rendered view of that paragraph (runs, parts and offsets).
 * @param originalText - The text to look for.
 * @param replacementText - The text to put in its place.
 * @returns The same `paragraphElement`; unchanged when `originalText` is not found.
 * @throws Error when a rendered part does not map to an element of `paragraphElement`.
 */
export const replaceTokenInParagraphElement = ({
    paragraphElement,
    renderedParagraph,
    originalText,
    replacementText,
}: {
    readonly paragraphElement: Element;
    readonly renderedParagraph: IRenderedParagraphNode;
    readonly originalText: string;
    readonly replacementText: string;
}): Element => {
    const startIndex = renderedParagraph.text.indexOf(originalText);

    if (startIndex === -1) {
        return paragraphElement;
    }

    // Inclusive paragraph-level offset of the last character to replace.
    const endIndex = startIndex + originalText.length - 1;

    const textElementAt = (runIndex: number, partIndex: number): Element => {
        const found = paragraphElement.elements?.[runIndex]?.elements?.[partIndex];

        if (!found) {
            throw new Error("Could not find text element");
        }

        return found;
    };

    let replaceMode = ReplaceMode.START;

    for (const run of renderedParagraph.runs) {
        for (const { text, index, start, end } of run.parts) {
            switch (replaceMode) {
                case ReplaceMode.START: {
                    // Only the part that actually contains the first character qualifies.
                    if (startIndex < start || startIndex > end) {
                        break;
                    }

                    // Convert paragraph-level offsets to offsets inside this part's text.
                    const from = startIndex - start;
                    const to = Math.min(endIndex, end) - start;
                    // Slice by position rather than by content so that an earlier,
                    // identical fragment in the same part is never replaced by mistake.
                    const firstPart = text.slice(0, from) + replacementText + text.slice(to + 1);
                    patchTextElement(textElementAt(run.index, index), firstPart);
                    replaceMode = endIndex <= end ? ReplaceMode.END : ReplaceMode.MIDDLE;
                    break;
                }
                case ReplaceMode.MIDDLE:
                    if (endIndex <= end) {
                        // The occurrence ends here: keep only what follows it.
                        const lastPart = text.substring(endIndex - start + 1);
                        patchTextElement(textElementAt(run.index, index), lastPart);
                        replaceMode = ReplaceMode.END;
                    } else {
                        // Entirely covered by the occurrence.
                        patchTextElement(textElementAt(run.index, index), "");
                    }
                    break;
                default:
            }
        }
    }

    return paragraphElement;
};
|
||||
|
||||
/**
 * Overwrites the children of `element` with the text-node contents built by
 * `createTextElementContents(text)`. Mutates and returns the same element.
 */
const patchTextElement = (element: Element, text: string): Element =>
    // eslint-disable-next-line functional/immutable-data
    Object.assign(element, { elements: createTextElementContents(text) });
|
87
src/patcher/replacer.ts
Normal file
87
src/patcher/replacer.ts
Normal file
@ -0,0 +1,87 @@
|
||||
import { Element } from "xml-js";
|
||||
import * as xml from "xml";
|
||||
|
||||
import { Formatter } from "@export/formatter";
|
||||
import { XmlComponent } from "@file/xml-components";
|
||||
|
||||
import { IPatch, PatchType } from "./from-docx";
|
||||
import { toJson } from "./util";
|
||||
import { IRenderedParagraphNode } from "./run-renderer";
|
||||
import { replaceTokenInParagraphElement } from "./paragraph-token-replacer";
|
||||
import { findRunElementIndexWithToken, splitRunElement } from "./paragraph-split-inject";
|
||||
|
||||
const formatter = new Formatter();
|
||||
|
||||
const SPLIT_TOKEN = "ɵ";
|
||||
|
||||
/**
 * Applies one patch to every matched paragraph of a parsed XML part, mutating
 * `json` in place.
 *
 * - `PatchType.DOCUMENT`: the matched paragraph element is replaced, inside its
 *   parent, by the patch's rendered children.
 * - `PatchType.PARAGRAPH`: the patch text inside the paragraph is replaced by
 *   the rendered children. If the text is only part of the paragraph, it is
 *   first replaced with `SPLIT_TOKEN`, the run holding the token is split, and
 *   the children are inserted between the two halves.
 *
 * @param json - Root element of the parsed XML part.
 * @param options - The patch to apply.
 * @param renderedParagraphs - Paragraphs containing the patch text. Assumed to be
 *   in traversal order, as `findLocationOfText` returns them — TODO confirm for other callers.
 * @returns The same `json` root.
 * @throws Error from the path helpers or `findRunElementIndexWithToken` when a location cannot be resolved.
 */
export const replacer = (json: Element, options: IPatch, renderedParagraphs: readonly IRenderedParagraphNode[]): Element => {
    // Process matches last-to-first: a DOCUMENT splice may change the number of
    // siblings, which would invalidate the stored paths of any later match.
    for (const renderedParagraph of [...renderedParagraphs].reverse()) {
        // Built inside the loop so that every match receives its own element objects.
        const textJson = options.children.map((c) => toJson(xml(formatter.format(c as XmlComponent)))).map((c) => c.elements![0]);

        if (options.type === PatchType.DOCUMENT) {
            const parentElement = goToParentElementFromPath(json, renderedParagraph.path);
            const elementIndex = getLastElementIndexFromPath(renderedParagraph.path);
            // Easy case where the text is the entire paragraph
            // We can assume that the Paragraph/Table only has one element
            // eslint-disable-next-line functional/immutable-data, prefer-destructuring
            parentElement.elements?.splice(elementIndex, 1, ...textJson);
        } else if (options.type === PatchType.PARAGRAPH) {
            const paragraphElement = goToElementFromPath(json, renderedParagraph.path);
            const startIndex = renderedParagraph.text.indexOf(options.text);
            const endIndex = startIndex + options.text.length - 1;

            if (startIndex === 0 && endIndex === renderedParagraph.text.length - 1) {
                // Easy case where the text is the entire paragraph:
                // swap the content but keep the paragraph's own properties (w:pPr).
                const paragraphProperties = (paragraphElement.elements ?? []).filter((e) => e.name === "w:pPr");
                // eslint-disable-next-line functional/immutable-data
                paragraphElement.elements = [...paragraphProperties, ...textJson];
            } else {
                // Hard case where the text is only part of the paragraph
                replaceTokenInParagraphElement({
                    paragraphElement,
                    renderedParagraph,
                    originalText: options.text,
                    replacementText: SPLIT_TOKEN,
                });

                const index = findRunElementIndexWithToken(paragraphElement, SPLIT_TOKEN);

                const { left, right } = splitRunElement(paragraphElement.elements![index], SPLIT_TOKEN);
                // eslint-disable-next-line functional/immutable-data
                paragraphElement.elements!.splice(index, 1, left, ...textJson, right);
            }
        }
    }

    return json;
};
|
||||
|
||||
/**
 * Walks down the element tree along `path` and returns the element found there.
 *
 * `path[0]` is the index of the root itself and is skipped; each following
 * entry is a child index within the current element's `elements`.
 *
 * @param json - Root element the path starts from.
 * @param path - Child indices leading to the target, root index first.
 * @returns The element at the end of the path (`json` itself for a path of length 0 or 1).
 * @throws Error with message "Could not find element" when any step has no matching child.
 */
const goToElementFromPath = (json: Element, path: readonly number[]): Element => {
    let element = json;

    // We start from 1 because the first element is the root element
    // Which we do not want to double count
    for (let i = 1; i < path.length; i++) {
        const next = element.elements?.[path[i]];

        // Covers both a missing `elements` array and an out-of-range index.
        if (!next) {
            throw new Error("Could not find element");
        }

        element = next;
    }

    return element;
};
|
||||
|
||||
/** Resolves the element one level above the one `path` points to. */
const goToParentElementFromPath = (json: Element, path: readonly number[]): Element => {
    const parentPath = path.slice(0, -1);

    return goToElementFromPath(json, parentPath);
};
|
||||
|
||||
/** Returns the final entry of `path`, i.e. the target's index within its parent. */
const getLastElementIndexFromPath = (path: readonly number[]): number => {
    const [last] = path.slice(-1);

    return last;
};
|
113
src/patcher/run-renderer.ts
Normal file
113
src/patcher/run-renderer.ts
Normal file
@ -0,0 +1,113 @@
|
||||
import { Element } from "xml-js";
|
||||
|
||||
import { ElementWrapper } from "./traverser";
|
||||
|
||||
/** Plain-text view of a paragraph (`w:p`) together with where it sits in the tree. */
export interface IRenderedParagraphNode {
    /** Concatenated text of all rendered runs. */
    readonly text: string;
    /** The paragraph's runs (`w:r` children), in order. */
    readonly runs: readonly IRenderedRunNode[];
    /** Index of the paragraph within its parent, or -1 for a paragraph without children. */
    readonly index: number;
    /** Indices from the root down to the paragraph; empty for a paragraph without children. */
    readonly path: readonly number[];
}

/** Character range inside the paragraph text, as computed by `renderRunNode`. */
interface StartAndEnd {
    readonly start: number;
    readonly end: number;
}

/** One text element (`w:t`) of a run. */
interface IParts extends StartAndEnd {
    /** Text held by the element. */
    readonly text: string;
    /** Index of the element within the run's children. */
    readonly index: number;
}

/** Plain-text view of a run (`w:r`). */
export interface IRenderedRunNode extends StartAndEnd {
    /** Concatenated text of all parts. */
    readonly text: string;
    /** The run's text elements, in order. */
    readonly parts: readonly IParts[];
    /** Index of the run within the paragraph's children, or -1 for a run without children. */
    readonly index: number;
}
|
||||
|
||||
/**
 * Renders a paragraph wrapper into its plain text, its runs and its location.
 *
 * @param node - Wrapper around a `w:p` element.
 * @returns The rendered paragraph. A paragraph without children yields empty
 *   text, no runs, index -1 and an empty path.
 * @throws Error when the wrapped element is not a `w:p`.
 */
export const renderParagraphNode = (node: ElementWrapper): IRenderedParagraphNode => {
    const { element } = node;

    if (element.name !== "w:p") {
        throw new Error(`Invalid node type: ${element.name}`);
    }

    const children = element.elements;

    if (!children) {
        return { text: "", runs: [], index: -1, path: [] };
    }

    // Running length of the text rendered so far; becomes the start offset of the next run.
    let offset = 0;
    // eslint-disable-next-line functional/prefer-readonly-type
    const runs: IRenderedRunNode[] = [];

    children.forEach((child, i) => {
        if (child.name !== "w:r") {
            return;
        }

        const rendered = renderRunNode(child, i, offset);
        offset += rendered.text.length;
        // eslint-disable-next-line functional/immutable-data
        runs.push(rendered);
    });

    return {
        text: runs.map((run) => run.text).join(""),
        runs,
        index: node.index,
        path: buildNodePath(node),
    };
};
|
||||
|
||||
/**
 * Renders a run (`w:r`) into its text and the character range of each text part.
 *
 * Every `w:t` child that has at least one child node becomes a part whose
 * `start`/`end` are inclusive offsets into the paragraph text. Consecutive
 * parts are laid out back to back: a part starts one past the previous part's `end`.
 *
 * @param node - The run element.
 * @param index - Index of the run within the paragraph's children.
 * @param currentRunStringIndex - Offset in the paragraph text at which this run starts.
 * @returns The rendered run. A run without children has index -1 and `end === start`;
 *   a run without text parts also has `end === start`.
 * @throws Error when `node` is not a `w:r`.
 */
const renderRunNode = (node: Element, index: number, currentRunStringIndex: number): IRenderedRunNode => {
    if (node.name !== "w:r") {
        throw new Error(`Invalid node type: ${node.name}`);
    }

    if (!node.elements) {
        return {
            text: "",
            parts: [],
            index: -1,
            start: currentRunStringIndex,
            end: currentRunStringIndex,
        };
    }

    const parts = node.elements.reduce<readonly IParts[]>((acc, element, i) => {
        if (element.name !== "w:t" || !element.elements || element.elements.length === 0) {
            return acc;
        }

        const partText = element.elements[0].text?.toString() ?? "";
        // `end` is inclusive, so the next part begins one character after it.
        const start = acc.length > 0 ? acc[acc.length - 1].end + 1 : currentRunStringIndex;

        return [...acc, { text: partText, index: i, start, end: start + partText.length - 1 }];
    }, []);

    const text = parts.map((part) => part.text).join("");

    return {
        text,
        parts,
        index,
        start: currentRunStringIndex,
        end: parts.length > 0 ? parts[parts.length - 1].end : currentRunStringIndex,
    };
};
|
||||
|
||||
/**
 * Collects the `index` of a wrapper and of each of its ancestors,
 * ordered from the outermost ancestor down to the wrapper itself.
 */
const buildNodePath = (node: ElementWrapper): readonly number[] => {
    // eslint-disable-next-line functional/prefer-readonly-type
    const path: number[] = [];

    // Climb the parent chain, prepending so that the topmost ancestor ends up first.
    for (let current: ElementWrapper | undefined = node; current; current = current.parent) {
        // eslint-disable-next-line functional/immutable-data
        path.unshift(current.index);
    }

    return path;
};
|
62
src/patcher/traverser.ts
Normal file
62
src/patcher/traverser.ts
Normal file
@ -0,0 +1,62 @@
|
||||
import { Element } from "xml-js";
|
||||
|
||||
import { IRenderedParagraphNode, renderParagraphNode } from "./run-renderer";
|
||||
|
||||
/** An element together with its position in the tree. */
export interface ElementWrapper {
    /** The wrapped xml-js element. */
    readonly element: Element;
    /** Index of the element within its parent's `elements`. */
    readonly index: number;
    /** Wrapper of the parent element; `undefined` for the root. */
    readonly parent: ElementWrapper | undefined;
}

/**
 * Location of a piece of text inside the tree.
 * NOTE(review): not referenced by the other patcher sources visible here — confirm it is still needed.
 */
export interface ILocationOfText {
    readonly parent: Element;
    readonly startIndex: number;
    readonly endIndex: number;
    readonly currentText: string;
    /** Optional because the text could start in the middle of a tag. */
    readonly startElement?: Element;
    /** Optional because the text could end in the middle of a tag. */
    readonly endElement?: Element;
}
|
||||
|
||||
/**
 * Wraps each direct child of `wrapper.element`, recording its index and
 * `wrapper` as its parent. Returns an empty list when there are no children.
 */
const elementsToWrapper = (wrapper: ElementWrapper): readonly ElementWrapper[] => {
    const children = wrapper.element.elements;

    if (!children) {
        return [];
    }

    return children.map((element, index) => ({ element, index, parent: wrapper }));
};
|
||||
|
||||
/**
 * Finds every paragraph (`w:p`) below `node` whose rendered text contains `text`.
 *
 * The tree is walked breadth-first starting from `node`'s children. Paragraphs
 * are rendered with `renderParagraphNode` and are not descended into.
 *
 * @param node - Root element of the parsed XML part.
 * @param text - The text to look for.
 * @returns The matching rendered paragraphs, in traversal order.
 */
export const findLocationOfText = (node: Element, text: string): readonly IRenderedParagraphNode[] => {
    // eslint-disable-next-line functional/prefer-readonly-type
    const matches: IRenderedParagraphNode[] = [];

    // eslint-disable-next-line functional/prefer-readonly-type
    const queue: ElementWrapper[] = [
        ...elementsToWrapper({
            element: node,
            index: 0,
            parent: undefined,
        }),
    ];

    // Advance a read index instead of shifting the array: same visiting order,
    // without re-indexing the queue on every step.
    for (let head = 0; head < queue.length; head++) {
        const currentNode = queue[head];

        if (currentNode.element.name === "w:p") {
            const rendered = renderParagraphNode(currentNode);

            if (rendered.text.includes(text)) {
                // eslint-disable-next-line functional/immutable-data
                matches.push(rendered);
            }
        } else {
            for (const child of elementsToWrapper(currentNode)) {
                // eslint-disable-next-line functional/immutable-data
                queue.push(child);
            }
        }
    }

    return matches;
};
|
19
src/patcher/util.ts
Normal file
19
src/patcher/util.ts
Normal file
@ -0,0 +1,19 @@
|
||||
import { xml2js, Element } from "xml-js";
|
||||
import * as xml from "xml";
|
||||
|
||||
import { Formatter } from "@export/formatter";
|
||||
import { Text } from "@file/paragraph/run/run-components/text";
|
||||
|
||||
const formatter = new Formatter();
|
||||
|
||||
/** Parses an XML string into xml-js's non-compact element tree. */
export const toJson = (xmlData: string): Element => xml2js(xmlData, { compact: false }) as Element;
|
||||
|
||||
/**
 * Builds the child nodes of a text element for `text` by formatting a `Text`
 * component, serialising it to XML and parsing it back with `toJson`.
 *
 * @param text - The text to wrap.
 * @returns The children of the parsed text element, or an empty array when it has none.
 */
// eslint-disable-next-line functional/prefer-readonly-type
export const createTextElementContents = (text: string): Element[] => {
    const serialisedText = xml(formatter.format(new Text({ text })));
    const [textElement] = toJson(serialisedText).elements!;

    return textElement.elements ?? [];
};
|
Reference in New Issue
Block a user