Re-name templater to patcher

This commit is contained in:
Dolan Miu
2023-02-25 20:18:00 +00:00
parent 3f6c006716
commit 4e875b4744
9 changed files with 1 additions and 1 deletions

74
src/patcher/from-docx.ts Normal file
View File

@ -0,0 +1,74 @@
import * as JSZip from "jszip";
import { Element, js2xml } from "xml-js";
import { ParagraphChild } from "@file/paragraph";
import { FileChild } from "@file/file-child";
import { replacer } from "./replacer";
import { findLocationOfText } from "./traverser";
import { toJson } from "./util";
/**
 * Input forms accepted by `patchDocument`; the value is handed unchanged to `JSZip.loadAsync`.
 */
// eslint-disable-next-line functional/prefer-readonly-type
type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream;
/**
 * Discriminator for a patch; `replacer` branches on it to decide how the patch's children are inserted.
 */
export enum PatchType {
// The matched paragraph element is removed from its parent and the patch's children are spliced in at its position.
DOCUMENT = "file",
// The patch's children are placed inside the matched paragraph, replacing either all of its contents or only the matched text.
PARAGRAPH = "paragraph",
}
/** Patch whose replacement content is a list of paragraph-level children. */
type ParagraphPatch = {
readonly type: PatchType.PARAGRAPH;
readonly children: readonly ParagraphChild[];
};
/** Patch whose replacement content is a list of file-level children. */
type FilePatch = {
readonly type: PatchType.DOCUMENT;
readonly children: readonly FileChild[];
};
/**
 * A single patch: `text` is the string searched for in each paragraph's rendered text,
 * and the `type`/`children` pair describes what is inserted where it is found.
 */
export type IPatch = {
readonly text: string;
} & (ParagraphPatch | FilePatch);
/** Options for `patchDocument`. */
export interface PatchDocumentOptions {
// Patches to apply; `patchDocument` runs each of them against every parsed part under `word/`.
readonly patches: readonly IPatch[];
}
/**
 * Applies a list of text patches to a .docx archive and returns the rebuilt archive.
 *
 * XML parts (`.xml` / `.rels`) are parsed, parts under `word/` are patched, and every XML part is
 * serialised back. All other entries (images, fonts, embedded objects, ...) are copied through as
 * raw bytes: decoding them as text and round-tripping them through the XML parser would corrupt them.
 *
 * @param data - The source document in any form accepted by `JSZip.loadAsync`.
 * @param options - The patches to apply.
 * @returns A Node `Buffer` containing the patched, DEFLATE-compressed document.
 */
export const patchDocument = async (data: InputDataType, options: PatchDocumentOptions): Promise<Buffer> => {
    const zipContent = await JSZip.loadAsync(data);

    // One map for both kinds of content so the output keeps the input's entry order.
    const parts = new Map<string, Element | Uint8Array>();

    for (const [key, value] of Object.entries(zipContent.files)) {
        if (value.dir) {
            // Directory entries have no content of their own.
            // NOTE(review): relies on JSZip recreating parent folders when files are added (createFolders) - confirm for the pinned JSZip version.
            continue;
        }

        if (!key.endsWith(".xml") && !key.endsWith(".rels")) {
            // Not an XML part: keep the bytes untouched.
            parts.set(key, await value.async("uint8array"));
            continue;
        }

        const json = toJson(await value.async("text"));
        if (key.startsWith("word/")) {
            for (const patch of options.patches) {
                // Locations are recomputed for every patch because the previous patch mutated `json`.
                const renderedParagraphs = findLocationOfText(json, patch.text);
                replacer(json, patch, renderedParagraphs);
            }
        }
        parts.set(key, json);
    }

    const zip = new JSZip();
    for (const [key, value] of parts) {
        zip.file(key, value instanceof Uint8Array ? value : toXml(value));
    }

    return zip.generateAsync({
        type: "nodebuffer",
        mimeType: "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        compression: "DEFLATE",
    });
};
/**
 * Serialises an xml-js element tree back into an XML string.
 *
 * @param jsonObj - Root of the element tree to serialise.
 * @returns The XML text produced by `js2xml`.
 */
const toXml = (jsonObj: Element): string => js2xml(jsonObj);

1
src/patcher/index.ts Normal file
View File

@ -0,0 +1 @@
// Re-exports everything exported by ./from-docx.
export * from "./from-docx";

View File

@ -0,0 +1,56 @@
import { Element } from "xml-js";
import { createTextElementContents } from "./util";
/**
 * Finds the first run (`w:r`) of a paragraph that has a text element (`w:t`) containing `token`.
 *
 * Text elements without a text node (for example ones that were blanked out and now have an
 * empty `elements` array) are skipped instead of crashing the search.
 *
 * @param paragraphElement - The paragraph whose direct children are searched.
 * @param token - The string to look for.
 * @returns The index of the matching run within `paragraphElement.elements`.
 * @throws Error("Token not found") when no run contains the token.
 */
export const findRunElementIndexWithToken = (paragraphElement: Element, token: string): number => {
    const children = paragraphElement.elements ?? [];

    for (let i = 0; i < children.length; i++) {
        const child = children[i];
        if (child.type !== "element" || child.name !== "w:r") {
            continue;
        }

        const containsToken = (child.elements ?? []).some((candidate) => {
            if (candidate.type !== "element" || candidate.name !== "w:t") {
                return false;
            }
            // `elements` may be missing or empty, so every step of the lookup is optional.
            const value = candidate.elements?.[0]?.text;
            return typeof value === "string" && value.includes(token);
        });

        if (containsToken) {
            return i;
        }
    }

    throw new Error("Token not found");
};
/**
 * Splits a run into two runs around `token`.
 *
 * Every `w:t` child is split on `token`; each resulting piece becomes its own `w:t` with
 * `xml:space="preserve"`. The children up to and including the piece just before the token go to
 * `left`, the rest go to `right`. The token itself is dropped.
 *
 * The split position is tracked in the flattened child list and is only moved by a text element
 * that actually contains the token, so text elements that follow the token end up in `right`.
 * When no text element contains the token, the split falls after the last `w:t`.
 *
 * NOTE(review): non-text children that precede the token (such as run properties) end up only in
 * `left` - confirm whether `right` is expected to carry them as well.
 *
 * @param runElement - The run to split; it is not mutated.
 * @param token - The marker to split on.
 * @returns Deep copies of the run holding the left and right halves of its children.
 */
export const splitRunElement = (runElement: Element, token: string): { readonly left: Element; readonly right: Element } => {
    let flattenedLength = 0;
    let tokenSplitIndex = -1;
    let lastTextIndex = 0;

    const splitElements = (runElement.elements ?? []).flatMap((e) => {
        if (e.type !== "element" || e.name !== "w:t") {
            flattenedLength += 1;
            return [e];
        }

        // A text element without a text node is treated as empty text.
        const text = String(e.elements?.[0]?.text ?? "");
        const pieces = text.split(token);

        if (pieces.length > 1 && tokenSplitIndex < 0) {
            // The first piece is the last thing that belongs to the left half.
            tokenSplitIndex = flattenedLength;
        }
        lastTextIndex = flattenedLength;
        flattenedLength += pieces.length;

        return pieces.map((piece) => ({
            ...e,
            attributes: {
                "xml:space": "preserve",
            },
            elements: createTextElementContents(piece),
        }));
    });

    const splitIndex = tokenSplitIndex >= 0 ? tokenSplitIndex : lastTextIndex;

    const leftRunElement: Element = {
        ...JSON.parse(JSON.stringify(runElement)),
        elements: splitElements.slice(0, splitIndex + 1),
    };
    const rightRunElement: Element = {
        ...JSON.parse(JSON.stringify(runElement)),
        elements: splitElements.slice(splitIndex + 1),
    };

    return { left: leftRunElement, right: rightRunElement };
};

View File

@ -0,0 +1,64 @@
import { Element } from "xml-js";
import { createTextElementContents } from "./util";
import { IRenderedParagraphNode } from "./run-renderer";
/** State of the scan performed by `replaceTokenInParagraphElement` over a paragraph's text parts. */
enum ReplaceMode {
// Still looking for the part in which the replacement begins.
START,
// The beginning has been replaced; following parts are blanked or trimmed until the part holding the end of the match.
MIDDLE,
// The end of the match has been handled; remaining parts are left untouched.
END,
}
/**
 * Replaces the first occurrence of `originalText` in a paragraph with `replacementText`, even when
 * the occurrence is spread over several text parts or runs.
 *
 * The replacement text is written into the part where the match begins; the remainder of the match
 * is removed from the following parts. The paragraph element is mutated in place.
 *
 * Offsets are handled relative to each part: `start` / `end` of a part are offsets into the whole
 * paragraph text, so they are rebased before slicing the part's own text. The text is replaced by
 * position rather than by searching, so an earlier identical substring in the same part is not hit.
 *
 * @param paragraphElement - The paragraph's element tree (mutated).
 * @param renderedParagraph - The rendered view of the same paragraph, providing text offsets.
 * @param originalText - The text to replace.
 * @param replacementText - The text written where the match begins.
 * @returns The same `paragraphElement` instance.
 */
export const replaceTokenInParagraphElement = ({
    paragraphElement,
    renderedParagraph,
    originalText,
    replacementText,
}: {
    readonly paragraphElement: Element;
    readonly renderedParagraph: IRenderedParagraphNode;
    readonly originalText: string;
    readonly replacementText: string;
}): Element => {
    const startIndex = renderedParagraph.text.indexOf(originalText);
    const endIndex = startIndex + originalText.length - 1;

    let replaceMode = ReplaceMode.START;

    for (const run of renderedParagraph.runs) {
        for (const { text, index, start, end } of run.parts) {
            switch (replaceMode) {
                case ReplaceMode.START: {
                    // Only the part that actually contains the first matched character qualifies.
                    if (startIndex < start || startIndex > end) {
                        break;
                    }
                    const from = startIndex - start;
                    const to = Math.min(endIndex, end) - start + 1;
                    const firstPart = text.substring(0, from) + replacementText + text.substring(to);
                    patchTextElement(paragraphElement.elements![run.index].elements![index], firstPart);
                    // If the match also ends in this part there is nothing left to trim.
                    replaceMode = endIndex <= end ? ReplaceMode.END : ReplaceMode.MIDDLE;
                    break;
                }
                case ReplaceMode.MIDDLE:
                    if (endIndex <= end) {
                        // The match ends here: keep only what follows it.
                        const lastPart = text.substring(endIndex - start + 1);
                        patchTextElement(paragraphElement.elements![run.index].elements![index], lastPart);
                        replaceMode = ReplaceMode.END;
                    } else {
                        // Entire part lies inside the match.
                        patchTextElement(paragraphElement.elements![run.index].elements![index], "");
                    }
                    break;
                default:
            }
        }
    }

    return paragraphElement;
};
/**
 * Overwrites the children of a text element with freshly created contents for `text`.
 *
 * @param element - The element to update; it is mutated in place.
 * @param text - The new text.
 * @returns The same `element` instance.
 */
const patchTextElement = (element: Element, text: string): Element =>
    // eslint-disable-next-line functional/immutable-data
    Object.assign(element, { elements: createTextElementContents(text) });

87
src/patcher/replacer.ts Normal file
View File

@ -0,0 +1,87 @@
import { Element } from "xml-js";
import * as xml from "xml";
import { Formatter } from "@export/formatter";
import { XmlComponent } from "@file/xml-components";
import { IPatch, PatchType } from "./from-docx";
import { toJson } from "./util";
import { IRenderedParagraphNode } from "./run-renderer";
import { replaceTokenInParagraphElement } from "./paragraph-token-replacer";
import { findRunElementIndexWithToken, splitRunElement } from "./paragraph-split-inject";
// Shared formatter used by `replacer` to format each patch child before it is converted to XML.
const formatter = new Formatter();
// Placeholder written in place of the matched text so the run holding it can be found and split there.
// NOTE(review): presumably chosen because it is unlikely to occur in document text - confirm.
const SPLIT_TOKEN = "ɵ";
/**
 * Applies one patch to every paragraph in which its text was found.
 *
 * - `PatchType.DOCUMENT`: the matched paragraph is removed from its parent and the patch's
 *   children are spliced in at its position.
 * - `PatchType.PARAGRAPH`: if the patch text is the whole paragraph text, the paragraph's children
 *   are replaced outright; otherwise the matched text is swapped for a marker, the run holding the
 *   marker is split in two, and the patch's children are inserted between the halves.
 *
 * Matches are processed in descending path order. Splicing a different number of elements into a
 * parent shifts the indices of the siblings that follow, so working from the end of the tree
 * towards the start keeps the paths of the not-yet-processed matches valid.
 *
 * @param json - Root of the parsed XML part; mutated in place.
 * @param options - The patch to apply.
 * @param renderedParagraphs - The paragraphs whose text contains `options.text`.
 * @returns The same `json` instance.
 */
export const replacer = (json: Element, options: IPatch, renderedParagraphs: readonly IRenderedParagraphNode[]): Element => {
    const orderedParagraphs = [...renderedParagraphs].sort((a, b) => {
        const shared = Math.min(a.path.length, b.path.length);
        for (let i = 0; i < shared; i++) {
            if (a.path[i] !== b.path[i]) {
                return b.path[i] - a.path[i];
            }
        }
        return b.path.length - a.path.length;
    });

    for (const renderedParagraph of orderedParagraphs) {
        // Built once per match so that every insertion gets its own element objects.
        const textJson = options.children.map((c) => toJson(xml(formatter.format(c as XmlComponent)))).map((c) => c.elements![0]);

        if (options.type === PatchType.DOCUMENT) {
            const parentElement = goToParentElementFromPath(json, renderedParagraph.path);
            const elementIndex = getLastElementIndexFromPath(renderedParagraph.path);
            // The whole paragraph is swapped for the patch's children.
            // eslint-disable-next-line functional/immutable-data, prefer-destructuring
            parentElement.elements?.splice(elementIndex, 1, ...textJson);
        } else if (options.type === PatchType.PARAGRAPH) {
            const paragraphElement = goToElementFromPath(json, renderedParagraph.path);
            const startIndex = renderedParagraph.text.indexOf(options.text);
            const endIndex = startIndex + options.text.length - 1;

            if (startIndex === 0 && endIndex === renderedParagraph.text.length - 1) {
                // Easy case where the text is the entire paragraph
                // eslint-disable-next-line functional/immutable-data
                paragraphElement.elements = textJson;
            } else {
                // Hard case where the text is only part of the paragraph:
                // mark the spot, split the run that holds the marker, insert between the halves.
                replaceTokenInParagraphElement({
                    paragraphElement,
                    renderedParagraph,
                    originalText: options.text,
                    replacementText: SPLIT_TOKEN,
                });

                const index = findRunElementIndexWithToken(paragraphElement, SPLIT_TOKEN);
                const { left, right } = splitRunElement(paragraphElement.elements![index], SPLIT_TOKEN);
                // eslint-disable-next-line functional/immutable-data
                paragraphElement.elements!.splice(index, 1, left, ...textJson, right);
            }
        }
    }

    return json;
};
/**
 * Walks from `json` down to the element addressed by `path`.
 *
 * The first entry of `path` stands for the root itself and is skipped; each following entry is an
 * index into the current element's `elements`.
 *
 * @param json - The root element the path is relative to.
 * @param path - Child indices, starting with the root's own entry.
 * @returns The element at the end of the path (the root itself for a path of length 0 or 1).
 * @throws Error("Could not find element") when a step has no children or its index is out of range.
 */
const goToElementFromPath = (json: Element, path: readonly number[]): Element => {
    let element = json;

    // We start from 1 because the first entry is the root element,
    // which we do not want to double count.
    for (let i = 1; i < path.length; i++) {
        const next = element.elements?.[path[i]];
        if (!next) {
            throw new Error("Could not find element");
        }
        element = next;
    }

    return element;
};
/**
 * Resolves the parent of the element addressed by `path`, i.e. the path with its last entry dropped.
 *
 * @param json - The root element the path is relative to.
 * @param path - Path to the child whose parent is wanted.
 * @returns The element found by following all but the last entry of `path`.
 */
const goToParentElementFromPath = (json: Element, path: readonly number[]): Element => {
    const parentPath = path.slice(0, -1);
    return goToElementFromPath(json, parentPath);
};
/**
 * Returns the final entry of `path`.
 *
 * @param path - Path whose final entry is wanted.
 * @returns The last number in `path` (`undefined` at runtime when the path is empty).
 */
const getLastElementIndexFromPath = (path: readonly number[]): number => {
    const [lastIndex] = path.slice(-1);
    return lastIndex;
};

113
src/patcher/run-renderer.ts Normal file
View File

@ -0,0 +1,113 @@
import { Element } from "xml-js";
import { ElementWrapper } from "./traverser";
/** Flattened view of a paragraph (`w:p`) as produced by `renderParagraphNode`. */
export interface IRenderedParagraphNode {
// Concatenation of the text of all rendered runs.
readonly text: string;
// One entry per `w:r` child of the paragraph.
readonly runs: readonly IRenderedRunNode[];
// Index of the paragraph within its parent's elements (-1 when the paragraph has no children).
readonly index: number;
// Indices leading from the root wrapper down to this paragraph (empty when the paragraph has no children).
readonly path: readonly number[];
}
/** Offsets into the paragraph's concatenated text; `end` is computed as an inclusive offset (start + length - 1). */
interface StartAndEnd {
readonly start: number;
readonly end: number;
}
/** One `w:t` text element inside a run. */
type IParts = {
// Text of the element's first child node.
readonly text: string;
// Index of the `w:t` element within the run's elements.
readonly index: number;
} & StartAndEnd;
/** Flattened view of a run (`w:r`) as produced by `renderRunNode`. */
export type IRenderedRunNode = {
// Concatenation of the text of all parts.
readonly text: string;
readonly parts: readonly IParts[];
// Index of the run within the paragraph's elements (-1 when the run has no children).
readonly index: number;
} & StartAndEnd;
/**
 * Builds the flattened text view of a paragraph.
 *
 * Each `w:r` child is rendered in document order; a run's offset is the length of the text
 * gathered from the runs before it.
 *
 * @param node - Wrapper around a `w:p` element.
 * @returns The paragraph's text, its rendered runs, its index and its path from the root.
 *          A paragraph without children yields empty text, no runs, index -1 and an empty path.
 * @throws Error when the wrapped element is not a `w:p`.
 */
export const renderParagraphNode = (node: ElementWrapper): IRenderedParagraphNode => {
    const { element } = node;

    if (element.name !== "w:p") {
        throw new Error(`Invalid node type: ${element.name}`);
    }

    const children = element.elements;
    if (!children) {
        return { text: "", runs: [], index: -1, path: [] };
    }

    // eslint-disable-next-line functional/prefer-readonly-type
    const runs: IRenderedRunNode[] = [];
    let text = "";

    children.forEach((child, childIndex) => {
        if (child.name !== "w:r") {
            return;
        }
        // The text collected so far is exactly what precedes this run.
        const rendered = renderRunNode(child, childIndex, text.length);
        // eslint-disable-next-line functional/immutable-data
        runs.push(rendered);
        text += rendered.text;
    });

    return {
        text,
        runs,
        index: node.index,
        path: buildNodePath(node),
    };
};
/**
 * Builds the flattened text view of a run.
 *
 * Every `w:t` child that has at least one child node becomes a part. Parts are laid out back to
 * back: a part covers the inclusive range `[start, start + length - 1]` and the next part starts
 * right after it, so ranges of consecutive parts never overlap.
 *
 * @param node - A `w:r` element.
 * @param index - Index of the run within its paragraph's elements.
 * @param currentRunStringIndex - Offset of the run's first character in the paragraph text.
 * @returns The run's text, its parts and its offsets. `end` is the inclusive end of the last part,
 *          or `currentRunStringIndex` when the run has no parts. A run without children yields
 *          empty text, no parts and index -1.
 * @throws Error when `node` is not a `w:r`.
 */
const renderRunNode = (node: Element, index: number, currentRunStringIndex: number): IRenderedRunNode => {
    if (node.name !== "w:r") {
        throw new Error(`Invalid node type: ${node.name}`);
    }

    if (!node.elements) {
        return {
            text: "",
            parts: [],
            index: -1,
            start: currentRunStringIndex,
            end: currentRunStringIndex,
        };
    }

    // eslint-disable-next-line functional/prefer-readonly-type
    const parts: IParts[] = [];
    let nextPartStart = currentRunStringIndex;

    node.elements.forEach((element, i) => {
        if (element.name !== "w:t" || !element.elements || element.elements.length === 0) {
            return;
        }
        const partText = element.elements[0].text?.toString() ?? "";
        // eslint-disable-next-line functional/immutable-data
        parts.push({
            text: partText,
            index: i,
            start: nextPartStart,
            end: nextPartStart + partText.length - 1,
        });
        // Advance by the full length so the next part begins after this one.
        nextPartStart += partText.length;
    });

    const text = parts.reduce((acc, curr) => acc + curr.text, "");

    return {
        text,
        parts,
        index,
        start: currentRunStringIndex,
        end: parts.length > 0 ? parts[parts.length - 1].end : currentRunStringIndex,
    };
};
/**
 * Collects the indices of a wrapper and all of its ancestors, outermost first.
 *
 * @param node - The wrapper whose location is wanted.
 * @returns The `index` of every wrapper from the topmost ancestor down to `node`.
 */
const buildNodePath = (node: ElementWrapper): readonly number[] => {
    // eslint-disable-next-line functional/prefer-readonly-type
    const indices: number[] = [];

    // Climb towards the root, then flip the result so the root comes first.
    for (let current: ElementWrapper | undefined = node; current; current = current.parent) {
        // eslint-disable-next-line functional/immutable-data
        indices.push(current.index);
    }

    // eslint-disable-next-line functional/immutable-data
    return indices.reverse();
};

62
src/patcher/traverser.ts Normal file
View File

@ -0,0 +1,62 @@
import { Element } from "xml-js";
import { IRenderedParagraphNode, renderParagraphNode } from "./run-renderer";
/** An element together with its position in the tree, so its path from the root can be rebuilt. */
export interface ElementWrapper {
readonly element: Element;
// Index of `element` within its parent's `elements` (0 for the root wrapper created by `findLocationOfText`).
readonly index: number;
// Wrapper of the parent element; `undefined` for the root wrapper.
readonly parent: ElementWrapper | undefined;
}
// NOTE(review): nothing in the visible code references this interface - confirm whether it is still needed.
export interface ILocationOfText {
readonly parent: Element;
readonly startIndex: number;
readonly endIndex: number;
readonly currentText: string;
// This is optional because the text could start in the middle of a tag
readonly startElement?: Element;
// This is optional because the text could end in the middle of a tag
readonly endElement?: Element;
}
/**
 * Wraps every direct child of the wrapped element, recording its index and its parent wrapper.
 *
 * @param wrapper - The wrapper whose element's children are wanted.
 * @returns One wrapper per child, in order; an empty list when the element has no `elements`.
 */
const elementsToWrapper = (wrapper: ElementWrapper): readonly ElementWrapper[] => {
    const children = wrapper.element.elements;
    if (children == null) {
        return [];
    }
    return children.map((child, position) => ({
        element: child,
        index: position,
        parent: wrapper,
    }));
};
/**
 * Finds every paragraph below `node` whose rendered text contains `text`.
 *
 * The tree is walked breadth-first starting from the children of `node`. A `w:p` element is
 * rendered and not descended into; any other element has its children queued.
 *
 * @param node - Root element to search under.
 * @param text - Text to look for in each paragraph's rendered text.
 * @returns The rendered paragraphs that contain `text`, in the order they were visited.
 */
export const findLocationOfText = (node: Element, text: string): readonly IRenderedParagraphNode[] => {
    // eslint-disable-next-line functional/prefer-readonly-type
    const pending: ElementWrapper[] = [
        ...elementsToWrapper({
            element: node,
            index: 0,
            parent: undefined,
        }),
    ];
    // eslint-disable-next-line functional/prefer-readonly-type
    const matches: IRenderedParagraphNode[] = [];

    // `pending` grows while it is being read; the cursor visits entries in first-in, first-out order.
    for (let cursor = 0; cursor < pending.length; cursor++) {
        const current = pending[cursor];
        if (!current) {
            break;
        }

        if (current.element.name === "w:p") {
            const rendered = renderParagraphNode(current);
            if (rendered.text.includes(text)) {
                // eslint-disable-next-line functional/immutable-data
                matches.push(rendered);
            }
        } else {
            // eslint-disable-next-line functional/immutable-data
            pending.push(...elementsToWrapper(current));
        }
    }

    return matches;
};

19
src/patcher/util.ts Normal file
View File

@ -0,0 +1,19 @@
import { xml2js, Element } from "xml-js";
import * as xml from "xml";
import { Formatter } from "@export/formatter";
import { Text } from "@file/paragraph/run/run-components/text";
// Shared formatter used by `createTextElementContents` to format a `Text` component before it is converted to XML.
const formatter = new Formatter();
/**
 * Parses XML text into xml-js's non-compact element tree.
 *
 * @param xmlData - The XML source.
 * @returns The result of `xml2js` with `compact: false`.
 */
export const toJson = (xmlData: string): Element => xml2js(xmlData, { compact: false }) as Element;
/**
 * Produces the child nodes for a text element holding `text`.
 *
 * A `Text` component is formatted, serialised to XML and parsed back; the children of the first
 * parsed element are returned.
 *
 * @param text - The text to wrap.
 * @returns The children of the parsed text element, or an empty array when it has none.
 */
// eslint-disable-next-line functional/prefer-readonly-type
export const createTextElementContents = (text: string): Element[] => {
    const formatted = formatter.format(new Text({ text }));
    const parsed = toJson(xml(formatted));
    const textElement = parsed.elements![0];
    return textElement.elements ?? [];
};