Extract runs and text

This commit is contained in:
Dolan
2023-02-17 10:38:03 +00:00
parent c206d23480
commit 86de252a52
6 changed files with 144 additions and 12 deletions

View File

@ -4,7 +4,12 @@ import * as fs from "fs";
import { Paragraph, patchDocument, TextRun } from "../build"; import { Paragraph, patchDocument, TextRun } from "../build";
patchDocument(fs.readFileSync("demo/assets/simple-template.docx"), { patchDocument(fs.readFileSync("demo/assets/simple-template.docx"), {
children: [new Paragraph("ff"), new TextRun("fgf")], patches: [
{
children: [new Paragraph("ff"), new TextRun("fgf")],
text: "{{ name }}",
},
],
}).then((doc) => { }).then((doc) => {
fs.writeFileSync("My Document.docx", doc); fs.writeFileSync("My Document.docx", doc);
}); });

Binary file not shown.

View File

@ -1,25 +1,31 @@
import * as JSZip from "jszip"; import * as JSZip from "jszip";
import { xml2js, ElementCompact, js2xml } from "xml-js"; import { xml2js, Element, js2xml } from "xml-js";
import { replacer } from "./replacer"; import { replacer } from "./replacer";
import { findLocationOfText } from "./traverser";
// eslint-disable-next-line functional/prefer-readonly-type // eslint-disable-next-line functional/prefer-readonly-type
type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream; type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream;
export interface PatchDocumentOptions { export interface IPatch {
readonly children: any[]; readonly children: any[];
readonly text: string;
}
export interface PatchDocumentOptions {
readonly patches: readonly IPatch[];
} }
export const patchDocument = async (data: InputDataType, options: PatchDocumentOptions): Promise<Buffer> => { export const patchDocument = async (data: InputDataType, options: PatchDocumentOptions): Promise<Buffer> => {
const zipContent = await JSZip.loadAsync(data); const zipContent = await JSZip.loadAsync(data);
const map = new Map<string, ElementCompact>(); const map = new Map<string, Element>();
console.log(options);
for (const [key, value] of Object.entries(zipContent.files)) { for (const [key, value] of Object.entries(zipContent.files)) {
const json = toJson(await value.async("text")); const json = toJson(await value.async("text"));
if (key === "word/document.xml") { if (key === "word/document.xml") {
console.log(json); for (const patch of options.patches) {
replacer(json, options); findLocationOfText(json, patch.text);
replacer(json, patch);
}
} }
map.set(key, json); map.set(key, json);
@ -42,12 +48,12 @@ export const patchDocument = async (data: InputDataType, options: PatchDocumentO
return zipData; return zipData;
}; };
const toJson = (xmlData: string): ElementCompact => { const toJson = (xmlData: string): Element => {
const xmlObj = xml2js(xmlData, { compact: false }) as ElementCompact; const xmlObj = xml2js(xmlData, { compact: false }) as Element;
return xmlObj; return xmlObj;
}; };
const toXml = (jsonObj: ElementCompact): string => { const toXml = (jsonObj: Element): string => {
const output = js2xml(jsonObj); const output = js2xml(jsonObj);
return output; return output;
}; };

View File

@ -1,8 +1,8 @@
import { Paragraph, TextRun } from "@file/paragraph"; import { Paragraph, TextRun } from "@file/paragraph";
import { ElementCompact } from "xml-js"; import { ElementCompact } from "xml-js";
import { PatchDocumentOptions } from "./from-docx"; import { IPatch } from "./from-docx";
export const replacer = (json: ElementCompact, options: PatchDocumentOptions): ElementCompact => { export const replacer = (json: ElementCompact, options: IPatch): ElementCompact => {
for (const child of options.children) { for (const child of options.children) {
if (child instanceof Paragraph) { if (child instanceof Paragraph) {
console.log("is para"); console.log("is para");

View File

@ -0,0 +1,78 @@
import { Element } from "xml-js";
/**
 * Flattened view of a `w:p` (paragraph) element, as produced by `renderParagraphNode`.
 */
export interface IRenderedParagraphNode {
/** Text of every rendered run, concatenated in run order. */
readonly text: string;
/** One entry per `w:r` child of the paragraph, in child order. */
readonly runs: readonly IRenderedRunNode[];
}
/**
 * A single text fragment of a run, taken from one `w:t` child element.
 */
interface IParts {
/** Text of the `w:t` element's first child node, or `""` when that node carries no text. */
readonly text: string;
/** Position of the `w:t` element within its run's `elements` array. */
readonly index: number;
}
/**
 * Flattened view of a `w:r` (run) element, as produced by `renderRunNode`.
 */
export interface IRenderedRunNode {
/** Text of all parts, concatenated in part order. */
readonly text: string;
/** The text fragments collected from the run's `w:t` children. */
readonly parts: readonly IParts[];
/**
 * Index the run was rendered with. `renderParagraphNode` passes the run's position within
 * the paragraph's `elements` array; a run without any `elements` is reported as `-1`.
 */
readonly index: number;
}
/**
 * Renders a `w:p` (paragraph) element into its runs and their combined text.
 *
 * Only direct `w:r` children are rendered; every other child element is ignored.
 *
 * @param node - The paragraph element; its `name` must be `w:p`.
 * @returns The rendered runs, each tagged with its position among the paragraph's
 * child elements, plus the concatenation of their text. A paragraph without an
 * `elements` array yields empty text and no runs.
 * @throws Error when `node` is not a `w:p` element.
 */
export const renderParagraphNode = (node: Element): IRenderedParagraphNode => {
    if (node.name !== "w:p") {
        throw new Error(`Invalid node type: ${node.name}`);
    }

    const children = node.elements;
    if (!children) {
        return { text: "", runs: [] };
    }

    // The reducer receives each child's position in the full `elements` array, so a
    // run keeps its original index even though non-run siblings are skipped.
    const runs = children.reduce<readonly IRenderedRunNode[]>(
        (rendered, child, position) => (child.name === "w:r" ? [...rendered, renderRunNode(child, position)] : rendered),
        [],
    );

    return {
        text: runs.map((run) => run.text).join(""),
        runs,
    };
};
/**
 * Renders a `w:r` (run) element into its text fragments and their combined text.
 *
 * Each `w:t` child that has at least one child node contributes one part, whose text
 * is read from that first child node. A `w:t` with no child nodes (missing or empty
 * `elements`) contributes nothing.
 *
 * @param node - The run element; its `name` must be `w:r`.
 * @param index - Position of the run among its parent's child elements; echoed back
 * in the result.
 * @returns The rendered run. A run without an `elements` array yields empty text,
 * no parts and an index of `-1`.
 * @throws Error when `node` is not a `w:r` element.
 */
const renderRunNode = (node: Element, index: number): IRenderedRunNode => {
    if (node.name !== "w:r") {
        throw new Error(`Invalid node type: ${node.name}`);
    }

    if (!node.elements) {
        return {
            text: "",
            parts: [],
            index: -1,
        };
    }

    const parts = node.elements
        .map((element, i): IParts | undefined => {
            if (element.name !== "w:t") {
                return undefined;
            }

            // An empty `elements` array is truthy, so check the first child itself
            // rather than the array before reading its text.
            const firstChild = element.elements?.[0];
            if (!firstChild) {
                return undefined;
            }

            return {
                text: firstChild.text?.toString() ?? "",
                index: i,
            };
        })
        .filter((part): part is IParts => part !== undefined);

    const text = parts.map((part) => part.text).join("");

    return {
        text,
        parts,
        index,
    };
};

View File

@ -0,0 +1,43 @@
import { Element } from "xml-js";
import { IRenderedParagraphNode, renderParagraphNode } from "./run-renderer";
/**
 * Describes where a piece of text sits inside the parsed document tree.
 *
 * NOTE(review): nothing in the visible code constructs this type yet
 * (`findLocationOfText` currently returns `void`), so the exact meaning of the
 * index fields below is not established here — confirm once a producer exists.
 */
export interface ILocationOfText {
// presumably the element that contains the located text — TODO confirm
readonly parent: Element;
// presumably where the located text starts; unit (character vs. child index) unconfirmed
readonly startIndex: number;
// presumably where the located text ends; unit and inclusivity unconfirmed
readonly endIndex: number;
readonly currentText: string;
// This is optional because the text could start in the middle of a tag
readonly startElement?: Element;
// This is optional because the text could end in the middle of a tag
readonly endElement?: Element;
}
/**
 * Walks the element tree below `node` breadth-first, renders every `w:p` element it
 * reaches and logs the rendered paragraphs whose text contains `text`.
 *
 * The walk does not descend into a `w:p` once it has been found. Nothing is returned:
 * the matching paragraphs are only written to the console as pretty-printed JSON.
 *
 * @param node - Root of the tree to search; only its descendants are visited.
 * @param text - Substring to look for in each rendered paragraph's text.
 */
export const findLocationOfText = (node: Element, text: string): void => {
    // eslint-disable-next-line functional/prefer-readonly-type
    const pending: Element[] = [...(node.elements ?? [])];
    // eslint-disable-next-line functional/prefer-readonly-type
    const rendered: IRenderedParagraphNode[] = [];

    // `pending` grows while it is being walked, so advancing a cursor over it visits
    // the nodes in the same first-in, first-out order as draining a queue.
    // eslint-disable-next-line functional/no-let
    for (let cursor = 0; cursor < pending.length; cursor += 1) {
        const current = pending[cursor];
        if (!current) {
            break;
        }

        if (current.name === "w:p") {
            // eslint-disable-next-line functional/immutable-data
            rendered.push(renderParagraphNode(current));
            continue;
        }

        // eslint-disable-next-line functional/immutable-data
        pending.push(...(current.elements ?? []));
    }

    const matches = rendered.filter((paragraph) => paragraph.text.includes(text));

    console.log("paragrapghs", JSON.stringify(matches, null, 2));
};