Extract runs and text

This commit is contained in:
Dolan
2023-02-17 10:38:03 +00:00
parent c206d23480
commit 86de252a52
6 changed files with 144 additions and 12 deletions

View File

@ -4,7 +4,12 @@ import * as fs from "fs";
import { Paragraph, patchDocument, TextRun } from "../build";
patchDocument(fs.readFileSync("demo/assets/simple-template.docx"), {
children: [new Paragraph("ff"), new TextRun("fgf")],
patches: [
{
children: [new Paragraph("ff"), new TextRun("fgf")],
text: "{{ name }}",
},
],
}).then((doc) => {
fs.writeFileSync("My Document.docx", doc);
});

Binary file not shown.

View File

@ -1,25 +1,31 @@
import * as JSZip from "jszip";
import { xml2js, ElementCompact, js2xml } from "xml-js";
import { xml2js, Element, js2xml } from "xml-js";
import { replacer } from "./replacer";
import { findLocationOfText } from "./traverser";
// eslint-disable-next-line functional/prefer-readonly-type
type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream;
export interface PatchDocumentOptions {
export interface IPatch {
readonly children: any[];
readonly text: string;
}
export interface PatchDocumentOptions {
readonly patches: readonly IPatch[];
}
export const patchDocument = async (data: InputDataType, options: PatchDocumentOptions): Promise<Buffer> => {
const zipContent = await JSZip.loadAsync(data);
const map = new Map<string, ElementCompact>();
console.log(options);
const map = new Map<string, Element>();
for (const [key, value] of Object.entries(zipContent.files)) {
const json = toJson(await value.async("text"));
if (key === "word/document.xml") {
console.log(json);
replacer(json, options);
for (const patch of options.patches) {
findLocationOfText(json, patch.text);
replacer(json, patch);
}
}
map.set(key, json);
@ -42,12 +48,12 @@ export const patchDocument = async (data: InputDataType, options: PatchDocumentO
return zipData;
};
const toJson = (xmlData: string): ElementCompact => {
const xmlObj = xml2js(xmlData, { compact: false }) as ElementCompact;
const toJson = (xmlData: string): Element => {
const xmlObj = xml2js(xmlData, { compact: false }) as Element;
return xmlObj;
};
const toXml = (jsonObj: ElementCompact): string => {
const toXml = (jsonObj: Element): string => {
const output = js2xml(jsonObj);
return output;
};

View File

@ -1,8 +1,8 @@
import { Paragraph, TextRun } from "@file/paragraph";
import { ElementCompact } from "xml-js";
import { PatchDocumentOptions } from "./from-docx";
import { IPatch } from "./from-docx";
export const replacer = (json: ElementCompact, options: PatchDocumentOptions): ElementCompact => {
export const replacer = (json: ElementCompact, options: IPatch): ElementCompact => {
for (const child of options.children) {
if (child instanceof Paragraph) {
console.log("is para");

View File

@ -0,0 +1,78 @@
import { Element } from "xml-js";
/**
 * Flattened, text-only view of a `w:p` (paragraph) element, as produced by
 * `renderParagraphNode`.
 */
export interface IRenderedParagraphNode {
/** Text of every rendered run in the paragraph, concatenated in order. */
readonly text: string;
/** One entry per direct `w:r` child of the paragraph, in document order. */
readonly runs: readonly IRenderedRunNode[];
}
/** A single `w:t` (text) element found inside a run. */
interface IParts {
/** Text read from the first child of the `w:t` element; "" when that child carries no text. */
readonly text: string;
/** Position of the `w:t` element within its run's `elements` array. */
readonly index: number;
}
/** Flattened, text-only view of a `w:r` (run) element. */
export interface IRenderedRunNode {
/** Text of all `parts`, concatenated in order. */
readonly text: string;
/** The `w:t` children of the run that have child elements, in document order. */
readonly parts: readonly IParts[];
/**
 * Position of the run within its paragraph's `elements` array, or -1 when the
 * run element has no children at all.
 */
readonly index: number;
}
/**
 * Renders a `w:p` (paragraph) element into its plain text plus a per-run breakdown.
 *
 * Only direct `w:r` children are rendered; every other child is ignored. A
 * paragraph with no children renders as empty text with no runs.
 *
 * @param node - The element to render; its name must be `w:p`.
 * @returns The concatenated text of the paragraph's runs together with the rendered runs.
 * @throws Error when `node` is not a `w:p` element.
 */
export const renderParagraphNode = (node: Element): IRenderedParagraphNode => {
    if (node.name !== "w:p") {
        throw new Error(`Invalid node type: ${node.name}`);
    }

    const children = node.elements ?? [];

    // The index handed to renderRunNode is the child's position among ALL of the
    // paragraph's children, not its position among the runs only.
    const runs = children
        .map((child, position) => (child.name === "w:r" ? renderRunNode(child, position) : undefined))
        .filter((run): run is IRenderedRunNode => run !== undefined);

    return {
        text: runs.map((run) => run.text).join(""),
        runs,
    };
};
/**
 * Renders a `w:r` (run) element into its plain text plus a per-`w:t` breakdown.
 *
 * @param node - The element to render; its name must be `w:r`.
 * @param index - Position of the run inside its parent's `elements` array; echoed back in the result.
 * @returns The concatenated text of the run's `w:t` children, the individual parts, and `index`.
 *          A run without any children yields empty text, no parts and an index of -1.
 * @throws Error when `node` is not a `w:r` element.
 */
const renderRunNode = (node: Element, index: number): IRenderedRunNode => {
    if (node.name !== "w:r") {
        throw new Error(`Invalid node type: ${node.name}`);
    }

    if (!node.elements) {
        // A childless run reports the -1 sentinel instead of the supplied index.
        return {
            text: "",
            parts: [],
            index: -1,
        };
    }

    const parts = node.elements
        .map((element, i): IParts | undefined => {
            // Only the first child of a `w:t` is read. An empty `elements` array is
            // truthy but has no first child, so it must be skipped explicitly rather
            // than dereferenced.
            const firstChild = element.name === "w:t" ? element.elements?.[0] : undefined;
            if (firstChild === undefined) {
                return undefined;
            }
            return {
                text: firstChild.text?.toString() ?? "",
                index: i,
            };
        })
        .filter((part): part is IParts => part !== undefined);

    const text = parts.map((part) => part.text).join("");

    return {
        text,
        parts,
        index,
    };
};

View File

@ -0,0 +1,43 @@
import { Element } from "xml-js";
import { IRenderedParagraphNode, renderParagraphNode } from "./run-renderer";
/**
 * Describes where a piece of text sits inside a parent element.
 *
 * NOTE(review): nothing in the visible code constructs or consumes this type yet,
 * so the meaning of `parent`, `startIndex`, `endIndex` and `currentText` is presumed
 * from their names only — confirm once the type is wired up.
 */
export interface ILocationOfText {
readonly parent: Element;
readonly startIndex: number;
readonly endIndex: number;
readonly currentText: string;
// This is optional because the text could start in the middle of a tag
readonly startElement?: Element;
// This is optional because the text could end in the middle of a tag
readonly endElement?: Element;
}
/**
 * Walks the element tree below `node` breadth-first, renders every `w:p` element
 * it reaches, and logs the rendered paragraphs whose text contains `text`.
 *
 * A `w:p` element is rendered as a whole and not descended into; every other
 * element has its children queued for a later visit. The function currently
 * only logs its findings and returns nothing.
 *
 * @param node - Root element whose descendants are searched.
 * @param text - Substring to look for in each rendered paragraph's text.
 */
export const findLocationOfText = (node: Element, text: string): void => {
    // eslint-disable-next-line functional/prefer-readonly-type
    const pending: Element[] = [...(node.elements ?? [])];
    // eslint-disable-next-line functional/prefer-readonly-type
    const rendered: IRenderedParagraphNode[] = [];

    // `pending` grows while it is being walked; the cursor visits entries in the
    // order they were queued, which keeps the traversal breadth-first.
    for (let cursor = 0; cursor < pending.length; cursor++) {
        const current = pending[cursor];
        if (!current) {
            break;
        }

        if (current.name === "w:p") {
            // eslint-disable-next-line functional/immutable-data
            rendered.push(renderParagraphNode(current));
            continue;
        }

        // eslint-disable-next-line functional/immutable-data
        pending.push(...(current.elements ?? []));
    }

    const matches = rendered.filter((paragraph) => paragraph.text.includes(text));

    console.log("paragrapghs", JSON.stringify(matches, null, 2));
};