Extract runs and text
This commit is contained in:
@ -4,7 +4,12 @@ import * as fs from "fs";
|
|||||||
import { Paragraph, patchDocument, TextRun } from "../build";
|
import { Paragraph, patchDocument, TextRun } from "../build";
|
||||||
|
|
||||||
patchDocument(fs.readFileSync("demo/assets/simple-template.docx"), {
|
patchDocument(fs.readFileSync("demo/assets/simple-template.docx"), {
|
||||||
children: [new Paragraph("ff"), new TextRun("fgf")],
|
patches: [
|
||||||
|
{
|
||||||
|
children: [new Paragraph("ff"), new TextRun("fgf")],
|
||||||
|
text: "{{ name }}",
|
||||||
|
},
|
||||||
|
],
|
||||||
}).then((doc) => {
|
}).then((doc) => {
|
||||||
fs.writeFileSync("My Document.docx", doc);
|
fs.writeFileSync("My Document.docx", doc);
|
||||||
});
|
});
|
||||||
|
Binary file not shown.
@ -1,25 +1,31 @@
|
|||||||
import * as JSZip from "jszip";
|
import * as JSZip from "jszip";
|
||||||
import { xml2js, ElementCompact, js2xml } from "xml-js";
|
import { xml2js, Element, js2xml } from "xml-js";
|
||||||
import { replacer } from "./replacer";
|
import { replacer } from "./replacer";
|
||||||
|
import { findLocationOfText } from "./traverser";
|
||||||
|
|
||||||
// eslint-disable-next-line functional/prefer-readonly-type
|
// eslint-disable-next-line functional/prefer-readonly-type
|
||||||
type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream;
|
type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream;
|
||||||
|
|
||||||
export interface PatchDocumentOptions {
|
export interface IPatch {
|
||||||
readonly children: any[];
|
readonly children: any[];
|
||||||
|
readonly text: string;
|
||||||
|
}
|
||||||
|
export interface PatchDocumentOptions {
|
||||||
|
readonly patches: readonly IPatch[];
|
||||||
}
|
}
|
||||||
|
|
||||||
export const patchDocument = async (data: InputDataType, options: PatchDocumentOptions): Promise<Buffer> => {
|
export const patchDocument = async (data: InputDataType, options: PatchDocumentOptions): Promise<Buffer> => {
|
||||||
const zipContent = await JSZip.loadAsync(data);
|
const zipContent = await JSZip.loadAsync(data);
|
||||||
|
|
||||||
const map = new Map<string, ElementCompact>();
|
const map = new Map<string, Element>();
|
||||||
console.log(options);
|
|
||||||
|
|
||||||
for (const [key, value] of Object.entries(zipContent.files)) {
|
for (const [key, value] of Object.entries(zipContent.files)) {
|
||||||
const json = toJson(await value.async("text"));
|
const json = toJson(await value.async("text"));
|
||||||
if (key === "word/document.xml") {
|
if (key === "word/document.xml") {
|
||||||
console.log(json);
|
for (const patch of options.patches) {
|
||||||
replacer(json, options);
|
findLocationOfText(json, patch.text);
|
||||||
|
replacer(json, patch);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
map.set(key, json);
|
map.set(key, json);
|
||||||
@ -42,12 +48,12 @@ export const patchDocument = async (data: InputDataType, options: PatchDocumentO
|
|||||||
return zipData;
|
return zipData;
|
||||||
};
|
};
|
||||||
|
|
||||||
const toJson = (xmlData: string): ElementCompact => {
|
const toJson = (xmlData: string): Element => {
|
||||||
const xmlObj = xml2js(xmlData, { compact: false }) as ElementCompact;
|
const xmlObj = xml2js(xmlData, { compact: false }) as Element;
|
||||||
return xmlObj;
|
return xmlObj;
|
||||||
};
|
};
|
||||||
|
|
||||||
const toXml = (jsonObj: ElementCompact): string => {
|
const toXml = (jsonObj: Element): string => {
|
||||||
const output = js2xml(jsonObj);
|
const output = js2xml(jsonObj);
|
||||||
return output;
|
return output;
|
||||||
};
|
};
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
import { Paragraph, TextRun } from "@file/paragraph";
|
import { Paragraph, TextRun } from "@file/paragraph";
|
||||||
import { ElementCompact } from "xml-js";
|
import { ElementCompact } from "xml-js";
|
||||||
import { PatchDocumentOptions } from "./from-docx";
|
import { IPatch } from "./from-docx";
|
||||||
|
|
||||||
export const replacer = (json: ElementCompact, options: PatchDocumentOptions): ElementCompact => {
|
export const replacer = (json: ElementCompact, options: IPatch): ElementCompact => {
|
||||||
for (const child of options.children) {
|
for (const child of options.children) {
|
||||||
if (child instanceof Paragraph) {
|
if (child instanceof Paragraph) {
|
||||||
console.log("is para");
|
console.log("is para");
|
||||||
|
78
src/templater/run-renderer.ts
Normal file
78
src/templater/run-renderer.ts
Normal file
@ -0,0 +1,78 @@
|
|||||||
|
import { Element } from "xml-js";
|
||||||
|
|
||||||
|
/**
 * A `w:p` paragraph reduced to its text and the runs that contribute to it.
 */
export interface IRenderedParagraphNode {
    /** The `text` of every entry in `runs`, concatenated in order. */
    readonly text: string;
    /** One entry per direct `w:r` child of the paragraph, in document order. */
    readonly runs: readonly IRenderedRunNode[];
}

/**
 * The text contributed by a single `w:t` child of a run.
 *
 * Exported because it appears in the public shape of `IRenderedRunNode`, so consumers
 * need to be able to name it.
 */
export interface IParts {
    /** Stringified text of the first child of the `w:t` element (`""` if that child carries no text). */
    readonly text: string;
    /** Position of the `w:t` element within the run's `elements` array. */
    readonly index: number;
}

/**
 * A `w:r` run reduced to its text and the `w:t` parts that make it up.
 */
export interface IRenderedRunNode {
    /** The `text` of every entry in `parts`, concatenated in order. */
    readonly text: string;
    /** One entry per `w:t` child of the run that has child nodes, in document order. */
    readonly parts: readonly IParts[];
    /**
     * Index recorded for the run by the renderer; for runs that have child elements this is
     * the run's position within the parent paragraph's `elements` array.
     */
    readonly index: number;
}
|
||||||
|
|
||||||
|
/**
 * Flattens a `w:p` element into its text and a per-run breakdown.
 *
 * Only direct `w:r` children are rendered; every other child is skipped. Each run is
 * rendered together with its position in `node.elements` so the result can be mapped
 * back onto the XML tree.
 *
 * @param node - xml-js element expected to be named `w:p`.
 * @returns The concatenated run text plus the rendered runs in document order. A
 * paragraph without child elements yields empty text and no runs.
 * @throws Error when `node` is not a `w:p` element.
 */
export const renderParagraphNode = (node: Element): IRenderedParagraphNode => {
    if (node.name !== "w:p") {
        throw new Error(`Invalid node type: ${node.name}`);
    }

    // renderRunNode either returns a rendered run or throws, so the result needs no
    // truthiness filter or type assertion afterwards.
    const runs = (node.elements ?? []).flatMap((element, i) => (element.name === "w:r" ? [renderRunNode(element, i)] : []));

    return {
        text: runs.map((run) => run.text).join(""),
        runs,
    };
};
|
||||||
|
|
||||||
|
/**
 * Collects the text of a `w:r` run from its `w:t` children.
 *
 * Every `w:t` child that has an `elements` array produces one part holding the stringified
 * text of its first child node and the position of the `w:t` within the run. Children with
 * any other name are ignored.
 *
 * @param node - xml-js element expected to be named `w:r`.
 * @param index - Position of the run within its parent's `elements`; echoed back in the result.
 * @returns The concatenated part text, the parts, and `index`. A run without child elements
 * yields empty text and no parts but still reports `index`.
 * @throws Error when `node` is not a `w:r` element.
 */
const renderRunNode = (node: Element, index: number): IRenderedRunNode => {
    if (node.name !== "w:r") {
        throw new Error(`Invalid node type: ${node.name}`);
    }

    const parts = (node.elements ?? []).flatMap((element, i): IParts[] =>
        element.name === "w:t" && element.elements
            ? [
                  {
                      // `elements` may be an empty array, so the first child is read defensively.
                      text: element.elements[0]?.text?.toString() ?? "",
                      index: i,
                  },
              ]
            : [],
    );

    return {
        text: parts.map((part) => part.text).join(""),
        parts,
        index,
    };
};
|
43
src/templater/traverser.ts
Normal file
43
src/templater/traverser.ts
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
import { Element } from "xml-js";
|
||||||
|
import { IRenderedParagraphNode, renderParagraphNode } from "./run-renderer";
|
||||||
|
|
||||||
|
/**
 * Describes where a piece of text sits inside the XML tree.
 *
 * NOTE(review): nothing visible in this module produces this shape yet; the field
 * meanings below are taken from their names and the original comments — confirm once
 * `findLocationOfText` returns it.
 */
export interface ILocationOfText {
    readonly parent: Element;
    readonly startIndex: number;
    readonly endIndex: number;
    readonly currentText: string;
    /** Optional because the text could start in the middle of a tag. */
    readonly startElement?: Element;
    /** Optional because the text could end in the middle of a tag. */
    readonly endElement?: Element;
}

/**
 * Walks the tree below `node` breadth-first, renders every `w:p` element it reaches and
 * logs the rendered paragraphs whose text contains `text`.
 *
 * Traversal does not descend into a `w:p` once it has been rendered; all other elements
 * have their children queued.
 *
 * @param node - Root xml-js element; only its descendants are inspected, not `node` itself.
 * @param text - Substring to look for in each rendered paragraph's text.
 * @returns Nothing; the matches are only written to the console.
 */
export const findLocationOfText = (node: Element, text: string): void => {
    // eslint-disable-next-line functional/prefer-readonly-type
    const queue: Element[] = [...(node.elements ?? [])];
    // eslint-disable-next-line functional/prefer-readonly-type
    const renderedParagraphs: IRenderedParagraphNode[] = [];

    // A cursor replaces `queue.shift()` so dequeuing does not re-index the array on every
    // step; visiting order is the same first-in, first-out order.
    // eslint-disable-next-line functional/no-let
    for (let head = 0; head < queue.length; head++) {
        const currentNode = queue[head];

        if (!currentNode) {
            break;
        }

        if (currentNode.name === "w:p") {
            // eslint-disable-next-line functional/immutable-data
            renderedParagraphs.push(renderParagraphNode(currentNode));
        } else {
            for (const child of currentNode.elements ?? []) {
                // eslint-disable-next-line functional/immutable-data
                queue.push(child);
            }
        }
    }

    const filteredParagraphs = renderedParagraphs.filter((p) => p.text.includes(text));

    // NOTE(review): looks like temporary debug output — the function returns nothing else.
    console.log("paragrapghs", JSON.stringify(filteredParagraphs, null, 2));
};
|
Reference in New Issue
Block a user