Files
docx-js/src/import-dotx/import-dotx.ts

228 lines
9.5 KiB
TypeScript
Raw Normal View History

2018-08-29 18:36:48 +03:00
import * as fastXmlParser from "fast-xml-parser";
2018-10-16 11:28:25 +03:00
import { xml2js, Element as XMLElement } from 'xml-js'
// var convertXmlJs = require('xml-js');
2018-09-06 08:30:23 +01:00
import * as JSZip from "jszip";
2018-10-16 11:28:25 +03:00
2018-09-06 08:30:23 +01:00
import { FooterReferenceType } from "file/document/body/section-properties/footer-reference";
import { HeaderReferenceType } from "file/document/body/section-properties/header-reference";
2018-09-17 20:06:51 +01:00
import { FooterWrapper, IDocumentFooter } from "file/footer-wrapper";
import { HeaderWrapper, IDocumentHeader } from "file/header-wrapper";
2018-10-16 11:28:25 +03:00
import { convertToXmlComponent, ImportedXmlComponent, parseOptions, convertToXmlComponentOld } from "file/xml-components";
2018-09-04 17:16:31 +03:00
2018-09-26 14:33:05 +03:00
import { Styles } from "file/styles";
import { ExternalStylesFactory } from "file/styles/external-styles-factory";
2018-09-17 23:54:19 +01:00
// fast-xml-parser options used for every part read from the template:
// the shared `parseOptions`, with `trimValues` switched off and
// `textNodeName` overridden by an empty string.
const importParseOptions = {
    ...parseOptions,
    trimValues: false,
    textNodeName: "",
};
2018-09-04 17:16:31 +03:00
// Maps the `Type` URI of a relationship entry to the short relationship kind
// stored in IRelationshipFileInfo.type. Looking up a URI that is not listed
// here yields `undefined`.
const schemeToType = {
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header": "header",
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer": "footer",
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image": "image",
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink": "hyperlink",
};
/**
 * Header and footer references read from the document's section properties.
 * Each entry pairs the numeric part of the reference's `r:id` with its `w:type`.
 */
interface IDocumentRefs {
    headers: Array<{ id: number; type: HeaderReferenceType }>;
    footers: Array<{ id: number; type: FooterReferenceType }>;
}
2018-09-17 20:06:51 +01:00
/**
 * One entry of a relationships (`.rels`) part.
 */
interface IRelationshipFileInfo {
    /** Numeric part of the relationship id, e.g. 3 for "rId3". */
    id: number;
    /** Value of the relationship's `Target` attribute. */
    target: string;
    /** Relationship kind, derived from the `Type` URI via `schemeToType`. */
    type: "header" | "footer" | "image" | "hyperlink";
}
2018-09-19 23:04:34 +01:00
// Document Template
// https://fileinfo.com/extension/dotx
/**
 * Everything `ImportDotx.extract` pulls out of a template archive.
 */
export interface IDocumentTemplate {
    /** Value of the importer's relationship counter once extraction has finished. */
    currentRelationshipId: number;
    /** Imported headers, each paired with the type of the reference that pointed to it. */
    headers: IDocumentHeader[];
    /** Imported footers, each paired with the type of the reference that pointed to it. */
    footers: IDocumentFooter[];
    /** Styles built from `word/styles.xml`. */
    styles: Styles;
    /** True when the document's section properties contain a `w:titlePg` element. */
    titlePageIsDefined: boolean;
}
2018-08-29 18:36:48 +03:00
2018-09-24 22:00:08 +01:00
/**
 * Reads a Word template archive and extracts its styles, headers, footers and
 * title-page flag so they can be reused when building a new document.
 */
export class ImportDotx {
    // Id given to the next header/footer wrapper; starts at 1 and is
    // incremented every time a wrapper is created.
    private currentRelationshipId: number;

    constructor() {
        this.currentRelationshipId = 1;
    }

    /**
     * Extracts the template parts from a zipped template.
     *
     * @param data raw bytes of the template archive
     * @returns the styles, imported headers/footers, title-page flag and the
     *          relationship counter value after all wrappers were created
     * @throws Error when a header/footer reference has no matching entry in
     *         `word/_rels/document.xml.rels`, or when a relationship id is malformed
     */
    public async extract(data: Buffer): Promise<IDocumentTemplate> {
        const zipContent = await JSZip.loadAsync(data);

        const stylesContent = await zipContent.files["word/styles.xml"].async("text");
        const styles = new ExternalStylesFactory().newInstance(stylesContent);

        // Read the main document part once and reuse the text for both queries.
        const documentXml = await zipContent.files["word/document.xml"].async("text");
        const documentRefs: IDocumentRefs = this.extractDocumentRefs(documentXml);
        const titlePageIsDefined = this.titlePageIsDefined(documentXml);

        const relationshipXml = await zipContent.files["word/_rels/document.xml.rels"].async("text");
        const documentRelationships: IRelationshipFileInfo[] = this.findReferenceFiles(relationshipXml);

        const headers: IDocumentHeader[] = [];
        for (const headerRef of documentRefs.headers) {
            const headerKey = "w:hdr";
            const relationFileInfo = this.findRelationship(documentRelationships, headerRef.id);
            const xmlData = await zipContent.files[`word/${relationFileInfo.target}`].async("text");

            // Skip parts whose first xml-js node is not a header element.
            const xmlObj = xml2js(xmlData, { compact: false }) as XMLElement;
            const rootElement = xmlObj.elements === undefined ? undefined : xmlObj.elements[0];
            if (rootElement === undefined || rootElement.name !== headerKey) {
                continue;
            }

            // The wrapper is still built from the fast-xml-parser representation.
            const legacyXmlObj = fastXmlParser.parse(xmlData, importParseOptions);
            const importedComp = convertToXmlComponentOld(headerKey, legacyXmlObj[headerKey]) as ImportedXmlComponent;
            const header = new HeaderWrapper(this.currentRelationshipId++, importedComp);
            await this.addRelationToWrapper(relationFileInfo, zipContent, header);
            headers.push({ type: headerRef.type, header });
        }

        const footers: IDocumentFooter[] = [];
        for (const footerRef of documentRefs.footers) {
            const footerKey = "w:ftr";
            const relationFileInfo = this.findRelationship(documentRelationships, footerRef.id);
            const xmlData = await zipContent.files[`word/${relationFileInfo.target}`].async("text");
            const xmlObj = fastXmlParser.parse(xmlData, importParseOptions);
            // NOTE(review): the header path passes fast-xml-parser output to
            // convertToXmlComponentOld, while this path passes it to
            // convertToXmlComponent — confirm that converter accepts this shape.
            const importedComp = convertToXmlComponent(footerKey, xmlObj[footerKey]) as ImportedXmlComponent;
            const footer = new FooterWrapper(this.currentRelationshipId++, importedComp);
            await this.addRelationToWrapper(relationFileInfo, zipContent, footer);
            footers.push({ type: footerRef.type, footer });
        }

        const templateDocument: IDocumentTemplate = {
            headers,
            footers,
            currentRelationshipId: this.currentRelationshipId,
            styles,
            titlePageIsDefined,
        };
        return templateDocument;
    }

    /**
     * Copies the image and hyperlink relationships of a header/footer part
     * onto its wrapper. Does nothing when the part has no `.rels` file.
     *
     * @param relationFile relationship entry of the header/footer part
     * @param zipContent   the opened template archive
     * @param wrapper      wrapper that receives the relationships
     */
    public async addRelationToWrapper(
        relationFile: IRelationshipFileInfo,
        zipContent: JSZip,
        wrapper: HeaderWrapper | FooterWrapper,
    ): Promise<void> {
        const refFile = zipContent.files[`word/_rels/${relationFile.target}.rels`];
        if (!refFile) {
            return;
        }

        // Parse the relationships part once and split it by kind.
        const references = this.findReferenceFiles(await refFile.async("text"));
        const imageReferences = references.filter((r) => r.type === "image");
        const hyperLinkReferences = references.filter((r) => r.type === "hyperlink");

        // Images are registered before hyperlinks, one after another.
        for (const r of imageReferences) {
            const buffer = await zipContent.files[`word/${r.target}`].async("nodebuffer");
            wrapper.addImageRelationship(buffer, r.id);
        }
        for (const r of hyperLinkReferences) {
            wrapper.addHyperlinkRelationship(r.target, r.id, "External");
        }
    }

    /**
     * Parses a relationships (`.rels`) part.
     *
     * @param xmlData XML text of the relationships part
     * @returns the entries whose `Type` is listed in `schemeToType`; entries of
     *          any other type are skipped, and a part without entries yields `[]`
     * @throws Error when the id of a returned entry is not of the form `rId<number>`
     */
    public findReferenceFiles(xmlData: string): IRelationshipFileInfo[] {
        const xmlObj = fastXmlParser.parse(xmlData, importParseOptions);
        const relationshipsNode = xmlObj.Relationships;
        const rawRelationships = relationshipsNode ? relationshipsNode.Relationship : undefined;

        const relations: IRelationshipFileInfo[] = [];
        for (const item of this.toArray(rawRelationships)) {
            // A URI missing from the table looks up as `undefined`, not `null`.
            const type = schemeToType[item._attr.Type];
            if (type === undefined) {
                continue;
            }
            relations.push({
                id: this.parseRefId(item._attr.Id),
                type,
                target: item._attr.Target,
            });
        }
        return relations;
    }

    /**
     * Collects the header and footer references declared in the section
     * properties of the document body.
     *
     * @param xmlData XML text of `word/document.xml`
     * @returns the references; both lists are empty when the body has no
     *          section properties
     * @throws Error when a reference id is not of the form `rId<number>`
     */
    public extractDocumentRefs(xmlData: string): IDocumentRefs {
        interface IAttributedXML {
            _attr: object;
        }

        const sectionProp = this.parseSectionProperties(xmlData);

        const headers = this.toArray<IAttributedXML>(sectionProp["w:headerReference"]).map((item) => {
            return {
                type: item._attr["w:type"],
                id: this.parseRefId(item._attr["r:id"]),
            };
        });
        const footers = this.toArray<IAttributedXML>(sectionProp["w:footerReference"]).map((item) => {
            return {
                type: item._attr["w:type"],
                id: this.parseRefId(item._attr["r:id"]),
            };
        });

        return { headers, footers };
    }

    /**
     * Tells whether the section properties of the document body contain a
     * `w:titlePg` element.
     *
     * @param xmlData XML text of `word/document.xml`
     */
    public titlePageIsDefined(xmlData: string): boolean {
        return this.parseSectionProperties(xmlData)["w:titlePg"] !== undefined;
    }

    /**
     * Extracts the numeric part of a relationship id.
     *
     * @param str id of the form `rId<digits>`, e.g. "rId7"
     * @returns the digits parsed as a base-10 integer
     * @throws Error("Invalid ref id") when `str` does not have that form
     */
    public parseRefId(str: string): number {
        const match = /^rId(\d+)$/.exec(str);
        if (match === null) {
            throw new Error("Invalid ref id");
        }
        return parseInt(match[1], 10);
    }

    // Returns the relationship with the given id, or throws when there is none.
    private findRelationship(relationships: IRelationshipFileInfo[], id: number): IRelationshipFileInfo {
        const relationFileInfo = relationships.find((rel) => rel.id === id);
        if (!relationFileInfo) {
            throw new Error(`Can not find target file for id ${id}`);
        }
        return relationFileInfo;
    }

    // Parses the document and returns body-level `w:sectPr`, or an empty object when it is missing.
    private parseSectionProperties(xmlData: string) {
        const xmlObj = fastXmlParser.parse(xmlData, importParseOptions);
        const documentNode = xmlObj["w:document"];
        const bodyNode = documentNode ? documentNode["w:body"] : undefined;
        const sectionProp = bodyNode ? bodyNode["w:sectPr"] : undefined;
        return sectionProp === undefined || sectionProp === null ? {} : sectionProp;
    }

    // Normalizes a parser value that may be absent, a single node or a list of nodes.
    private toArray<T>(value: undefined | T | T[]): T[] {
        if (value === undefined) {
            return [];
        }
        return Array.isArray(value) ? value : [value];
    }
}