Files
docx-js/src/import-dotx/import-dotx.ts

260 lines
11 KiB
TypeScript
Raw Normal View History

2018-09-06 08:30:23 +01:00
import * as JSZip from "jszip";
2018-10-17 09:15:32 +03:00
import { Element as XMLElement, ElementCompact as XMLElementCompact, xml2js } from "xml-js";
2018-10-16 11:28:25 +03:00
2018-09-06 08:30:23 +01:00
import { FooterReferenceType } from "file/document/body/section-properties/footer-reference";
import { HeaderReferenceType } from "file/document/body/section-properties/header-reference";
2018-09-17 20:06:51 +01:00
import { FooterWrapper, IDocumentFooter } from "file/footer-wrapper";
import { HeaderWrapper, IDocumentHeader } from "file/header-wrapper";
2018-10-23 00:41:18 +01:00
import { Media } from "file/media";
2018-12-05 00:05:11 +00:00
import { TargetModeType } from "file/relationships/relationship/relationship";
import { convertToXmlComponent, ImportedXmlComponent } from "file/xml-components";
2018-09-26 14:33:05 +03:00
2018-09-04 17:16:31 +03:00
/**
 * Lookup table from an OOXML relationship `Type` URI to the short relationship
 * kind used in this module. Only the relationship kinds this importer handles
 * are listed; looking up any other URI yields `undefined`.
 */
const schemeToType = {
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header": "header",
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer": "footer",
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image": "image",
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink": "hyperlink",
};
/**
 * Header and footer references collected from the `w:sectPr` element of the
 * main document part. Each entry pairs the numeric part of the reference's
 * `r:id` attribute with its `w:type` attribute.
 */
interface IDocumentRefs {
    /** One entry per `w:headerReference` element. */
    readonly headers: { readonly id: number; readonly type: HeaderReferenceType }[];
    /** One entry per `w:footerReference` element. */
    readonly footers: { readonly id: number; readonly type: FooterReferenceType }[];
}
2018-12-05 00:05:11 +00:00
// Relationship kinds this importer distinguishes. The string values are the
// same short names that `schemeToType` maps relationship Type URIs to.
enum RelationshipType {
// Relationship pointing at a header part.
HEADER = "header",
// Relationship pointing at a footer part.
FOOTER = "footer",
// Relationship pointing at an image; these are copied into Media when a wrapper is imported.
IMAGE = "image",
// Hyperlink relationship; re-created on the wrapper with an external target mode.
HYPERLINK = "hyperlink",
}
2018-09-17 20:06:51 +01:00
/**
 * One `<Relationship>` entry read from a `.rels` part.
 */
interface IRelationshipFileInfo {
    /** Numeric part of the relationship's `Id` attribute (`rId7` -> `7`). */
    readonly id: number;
    /** Raw value of the relationship's `Target` attribute. */
    readonly target: string;
    /** Relationship kind derived from the relationship's `Type` URI. */
    readonly type: RelationshipType;
}
2018-09-19 23:04:34 +01:00
/**
 * Document Template: the parts extracted from an imported template package.
 *
 * https://fileinfo.com/extension/dotx
 */
export interface IDocumentTemplate {
    /** Number of header and footer wrappers that were assigned an id during import. */
    readonly currentRelationshipId: number;
    /** Imported headers, each paired with its reference type. */
    readonly headers: IDocumentHeader[];
    /** Imported footers, each paired with its reference type. */
    readonly footers: IDocumentFooter[];
    /** Raw text of the package's `word/styles.xml` part. */
    readonly styles: string;
    /** Whether the document's section properties contain a `w:titlePg` element. */
    readonly titlePageIsDefined: boolean;
    /** Media collection holding the images referenced by the imported headers and footers. */
    readonly media: Media;
}
2018-08-29 18:36:48 +03:00
2018-09-24 22:00:08 +01:00
/**
 * Reads a Word package (a zip containing `word/document.xml`) and extracts the
 * pieces needed to reuse it as a template: headers, footers, the styles XML,
 * the images referenced by headers/footers and the title-page flag.
 */
export class ImportDotx {
    /**
     * Loads the package and builds the template description.
     *
     * Header wrappers are numbered from 0, footer wrappers continue after the
     * last header, and `currentRelationshipId` is the total number of header
     * and footer references found in the document.
     *
     * @param data the zipped package, in any form accepted by `JSZip.loadAsync`
     * @returns the extracted template parts
     * @throws Error when a required part is missing from the package, when a
     *         header/footer reference has no matching relationship, or when a
     *         relationship id is not of the form `rId<number>`
     */
    public async extract(data: Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream): Promise<IDocumentTemplate> {
        const zipContent = await JSZip.loadAsync(data);

        const documentContent = await this.getZipEntry(zipContent, "word/document.xml").async("text");
        const relationshipContent = await this.getZipEntry(zipContent, "word/_rels/document.xml.rels").async("text");

        const documentRefs = this.extractDocumentRefs(documentContent);
        const documentRelationships = this.findReferenceFiles(relationshipContent);

        const media = new Media();

        const templateDocument: IDocumentTemplate = {
            headers: await this.createHeaders(zipContent, documentRefs, documentRelationships, media, 0),
            footers: await this.createFooters(zipContent, documentRefs, documentRelationships, media, documentRefs.headers.length),
            currentRelationshipId: documentRefs.footers.length + documentRefs.headers.length,
            styles: await this.getZipEntry(zipContent, "word/styles.xml").async("text"),
            titlePageIsDefined: this.checkIfTitlePageIsDefined(documentContent),
            media: media,
        };

        return templateDocument;
    }

    /**
     * Builds one FooterWrapper per footer reference of the document.
     *
     * References whose part contains no XML elements are skipped.
     *
     * @param startingRelationshipId id given to the first wrapper; the i-th reference gets `startingRelationshipId + i`
     * @throws Error when a reference has no matching entry in `documentRelationships`
     */
    private async createFooters(
        zipContent: JSZip,
        documentRefs: IDocumentRefs,
        documentRelationships: IRelationshipFileInfo[],
        media: Media,
        startingRelationshipId: number,
    ): Promise<IDocumentFooter[]> {
        const result = await Promise.all(
            documentRefs.footers.map(
                async (reference, i): Promise<IDocumentFooter | undefined> => {
                    const relationshipFileInfo = documentRelationships.find((rel) => rel.id === reference.id);

                    if (!relationshipFileInfo) {
                        throw new Error(`Can not find target file for id ${reference.id}`);
                    }

                    const importedComp = await this.importRootComponent(zipContent, relationshipFileInfo.target, "w:ftr");
                    if (!importedComp) {
                        return undefined;
                    }

                    const wrapper = new FooterWrapper(media, startingRelationshipId + i, importedComp);
                    await this.addRelationshipToWrapper(relationshipFileInfo, zipContent, wrapper, media);
                    return { type: reference.type, footer: wrapper };
                },
            ),
        );

        // Filter AFTER the promises have settled: a pending Promise is always truthy,
        // so filtering the promise array itself can never remove skipped parts.
        return result.filter((x): x is IDocumentFooter => x !== undefined);
    }

    /**
     * Builds one HeaderWrapper per header reference of the document.
     *
     * References whose part contains no XML elements are skipped.
     *
     * @param startingRelationshipId id given to the first wrapper; the i-th reference gets `startingRelationshipId + i`
     * @throws Error when a reference has no matching entry in `documentRelationships`
     */
    private async createHeaders(
        zipContent: JSZip,
        documentRefs: IDocumentRefs,
        documentRelationships: IRelationshipFileInfo[],
        media: Media,
        startingRelationshipId: number,
    ): Promise<IDocumentHeader[]> {
        const result = await Promise.all(
            documentRefs.headers.map(
                async (reference, i): Promise<IDocumentHeader | undefined> => {
                    const relationshipFileInfo = documentRelationships.find((rel) => rel.id === reference.id);

                    if (!relationshipFileInfo) {
                        throw new Error(`Can not find target file for id ${reference.id}`);
                    }

                    const importedComp = await this.importRootComponent(zipContent, relationshipFileInfo.target, "w:hdr");
                    if (!importedComp) {
                        return undefined;
                    }

                    const wrapper = new HeaderWrapper(media, startingRelationshipId + i, importedComp);
                    await this.addRelationshipToWrapper(relationshipFileInfo, zipContent, wrapper, media);
                    return { type: reference.type, header: wrapper };
                },
            ),
        );

        // See createFooters: the filter has to run on resolved values, not on promises.
        return result.filter((x): x is IDocumentHeader => x !== undefined);
    }

    /**
     * Reads `word/<target>` from the package and converts its root element into
     * an importable XML component.
     *
     * The element picked is the last top-level element named `rootName`; when no
     * element has that name the first top-level element is used instead.
     *
     * @returns the converted component, or `undefined` when the part has no elements
     */
    private async importRootComponent(zipContent: JSZip, target: string, rootName: string): Promise<ImportedXmlComponent | undefined> {
        const xmlData = await this.getZipEntry(zipContent, `word/${target}`).async("text");
        const xmlObj = xml2js(xmlData, { compact: false, captureSpacesBetweenElements: true }) as XMLElement;

        // reduce() without an initial value throws on an empty array, so guard both cases.
        if (!xmlObj.elements || xmlObj.elements.length === 0) {
            return undefined;
        }

        const xmlElement = xmlObj.elements.reduce((acc, current) => (current.name === rootName ? current : acc));
        return convertToXmlComponent(xmlElement) as ImportedXmlComponent;
    }

    /**
     * Copies the image and hyperlink relationships of a header/footer part onto
     * its wrapper. Images are read from the package and registered in `media`;
     * hyperlinks are re-created with an external target mode.
     *
     * Does nothing when the part has no `.rels` file of its own.
     */
    private async addRelationshipToWrapper(
        relationshipFile: IRelationshipFileInfo,
        zipContent: JSZip,
        wrapper: HeaderWrapper | FooterWrapper,
        media: Media,
    ): Promise<void> {
        const refFile = zipContent.files[`word/_rels/${relationshipFile.target}.rels`];

        if (!refFile) {
            return;
        }

        const xmlRef = await refFile.async("text");
        // Parse the relationships once and split them by kind afterwards.
        const references = this.findReferenceFiles(xmlRef);
        const wrapperImagesReferences = references.filter((r) => r.type === RelationshipType.IMAGE);
        const hyperLinkReferences = references.filter((r) => r.type === RelationshipType.HYPERLINK);

        for (const r of wrapperImagesReferences) {
            const buffer = await this.getZipEntry(zipContent, `word/${r.target}`).async("nodebuffer");
            const mediaData = media.addMedia(buffer);

            wrapper.Relationships.createRelationship(
                r.id,
                "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image",
                `media/${mediaData.fileName}`,
            );
        }

        for (const r of hyperLinkReferences) {
            wrapper.Relationships.createRelationship(
                r.id,
                "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink",
                r.target,
                TargetModeType.EXTERNAL,
            );
        }
    }

    /**
     * Parses a `.rels` part and returns the relationships whose `Type` URI is
     * listed in `schemeToType`; all other relationships are ignored.
     *
     * @throws Error when the part has no `Relationships` root, when a relationship
     *         element has no attributes, or when the id of a recognised
     *         relationship is not of the form `rId<number>`
     */
    private findReferenceFiles(xmlData: string): IRelationshipFileInfo[] {
        const xmlObj = xml2js(xmlData, { compact: true }) as XMLElementCompact;

        if (!xmlObj.Relationships) {
            throw new Error("relationships file has no Relationships element");
        }

        // A root without children yields `undefined` here; treat it as an empty list.
        const relationXmlArray = this.toArray(xmlObj.Relationships.Relationship);
        const knownTypes: { readonly [scheme: string]: string | undefined } = schemeToType;
        const relationships: IRelationshipFileInfo[] = [];

        for (const item of relationXmlArray) {
            if (item._attributes === undefined) {
                throw Error("relationship element has no attributes");
            }

            const typeUri = item._attributes.Type as string;
            // A failed lookup gives `undefined` (not `null`); the own-property check also
            // keeps inherited keys such as "constructor" from being treated as known types.
            if (!Object.prototype.hasOwnProperty.call(schemeToType, typeUri)) {
                continue;
            }

            relationships.push({
                // Parsed only for recognised relationships, so unrelated entries
                // with differently shaped ids cannot abort the import.
                id: this.parseRefId(item._attributes.Id as string),
                type: knownTypes[typeUri] as RelationshipType,
                target: item._attributes.Target as string,
            });
        }

        return relationships;
    }

    /**
     * Collects the header and footer references declared in the `w:sectPr`
     * element directly under the document body. A document without that element
     * yields empty lists.
     *
     * @throws Error when a reference element has no attributes or its `r:id` is
     *         not of the form `rId<number>`
     */
    private extractDocumentRefs(xmlData: string): IDocumentRefs {
        const sectionProp = this.findSectionProperties(xmlData);

        const headersXmlArray = this.toArray(sectionProp === undefined ? undefined : sectionProp["w:headerReference"]);
        const headers = headersXmlArray.map((item) => {
            if (item._attributes === undefined) {
                throw Error("header referecne element has no attributes");
            }
            return {
                type: item._attributes["w:type"] as HeaderReferenceType,
                id: this.parseRefId(item._attributes["r:id"] as string),
            };
        });

        const footersXmlArray = this.toArray(sectionProp === undefined ? undefined : sectionProp["w:footerReference"]);
        const footers = footersXmlArray.map((item) => {
            if (item._attributes === undefined) {
                throw Error("footer referecne element has no attributes");
            }
            return {
                type: item._attributes["w:type"] as FooterReferenceType,
                id: this.parseRefId(item._attributes["r:id"] as string),
            };
        });

        return { headers, footers };
    }

    /**
     * Tells whether the body-level `w:sectPr` element contains a `w:titlePg`
     * child. Returns `false` when the document has no such section properties.
     */
    private checkIfTitlePageIsDefined(xmlData: string): boolean {
        const sectionProp = this.findSectionProperties(xmlData);
        return sectionProp !== undefined && sectionProp["w:titlePg"] !== undefined;
    }

    /**
     * Parses the main document XML and returns `w:document / w:body / w:sectPr`,
     * or `undefined` when any element on that path is missing.
     */
    private findSectionProperties(xmlData: string): XMLElementCompact | undefined {
        const xmlObj = xml2js(xmlData, { compact: true }) as XMLElementCompact;
        const documentElement = xmlObj["w:document"];
        const body = documentElement === undefined ? undefined : documentElement["w:body"];
        return body === undefined ? undefined : body["w:sectPr"];
    }

    /**
     * Normalises a compact xml-js child value, which is absent for no element,
     * an object for one element and an array for several, into an array.
     */
    private toArray(value: XMLElementCompact | XMLElementCompact[] | undefined): XMLElementCompact[] {
        if (value === undefined) {
            return [];
        }
        return Array.isArray(value) ? value : [value];
    }

    /**
     * Returns the entry stored under `path` in the package.
     *
     * @throws Error when the package has no entry with that exact path
     */
    private getZipEntry(zipContent: JSZip, path: string) {
        const entry = zipContent.files[path];
        if (!entry) {
            throw new Error(`Can not find file "${path}" in the document package`);
        }
        return entry;
    }

    /**
     * Extracts the numeric part of a relationship id such as `rId12`.
     *
     * @throws Error when `str` is not exactly `rId` followed by decimal digits
     */
    private parseRefId(str: string): number {
        const match = /^rId(\d+)$/.exec(str);
        if (match === null) {
            throw new Error("Invalid ref id");
        }
        return parseInt(match[1], 10);
    }
}