Files
docx-js/src/importDocx/importDocx.ts

156 lines
6.5 KiB
TypeScript
Raw Normal View History

2018-08-29 18:36:48 +03:00
import * as fastXmlParser from "fast-xml-parser";
2018-09-06 08:30:23 +01:00
import * as JSZip from "jszip";
import { FooterReferenceType } from "file/document/body/section-properties/footer-reference";
import { HeaderReferenceType } from "file/document/body/section-properties/header-reference";
2018-09-17 20:06:51 +01:00
import { FooterWrapper, IDocumentFooter } from "file/footer-wrapper";
import { HeaderWrapper, IDocumentHeader } from "file/header-wrapper";
2018-09-06 08:30:23 +01:00
import { convertToXmlComponent, ImportedXmlComponent, parseOptions } from "file/xml-components";
2018-09-04 17:16:31 +03:00
/**
 * Maps the relationship `Type` URIs this importer understands to the short
 * kind names stored in `IRelationshipFileInfo.type`. A URI that is not a key
 * of this object has no entry, so looking it up yields `undefined`.
 */
const schemeToType = {
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header": "header",
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer": "footer",
    "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image": "image",
};
/**
 * Header and footer references read from the section properties of the main
 * document part. Each `id` is the numeric part of the reference's `r:id`
 * attribute (`rId<N>`), and `type` is the value of its `w:type` attribute.
 */
interface IDocumentRefs {
    headers: Array<{ id: number; type: HeaderReferenceType }>;
    footers: Array<{ id: number; type: FooterReferenceType }>;
}
2018-09-17 20:06:51 +01:00
/**
 * One entry of a `.rels` part, reduced to what the importer needs.
 */
interface IRelationshipFileInfo {
    /** Numeric part of the relationship's `Id` attribute (`rId<N>`). */
    id: number;
    /** The relationship's `Target` attribute; the importer resolves it under `word/`. */
    targetFile: string;
    /** Kind of part the relationship points at, derived from its `Type` URI. */
    type: "header" | "footer" | "image";
}
2018-09-06 08:30:23 +01:00
/**
 * Result of importing a template document with `ImportDocx.extract`.
 */
export interface ITemplateDocument {
    /** Value of the importer's relationship-id counter after all wrappers were created. */
    currentRelationshipId: number;
    /** Imported headers, each paired with the reference type it was declared with. */
    headers: IDocumentHeader[];
    /** Imported footers, each paired with the reference type it was declared with. */
    footers: IDocumentFooter[];
}
2018-08-29 18:36:48 +03:00
/**
 * Extracts the headers and footers (including the images they reference)
 * from an existing .docx archive so they can be reused as a template.
 */
export class ImportDocx {
    // Next id handed to a HeaderWrapper/FooterWrapper; keeps counting across extract() calls.
    private currentRelationshipId: number;

    constructor() {
        this.currentRelationshipId = 1;
    }

    /**
     * Loads a .docx archive and imports every header and footer referenced by
     * the section properties of `word/document.xml`.
     *
     * @param data raw bytes of the .docx (zip) file
     * @returns the imported headers and footers plus the relationship-id counter value
     * @throws Error when `word/document.xml` or a referenced header/footer part is
     *         missing, or when a reference id has no matching relationship
     */
    public async extract(data: Buffer): Promise<ITemplateDocument> {
        const zipContent = await JSZip.loadAsync(data);

        const documentXml = await this.getZipEntry(zipContent, "word/document.xml").async("text");
        const documentRefs: IDocumentRefs = this.extractDocumentRefs(documentXml);

        // A document without a relationships part simply has no relationships;
        // any header/footer reference will then fail with a clear lookup error below.
        const relationshipContent = zipContent.files["word/_rels/document.xml.rels"];
        const documentRelationships: IRelationshipFileInfo[] = relationshipContent
            ? this.findReferenceFiles(await relationshipContent.async("text"))
            : [];

        const headers: IDocumentHeader[] = [];
        for (const headerRef of documentRefs.headers) {
            const relationFileInfo = this.findRelationship(documentRelationships, headerRef.id);
            const importedComp = await this.importComponent(zipContent, relationFileInfo, "w:hdr");
            const header = new HeaderWrapper(this.currentRelationshipId++, importedComp);
            await this.addImagesToWrapper(relationFileInfo, zipContent, header);
            headers.push({ type: headerRef.type, header });
        }

        const footers: IDocumentFooter[] = [];
        for (const footerRef of documentRefs.footers) {
            const relationFileInfo = this.findRelationship(documentRelationships, footerRef.id);
            const importedComp = await this.importComponent(zipContent, relationFileInfo, "w:ftr");
            const footer = new FooterWrapper(this.currentRelationshipId++, importedComp);
            await this.addImagesToWrapper(relationFileInfo, zipContent, footer);
            footers.push({ type: footerRef.type, footer });
        }

        return { headers, footers, currentRelationshipId: this.currentRelationshipId };
    }

    /**
     * Registers every image referenced by a header/footer part on its wrapper.
     * Does nothing when the part has no `.rels` file of its own.
     *
     * @param relationFile relationship entry of the header/footer part
     * @param zipContent the opened .docx archive
     * @param wrapper wrapper that receives the image relationships
     * @throws Error when a referenced image file is not present in the archive
     */
    public async addImagesToWrapper(
        relationFile: IRelationshipFileInfo,
        zipContent: JSZip,
        wrapper: HeaderWrapper | FooterWrapper,
    ): Promise<void> {
        const refFile = zipContent.files[`word/_rels/${relationFile.targetFile}.rels`];
        if (!refFile) {
            return;
        }

        const imageReferences = this.findReferenceFiles(await refFile.async("text")).filter((r) => r.type === "image");
        // Sequential on purpose: images are registered in the order they appear in the rels file.
        for (const r of imageReferences) {
            const buffer = await this.getZipEntry(zipContent, `word/${r.targetFile}`).async("nodebuffer");
            wrapper.addImageRelationship(buffer, r.id);
        }
    }

    /**
     * Parses a `.rels` part and returns its header, footer and image
     * relationships. Relationships of any other type are skipped before their
     * id is parsed, so unrelated entries cannot make the import fail.
     *
     * @param xmlData text content of the `.rels` part
     * @throws Error ("Invalid ref id") when a supported relationship has an id
     *         that is not of the form `rId<N>`
     */
    public findReferenceFiles(xmlData: string): IRelationshipFileInfo[] {
        const xmlObj = fastXmlParser.parse(xmlData, parseOptions);
        const root = xmlObj.Relationships;
        const relations: IRelationshipFileInfo[] = [];

        for (const item of this.toArray(root ? root.Relationship : undefined)) {
            const attrs = (item && item._attr) || {};
            const type = schemeToType[attrs.Type];
            // Unknown schemes look up as undefined; the typeof check also rejects
            // inherited object members such as "constructor".
            if (typeof type !== "string") {
                continue;
            }
            relations.push({
                id: this.parseRefId(attrs.Id),
                type,
                targetFile: attrs.Target,
            });
        }

        return relations;
    }

    /**
     * Reads the header and footer references from the section properties of
     * the document body. A document without section properties, or without
     * header/footer references, yields empty lists.
     *
     * @param xmlData text content of `word/document.xml`
     * @throws Error ("Invalid ref id") when a reference has a missing or malformed `r:id`
     */
    public extractDocumentRefs(xmlData: string): IDocumentRefs {
        const xmlObj = fastXmlParser.parse(xmlData, parseOptions);
        const documentNode = xmlObj["w:document"];
        const bodyNode = documentNode ? documentNode["w:body"] : undefined;
        const sectionProp = bodyNode ? bodyNode["w:sectPr"] : undefined;

        const readRefs = (tagName: string) =>
            this.toArray(sectionProp ? sectionProp[tagName] : undefined).map((item) => {
                const attrs = (item && item._attr) || {};
                return {
                    type: attrs["w:type"],
                    id: this.parseRefId(attrs["r:id"]),
                };
            });

        const headers = readRefs("w:headerReference");
        const footers = readRefs("w:footerReference");
        return { headers, footers };
    }

    /**
     * Converts a relationship id of the form `rId<N>` to the number `N`.
     *
     * @throws Error ("Invalid ref id") when the string does not have that form
     */
    public parseRefId(str: string): number {
        const match = /^rId(\d+)$/.exec(str);
        if (match === null) {
            throw new Error("Invalid ref id");
        }
        return parseInt(match[1], 10);
    }

    /** Returns the relationship with the given id or throws when there is none. */
    private findRelationship(relationships: IRelationshipFileInfo[], id: number): IRelationshipFileInfo {
        const relationFileInfo = relationships.find((rel) => rel.id === id);
        if (!relationFileInfo) {
            throw new Error(`Can not find target file for id ${id}`);
        }
        return relationFileInfo;
    }

    /** Reads a header/footer part from the archive and converts its root element to a component. */
    private async importComponent(
        zipContent: JSZip,
        relationFile: IRelationshipFileInfo,
        rootKey: string,
    ): Promise<ImportedXmlComponent> {
        const xmlData = await this.getZipEntry(zipContent, `word/${relationFile.targetFile}`).async("text");
        const xmlObj = fastXmlParser.parse(xmlData, parseOptions);
        return convertToXmlComponent(rootKey, xmlObj[rootKey]) as ImportedXmlComponent;
    }

    /** Returns the archive entry at `path`, throwing a descriptive error instead of a TypeError when absent. */
    private getZipEntry(zipContent: JSZip, path: string): JSZip.JSZipObject {
        const entry = zipContent.files[path];
        if (!entry) {
            throw new Error(`Can not find file ${path} in the document archive`);
        }
        return entry;
    }

    /** Normalises a parser result that may be absent, a single node or a list of nodes to a list. */
    private toArray<T>(value: T | T[] | null | undefined): T[] {
        if (value === undefined || value === null) {
            return [];
        }
        if (Array.isArray(value)) {
            return value as T[];
        }
        return [value as T];
    }
}