2023-05-01 20:37:39 +01:00
import JSZip from "jszip" ;
2023-02-18 20:36:24 +00:00
import { Element , js2xml } from "xml-js" ;
2023-02-25 19:33:12 +00:00
2024-10-21 03:57:15 +01:00
import { ImageReplacer } from "@export/packer/image-replacer" ;
2025-01-06 17:19:00 -05:00
import { DocumentAttributeNamespaces } from "@file/document" ;
2023-03-03 23:47:50 +00:00
import { IViewWrapper } from "@file/document-wrapper" ;
import { File } from "@file/file" ;
2024-10-21 03:57:15 +01:00
import { FileChild } from "@file/file-child" ;
import { IMediaData , Media } from "@file/media" ;
import { ConcreteHyperlink , ExternalHyperlink , ParagraphChild } from "@file/paragraph" ;
2023-03-08 23:30:51 +00:00
import { TargetModeType } from "@file/relationships/relationship/relationship" ;
2024-10-21 03:57:15 +01:00
import { IContext } from "@file/xml-components" ;
2023-03-08 23:30:51 +00:00
import { uniqueId } from "@util/convenience-functions" ;
2025-02-16 13:24:15 -05:00
import { OutputByType , OutputType } from "@util/output-type" ;
2023-02-25 19:33:12 +00:00
2024-10-21 03:57:15 +01:00
import { appendContentType } from "./content-types-manager" ;
import { appendRelationship , getNextRelationshipIndex } from "./relationship-manager" ;
2023-02-16 20:17:48 +00:00
import { replacer } from "./replacer" ;
2023-02-18 20:36:24 +00:00
import { toJson } from "./util" ;
2023-02-08 03:18:11 +00:00
/**
 * Accepted representations of the source document handed to `patchDocument`:
 * raw bytes/text in several container types, a readable stream, or an already
 * constructed JSZip instance.
 */
// eslint-disable-next-line functional/prefer-readonly-type
export type InputDataType = Buffer | string | number[] | Uint8Array | ArrayBuffer | Blob | NodeJS.ReadableStream | JSZip;
2023-02-08 03:18:11 +00:00
2023-12-22 10:25:00 +09:00
/**
 * Discriminant values for the two kinds of patch.
 *
 * `DOCUMENT` tags a patch whose children are file-level children, `PARAGRAPH`
 * tags a patch whose children are paragraph-level children.
 */
export const PatchType = { DOCUMENT: "file", PARAGRAPH: "paragraph" } as const;
2023-02-25 19:33:12 +00:00
/** Patch tagged with `PatchType.PARAGRAPH`; its replacement content is a list of paragraph children. */
type ParagraphPatch = {
    /** Discriminant: always `PatchType.PARAGRAPH`. */
    readonly type: (typeof PatchType)["PARAGRAPH"];
    /** Content substituted for the placeholder. */
    readonly children: readonly ParagraphChild[];
};
/** Patch tagged with `PatchType.DOCUMENT`; its replacement content is a list of file children. */
type FilePatch = {
    /** Discriminant: always `PatchType.DOCUMENT`. */
    readonly type: (typeof PatchType)["DOCUMENT"];
    /** Content substituted for the placeholder. */
    readonly children: readonly FileChild[];
};
2024-10-21 03:57:15 +01:00
/**
 * Media found in one patched part, queued so `patchDocument` can append the
 * matching image relationships to that part's `.rels` file afterwards.
 */
type IImageRelationshipAddition = {
    /** Zip entry name of the part the media was found in. */
    readonly key: string;
    /** Media items to register for that part. */
    readonly mediaDatas: readonly IMediaData[];
};
2023-03-03 23:47:50 +00:00
2024-10-21 03:57:15 +01:00
/**
 * Hyperlink queued for one patched part so `patchDocument` can append the
 * matching hyperlink relationship to that part's `.rels` file afterwards.
 */
type IHyperlinkRelationshipAddition = {
    /** Zip entry name of the part that contains the hyperlink. */
    readonly key: string;
    /** Relationship id and link target of the hyperlink. */
    readonly hyperlink: {
        readonly id: string;
        readonly link: string;
    };
};
2023-03-08 23:30:51 +00:00
2023-02-25 22:18:31 +00:00
/** A single patch: either paragraph-level or file-level replacement content, distinguished by `type`. */
export type IPatch = ParagraphPatch | FilePatch ;
2023-02-25 19:33:12 +00:00
2025-02-16 13:24:15 -05:00
/** Output formats `patchDocument` can be asked to produce; an alias of `OutputType`. */
export type PatchDocumentOutputType = OutputType ;
2023-12-31 23:16:48 +00:00
/**
 * Options accepted by `patchDocument`.
 *
 * @typeParam T - The requested output type; it determines the type of the resolved value.
 */
export type PatchDocumentOptions<T extends PatchDocumentOutputType = PatchDocumentOutputType> = {
    /** Format of the generated archive. */
    readonly outputType: T;
    /** Source document, or an already loaded JSZip instance. */
    readonly data: InputDataType;
    /** Patches keyed by placeholder name (the text between the delimiters). */
    readonly patches: Readonly<Record<string, IPatch>>;
    /** Forwarded as-is to the replacer. */
    readonly keepOriginalStyles?: boolean;
    /** Delimiters wrapped around each placeholder name; `patchDocument` defaults these to `{{` and `}}`. */
    readonly placeholderDelimiters?: Readonly<{
        readonly start: string;
        readonly end: string;
    }>;
    /** Whether a patch is re-applied until no occurrence is left; `patchDocument` defaults this to `true`. */
    readonly recursive?: boolean;
};
2023-02-16 20:17:48 +00:00
2023-03-03 23:47:50 +00:00
// Module-level ImageReplacer instance; patchDocument calls its getMediaData and replace methods.
const imageReplacer = new ImageReplacer ( ) ;
2025-04-16 10:03:49 +01:00
/** The two leading bytes (0xFF 0xFE) that mark a UTF-16 little-endian file. */
const UTF16LE = Buffer.from([0xff, 0xfe]);
/** The two leading bytes (0xFE 0xFF) that mark a UTF-16 big-endian file. */
const UTF16BE = Buffer.from([0xfe, 0xff]);
2023-03-03 23:47:50 +00:00
2023-12-31 23:16:48 +00:00
/**
 * Patches a document archive by replacing placeholder text with the supplied content.
 *
 * Loads `data` as a zip (or uses it directly when it already is a JSZip instance), parses
 * every `.xml` / `.rels` entry to JSON, runs `replacer` over each `word/` part for every
 * entry in `patches`, queues the image and hyperlink relationships the patches need,
 * writes those relationships into the matching `.rels` parts, and re-zips everything.
 *
 * @param outputType - Passed to JSZip `generateAsync` as the output `type`.
 * @param data - The source document, or an already loaded JSZip instance.
 * @param patches - Patches keyed by placeholder name; the text searched for is `start + key + end`.
 * @param keepOriginalStyles - Forwarded unchanged to `replacer`.
 * @param placeholderDelimiters - Delimiters wrapped around each key; defaults to `{{` and `}}`.
 * @param recursive - When true (the default) each patch is re-applied until `replacer` reports
 * no occurrence; when false each patch is applied once per part.
 * @returns The regenerated archive in the requested output type.
 * @throws Error when a delimiter is empty or whitespace-only (checked while processing a
 * `word/` part), or when media was added but `[Content_Types].xml` is missing.
 */
export const patchDocument = async < T extends PatchDocumentOutputType = PatchDocumentOutputType > ( {
outputType ,
data ,
patches ,
keepOriginalStyles ,
2025-04-16 12:48:55 +05:30
placeholderDelimiters = { start : "{{" , end : "}}" } as const ,
2025-04-16 10:03:49 +01:00
/ * *
* Search for occurrences over patched document
* /
recursive = true ,
2023-12-31 23:16:48 +00:00
} : PatchDocumentOptions < T > ) : Promise < OutputByType [ T ] > = > {
2025-04-16 10:03:49 +01:00
// An already loaded JSZip instance is used as-is; anything else is loaded as a zip archive.
const zipContent = data instanceof JSZip ? data : await JSZip . loadAsync ( data ) ;
2023-03-13 21:35:16 +00:00
// Patch contexts keyed by zip entry name.
const contexts = new Map < string , IContext > ( ) ;
// Minimal stand-in for a File: only a Media collection is provided, hence the cast.
const file = {
Media : new Media ( ) ,
} as unknown as File ;
2023-03-03 23:47:50 +00:00
2023-02-17 10:38:03 +00:00
// Parsed .xml / .rels entries keyed by zip entry name; serialized back to XML at the end.
const map = new Map < string , Element > ( ) ;
2023-02-08 03:18:11 +00:00
2023-03-03 23:47:50 +00:00
// eslint-disable-next-line functional/prefer-readonly-type
2023-03-08 23:30:51 +00:00
const imageRelationshipAdditions : IImageRelationshipAddition [ ] = [ ] ;
// eslint-disable-next-line functional/prefer-readonly-type
const hyperlinkRelationshipAdditions : IHyperlinkRelationshipAddition [ ] = [ ] ;
2023-03-03 23:47:50 +00:00
let hasMedia = false ;
2023-07-20 20:46:53 +01:00
// Entries copied to the output unchanged, as raw bytes.
const binaryContentMap = new Map < string , Uint8Array > ( ) ;
2023-03-17 16:30:35 +01:00
2023-02-08 03:18:11 +00:00
for ( const [ key , value ] of Object . entries ( zipContent . files ) ) {
2025-04-16 10:03:49 +01:00
const binaryValue = await value . async ( "uint8array" ) ;
const startBytes = binaryValue . slice ( 0 , 2 ) ;
// Entries that start with a UTF-16 byte-order mark are not parsed; they are kept as raw bytes.
if ( UTF16LE . equals ( startBytes ) || UTF16BE . equals ( startBytes ) ) {
binaryContentMap . set ( key , binaryValue ) ;
continue ;
}
2023-03-21 04:30:20 +00:00
// Only .xml and .rels entries are parsed; everything else is kept as raw bytes.
if ( ! key . endsWith ( ".xml" ) && ! key . endsWith ( ".rels" ) ) {
2025-04-16 10:03:49 +01:00
binaryContentMap . set ( key , binaryValue ) ;
2023-03-17 16:30:35 +01:00
continue ;
}
2023-02-08 03:18:11 +00:00
const json = toJson ( await value . async ( "text" ) ) ;
2025-01-06 17:19:00 -05:00
if ( key === "word/document.xml" ) {
const document = json . elements ? . find ( ( i ) = > i . name === "w:document" ) ;
2025-04-16 10:03:49 +01:00
if ( document && document . attributes ) {
2025-01-06 17:19:00 -05:00
// We could check all namespaces from Document, but we'll instead
// check only those that may be used by our element types.
for ( const ns of [ "mc" , "wp" , "r" , "w15" , "m" ] as const ) {
// eslint-disable-next-line functional/immutable-data
document . attributes [ ` xmlns: ${ ns } ` ] = DocumentAttributeNamespaces [ ns ] ;
}
// Append "w15" to whatever mc:Ignorable value is already present (or start from empty).
// eslint-disable-next-line functional/immutable-data
document . attributes [ "mc:Ignorable" ] = ` ${ document . attributes [ "mc:Ignorable" ] || "" } w15 ` . trim ( ) ;
}
}
2023-03-03 23:47:50 +00:00
// Patches are applied to word/ parts only; their .xml.rels relationship files are skipped.
if ( key . startsWith ( "word/" ) && ! key . endsWith ( ".xml.rels" ) ) {
2023-03-13 21:35:16 +00:00
const context : IContext = {
file ,
viewWrapper : {
Relationships : {
2023-12-22 10:25:00 +09:00
// Queues the relationship for this part instead of writing it right away;
// the second and fourth arguments are ignored.
createRelationship : (
linkId : string ,
_ : string ,
target : string ,
__ : ( typeof TargetModeType ) [ keyof typeof TargetModeType ] ,
) = > {
2023-03-13 21:35:16 +00:00
// eslint-disable-next-line functional/immutable-data
hyperlinkRelationshipAdditions . push ( {
key ,
hyperlink : {
id : linkId ,
link : target ,
} ,
} ) ;
} ,
} ,
} as unknown as IViewWrapper ,
stack : [ ] ,
} ;
contexts . set ( key , context ) ;
2025-04-16 12:48:55 +05:30
// Reject delimiters that are empty or consist only of whitespace.
if ( ! placeholderDelimiters ? . start . trim ( ) || ! placeholderDelimiters ? . end . trim ( ) ) {
throw new Error ( "Both start and end delimiters must be non-empty strings." ) ;
}
const { start , end } = placeholderDelimiters ;
2023-12-31 23:16:48 +00:00
for ( const [ patchKey , patchValue ] of Object . entries ( patches ) ) {
2025-04-16 12:48:55 +05:30
const patchText = ` ${ start } ${ patchKey } ${ end } ` ;
2023-02-25 22:18:56 +00:00
// TODO: mutates json. Make it immutable
2023-12-31 23:16:48 +00:00
// We need to loop through to catch every occurrence of the patch text
// It is possible that the patch text is in the same run
// This algorithm is limited to one patch per text run
2024-11-28 11:35:11 +01:00
// We break out of the loop once it cannot find any more occurrences
2023-12-31 23:16:48 +00:00
// https://github.com/dolanmiu/docx/issues/2267
while ( true ) {
2024-11-28 11:35:11 +01:00
const { didFindOccurrence } = replacer ( {
json ,
patch : {
. . . patchValue ,
children : patchValue.children.map ( ( element ) = > {
// We need to replace external hyperlinks with concrete hyperlinks
if ( element instanceof ExternalHyperlink ) {
const concreteHyperlink = new ConcreteHyperlink ( element . options . children , uniqueId ( ) ) ;
// eslint-disable-next-line functional/immutable-data
hyperlinkRelationshipAdditions . push ( {
key ,
hyperlink : {
id : concreteHyperlink.linkId ,
link : element.options.link ,
} ,
} ) ;
return concreteHyperlink ;
} else {
return element ;
}
} ) ,
// eslint-disable-next-line @typescript-eslint/no-explicit-any
} as any ,
patchText ,
context ,
keepOriginalStyles ,
} ) ;
2025-04-16 10:03:49 +01:00
// Every further pass searches the already patched json again, which is slow for large,
// deeply nested documents. Stop after one pass when `recursive` is off, or as soon as
// no occurrence was found.
if ( ! recursive || ! didFindOccurrence ) {
2023-12-31 23:16:48 +00:00
break ;
}
}
2023-03-03 23:47:50 +00:00
}
// Ask imageReplacer for the media data associated with the (stringified) patched json.
const mediaDatas = imageReplacer . getMediaData ( JSON . stringify ( json ) , context . file . Media ) ;
if ( mediaDatas . length > 0 ) {
hasMedia = true ;
// eslint-disable-next-line functional/immutable-data
2023-03-08 23:30:51 +00:00
imageRelationshipAdditions . push ( {
2023-03-03 23:47:50 +00:00
key ,
mediaDatas ,
} ) ;
2023-02-17 10:38:03 +00:00
}
2023-02-16 20:17:48 +00:00
}
2023-02-08 03:18:11 +00:00
map . set ( key , json ) ;
}
2023-03-08 23:30:51 +00:00
// Append one image relationship per queued media item to the .rels part of the owning part.
for ( const { key , mediaDatas } of imageRelationshipAdditions ) {
2023-03-03 23:47:50 +00:00
// eslint-disable-next-line functional/immutable-data
2023-03-08 23:30:51 +00:00
// The .rels entry name is derived from the last path segment of the part's entry name.
const relationshipKey = ` word/_rels/ ${ key . split ( "/" ) . pop ( ) } .rels ` ;
2023-03-16 01:55:18 +00:00
// Reuse the existing relationships part, or start from an empty one.
const relationshipsJson = map . get ( relationshipKey ) ? ? createRelationshipFile ( ) ;
map . set ( relationshipKey , relationshipsJson ) ;
2023-03-08 23:30:51 +00:00
const index = getNextRelationshipIndex ( relationshipsJson ) ;
// imageReplacer.replace works on the stringified part and is given the first free
// relationship index; the relationships appended below use index + i.
const newJson = imageReplacer . replace ( JSON . stringify ( map . get ( key ) ) , mediaDatas , index ) ;
map . set ( key , JSON . parse ( newJson ) as Element ) ;
2023-03-16 01:55:18 +00:00
for ( let i = 0 ; i < mediaDatas . length ; i ++ ) {
const { fileName } = mediaDatas [ i ] ;
2023-03-08 23:30:51 +00:00
appendRelationship (
relationshipsJson ,
2023-03-16 01:55:18 +00:00
index + i ,
2023-03-08 23:30:51 +00:00
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/image" ,
` media/ ${ fileName } ` ,
) ;
2023-03-03 23:47:50 +00:00
}
}
2023-03-08 23:30:51 +00:00
// Append one external hyperlink relationship per queued hyperlink to the owning part's .rels part.
for ( const { key , hyperlink } of hyperlinkRelationshipAdditions ) {
// eslint-disable-next-line functional/immutable-data
const relationshipKey = ` word/_rels/ ${ key . split ( "/" ) . pop ( ) } .rels ` ;
2023-03-15 03:14:38 +00:00
const relationshipsJson = map . get ( relationshipKey ) ? ? createRelationshipFile ( ) ;
map . set ( relationshipKey , relationshipsJson ) ;
2023-03-08 23:30:51 +00:00
appendRelationship (
relationshipsJson ,
hyperlink . id ,
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink" ,
hyperlink . link ,
TargetModeType . EXTERNAL ,
) ;
}
2023-03-03 23:47:50 +00:00
// When any media was added, register the image content types in [Content_Types].xml.
if ( hasMedia ) {
const contentTypesJson = map . get ( "[Content_Types].xml" ) ;
if ( ! contentTypesJson ) {
throw new Error ( "Could not find content types file" ) ;
}
appendContentType ( contentTypesJson , "image/png" , "png" ) ;
appendContentType ( contentTypesJson , "image/jpeg" , "jpeg" ) ;
appendContentType ( contentTypesJson , "image/jpeg" , "jpg" ) ;
appendContentType ( contentTypesJson , "image/bmp" , "bmp" ) ;
appendContentType ( contentTypesJson , "image/gif" , "gif" ) ;
2023-12-31 23:16:48 +00:00
appendContentType ( contentTypesJson , "image/svg+xml" , "svg" ) ;
2023-03-03 23:47:50 +00:00
}
2023-02-08 03:18:11 +00:00
// Build the output archive: serialized XML parts first, then the untouched raw entries.
const zip = new JSZip ( ) ;
for ( const [ key , value ] of map ) {
const output = toXml ( value ) ;
zip . file ( key , output ) ;
}
2023-03-17 16:30:35 +01:00
for ( const [ key , value ] of binaryContentMap ) {
zip . file ( key , value ) ;
}
2023-12-31 18:54:35 +00:00
// Store the data of every collected media item under word/media/.
for ( const { data : stream , fileName } of file . Media . Array ) {
2023-03-03 23:47:50 +00:00
zip . file ( ` word/media/ ${ fileName } ` , stream ) ;
}
return zip . generateAsync ( {
2023-12-31 23:16:48 +00:00
type : outputType ,
2023-02-08 03:18:11 +00:00
mimeType : "application/vnd.openxmlformats-officedocument.wordprocessingml.document" ,
compression : "DEFLATE" ,
} ) ;
} ;
2023-02-17 10:38:03 +00:00
/**
 * Serializes an xml-js element tree back to an XML string.
 *
 * Attribute values are XML-escaped while serializing: `&`, `<`, `>`, `"` and `'` are
 * written as the five predefined XML entities. An `&` that already starts one of those
 * entities is left untouched so previously escaped values are not escaped twice.
 *
 * @param jsonObj - The element tree to serialize.
 * @returns The XML text produced by `js2xml`.
 */
const toXml = (jsonObj: Element): string =>
    js2xml(jsonObj, {
        attributeValueFn: (str) =>
            String(str)
                // Must run first: the replacements below introduce new ampersands.
                .replace(/&(?!amp;|lt;|gt;|quot;|apos;)/g, "&amp;")
                .replace(/</g, "&lt;")
                .replace(/>/g, "&gt;")
                .replace(/"/g, "&quot;")
                .replace(/'/g, "&apos;"), // cspell:words apos
    });
2023-03-08 23:30:51 +00:00
/**
 * Builds a fresh, empty relationships part: an XML declaration plus a single
 * `Relationships` root element that has no children yet.
 *
 * @returns A new element tree on every call.
 */
const createRelationshipFile = (): Element => {
    // Root element that relationship entries are appended to.
    const relationshipsRoot = {
        type: "element",
        name: "Relationships",
        attributes: {
            xmlns: "http://schemas.openxmlformats.org/package/2006/relationships",
        },
        elements: [],
    };

    return {
        declaration: {
            attributes: {
                version: "1.0",
                encoding: "UTF-8",
                standalone: "yes",
            },
        },
        elements: [relationshipsRoot],
    };
};