From 154477be722ae5c4e18d22d0860e284006b09c4f Mon Sep 17 00:00:00 2001 From: Serhiy Storchaka Date: Wed, 23 Aug 2023 15:23:41 +0300 Subject: [PATCH] gh-50002: xml.dom.minidom now preserves whitespaces in attributes (GH-107947) Also double quotes (") are now only quoted in attributes. --- Lib/test/test_minidom.py | 40 +++++++++++++++++++ Lib/xml/dom/minidom.py | 30 ++++++++++---- ...3-08-14-20-01-14.gh-issue-50002.E-bpj8.rst | 1 + ...3-08-14-20-18-59.gh-issue-81555.cWdP4a.rst | 1 + 4 files changed, 65 insertions(+), 7 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-08-14-20-01-14.gh-issue-50002.E-bpj8.rst create mode 100644 Misc/NEWS.d/next/Library/2023-08-14-20-18-59.gh-issue-81555.cWdP4a.rst diff --git a/Lib/test/test_minidom.py b/Lib/test/test_minidom.py index 699265ccadc..3ecd1af31ee 100644 --- a/Lib/test/test_minidom.py +++ b/Lib/test/test_minidom.py @@ -505,6 +505,46 @@ class MinidomTest(unittest.TestCase): dom.unlink() self.confirm(str == domstr) + def test_toxml_quote_text(self): + dom = Document() + elem = dom.appendChild(dom.createElement('elem')) + elem.appendChild(dom.createTextNode('&<>"')) + cr = elem.appendChild(dom.createElement('cr')) + cr.appendChild(dom.createTextNode('\r')) + crlf = elem.appendChild(dom.createElement('crlf')) + crlf.appendChild(dom.createTextNode('\r\n')) + lflf = elem.appendChild(dom.createElement('lflf')) + lflf.appendChild(dom.createTextNode('\n\n')) + ws = elem.appendChild(dom.createElement('ws')) + ws.appendChild(dom.createTextNode('\t\n\r ')) + domstr = dom.toxml() + dom.unlink() + self.assertEqual(domstr, '<?xml version="1.0" ?>' + '<elem>&amp;&lt;&gt;"' + '<cr>\r</cr>' + '<crlf>\r\n</crlf>' + '<lflf>\n\n</lflf>' + '<ws>\t\n\r </ws></elem>') + + def test_toxml_quote_attrib(self): + dom = Document() + elem = dom.appendChild(dom.createElement('elem')) + elem.setAttribute("a", '&<>"') + elem.setAttribute("cr", "\r") + elem.setAttribute("lf", "\n") + elem.setAttribute("crlf", "\r\n") + elem.setAttribute("lflf", "\n\n") + elem.setAttribute("ws", "\t\n\r ") + domstr = dom.toxml() + dom.unlink() +
self.assertEqual(domstr, '<?xml version="1.0" ?>' + '<elem a="&amp;&lt;&gt;&quot;" ' + 'cr="&#13;" ' + 'lf="&#10;" ' + 'crlf="&#13;&#10;" ' + 'lflf="&#10;&#10;" ' + 'ws="&#9;&#10;&#13; "/>') + def testAltNewline(self): str = '<?xml version="1.0" ?>\n<a b="c"/>\n' dom = parseString(str) diff --git a/Lib/xml/dom/minidom.py b/Lib/xml/dom/minidom.py index ef8a159833b..db51f350ea0 100644 --- a/Lib/xml/dom/minidom.py +++ b/Lib/xml/dom/minidom.py @@ -300,12 +300,28 @@ def _in_document(node): node = node.parentNode return False -def _write_data(writer, data): +def _write_data(writer, text, attr): "Writes datachars to writer." - if data: - data = data.replace("&", "&amp;").replace("<", "&lt;"). \ - replace("\"", "&quot;").replace(">", "&gt;") - writer.write(data) + if not text: + return + # See the comments in ElementTree.py for behavior and + # implementation details. + if "&" in text: + text = text.replace("&", "&amp;") + if "<" in text: + text = text.replace("<", "&lt;") + if ">" in text: + text = text.replace(">", "&gt;") + if attr: + if '"' in text: + text = text.replace('"', "&quot;") + if "\r" in text: + text = text.replace("\r", "&#13;") + if "\n" in text: + text = text.replace("\n", "&#10;") + if "\t" in text: + text = text.replace("\t", "&#9;") + writer.write(text) def _get_elements_by_tagName_helper(parent, name, rc): for node in parent.childNodes: @@ -883,7 +899,7 @@ class Element(Node): for a_name in attrs.keys(): writer.write(" %s=\"" % a_name) - _write_data(writer, attrs[a_name].value) + _write_data(writer, attrs[a_name].value, True) writer.write("\"") if self.childNodes: writer.write(">") @@ -1112,7 +1128,7 @@ class Text(CharacterData): return newText def writexml(self, writer, indent="", addindent="", newl=""): - _write_data(writer, "%s%s%s" % (indent, self.data, newl)) + _write_data(writer, "%s%s%s" % (indent, self.data, newl), False) # DOM Level 3 (WD 9 April 2002) diff --git a/Misc/NEWS.d/next/Library/2023-08-14-20-01-14.gh-issue-50002.E-bpj8.rst b/Misc/NEWS.d/next/Library/2023-08-14-20-01-14.gh-issue-50002.E-bpj8.rst new file mode 100644 index 00000000000..ca5c0740802 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-08-14-20-01-14.gh-issue-50002.E-bpj8.rst @@ -0,0 +1 @@
+:mod:`xml.dom.minidom` now preserves whitespaces in attributes. diff --git a/Misc/NEWS.d/next/Library/2023-08-14-20-18-59.gh-issue-81555.cWdP4a.rst b/Misc/NEWS.d/next/Library/2023-08-14-20-18-59.gh-issue-81555.cWdP4a.rst new file mode 100644 index 00000000000..241a50f8b41 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-08-14-20-18-59.gh-issue-81555.cWdP4a.rst @@ -0,0 +1 @@ +:mod:`xml.dom.minidom` now only quotes ``"`` in attributes.