diff --git a/django/utils/html.py b/django/utils/html.py index 094bc6660d..7fda015840 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -13,7 +13,7 @@ LEADING_PUNCTUATION = ['(', '<', '<'] TRAILING_PUNCTUATION = ['.', ',', ')', '>', '\n', '>'] # List of possible strings used for bullets in bulleted lists. -DOTS = ['·', '*', '\xe2\x80\xa2', '', '•', '•'] +DOTS = [u'·', u'*', u'\u2022', u'', u'•', u'•'] unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)') word_split_re = re.compile(r'(\s+)') @@ -180,13 +180,13 @@ def clean_html(text): text = html_gunk_re.sub('', text) # Convert hard-coded bullets into HTML unordered lists. def replace_p_tags(match): - s = match.group().replace('
', '') + s = match.group().replace(u'', u'') for d in DOTS: - s = s.replace('%s' % d, '
%s' % d, u'
", but only if it's at the bottom # of the text. - text = trailing_empty_content_re.sub('', text) + text = trailing_empty_content_re.sub(u'', text) return text clean_html = allow_lazy(clean_html, unicode) diff --git a/tests/regressiontests/utils/html.py b/tests/regressiontests/utils/html.py index 3acb218cd1..d8b9bde8bf 100644 --- a/tests/regressiontests/utils/html.py +++ b/tests/regressiontests/utils/html.py @@ -121,3 +121,15 @@ class TestUtilsHtml(unittest.TestCase): ) for value, output in items: self.check_output(f, value, output) + + def test_clean_html(self): + f = html.clean_html + items = ( + (u'
I believe in semantic markup!
', u'I believe in semantic markup!
'), + (u'I escape & I don\'t target', u'I escape & I don\'t target'), + (u'I kill whitespace
', u'
I kill whitespace
'), + # also a regression test for #7267: this used to raise an UnicodeDecodeError + (u'* foo
* bar
', u'