diff --git a/django/template/defaultfilters.py b/django/template/defaultfilters.py index 628b6627d2..d8b0c3439a 100644 --- a/django/template/defaultfilters.py +++ b/django/template/defaultfilters.py @@ -231,12 +231,12 @@ def make_list(value): @stringfilter def slugify(value): """ - Normalizes string, converts to lowercase, removes non-alpha characters, - and converts spaces to hyphens. + Converts to lowercase, removes non-word characters (alphanumerics and + underscores) and converts spaces to hyphens. Also strips leading and + trailing whitespace. """ - value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').decode() - value = re.sub('[^\w\s-]', '', value).strip().lower() - return mark_safe(re.sub('[-\s]+', '-', value)) + from django.utils.text import slugify + return slugify(value) @register.filter(is_safe=True) def stringformat(value, arg): diff --git a/django/utils/html.py b/django/utils/html.py index 647982a15f..13954ce195 100644 --- a/django/utils/html.py +++ b/django/utils/html.py @@ -123,6 +123,17 @@ def strip_tags(value): return re.sub(r'<[^>]*?>', '', force_text(value)) strip_tags = allow_lazy(strip_tags) +def remove_tags(html, tags): + """Returns the given HTML with given tags removed.""" + tags = [re.escape(tag) for tag in tags.split()] + tags_re = u'(%s)' % u'|'.join(tags) + starttag_re = re.compile(ur'<%s(/?>|(\s+[^>]*>))' % tags_re, re.U) + endtag_re = re.compile(u'</%s>' % tags_re) + html = starttag_re.sub(u'', html) + html = endtag_re.sub(u'', html) + return html +remove_tags = allow_lazy(remove_tags, unicode) + def strip_spaces_between_tags(value): """Returns the given HTML with spaces between tags removed.""" return re.sub(r'>\s+<', '><', force_text(value)) diff --git a/django/utils/text.py b/django/utils/text.py index ddeb29f2d2..cbafab0032 100644 --- a/django/utils/text.py +++ b/django/utils/text.py @@ -16,6 +16,7 @@ if not six.PY3: from django.utils.functional import allow_lazy, SimpleLazyObject from django.utils import six from 
django.utils.translation import ugettext_lazy, ugettext as _, pgettext +from django.utils.safestring import mark_safe # Capitalizes the first letter of a string. capfirst = lambda x: x and force_text(x)[0].upper() + force_text(x)[1:] @@ -383,3 +384,14 @@ def unescape_string_literal(s): quote = s[0] return s[1:-1].replace(r'\%s' % quote, quote).replace(r'\\', '\\') unescape_string_literal = allow_lazy(unescape_string_literal) + +def slugify(value): + """ + Converts to lowercase, removes non-word characters (alphanumerics and + underscores) and converts spaces to hyphens. Also strips leading and + trailing whitespace. + """ + value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore') + value = unicode(re.sub('[^\w\s-]', '', value).strip().lower()) + return mark_safe(re.sub('[-\s]+', '-', value)) +slugify = allow_lazy(slugify, unicode) diff --git a/docs/ref/utils.txt b/docs/ref/utils.txt index 20775fcc81..775d70738b 100644 --- a/docs/ref/utils.txt +++ b/docs/ref/utils.txt @@ -486,6 +486,33 @@ escaping HTML. through :func:`conditional_escape` which (ultimately) calls :func:`~django.utils.encoding.force_text` on the values. +.. function:: strip_tags(value) + + Removes anything that looks like an html tag from the string, that is + anything contained within ``<>``. + + For example:: + + strip_tags(value) + + If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"`` the + return value will be ``"Joel is a slug"``. + +.. function:: remove_tags(value, tags) + + Removes a space-separated list of [X]HTML tag names from the output. + + For example:: + + remove_tags(value, "b span") + + If ``value`` is ``"<b>Joel</b> <button>is</button> a <span>slug</span>"`` the + return value will be ``"Joel <button>is</button> a slug"``. + + Note that this filter is case-sensitive. + + If ``value`` is ``"<B>Joel</B> <button>is</button> a <span>slug</span>"`` the + return value will be ``"<B>Joel</B> <button>is</button> a slug"``. .. _str.format: http://docs.python.org/library/stdtypes.html#str.format @@ -599,6 +626,24 @@ appropriate entities. Can be called multiple times on a single string (the resulting escaping is only applied once). 
+``django.utils.text`` +===================== + +.. module:: django.utils.text + :synopsis: Text manipulation. + +.. function:: slugify + + Converts to lowercase, removes non-word characters (alphanumerics and + underscores) and converts spaces to hyphens. Also strips leading and trailing + whitespace. + + For example:: + + slugify(value) + + If ``value`` is ``"Joel is a slug"``, the output will be ``"joel-is-a-slug"``. + ``django.utils.translation`` ============================ diff --git a/docs/releases/1.5.txt b/docs/releases/1.5.txt index 5f27b7ccbb..29cbd45c49 100644 --- a/docs/releases/1.5.txt +++ b/docs/releases/1.5.txt @@ -267,6 +267,10 @@ Miscellaneous * :func:`~django.utils.http.int_to_base36` properly raises a :exc:`TypeError` instead of :exc:`ValueError` for non-integer inputs. +* The ``slugify`` template filter is now available as a standard python + function at :func:`django.utils.text.slugify`. Similarly, ``remove_tags`` is + available at :func:`django.utils.html.remove_tags`. + Features deprecated in 1.5 ========================== diff --git a/tests/regressiontests/utils/html.py b/tests/regressiontests/utils/html.py index fe40d4eaae..98df80a5e2 100644 --- a/tests/regressiontests/utils/html.py +++ b/tests/regressiontests/utils/html.py @@ -146,3 +146,12 @@ class TestUtilsHtml(unittest.TestCase): ) for value, output in items: self.check_output(f, value, output) + + def test_remove_tags(self): + f = html.remove_tags + items = ( + ("<b><i>Yes</i></b>", "b i", "Yes"), + ("<a>x</a> <p><b>y</b></p>", "a b", "x <p>y</p>"), + ) + for value, tags, output in items: + self.assertEquals(f(value, tags), output) diff --git a/tests/regressiontests/utils/text.py b/tests/regressiontests/utils/text.py index dd6de63841..9fa86d515c 100644 --- a/tests/regressiontests/utils/text.py +++ b/tests/regressiontests/utils/text.py @@ -113,3 +113,11 @@ class TestUtilsText(SimpleTestCase): self.assertEqual(text.wrap(long_word, 20), long_word) self.assertEqual(text.wrap('a %s word' % long_word, 10), 'a\n%s\nword' % long_word) + + def test_slugify(self): + items = ( + (u'Hello, World!', 'hello-world'), + (u'spam & eggs', 'spam-eggs'), + ) + for value, output in items: + self.assertEqual(text.slugify(value), output)