2 from django
import template
3 from django
.utils
.safestring
import mark_safe
6 from htmlentitydefs
import entitydefs
# Template library instance for this module. Django's template-tag loader
# looks for a module-level variable named ``register`` holding a
# ``template.Library``; filters and tags are attached to it.
register = template.Library()
def remove_html_tags(html):
    """Convert an HTML fragment to plain text.

    Line-breaking tags (``<br>``, ``<ul>``, ``</ul>``, ``</li>``) become a
    newline, ``<li>`` becomes a ``*`` bullet on a new line, a bare ``<p>``
    becomes a blank line, and every remaining tag is deleted.  Character
    references are then decoded in a single pass, runs of three or more
    newline characters are reduced to their first two, and leading and
    trailing whitespace is stripped.

    Args:
        html: The markup to convert.

    Returns:
        The plain-text result wrapped in ``mark_safe``.
    """
    # If we would want more speed, we could make these global
    re_strip_tags = re.compile(r'<[^>]*>')
    # One pattern for decimal (&#8364;), hexadecimal (&#x20AC;) and named
    # (&euro;) references.  Names are limited to alphanumerics so ordinary
    # text such as "Tom & Jerry;" is never mistaken for an entity.
    re_entities = re.compile(
        r'&(?:#(\d+)|#[xX]([0-9A-Fa-f]+)|([A-Za-z][A-Za-z0-9]*));')
    re_newline_tags = re.compile(r'(<br[^>]*>|<[/]?ul[^>]*>|</li>)', re.I)
    re_listing_tags = re.compile(r'<li[^>]*>', re.I)

    def decode_entity(match):
        """Return the character for one matched reference, or u'' if invalid."""
        decimal, hexadecimal, name = match.groups()
        if name is not None:
            replacement = entitydefs.get(name, '')
            if not replacement.startswith('&#'):
                # Either a single Latin-1 byte, or '' for an unknown name
                # (unknown names are dropped, as before).
                return unicode(replacement, 'iso-8859-1')
            # Python 2's entitydefs stores characters outside Latin-1 as a
            # decimal reference such as '&#8364;'; decode that number here.
            decimal = replacement[2:-1]
        try:
            if decimal is None:
                code_point = int(hexadecimal, 16)
            else:
                code_point = int(decimal)
            return unichr(code_point)
        except (ValueError, OverflowError):
            # Code point outside the range unichr() supports on this build.
            return u''

    result = html

    # Convert common HTML elements to their text equivalent
    result = re_newline_tags.sub('\n', result)
    result = re_listing_tags.sub('\n * ', result)
    result = re.sub('<[Pp]>', '\n\n', result)

    # Remove all HTML/XML tags from the string
    result = re_strip_tags.sub('', result)

    # Decode character references in one pass, after tag stripping, so that
    # decoded text is neither stripped as a tag nor decoded a second time
    # (e.g. "&#38;lt;" yields "&lt;", not "<").
    result = re_entities.sub(decode_entity, result)

    # Convert more than two newlines to two newlines
    result = re.sub('([\r\n]{2})([\r\n])+', '\\1', result)

    # NOTE(review): the text is marked safe *after* entities were decoded, so
    # input like "&lt;script&gt;" reaches the template as a literal
    # "<script>" that autoescaping will not touch.  Confirm every caller
    # passes trusted markup, or drop mark_safe and let the template escape.
    return mark_safe(result.strip())