remove unnecessary imports
[mygpo.git] / mygpo / web / templatetags / mygpoutil.py
blobe2d91b6877be582389f7028a5f133e0993b38a80
2 from django import template
3 from django.utils.safestring import mark_safe
5 import re
6 from htmlentitydefs import entitydefs
# Template library instance; filters in this module attach to it through
# the @register.filter decorator.
8 register = template.Library()
@register.filter
def remove_html_tags(html):
    """Convert an HTML fragment to plain text.

    Line-break and list tags become newlines (list items get a " * "
    bullet), ``<p>`` becomes a blank line, every remaining tag is removed,
    decimal (``&#NNN;``) and named (``&amp;``) entities are decoded, and
    runs of more than two newline characters are collapsed to two.

    Text between ``&`` and ``;`` that is not a known entity name is left
    untouched.  ``None`` is treated as an empty string.

    Returns the result, stripped of surrounding whitespace and wrapped in
    ``mark_safe``.
    """
    # Imported at function scope so this block is self-contained.
    # name2codepoint maps every entity name to its code point, whereas
    # entitydefs stores names outside Latin-1 as '&#NNN;' references
    # instead of characters.
    from htmlentitydefs import name2codepoint

    if html is None:
        return mark_safe('')

    # If we would want more speed, we could make these global
    re_strip_tags = re.compile(r'<[^>]*>')
    re_unicode_entities = re.compile(r'&#(\d{2,4});')
    # Only entity-name characters may sit between '&' and ';'.  A bare '.'
    # would also swallow ordinary text such as "AT&T rocks;".
    re_html_entities = re.compile(r'&([A-Za-z][A-Za-z0-9]{1,7});')
    re_newline_tags = re.compile(r'(<br[^>]*>|<[/]?ul[^>]*>|</li>)', re.I)
    re_listing_tags = re.compile(r'<li[^>]*>', re.I)

    def decode_named_entity(match):
        codepoint = name2codepoint.get(match.group(1))
        if codepoint is None:
            # Not an entity name we know: keep the text as written
            return match.group(0)
        return unichr(codepoint)

    result = html

    # Convert common HTML elements to their text equivalent
    result = re_newline_tags.sub('\n', result)
    result = re_listing_tags.sub('\n * ', result)
    result = re.sub(r'<[Pp]>', '\n\n', result)

    # Remove all HTML/XML tags from the string
    result = re_strip_tags.sub('', result)

    # Convert numeric XML entities to their unicode character
    result = re_unicode_entities.sub(lambda x: unichr(int(x.group(1))), result)

    # Convert named HTML entities to their unicode character
    result = re_html_entities.sub(decode_named_entity, result)

    # Convert more than two newlines to two newlines
    result = re.sub(r'([\r\n]{2})([\r\n])+', r'\1', result)

    # NOTE(review): entity decoding can reintroduce '<' and '>' (for example
    # from "&lt;script&gt;"), and mark_safe disables autoescaping for the
    # returned text.  Confirm that callers only pass trusted input, or
    # escape the result before marking it safe.
    return mark_safe(result.strip())