Merge pull request #793 from gpodder/remove-advertise
[mygpo.git] / mygpo / web / templatetags / mygpoutil.py
blobc3e182bed3bec60a21978b333b8f6c28e089cb2e
1 import re
2 from html.entities import entitydefs
4 from django.utils.safestring import mark_safe
5 from django import template
6 from django.utils.safestring import mark_safe
9 register = template.Library()
# Patterns are compiled once at import time rather than on every filter call.
_RE_NEWLINE_TAGS = re.compile(r"(<br[^>]*>|<[/]?ul[^>]*>|</li>)", re.I)
_RE_LISTING_TAGS = re.compile(r"<li[^>]*>", re.I)
# "<p>" with or without attributes; the mandatory whitespace before any
# attribute keeps "<pre>", "<param>" etc. from matching.
_RE_PARAGRAPH_TAGS = re.compile(r"<p(?:\s[^>]*)?>", re.I)
_RE_STRIP_TAGS = re.compile(r"<[^>]*>")
# One pattern for decimal (&#38;), hexadecimal (&#x26;) and named (&amp;)
# references.  Digit counts are bounded so int() never sees huge input, and
# names are restricted to alphanumerics so plain text such as "Q&A: a; b" is
# not mistaken for an entity.
_RE_ENTITIES = re.compile(
    r"&(#[0-9]{1,7}|#[xX][0-9A-Fa-f]{1,6}|[A-Za-z][A-Za-z0-9]{1,7});"
)
_RE_EXCESS_NEWLINES = re.compile(r"([\r\n]{2})([\r\n])+")


def _decode_entity(match):
    """Return the character for one matched entity, or the match unchanged.

    Unknown names and numeric references that are not valid Unicode scalar
    values (out of range, or surrogates) are left exactly as they appeared.
    """
    body = match.group(1)
    if body[0] != "#":
        # entitydefs values are already ``str`` on Python 3; no decoding step.
        return entitydefs.get(body, match.group(0))

    if body[1] in "xX":
        codepoint = int(body[2:], 16)
    else:
        codepoint = int(body[1:])

    if codepoint > 0x10FFFF or 0xD800 <= codepoint <= 0xDFFF:
        return match.group(0)
    return chr(codepoint)


@register.filter()
def remove_html_tags(html):
    """Convert an HTML fragment to plain text.

    Line-break, list and paragraph tags are turned into newlines (list items
    get a " * " bullet), every remaining tag is removed, character references
    are decoded in a single pass (so "&#38;lt;" becomes "&lt;", not "<"), and
    runs of three or more newline characters are collapsed to two.

    Args:
        html: The markup to convert. A falsy value (``None``, ``""``) yields
            an empty string.

    Returns:
        The stripped plain text, wrapped with ``mark_safe``.
    """
    if not html:
        return mark_safe("")

    # Convert common HTML elements to their text equivalent
    result = _RE_NEWLINE_TAGS.sub("\n", html)
    result = _RE_LISTING_TAGS.sub("\n * ", result)
    result = _RE_PARAGRAPH_TAGS.sub("\n\n", result)

    # Remove all HTML/XML tags from the string
    result = _RE_STRIP_TAGS.sub("", result)

    # Convert numeric and named entities to their unicode character
    result = _RE_ENTITIES.sub(_decode_entity, result)

    # Convert more than two newlines to two newlines
    result = _RE_EXCESS_NEWLINES.sub(r"\1", result)

    # NOTE(review): entities are decoded *after* tag stripping, so input such
    # as "&lt;script&gt;" ends up as literal "<script>" in a string marked
    # safe, which bypasses template auto-escaping.  Kept as-is because callers
    # outside this file may rely on the SafeString return; consider returning
    # the plain string (and letting auto-escaping apply) after auditing them.
    return mark_safe(result.strip())