mygpo/web/templatetags/mygpoutil.py

   1 import re
   2 from html.entities import entitydefs
   3
   4 from django.utils.safestring import mark_safe
   5 from django import template
   6 from django.utils.safestring import mark_safe
   7
   8
   9 register = template.Library()
  10
  11
  12 @register.filter()
  13 def remove_html_tags(html):
  14     # If we would want more speed, we could make these global
  15     re_strip_tags = re.compile("<[^>]*>")
  16     re_unicode_entities = re.compile(r"&#(\d{2,4});")
  17     re_html_entities = re.compile("&(.{2,8});")
  18     re_newline_tags = re.compile("(<br[^>]*>|<[/]?ul[^>]*>|</li>)", re.I)
  19     re_listing_tags = re.compile("<li[^>]*>", re.I)
  20
  21     result = html
  22
  23     # Convert common HTML elements to their text equivalent
  24     result = re_newline_tags.sub("\n", result)
  25     result = re_listing_tags.sub("\n * ", result)
  26     result = re.sub("<[Pp]>", "\n\n", result)
  27
  28     # Remove all HTML/XML tags from the string
  29     result = re_strip_tags.sub("", result)
  30
  31     # Convert numeric XML entities to their unicode character
  32     result = re_unicode_entities.sub(lambda x: chr(int(x.group(1))), result)
  33
  34     # Convert named HTML entities to their unicode character
  35     result = re_html_entities.sub(
  36         lambda x: str(entitydefs.get(x.group(1), ""), "iso-8859-1"), result
  37     )
  38
  39     # Convert more than two newlines to two newlines
  40     result = re.sub("([\r\n]{2})([\r\n])+", "\\1", result)
  41
  42     return mark_safe(result.strip())