lib/jinja2/_markupsafe/__init__.py

   1 # -*- coding: utf-8 -*-
   2 """
   3     markupsafe
   4     ~~~~~~~~~~
   5
   6     Implements a Markup string.
   7
   8     :copyright: (c) 2010 by Armin Ronacher.
   9     :license: BSD, see LICENSE for more details.
  10 """
  11 import re
  12 from itertools import imap
  13
  14
  15 __all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent']
  16
  17
# Matches an HTML comment (``<!-- ... -->``) or any ``<...>`` tag.  Used by
# `Markup.striptags` to remove markup from a string.
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
# Matches an entity reference such as ``&amp;`` or ``&#38;``.  Group 1 is the
# text between ``&`` and ``;``, which `Markup.unescape` resolves.
_entity_re = re.compile(r'&([^;]+);')
  20
  21
  22 class Markup(unicode):
  23     r"""Marks a string as being safe for inclusion in HTML/XML output without
  24     needing to be escaped.  This implements the `__html__` interface a couple
  25     of frameworks and web applications use.  :class:`Markup` is a direct
  26     subclass of `unicode` and provides all the methods of `unicode` just that
  27     it escapes arguments passed and always returns `Markup`.
  28
  29     The `escape` function returns markup objects so that double escaping can't
  30     happen.
  31
  32     The constructor of the :class:`Markup` class can be used for three
  33     different things:  When passed an unicode object it's assumed to be safe,
  34     when passed an object with an HTML representation (has an `__html__`
  35     method) that representation is used, otherwise the object passed is
  36     converted into a unicode string and then assumed to be safe:
  37
  38     >>> Markup("Hello <em>World</em>!")
  39     Markup(u'Hello <em>World</em>!')
  40     >>> class Foo(object):
  41     ...  def __html__(self):
  42     ...   return '<a href="#">foo</a>'
  43     ...
  44     >>> Markup(Foo())
  45     Markup(u'<a href="#">foo</a>')
  46
  47     If you want object passed being always treated as unsafe you can use the
  48     :meth:`escape` classmethod to create a :class:`Markup` object:
  49
  50     >>> Markup.escape("Hello <em>World</em>!")
  51     Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')
  52
  53     Operations on a markup string are markup aware which means that all
  54     arguments are passed through the :func:`escape` function:
  55
  56     >>> em = Markup("<em>%s</em>")
  57     >>> em % "foo & bar"
  58     Markup(u'<em>foo &amp; bar</em>')
  59     >>> strong = Markup("<strong>%(text)s</strong>")
  60     >>> strong % {'text': '<blink>hacker here</blink>'}
  61     Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
  62     >>> Markup("<em>Hello</em> ") + "<foo>"
  63     Markup(u'<em>Hello</em> &lt;foo&gt;')
  64     """
  65     __slots__ = ()
  66
  67     def __new__(cls, base=u'', encoding=None, errors='strict'):
  68         if hasattr(base, '__html__'):
  69             base = base.__html__()
  70         if encoding is None:
  71             return unicode.__new__(cls, base)
  72         return unicode.__new__(cls, base, encoding, errors)
  73
  74     def __html__(self):
  75         return self
  76
  77     def __add__(self, other):
  78         if hasattr(other, '__html__') or isinstance(other, basestring):
  79             return self.__class__(unicode(self) + unicode(escape(other)))
  80         return NotImplemented
  81
  82     def __radd__(self, other):
  83         if hasattr(other, '__html__') or isinstance(other, basestring):
  84             return self.__class__(unicode(escape(other)) + unicode(self))
  85         return NotImplemented
  86
  87     def __mul__(self, num):
  88         if isinstance(num, (int, long)):
  89             return self.__class__(unicode.__mul__(self, num))
  90         return NotImplemented
  91     __rmul__ = __mul__
  92
  93     def __mod__(self, arg):
  94         if isinstance(arg, tuple):
  95             arg = tuple(imap(_MarkupEscapeHelper, arg))
  96         else:
  97             arg = _MarkupEscapeHelper(arg)
  98         return self.__class__(unicode.__mod__(self, arg))
  99
 100     def __repr__(self):
 101         return '%s(%s)' % (
 102             self.__class__.__name__,
 103             unicode.__repr__(self)
 104         )
 105
 106     def join(self, seq):
 107         return self.__class__(unicode.join(self, imap(escape, seq)))
 108     join.__doc__ = unicode.join.__doc__
 109
 110     def split(self, *args, **kwargs):
 111         return map(self.__class__, unicode.split(self, *args, **kwargs))
 112     split.__doc__ = unicode.split.__doc__
 113
 114     def rsplit(self, *args, **kwargs):
 115         return map(self.__class__, unicode.rsplit(self, *args, **kwargs))
 116     rsplit.__doc__ = unicode.rsplit.__doc__
 117
 118     def splitlines(self, *args, **kwargs):
 119         return map(self.__class__, unicode.splitlines(self, *args, **kwargs))
 120     splitlines.__doc__ = unicode.splitlines.__doc__
 121
 122     def unescape(self):
 123         r"""Unescape markup again into an unicode string.  This also resolves
 124         known HTML4 and XHTML entities:
 125
 126         >>> Markup("Main &raquo; <em>About</em>").unescape()
 127         u'Main \xbb <em>About</em>'
 128         """
 129         from jinja2._markupsafe._constants import HTML_ENTITIES
 130         def handle_match(m):
 131             name = m.group(1)
 132             if name in HTML_ENTITIES:
 133                 return unichr(HTML_ENTITIES[name])
 134             try:
 135                 if name[:2] in ('#x', '#X'):
 136                     return unichr(int(name[2:], 16))
 137                 elif name.startswith('#'):
 138                     return unichr(int(name[1:]))
 139             except ValueError:
 140                 pass
 141             return u''
 142         return _entity_re.sub(handle_match, unicode(self))
 143
 144     def striptags(self):
 145         r"""Unescape markup into an unicode string and strip all tags.  This
 146         also resolves known HTML4 and XHTML entities.  Whitespace is
 147         normalized to one:
 148
 149         >>> Markup("Main &raquo;  <em>About</em>").striptags()
 150         u'Main \xbb About'
 151         """
 152         stripped = u' '.join(_striptags_re.sub('', self).split())
 153         return Markup(stripped).unescape()
 154
 155     @classmethod
 156     def escape(cls, s):
 157         """Escape the string.  Works like :func:`escape` with the difference
 158         that for subclasses of :class:`Markup` this function would return the
 159         correct subclass.
 160         """
 161         rv = escape(s)
 162         if rv.__class__ is not cls:
 163             return cls(rv)
 164         return rv
 165
 166     def make_wrapper(name):
 167         orig = getattr(unicode, name)
 168         def func(self, *args, **kwargs):
 169             args = _escape_argspec(list(args), enumerate(args))
 170             _escape_argspec(kwargs, kwargs.iteritems())
 171             return self.__class__(orig(self, *args, **kwargs))
 172         func.__name__ = orig.__name__
 173         func.__doc__ = orig.__doc__
 174         return func
 175
 176     for method in '__getitem__', 'capitalize', \
 177                   'title', 'lower', 'upper', 'replace', 'ljust', \
 178                   'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
 179                   'translate', 'expandtabs', 'swapcase', 'zfill':
 180         locals()[method] = make_wrapper(method)
 181
 182     # new in python 2.5
 183     if hasattr(unicode, 'partition'):
 184         partition = make_wrapper('partition'),
 185         rpartition = make_wrapper('rpartition')
 186
 187     # new in python 2.6
 188     if hasattr(unicode, 'format'):
 189         format = make_wrapper('format')
 190
 191     # not in python 3
 192     if hasattr(unicode, '__getslice__'):
 193         __getslice__ = make_wrapper('__getslice__')
 194
 195     del method, make_wrapper
 196
 197
 198 def _escape_argspec(obj, iterable):
 199     """Helper for various string-wrapped functions."""
 200     for key, value in iterable:
 201         if hasattr(value, '__html__') or isinstance(value, basestring):
 202             obj[key] = escape(value)
 203     return obj
 204
 205
 206 class _MarkupEscapeHelper(object):
 207     """Helper for Markup.__mod__"""
 208
 209     def __init__(self, obj):
 210         self.obj = obj
 211
 212     __getitem__ = lambda s, x: _MarkupEscapeHelper(s.obj[x])
 213     __str__ = lambda s: str(escape(s.obj))
 214     __unicode__ = lambda s: unicode(escape(s.obj))
 215     __repr__ = lambda s: str(escape(repr(s.obj)))
 216     __int__ = lambda s: int(s.obj)
 217     __float__ = lambda s: float(s.obj)
 218
 219
# We have to import these down here because the speedups and native
# modules import the Markup type, which is defined above.  The
# `_speedups` module is tried first; `_native` is used if importing it
# fails.
try:
    from jinja2._markupsafe._speedups import escape, escape_silent, soft_unicode
except ImportError:
    from jinja2._markupsafe._native import escape, escape_silent, soft_unicode