# -*- coding: utf-8 -*-
"""
Implements a Markup string.

:copyright: (c) 2010 by Armin Ronacher.
:license: BSD, see LICENSE for more details.
"""
import re
from itertools import imap
# Public names of this module.
__all__ = ['Markup', 'soft_unicode', 'escape', 'escape_silent']

# Matches one HTML comment or one tag; Markup.striptags() removes every match.
_striptags_re = re.compile(r'(<!--.*?-->|<[^>]*>)')
# Matches an entity reference and captures the text between "&" and ";";
# Markup.unescape() substitutes every match.
_entity_re = re.compile(r'&([^;]+);')
class Markup(unicode):
    r"""Marks a string as being safe for inclusion in HTML/XML output without
    needing to be escaped.  This implements the `__html__` interface a couple
    of frameworks and web applications use.  :class:`Markup` is a direct
    subclass of `unicode` and provides all the methods of `unicode` just that
    it escapes arguments passed and always returns `Markup`.

    The `escape` function returns markup objects so that double escaping can't
    happen.

    The constructor of the :class:`Markup` class can be used for three
    different things:  When passed an unicode object it's assumed to be safe,
    when passed an object with an HTML representation (has an `__html__`
    method) that representation is used, otherwise the object passed is
    converted into a unicode string and then assumed to be safe:

    >>> Markup("Hello <em>World</em>!")
    Markup(u'Hello <em>World</em>!')
    >>> class Foo(object):
    ...  def __html__(self):
    ...   return '<a href="#">foo</a>'
    ...
    >>> Markup(Foo())
    Markup(u'<a href="#">foo</a>')

    If you want object passed being always treated as unsafe you can use the
    :meth:`escape` classmethod to create a :class:`Markup` object:

    >>> Markup.escape("Hello <em>World</em>!")
    Markup(u'Hello &lt;em&gt;World&lt;/em&gt;!')

    Operations on a markup string are markup aware which means that all
    arguments are passed through the :func:`escape` function:

    >>> em = Markup("<em>%s</em>")
    >>> em % "foo & bar"
    Markup(u'<em>foo &amp; bar</em>')
    >>> strong = Markup("<strong>%(text)s</strong>")
    >>> strong % {'text': '<blink>hacker here</blink>'}
    Markup(u'<strong>&lt;blink&gt;hacker here&lt;/blink&gt;</strong>')
    >>> Markup("<em>Hello</em> ") + "<foo>"
    Markup(u'<em>Hello</em> &lt;foo&gt;')
    """
    __slots__ = ()

    def __new__(cls, base=u'', encoding=None, errors='strict'):
        """Create the markup string.

        :param base: the value to wrap; if it has an `__html__` method the
                     result of calling it is used instead.
        :param encoding: when given, `base` is decoded with this encoding.
        :param errors: decoding error mode, only used with `encoding`.
        """
        if hasattr(base, '__html__'):
            base = base.__html__()
        # unicode() refuses the encoding/errors arguments for values that
        # are already unicode, so only forward them when requested.
        if encoding is None:
            return unicode.__new__(cls, base)
        return unicode.__new__(cls, base, encoding, errors)

    def __html__(self):
        """Return the HTML representation, which is the string itself."""
        return self

    def __add__(self, other):
        """Concatenate with `other`, escaping it first."""
        if hasattr(other, '__html__') or isinstance(other, basestring):
            return self.__class__(unicode(self) + unicode(escape(other)))
        return NotImplemented

    def __radd__(self, other):
        """Concatenate `other` in front of this string, escaping it first."""
        if hasattr(other, '__html__') or isinstance(other, basestring):
            return self.__class__(unicode(escape(other)) + unicode(self))
        return NotImplemented

    def __mul__(self, num):
        """Repeat the string `num` times and keep the markup type."""
        if isinstance(num, (int, long)):
            return self.__class__(unicode.__mul__(self, num))
        return NotImplemented
    __rmul__ = __mul__

    def __mod__(self, arg):
        """Apply ``%`` formatting with every argument escaped.

        Arguments are wrapped in :class:`_MarkupEscapeHelper`, which escapes
        them at the moment the format operation converts them.
        """
        if isinstance(arg, tuple):
            arg = tuple(imap(_MarkupEscapeHelper, arg))
        else:
            arg = _MarkupEscapeHelper(arg)
        return self.__class__(unicode.__mod__(self, arg))

    def __repr__(self):
        return '%s(%s)' % (
            self.__class__.__name__,
            unicode.__repr__(self)
        )

    def join(self, seq):
        return self.__class__(unicode.join(self, imap(escape, seq)))
    join.__doc__ = unicode.join.__doc__

    def split(self, *args, **kwargs):
        return map(self.__class__, unicode.split(self, *args, **kwargs))
    split.__doc__ = unicode.split.__doc__

    def rsplit(self, *args, **kwargs):
        return map(self.__class__, unicode.rsplit(self, *args, **kwargs))
    rsplit.__doc__ = unicode.rsplit.__doc__

    def splitlines(self, *args, **kwargs):
        return map(self.__class__, unicode.splitlines(self, *args, **kwargs))
    splitlines.__doc__ = unicode.splitlines.__doc__

    def unescape(self):
        r"""Unescape markup again into an unicode string.  This also resolves
        known HTML4 and XHTML entities:

        >>> Markup("Main &raquo; <em>About</em>").unescape()
        u'Main \xbb <em>About</em>'
        """
        from jinja2._markupsafe._constants import HTML_ENTITIES

        def handle_match(m):
            name = m.group(1)
            if name in HTML_ENTITIES:
                return unichr(HTML_ENTITIES[name])
            try:
                if name[:2] in ('#x', '#X'):
                    return unichr(int(name[2:], 16))
                elif name.startswith('#'):
                    return unichr(int(name[1:]))
            except (ValueError, OverflowError):
                # Malformed or out-of-range numeric reference; drop it
                # like any other unknown entity.
                pass
            return u''
        return _entity_re.sub(handle_match, unicode(self))

    def striptags(self):
        r"""Unescape markup into an unicode string and strip all tags.  This
        also resolves known HTML4 and XHTML entities.  Whitespace is
        normalized to one:

        >>> Markup("Main &raquo;  <em>About</em>").striptags()
        u'Main \xbb About'
        """
        stripped = u' '.join(_striptags_re.sub('', self).split())
        return Markup(stripped).unescape()

    @classmethod
    def escape(cls, s):
        """Escape the string.  Works like :func:`escape` with the difference
        that for subclasses of :class:`Markup` this function would return the
        correct subclass.
        """
        rv = escape(s)
        if rv.__class__ is not cls:
            return cls(rv)
        return rv

    def make_wrapper(name):
        """Build a method that calls ``unicode.<name>`` with escaped
        string arguments and wraps the result in the markup class.

        Only suitable for methods that return a single string.
        """
        orig = getattr(unicode, name)

        def func(self, *args, **kwargs):
            args = _escape_argspec(list(args), enumerate(args))
            _escape_argspec(kwargs, kwargs.iteritems())
            return self.__class__(orig(self, *args, **kwargs))
        func.__name__ = orig.__name__
        func.__doc__ = orig.__doc__
        return func

    for method in '__getitem__', 'capitalize', \
                  'title', 'lower', 'upper', 'replace', 'ljust', \
                  'rjust', 'lstrip', 'rstrip', 'center', 'strip', \
                  'translate', 'expandtabs', 'swapcase', 'zfill':
        locals()[method] = make_wrapper(method)

    # partition/rpartition return a 3-tuple, so the generic wrapper (which
    # wraps the whole result in one markup string) cannot be used for them;
    # each part is wrapped individually instead.  Only defined when the
    # underlying unicode type provides the method.
    if hasattr(unicode, 'partition'):
        def partition(self, sep):
            return tuple(map(self.__class__,
                             unicode.partition(self, escape(sep))))
        partition.__doc__ = unicode.partition.__doc__

        def rpartition(self, sep):
            return tuple(map(self.__class__,
                             unicode.rpartition(self, escape(sep))))
        rpartition.__doc__ = unicode.rpartition.__doc__

    # Only defined when the underlying unicode type provides the method.
    if hasattr(unicode, 'format'):
        format = make_wrapper('format')

    # Only defined when the underlying unicode type provides the method.
    if hasattr(unicode, '__getslice__'):
        __getslice__ = make_wrapper('__getslice__')

    # Keep the class namespace free of the construction helpers.
    del method, make_wrapper
def _escape_argspec(obj, iterable):
    """Helper for various string-wrapped functions.

    Replaces, in place, every entry of `obj` whose value is a string or has
    an `__html__` method with its escaped form.

    :param obj: mutable container (list or dict) that receives the escaped
                values via item assignment.
    :param iterable: ``(key, value)`` pairs naming the entries to inspect.
    :return: `obj` itself, so the call can be used as an expression.
    """
    for key, value in iterable:
        if hasattr(value, '__html__') or isinstance(value, basestring):
            obj[key] = escape(value)
    # Callers rebind their argument list to the result, so hand it back.
    return obj
class _MarkupEscapeHelper(object):
    """Helper for Markup.__mod__

    Wraps one formatting argument.  String conversions return the escaped
    form of the wrapped object, numeric conversions pass through to it
    unchanged, and item access returns another helper so that values looked
    up from a mapping argument are escaped as well.
    """

    def __init__(self, obj):
        # The wrapped argument; every conversion below reads it.
        self.obj = obj

    def __getitem__(self, key):
        return _MarkupEscapeHelper(self.obj[key])

    def __str__(self):
        return str(escape(self.obj))

    def __unicode__(self):
        return unicode(escape(self.obj))

    def __repr__(self):
        # The repr text itself is escaped, since it ends up in markup too.
        return str(escape(repr(self.obj)))

    def __int__(self):
        return int(self.obj)

    def __float__(self):
        return float(self.obj)
# We have to import these down here because the speedups and native
# modules import the Markup type, which is defined above.
try:
    from jinja2._markupsafe._speedups import escape, escape_silent, \
        soft_unicode
except ImportError:
    # The speedups module is unavailable; use the native module instead.
    from jinja2._markupsafe._native import escape, escape_silent, \
        soft_unicode