Merged revisions 85328 via svnmerge from
[python/dscho.git] / Lib / string.py
bloba9898e8a06e0e6be66bdae876ac006b212e702d2
1 """A collection of string constants.
3 Public module variables:
5 whitespace -- a string containing all ASCII whitespace
6 ascii_lowercase -- a string containing all ASCII lowercase letters
7 ascii_uppercase -- a string containing all ASCII uppercase letters
8 ascii_letters -- a string containing all ASCII letters
9 digits -- a string containing all ASCII decimal digits
10 hexdigits -- a string containing all ASCII hexadecimal digits
11 octdigits -- a string containing all ASCII octal digits
12 punctuation -- a string containing all ASCII punctuation characters
13 printable -- a string containing all ASCII characters considered printable
15 """
# Some strings for ctype-style character classification.
# Each constant is a plain str of ASCII characters; the composite ones are
# built by concatenating the simpler ones so they cannot drift apart.
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw string: the literal contains a backslash followed by ']', which is not
# a valid escape sequence in a normal string literal.  The value is unchanged.
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + ascii_letters + punctuation + whitespace
28 # Functions which aren't available as string methods.
30 # Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s [,sep]) -> string

    Break s into words with split, capitalize every word with
    capitalize, and glue the results back together with join.

    When sep is absent or None, words are separated by runs of
    whitespace, the result is joined with single spaces, and leading
    and trailing whitespace disappear.  Otherwise sep is used both to
    split the input and to join the capitalized words.

    """
    # Resolve the joiner first, then split, matching the evaluation order
    # of the single-expression form.
    glue = (sep or ' ').join
    capitalized = [word.capitalize() for word in s.split(sep)]
    return glue(capitalized)
45 # Construct a translation map for bytes.translate
def maketrans(frm: bytes, to: bytes) -> bytes:
    """maketrans(frm, to) -> bytes

    Build a 256-byte translation table for bytes.translate in which
    every byte of frm maps to the byte at the same index in to; all
    other bytes map to themselves.

    Emits a DeprecationWarning on every call.  Raises ValueError when
    the arguments differ in length and TypeError when either argument
    is not a bytes object.
    """
    import warnings
    warnings.warn("string.maketrans is deprecated, use bytes.maketrans instead",
                  DeprecationWarning, 2)
    # The length comparison deliberately comes before the type check.
    if len(frm) != len(to):
        raise ValueError("maketrans arguments must have same length")
    if not isinstance(frm, bytes) or not isinstance(to, bytes):
        raise TypeError("maketrans arguments must be bytes objects")
    # Start from the identity table and overwrite the remapped slots.
    table = bytearray(range(256))
    for src, dst in zip(frm, to):
        table[src] = dst
    return bytes(table)
67 ####################################################################
68 import re as _re
class _multimap:
    """Read-only view that layers one mapping on top of another.

    Used by .{safe_,}substitute() to merge the keyword arguments with
    the positional mapping: a key is looked up in the primary mapping
    first and only falls through to the secondary one on KeyError.
    """

    def __init__(self, primary, secondary):
        self._primary, self._secondary = primary, secondary

    def __getitem__(self, key):
        try:
            value = self._primary[key]
        except KeyError:
            # Not in the first layer; a KeyError from the second layer
            # propagates to the caller.
            value = self._secondary[key]
        return value
class _TemplateMetaclass(type):
    """Metaclass that compiles the placeholder regex of a template class.

    When a class created with this metaclass defines its own ``pattern``
    attribute, that pattern is compiled as given.  Otherwise the default
    pattern below is filled in with the class's ``delimiter`` (regex-escaped)
    and ``idpattern``.  Either way the compiled regular expression replaces
    ``cls.pattern``, using the IGNORECASE and VERBOSE flags.
    """

    # Default pattern.  It is a %-format template: %(delim)s and %(id)s are
    # substituted in __init__ before compilation.  The whole alternation is
    # wrapped in one non-capturing group that must be closed before the
    # string ends.
    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        """Compile and store the placeholder pattern on the new class.

        name, bases and dct are the usual class-creation arguments; dct is
        inspected to see whether the class body supplied its own pattern.
        """
        super(_TemplateMetaclass, cls).__init__(name, bases, dct)
        if 'pattern' in dct:
            # The class body provided a complete pattern; use it verbatim.
            pattern = cls.pattern
        else:
            pattern = _TemplateMetaclass.pattern % {
                'delim': _re.escape(cls.delimiter),
                'id': cls.idpattern,
            }
        cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions.

    The metaclass compiles ``pattern`` from ``delimiter`` and ``idpattern``
    (or from a ``pattern`` supplied by a subclass); the compiled pattern
    recognises $$, $identifier, ${identifier}, and any bare $'s.
    """

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    def _invalid(self, mo):
        """Raise ValueError reporting the line and column of a bad placeholder."""
        offset = mo.start('invalid')
        # Keep line endings so the lengths of earlier lines add up to offsets.
        preceding = self.template[:offset].splitlines(True)
        if preceding:
            lineno = len(preceding)
            colno = offset - len(''.join(preceding[:-1]))
        else:
            lineno = colno = 1
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    def substitute(self, *args, **kws):
        """Return the template with every placeholder replaced.

        Accepts at most one positional mapping plus keyword arguments;
        keywords take precedence over the mapping.  A missing key
        propagates as KeyError and an ill-formed placeholder raises
        ValueError.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if args and kws:
            mapping = _multimap(kws, args[0])
        elif args:
            mapping = args[0]
        else:
            mapping = kws

        # Replacement callback for pattern.sub()
        def convert(mo):
            # Named and braced placeholders are the common case.
            key = mo.group('named') or mo.group('braced')
            if key is not None:
                # '%s' formatting is used here in place of str().
                return '%s' % (mapping[key],)
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(convert, self.template)

    def safe_substitute(self, *args, **kws):
        """Like substitute(), but leave unresolved placeholders in place.

        Placeholders whose key is missing are emitted unchanged, and an
        ill-formed delimiter is emitted as the bare delimiter instead of
        raising ValueError.
        """
        if len(args) > 1:
            raise TypeError('Too many positional arguments')
        if args and kws:
            mapping = _multimap(kws, args[0])
        elif args:
            mapping = args[0]
        else:
            mapping = kws

        # Replacement callback for pattern.sub()
        def convert(mo):
            name = mo.group('named')
            if name is not None:
                try:
                    # '%s' formatting is used here in place of str().
                    replacement = '%s' % (mapping[name],)
                except KeyError:
                    replacement = self.delimiter + name
                return replacement
            name = mo.group('braced')
            if name is not None:
                try:
                    replacement = '%s' % (mapping[name],)
                except KeyError:
                    replacement = self.delimiter + '{' + name + '}'
                return replacement
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return self.delimiter
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)

        return self.pattern.sub(convert, self.template)
193 ########################################################################
194 # the Formatter class
195 # see PEP 3101 for details and purpose of this class
197 # The hard parts are reused from the C implementation. They're exposed as "_"
198 # prefixed methods of str and unicode.
200 # The overall parser is implemented in str._formatter_parser.
201 # The field name parser is implemented in str._formatter_field_name_split
class Formatter:
    """Pure-Python driver for PEP 3101 style string formatting.

    The class splits a format string into literal text and replacement
    fields, resolves each field against the supplied arguments, applies the
    optional conversion and format spec, and joins the pieces.  Each step is
    a separate method so subclasses can override it.
    """

    def format(self, format_string, *args, **kwargs):
        """Format format_string with the given positional/keyword arguments."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Format format_string using an args sequence and a kwargs mapping.

        After formatting, check_unused_args() is called with the set of
        argument keys that were actually referenced.
        """
        used_args = set()
        # A recursion budget of 2 bounds how deeply format specs may
        # themselves contain replacement fields.
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Do the actual formatting work; recurses to expand format specs.

        Raises ValueError once recursion_depth drops below zero.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec = self._vformat(format_spec, args, kwargs,
                                            used_args, recursion_depth-1)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result)

    def get_value(self, key, args, kwargs):
        """Return args[key] for an integer key, otherwise kwargs[key]."""
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called after formatting; the default does nothing."""
        pass

    def format_field(self, value, format_spec):
        """Format a single value by delegating to the format() builtin."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply a conversion flag to value.

        conversion is None (return value unchanged), 's' (str), 'r' (repr)
        or 'a' (ascii), mirroring the flags accepted by str.format.  Any
        other flag raises ValueError.
        """
        # do any conversion on the resulting object
        if conversion is None:
            return value
        elif conversion == 's':
            return str(value)
        elif conversion == 'r':
            return repr(value)
        elif conversion == 'a':
            return ascii(value)
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))

    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        """Split format_string into (literal, field, spec, conversion) tuples."""
        # NOTE(review): relies on the private _formatter_parser method of
        # the format string object -- confirm it exists on the target
        # interpreter version.
        return format_string._formatter_parser()

    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  used_args:    a set of which args have been used
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        """Resolve field_name to an object.

        Returns a pair (obj, first) where first is the leading key of the
        field name, i.e. the argument that was used.
        """
        # NOTE(review): relies on the private _formatter_field_name_split
        # method of the field name object -- confirm it exists on the
        # target interpreter version.
        first, rest = field_name._formatter_field_name_split()

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        return obj, first