Initialized merge tracking via "svnmerge" with revisions "1-73579" from
[python/dscho.git] / Lib / string.py
blobe071a2d1b23edea366504078091da5b410c8c434
1 """A collection of string constants.
3 Public module variables:
5 whitespace -- a string containing all ASCII whitespace
6 ascii_lowercase -- a string containing all ASCII lowercase letters
7 ascii_uppercase -- a string containing all ASCII uppercase letters
8 ascii_letters -- a string containing all ASCII letters
9 digits -- a string containing all ASCII decimal digits
10 hexdigits -- a string containing all ASCII hexadecimal digits
11 octdigits -- a string containing all ASCII octal digits
12 punctuation -- a string containing all ASCII punctuation characters
13 printable -- a string containing all ASCII characters considered printable
15 """
# Some strings for ctype-style character classification.
# All of them are plain ASCII; the composite constants are assembled from
# the simpler ones so the definitions cannot drift apart.
whitespace = ' \t\n\r\v\f'
ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
ascii_letters = ascii_lowercase + ascii_uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
# Raw string: the backslash is itself one of the punctuation characters,
# and a non-raw "\]" is an invalid escape sequence.
punctuation = r"""!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + ascii_letters + punctuation + whitespace
28 # Functions which aren't available as string methods.
# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Break s into words with s.split(sep), capitalize every word with
    capitalize(), and glue the words back together with sep (a single
    space is used when sep is not given).  With the default sep, runs
    of whitespace collapse into one space and leading and trailing
    whitespace disappear.

    """
    joiner = sep or ' '
    words = s.split(sep)
    return joiner.join(word.capitalize() for word in words)
# Construct a translation map for bytes.translate
def maketrans(frm: bytes, to: bytes) -> bytes:
    """maketrans(frm, to) -> bytes

    Build the 256-byte table expected by bytes.translate: every byte
    value maps to itself, except that each byte of frm maps to the byte
    found at the same index in to.

    Always emits a DeprecationWarning pointing at bytes.maketrans.
    Raises ValueError when frm and to differ in length, and TypeError
    when either of them is not a bytes object.
    """
    import warnings
    warnings.warn("string.maketrans is deprecated, use bytes.maketrans instead",
                  DeprecationWarning, 2)
    # The lengths are compared before the types are inspected.
    if len(frm) != len(to):
        raise ValueError("maketrans arguments must have same length")
    if not isinstance(frm, bytes) or not isinstance(to, bytes):
        raise TypeError("maketrans arguments must be bytes objects")
    # Start from the identity mapping and overwrite the translated slots.
    table = bytearray(range(256))
    for source_byte, target_byte in zip(frm, to):
        table[source_byte] = target_byte
    return bytes(table)
65 ####################################################################
66 import re as _re
class _multimap:
    """Helper class for combining multiple mappings.

    Used by .{safe_,}substitute() to combine the mapping and keyword
    arguments.  A lookup consults the primary mapping first and falls
    back to the secondary one only when the primary raises KeyError.
    """

    def __init__(self, primary, secondary):
        self._primary = primary
        self._secondary = secondary

    def __getitem__(self, key):
        try:
            return self._primary[key]
        except KeyError:
            return self._secondary[key]


class _TemplateMetaclass(type):
    """Metaclass that leaves a compiled regex in each class's `pattern`.

    A class body that defines its own `pattern` string has that string
    compiled as-is.  Otherwise the default pattern below is filled in
    with the class's (regex-escaped) `delimiter` and its `idpattern`.
    Either way the result is compiled with IGNORECASE and VERBOSE.
    """
    pattern = r"""
    %(delim)s(?:
      (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
      (?P<named>%(id)s)      |   # delimiter and a Python identifier
      {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
      (?P<invalid>)              # Other ill-formed delimiter exprs
    )
    """

    def __init__(cls, name, bases, dct):
        super().__init__(name, bases, dct)
        if 'pattern' in dct:
            pattern = cls.pattern
        else:
            pattern = _TemplateMetaclass.pattern % {
                'delim': _re.escape(cls.delimiter),
                'id': cls.idpattern,
            }
        cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)


def _template_call_args(method_name, args, kws):
    """Split the raw arguments of a (safe_)substitute call.

    args holds the template instance followed by at most one mapping;
    kws holds the keyword substitutions.  Returns (template, mapping),
    where keyword arguments take precedence over the positional mapping
    when both are given.  Raises TypeError when the instance is missing
    or more than one mapping is passed.
    """
    if not args:
        raise TypeError("descriptor %r of 'Template' object needs an argument"
                        % method_name)
    template, *rest = args
    if len(rest) > 1:
        raise TypeError('Too many positional arguments')
    if not rest:
        mapping = kws
    elif kws:
        mapping = _multimap(kws, rest[0])
    else:
        mapping = rest[0]
    return template, mapping


class Template(metaclass=_TemplateMetaclass):
    """A string class for supporting $-substitutions."""

    delimiter = '$'
    idpattern = r'[_a-z][_a-z0-9]*'

    def __init__(self, template):
        self.template = template

    # Search for $$, $identifier, ${identifier}, and any bare $'s

    def _invalid(self, mo):
        """Raise ValueError locating the ill-formed placeholder matched by mo.

        The message reports the 1-based line and column computed from
        the start of the 'invalid' group.
        """
        i = mo.start('invalid')
        lines = self.template[:i].splitlines(True)
        if not lines:
            colno = 1
            lineno = 1
        else:
            # Column = offset past everything on the preceding lines.
            colno = i - len(''.join(lines[:-1]))
            lineno = len(lines)
        raise ValueError('Invalid placeholder in string: line %d, col %d' %
                         (lineno, colno))

    # The instance is taken from *args rather than from a parameter named
    # "self", so that "self" remains usable as a substitution keyword.
    def substitute(*args, **kws):
        """substitute([mapping], **kws) -> string

        Return the template with every placeholder replaced by the
        str() of its value, looked up in kws and/or the optional
        mapping (kws wins on conflicts).  A doubled delimiter yields a
        single delimiter.

        Raises KeyError (or whatever the mapping raises) for a
        placeholder without a value, ValueError for an ill-formed
        placeholder, and TypeError for more than one positional mapping.
        """
        self, mapping = _template_call_args('substitute', args, kws)

        # Helper function for .sub()
        def convert(mo):
            # Check the most common path first.
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                return str(mapping[named])
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                self._invalid(mo)
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)

    def safe_substitute(*args, **kws):
        """safe_substitute([mapping], **kws) -> string

        Like substitute(), except that a placeholder whose lookup
        raises KeyError, and an ill-formed placeholder, are left in the
        result exactly as they were written instead of raising.
        """
        self, mapping = _template_call_args('safe_substitute', args, kws)

        # Helper function for .sub()
        def convert(mo):
            named = mo.group('named') or mo.group('braced')
            if named is not None:
                try:
                    return str(mapping[named])
                except KeyError:
                    # Echo the matched text so the placeholder survives
                    # unchanged, whatever delimiters the pattern uses.
                    return mo.group()
            if mo.group('escaped') is not None:
                return self.delimiter
            if mo.group('invalid') is not None:
                return mo.group()
            raise ValueError('Unrecognized named group in pattern',
                             self.pattern)
        return self.pattern.sub(convert, self.template)
191 ########################################################################
192 # the Formatter class
193 # see PEP 3101 for details and purpose of this class
195 # The hard parts are reused from the C implementation. They're exposed as "_"
196 # prefixed methods of str.
198 # The overall parser is implemented in str._formatter_parser.
199 # The field name parser is implemented in str._formatter_field_name_split
class Formatter:
    """Python-level driver for format-string processing (see PEP 3101).

    Parsing of the format string and of field names is delegated to the
    "_"-prefixed helper methods of the format string object; every other
    step is an overridable method.
    """

    def format(self, format_string, *args, **kwargs):
        """Format format_string using the given positional/keyword args."""
        return self.vformat(format_string, args, kwargs)

    def vformat(self, format_string, args, kwargs):
        """Like format(), but takes args and kwargs as a sequence and a dict.

        After formatting, check_unused_args() is called with the set of
        argument keys that were actually referenced.
        """
        used_args = set()
        # 2 = how many levels of nested format specs are allowed.
        result = self._vformat(format_string, args, kwargs, used_args, 2)
        self.check_unused_args(used_args, args, kwargs)
        return result

    def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
        """Format one level of format_string, recursing into format specs.

        used_args is updated in place with the first component of every
        field name looked up.  Raises ValueError once recursion_depth
        drops below zero.
        """
        if recursion_depth < 0:
            raise ValueError('Max string recursion exceeded')
        result = []
        for literal_text, field_name, format_spec, conversion in \
                self.parse(format_string):

            # output the literal text
            if literal_text:
                result.append(literal_text)

            # if there's a field, output it
            if field_name is not None:
                # this is some markup, find the object and do
                #  the formatting

                # given the field_name, find the object it references
                #  and the argument it came from
                obj, arg_used = self.get_field(field_name, args, kwargs)
                used_args.add(arg_used)

                # do any conversion on the resulting object
                obj = self.convert_field(obj, conversion)

                # expand the format spec, if needed
                format_spec = self._vformat(format_spec, args, kwargs,
                                            used_args, recursion_depth-1)

                # format the object and append to the result
                result.append(self.format_field(obj, format_spec))

        return ''.join(result)

    def get_value(self, key, args, kwargs):
        """Return args[key] for an int key, kwargs[key] for any other key."""
        if isinstance(key, int):
            return args[key]
        else:
            return kwargs[key]

    def check_unused_args(self, used_args, args, kwargs):
        """Hook called by vformat() after formatting; does nothing here."""
        pass

    def format_field(self, value, format_spec):
        """Return value formatted with the builtin format()."""
        return format(value, format_spec)

    def convert_field(self, value, conversion):
        """Apply a '!'-conversion to value and return the result.

        conversion is None (return value unchanged), 's' (str), 'r'
        (repr) or 'a' (ascii); anything else raises ValueError.
        """
        # do any conversion on the resulting object
        if conversion is None:
            return value
        elif conversion == 's':
            return str(value)
        elif conversion == 'r':
            return repr(value)
        elif conversion == 'a':
            return ascii(value)
        raise ValueError("Unknown conversion specifier {0!s}".format(conversion))

    # returns an iterable that contains tuples of the form:
    # (literal_text, field_name, format_spec, conversion)
    # literal_text can be zero length
    # field_name can be None, in which case there's no
    #  object to format and output
    # if field_name is not None, it is looked up, formatted
    #  with format_spec and conversion and then used
    def parse(self, format_string):
        # Relies on the private parser method of the format string object.
        return format_string._formatter_parser()

    # given a field_name, find the object it references.
    #  field_name:   the field being looked up, e.g. "0.name"
    #                 or "lookup[3]"
    #  used_args:    a set of which args have been used
    #  args, kwargs: as passed in to vformat
    def get_field(self, field_name, args, kwargs):
        # Relies on the private field-name splitter of the string object.
        first, rest = field_name._formatter_field_name_split()

        obj = self.get_value(first, args, kwargs)

        # loop through the rest of the field_name, doing
        #  getattr or getitem as needed
        for is_attr, i in rest:
            if is_attr:
                obj = getattr(obj, i)
            else:
                obj = obj[i]

        # the second item is the key that vformat() records as used
        return obj, first