Lib/string.py

   1 """A collection of string constants.
   2
   3 Public module variables:
   4
   5 whitespace -- a string containing all ASCII whitespace
   6 ascii_lowercase -- a string containing all ASCII lowercase letters
   7 ascii_uppercase -- a string containing all ASCII uppercase letters
   8 ascii_letters -- a string containing all ASCII letters
   9 digits -- a string containing all ASCII decimal digits
  10 hexdigits -- a string containing all ASCII hexadecimal digits
  11 octdigits -- a string containing all ASCII octal digits
  12 punctuation -- a string containing all ASCII punctuation characters
  13 printable -- a string containing all ASCII characters considered printable
  14
  15 """
  16
  17 # Some strings for ctype-style character classification
  18 whitespace = ' \t\n\r\v\f'
  19 ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
  20 ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  21 ascii_letters = ascii_lowercase + ascii_uppercase
  22 digits = '0123456789'
  23 hexdigits = digits + 'abcdef' + 'ABCDEF'
  24 octdigits = '01234567'
  25 punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
  26 printable = digits + ascii_letters + punctuation + whitespace
  27
  28 # Functions which aren't available as string methods.
  29
  30 # Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
  31 def capwords(s, sep=None):
  32     """capwords(s, [sep]) -> string
  33
  34     Split the argument into words using split, capitalize each
  35     word using capitalize, and join the capitalized words using
  36     join. Note that this replaces runs of whitespace characters by
  37     a single space.
  38
  39     """
  40     return (sep or ' ').join([x.capitalize() for x in s.split(sep)])
  41
  42
  43 # Construct a translation map for bytes.translate
  44 def maketrans(frm: bytes, to: bytes) -> bytes:
  45     """maketrans(frm, to) -> bytes
  46
  47     Return a translation table (a bytes object of length 256)
  48     suitable for use in bytes.translate where each byte in frm is
  49     mapped to the byte at the same position in to.
  50     The strings frm and to must be of the same length.
  51     """
  52     import warnings
  53     warnings.warn("string.maketrans is deprecated, use bytes.maketrans instead",
  54                   DeprecationWarning, 2)
  55     if len(frm) != len(to):
  56         raise ValueError("maketrans arguments must have same length")
  57     if not (isinstance(frm, bytes) and isinstance(to, bytes)):
  58         raise TypeError("maketrans arguments must be bytes objects")
  59     L = bytearray(range(256))
  60     for i, c in enumerate(frm):
  61         L[c] = to[i]
  62     return bytes(L)
  63
  64
  65 ####################################################################
  66 import re as _re
  67
  68 class _multimap:
  69     """Helper class for combining multiple mappings.
  70
  71     Used by .{safe_,}substitute() to combine the mapping and keyword
  72     arguments.
  73     """
  74     def __init__(self, primary, secondary):
  75         self._primary = primary
  76         self._secondary = secondary
  77
  78     def __getitem__(self, key):
  79         try:
  80             return self._primary[key]
  81         except KeyError:
  82             return self._secondary[key]
  83
  84
  85 class _TemplateMetaclass(type):
  86     pattern = r"""
  87     %(delim)s(?:
  88       (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
  89       (?P<named>%(id)s)      |   # delimiter and a Python identifier
  90       {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
  91       (?P<invalid>)              # Other ill-formed delimiter exprs
  92     )
  93     """
  94
  95     def __init__(cls, name, bases, dct):
  96         super(_TemplateMetaclass, cls).__init__(name, bases, dct)
  97         if 'pattern' in dct:
  98             pattern = cls.pattern
  99         else:
 100             pattern = _TemplateMetaclass.pattern % {
 101                 'delim' : _re.escape(cls.delimiter),
 102                 'id'    : cls.idpattern,
 103                 }
 104         cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
 105
 106
 107 class Template(metaclass=_TemplateMetaclass):
 108     """A string class for supporting $-substitutions."""
 109
 110     delimiter = '$'
 111     idpattern = r'[_a-z][_a-z0-9]*'
 112
 113     def __init__(self, template):
 114         self.template = template
 115
 116     # Search for $$, $identifier, ${identifier}, and any bare $'s
 117
 118     def _invalid(self, mo):
 119         i = mo.start('invalid')
 120         lines = self.template[:i].splitlines(True)
 121         if not lines:
 122             colno = 1
 123             lineno = 1
 124         else:
 125             colno = i - len(''.join(lines[:-1]))
 126             lineno = len(lines)
 127         raise ValueError('Invalid placeholder in string: line %d, col %d' %
 128                          (lineno, colno))
 129
 130     def substitute(self, *args, **kws):
 131         if len(args) > 1:
 132             raise TypeError('Too many positional arguments')
 133         if not args:
 134             mapping = kws
 135         elif kws:
 136             mapping = _multimap(kws, args[0])
 137         else:
 138             mapping = args[0]
 139         # Helper function for .sub()
 140         def convert(mo):
 141             # Check the most common path first.
 142             named = mo.group('named') or mo.group('braced')
 143             if named is not None:
 144                 val = mapping[named]
 145                 # We use this idiom instead of str() because the latter will
 146                 # fail if val is a Unicode containing non-ASCII characters.
 147                 return '%s' % (val,)
 148             if mo.group('escaped') is not None:
 149                 return self.delimiter
 150             if mo.group('invalid') is not None:
 151                 self._invalid(mo)
 152             raise ValueError('Unrecognized named group in pattern',
 153                              self.pattern)
 154         return self.pattern.sub(convert, self.template)
 155
 156     def safe_substitute(self, *args, **kws):
 157         if len(args) > 1:
 158             raise TypeError('Too many positional arguments')
 159         if not args:
 160             mapping = kws
 161         elif kws:
 162             mapping = _multimap(kws, args[0])
 163         else:
 164             mapping = args[0]
 165         # Helper function for .sub()
 166         def convert(mo):
 167             named = mo.group('named')
 168             if named is not None:
 169                 try:
 170                     # We use this idiom instead of str() because the latter
 171                     # will fail if val is a Unicode containing non-ASCII
 172                     return '%s' % (mapping[named],)
 173                 except KeyError:
 174                     return self.delimiter + named
 175             braced = mo.group('braced')
 176             if braced is not None:
 177                 try:
 178                     return '%s' % (mapping[braced],)
 179                 except KeyError:
 180                     return self.delimiter + '{' + braced + '}'
 181             if mo.group('escaped') is not None:
 182                 return self.delimiter
 183             if mo.group('invalid') is not None:
 184                 return self.delimiter
 185             raise ValueError('Unrecognized named group in pattern',
 186                              self.pattern)
 187         return self.pattern.sub(convert, self.template)
 188
 189
 190
 191 ########################################################################
 192 # the Formatter class
 193 # see PEP 3101 for details and purpose of this class
 194
# The hard parts are reused from the C implementation.  They're exposed as
# "_"-prefixed methods of str.
 197
 198 # The overall parser is implemented in str._formatter_parser.
 199 # The field name parser is implemented in str._formatter_field_name_split
 200
 201 class Formatter:
 202     def format(self, format_string, *args, **kwargs):
 203         return self.vformat(format_string, args, kwargs)
 204
 205     def vformat(self, format_string, args, kwargs):
 206         used_args = set()
 207         result = self._vformat(format_string, args, kwargs, used_args, 2)
 208         self.check_unused_args(used_args, args, kwargs)
 209         return result
 210
 211     def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
 212         if recursion_depth < 0:
 213             raise ValueError('Max string recursion exceeded')
 214         result = []
 215         for literal_text, field_name, format_spec, conversion in \
 216                 self.parse(format_string):
 217
 218             # output the literal text
 219             if literal_text:
 220                 result.append(literal_text)
 221
 222             # if there's a field, output it
 223             if field_name is not None:
 224                 # this is some markup, find the object and do
 225                 #  the formatting
 226
 227                 # given the field_name, find the object it references
 228                 #  and the argument it came from
 229                 obj, arg_used = self.get_field(field_name, args, kwargs)
 230                 used_args.add(arg_used)
 231
 232                 # do any conversion on the resulting object
 233                 obj = self.convert_field(obj, conversion)
 234
 235                 # expand the format spec, if needed
 236                 format_spec = self._vformat(format_spec, args, kwargs,
 237                                             used_args, recursion_depth-1)
 238
 239                 # format the object and append to the result
 240                 result.append(self.format_field(obj, format_spec))
 241
 242         return ''.join(result)
 243
 244
 245     def get_value(self, key, args, kwargs):
 246         if isinstance(key, int):
 247             return args[key]
 248         else:
 249             return kwargs[key]
 250
 251
 252     def check_unused_args(self, used_args, args, kwargs):
 253         pass
 254
 255
 256     def format_field(self, value, format_spec):
 257         return format(value, format_spec)
 258
 259
 260     def convert_field(self, value, conversion):
 261         # do any conversion on the resulting object
 262         if conversion == 'r':
 263             return repr(value)
 264         elif conversion == 's':
 265             return str(value)
 266         elif conversion is None:
 267             return value
 268         raise ValueError("Unknown converion specifier {0!s}".format(conversion))
 269
 270
 271     # returns an iterable that contains tuples of the form:
 272     # (literal_text, field_name, format_spec, conversion)
 273     # literal_text can be zero length
 274     # field_name can be None, in which case there's no
 275     #  object to format and output
 276     # if field_name is not None, it is looked up, formatted
 277     #  with format_spec and conversion and then used
 278     def parse(self, format_string):
 279         return format_string._formatter_parser()
 280
 281
 282     # given a field_name, find the object it references.
 283     #  field_name:   the field being looked up, e.g. "0.name"
 284     #                 or "lookup[3]"
 285     #  used_args:    a set of which args have been used
 286     #  args, kwargs: as passed in to vformat
 287     def get_field(self, field_name, args, kwargs):
 288         first, rest = field_name._formatter_field_name_split()
 289
 290         obj = self.get_value(first, args, kwargs)
 291
 292         # loop through the rest of the field_name, doing
 293         #  getattr or getitem as needed
 294         for is_attr, i in rest:
 295             if is_attr:
 296                 obj = getattr(obj, i)
 297             else:
 298                 obj = obj[i]
 299
 300         return obj, first