Lib/string.py

   1 """A collection of string constants.
   2
   3 Public module variables:
   4
   5 whitespace -- a string containing all ASCII whitespace
   6 ascii_lowercase -- a string containing all ASCII lowercase letters
   7 ascii_uppercase -- a string containing all ASCII uppercase letters
   8 ascii_letters -- a string containing all ASCII letters
   9 digits -- a string containing all ASCII decimal digits
  10 hexdigits -- a string containing all ASCII hexadecimal digits
  11 octdigits -- a string containing all ASCII octal digits
  12 punctuation -- a string containing all ASCII punctuation characters
  13 printable -- a string containing all ASCII characters considered printable
  14
  15 """
  16
  17 # Some strings for ctype-style character classification
  18 whitespace = ' \t\n\r\v\f'
  19 ascii_lowercase = 'abcdefghijklmnopqrstuvwxyz'
  20 ascii_uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
  21 ascii_letters = ascii_lowercase + ascii_uppercase
  22 digits = '0123456789'
  23 hexdigits = digits + 'abcdef' + 'ABCDEF'
  24 octdigits = '01234567'
  25 punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
  26 printable = digits + ascii_letters + punctuation + whitespace
  27
  28 # Functions which aren't available as string methods.
  29
  30 # Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
  31 def capwords(s, sep=None):
  32     """capwords(s [,sep]) -> string
  33
  34     Split the argument into words using split, capitalize each
  35     word using capitalize, and join the capitalized words using
  36     join.  If the optional second argument sep is absent or None,
  37     runs of whitespace characters are replaced by a single space
  38     and leading and trailing whitespace are removed, otherwise
  39     sep is used to split and join the words.
  40
  41     """
  42     return (sep or ' ').join(x.capitalize() for x in s.split(sep))
  43
  44
  45 # Construct a translation map for bytes.translate
  46 def maketrans(frm: bytes, to: bytes) -> bytes:
  47     """maketrans(frm, to) -> bytes
  48
  49     Return a translation table (a bytes object of length 256)
  50     suitable for use in bytes.translate where each byte in frm is
  51     mapped to the byte at the same position in to.
  52     The strings frm and to must be of the same length.
  53     """
  54     import warnings
  55     warnings.warn("string.maketrans is deprecated, use bytes.maketrans instead",
  56                   DeprecationWarning, 2)
  57     if len(frm) != len(to):
  58         raise ValueError("maketrans arguments must have same length")
  59     if not (isinstance(frm, bytes) and isinstance(to, bytes)):
  60         raise TypeError("maketrans arguments must be bytes objects")
  61     L = bytearray(range(256))
  62     for i, c in enumerate(frm):
  63         L[c] = to[i]
  64     return bytes(L)
  65
  66
  67 ####################################################################
  68 import re as _re
  69
  70 class _multimap:
  71     """Helper class for combining multiple mappings.
  72
  73     Used by .{safe_,}substitute() to combine the mapping and keyword
  74     arguments.
  75     """
  76     def __init__(self, primary, secondary):
  77         self._primary = primary
  78         self._secondary = secondary
  79
  80     def __getitem__(self, key):
  81         try:
  82             return self._primary[key]
  83         except KeyError:
  84             return self._secondary[key]
  85
  86
  87 class _TemplateMetaclass(type):
  88     pattern = r"""
  89     %(delim)s(?:
  90       (?P<escaped>%(delim)s) |   # Escape sequence of two delimiters
  91       (?P<named>%(id)s)      |   # delimiter and a Python identifier
  92       {(?P<braced>%(id)s)}   |   # delimiter and a braced identifier
  93       (?P<invalid>)              # Other ill-formed delimiter exprs
  94     )
  95     """
  96
  97     def __init__(cls, name, bases, dct):
  98         super(_TemplateMetaclass, cls).__init__(name, bases, dct)
  99         if 'pattern' in dct:
 100             pattern = cls.pattern
 101         else:
 102             pattern = _TemplateMetaclass.pattern % {
 103                 'delim' : _re.escape(cls.delimiter),
 104                 'id'    : cls.idpattern,
 105                 }
 106         cls.pattern = _re.compile(pattern, _re.IGNORECASE | _re.VERBOSE)
 107
 108
 109 class Template(metaclass=_TemplateMetaclass):
 110     """A string class for supporting $-substitutions."""
 111
 112     delimiter = '$'
 113     idpattern = r'[_a-z][_a-z0-9]*'
 114
 115     def __init__(self, template):
 116         self.template = template
 117
 118     # Search for $$, $identifier, ${identifier}, and any bare $'s
 119
 120     def _invalid(self, mo):
 121         i = mo.start('invalid')
 122         lines = self.template[:i].splitlines(True)
 123         if not lines:
 124             colno = 1
 125             lineno = 1
 126         else:
 127             colno = i - len(''.join(lines[:-1]))
 128             lineno = len(lines)
 129         raise ValueError('Invalid placeholder in string: line %d, col %d' %
 130                          (lineno, colno))
 131
 132     def substitute(self, *args, **kws):
 133         if len(args) > 1:
 134             raise TypeError('Too many positional arguments')
 135         if not args:
 136             mapping = kws
 137         elif kws:
 138             mapping = _multimap(kws, args[0])
 139         else:
 140             mapping = args[0]
 141         # Helper function for .sub()
 142         def convert(mo):
 143             # Check the most common path first.
 144             named = mo.group('named') or mo.group('braced')
 145             if named is not None:
 146                 val = mapping[named]
 147                 # We use this idiom instead of str() because the latter will
 148                 # fail if val is a Unicode containing non-ASCII characters.
 149                 return '%s' % (val,)
 150             if mo.group('escaped') is not None:
 151                 return self.delimiter
 152             if mo.group('invalid') is not None:
 153                 self._invalid(mo)
 154             raise ValueError('Unrecognized named group in pattern',
 155                              self.pattern)
 156         return self.pattern.sub(convert, self.template)
 157
 158     def safe_substitute(self, *args, **kws):
 159         if len(args) > 1:
 160             raise TypeError('Too many positional arguments')
 161         if not args:
 162             mapping = kws
 163         elif kws:
 164             mapping = _multimap(kws, args[0])
 165         else:
 166             mapping = args[0]
 167         # Helper function for .sub()
 168         def convert(mo):
 169             named = mo.group('named')
 170             if named is not None:
 171                 try:
 172                     # We use this idiom instead of str() because the latter
 173                     # will fail if val is a Unicode containing non-ASCII
 174                     return '%s' % (mapping[named],)
 175                 except KeyError:
 176                     return self.delimiter + named
 177             braced = mo.group('braced')
 178             if braced is not None:
 179                 try:
 180                     return '%s' % (mapping[braced],)
 181                 except KeyError:
 182                     return self.delimiter + '{' + braced + '}'
 183             if mo.group('escaped') is not None:
 184                 return self.delimiter
 185             if mo.group('invalid') is not None:
 186                 return self.delimiter
 187             raise ValueError('Unrecognized named group in pattern',
 188                              self.pattern)
 189         return self.pattern.sub(convert, self.template)
 190
 191
 192
 193 ########################################################################
 194 # the Formatter class
 195 # see PEP 3101 for details and purpose of this class
 196
 197 # The hard parts are reused from the C implementation.  They're exposed as "_"
 198 # prefixed methods of str and unicode.
 199
 200 # The overall parser is implemented in str._formatter_parser.
 201 # The field name parser is implemented in str._formatter_field_name_split
 202
 203 class Formatter:
 204     def format(self, format_string, *args, **kwargs):
 205         return self.vformat(format_string, args, kwargs)
 206
 207     def vformat(self, format_string, args, kwargs):
 208         used_args = set()
 209         result = self._vformat(format_string, args, kwargs, used_args, 2)
 210         self.check_unused_args(used_args, args, kwargs)
 211         return result
 212
 213     def _vformat(self, format_string, args, kwargs, used_args, recursion_depth):
 214         if recursion_depth < 0:
 215             raise ValueError('Max string recursion exceeded')
 216         result = []
 217         for literal_text, field_name, format_spec, conversion in \
 218                 self.parse(format_string):
 219
 220             # output the literal text
 221             if literal_text:
 222                 result.append(literal_text)
 223
 224             # if there's a field, output it
 225             if field_name is not None:
 226                 # this is some markup, find the object and do
 227                 #  the formatting
 228
 229                 # given the field_name, find the object it references
 230                 #  and the argument it came from
 231                 obj, arg_used = self.get_field(field_name, args, kwargs)
 232                 used_args.add(arg_used)
 233
 234                 # do any conversion on the resulting object
 235                 obj = self.convert_field(obj, conversion)
 236
 237                 # expand the format spec, if needed
 238                 format_spec = self._vformat(format_spec, args, kwargs,
 239                                             used_args, recursion_depth-1)
 240
 241                 # format the object and append to the result
 242                 result.append(self.format_field(obj, format_spec))
 243
 244         return ''.join(result)
 245
 246
 247     def get_value(self, key, args, kwargs):
 248         if isinstance(key, int):
 249             return args[key]
 250         else:
 251             return kwargs[key]
 252
 253
 254     def check_unused_args(self, used_args, args, kwargs):
 255         pass
 256
 257
 258     def format_field(self, value, format_spec):
 259         return format(value, format_spec)
 260
 261
 262     def convert_field(self, value, conversion):
 263         # do any conversion on the resulting object
 264         if conversion == 'r':
 265             return repr(value)
 266         elif conversion == 's':
 267             return str(value)
 268         elif conversion is None:
 269             return value
 270         raise ValueError("Unknown converion specifier {0!s}".format(conversion))
 271
 272
 273     # returns an iterable that contains tuples of the form:
 274     # (literal_text, field_name, format_spec, conversion)
 275     # literal_text can be zero length
 276     # field_name can be None, in which case there's no
 277     #  object to format and output
 278     # if field_name is not None, it is looked up, formatted
 279     #  with format_spec and conversion and then used
 280     def parse(self, format_string):
 281         return format_string._formatter_parser()
 282
 283
 284     # given a field_name, find the object it references.
 285     #  field_name:   the field being looked up, e.g. "0.name"
 286     #                 or "lookup[3]"
 287     #  used_args:    a set of which args have been used
 288     #  args, kwargs: as passed in to vformat
 289     def get_field(self, field_name, args, kwargs):
 290         first, rest = field_name._formatter_field_name_split()
 291
 292         obj = self.get_value(first, args, kwargs)
 293
 294         # loop through the rest of the field_name, doing
 295         #  getattr or getitem as needed
 296         for is_attr, i in rest:
 297             if is_attr:
 298                 obj = getattr(obj, i)
 299             else:
 300                 obj = obj[i]
 301
 302         return obj, first