2 #include "bytes_methods.h"
4 /* Our own locale-independent ctype.h-like macros */
6 const unsigned int _Py_ctype_table
[256] = {
16 FLAG_SPACE
, /* 0x9 '\t' */
17 FLAG_SPACE
, /* 0xa '\n' */
18 FLAG_SPACE
, /* 0xb '\v' */
19 FLAG_SPACE
, /* 0xc '\f' */
20 FLAG_SPACE
, /* 0xd '\r' */
39 FLAG_SPACE
, /* 0x20 ' ' */
55 FLAG_DIGIT
|FLAG_XDIGIT
, /* 0x30 '0' */
56 FLAG_DIGIT
|FLAG_XDIGIT
, /* 0x31 '1' */
57 FLAG_DIGIT
|FLAG_XDIGIT
, /* 0x32 '2' */
58 FLAG_DIGIT
|FLAG_XDIGIT
, /* 0x33 '3' */
59 FLAG_DIGIT
|FLAG_XDIGIT
, /* 0x34 '4' */
60 FLAG_DIGIT
|FLAG_XDIGIT
, /* 0x35 '5' */
61 FLAG_DIGIT
|FLAG_XDIGIT
, /* 0x36 '6' */
62 FLAG_DIGIT
|FLAG_XDIGIT
, /* 0x37 '7' */
63 FLAG_DIGIT
|FLAG_XDIGIT
, /* 0x38 '8' */
64 FLAG_DIGIT
|FLAG_XDIGIT
, /* 0x39 '9' */
72 FLAG_UPPER
|FLAG_XDIGIT
, /* 0x41 'A' */
73 FLAG_UPPER
|FLAG_XDIGIT
, /* 0x42 'B' */
74 FLAG_UPPER
|FLAG_XDIGIT
, /* 0x43 'C' */
75 FLAG_UPPER
|FLAG_XDIGIT
, /* 0x44 'D' */
76 FLAG_UPPER
|FLAG_XDIGIT
, /* 0x45 'E' */
77 FLAG_UPPER
|FLAG_XDIGIT
, /* 0x46 'F' */
78 FLAG_UPPER
, /* 0x47 'G' */
79 FLAG_UPPER
, /* 0x48 'H' */
80 FLAG_UPPER
, /* 0x49 'I' */
81 FLAG_UPPER
, /* 0x4a 'J' */
82 FLAG_UPPER
, /* 0x4b 'K' */
83 FLAG_UPPER
, /* 0x4c 'L' */
84 FLAG_UPPER
, /* 0x4d 'M' */
85 FLAG_UPPER
, /* 0x4e 'N' */
86 FLAG_UPPER
, /* 0x4f 'O' */
87 FLAG_UPPER
, /* 0x50 'P' */
88 FLAG_UPPER
, /* 0x51 'Q' */
89 FLAG_UPPER
, /* 0x52 'R' */
90 FLAG_UPPER
, /* 0x53 'S' */
91 FLAG_UPPER
, /* 0x54 'T' */
92 FLAG_UPPER
, /* 0x55 'U' */
93 FLAG_UPPER
, /* 0x56 'V' */
94 FLAG_UPPER
, /* 0x57 'W' */
95 FLAG_UPPER
, /* 0x58 'X' */
96 FLAG_UPPER
, /* 0x59 'Y' */
97 FLAG_UPPER
, /* 0x5a 'Z' */
104 FLAG_LOWER
|FLAG_XDIGIT
, /* 0x61 'a' */
105 FLAG_LOWER
|FLAG_XDIGIT
, /* 0x62 'b' */
106 FLAG_LOWER
|FLAG_XDIGIT
, /* 0x63 'c' */
107 FLAG_LOWER
|FLAG_XDIGIT
, /* 0x64 'd' */
108 FLAG_LOWER
|FLAG_XDIGIT
, /* 0x65 'e' */
109 FLAG_LOWER
|FLAG_XDIGIT
, /* 0x66 'f' */
110 FLAG_LOWER
, /* 0x67 'g' */
111 FLAG_LOWER
, /* 0x68 'h' */
112 FLAG_LOWER
, /* 0x69 'i' */
113 FLAG_LOWER
, /* 0x6a 'j' */
114 FLAG_LOWER
, /* 0x6b 'k' */
115 FLAG_LOWER
, /* 0x6c 'l' */
116 FLAG_LOWER
, /* 0x6d 'm' */
117 FLAG_LOWER
, /* 0x6e 'n' */
118 FLAG_LOWER
, /* 0x6f 'o' */
119 FLAG_LOWER
, /* 0x70 'p' */
120 FLAG_LOWER
, /* 0x71 'q' */
121 FLAG_LOWER
, /* 0x72 'r' */
122 FLAG_LOWER
, /* 0x73 's' */
123 FLAG_LOWER
, /* 0x74 't' */
124 FLAG_LOWER
, /* 0x75 'u' */
125 FLAG_LOWER
, /* 0x76 'v' */
126 FLAG_LOWER
, /* 0x77 'w' */
127 FLAG_LOWER
, /* 0x78 'x' */
128 FLAG_LOWER
, /* 0x79 'y' */
129 FLAG_LOWER
, /* 0x7a 'z' */
135 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
136 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
137 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
138 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
139 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
140 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
141 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
142 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* Byte-to-byte lowercase map: 0x41-0x5a ('A'-'Z') map to 0x61-0x7a
   ('a'-'z'); every other byte value maps to itself. */
const unsigned char _Py_ctype_tolower[256] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
    0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
};
/* Byte-to-byte uppercase map: 0x61-0x7a ('a'-'z') map to 0x41-0x5a
   ('A'-'Z'); every other byte value maps to itself. */
const unsigned char _Py_ctype_toupper[256] = {
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
    0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
    0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
    0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
    0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
    0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,
    0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
    0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
    0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,
    0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
    0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7,
    0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
    0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7,
    0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
    0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,
    0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
    0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,
    0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,
};
217 PyDoc_STRVAR_shared(_Py_isspace__doc__
,
218 "B.isspace() -> bool\n\
220 Return True if all characters in B are whitespace\n\
221 and there is at least one character in B, False otherwise.");
224 _Py_bytes_isspace(const char *cptr
, Py_ssize_t len
)
226 register const unsigned char *p
227 = (unsigned char *) cptr
;
228 register const unsigned char *e
;
230 /* Shortcut for single character strings */
231 if (len
== 1 && ISSPACE(*p
))
234 /* Special case for empty strings */
247 PyDoc_STRVAR_shared(_Py_isalpha__doc__
,
248 "B.isalpha() -> bool\n\
250 Return True if all characters in B are alphabetic\n\
251 and there is at least one character in B, False otherwise.");
254 _Py_bytes_isalpha(const char *cptr
, Py_ssize_t len
)
256 register const unsigned char *p
257 = (unsigned char *) cptr
;
258 register const unsigned char *e
;
260 /* Shortcut for single character strings */
261 if (len
== 1 && ISALPHA(*p
))
264 /* Special case for empty strings */
277 PyDoc_STRVAR_shared(_Py_isalnum__doc__
,
278 "B.isalnum() -> bool\n\
280 Return True if all characters in B are alphanumeric\n\
281 and there is at least one character in B, False otherwise.");
284 _Py_bytes_isalnum(const char *cptr
, Py_ssize_t len
)
286 register const unsigned char *p
287 = (unsigned char *) cptr
;
288 register const unsigned char *e
;
290 /* Shortcut for single character strings */
291 if (len
== 1 && ISALNUM(*p
))
294 /* Special case for empty strings */
307 PyDoc_STRVAR_shared(_Py_isdigit__doc__
,
308 "B.isdigit() -> bool\n\
310 Return True if all characters in B are digits\n\
311 and there is at least one character in B, False otherwise.");
314 _Py_bytes_isdigit(const char *cptr
, Py_ssize_t len
)
316 register const unsigned char *p
317 = (unsigned char *) cptr
;
318 register const unsigned char *e
;
320 /* Shortcut for single character strings */
321 if (len
== 1 && ISDIGIT(*p
))
324 /* Special case for empty strings */
337 PyDoc_STRVAR_shared(_Py_islower__doc__
,
338 "B.islower() -> bool\n\
340 Return True if all cased characters in B are lowercase and there is\n\
341 at least one cased character in B, False otherwise.");
344 _Py_bytes_islower(const char *cptr
, Py_ssize_t len
)
346 register const unsigned char *p
347 = (unsigned char *) cptr
;
348 register const unsigned char *e
;
351 /* Shortcut for single character strings */
353 return PyBool_FromLong(ISLOWER(*p
));
355 /* Special case for empty strings */
364 else if (!cased
&& ISLOWER(*p
))
367 return PyBool_FromLong(cased
);
371 PyDoc_STRVAR_shared(_Py_isupper__doc__
,
372 "B.isupper() -> bool\n\
374 Return True if all cased characters in B are uppercase and there is\n\
375 at least one cased character in B, False otherwise.");
378 _Py_bytes_isupper(const char *cptr
, Py_ssize_t len
)
380 register const unsigned char *p
381 = (unsigned char *) cptr
;
382 register const unsigned char *e
;
385 /* Shortcut for single character strings */
387 return PyBool_FromLong(ISUPPER(*p
));
389 /* Special case for empty strings */
398 else if (!cased
&& ISUPPER(*p
))
401 return PyBool_FromLong(cased
);
405 PyDoc_STRVAR_shared(_Py_istitle__doc__
,
406 "B.istitle() -> bool\n\
408 Return True if B is a titlecased string and there is at least one\n\
409 character in B, i.e. uppercase characters may only follow uncased\n\
410 characters and lowercase characters only cased ones. Return False\n\
414 _Py_bytes_istitle(const char *cptr
, Py_ssize_t len
)
416 register const unsigned char *p
417 = (unsigned char *) cptr
;
418 register const unsigned char *e
;
419 int cased
, previous_is_cased
;
421 /* Shortcut for single character strings */
423 return PyBool_FromLong(ISUPPER(*p
));
425 /* Special case for empty strings */
431 previous_is_cased
= 0;
433 register const unsigned char ch
= *p
;
436 if (previous_is_cased
)
438 previous_is_cased
= 1;
441 else if (ISLOWER(ch
)) {
442 if (!previous_is_cased
)
444 previous_is_cased
= 1;
448 previous_is_cased
= 0;
450 return PyBool_FromLong(cased
);
454 PyDoc_STRVAR_shared(_Py_lower__doc__
,
455 "B.lower() -> copy of B\n\
457 Return a copy of B with all ASCII characters converted to lowercase.");
460 _Py_bytes_lower(char *result
, const char *cptr
, Py_ssize_t len
)
465 newobj = PyString_FromStringAndSize(NULL, len);
469 s = PyString_AS_STRING(newobj);
472 Py_MEMCPY(result
, cptr
, len
);
474 for (i
= 0; i
< len
; i
++) {
475 int c
= Py_CHARMASK(result
[i
]);
477 result
[i
] = TOLOWER(c
);
482 PyDoc_STRVAR_shared(_Py_upper__doc__
,
483 "B.upper() -> copy of B\n\
485 Return a copy of B with all ASCII characters converted to uppercase.");
488 _Py_bytes_upper(char *result
, const char *cptr
, Py_ssize_t len
)
493 newobj = PyString_FromStringAndSize(NULL, len);
497 s = PyString_AS_STRING(newobj);
500 Py_MEMCPY(result
, cptr
, len
);
502 for (i
= 0; i
< len
; i
++) {
503 int c
= Py_CHARMASK(result
[i
]);
505 result
[i
] = TOUPPER(c
);
510 PyDoc_STRVAR_shared(_Py_title__doc__
,
511 "B.title() -> copy of B\n\
513 Return a titlecased version of B, i.e. ASCII words start with uppercase\n\
514 characters, all remaining cased characters have lowercase.");
517 _Py_bytes_title(char *result
, char *s
, Py_ssize_t len
)
520 int previous_is_cased
= 0;
523 newobj = PyString_FromStringAndSize(NULL, len);
526 s_new = PyString_AsString(newobj);
528 for (i
= 0; i
< len
; i
++) {
529 int c
= Py_CHARMASK(*s
++);
531 if (!previous_is_cased
)
533 previous_is_cased
= 1;
534 } else if (ISUPPER(c
)) {
535 if (previous_is_cased
)
537 previous_is_cased
= 1;
539 previous_is_cased
= 0;
545 PyDoc_STRVAR_shared(_Py_capitalize__doc__
,
546 "B.capitalize() -> copy of B\n\
548 Return a copy of B with only its first character capitalized (ASCII).");
551 _Py_bytes_capitalize(char *result
, char *s
, Py_ssize_t len
)
556 newobj = PyString_FromStringAndSize(NULL, len);
559 s_new = PyString_AsString(newobj);
562 int c
= Py_CHARMASK(*s
++);
564 *result
= TOUPPER(c
);
569 for (i
= 1; i
< len
; i
++) {
570 int c
= Py_CHARMASK(*s
++);
572 *result
= TOLOWER(c
);
580 PyDoc_STRVAR_shared(_Py_swapcase__doc__
,
581 "B.swapcase() -> copy of B\n\
583 Return a copy of B with uppercase ASCII characters converted\n\
584 to lowercase ASCII and vice versa.");
587 _Py_bytes_swapcase(char *result
, char *s
, Py_ssize_t len
)
592 newobj = PyString_FromStringAndSize(NULL, len);
595 s_new = PyString_AsString(newobj);
597 for (i
= 0; i
< len
; i
++) {
598 int c
= Py_CHARMASK(*s
++);
600 *result
= TOUPPER(c
);
602 else if (ISUPPER(c
)) {
603 *result
= TOLOWER(c
);