[Bug #793553] Correct description of keyword arguments for SSL authentication
[pytest.git] / Include / unicodeobject.h
blobc7e07a86ef4bdcbb1f2e4484e5b1da832912b7dd
1 #ifndef Py_UNICODEOBJECT_H
2 #define Py_UNICODEOBJECT_H
4 /*
6 Unicode implementation based on original code by Fredrik Lundh,
7 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
8 Unicode Integration Proposal (see file Misc/unicode.txt).
10 Copyright (c) Corporation for National Research Initiatives.
13 Original header:
14 --------------------------------------------------------------------
16 * Yet another Unicode string type for Python. This type supports the
17 * 16-bit Basic Multilingual Plane (BMP) only.
19 * Written by Fredrik Lundh, January 1999.
21 * Copyright (c) 1999 by Secret Labs AB.
22 * Copyright (c) 1999 by Fredrik Lundh.
24 * fredrik@pythonware.com
25 * http://www.pythonware.com
27 * --------------------------------------------------------------------
28 * This Unicode String Type is
30 * Copyright (c) 1999 by Secret Labs AB
31 * Copyright (c) 1999 by Fredrik Lundh
33 * By obtaining, using, and/or copying this software and/or its
34 * associated documentation, you agree that you have read, understood,
35 * and will comply with the following terms and conditions:
37 * Permission to use, copy, modify, and distribute this software and its
38 * associated documentation for any purpose and without fee is hereby
39 * granted, provided that the above copyright notice appears in all
40 * copies, and that both that copyright notice and this permission notice
41 * appear in supporting documentation, and that the name of Secret Labs
42 * AB or the author not be used in advertising or publicity pertaining to
43 * distribution of the software without specific, written prior
44 * permission.
46 * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
47 * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
48 * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
49 * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
50 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
51 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
52 * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
53 * -------------------------------------------------------------------- */
55 #include <ctype.h>
57 /* === Internal API ======================================================= */
59 /* --- Internal Unicode Format -------------------------------------------- */
61 #ifndef Py_USING_UNICODE
63 #define PyUnicode_Check(op) 0
64 #define PyUnicode_CheckExact(op) 0
66 #else
68 /* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
69 properly set, but the default rules below don't set it. I'll
70 sort this out some other day -- fredrik@pythonware.com */
72 #ifndef Py_UNICODE_SIZE
73 #error Must define Py_UNICODE_SIZE
74 #endif
76 /* Setting Py_UNICODE_WIDE enables UCS-4 storage. Otherwise, Unicode
77 strings are stored as UCS-2 (with limited support for UTF-16) */
79 #if Py_UNICODE_SIZE >= 4
80 #define Py_UNICODE_WIDE
81 #endif
83 /* Set these flags if the platform has "wchar.h", "wctype.h" and the
84 wchar_t type is a 16-bit unsigned type */
85 /* #define HAVE_WCHAR_H */
86 /* #define HAVE_USABLE_WCHAR_T */
88 /* Defaults for various platforms */
89 #ifndef PY_UNICODE_TYPE
91 /* Windows has a usable wchar_t type (unless we're using UCS-4) */
92 # if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
93 # define HAVE_USABLE_WCHAR_T
94 # define PY_UNICODE_TYPE wchar_t
95 # endif
97 # if defined(Py_UNICODE_WIDE)
98 # define PY_UNICODE_TYPE Py_UCS4
99 # endif
101 #endif
103 /* If the compiler provides a wchar_t type we try to support it
104 through the interface functions PyUnicode_FromWideChar() and
105 PyUnicode_AsWideChar(). */
107 #ifdef HAVE_USABLE_WCHAR_T
108 # ifndef HAVE_WCHAR_H
109 # define HAVE_WCHAR_H
110 # endif
111 #endif
113 #ifdef HAVE_WCHAR_H
114 /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
115 # ifdef _HAVE_BSDI
116 # include <time.h>
117 # endif
118 # include <wchar.h>
119 #endif
122 * Use this typedef when you need to represent a UTF-16 surrogate pair
123 * as a single unsigned integer.
125 #if SIZEOF_INT >= 4
126 typedef unsigned int Py_UCS4;
127 #elif SIZEOF_LONG >= 4
128 typedef unsigned long Py_UCS4;
129 #endif
131 typedef PY_UNICODE_TYPE Py_UNICODE;
133 /* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
135 /* Unicode API names are mangled to assure that UCS-2 and UCS-4 builds
136 produce different external names and thus cause import errors in
137 case Python interpreters and extensions with mixed compiled in
138 Unicode width assumptions are combined. */
140 #ifndef Py_UNICODE_WIDE
142 # define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
143 # define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
144 # define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject
145 # define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
146 # define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
147 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
148 # define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
149 # define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
150 # define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
151 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
152 # define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
153 # define PyUnicode_Compare PyUnicodeUCS2_Compare
154 # define PyUnicode_Concat PyUnicodeUCS2_Concat
155 # define PyUnicode_Contains PyUnicodeUCS2_Contains
156 # define PyUnicode_Count PyUnicodeUCS2_Count
157 # define PyUnicode_Decode PyUnicodeUCS2_Decode
158 # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
159 # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
160 # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
161 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
162 # define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
163 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
164 # define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
165 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
166 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
167 # define PyUnicode_Encode PyUnicodeUCS2_Encode
168 # define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
169 # define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
170 # define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
171 # define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
172 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
173 # define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
174 # define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
175 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
176 # define PyUnicode_Find PyUnicodeUCS2_Find
177 # define PyUnicode_Format PyUnicodeUCS2_Format
178 # define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
179 # define PyUnicode_FromObject PyUnicodeUCS2_FromObject
180 # define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
181 # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
182 # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
183 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
184 # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
185 # define PyUnicode_GetSize PyUnicodeUCS2_GetSize
186 # define PyUnicode_Join PyUnicodeUCS2_Join
187 # define PyUnicode_Partition PyUnicodeUCS2_Partition
188 # define PyUnicode_RPartition PyUnicodeUCS2_RPartition
189 # define PyUnicode_RSplit PyUnicodeUCS2_RSplit
190 # define PyUnicode_Replace PyUnicodeUCS2_Replace
191 # define PyUnicode_Resize PyUnicodeUCS2_Resize
192 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
193 # define PyUnicode_Split PyUnicodeUCS2_Split
194 # define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
195 # define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
196 # define PyUnicode_Translate PyUnicodeUCS2_Translate
197 # define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
198 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
199 # define _PyUnicode_Fini _PyUnicodeUCS2_Fini
200 # define _PyUnicode_Init _PyUnicodeUCS2_Init
201 # define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
202 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
203 # define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
204 # define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
205 # define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
206 # define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
207 # define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
208 # define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
209 # define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
210 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
211 # define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
212 # define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
213 # define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
214 # define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
215 # define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
217 #else
219 # define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
220 # define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
221 # define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject
222 # define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
223 # define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
224 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
225 # define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
226 # define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
227 # define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
228 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
229 # define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
230 # define PyUnicode_Compare PyUnicodeUCS4_Compare
231 # define PyUnicode_Concat PyUnicodeUCS4_Concat
232 # define PyUnicode_Contains PyUnicodeUCS4_Contains
233 # define PyUnicode_Count PyUnicodeUCS4_Count
234 # define PyUnicode_Decode PyUnicodeUCS4_Decode
235 # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
236 # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
237 # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
238 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
239 # define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
240 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
241 # define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
242 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
243 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
244 # define PyUnicode_Encode PyUnicodeUCS4_Encode
245 # define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
246 # define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
247 # define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
248 # define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
249 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
250 # define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
251 # define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
252 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
253 # define PyUnicode_Find PyUnicodeUCS4_Find
254 # define PyUnicode_Format PyUnicodeUCS4_Format
255 # define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
256 # define PyUnicode_FromObject PyUnicodeUCS4_FromObject
257 # define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
258 # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
259 # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
260 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
261 # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
262 # define PyUnicode_GetSize PyUnicodeUCS4_GetSize
263 # define PyUnicode_Join PyUnicodeUCS4_Join
264 # define PyUnicode_Partition PyUnicodeUCS4_Partition
265 # define PyUnicode_RPartition PyUnicodeUCS4_RPartition
266 # define PyUnicode_RSplit PyUnicodeUCS4_RSplit
267 # define PyUnicode_Replace PyUnicodeUCS4_Replace
268 # define PyUnicode_Resize PyUnicodeUCS4_Resize
269 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
270 # define PyUnicode_Split PyUnicodeUCS4_Split
271 # define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
272 # define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
273 # define PyUnicode_Translate PyUnicodeUCS4_Translate
274 # define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
275 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
276 # define _PyUnicode_Fini _PyUnicodeUCS4_Fini
277 # define _PyUnicode_Init _PyUnicodeUCS4_Init
278 # define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
279 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
280 # define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
281 # define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
282 # define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
283 # define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
284 # define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
285 # define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
286 # define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
287 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
288 # define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
289 # define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
290 # define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
291 # define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
292 # define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
295 #endif
297 /* --- Internal Unicode Operations ---------------------------------------- */
299 /* If you want Python to use the compiler's wctype.h functions instead
300 of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
301 configure Python using --with-wctype-functions. This reduces the
302 interpreter's code size. */
304 #if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
306 #include <wctype.h>
308 #define Py_UNICODE_ISSPACE(ch) iswspace(ch)
310 #define Py_UNICODE_ISLOWER(ch) iswlower(ch)
311 #define Py_UNICODE_ISUPPER(ch) iswupper(ch)
312 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
313 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
315 #define Py_UNICODE_TOLOWER(ch) towlower(ch)
316 #define Py_UNICODE_TOUPPER(ch) towupper(ch)
317 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
319 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
320 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
321 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
323 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
324 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
325 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
327 #define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
329 #else
331 #define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
333 #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
334 #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
335 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
336 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
338 #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
339 #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
340 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
342 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
343 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
344 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
346 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
347 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
348 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
350 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
352 #endif
/* Non-zero if any of Py_UNICODE_ISALPHA, Py_UNICODE_ISDECIMAL,
   Py_UNICODE_ISDIGIT or Py_UNICODE_ISNUMERIC holds for ch.

   The tests are joined with ||, so they short-circuit in the order
   listed.  ch is expanded once per test (up to four times in total) and
   should therefore be free of side effects. */
354 #define Py_UNICODE_ISALNUM(ch) \
355 (Py_UNICODE_ISALPHA(ch) || \
356 Py_UNICODE_ISDECIMAL(ch) || \
357 Py_UNICODE_ISDIGIT(ch) || \
358 Py_UNICODE_ISNUMERIC(ch))
/* Copy length Py_UNICODE elements from source to target.

   length is an element count; it is multiplied by sizeof(Py_UNICODE) to
   obtain the byte count handed to Py_MEMCPY.  Py_MEMCPY is defined
   outside this header -- presumably it has memcpy() semantics, i.e. the
   two regions must not overlap; confirm against its definition. */
360 #define Py_UNICODE_COPY(target, source, length) \
361 Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
/* Store value into the first length elements of the Py_UNICODE buffer
   target.

   Every argument is evaluated exactly once and captured in a local
   (t_, v_, n_), so expressions that are costly or have side effects can
   be passed safely.  A length of zero or less stores nothing.  The macro
   expands to a single do { ... } while (0) statement: use it like a
   function call followed by a semicolon. */
#define Py_UNICODE_FILL(target, value, length) \
    do {Py_ssize_t i_; Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
        Py_ssize_t n_ = (length);\
        for (i_ = 0; i_ < n_; i_++) t_[i_] = v_;\
    } while (0)
368 /* Check whether substring matches string at the given offset.

   The first and then the last character of the candidate range are
   compared before memcmp() is run over the whole range, so mismatches
   are usually rejected without the call.

   Preconditions: the offset must be valid, and the substring must not be
   empty -- with a length of 0 the last-character comparison would read
   the element just before both buffers.

   string is accessed through ->str, substring through ->str and
   ->length.  Every argument is expanded several times and should have no
   side effects. */
370 #define Py_UNICODE_MATCH(string, offset, substring) \
371 ((*((string)->str + (offset)) == *((substring)->str)) && \
372 ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \
373 !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE)))
375 #ifdef __cplusplus
376 extern "C" {
377 #endif
379 /* --- Unicode Type ------------------------------------------------------- */
/* Instance layout of a Unicode object.  The character data is reached
   through the str pointer; the remaining fields are bookkeeping.  The
   fast access macros in this header read length and str directly, so the
   field names and types are part of the interface. */
381 typedef struct {
382 PyObject_HEAD
383 Py_ssize_t length; /* Length of raw Unicode data in buffer, counted in Py_UNICODE elements (not bytes) */
384 Py_UNICODE *str; /* Raw Unicode buffer */
385 long hash; /* Hash value; -1 if not set */
386 PyObject *defenc; /* (Default) Encoded version as Python
387 string, or NULL; this is used for
388 implementing the buffer protocol */
389 } PyUnicodeObject;
/* The PyTypeObject that the two check macros below compare against. */
391 PyAPI_DATA(PyTypeObject) PyUnicode_Type;
/* PyUnicode_Check delegates to PyObject_TypeCheck() with PyUnicode_Type
   (presumably this also accepts subtypes -- confirm against the
   definition of PyObject_TypeCheck).  PyUnicode_CheckExact is true only
   when op's ob_type is PyUnicode_Type itself.

   Neither macro casts op, so op must already be a pointer to something
   with an ob_type member.  When Py_USING_UNICODE is not defined, both
   macros are instead defined as 0 near the top of this header. */
393 #define PyUnicode_Check(op) PyObject_TypeCheck(op, &PyUnicode_Type)
394 #define PyUnicode_CheckExact(op) ((op)->ob_type == &PyUnicode_Type)
396 /* Fast access macros.

   Each macro casts op to PyUnicodeObject * and reads a field directly.
   No type check is performed, so op must already be known to point to a
   Unicode object.

   PyUnicode_GET_SIZE       the length field (count of Py_UNICODE elements)
   PyUnicode_GET_DATA_SIZE  length * sizeof(Py_UNICODE), i.e. the size of
                            the buffer contents in bytes
   PyUnicode_AS_UNICODE     the str field (Py_UNICODE * buffer)
   PyUnicode_AS_DATA        the same buffer viewed as const char * */
397 #define PyUnicode_GET_SIZE(op) \
398 (((PyUnicodeObject *)(op))->length)
399 #define PyUnicode_GET_DATA_SIZE(op) \
400 (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))
401 #define PyUnicode_AS_UNICODE(op) \
402 (((PyUnicodeObject *)(op))->str)
403 #define PyUnicode_AS_DATA(op) \
404 ((const char *)((PyUnicodeObject *)(op))->str)
406 /* --- Constants ---------------------------------------------------------- */
408 /* This Unicode character will be used as replacement character during
409 decoding if the errors argument is set to "replace". Note: the
410 Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
411 Unicode 3.0. */
413 #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
415 /* === Public API ========================================================= */
417 /* --- Plain Py_UNICODE --------------------------------------------------- */
419 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
420 size.
422 u may be NULL which causes the contents to be undefined. It is the
423 user's responsibility to fill in the needed data afterwards. Note
424 that modifying the Unicode object contents after construction is
425 only allowed if u was set to NULL.
427 The buffer is copied into the new object. */
429 PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
430 const Py_UNICODE *u, /* Unicode buffer */
431 Py_ssize_t size /* size of buffer */
434 /* Return a read-only pointer to the Unicode object's internal
435 Py_UNICODE buffer. */
437 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
438 PyObject *unicode /* Unicode object */
441 /* Get the length of the Unicode object. */
443 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
444 PyObject *unicode /* Unicode object */
447 /* Get the maximum ordinal for a Unicode character. */
448 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
450 /* Resize an already allocated Unicode object to the new size length.
452 *unicode is modified to point to the new (resized) object and 0
453 returned on success.
455 This API may only be called by the function which also called the
456 Unicode constructor. The refcount on the object must be 1. Otherwise,
457 an error is returned.
459 Error handling is implemented as follows: an exception is set, -1
460 is returned and *unicode left untouched.
464 PyAPI_FUNC(int) PyUnicode_Resize(
465 PyObject **unicode, /* Pointer to the Unicode object */
466 Py_ssize_t length /* New length */
469 /* Coerce obj to a Unicode object and return a reference with
470 *incremented* refcount.
472 Coercion is done in the following way:
474 1. String and other char buffer compatible objects are decoded
475 under the assumptions that they contain data using the current
476 default encoding. Decoding is done in "strict" mode.
478 2. All other objects (including Unicode objects) raise an
479 exception.
481 The API returns NULL in case of an error. The caller is responsible
482 for decref'ing the returned objects.
486 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
487 register PyObject *obj, /* Object */
488 const char *encoding, /* encoding */
489 const char *errors /* error handling */
492 /* Coerce obj to a Unicode object and return a reference with
493 *incremented* refcount.
495 Unicode objects are passed back as-is (subclasses are converted to
496 true Unicode objects), all other objects are delegated to
497 PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
498 using the default encoding as basis for decoding the object.
500 The API returns NULL in case of an error. The caller is responsible
501 for decref'ing the returned objects.
505 PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
506 register PyObject *obj /* Object */
509 /* --- wchar_t support for platforms which support it --------------------- */
511 #ifdef HAVE_WCHAR_H
513 /* Create a Unicode Object from the wchar_t buffer w of the given
514 size.
516 The buffer is copied into the new object. */
518 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
519 register const wchar_t *w, /* wchar_t buffer */
520 Py_ssize_t size /* size of buffer */
523 /* Copies the Unicode Object contents into the wchar_t buffer w. At
524 most size wchar_t characters are copied.
526 Note that the resulting wchar_t string may or may not be
527 0-terminated. It is the responsibility of the caller to make sure
528 that the wchar_t string is 0-terminated in case this is required by
529 the application.
531 Returns the number of wchar_t characters copied (excluding a
532 possibly trailing 0-termination character) or -1 in case of an
533 error. */
535 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
536 PyUnicodeObject *unicode, /* Unicode object */
537 register wchar_t *w, /* wchar_t buffer */
538 Py_ssize_t size /* size of buffer */
541 #endif
543 /* --- Unicode ordinals --------------------------------------------------- */
545 /* Create a Unicode Object from the given Unicode code point ordinal.
547 The ordinal must be in range(0x10000) on narrow Python builds
548 (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
549 raised in case it is not.
553 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
555 /* === Builtin Codecs =====================================================
557 Many of these APIs take two arguments encoding and errors. These
558 parameters encoding and errors have the same semantics as the ones
559 of the builtin unicode() API.
561 Setting encoding to NULL causes the default encoding to be used.
563 Error handling is set by errors which may also be set to NULL
564 meaning to use the default handling defined for the codec. Default
565 error handling for all builtin codecs is "strict" (ValueErrors are
566 raised).
568 The codecs all use a similar interface. Only deviations from the
569 generic ones are documented.
573 /* --- Manage the default encoding ---------------------------------------- */
575 /* Return a Python string holding the default encoded value of the
576 Unicode object.
578 The resulting string is cached in the Unicode object for subsequent
579 usage by this function. The cached version is needed to implement
580 the character buffer interface and will live (at least) as long as
581 the Unicode object itself.
583 The refcount of the string is *not* incremented.
585 *** Exported for internal use by the interpreter only !!! ***
589 PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
590 PyObject *, const char *);
592 /* Returns the currently active default encoding.
594 The default encoding is currently implemented as a run-time settable
595 process global. This may change in future versions of the
596 interpreter to become a parameter which is managed on a per-thread
597 basis.
601 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
603 /* Sets the currently active default encoding.
605 Returns 0 on success, -1 in case of an error.
609 PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding(
610 const char *encoding /* Encoding name in standard form */
613 /* --- Generic Codecs ----------------------------------------------------- */
615 /* Create a Unicode object by decoding the encoded string s of the
616 given size. */
618 PyAPI_FUNC(PyObject*) PyUnicode_Decode(
619 const char *s, /* encoded string */
620 Py_ssize_t size, /* size of buffer */
621 const char *encoding, /* encoding */
622 const char *errors /* error handling */
625 /* Encodes a Py_UNICODE buffer of the given size and returns a
626 Python string object. */
628 PyAPI_FUNC(PyObject*) PyUnicode_Encode(
629 const Py_UNICODE *s, /* Unicode char buffer */
630 Py_ssize_t size, /* number of Py_UNICODE chars to encode */
631 const char *encoding, /* encoding */
632 const char *errors /* error handling */
635 /* Encodes a Unicode object and returns the result as Python
636 object. */
638 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
639 PyObject *unicode, /* Unicode object */
640 const char *encoding, /* encoding */
641 const char *errors /* error handling */
644 /* Encodes a Unicode object and returns the result as Python string
645 object. */
647 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
648 PyObject *unicode, /* Unicode object */
649 const char *encoding, /* encoding */
650 const char *errors /* error handling */
653 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
654 PyObject* string /* 256 character map */
658 /* --- UTF-7 Codecs ------------------------------------------------------- */
660 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
661 const char *string, /* UTF-7 encoded string */
662 Py_ssize_t length, /* size of string */
663 const char *errors /* error handling */
666 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
667 const Py_UNICODE *data, /* Unicode char buffer */
668 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
669 int encodeSetO, /* force the encoder to encode characters in
670 Set O, as described in RFC2152 */
671 int encodeWhiteSpace, /* force the encoder to encode space, tab,
672 carriage return and linefeed characters */
673 const char *errors /* error handling */
676 /* --- UTF-8 Codecs ------------------------------------------------------- */
678 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
679 const char *string, /* UTF-8 encoded string */
680 Py_ssize_t length, /* size of string */
681 const char *errors /* error handling */
684 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
685 const char *string, /* UTF-8 encoded string */
686 Py_ssize_t length, /* size of string */
687 const char *errors, /* error handling */
688 Py_ssize_t *consumed /* bytes consumed */
691 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
692 PyObject *unicode /* Unicode object */
695 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
696 const Py_UNICODE *data, /* Unicode char buffer */
697 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
698 const char *errors /* error handling */
701 /* --- UTF-16 Codecs ------------------------------------------------------ */
703 /* Decodes length bytes from a UTF-16 encoded buffer string and returns
704 the corresponding Unicode object.
706 errors (if non-NULL) defines the error handling. It defaults
707 to "strict".
709 If byteorder is non-NULL, the decoder starts decoding using the
710 given byte order:
712 *byteorder == -1: little endian
713 *byteorder == 0: native order
714 *byteorder == 1: big endian
716 In native mode, the first two bytes of the stream are checked for a
717 BOM mark. If found, the BOM mark is analysed, the byte order
718 adjusted and the BOM skipped. In the other modes, no BOM mark
719 interpretation is done. After completion, *byteorder is set to the
720 current byte order at the end of input data.
722 If byteorder is NULL, the codec starts in native order mode.
726 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
727 const char *string, /* UTF-16 encoded string */
728 Py_ssize_t length, /* size of string */
729 const char *errors, /* error handling */
730 int *byteorder /* pointer to byteorder to use
731 0=native;-1=LE,1=BE; updated on
732 exit */
735 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
736 const char *string, /* UTF-16 encoded string */
737 Py_ssize_t length, /* size of string */
738 const char *errors, /* error handling */
739 int *byteorder, /* pointer to byteorder to use
740 0=native;-1=LE,1=BE; updated on
741 exit */
742 Py_ssize_t *consumed /* bytes consumed */
745 /* Returns a Python string using the UTF-16 encoding in native byte
746 order. The string always starts with a BOM mark. */
748 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
749 PyObject *unicode /* Unicode object */
752 /* Returns a Python string object holding the UTF-16 encoded value of
753 the Unicode data.
755 If byteorder is not 0, output is written according to the following
756 byte order:
758 byteorder == -1: little endian
759 byteorder == 0: native byte order (writes a BOM mark)
760 byteorder == 1: big endian
762 If byteorder is 0, the output string will always start with the
763 Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
764 prepended.
766 Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
767 UCS-2. This trick makes it possible to add full UTF-16 capabilities
768 at a later point without compromising the APIs.
772 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
773 const Py_UNICODE *data, /* Unicode char buffer */
774 Py_ssize_t length, /* number of Py_UNICODE chars to encode */
775 const char *errors, /* error handling */
776 int byteorder /* byteorder to use 0=BOM+native;-1=LE,1=BE */
779 /* --- Unicode-Escape Codecs ---------------------------------------------- */
781 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
782 const char *string, /* Unicode-Escape encoded string */
783 Py_ssize_t length, /* size of string */
784 const char *errors /* error handling */
787 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
788 PyObject *unicode /* Unicode object */
791 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
792 const Py_UNICODE *data, /* Unicode char buffer */
793 Py_ssize_t length /* Number of Py_UNICODE chars to encode */
796 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
798 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
799 const char *string, /* Raw-Unicode-Escape encoded string */
800 Py_ssize_t length, /* size of string */
801 const char *errors /* error handling */
804 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
805 PyObject *unicode /* Unicode object */
808 PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
809 const Py_UNICODE *data, /* Unicode char buffer */
810 Py_ssize_t length /* Number of Py_UNICODE chars to encode */
813 /* --- Unicode Internal Codec ---------------------------------------------
815 Only for internal use in _codecsmodule.c */
817 PyObject *_PyUnicode_DecodeUnicodeInternal(
818 const char *string,
819 Py_ssize_t length,
820 const char *errors
823 /* --- Latin-1 Codecs -----------------------------------------------------
825 Note: Latin-1 corresponds to the first 256 Unicode ordinals.
829 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
830 const char *string, /* Latin-1 encoded string */
831 Py_ssize_t length, /* size of string */
832 const char *errors /* error handling */
835 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
836 PyObject *unicode /* Unicode object */
839 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
840 const Py_UNICODE *data, /* Unicode char buffer */
841 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
842 const char *errors /* error handling */
845 /* --- ASCII Codecs -------------------------------------------------------
847 Only 7-bit ASCII data is accepted. All other codes generate errors.
851 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
852 const char *string, /* ASCII encoded string */
853 Py_ssize_t length, /* size of string */
854 const char *errors /* error handling */
857 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
858 PyObject *unicode /* Unicode object */
861 PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
862 const Py_UNICODE *data, /* Unicode char buffer */
863 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
864 const char *errors /* error handling */
867 /* --- Character Map Codecs -----------------------------------------------
869 This codec uses mappings to encode and decode characters.
871 Decoding mappings must map single string characters to single
872 Unicode characters, integers (which are then interpreted as Unicode
873 ordinals) or None (meaning "undefined mapping" and causing an
874 error).
876 Encoding mappings must map single Unicode characters to single
877 string characters, integers (which are then interpreted as Latin-1
878 ordinals) or None (meaning "undefined mapping" and causing an
879 error).
881 If a character lookup fails with a LookupError, the character is
882 copied as-is meaning that its ordinal value will be interpreted as
883 Unicode or Latin-1 ordinal resp. Because of this, mappings only need
884 to contain those mappings which map characters to different code
885 points.
889 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
890 const char *string, /* Encoded string */
891 Py_ssize_t length, /* size of string */
892 PyObject *mapping, /* character mapping
893 (char ordinal -> unicode ordinal) */
894 const char *errors /* error handling */
897 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
898 PyObject *unicode, /* Unicode object */
899 PyObject *mapping /* character mapping
900 (unicode ordinal -> char ordinal) */
903 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
904 const Py_UNICODE *data, /* Unicode char buffer */
905 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
906 PyObject *mapping, /* character mapping
907 (unicode ordinal -> char ordinal) */
908 const char *errors /* error handling */
911 /* Translate a Py_UNICODE buffer of the given length by applying a
912 character mapping table to it and return the resulting Unicode
913 object.
915 The mapping table must map Unicode ordinal integers to Unicode
916 ordinal integers or None (causing deletion of the character).
918 Mapping tables may be dictionaries or sequences. Unmapped character
919 ordinals (ones which cause a LookupError) are left untouched and
920 are copied as-is.
924 PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
925 const Py_UNICODE *data, /* Unicode char buffer */
926 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
927 PyObject *table, /* Translate table */
928 const char *errors /* error handling */
931 #ifdef MS_WIN32
933 /* --- MBCS codecs for Windows -------------------------------------------- */
935 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
936 const char *string, /* MBCS encoded string */
937 Py_ssize_t length, /* size of string */
938 const char *errors /* error handling */
941 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
942 const char *string, /* MBCS encoded string */
943 Py_ssize_t length, /* size of string */
944 const char *errors, /* error handling */
945 Py_ssize_t *consumed /* bytes consumed */
948 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
949 PyObject *unicode /* Unicode object */
952 PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
953 const Py_UNICODE *data, /* Unicode char buffer */
954 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
955 const char *errors /* error handling */
958 #endif /* MS_WIN32 */
960 /* --- Decimal Encoder ---------------------------------------------------- */
962 /* Takes a Unicode string holding a decimal value and writes it into
963 an output buffer using standard ASCII digit codes.
965 The output buffer has to provide at least length+1 bytes of storage
966 area. The output string is 0-terminated.
968 The encoder converts whitespace to ' ', decimal characters to their
969 corresponding ASCII digit and all other Latin-1 characters except
970 \0 as-is. Characters outside this range (Unicode ordinals 1-255)
971 are treated as errors. This includes embedded NUL (\0) characters.
973 Error handling is defined by the errors argument:
975 NULL or "strict": raise a ValueError
976 "ignore": ignore the wrong characters (these are not copied to the
977 output buffer)
978 "replace": replaces illegal characters with '?'
980 Returns 0 on success, -1 on failure.
984 PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
985 Py_UNICODE *s, /* Unicode buffer */
986 Py_ssize_t length, /* Number of Py_UNICODE chars to encode */
987 char *output, /* Output buffer; must have size >= length+1 (room for the 0-terminator) */
988 const char *errors /* error handling */
991 /* --- Methods & Slots ----------------------------------------------------
993 These are capable of handling Unicode objects and strings on input
994 (we refer to them as strings in the descriptions) and return
995 Unicode objects or integers as appropriate. */
997 /* Concat two strings giving a new Unicode string. */
999 PyAPI_FUNC(PyObject*) PyUnicode_Concat(
1000 PyObject *left, /* Left string */
1001 PyObject *right /* Right string */
1004 /* Split a string giving a list of Unicode strings.
1006 If sep is NULL, splitting will be done at all whitespace
1007 substrings. Otherwise, splits occur at the given separator.
1009 At most maxsplit splits will be done. If negative, no limit is set.
1011 Separators are not included in the resulting list.
1015 PyAPI_FUNC(PyObject*) PyUnicode_Split(
1016 PyObject *s, /* String to split */
1017 PyObject *sep, /* String separator */
1018 Py_ssize_t maxsplit /* Maxsplit count */
1021 /* Ditto, but split at line breaks.
1023 CRLF is considered to be one line break. Line breaks are not
1024 included in the resulting list unless keepends is true. */
1026 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
1027 PyObject *s, /* String to split */
1028 int keepends /* If true, line end markers are included */
1031 /* Partition a string using a given separator. */
1033 PyAPI_FUNC(PyObject*) PyUnicode_Partition(
1034 PyObject *s, /* String to partition */
1035 PyObject *sep /* String separator */
1038 /* Partition a string using a given separator, searching from the end of the
1039 string. */
1041 PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
1042 PyObject *s, /* String to partition */
1043 PyObject *sep /* String separator */
1046 /* Split a string giving a list of Unicode strings.
1048 If sep is NULL, splitting will be done at all whitespace
1049 substrings. Otherwise, splits occur at the given separator.
1051 At most maxsplit splits will be done; if maxsplit is negative,
1052 no limit is set. Unlike PyUnicode_Split, PyUnicode_RSplit splits
1053 from the end of the string.
1055 Separators are not included in the resulting list.
1059 PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
1060 PyObject *s, /* String to split */
1061 PyObject *sep, /* String separator */
1062 Py_ssize_t maxsplit /* Maxsplit count */
1065 /* Translate a string by applying a character mapping table to it and
1066 return the resulting Unicode object.
1068 The mapping table must map Unicode ordinal integers to Unicode
1069 ordinal integers or None (causing deletion of the character).
1071 Mapping tables may be dictionaries or sequences. Unmapped character
1072 ordinals (ones which cause a LookupError) are left untouched and
1073 are copied as-is.
1077 PyAPI_FUNC(PyObject *) PyUnicode_Translate(
1078 PyObject *str, /* String */
1079 PyObject *table, /* Translate table */
1080 const char *errors /* error handling */
1083 /* Join a sequence of strings using the given separator and return
1084 the resulting Unicode string. */
1086 PyAPI_FUNC(PyObject*) PyUnicode_Join(
1087 PyObject *separator, /* Separator string */
1088 PyObject *seq /* Sequence object */
1091 /* Return 1 if substr matches str[start:end] at the given tail end, 0
1092 otherwise. Return -1 if an error occurred. */
1094 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
1095 PyObject *str, /* String */
1096 PyObject *substr, /* Prefix or Suffix string */
1097 Py_ssize_t start, /* Start index */
1098 Py_ssize_t end, /* Stop index */
1099 int direction /* Tail end: -1 prefix, +1 suffix */
1102 /* Return the first position of substr in str[start:end] using the
1103 given search direction or -1 if not found. -2 is returned in case
1104 an error occurred and an exception is set. */
1106 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
1107 PyObject *str, /* String */
1108 PyObject *substr, /* Substring to find */
1109 Py_ssize_t start, /* Start index */
1110 Py_ssize_t end, /* Stop index */
1111 int direction /* Find direction: +1 forward, -1 backward */
1114 /* Count the number of occurrences of substr in str[start:end]. */
1116 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
1117 PyObject *str, /* String */
1118 PyObject *substr, /* Substring to count */
1119 Py_ssize_t start, /* Start index */
1120 Py_ssize_t end /* Stop index */
1123 /* Replace at most maxcount occurrences of substr in str with replstr
1124 and return the resulting Unicode object. */
1126 PyAPI_FUNC(PyObject *) PyUnicode_Replace(
1127 PyObject *str, /* String */
1128 PyObject *substr, /* Substring to find */
1129 PyObject *replstr, /* Substring to replace */
1130 Py_ssize_t maxcount /* Max. number of replacements to apply;
1131 -1 = all */
1134 /* Compare two strings and return -1, 0, 1 for less than, equal,
1135 greater than resp. */
1137 PyAPI_FUNC(int) PyUnicode_Compare(
1138 PyObject *left, /* Left string */
1139 PyObject *right /* Right string */
1142 /* Apply an argument tuple or dictionary to a format string and return
1143 the resulting Unicode string. */
1145 PyAPI_FUNC(PyObject *) PyUnicode_Format(
1146 PyObject *format, /* Format string */
1147 PyObject *args /* Argument tuple or dictionary */
1150 /* Check whether element is contained in container and return 1/0
1151 accordingly.
1153 element has to coerce to a one-element Unicode string. -1 is
1154 returned in case of an error. */
1156 PyAPI_FUNC(int) PyUnicode_Contains(
1157 PyObject *container, /* Container string */
1158 PyObject *element /* Element string */
1161 /* Externally visible for str.strip(unicode) */
1162 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
1163 PyUnicodeObject *self,
1164 int striptype,
1165 PyObject *sepobj
1168 /* === Characters Type APIs =============================================== */
1170 /* These should not be used directly. Use the Py_UNICODE_IS* and
1171 Py_UNICODE_TO* macros instead.
1173 These APIs are implemented in Objects/unicodectype.c.
1177 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
1178 Py_UNICODE ch /* Unicode character */
1181 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
1182 Py_UNICODE ch /* Unicode character */
1185 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
1186 Py_UNICODE ch /* Unicode character */
1189 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
1190 const Py_UNICODE ch /* Unicode character */
1193 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
1194 const Py_UNICODE ch /* Unicode character */
1197 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase(
1198 Py_UNICODE ch /* Unicode character */
1201 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase(
1202 Py_UNICODE ch /* Unicode character */
1205 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase(
1206 Py_UNICODE ch /* Unicode character */
1209 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
1210 Py_UNICODE ch /* Unicode character */
1213 PyAPI_FUNC(int) _PyUnicode_ToDigit(
1214 Py_UNICODE ch /* Unicode character */
1217 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
1218 Py_UNICODE ch /* Unicode character */
1221 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
1222 Py_UNICODE ch /* Unicode character */
1225 PyAPI_FUNC(int) _PyUnicode_IsDigit(
1226 Py_UNICODE ch /* Unicode character */
1229 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
1230 Py_UNICODE ch /* Unicode character */
1233 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
1234 Py_UNICODE ch /* Unicode character */
1237 #ifdef __cplusplus
1239 #endif
1240 #endif /* Py_USING_UNICODE */
1241 #endif /* !Py_UNICODEOBJECT_H */