Include/unicodeobject.h

   1 #ifndef Py_UNICODEOBJECT_H
   2 #define Py_UNICODEOBJECT_H
   3
   4 #include <stdarg.h>
   5
   6 /*
   7
   8 Unicode implementation based on original code by Fredrik Lundh,
   9 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
  10 Unicode Integration Proposal (see file Misc/unicode.txt).
  11
  12 Copyright (c) Corporation for National Research Initiatives.
  13
  14
  15  Original header:
  16  --------------------------------------------------------------------
  17
  18  * Yet another Unicode string type for Python.  This type supports the
  19  * 16-bit Basic Multilingual Plane (BMP) only.
  20  *
  21  * Written by Fredrik Lundh, January 1999.
  22  *
  23  * Copyright (c) 1999 by Secret Labs AB.
  24  * Copyright (c) 1999 by Fredrik Lundh.
  25  *
  26  * fredrik@pythonware.com
  27  * http://www.pythonware.com
  28  *
  29  * --------------------------------------------------------------------
  30  * This Unicode String Type is
  31  *
  32  * Copyright (c) 1999 by Secret Labs AB
  33  * Copyright (c) 1999 by Fredrik Lundh
  34  *
  35  * By obtaining, using, and/or copying this software and/or its
  36  * associated documentation, you agree that you have read, understood,
  37  * and will comply with the following terms and conditions:
  38  *
  39  * Permission to use, copy, modify, and distribute this software and its
  40  * associated documentation for any purpose and without fee is hereby
  41  * granted, provided that the above copyright notice appears in all
  42  * copies, and that both that copyright notice and this permission notice
  43  * appear in supporting documentation, and that the name of Secret Labs
  44  * AB or the author not be used in advertising or publicity pertaining to
  45  * distribution of the software without specific, written prior
  46  * permission.
  47  *
  48  * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
  49  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  50  * FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
  51  * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  52  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  53  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
  54  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  55  * -------------------------------------------------------------------- */
  56
  57 #include <ctype.h>
  58
  59 /* === Internal API ======================================================= */
  60
  61 /* --- Internal Unicode Format -------------------------------------------- */
  62
  63 /* Python 3.x requires unicode */
  64 #define Py_USING_UNICODE
  65
  66 /* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
  67    properly set, but the default rules below don't set it.  I'll
  68    sort this out some other day -- fredrik@pythonware.com */
  69
     /* Py_UNICODE_SIZE has to be defined before this point; the check
        below turns a missing definition into a hard build error. */
  70 #ifndef Py_UNICODE_SIZE
  71 #error Must define Py_UNICODE_SIZE
  72 #endif
  73
  74 /* Setting Py_UNICODE_WIDE enables UCS-4 storage.  Otherwise, Unicode
  75    strings are stored as UCS-2 (with limited support for UTF-16).
        This header defines Py_UNICODE_WIDE itself whenever
        Py_UNICODE_SIZE is 4 or larger. */
  76
  77 #if Py_UNICODE_SIZE >= 4
  78 #define Py_UNICODE_WIDE
  79 #endif
  80
  81 /* Set these flags if the platform has "wchar.h", "wctype.h" and the
  82    wchar_t type is a 16-bit unsigned type */
  83 /* #define HAVE_WCHAR_H */
  84 /* #define HAVE_USABLE_WCHAR_T */
  85
  86 /* Defaults for various platforms.
        They apply only when PY_UNICODE_TYPE has not been defined already.
        If neither branch below matches (no 2-byte MS_WIN32 build and no
        Py_UNICODE_WIDE), PY_UNICODE_TYPE is left undefined by this block. */
  87 #ifndef PY_UNICODE_TYPE
  88
  89 /* Windows has a usable wchar_t type (unless we're using UCS-4) */
  90 # if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
  91 #  define HAVE_USABLE_WCHAR_T
  92 #  define PY_UNICODE_TYPE wchar_t
  93 # endif
  94
     /* Wide builds store code units as Py_UCS4.  Only the macro is set
        here; it is expanded later, where Py_UNICODE is typedef'ed. */
  95 # if defined(Py_UNICODE_WIDE)
  96 #  define PY_UNICODE_TYPE Py_UCS4
  97 # endif
  98
  99 #endif
 100
 101 /* If the compiler provides a wchar_t type we try to support it
 102    through the interface functions PyUnicode_FromWideChar() and
 103    PyUnicode_AsWideChar(). */
 104
     /* HAVE_USABLE_WCHAR_T forces HAVE_WCHAR_H on, so <wchar.h> is
        included below whenever wchar_t is used as the storage type. */
 105 #ifdef HAVE_USABLE_WCHAR_T
 106 # ifndef HAVE_WCHAR_H
 107 #  define HAVE_WCHAR_H
 108 # endif
 109 #endif
 110
 111 #ifdef HAVE_WCHAR_H
 112 /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
 113 # ifdef _HAVE_BSDI
 114 #  include <time.h>
 115 # endif
 116 #  include <wchar.h>
 117 #endif
 118
 119 /*
 120  * Use this typedef when you need to represent a UTF-16 surrogate pair
 121  * as a single unsigned integer.
      *
      * `unsigned int` is used when SIZEOF_INT is at least 4, otherwise
      * `unsigned long` when SIZEOF_LONG is at least 4.  If neither test
      * passes, no Py_UCS4 typedef is emitted by this block.
 122  */
 123 #if SIZEOF_INT >= 4
 124 typedef unsigned int Py_UCS4;
 125 #elif SIZEOF_LONG >= 4
 126 typedef unsigned long Py_UCS4;
 127 #endif
 128
 129 /* Py_UNICODE is the native Unicode storage format (code unit) used by
 130    Python and represents a single Unicode element in the Unicode
 131    type.  Its underlying type is whatever the PY_UNICODE_TYPE macro
        expands to at this point. */
 132
 133 typedef PY_UNICODE_TYPE Py_UNICODE;
 134
 135 /* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
 136
 137 /* Unicode API names are mangled to assure that UCS-2 and UCS-4 builds
 138    produce different external names and thus cause import errors in
 139    case Python interpreters and extensions with mixed compiled in
 140    Unicode width assumptions are combined.

        The two tables below list the same API names in the same order
        and differ only in the UCS2/UCS4 infix; an entry added to one
        table needs its counterpart in the other.  Note that
        PyUnicode_ClearFreeList maps to ...ClearFreelist (lower-case
        "l") in both tables. */
 141
 142 #ifndef Py_UNICODE_WIDE
 143
 144 # define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
 145 # define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
 146 # define PyUnicode_AsDecodedObject PyUnicodeUCS2_AsDecodedObject
 147 # define PyUnicode_AsDecodedUnicode PyUnicodeUCS2_AsDecodedUnicode
 148 # define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject
 149 # define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
 150 # define PyUnicode_AsEncodedUnicode PyUnicodeUCS2_AsEncodedUnicode
 151 # define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
 152 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
 153 # define PyUnicode_AsUTF32String PyUnicodeUCS2_AsUTF32String
 154 # define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
 155 # define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
 156 # define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
 157 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
 158 # define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
 159 # define PyUnicode_ClearFreeList PyUnicodeUCS2_ClearFreelist
 160 # define PyUnicode_Compare PyUnicodeUCS2_Compare
 161 # define PyUnicode_Concat PyUnicodeUCS2_Concat
 162 # define PyUnicode_Append PyUnicodeUCS2_Append
 163 # define PyUnicode_AppendAndDel PyUnicodeUCS2_AppendAndDel
 164 # define PyUnicode_Contains PyUnicodeUCS2_Contains
 165 # define PyUnicode_Count PyUnicodeUCS2_Count
 166 # define PyUnicode_Decode PyUnicodeUCS2_Decode
 167 # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
 168 # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
 169 # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
 170 # define PyUnicode_DecodeFSDefault PyUnicodeUCS2_DecodeFSDefault
 171 # define PyUnicode_DecodeFSDefaultAndSize PyUnicodeUCS2_DecodeFSDefaultAndSize
 172 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
 173 # define PyUnicode_DecodeUTF32 PyUnicodeUCS2_DecodeUTF32
 174 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS2_DecodeUTF32Stateful
 175 # define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
 176 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
 177 # define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
 178 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
 179 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
 180 # define PyUnicode_Encode PyUnicodeUCS2_Encode
 181 # define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
 182 # define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
 183 # define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
 184 # define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
 185 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
 186 # define PyUnicode_EncodeUTF32 PyUnicodeUCS2_EncodeUTF32
 187 # define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
 188 # define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
 189 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
 190 # define PyUnicode_Find PyUnicodeUCS2_Find
 191 # define PyUnicode_Format PyUnicodeUCS2_Format
 192 # define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
 193 # define PyUnicode_FromFormat PyUnicodeUCS2_FromFormat
 194 # define PyUnicode_FromFormatV PyUnicodeUCS2_FromFormatV
 195 # define PyUnicode_FromObject PyUnicodeUCS2_FromObject
 196 # define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
 197 # define PyUnicode_FromString PyUnicodeUCS2_FromString
 198 # define PyUnicode_FromStringAndSize PyUnicodeUCS2_FromStringAndSize
 199 # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
 200 # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
 201 # define PyUnicode_FSConverter PyUnicodeUCS2_FSConverter
 202 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
 203 # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
 204 # define PyUnicode_GetSize PyUnicodeUCS2_GetSize
 205 # define PyUnicode_IsIdentifier PyUnicodeUCS2_IsIdentifier
 206 # define PyUnicode_Join PyUnicodeUCS2_Join
 207 # define PyUnicode_Partition PyUnicodeUCS2_Partition
 208 # define PyUnicode_RPartition PyUnicodeUCS2_RPartition
 209 # define PyUnicode_RSplit PyUnicodeUCS2_RSplit
 210 # define PyUnicode_Replace PyUnicodeUCS2_Replace
 211 # define PyUnicode_Resize PyUnicodeUCS2_Resize
 212 # define PyUnicode_RichCompare PyUnicodeUCS2_RichCompare
 213 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
 214 # define PyUnicode_Split PyUnicodeUCS2_Split
 215 # define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
 216 # define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
 217 # define PyUnicode_Translate PyUnicodeUCS2_Translate
 218 # define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
 219 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
 220 # define _PyUnicode_Fini _PyUnicodeUCS2_Fini
 221 # define _PyUnicode_Init _PyUnicodeUCS2_Init
 222 # define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
 223 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
 224 # define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
 225 # define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
 226 # define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
 227 # define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
 228 # define _PyUnicode_IsPrintable _PyUnicodeUCS2_IsPrintable
 229 # define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
 230 # define _PyUnicode_IsXidStart _PyUnicodeUCS2_IsXidStart
 231 # define _PyUnicode_IsXidContinue _PyUnicodeUCS2_IsXidContinue
 232 # define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
 233 # define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
 234 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
 235 # define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
 236 # define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
 237 # define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
 238 # define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
 239 # define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
 240
 241 #else
 242
 243 # define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
 244 # define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
 245 # define PyUnicode_AsDecodedObject PyUnicodeUCS4_AsDecodedObject
 246 # define PyUnicode_AsDecodedUnicode PyUnicodeUCS4_AsDecodedUnicode
 247 # define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject
 248 # define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
 249 # define PyUnicode_AsEncodedUnicode PyUnicodeUCS4_AsEncodedUnicode
 250 # define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
 251 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
 252 # define PyUnicode_AsUTF32String PyUnicodeUCS4_AsUTF32String
 253 # define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
 254 # define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
 255 # define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
 256 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
 257 # define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
 258 # define PyUnicode_ClearFreeList PyUnicodeUCS4_ClearFreelist
 259 # define PyUnicode_Compare PyUnicodeUCS4_Compare
 260 # define PyUnicode_Concat PyUnicodeUCS4_Concat
 261 # define PyUnicode_Append PyUnicodeUCS4_Append
 262 # define PyUnicode_AppendAndDel PyUnicodeUCS4_AppendAndDel
 263 # define PyUnicode_Contains PyUnicodeUCS4_Contains
 264 # define PyUnicode_Count PyUnicodeUCS4_Count
 265 # define PyUnicode_Decode PyUnicodeUCS4_Decode
 266 # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
 267 # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
 268 # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
 269 # define PyUnicode_DecodeFSDefault PyUnicodeUCS4_DecodeFSDefault
 270 # define PyUnicode_DecodeFSDefaultAndSize PyUnicodeUCS4_DecodeFSDefaultAndSize
 271 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
 272 # define PyUnicode_DecodeUTF32 PyUnicodeUCS4_DecodeUTF32
 273 # define PyUnicode_DecodeUTF32Stateful PyUnicodeUCS4_DecodeUTF32Stateful
 274 # define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
 275 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
 276 # define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
 277 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
 278 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
 279 # define PyUnicode_Encode PyUnicodeUCS4_Encode
 280 # define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
 281 # define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
 282 # define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
 283 # define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
 284 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
 285 # define PyUnicode_EncodeUTF32 PyUnicodeUCS4_EncodeUTF32
 286 # define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
 287 # define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
 288 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
 289 # define PyUnicode_Find PyUnicodeUCS4_Find
 290 # define PyUnicode_Format PyUnicodeUCS4_Format
 291 # define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
 292 # define PyUnicode_FromFormat PyUnicodeUCS4_FromFormat
 293 # define PyUnicode_FromFormatV PyUnicodeUCS4_FromFormatV
 294 # define PyUnicode_FromObject PyUnicodeUCS4_FromObject
 295 # define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
 296 # define PyUnicode_FromString PyUnicodeUCS4_FromString
 297 # define PyUnicode_FromStringAndSize PyUnicodeUCS4_FromStringAndSize
 298 # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
 299 # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
 300 # define PyUnicode_FSConverter PyUnicodeUCS4_FSConverter
 301 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
 302 # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
 303 # define PyUnicode_GetSize PyUnicodeUCS4_GetSize
 304 # define PyUnicode_IsIdentifier PyUnicodeUCS4_IsIdentifier
 305 # define PyUnicode_Join PyUnicodeUCS4_Join
 306 # define PyUnicode_Partition PyUnicodeUCS4_Partition
 307 # define PyUnicode_RPartition PyUnicodeUCS4_RPartition
 308 # define PyUnicode_RSplit PyUnicodeUCS4_RSplit
 309 # define PyUnicode_Replace PyUnicodeUCS4_Replace
 310 # define PyUnicode_Resize PyUnicodeUCS4_Resize
 311 # define PyUnicode_RichCompare PyUnicodeUCS4_RichCompare
 312 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
 313 # define PyUnicode_Split PyUnicodeUCS4_Split
 314 # define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
 315 # define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
 316 # define PyUnicode_Translate PyUnicodeUCS4_Translate
 317 # define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
 318 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
 319 # define _PyUnicode_Fini _PyUnicodeUCS4_Fini
 320 # define _PyUnicode_Init _PyUnicodeUCS4_Init
 321 # define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
 322 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
 323 # define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
 324 # define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
 325 # define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
 326 # define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
 327 # define _PyUnicode_IsPrintable _PyUnicodeUCS4_IsPrintable
 328 # define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
 329 # define _PyUnicode_IsXidStart _PyUnicodeUCS4_IsXidStart
 330 # define _PyUnicode_IsXidContinue _PyUnicodeUCS4_IsXidContinue
 331 # define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
 332 # define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
 333 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
 334 # define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
 335 # define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
 336 # define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
 337 # define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
 338 # define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
 339
 340
 341 #endif
 342
 343 /* --- Internal Unicode Operations ---------------------------------------- */
 344
 345 /* If you want Python to use the compiler's wctype.h functions instead
 346    of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
 347    configure Python using --with-wctype-functions.  This reduces the
 348    interpreter's code size.

        The wctype variant is selected only when HAVE_USABLE_WCHAR_T is
        defined as well.  Even then only ISSPACE, ISLOWER, ISUPPER,
        TOLOWER, TOUPPER and ISALPHA map to <wctype.h>; all remaining
        macros use the _PyUnicode_* helpers in both variants. */
 349
 350 #if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
 351
 352 #include <wctype.h>
 353
 354 #define Py_UNICODE_ISSPACE(ch) iswspace(ch)
 355
 356 #define Py_UNICODE_ISLOWER(ch) iswlower(ch)
 357 #define Py_UNICODE_ISUPPER(ch) iswupper(ch)
 358 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
 359 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
 360
 361 #define Py_UNICODE_TOLOWER(ch) towlower(ch)
 362 #define Py_UNICODE_TOUPPER(ch) towupper(ch)
 363 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
 364
 365 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
 366 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
 367 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
 368 #define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)
 369
 370 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
 371 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
 372 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
 373
 374 #define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
 375
 376 #else
 377
 378 /* Since splitting on whitespace is an important use case, and
 379    whitespace in most situations is solely ASCII whitespace, we
 380    optimize for the common case by using a quick look-up table
 381    _Py_ascii_whitespace (see below) with an inlined check.
 382
        Note that ch is evaluated twice by this macro (once in the range
        test, once in the selected branch), so it must not have side
        effects.
 383  */
 384 #define Py_UNICODE_ISSPACE(ch) \
 385         ((ch) < 128U ? _Py_ascii_whitespace[(ch)] : _PyUnicode_IsWhitespace(ch))
 386
 387 #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
 388 #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
 389 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
 390 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
 391
 392 #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
 393 #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
 394 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
 395
 396 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
 397 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
 398 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
 399 #define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch)
 400
 401 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
 402 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
 403 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
 404
 405 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
 406
 407 #endif
 408
     /* True when ch is alphabetic, a decimal, a digit or numeric, as
        judged by the four Py_UNICODE_IS* macros below.  The tests are
        chained with ||, so ch may be evaluated up to four times and
        must not have side effects. */
 409 #define Py_UNICODE_ISALNUM(ch) \
 410        (Py_UNICODE_ISALPHA(ch) || \
 411         Py_UNICODE_ISDECIMAL(ch) || \
 412         Py_UNICODE_ISDIGIT(ch) || \
 413         Py_UNICODE_ISNUMERIC(ch))
 414
     /* Copy `length` Py_UNICODE elements from source to target via
        Py_MEMCPY; the element count is scaled to bytes with
        sizeof(Py_UNICODE).
        NOTE(review): Py_MEMCPY is defined elsewhere; presumably it has
        memcpy() semantics, i.e. the regions must not overlap -- confirm. */
 415 #define Py_UNICODE_COPY(target, source, length)                         \
 416         Py_MEMCPY((target), (source), (length)*sizeof(Py_UNICODE))
 417
     /* Store `value` into the first `length` elements of `target`.
        Every argument is evaluated exactly once, in the order target,
        value, length: the length is captured in n_ before the loop so
        that an expression with side effects (or an expensive one) is
        not re-evaluated on each iteration. */
 418 #define Py_UNICODE_FILL(target, value, length) \
 419     do {Py_UNICODE *t_ = (target); Py_UNICODE v_ = (value);\
 420         Py_ssize_t i_, n_ = (length); for (i_ = 0; i_ < n_; i_++) t_[i_] = v_;\
 421     } while (0)
 422
 423 /* Check if substring matches at given offset.  the offset must be
 424    valid, and the substring must not be empty.

        Both `string` and `substring` are accessed through ->str and
        ->length.  The first and the last code unit are compared before
        memcmp() runs over the whole substring, so most mismatches are
        rejected without the full comparison.  Each argument is
        evaluated several times. */
 425
 426 #define Py_UNICODE_MATCH(string, offset, substring) \
 427     ((*((string)->str + (offset)) == *((substring)->str)) && \
 428     ((*((string)->str + (offset) + (substring)->length-1) == *((substring)->str + (substring)->length-1))) && \
 429      !memcmp((string)->str + (offset), (substring)->str, (substring)->length*sizeof(Py_UNICODE)))
 430
 431 #ifdef __cplusplus
 432 extern "C" {
 433 #endif
 434
 435 /* --- Unicode Type ------------------------------------------------------- */
 436
     /* Instance layout of a Unicode object: the common object header
        followed by the code-unit buffer, its length, and cached hash,
        interning state and default-encoded form. */
 437 typedef struct {
 438     PyObject_HEAD
 439     Py_ssize_t length;          /* Length of raw Unicode data in buffer */
 440     Py_UNICODE *str;            /* Raw Unicode buffer */
 441     long hash;                  /* Hash value; -1 if not set */
 442     int state;                  /* != 0 if interned. In this case the two
 443                                  * references from the dictionary to this object
 444                                  * are *not* counted in ob_refcnt. */
 445     PyObject *defenc;           /* (Default) Encoded version as Python
 446                                    string, or NULL; this is used for
 447                                    implementing the buffer protocol */
 448 } PyUnicodeObject;
 449
     /* Type objects exported for the Unicode type and its iterator. */
 450 PyAPI_DATA(PyTypeObject) PyUnicode_Type;
 451 PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
 452
     /* Interning states.  NOTE(review): presumably these are the values
        stored in PyUnicodeObject.state -- confirm in the implementation. */
 453 #define SSTATE_NOT_INTERNED 0
 454 #define SSTATE_INTERNED_MORTAL 1
 455 #define SSTATE_INTERNED_IMMORTAL 2
 456
     /* PyUnicode_Check tests the Py_TPFLAGS_UNICODE_SUBCLASS flag of the
        object's type; PyUnicode_CheckExact is true only when the type is
        exactly PyUnicode_Type. */
 457 #define PyUnicode_Check(op) \
 458                  PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
 459 #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
 460
 461 /* Fast access macros.

        Each macro asserts PyUnicode_Check(op) and then reads the field
        straight out of the PyUnicodeObject; when assert() is compiled
        out (NDEBUG) no type check is performed at all.

          PyUnicode_GET_SIZE      - the length field
          PyUnicode_GET_DATA_SIZE - length * sizeof(Py_UNICODE)
          PyUnicode_AS_UNICODE    - the str field (Py_UNICODE *)
          PyUnicode_AS_DATA       - the str field cast to const char * */
 462 #define PyUnicode_GET_SIZE(op) \
 463         (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length))
 464 #define PyUnicode_GET_DATA_SIZE(op) \
 465         (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE)))
 466 #define PyUnicode_AS_UNICODE(op) \
 467         (assert(PyUnicode_Check(op)),(((PyUnicodeObject *)(op))->str))
 468 #define PyUnicode_AS_DATA(op) \
 469         (assert(PyUnicode_Check(op)),((const char *)((PyUnicodeObject *)(op))->str))
 470
 471 /* --- Constants ---------------------------------------------------------- */
 472
 473 /* This Unicode character will be used as replacement character during
 474    decoding if the errors argument is set to "replace". Note: the
 475    Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
 476    Unicode 3.0.  The constant is cast to Py_UNICODE, so it has the
        build's code-unit type. */
 477
 478 #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
 479
 480 /* === Public API ========================================================= */
 481
 482 /* --- Plain Py_UNICODE --------------------------------------------------- */
 483
 484 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
 485    size.
 486
 487    u may be NULL which causes the contents to be undefined. It is the
 488    user's responsibility to fill in the needed data afterwards. Note
 489    that modifying the Unicode object contents after construction is
 490    only allowed if u was set to NULL.
 491
 492    The buffer is copied into the new object.

        NOTE(review): size is presumably a count of Py_UNICODE elements
        rather than bytes -- confirm against the implementation. */
 493
 494 PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
 495     const Py_UNICODE *u,        /* Unicode buffer */
 496     Py_ssize_t size             /* size of buffer */
 497     );
 498
 499 /* Similar to PyUnicode_FromUnicode(), but u points to Latin-1 encoded bytes.
        NOTE(review): the Python 3 C-API documentation describes this
        input as UTF-8 rather than Latin-1 -- verify against the
        implementation and correct this comment if it is stale. */
 500 PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
 501     const char *u,        /* char buffer */
 502     Py_ssize_t size       /* size of buffer */
 503     );
 504
 505 /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
 506    Latin-1 encoded bytes.
        NOTE(review): the Python 3 C-API documentation describes this
        input as UTF-8 rather than Latin-1 -- verify against the
        implementation and correct this comment if it is stale. */
 507 PyAPI_FUNC(PyObject*) PyUnicode_FromString(
 508     const char *u        /* string */
 509     );
 510
 511 /* Return a read-only pointer to the Unicode object's internal
 512    Py_UNICODE buffer.  The return type is not const-qualified, so the
        compiler does not enforce the read-only contract; callers must
        honour it themselves. */
 513
 514 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
 515     PyObject *unicode           /* Unicode object */
 516     );
 517
 518 /* Get the length of the Unicode object.
        NOTE(review): presumably counted in Py_UNICODE code units, like
        the length field read by PyUnicode_GET_SIZE -- confirm. */
 519
 520 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
 521     PyObject *unicode           /* Unicode object */
 522     );
 523
 524 /* Get the maximum ordinal for a Unicode character.  The result has
        type Py_UNICODE, so its range follows the build's code-unit type. */
 525 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
 526
 527 /* Resize an already allocated Unicode object to the new size length.
 528
 529    *unicode is modified to point to the new (resized) object and 0
 530    returned on success.
 531
 532    This API may only be called by the function which also called the
 533    Unicode constructor. The refcount on the object must be 1. Otherwise,
 534    an error is returned.
 535
 536    Error handling is implemented as follows: an exception is set, -1
 537    is returned and *unicode left untouched.
 538
        Because *unicode may be replaced on success, callers must keep
        using the pointer stored back through the argument rather than
        a copy taken before the call.
 539 */
 540
 541 PyAPI_FUNC(int) PyUnicode_Resize(
 542     PyObject **unicode,         /* Pointer to the Unicode object */
 543     Py_ssize_t length           /* New length */
 544     );
 545
 546 /* Coerce obj to a Unicode object and return a reference with
 547    *incremented* refcount.
 548
 549    Coercion is done in the following way:
 550
 551    1. String and other char buffer compatible objects are decoded
 552       under the assumptions that they contain data using the current
 553       default encoding. Decoding is done in "strict" mode.
 554
 555    2. All other objects (including Unicode objects) raise an
 556       exception.
 557
 558    The API returns NULL in case of an error. The caller is responsible
 559    for decref'ing the returned objects.
 560
 561 */
 562
 563 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
 564     PyObject *obj,              /* Object */
 565     const char *encoding,       /* encoding */
 566     const char *errors          /* error handling */
 567     );
 568
 569 /* Coerce obj to a Unicode object and return a reference with
 570    *incremented* refcount.
 571
 572    Unicode objects are passed back as-is (subclasses are converted to
 573    true Unicode objects), all other objects are delegated to
 574    PyUnicode_FromEncodedObject(obj, NULL, "strict") which results in
 575    using the default encoding as basis for decoding the object.
 576
 577    The API returns NULL in case of an error. The caller is responsible
 578    for decref'ing the returned objects.
 579
 580 */
 581
 582 PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
 583     PyObject *obj               /* Object */
 584     );
 585
     /* Build a Unicode object from a format string plus arguments; the
        V variant takes the arguments as a va_list, the other one as a
        variable argument list.
        NOTE(review): the supported format directives are defined by the
        implementation and are not visible in this header. */
 586 PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(const char*, va_list);
 587 PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(const char*, ...);
 588
 589 /* Format the object based on the format_spec, as defined in PEP 3101
 590    (Advanced String Formatting).  format_spec is passed as a
        Py_UNICODE buffer together with its length format_spec_len. */
 591 PyAPI_FUNC(PyObject *) _PyUnicode_FormatAdvanced(PyObject *obj,
 592                                                  Py_UNICODE *format_spec,
 593                                                  Py_ssize_t format_spec_len);
 594
     /* String interning API.  The two void functions take the object
        through a PyObject ** argument.
        NOTE(review): presumably *p may be replaced by the interned
        instance -- confirm against the implementation. */
 595 PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
 596 PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
 597 PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(const char *);
 598 PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
 599
 600 /* Use only if you know it's a string: the macro casts op to
        PyUnicodeObject * and reads its state field without any check. */
 601 #define PyUnicode_CHECK_INTERNED(op) (((PyUnicodeObject *)(op))->state)
 602
 603 /* --- wchar_t support for platforms which support it --------------------- */
 604
 605 #ifdef HAVE_WCHAR_H
 606
 607 /* Create a Unicode Object from the wchar_t buffer w of the given
 608    size.
 609
 610    The buffer is copied into the new object. */
 611
 612 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
 613     const wchar_t *w,           /* wchar_t buffer */
 614     Py_ssize_t size             /* size of buffer */
 615     );
 616
 617 /* Copies the Unicode Object contents into the wchar_t buffer w.  At
 618    most size wchar_t characters are copied.
 619
 620    Note that the resulting wchar_t string may or may not be
 621    0-terminated.  It is the responsibility of the caller to make sure
 622    that the wchar_t string is 0-terminated in case this is required by
 623    the application.
 624
 625    Returns the number of wchar_t characters copied (excluding a
 626    possibly trailing 0-termination character) or -1 in case of an
 627    error. */
 628
 629 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
 630     PyUnicodeObject *unicode,   /* Unicode object */
 631     wchar_t *w,                 /* wchar_t buffer */
 632     Py_ssize_t size             /* size of buffer */
 633     );
 634
 635 #endif
 636
 637 /* --- Unicode ordinals --------------------------------------------------- */
 638
 639 /* Create a Unicode Object from the given Unicode code point ordinal.
 640
 641    The ordinal must be in range(0x10000) on narrow Python builds
 642    (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
 643    raised in case it is not.
 644
        The ordinal is passed as a plain int.
 645 */
 646
 647 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
 648
 649 /* --- Free-list management ----------------------------------------------- */
 650
 651 /* Clear the free list used by the Unicode implementation.
 652
 653    This can be used to release memory used for objects on the free
 654    list back to the Python memory allocator.
 655
        NOTE(review): the meaning of the int result is not documented
        here -- check the implementation before relying on it.
 656 */
 657
 658 PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
 659
 660 /* === Builtin Codecs =====================================================
 661
 662    Many of these APIs take two arguments encoding and errors. These
 663    parameters encoding and errors have the same semantics as the ones
 664    of the builtin unicode() API.
 665
 666    Setting encoding to NULL causes the default encoding to be used.
 667
 668    Error handling is set by errors which may also be set to NULL
 669    meaning to use the default handling defined for the codec. Default
 670    error handling for all builtin codecs is "strict" (ValueErrors are
 671    raised).
 672
 673    The codecs all use a similar interface. Only deviation from the
 674    generic ones are documented.
 675
 676 */
 677
 678 /* --- Manage the default encoding ---------------------------------------- */
 679
 680 /* Return a Python string holding the default encoded value of the
 681    Unicode object.
 682
 683    The resulting string is cached in the Unicode object for subsequent
 684    usage by this function. The cached version is needed to implement
 685    the character buffer interface and will live (at least) as long as
 686    the Unicode object itself.
 687
 688    The refcount of the string is *not* incremented, i.e. the caller
        receives a borrowed reference and must not decref it.
 689
 690    *** Exported for internal use by the interpreter only !!! ***
 691
 692 */
 693
 694 PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
 695     PyObject *unicode,
 696     const char *errors);
 697
 698 /* Returns a pointer to the default encoding (normally, UTF-8) of the
 699    Unicode object unicode and stores the size of the encoded
 700    representation in bytes in *size.
 701
 702    In case of an error, *size is not set.
 703
 704    *** This API is for interpreter INTERNAL USE ONLY and will likely
 705    *** be removed or changed for Python 3.1.
 706
 707    *** If you need to access the Unicode object as UTF-8 bytes string,
 708    *** please use PyUnicode_AsUTF8String() instead.
 709
 710 */
 711
 712 PyAPI_FUNC(char *) _PyUnicode_AsStringAndSize(
 713     PyObject *unicode,
 714     Py_ssize_t *size);
 715
 716 /* Returns a pointer to the default encoding (normally, UTF-8) of the
 717    Unicode object unicode.
 718
 719    Use of this API is DEPRECATED since no size information can be
 720    extracted from the returned data.
 721
 722    *** This API is for interpreter INTERNAL USE ONLY and will likely
 723    *** be removed or changed for Python 3.1.
 724
 725    *** If you need to access the Unicode object as UTF-8 bytes string,
 726    *** please use PyUnicode_AsUTF8String() instead.
 727
 728 */
 729
 730 PyAPI_FUNC(char *) _PyUnicode_AsString(PyObject *unicode);
 731
 732 /* Returns the currently active default encoding.
 733
 734    The default encoding is currently implemented as run-time settable
 735    process global.  This may change in future versions of the
 736    interpreter to become a parameter which is managed on a per-thread
 737    basis.
 738
 739  */
 740
 741 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
 742
 743 /* Sets the currently active default encoding.
 744
 745    Returns 0 on success, -1 in case of an error.
 746
 747  */
 748
 749 PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding(
 750     const char *encoding        /* Encoding name in standard form */
 751     );
 752
 753 /* --- Generic Codecs ----------------------------------------------------- */
 754
 755 /* Create a Unicode object by decoding the encoded string s of the
 756    given size. */
 757
 758 PyAPI_FUNC(PyObject*) PyUnicode_Decode(
 759     const char *s,              /* encoded string */
 760     Py_ssize_t size,            /* size of buffer */
 761     const char *encoding,       /* encoding */
 762     const char *errors          /* error handling */
 763     );
 764
 765 /* Decode a Unicode object unicode and return the result as Python
 766    object. */
 767
 768 PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
 769     PyObject *unicode,          /* Unicode object */
 770     const char *encoding,       /* encoding */
 771     const char *errors          /* error handling */
 772     );
 773
 774 /* Decode a Unicode object unicode and return the result as Unicode
 775    object. */
 776
 777 PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
 778     PyObject *unicode,          /* Unicode object */
 779     const char *encoding,       /* encoding */
 780     const char *errors          /* error handling */
 781     );
 782
 783 /* Encodes a Py_UNICODE buffer of the given size and returns a
 784    Python string object. */
 785
 786 PyAPI_FUNC(PyObject*) PyUnicode_Encode(
 787     const Py_UNICODE *s,        /* Unicode char buffer */
 788     Py_ssize_t size,            /* number of Py_UNICODE chars to encode */
 789     const char *encoding,       /* encoding */
 790     const char *errors          /* error handling */
 791     );
 792
 793 /* Encodes a Unicode object and returns the result as Python
 794    object. */
 795
 796 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
 797     PyObject *unicode,          /* Unicode object */
 798     const char *encoding,       /* encoding */
 799     const char *errors          /* error handling */
 800     );
 801
 802 /* Encodes a Unicode object and returns the result as Python string
 803    object. */
 804
 805 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
 806     PyObject *unicode,          /* Unicode object */
 807     const char *encoding,       /* encoding */
 808     const char *errors          /* error handling */
 809     );
 810
 811 /* Encodes a Unicode object and returns the result as Unicode
 812    object. */
 813
 814 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
 815     PyObject *unicode,          /* Unicode object */
 816     const char *encoding,       /* encoding */
 817     const char *errors          /* error handling */
 818     );
 819
 820 /* Build an encoding map. */
 821
 822 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
 823     PyObject* string            /* 256 character map */
 824    );
 825
 826 /* --- UTF-7 Codecs ------------------------------------------------------- */
 827
 828 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
 829     const char *string,         /* UTF-7 encoded string */
 830     Py_ssize_t length,          /* size of string */
 831     const char *errors          /* error handling */
 832     );
 833
 834 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
 835     const char *string,         /* UTF-7 encoded string */
 836     Py_ssize_t length,          /* size of string */
 837     const char *errors,         /* error handling */
 838     Py_ssize_t *consumed        /* bytes consumed */
 839     );
 840
 841 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
 842     const Py_UNICODE *data,     /* Unicode char buffer */
 843     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
 844     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
 845     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
 846     const char *errors          /* error handling */
 847     );
 848
 849 /* --- UTF-8 Codecs ------------------------------------------------------- */
 850
 851 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
 852     const char *string,         /* UTF-8 encoded string */
 853     Py_ssize_t length,          /* size of string */
 854     const char *errors          /* error handling */
 855     );
 856
 857 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
 858     const char *string,         /* UTF-8 encoded string */
 859     Py_ssize_t length,          /* size of string */
 860     const char *errors,         /* error handling */
 861     Py_ssize_t *consumed        /* bytes consumed */
 862     );
 863
 864 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
 865     PyObject *unicode           /* Unicode object */
 866     );
 867
 868 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
 869     const Py_UNICODE *data,     /* Unicode char buffer */
 870     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
 871     const char *errors          /* error handling */
 872     );
 873
 874 /* --- UTF-32 Codecs ------------------------------------------------------ */
 875
 876 /* Decodes length bytes from a UTF-32 encoded buffer string and returns
 877    the corresponding Unicode object.
 878
 879    errors (if non-NULL) defines the error handling. It defaults
 880    to "strict".
 881
 882    If byteorder is non-NULL, the decoder starts decoding using the
 883    given byte order:
 884
 885         *byteorder == -1: little endian
 886         *byteorder == 0:  native order
 887         *byteorder == 1:  big endian
 888
 889    In native mode, the first four bytes of the stream are checked for a
 890    BOM mark. If found, the BOM mark is analysed, the byte order
 891    adjusted and the BOM skipped.  In the other modes, no BOM mark
 892    interpretation is done. After completion, *byteorder is set to the
 893    current byte order at the end of input data.
 894
 895    If byteorder is NULL, the codec starts in native order mode.
 896
 897 */
 898
 899 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
 900     const char *string,         /* UTF-32 encoded string */
 901     Py_ssize_t length,          /* size of string */
 902     const char *errors,         /* error handling */
 903     int *byteorder              /* pointer to byteorder to use
 904                                    0=native;-1=LE,1=BE; updated on
 905                                    exit */
 906     );
 907
 908 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
 909     const char *string,         /* UTF-32 encoded string */
 910     Py_ssize_t length,          /* size of string */
 911     const char *errors,         /* error handling */
 912     int *byteorder,             /* pointer to byteorder to use
 913                                    0=native;-1=LE,1=BE; updated on
 914                                    exit */
 915     Py_ssize_t *consumed        /* bytes consumed */
 916     );
 917
 918 /* Returns a Python string using the UTF-32 encoding in native byte
 919    order. The string always starts with a BOM mark.  */
 920
 921 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
 922     PyObject *unicode           /* Unicode object */
 923     );
 924
 925 /* Returns a Python string object holding the UTF-32 encoded value of
 926    the Unicode data.
 927
 928    The output is written in the byte order selected by the byteorder
 929    argument:
 930
 931    byteorder == -1: little endian
 932    byteorder == 0:  native byte order (writes a BOM mark)
 933    byteorder == 1:  big endian
 934
 935    If byteorder is 0, the output string will always start with the
 936    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
 937    prepended.
 938
 939 */
 940
 941 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
 942     const Py_UNICODE *data,     /* Unicode char buffer */
 943     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
 944     const char *errors,         /* error handling */
 945     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
 946     );
 947
 948 /* --- UTF-16 Codecs ------------------------------------------------------ */
 949
 950 /* Decodes length bytes from a UTF-16 encoded buffer string and returns
 951    the corresponding Unicode object.
 952
 953    errors (if non-NULL) defines the error handling. It defaults
 954    to "strict".
 955
 956    If byteorder is non-NULL, the decoder starts decoding using the
 957    given byte order:
 958
 959         *byteorder == -1: little endian
 960         *byteorder == 0:  native order
 961         *byteorder == 1:  big endian
 962
 963    In native mode, the first two bytes of the stream are checked for a
 964    BOM mark. If found, the BOM mark is analysed, the byte order
 965    adjusted and the BOM skipped.  In the other modes, no BOM mark
 966    interpretation is done. After completion, *byteorder is set to the
 967    current byte order at the end of input data.
 968
 969    If byteorder is NULL, the codec starts in native order mode.
 970
 971 */
 972
 973 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
 974     const char *string,         /* UTF-16 encoded string */
 975     Py_ssize_t length,          /* size of string */
 976     const char *errors,         /* error handling */
 977     int *byteorder              /* pointer to byteorder to use
 978                                    0=native;-1=LE,1=BE; updated on
 979                                    exit */
 980     );
 981
 982 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
 983     const char *string,         /* UTF-16 encoded string */
 984     Py_ssize_t length,          /* size of string */
 985     const char *errors,         /* error handling */
 986     int *byteorder,             /* pointer to byteorder to use
 987                                    0=native;-1=LE,1=BE; updated on
 988                                    exit */
 989     Py_ssize_t *consumed        /* bytes consumed */
 990     );
 991
 992 /* Returns a Python string using the UTF-16 encoding in native byte
 993    order. The string always starts with a BOM mark.  */
 994
 995 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
 996     PyObject *unicode           /* Unicode object */
 997     );
 998
 999 /* Returns a Python string object holding the UTF-16 encoded value of
1000    the Unicode data.
1001
1002    The output is written in the byte order selected by the byteorder
1003    argument:
1004
1005    byteorder == -1: little endian
1006    byteorder == 0:  native byte order (writes a BOM mark)
1007    byteorder == 1:  big endian
1008
1009    If byteorder is 0, the output string will always start with the
1010    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
1011    prepended.
1012
1013    Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
1014    UCS-2. This trick makes it possible to add full UTF-16 capabilities
1015    at a later point without compromising the APIs.
1016
1017 */
1018
1019 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
1020     const Py_UNICODE *data,     /* Unicode char buffer */
1021     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
1022     const char *errors,         /* error handling */
1023     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
1024     );
1025
1026 /* --- Unicode-Escape Codecs ---------------------------------------------- */
1027
1028 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
1029     const char *string,         /* Unicode-Escape encoded string */
1030     Py_ssize_t length,          /* size of string */
1031     const char *errors          /* error handling */
1032     );
1033
1034 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
1035     PyObject *unicode           /* Unicode object */
1036     );
1037
1038 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
1039     const Py_UNICODE *data,     /* Unicode char buffer */
1040     Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
1041     );
1042
1043 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
1044
1045 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
1046     const char *string,         /* Raw-Unicode-Escape encoded string */
1047     Py_ssize_t length,          /* size of string */
1048     const char *errors          /* error handling */
1049     );
1050
1051 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
1052     PyObject *unicode           /* Unicode object */
1053     );
1054
1055 PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
1056     const Py_UNICODE *data,     /* Unicode char buffer */
1057     Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
1058     );
1059
1060 /* --- Unicode Internal Codec ---------------------------------------------
1061
1062     Only for internal use in _codecsmodule.c */
1063
1064 PyObject *_PyUnicode_DecodeUnicodeInternal(
1065     const char *string,         /* encoded string */
1066     Py_ssize_t length,          /* size of string */
1067     const char *errors          /* error handling */
1068     );
1069
1070 /* --- Latin-1 Codecs -----------------------------------------------------
1071
1072    Note: Latin-1 corresponds to the first 256 Unicode ordinals.
1073
1074 */
1075
1076 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
1077     const char *string,         /* Latin-1 encoded string */
1078     Py_ssize_t length,          /* size of string */
1079     const char *errors          /* error handling */
1080     );
1081
1082 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
1083     PyObject *unicode           /* Unicode object */
1084     );
1085
1086 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
1087     const Py_UNICODE *data,     /* Unicode char buffer */
1088     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1089     const char *errors          /* error handling */
1090     );
1091
1092 /* --- ASCII Codecs -------------------------------------------------------
1093
1094    Only 7-bit ASCII data is accepted. All other codes generate errors.
1095
1096 */
1097
1098 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
1099     const char *string,         /* ASCII encoded string */
1100     Py_ssize_t length,          /* size of string */
1101     const char *errors          /* error handling */
1102     );
1103
1104 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
1105     PyObject *unicode           /* Unicode object */
1106     );
1107
1108 PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
1109     const Py_UNICODE *data,     /* Unicode char buffer */
1110     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1111     const char *errors          /* error handling */
1112     );
1113
1114 /* --- Character Map Codecs -----------------------------------------------
1115
1116    This codec uses mappings to encode and decode characters.
1117
1118    Decoding mappings must map single string characters to single
1119    Unicode characters, integers (which are then interpreted as Unicode
1120    ordinals) or None (meaning "undefined mapping" and causing an
1121    error).
1122
1123    Encoding mappings must map single Unicode characters to single
1124    string characters, integers (which are then interpreted as Latin-1
1125    ordinals) or None (meaning "undefined mapping" and causing an
1126    error).
1127
1128    If a character lookup fails with a LookupError, the character is
1129    copied as-is meaning that its ordinal value will be interpreted as
1130    Unicode or Latin-1 ordinal resp. Because of this mappings only need
1131    to contain those mappings which map characters to different code
1132    points.
1133
1134 */
1135
1136 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
1137     const char *string,         /* Encoded string */
1138     Py_ssize_t length,          /* size of string */
1139     PyObject *mapping,          /* character mapping
1140                                    (char ordinal -> unicode ordinal) */
1141     const char *errors          /* error handling */
1142     );
1143
1144 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
1145     PyObject *unicode,          /* Unicode object */
1146     PyObject *mapping           /* character mapping
1147                                    (unicode ordinal -> char ordinal) */
1148     );
1149
1150 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
1151     const Py_UNICODE *data,     /* Unicode char buffer */
1152     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1153     PyObject *mapping,          /* character mapping
1154                                    (unicode ordinal -> char ordinal) */
1155     const char *errors          /* error handling */
1156     );
1157
1158 /* Translate a Py_UNICODE buffer of the given length by applying a
1159    character mapping table to it and return the resulting Unicode
1160    object.
1161
1162    The mapping table must map Unicode ordinal integers to Unicode
1163    ordinal integers or None (causing deletion of the character).
1164
1165    Mapping tables may be dictionaries or sequences. Unmapped character
1166    ordinals (ones which cause a LookupError) are left untouched and
1167    are copied as-is.
1168
1169 */
1170
1171 PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
1172     const Py_UNICODE *data,     /* Unicode char buffer */
1173     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1174     PyObject *table,            /* Translate table */
1175     const char *errors          /* error handling */
1176     );
1177
1178 #ifdef MS_WIN32
1179
1180 /* --- MBCS codecs for Windows -------------------------------------------- */
1181
1182 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
1183     const char *string,         /* MBCS encoded string */
1184     Py_ssize_t length,              /* size of string */
1185     const char *errors          /* error handling */
1186     );
1187
1188 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
1189     const char *string,         /* MBCS encoded string */
1190     Py_ssize_t length,          /* size of string */
1191     const char *errors,         /* error handling */
1192     Py_ssize_t *consumed        /* bytes consumed */
1193     );
1194
1195 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
1196     PyObject *unicode           /* Unicode object */
1197     );
1198
1199 PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
1200     const Py_UNICODE *data,     /* Unicode char buffer */
1201     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1202     const char *errors          /* error handling */
1203     );
1204
1205 #endif /* MS_WIN32 */
1206
1207 /* --- Decimal Encoder ---------------------------------------------------- */
1208
1209 /* Takes a Unicode string holding a decimal value and writes it into
1210    an output buffer using standard ASCII digit codes.
1211
1212    The output buffer has to provide at least length+1 bytes of storage
1213    area. The output string is 0-terminated.
1214
1215    The encoder converts whitespace to ' ', decimal characters to their
1216    corresponding ASCII digit and all other Latin-1 characters except
1217    \0 as-is. Characters outside this range (Unicode ordinals 1-255)
1218    are treated as errors. This includes embedded NULL bytes.
1219
1220    Error handling is defined by the errors argument:
1221
1222       NULL or "strict": raise a ValueError
1223       "ignore": ignore the wrong characters (these are not copied to the
1224                 output buffer)
1225       "replace": replaces illegal characters with '?'
1226
1227    Returns 0 on success, -1 on failure.
1228
1229 */
1230
1231 PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
1232     Py_UNICODE *s,              /* Unicode buffer */
1233     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
1234     char *output,               /* Output buffer; must have size >= length+1 */
1235     const char *errors          /* error handling */
1236     );
1237
1238 /* --- File system encoding ---------------------------------------------- */
1239
1240 /* ParseTuple converter which converts a Unicode object into the file
1241    system encoding, using the PEP 383 error handler; bytes objects are
1242    output as-is. */
1243
1244 PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
1245
1246 /* Decode a null-terminated string using Py_FileSystemDefaultEncoding.
1247
1248    The encoding is used if one of the built-in codecs supports it (i.e.,
1249    UTF-8, UTF-16, UTF-32, Latin-1 or MBCS); otherwise fall back to UTF-8 and
1250    replace invalid characters with '?'.
1251
1252    The function is intended to be used for paths and file names only
1253    during bootstrapping process where the codecs are not set up.
1254 */
1255
1256 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
1257     const char *s               /* encoded string */
1258     );
1259
1260 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
1261     const char *s,               /* encoded string */
1262     Py_ssize_t size              /* size */
1263     );
1264
1265 /* --- Methods & Slots ----------------------------------------------------
1266
1267    These are capable of handling Unicode objects and strings on input
1268    (we refer to them as strings in the descriptions) and return
1269    Unicode objects or integers as appropriate. */
1270
1271 /* Concat two strings giving a new Unicode string. */
1272
1273 PyAPI_FUNC(PyObject*) PyUnicode_Concat(
1274     PyObject *left,             /* Left string */
1275     PyObject *right             /* Right string */
1276     );
1277
1278 /* Concat two strings and put the result in *pleft
1279    (sets *pleft to NULL on error) */
1280
1281 PyAPI_FUNC(void) PyUnicode_Append(
1282     PyObject **pleft,           /* Pointer to left string */
1283     PyObject *right             /* Right string */
1284     );
1285
1286 /* Concat two strings, put the result in *pleft and drop the right object
1287    (sets *pleft to NULL on error) */
1288
1289 PyAPI_FUNC(void) PyUnicode_AppendAndDel(
1290     PyObject **pleft,           /* Pointer to left string */
1291     PyObject *right             /* Right string */
1292     );
1293
1294 /* Split a string giving a list of Unicode strings.
1295
1296    If sep is NULL, splitting will be done at all whitespace
1297    substrings. Otherwise, splits occur at the given separator.
1298
1299    At most maxsplit splits will be done. If negative, no limit is set.
1300
1301    Separators are not included in the resulting list.
1302
1303 */
1304
1305 PyAPI_FUNC(PyObject*) PyUnicode_Split(
1306     PyObject *s,                /* String to split */
1307     PyObject *sep,              /* String separator */
1308     Py_ssize_t maxsplit         /* Maxsplit count */
1309     );
1310
1311 /* Ditto, but split at line breaks.
1312
1313    CRLF is considered to be one line break. Line breaks are included
1314    in the resulting list only if keepends is true. */
1315
1316 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
1317     PyObject *s,                /* String to split */
1318     int keepends                /* If true, line end markers are included */
1319     );
1320
1321 /* Partition a string using a given separator. */
1322
1323 PyAPI_FUNC(PyObject*) PyUnicode_Partition(
1324     PyObject *s,                /* String to partition */
1325     PyObject *sep               /* String separator */
1326     );
1327
1328 /* Partition a string using a given separator, searching from the end of the
1329    string. */
1330
1331 PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
1332     PyObject *s,                /* String to partition */
1333     PyObject *sep               /* String separator */
1334     );
1335
1336 /* Split a string giving a list of Unicode strings.
1337
1338    If sep is NULL, splitting will be done at all whitespace
1339    substrings. Otherwise, splits occur at the given separator.
1340
1341    At most maxsplit splits will be done; if maxsplit is negative, no
1342    limit is set. Unlike PyUnicode_Split, PyUnicode_RSplit splits from
1343    the end of the string.
1344
1345    Separators are not included in the resulting list.
1346
1347 */
1348
1349 PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
1350     PyObject *s,                /* String to split */
1351     PyObject *sep,              /* String separator */
1352     Py_ssize_t maxsplit         /* Maxsplit count */
1353     );
1354
1355 /* Translate a string by applying a character mapping table to it and
1356    return the resulting Unicode object.
1357
1358    The mapping table must map Unicode ordinal integers to Unicode
1359    ordinal integers or None (causing deletion of the character).
1360
1361    Mapping tables may be dictionaries or sequences. Unmapped character
1362    ordinals (ones which cause a LookupError) are left untouched and
1363    are copied as-is.
1364
1365 */
1366
1367 PyAPI_FUNC(PyObject *) PyUnicode_Translate(
1368     PyObject *str,              /* String */
1369     PyObject *table,            /* Translate table */
1370     const char *errors          /* error handling */
1371     );
1372
1373 /* Join a sequence of strings using the given separator and return
1374    the resulting Unicode string. */
1375
1376 PyAPI_FUNC(PyObject*) PyUnicode_Join(
1377     PyObject *separator,        /* Separator string */
1378     PyObject *seq               /* Sequence object */
1379     );
1380
1381 /* Return 1 if substr matches str[start:end] at the given tail end, 0
1382    otherwise. */
1383
1384 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
1385     PyObject *str,              /* String */
1386     PyObject *substr,           /* Prefix or Suffix string */
1387     Py_ssize_t start,           /* Start index */
1388     Py_ssize_t end,             /* Stop index */
1389     int direction               /* Tail end: -1 prefix, +1 suffix */
1390     );
1391
1392 /* Return the first position of substr in str[start:end] using the
1393    given search direction or -1 if not found. -2 is returned in case
1394    an error occurred and an exception is set. */
1395
1396 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
1397     PyObject *str,              /* String */
1398     PyObject *substr,           /* Substring to find */
1399     Py_ssize_t start,           /* Start index */
1400     Py_ssize_t end,             /* Stop index */
1401     int direction               /* Find direction: +1 forward, -1 backward */
1402     );
1403
1404 /* Count the number of occurrences of substr in str[start:end]. */
1405
1406 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
1407     PyObject *str,              /* String */
1408     PyObject *substr,           /* Substring to count */
1409     Py_ssize_t start,           /* Start index */
1410     Py_ssize_t end              /* Stop index */
1411     );
1412
1413 /* Replace at most maxcount occurrences of substr in str with replstr
1414    and return the resulting Unicode object. */
1415
1416 PyAPI_FUNC(PyObject *) PyUnicode_Replace(
1417     PyObject *str,              /* String */
1418     PyObject *substr,           /* Substring to find */
1419     PyObject *replstr,          /* Substring to replace */
1420     Py_ssize_t maxcount         /* Max. number of replacements to apply;
1421                                    -1 = all */
1422     );
1423
1424 /* Compare two strings and return -1, 0, 1 for less than, equal,
1425    greater than resp. */
1426
1427 PyAPI_FUNC(int) PyUnicode_Compare(
1428     PyObject *left,             /* Left string */
1429     PyObject *right             /* Right string */
1430     );
1431
1432 PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
1433     PyObject *left,             /* Left string */
1434     const char *right           /* Right C string; ASCII per name -- TODO confirm */
1435     );
1436
1437 /* Rich compare two strings and return one of the following:
1438
1439    - NULL in case an exception was raised
1440    - Py_True or Py_False for successful comparisons
1441    - Py_NotImplemented in case the type combination is unknown
1442
1443    Note that Py_EQ and Py_NE comparisons can cause a UnicodeWarning in
1444    case the conversion of the arguments to Unicode fails with a
1445    UnicodeDecodeError.
1446
1447    Possible values for op:
1448
1449      Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
1450
1451 */
1452
1453 PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
1454     PyObject *left,             /* Left string */
1455     PyObject *right,            /* Right string */
1456     int op                      /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
1457     );
1458
1459 /* Apply an argument tuple or dictionary to a format string and return
1460    the resulting Unicode string. */
1461
1462 PyAPI_FUNC(PyObject *) PyUnicode_Format(
1463     PyObject *format,           /* Format string */
1464     PyObject *args              /* Argument tuple or dictionary */
1465     );
1466
1467 /* Checks whether element is contained in container and return 1/0
1468    accordingly.
1469
1470    element has to coerce to a one-element Unicode string. -1 is
1471    returned in case of an error. */
1472
1473 PyAPI_FUNC(int) PyUnicode_Contains(
1474     PyObject *container,        /* Container string */
1475     PyObject *element           /* Element string */
1476     );
1477
1478 /* Checks whether argument is a valid identifier. */
1479
1480 PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
1481
1482 /* Externally visible for str.strip(unicode) */
1483 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
1484     PyUnicodeObject *self,
1485     int striptype,
1486     PyObject *sepobj
1487     );
1488
1489 /* Using the current locale, insert the thousands grouping
1490    into the string pointed to by buffer.  For the argument descriptions,
1491    see Objects/stringlib/localeutil.h */
1492
1493 PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGroupingLocale(Py_UNICODE *buffer,
1494                                                    Py_ssize_t n_buffer,
1495                                                    Py_UNICODE *digits,
1496                                                    Py_ssize_t n_digits,
1497                                                    Py_ssize_t min_width);
1498
1499 /* Using explicit passed-in values (the grouping and thousands_sep
        arguments), insert the thousands grouping
1500    into the string pointed to by buffer.  For the argument descriptions,
1501    see Objects/stringlib/localeutil.h. */
1502 PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(Py_UNICODE *buffer,
1503                                                    Py_ssize_t n_buffer,
1504                                                    Py_UNICODE *digits,
1505                                                    Py_ssize_t n_digits,
1506                                                    Py_ssize_t min_width,
1507                                                    const char *grouping,
1508                                                    const char *thousands_sep);
1509 /* === Characters Type APIs =============================================== */
1510
1511 /* Helper array used by Py_UNICODE_ISSPACE().
        NOTE(review): presumably a lookup table indexed by ASCII code
        point -- confirm against the macro definition. */
1512
1513 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
1514
1515 /* These should not be used directly. Use the Py_UNICODE_IS* and
1516    Py_UNICODE_TO* macros instead.
1517
1518    These APIs are implemented in Objects/unicodectype.c.
1519
        Every _PyUnicode_Is* function declared in this section takes one
        Py_UNICODE by value and returns int.
        NOTE(review): presumably the result is nonzero when ch has the
        named property -- confirm in Objects/unicodectype.c.
1520 */
1521
1522 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
1523     Py_UNICODE ch       /* Unicode character */
1524     );
1525
1526 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
1527     Py_UNICODE ch       /* Unicode character */
1528     );
1529
1530 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
1531     Py_UNICODE ch       /* Unicode character */
1532     );
1533
1534 PyAPI_FUNC(int) _PyUnicode_IsXidStart(
1535     Py_UNICODE ch       /* Unicode character */
1536     );
1537
1538 PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
1539     Py_UNICODE ch       /* Unicode character */
1540     );
1541
     /* NOTE(review): presumably these return nonzero when ch is whitespace
        or a line break, respectively -- confirm in Objects/unicodectype.c.
        NOTE(review): the top-level const on these by-value parameters is
        not used by the sibling declarations; in C it does not change the
        function's type as seen by callers. */
1542 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
1543     const Py_UNICODE ch         /* Unicode character */
1544     );
1545
1546 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
1547     const Py_UNICODE ch         /* Unicode character */
1548     );
1549
     /* Each of the next three functions takes one Py_UNICODE and returns a
        Py_UNICODE.
        NOTE(review): presumably the lowercase / uppercase / titlecase
        mapping of ch; the result for a character with no such mapping is
        not documented here -- confirm in Objects/unicodectype.c. */
1550 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase(
1551     Py_UNICODE ch       /* Unicode character */
1552     );
1553
1554 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase(
1555     Py_UNICODE ch       /* Unicode character */
1556     );
1557
1558 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase(
1559     Py_UNICODE ch       /* Unicode character */
1560     );
1561
     /* _PyUnicode_ToDecimalDigit and _PyUnicode_ToDigit return int;
        _PyUnicode_ToNumeric returns double.
        NOTE(review): presumably the numeric value associated with ch; the
        result for a character without such a value is not documented
        here -- confirm in Objects/unicodectype.c. */
1562 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
1563     Py_UNICODE ch       /* Unicode character */
1564     );
1565
1566 PyAPI_FUNC(int) _PyUnicode_ToDigit(
1567     Py_UNICODE ch       /* Unicode character */
1568     );
1569
1570 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
1571     Py_UNICODE ch       /* Unicode character */
1572     );
1573
     /* Five more tests, each taking one Py_UNICODE and returning int.
        NOTE(review): presumably nonzero when ch has the named property --
        confirm in Objects/unicodectype.c. */
1574 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
1575     Py_UNICODE ch       /* Unicode character */
1576     );
1577
1578 PyAPI_FUNC(int) _PyUnicode_IsDigit(
1579     Py_UNICODE ch       /* Unicode character */
1580     );
1581
1582 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
1583     Py_UNICODE ch       /* Unicode character */
1584     );
1585
1586 PyAPI_FUNC(int) _PyUnicode_IsPrintable(
1587     Py_UNICODE ch       /* Unicode character */
1588     );
1589
1590 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
1591     Py_UNICODE ch       /* Unicode character */
1592     );
1593
     /* The signatures below parallel the C library's strlen, strcpy,
        strncpy, strcmp and strchr, with Py_UNICODE in place of char (and
        as the type of strchr's character argument).
        NOTE(review): the semantics, including the terminator convention,
        are presumably the same as for those functions -- confirm against
        the implementation. */
1594 PyAPI_FUNC(size_t) Py_UNICODE_strlen(const Py_UNICODE *u);
1595
1596 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
1597     Py_UNICODE *s1, const Py_UNICODE *s2);
1598
1599 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
1600     Py_UNICODE *s1, const Py_UNICODE *s2, size_t n);
1601
1602 PyAPI_FUNC(int) Py_UNICODE_strcmp(
1603     const Py_UNICODE *s1, const Py_UNICODE *s2);
1604
1605 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
1606     const Py_UNICODE *s, Py_UNICODE c
1607     );
1608
1609 #ifdef __cplusplus
1610 }
1611 #endif
1612 #endif /* !Py_UNICODEOBJECT_H */