Include/unicodeobject.h

   1 #ifndef Py_UNICODEOBJECT_H
   2 #define Py_UNICODEOBJECT_H
   3
   4 /*
   5
   6 Unicode implementation based on original code by Fredrik Lundh,
   7 modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
   8 Unicode Integration Proposal (see file Misc/unicode.txt).
   9
  10 Copyright (c) Corporation for National Research Initiatives.
  11
  12
  13  Original header:
  14  --------------------------------------------------------------------
  15
  16  * Yet another Unicode string type for Python.  This type supports the
  17  * 16-bit Basic Multilingual Plane (BMP) only.
  18  *
  19  * Written by Fredrik Lundh, January 1999.
  20  *
  21  * Copyright (c) 1999 by Secret Labs AB.
  22  * Copyright (c) 1999 by Fredrik Lundh.
  23  *
  24  * fredrik@pythonware.com
  25  * http://www.pythonware.com
  26  *
  27  * --------------------------------------------------------------------
  28  * This Unicode String Type is
  29  *
  30  * Copyright (c) 1999 by Secret Labs AB
  31  * Copyright (c) 1999 by Fredrik Lundh
  32  *
  33  * By obtaining, using, and/or copying this software and/or its
  34  * associated documentation, you agree that you have read, understood,
  35  * and will comply with the following terms and conditions:
  36  *
  37  * Permission to use, copy, modify, and distribute this software and its
  38  * associated documentation for any purpose and without fee is hereby
  39  * granted, provided that the above copyright notice appears in all
  40  * copies, and that both that copyright notice and this permission notice
  41  * appear in supporting documentation, and that the name of Secret Labs
  42  * AB or the author not be used in advertising or publicity pertaining to
  43  * distribution of the software without specific, written prior
  44  * permission.
  45  *
  46  * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
  47  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
  48  * FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
  49  * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
  50  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
  51  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
  52  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  53  * -------------------------------------------------------------------- */
  54
  55 #include <ctype.h>
  56
  57 /* === Internal API ======================================================= */
  58
  59 /* --- Internal Unicode Format -------------------------------------------- */
  60
  61 #ifndef Py_USING_UNICODE
  62
  63 #define PyUnicode_Check(op)                 0
  64 #define PyUnicode_CheckExact(op)            0
  65
  66 #else
  67
  68 /* FIXME: MvL's new implementation assumes that Py_UNICODE_SIZE is
  69    properly set, but the default rules below doesn't set it.  I'll
  70    sort this out some other day -- fredrik@pythonware.com */
  71
  72 #ifndef Py_UNICODE_SIZE
  73 #error Must define Py_UNICODE_SIZE
  74 #endif
  75
  76 /* Setting Py_UNICODE_WIDE enables UCS-4 storage.  Otherwise, Unicode
  77    strings are stored as UCS-2 (with limited support for UTF-16) */
  78
  79 #if Py_UNICODE_SIZE >= 4
  80 #define Py_UNICODE_WIDE
  81 #endif
  82
  83 /* Set these flags if the platform has "wchar.h", "wctype.h" and the
  84    wchar_t type is a 16-bit unsigned type */
  85 /* #define HAVE_WCHAR_H */
  86 /* #define HAVE_USABLE_WCHAR_T */
  87
  88 /* Defaults for various platforms */
  89 #ifndef PY_UNICODE_TYPE
  90
  91 /* Windows has a usable wchar_t type (unless we're using UCS-4) */
  92 # if defined(MS_WIN32) && Py_UNICODE_SIZE == 2
  93 #  define HAVE_USABLE_WCHAR_T
  94 #  define PY_UNICODE_TYPE wchar_t
  95 # endif
  96
  97 # if defined(Py_UNICODE_WIDE)
  98 #  define PY_UNICODE_TYPE Py_UCS4
  99 # endif
 100
 101 #endif
 102
 103 /* If the compiler provides a wchar_t type we try to support it
 104    through the interface functions PyUnicode_FromWideChar() and
 105    PyUnicode_AsWideChar(). */
 106
 107 #ifdef HAVE_USABLE_WCHAR_T
 108 # ifndef HAVE_WCHAR_H
 109 #  define HAVE_WCHAR_H
 110 # endif
 111 #endif
 112
 113 #ifdef HAVE_WCHAR_H
 114 /* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
 115 # ifdef _HAVE_BSDI
 116 #  include <time.h>
 117 # endif
 118 #  include <wchar.h>
 119 #endif
 120
 121 /*
 122  * Use this typedef when you need to represent a UTF-16 surrogate pair
 123  * as single unsigned integer.
 124  */
 125 #if SIZEOF_INT >= 4
 126 typedef unsigned int Py_UCS4;
 127 #elif SIZEOF_LONG >= 4
 128 typedef unsigned long Py_UCS4;
 129 #endif
 130
 131 typedef PY_UNICODE_TYPE Py_UNICODE;
 132
 133 /* --- UCS-2/UCS-4 Name Mangling ------------------------------------------ */
 134
 135 /* Unicode API names are mangled to assure that UCS-2 and UCS-4 builds
 136    produce different external names and thus cause import errors in
 137    case Python interpreters and extensions with mixed compiled in
 138    Unicode width assumptions are combined. */
 139
 140 #ifndef Py_UNICODE_WIDE
 141
 142 # define PyUnicode_AsASCIIString PyUnicodeUCS2_AsASCIIString
 143 # define PyUnicode_AsCharmapString PyUnicodeUCS2_AsCharmapString
 144 # define PyUnicode_AsEncodedObject PyUnicodeUCS2_AsEncodedObject
 145 # define PyUnicode_AsEncodedString PyUnicodeUCS2_AsEncodedString
 146 # define PyUnicode_AsLatin1String PyUnicodeUCS2_AsLatin1String
 147 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS2_AsRawUnicodeEscapeString
 148 # define PyUnicode_AsUTF16String PyUnicodeUCS2_AsUTF16String
 149 # define PyUnicode_AsUTF8String PyUnicodeUCS2_AsUTF8String
 150 # define PyUnicode_AsUnicode PyUnicodeUCS2_AsUnicode
 151 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS2_AsUnicodeEscapeString
 152 # define PyUnicode_AsWideChar PyUnicodeUCS2_AsWideChar
 153 # define PyUnicode_Compare PyUnicodeUCS2_Compare
 154 # define PyUnicode_Concat PyUnicodeUCS2_Concat
 155 # define PyUnicode_Contains PyUnicodeUCS2_Contains
 156 # define PyUnicode_Count PyUnicodeUCS2_Count
 157 # define PyUnicode_Decode PyUnicodeUCS2_Decode
 158 # define PyUnicode_DecodeASCII PyUnicodeUCS2_DecodeASCII
 159 # define PyUnicode_DecodeCharmap PyUnicodeUCS2_DecodeCharmap
 160 # define PyUnicode_DecodeLatin1 PyUnicodeUCS2_DecodeLatin1
 161 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS2_DecodeRawUnicodeEscape
 162 # define PyUnicode_DecodeUTF16 PyUnicodeUCS2_DecodeUTF16
 163 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS2_DecodeUTF16Stateful
 164 # define PyUnicode_DecodeUTF8 PyUnicodeUCS2_DecodeUTF8
 165 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS2_DecodeUTF8Stateful
 166 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS2_DecodeUnicodeEscape
 167 # define PyUnicode_Encode PyUnicodeUCS2_Encode
 168 # define PyUnicode_EncodeASCII PyUnicodeUCS2_EncodeASCII
 169 # define PyUnicode_EncodeCharmap PyUnicodeUCS2_EncodeCharmap
 170 # define PyUnicode_EncodeDecimal PyUnicodeUCS2_EncodeDecimal
 171 # define PyUnicode_EncodeLatin1 PyUnicodeUCS2_EncodeLatin1
 172 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS2_EncodeRawUnicodeEscape
 173 # define PyUnicode_EncodeUTF16 PyUnicodeUCS2_EncodeUTF16
 174 # define PyUnicode_EncodeUTF8 PyUnicodeUCS2_EncodeUTF8
 175 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS2_EncodeUnicodeEscape
 176 # define PyUnicode_Find PyUnicodeUCS2_Find
 177 # define PyUnicode_Format PyUnicodeUCS2_Format
 178 # define PyUnicode_FromEncodedObject PyUnicodeUCS2_FromEncodedObject
 179 # define PyUnicode_FromObject PyUnicodeUCS2_FromObject
 180 # define PyUnicode_FromOrdinal PyUnicodeUCS2_FromOrdinal
 181 # define PyUnicode_FromUnicode PyUnicodeUCS2_FromUnicode
 182 # define PyUnicode_FromWideChar PyUnicodeUCS2_FromWideChar
 183 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS2_GetDefaultEncoding
 184 # define PyUnicode_GetMax PyUnicodeUCS2_GetMax
 185 # define PyUnicode_GetSize PyUnicodeUCS2_GetSize
 186 # define PyUnicode_Join PyUnicodeUCS2_Join
 187 # define PyUnicode_Replace PyUnicodeUCS2_Replace
 188 # define PyUnicode_Resize PyUnicodeUCS2_Resize
 189 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS2_SetDefaultEncoding
 190 # define PyUnicode_Split PyUnicodeUCS2_Split
 191 # define PyUnicode_RSplit PyUnicodeUCS2_RSplit
 192 # define PyUnicode_Splitlines PyUnicodeUCS2_Splitlines
 193 # define PyUnicode_Tailmatch PyUnicodeUCS2_Tailmatch
 194 # define PyUnicode_Translate PyUnicodeUCS2_Translate
 195 # define PyUnicode_TranslateCharmap PyUnicodeUCS2_TranslateCharmap
 196 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS2_AsDefaultEncodedString
 197 # define _PyUnicode_Fini _PyUnicodeUCS2_Fini
 198 # define _PyUnicode_Init _PyUnicodeUCS2_Init
 199 # define _PyUnicode_IsAlpha _PyUnicodeUCS2_IsAlpha
 200 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS2_IsDecimalDigit
 201 # define _PyUnicode_IsDigit _PyUnicodeUCS2_IsDigit
 202 # define _PyUnicode_IsLinebreak _PyUnicodeUCS2_IsLinebreak
 203 # define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
 204 # define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
 205 # define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
 206 # define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
 207 # define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
 208 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
 209 # define _PyUnicode_ToDigit _PyUnicodeUCS2_ToDigit
 210 # define _PyUnicode_ToLowercase _PyUnicodeUCS2_ToLowercase
 211 # define _PyUnicode_ToNumeric _PyUnicodeUCS2_ToNumeric
 212 # define _PyUnicode_ToTitlecase _PyUnicodeUCS2_ToTitlecase
 213 # define _PyUnicode_ToUppercase _PyUnicodeUCS2_ToUppercase
 214
 215 #else
 216
 217 # define PyUnicode_AsASCIIString PyUnicodeUCS4_AsASCIIString
 218 # define PyUnicode_AsCharmapString PyUnicodeUCS4_AsCharmapString
 219 # define PyUnicode_AsEncodedObject PyUnicodeUCS4_AsEncodedObject
 220 # define PyUnicode_AsEncodedString PyUnicodeUCS4_AsEncodedString
 221 # define PyUnicode_AsLatin1String PyUnicodeUCS4_AsLatin1String
 222 # define PyUnicode_AsRawUnicodeEscapeString PyUnicodeUCS4_AsRawUnicodeEscapeString
 223 # define PyUnicode_AsUTF16String PyUnicodeUCS4_AsUTF16String
 224 # define PyUnicode_AsUTF8String PyUnicodeUCS4_AsUTF8String
 225 # define PyUnicode_AsUnicode PyUnicodeUCS4_AsUnicode
 226 # define PyUnicode_AsUnicodeEscapeString PyUnicodeUCS4_AsUnicodeEscapeString
 227 # define PyUnicode_AsWideChar PyUnicodeUCS4_AsWideChar
 228 # define PyUnicode_Compare PyUnicodeUCS4_Compare
 229 # define PyUnicode_Concat PyUnicodeUCS4_Concat
 230 # define PyUnicode_Contains PyUnicodeUCS4_Contains
 231 # define PyUnicode_Count PyUnicodeUCS4_Count
 232 # define PyUnicode_Decode PyUnicodeUCS4_Decode
 233 # define PyUnicode_DecodeASCII PyUnicodeUCS4_DecodeASCII
 234 # define PyUnicode_DecodeCharmap PyUnicodeUCS4_DecodeCharmap
 235 # define PyUnicode_DecodeLatin1 PyUnicodeUCS4_DecodeLatin1
 236 # define PyUnicode_DecodeRawUnicodeEscape PyUnicodeUCS4_DecodeRawUnicodeEscape
 237 # define PyUnicode_DecodeUTF16 PyUnicodeUCS4_DecodeUTF16
 238 # define PyUnicode_DecodeUTF16Stateful PyUnicodeUCS4_DecodeUTF16Stateful
 239 # define PyUnicode_DecodeUTF8 PyUnicodeUCS4_DecodeUTF8
 240 # define PyUnicode_DecodeUTF8Stateful PyUnicodeUCS4_DecodeUTF8Stateful
 241 # define PyUnicode_DecodeUnicodeEscape PyUnicodeUCS4_DecodeUnicodeEscape
 242 # define PyUnicode_Encode PyUnicodeUCS4_Encode
 243 # define PyUnicode_EncodeASCII PyUnicodeUCS4_EncodeASCII
 244 # define PyUnicode_EncodeCharmap PyUnicodeUCS4_EncodeCharmap
 245 # define PyUnicode_EncodeDecimal PyUnicodeUCS4_EncodeDecimal
 246 # define PyUnicode_EncodeLatin1 PyUnicodeUCS4_EncodeLatin1
 247 # define PyUnicode_EncodeRawUnicodeEscape PyUnicodeUCS4_EncodeRawUnicodeEscape
 248 # define PyUnicode_EncodeUTF16 PyUnicodeUCS4_EncodeUTF16
 249 # define PyUnicode_EncodeUTF8 PyUnicodeUCS4_EncodeUTF8
 250 # define PyUnicode_EncodeUnicodeEscape PyUnicodeUCS4_EncodeUnicodeEscape
 251 # define PyUnicode_Find PyUnicodeUCS4_Find
 252 # define PyUnicode_Format PyUnicodeUCS4_Format
 253 # define PyUnicode_FromEncodedObject PyUnicodeUCS4_FromEncodedObject
 254 # define PyUnicode_FromObject PyUnicodeUCS4_FromObject
 255 # define PyUnicode_FromOrdinal PyUnicodeUCS4_FromOrdinal
 256 # define PyUnicode_FromUnicode PyUnicodeUCS4_FromUnicode
 257 # define PyUnicode_FromWideChar PyUnicodeUCS4_FromWideChar
 258 # define PyUnicode_GetDefaultEncoding PyUnicodeUCS4_GetDefaultEncoding
 259 # define PyUnicode_GetMax PyUnicodeUCS4_GetMax
 260 # define PyUnicode_GetSize PyUnicodeUCS4_GetSize
 261 # define PyUnicode_Join PyUnicodeUCS4_Join
 262 # define PyUnicode_Replace PyUnicodeUCS4_Replace
 263 # define PyUnicode_Resize PyUnicodeUCS4_Resize
 264 # define PyUnicode_SetDefaultEncoding PyUnicodeUCS4_SetDefaultEncoding
 265 # define PyUnicode_Split PyUnicodeUCS4_Split
 266 # define PyUnicode_RSplit PyUnicodeUCS4_RSplit /* keep in sync with the UCS2 list */
 267 # define PyUnicode_Splitlines PyUnicodeUCS4_Splitlines
 268 # define PyUnicode_Tailmatch PyUnicodeUCS4_Tailmatch
 269 # define PyUnicode_Translate PyUnicodeUCS4_Translate
 270 # define PyUnicode_TranslateCharmap PyUnicodeUCS4_TranslateCharmap
 271 # define _PyUnicode_AsDefaultEncodedString _PyUnicodeUCS4_AsDefaultEncodedString
 272 # define _PyUnicode_Fini _PyUnicodeUCS4_Fini
 273 # define _PyUnicode_Init _PyUnicodeUCS4_Init
 274 # define _PyUnicode_IsAlpha _PyUnicodeUCS4_IsAlpha
 275 # define _PyUnicode_IsDecimalDigit _PyUnicodeUCS4_IsDecimalDigit
 276 # define _PyUnicode_IsDigit _PyUnicodeUCS4_IsDigit
 277 # define _PyUnicode_IsLinebreak _PyUnicodeUCS4_IsLinebreak
 278 # define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
 279 # define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
 280 # define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
 281 # define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
 282 # define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
 283 # define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
 284 # define _PyUnicode_ToDigit _PyUnicodeUCS4_ToDigit
 285 # define _PyUnicode_ToLowercase _PyUnicodeUCS4_ToLowercase
 286 # define _PyUnicode_ToNumeric _PyUnicodeUCS4_ToNumeric
 287 # define _PyUnicode_ToTitlecase _PyUnicodeUCS4_ToTitlecase
 288 # define _PyUnicode_ToUppercase _PyUnicodeUCS4_ToUppercase
 289
 290 #endif
 291
 292 /* --- Internal Unicode Operations ---------------------------------------- */
 293
 294 /* If you want Python to use the compiler's wctype.h functions instead
 295    of the ones supplied with Python, define WANT_WCTYPE_FUNCTIONS or
 296    configure Python using --with-wctype-functions.  This reduces the
 297    interpreter's code size. */
 298
 299 #if defined(HAVE_USABLE_WCHAR_T) && defined(WANT_WCTYPE_FUNCTIONS)
 300
 301 #include <wctype.h>
 302
 303 #define Py_UNICODE_ISSPACE(ch) iswspace(ch)
 304
 305 #define Py_UNICODE_ISLOWER(ch) iswlower(ch)
 306 #define Py_UNICODE_ISUPPER(ch) iswupper(ch)
 307 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
 308 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
 309
 310 #define Py_UNICODE_TOLOWER(ch) towlower(ch)
 311 #define Py_UNICODE_TOUPPER(ch) towupper(ch)
 312 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
 313
 314 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
 315 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
 316 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
 317
 318 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
 319 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
 320 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
 321
 322 #define Py_UNICODE_ISALPHA(ch) iswalpha(ch)
 323
 324 #else
 325
 326 #define Py_UNICODE_ISSPACE(ch) _PyUnicode_IsWhitespace(ch)
 327
 328 #define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch)
 329 #define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch)
 330 #define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch)
 331 #define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch)
 332
 333 #define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch)
 334 #define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch)
 335 #define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch)
 336
 337 #define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch)
 338 #define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch)
 339 #define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch)
 340
 341 #define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch)
 342 #define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch)
 343 #define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch)
 344
 345 #define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch)
 346
 347 #endif
 348
 349 #define Py_UNICODE_ISALNUM(ch) \
 350        (Py_UNICODE_ISALPHA(ch) || \
 351         Py_UNICODE_ISDECIMAL(ch) || \
 352         Py_UNICODE_ISDIGIT(ch) || \
 353         Py_UNICODE_ISNUMERIC(ch))
 354
 355 #define Py_UNICODE_COPY(target, source, length)\
 356     (memcpy((target), (source), (length)*sizeof(Py_UNICODE)))
 357
 358 #define Py_UNICODE_FILL(target, value, length) do\
 359     {int i; for (i = 0; i < (length); i++) (target)[i] = (value);}\
 360     while (0)
 361
 362 #define Py_UNICODE_MATCH(string, offset, substring)\
 363     ((*((string)->str + (offset)) == *((substring)->str)) &&\
 364      !memcmp((string)->str + (offset), (substring)->str,\
 365              (substring)->length*sizeof(Py_UNICODE)))
 366
 367 #ifdef __cplusplus
 368 extern "C" {
 369 #endif
 370
 371 /* --- Unicode Type ------------------------------------------------------- */
 372
 373 typedef struct {
 374     PyObject_HEAD
 375     int length;                 /* Number of Py_UNICODE items in str */
 376     Py_UNICODE *str;            /* Raw Unicode buffer */
 377     long hash;                  /* Hash value; -1 if not set */
 378     PyObject *defenc;           /* (Default) Encoded version as Python
 379                                    string, or NULL; this is used for
 380                                    implementing the buffer protocol */
 381 } PyUnicodeObject;
 382
 383 PyAPI_DATA(PyTypeObject) PyUnicode_Type;
 384
 385 #define PyUnicode_Check(op) PyObject_TypeCheck(op, &PyUnicode_Type)
 386 #define PyUnicode_CheckExact(op) ((op)->ob_type == &PyUnicode_Type)
 387
 388 /* Fast access macros */
 389 #define PyUnicode_GET_SIZE(op) \
 390         (((PyUnicodeObject *)(op))->length)
 391 #define PyUnicode_GET_DATA_SIZE(op) \
 392         (((PyUnicodeObject *)(op))->length * sizeof(Py_UNICODE))
 393 #define PyUnicode_AS_UNICODE(op) \
 394         (((PyUnicodeObject *)(op))->str)
 395 #define PyUnicode_AS_DATA(op) \
 396         ((const char *)((PyUnicodeObject *)(op))->str)
 397
 398 /* --- Constants ---------------------------------------------------------- */
 399
 400 /* This Unicode character will be used as replacement character during
 401    decoding if the errors argument is set to "replace". Note: the
 402    Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
 403    Unicode 3.0. */
 404
 405 #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UNICODE) 0xFFFD)
 406
 407 /* === Public API ========================================================= */
 408
 409 /* --- Plain Py_UNICODE --------------------------------------------------- */
 410
 411 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
 412    size.
 413
 414    u may be NULL which causes the contents to be undefined. It is the
 415    user's responsibility to fill in the needed data afterwards. Note
 416    that modifying the Unicode object contents after construction is
 417    only allowed if u was set to NULL.
 418
 419    The buffer is copied into the new object. */
 420
 421 PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
 422     const Py_UNICODE *u,        /* Unicode buffer */
 423     int size                    /* size of buffer */
 424     );
 425
 426 /* Return a read-only pointer to the Unicode object's internal
 427    Py_UNICODE buffer. */
 428
 429 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
 430     PyObject *unicode           /* Unicode object */
 431     );
 432
 433 /* Get the length of the Unicode object. */
 434
 435 PyAPI_FUNC(int) PyUnicode_GetSize(
 436     PyObject *unicode           /* Unicode object */
 437     );
 438
 439 /* Get the maximum ordinal for a Unicode character. */
 440 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
 441
 442 /* Resize an already allocated Unicode object to the new length.
 443
 444    *unicode is modified to point to the new (resized) object and 0 is
 445    returned on success.
 446
 447    This API may only be called by the function which also called the
 448    Unicode constructor. The refcount on the object must be 1. Otherwise,
 449    an error is returned.
 450
 451    Error handling is implemented as follows: an exception is set, -1
 452    is returned and *unicode is left untouched.
 453
 454 */
 455
 456 PyAPI_FUNC(int) PyUnicode_Resize(
 457     PyObject **unicode,         /* Pointer to the Unicode object */
 458     int length                  /* New length */
 459     );
 460
 461 /* Coerce obj to a Unicode object and return a reference with
 462    *incremented* refcount.
 463
 464    Coercion is done in the following way:
 465
 466    1. String and other char buffer compatible objects are decoded
 467       under the assumption that they contain data using the current
 468       default encoding. Decoding is done in "strict" mode.
 469
 470    2. All other objects (including Unicode objects) raise an
 471       exception.
 472
 473    The API returns NULL in case of an error. The caller is responsible
 474    for decref'ing the returned object.
 475
 476 */
 477
 478 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
 479     register PyObject *obj,     /* Object */
 480     const char *encoding,       /* encoding */
 481     const char *errors          /* error handling */
 482     );
 483
 484 /* Coerce obj to a Unicode object and return a reference with
 485    *incremented* refcount.
 486
 487    Unicode objects are passed back as-is (subclasses are converted to
 488    true Unicode objects); all other objects are delegated to
 489    PyUnicode_FromEncodedObject(obj, NULL, "strict"), which results in
 490    using the default encoding as the basis for decoding the object.
 491
 492    The API returns NULL in case of an error. The caller is responsible
 493    for decref'ing the returned object.
 494
 495 */
 496
 497 PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
 498     register PyObject *obj      /* Object */
 499     );
 500
 501 /* --- wchar_t support for platforms which support it --------------------- */
 502
 503 #ifdef HAVE_WCHAR_H
 504
 505 /* Create a Unicode Object from the wchar_t buffer w of the given
 506    size.
 507
 508    The buffer is copied into the new object. */
 509
 510 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
 511     register const wchar_t *w,  /* wchar_t buffer */
 512     int size                    /* size of buffer */
 513     );
 514
 515 /* Copies the Unicode Object contents into the wchar_t buffer w.  At
 516    most size wchar_t characters are copied.
 517
 518    Note that the resulting wchar_t string may or may not be
 519    0-terminated.  It is the responsibility of the caller to make sure
 520    that the wchar_t string is 0-terminated in case this is required by
 521    the application.
 522
 523    Returns the number of wchar_t characters copied (excluding a
 524    possibly trailing 0-termination character) or -1 in case of an
 525    error. */
 526
 527 PyAPI_FUNC(int) PyUnicode_AsWideChar(
 528     PyUnicodeObject *unicode,   /* Unicode object */
 529     register wchar_t *w,        /* wchar_t buffer */
 530     int size                    /* size of buffer */
 531     );
 532
 533 #endif
 534
 535 /* --- Unicode ordinals --------------------------------------------------- */
 536
 537 /* Create a Unicode Object from the given Unicode code point ordinal.
 538
 539    The ordinal must be in range(0x10000) on narrow Python builds
 540    (UCS2), and range(0x110000) on wide builds (UCS4). A ValueError is
 541    raised in case it is not.
 542
 543 */
 544
 545 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
 546
 547 /* === Builtin Codecs =====================================================
 548
 549    Many of these APIs take two arguments encoding and errors. These
 550    parameters encoding and errors have the same semantics as the ones
 551    of the builtin unicode() API.
 552
 553    Setting encoding to NULL causes the default encoding to be used.
 554
 555    Error handling is set by errors which may also be set to NULL
 556    meaning to use the default handling defined for the codec. Default
 557    error handling for all builtin codecs is "strict" (ValueErrors are
 558    raised).
 559
 560    The codecs all use a similar interface. Only deviation from the
 561    generic ones are documented.
 562
 563 */
 564
 565 /* --- Manage the default encoding ---------------------------------------- */
 566
 567 /* Return a Python string holding the default encoded value of the
 568    Unicode object.
 569
 570    The resulting string is cached in the Unicode object for subsequent
 571    usage by this function. The cached version is needed to implement
 572    the character buffer interface and will live (at least) as long as
 573    the Unicode object itself.
 574
 575    The refcount of the string is *not* incremented.
 576
 577    *** Exported for internal use by the interpreter only !!! ***
 578
 579 */
 580
 581 PyAPI_FUNC(PyObject *) _PyUnicode_AsDefaultEncodedString(
 582     PyObject *, const char *);
 583
 584 /* Returns the currently active default encoding.
 585
 586    The default encoding is currently implemented as run-time settable
 587    process global.  This may change in future versions of the
 588    interpreter to become a parameter which is managed on a per-thread
 589    basis.
 590
 591  */
 592
 593 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
 594
 595 /* Sets the currently active default encoding.
 596
 597    Returns 0 on success, -1 in case of an error.
 598
 599  */
 600
 601 PyAPI_FUNC(int) PyUnicode_SetDefaultEncoding(
 602     const char *encoding        /* Encoding name in standard form */
 603     );
 604
 605 /* --- Generic Codecs ----------------------------------------------------- */
 606
 607 /* Create a Unicode object by decoding the encoded string s of the
 608    given size. */
 609
 610 PyAPI_FUNC(PyObject*) PyUnicode_Decode(
 611     const char *s,              /* encoded string */
 612     int size,                   /* size of buffer */
 613     const char *encoding,       /* encoding */
 614     const char *errors          /* error handling */
 615     );
 616
 617 /* Encodes a Py_UNICODE buffer of the given size and returns a
 618    Python string object. */
 619
 620 PyAPI_FUNC(PyObject*) PyUnicode_Encode(
 621     const Py_UNICODE *s,        /* Unicode char buffer */
 622     int size,                   /* number of Py_UNICODE chars to encode */
 623     const char *encoding,       /* encoding */
 624     const char *errors          /* error handling */
 625     );
 626
 627 /* Encodes a Unicode object and returns the result as Python
 628    object. */
 629
 630 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
 631     PyObject *unicode,          /* Unicode object */
 632     const char *encoding,       /* encoding */
 633     const char *errors          /* error handling */
 634     );
 635
 636 /* Encodes a Unicode object and returns the result as Python string
 637    object. */
 638
 639 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
 640     PyObject *unicode,          /* Unicode object */
 641     const char *encoding,       /* encoding */
 642     const char *errors          /* error handling */
 643     );
 644
 645 /* --- UTF-7 Codecs ------------------------------------------------------- */
 646
 647 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
 648     const char *string,         /* UTF-7 encoded string */
 649     int length,                 /* size of string */
 650     const char *errors          /* error handling */
 651     );
 652
 653 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
 654     const Py_UNICODE *data,     /* Unicode char buffer */
 655     int length,                 /* number of Py_UNICODE chars to encode */
 656     int encodeSetO,             /* force the encoder to encode characters in
 657                                    Set O, as described in RFC2152 */
 658     int encodeWhiteSpace,       /* force the encoder to encode space, tab,
 659                                    carriage return and linefeed characters */
 660     const char *errors          /* error handling */
 661     );
 662
 663 /* --- UTF-8 Codecs ------------------------------------------------------- */
 664
 665 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
 666     const char *string,         /* UTF-8 encoded string */
 667     int length,                 /* size of string */
 668     const char *errors          /* error handling */
 669     );
 670
 671 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
 672     const char *string,         /* UTF-8 encoded string */
 673     int length,                 /* size of string */
 674     const char *errors,         /* error handling */
 675     int *consumed               /* bytes consumed */
 676     );
 677
 678 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
 679     PyObject *unicode           /* Unicode object */
 680     );
 681
 682 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
 683     const Py_UNICODE *data,     /* Unicode char buffer */
 684     int length,                 /* number of Py_UNICODE chars to encode */
 685     const char *errors          /* error handling */
 686     );
 687
 688 /* --- UTF-16 Codecs ------------------------------------------------------ */
 689
 690 /* Decodes length bytes from a UTF-16 encoded buffer string and returns
 691    the corresponding Unicode object.
 692
 693    errors (if non-NULL) defines the error handling. It defaults
 694    to "strict".
 695
 696    If byteorder is non-NULL, the decoder starts decoding using the
 697    given byte order:
 698
 699         *byteorder == -1: little endian
 700         *byteorder == 0:  native order
 701         *byteorder == 1:  big endian
 702
 703    In native mode, the first two bytes of the stream are checked for a
 704    BOM mark. If found, the BOM mark is analysed, the byte order
 705    adjusted and the BOM skipped.  In the other modes, no BOM mark
 706    interpretation is done. After completion, *byteorder is set to the
 707    current byte order at the end of input data.
 708
 709    If byteorder is NULL, the codec starts in native order mode.
 710
 711 */
 712
 713 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
 714     const char *string,         /* UTF-16 encoded string */
 715     int length,                 /* size of string */
 716     const char *errors,         /* error handling */
 717     int *byteorder              /* pointer to byteorder to use
 718                                    0=native;-1=LE,1=BE; updated on
 719                                    exit */
 720     );
 721
 722 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
 723     const char *string,         /* UTF-16 encoded string */
 724     int length,                 /* size of string */
 725     const char *errors,         /* error handling */
 726     int *byteorder,             /* pointer to byteorder to use
 727                                    0=native;-1=LE,1=BE; updated on
 728                                    exit */
 729     int *consumed               /* bytes consumed */
 730     );
 731
 732 /* Returns a Python string holding the UTF-16 encoding of unicode in
 733    native byte order. The string always starts with a BOM mark.  */
 734
 735 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
 736     PyObject *unicode           /* Unicode object to encode */
 737     );
 738
 739 /* Returns a Python string object holding the UTF-16 encoded value of
 740    the Unicode data.
 741
 742    The byteorder argument selects the byte order of the output as
 743    follows:
 744
 745    byteorder == -1: little endian
 746    byteorder == 0:  native byte order (writes a BOM mark)
 747    byteorder == 1:  big endian
 748
 749    If byteorder is 0, the output string will always start with the
 750    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
 751    prepended.
 752
 753    Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
 754    UCS-2. This trick makes it possible to add full UTF-16 capabilities
 755    at a later point without compromising the APIs.
 756
 757 */
 758
 759 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
 760     const Py_UNICODE *data,     /* Py_UNICODE buffer to encode */
 761     int length,                 /* number of Py_UNICODE chars to encode */
 762     const char *errors,         /* error handling */
 763     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
 764     );
 765
 766 /* --- Unicode-Escape Codecs ---------------------------------------------- */
 767
 768 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
 769     const char *string,         /* Unicode-Escape encoded string */
 770     int length,                 /* size of string in bytes */
 771     const char *errors          /* error handling */
 772     );
 773
 774 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
 775     PyObject *unicode           /* Unicode object */
 776     );
 777
 778 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
 779     const Py_UNICODE *data,     /* Py_UNICODE buffer to encode */
 780     int length                  /* Number of Py_UNICODE chars to encode */
 781     );
 782
 783 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
 784
 785 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
 786     const char *string,         /* Raw-Unicode-Escape encoded string */
 787     int length,                 /* size of string in bytes */
 788     const char *errors          /* error handling */
 789     );
 790
 791 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
 792     PyObject *unicode           /* Unicode object */
 793     );
 794
 795 PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
 796     const Py_UNICODE *data,     /* Py_UNICODE buffer to encode */
 797     int length                  /* Number of Py_UNICODE chars to encode */
 798     );
 799
 800 /* --- Unicode Internal Codec ---------------------------------------------
 801
 802     Only for internal use in _codecsmodule.c (not declared PyAPI_FUNC). */
 803
 804 PyObject *_PyUnicode_DecodeUnicodeInternal(
 805     const char *string,         /* input char buffer */
 806     int length,                 /* size of string in bytes */
 807     const char *errors          /* error handling */
 808     );
 809
 810 /* --- Latin-1 Codecs -----------------------------------------------------
 811
 812    Note: Latin-1 corresponds to the first 256 Unicode ordinals.
 813
 814 */
 815
 816 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
 817     const char *string,         /* Latin-1 encoded string */
 818     int length,                 /* size of string in bytes */
 819     const char *errors          /* error handling */
 820     );
 821
 822 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
 823     PyObject *unicode           /* Unicode object */
 824     );
 825
 826 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
 827     const Py_UNICODE *data,     /* Py_UNICODE buffer to encode */
 828     int length,                 /* Number of Py_UNICODE chars to encode */
 829     const char *errors          /* error handling */
 830     );
 831
 832 /* --- ASCII Codecs -------------------------------------------------------
 833
 834    Only 7-bit ASCII data is accepted. All other codes generate errors.
 835
 836 */
 837
 838 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
 839     const char *string,         /* ASCII encoded string */
 840     int length,                 /* size of string in bytes */
 841     const char *errors          /* error handling */
 842     );
 843
 844 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
 845     PyObject *unicode           /* Unicode object */
 846     );
 847
 848 PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
 849     const Py_UNICODE *data,     /* Py_UNICODE buffer to encode */
 850     int length,                 /* Number of Py_UNICODE chars to encode */
 851     const char *errors          /* error handling */
 852     );
 853
 854 /* --- Character Map Codecs -----------------------------------------------
 855
 856    This codec uses mappings to encode and decode characters.
 857
 858    Decoding mappings must map single string characters to single
 859    Unicode characters, integers (which are then interpreted as Unicode
 860    ordinals) or None (meaning "undefined mapping" and causing an
 861    error).
 862
 863    Encoding mappings must map single Unicode characters to single
 864    string characters, integers (which are then interpreted as Latin-1
 865    ordinals) or None (meaning "undefined mapping" and causing an
 866    error).
 867
 868    If a character lookup fails with a LookupError, the character is
 869    copied as-is, meaning that its ordinal value will be interpreted as
 870    a Unicode or Latin-1 ordinal, respectively. Because of this,
 871    mappings only need to contain those entries which map characters
 872    to different code points.
 873
 874 */
 875
 876 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
 877     const char *string,         /* Encoded string */
 878     int length,                 /* size of string in bytes */
 879     PyObject *mapping,          /* character mapping
 880                                    (char ordinal -> unicode ordinal) */
 881     const char *errors          /* error handling */
 882     );
 883
 884 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
 885     PyObject *unicode,          /* Unicode object */
 886     PyObject *mapping           /* character mapping
 887                                    (unicode ordinal -> char ordinal) */
 888     );
 889
 890 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
 891     const Py_UNICODE *data,     /* Py_UNICODE buffer to encode */
 892     int length,                 /* Number of Py_UNICODE chars to encode */
 893     PyObject *mapping,          /* character mapping
 894                                    (unicode ordinal -> char ordinal) */
 895     const char *errors          /* error handling */
 896     );
 897
 898 /* Translate a Py_UNICODE buffer of the given length by applying a
 899    character mapping table to it and return the resulting Unicode
 900    object.
 901
 902    The mapping table must map Unicode ordinal integers to Unicode
 903    ordinal integers or None (causing deletion of the character).
 904
 905    Mapping tables may be dictionaries or sequences. Unmapped character
 906    ordinals (ones which cause a LookupError) are left untouched and
 907    are copied as-is.
 908
 909 */
 910
 911 PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
 912     const Py_UNICODE *data,     /* Py_UNICODE buffer to translate */
 913     int length,                 /* Number of Py_UNICODE chars to translate */
 914     PyObject *table,            /* Translate table */
 915     const char *errors          /* error handling */
 916     );
 917
 918 #ifdef MS_WIN32
 919
 920 /* --- MBCS codecs for Windows -------------------------------------------- */
 921
 922 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
 923     const char *string,         /* MBCS encoded string */
 924     int length,                 /* size of string in bytes */
 925     const char *errors          /* error handling */
 926     );
 927
 928 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
 929     PyObject *unicode           /* Unicode object */
 930     );
 931
 932 PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
 933     const Py_UNICODE *data,     /* Py_UNICODE buffer to encode */
 934     int length,                 /* Number of Py_UNICODE chars to encode */
 935     const char *errors          /* error handling */
 936     );
 937
 938 #endif /* MS_WIN32 */
 939
 940 /* --- Decimal Encoder ---------------------------------------------------- */
 941
 942 /* Takes a Unicode string holding a decimal value and writes it into
 943    an output buffer using standard ASCII digit codes.
 944
 945    The output buffer has to provide at least length+1 bytes of storage
 946    area. The output string is 0-terminated.
 947
 948    The encoder converts whitespace to ' ', decimal characters to their
 949    corresponding ASCII digit and all other Latin-1 characters except
 950    \0 as-is. Characters outside this range (Unicode ordinals 1-255)
 951    are treated as errors. This includes embedded NUL characters.
 952
 953    Error handling is defined by the errors argument:
 954
 955       NULL or "strict": raise a ValueError
 956       "ignore": ignore the wrong characters (these are not copied to the
 957                 output buffer)
 958       "replace": replaces illegal characters with '?'
 959
 960    Returns 0 on success, -1 on failure.
 961
 962 */
 963
 964 PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
 965     Py_UNICODE *s,              /* Unicode buffer */
 966     int length,                 /* Number of Py_UNICODE chars to encode */
 967     char *output,               /* Output buffer; must have size >= length+1 */
 968     const char *errors          /* error handling */
 969     );
 970
 971 /* --- Methods & Slots ----------------------------------------------------
 972
 973    These are capable of handling Unicode objects and strings on input
 974    (we refer to them as strings in the descriptions) and return
 975    Unicode objects or integers as appropriate. */
 976
 977 /* Concatenate two strings giving a new Unicode string. */
 978
 979 PyAPI_FUNC(PyObject*) PyUnicode_Concat(
 980     PyObject *left,             /* Left string */
 981     PyObject *right             /* Right string */
 982     );
 983
 984 /* Split a string giving a list of Unicode strings.
 985
 986    If sep is NULL, splitting will be done at all whitespace
 987    substrings. Otherwise, splits occur at the given separator.
 988
 989    At most maxsplit splits are done; a negative maxsplit means no limit.
 990
 991    Separators are not included in the resulting list.
 992
 993 */
 994
 995 PyAPI_FUNC(PyObject*) PyUnicode_Split(
 996     PyObject *s,                /* String to split */
 997     PyObject *sep,              /* String separator; NULL = whitespace */
 998     int maxsplit                /* Maxsplit count; negative = no limit */
 999     );
1000
1001 /* Ditto, but split at line breaks.
1002
1003    CRLF is considered to be one line break. Line breaks are not
1004    included in the resulting list unless keepends is true. */
1005
1006 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
1007     PyObject *s,                /* String to split */
1008     int keepends                /* If true, line end markers are included */
1009     );
1010
1011 /* Split a string giving a list of Unicode strings.
1012
1013    If sep is NULL, splitting will be done at all whitespace
1014    substrings. Otherwise, splits occur at the given separator.
1015
1016    At most maxsplit splits will be done; if maxsplit is negative, no
1017    limit is set. Unlike PyUnicode_Split, PyUnicode_RSplit splits
1018    starting from the end of the string.
1019
1020    Separators are not included in the resulting list.
1021
1022 */
1023
1024 PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
1025     PyObject *s,                /* String to split */
1026     PyObject *sep,              /* String separator; NULL = whitespace */
1027     int maxsplit                /* Maxsplit count; negative = no limit */
1028     );
1029
1030 /* Translate a string by applying a character mapping table to it and
1031    return the resulting Unicode object.
1032
1033    The mapping table must map Unicode ordinal integers to Unicode
1034    ordinal integers or None (causing deletion of the character).
1035
1036    Mapping tables may be dictionaries or sequences. Unmapped character
1037    ordinals (ones which cause a LookupError) are left untouched and
1038    are copied as-is.
1039
1040 */
1041
1042 PyAPI_FUNC(PyObject *) PyUnicode_Translate(
1043     PyObject *str,              /* String to translate */
1044     PyObject *table,            /* Translate table (dictionary or sequence) */
1045     const char *errors          /* error handling */
1046     );
1047
1048 /* Join a sequence of strings using the given separator and return
1049    the resulting Unicode string. */
1050
1051 PyAPI_FUNC(PyObject*) PyUnicode_Join(
1052     PyObject *separator,        /* Separator string */
1053     PyObject *seq               /* Sequence of strings to join */
1054     );
1055
1056 /* Return 1 if substr matches str[start:end] at the given tail end, 0
1057    otherwise. */
1058
1059 PyAPI_FUNC(int) PyUnicode_Tailmatch(
1060     PyObject *str,              /* String to check */
1061     PyObject *substr,           /* Prefix or suffix string to match */
1062     int start,                  /* Start index */
1063     int end,                    /* Stop index */
1064     int direction               /* Tail end: -1 prefix, +1 suffix */
1065     );
1066
1067 /* Return the first position of substr in str[start:end] using the
1068    given search direction, or -1 if not found. -2 is returned in case
1069    an error occurred and an exception is set. */
1070
1071 PyAPI_FUNC(int) PyUnicode_Find(
1072     PyObject *str,              /* String to search */
1073     PyObject *substr,           /* Substring to find */
1074     int start,                  /* Start index */
1075     int end,                    /* Stop index */
1076     int direction               /* Find direction: +1 forward, -1 backward */
1077     );
1078
1079 /* Count the number of occurrences of substr in str[start:end]. */
1080
1081 PyAPI_FUNC(int) PyUnicode_Count(
1082     PyObject *str,              /* String to search */
1083     PyObject *substr,           /* Substring to count */
1084     int start,                  /* Start index */
1085     int end                     /* Stop index */
1086     );
1087
1088 /* Replace at most maxcount occurrences of substr in str with replstr
1089    and return the resulting Unicode object. */
1090
1091 PyAPI_FUNC(PyObject *) PyUnicode_Replace(
1092     PyObject *str,              /* String to search */
1093     PyObject *substr,           /* Substring to find */
1094     PyObject *replstr,          /* Replacement string */
1095     int maxcount                /* Max. number of replacements to apply;
1096                                    -1 = all */
1097     );
1098
1099 /* Compare two strings and return -1, 0, 1 for less than, equal,
1100    greater than, respectively. */
1101
1102 PyAPI_FUNC(int) PyUnicode_Compare(
1103     PyObject *left,             /* Left string */
1104     PyObject *right             /* Right string */
1105     );
1106
1107 /* Apply an argument tuple or dictionary to a format string and return
1108    the resulting Unicode string. */
1109
1110 PyAPI_FUNC(PyObject *) PyUnicode_Format(
1111     PyObject *format,           /* Format string */
1112     PyObject *args              /* Argument tuple or dictionary */
1113     );
1114
1115 /* Checks whether element is contained in container and returns 1/0
1116    accordingly.
1117
1118    element has to coerce to a one-element Unicode string. -1 is
1119    returned in case of an error. */
1120
1121 PyAPI_FUNC(int) PyUnicode_Contains(
1122     PyObject *container,        /* Container string */
1123     PyObject *element           /* Element string */
1124     );
1125
1126 /* Externally visible for str.strip(unicode) */
1127 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
1128     PyUnicodeObject *self,      /* presumably the Unicode object to strip; TODO confirm */
1129     int striptype,              /* strip mode; valid values not shown here, TODO confirm */
1130     PyObject *sepobj            /* presumably the characters to strip; TODO confirm */
1131     );
1132
1133 /* === Characters Type APIs =============================================== */
1134
1135 /* These should not be used directly. Use the Py_UNICODE_IS* and
1136    Py_UNICODE_TO* macros instead.
1137
1138    These APIs are implemented in Objects/unicodectype.c.
1139
1140 */
1141
1142 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
1143     Py_UNICODE ch       /* Unicode character to test */
1144     );
1145
1146 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
1147     Py_UNICODE ch       /* Unicode character to test */
1148     );
1149
1150 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
1151     Py_UNICODE ch       /* Unicode character to test */
1152     );
1153
1154 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
1155     const Py_UNICODE ch         /* Unicode character to test */
1156     );
1157
1158 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
1159     const Py_UNICODE ch         /* Unicode character to test */
1160     );
1161
1162 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToLowercase(
1163     Py_UNICODE ch       /* Unicode character to convert */
1164     );
1165
1166 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToUppercase(
1167     Py_UNICODE ch       /* Unicode character to convert */
1168     );
1169
1170 PyAPI_FUNC(Py_UNICODE) _PyUnicode_ToTitlecase(
1171     Py_UNICODE ch       /* Unicode character to convert */
1172     );
1173
1174 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
1175     Py_UNICODE ch       /* Unicode character to convert */
1176     );
1177
1178 PyAPI_FUNC(int) _PyUnicode_ToDigit(
1179     Py_UNICODE ch       /* Unicode character to convert */
1180     );
1181
1182 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
1183     Py_UNICODE ch       /* Unicode character to convert */
1184     );
1185
1186 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
1187     Py_UNICODE ch       /* Unicode character to test */
1188     );
1189
1190 PyAPI_FUNC(int) _PyUnicode_IsDigit(
1191     Py_UNICODE ch       /* Unicode character to test */
1192     );
1193
1194 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
1195     Py_UNICODE ch       /* Unicode character to test */
1196     );
1197
1198 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
1199     Py_UNICODE ch       /* Unicode character to test */
1200     );
1201
1202 #ifdef __cplusplus
1203 }
1204 #endif
1205 #endif /* Py_USING_UNICODE */
1206 #endif /* !Py_UNICODEOBJECT_H */