1 /* ------------------------------------------------------------------------
3 unicodedata -- Provides access to the Unicode 3.2 database.
5 Data was extracted from the Unicode 3.2 UnicodeData.txt file.
7 Written by Marc-Andre Lemburg (mal@lemburg.com).
8 Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
9 Modified by Martin v. Löwis (martin@v.loewis.de)
11 Copyright (c) Corporation for National Research Initiatives.
13 ------------------------------------------------------------------------ */
18 /* character properties */
/* Per-code-point property record of the static Unicode database.
   One record per code point, generated by Tools/unicode/makeunicodedata.py
   and reached through the two-level index in _getrecord_ex().
   NOTE(review): the opening "typedef struct {" line is not visible in this
   extract (original lines 19-20 are missing). */
21 const unsigned char category
; /* index into
22 _PyUnicode_CategoryNames */
23 const unsigned char combining
; /* combining class value 0 - 255 */
24 const unsigned char bidirectional
; /* index into
25 _PyUnicode_BidirectionalNames */
26 const unsigned char mirrored
; /* true if mirrored in bidir mode */
27 const unsigned char east_asian_width
; /* index into
28 _PyUnicode_EastAsianWidth */
29 } _PyUnicode_DatabaseRecord
;
31 /* data file generated by Tools/unicode/makeunicodedata.py */
32 #include "unicodedata_db.h"
34 static const _PyUnicode_DatabaseRecord
*
/* Return the property record for code point `code` via the two-level
   trie (index1/index2, split by SHIFT) from unicodedata_db.h.
   NOTE(review): original lines 36-40 (opening brace, `index` declaration
   and, presumably, the out-of-range guard) are missing from this extract. */
35 _getrecord_ex(Py_UCS4 code
)
41 index
= index1
[(code
>>SHIFT
)];
42 index
= index2
[(index
<<SHIFT
)+(code
&((1<<SHIFT
)-1))];
45 return &_PyUnicode_Database_Records
[index
];
48 static const _PyUnicode_DatabaseRecord
*
/* Convenience wrapper: property record for the FIRST character of the
   given unicode object (callers validate length == 1 beforehand). */
49 _getrecord(PyUnicodeObject
* v
)
51 return _getrecord_ex(*PyUnicode_AS_UNICODE(v
));
54 /* --- Module API --------------------------------------------------------- */
56 PyDoc_STRVAR(unicodedata_decimal__doc__
,
57 "decimal(unichr[, default])\n\
59 Returns the decimal value assigned to the Unicode character unichr\n\
60 as integer. If no such value is defined, default is returned, or, if\n\
61 not given, ValueError is raised.");
/* decimal(unichr[, default]): decimal digit value of a single Unicode
   character, via Py_UNICODE_TODECIMAL.  Raises TypeError unless the
   argument is exactly one character.
   NOTE(review): interior lines (declarations of v/rc, the early returns,
   and the defobj fallback path) are missing from this extract. */
64 unicodedata_decimal(PyObject
*self
, PyObject
*args
)
67 PyObject
*defobj
= NULL
;
70 if (!PyArg_ParseTuple(args
, "O!|O:decimal", &PyUnicode_Type
, &v
, &defobj
))
72 if (PyUnicode_GET_SIZE(v
) != 1) {
73 PyErr_SetString(PyExc_TypeError
,
74 "need a single Unicode character as parameter");
77 rc
= Py_UNICODE_TODECIMAL(*PyUnicode_AS_UNICODE(v
));
80 PyErr_SetString(PyExc_ValueError
,
89 return PyInt_FromLong(rc
);
92 PyDoc_STRVAR(unicodedata_digit__doc__
,
93 "digit(unichr[, default])\n\
95 Returns the digit value assigned to the Unicode character unichr as\n\
96 integer. If no such value is defined, default is returned, or, if\n\
97 not given, ValueError is raised.");
/* digit(unichr[, default]): digit value of a single Unicode character,
   via Py_UNICODE_TODIGIT; ValueError (or `default`) when undefined.
   Mirrors unicodedata_decimal above. */
100 unicodedata_digit(PyObject
*self
, PyObject
*args
)
103 PyObject
*defobj
= NULL
;
106 if (!PyArg_ParseTuple(args
, "O!|O:digit", &PyUnicode_Type
, &v
, &defobj
))
108 if (PyUnicode_GET_SIZE(v
) != 1) {
109 PyErr_SetString(PyExc_TypeError
,
110 "need a single Unicode character as parameter");
113 rc
= Py_UNICODE_TODIGIT(*PyUnicode_AS_UNICODE(v
));
115 if (defobj
== NULL
) {
116 PyErr_SetString(PyExc_ValueError
, "not a digit");
124 return PyInt_FromLong(rc
);
127 PyDoc_STRVAR(unicodedata_numeric__doc__
,
128 "numeric(unichr[, default])\n\
130 Returns the numeric value assigned to the Unicode character unichr\n\
131 as float. If no such value is defined, default is returned, or, if\n\
132 not given, ValueError is raised.");
/* numeric(unichr[, default]): numeric value (as C double / Python float)
   of a single Unicode character via Py_UNICODE_TONUMERIC; ValueError
   (or `default`) when undefined. */
135 unicodedata_numeric(PyObject
*self
, PyObject
*args
)
138 PyObject
*defobj
= NULL
;
141 if (!PyArg_ParseTuple(args
, "O!|O:numeric", &PyUnicode_Type
, &v
, &defobj
))
143 if (PyUnicode_GET_SIZE(v
) != 1) {
144 PyErr_SetString(PyExc_TypeError
,
145 "need a single Unicode character as parameter");
148 rc
= Py_UNICODE_TONUMERIC(*PyUnicode_AS_UNICODE(v
));
150 if (defobj
== NULL
) {
151 PyErr_SetString(PyExc_ValueError
, "not a numeric character");
159 return PyFloat_FromDouble(rc
);
162 PyDoc_STRVAR(unicodedata_category__doc__
,
165 Returns the general category assigned to the Unicode character\n\
/* category(unichr): general-category name (e.g. from
   _PyUnicode_CategoryNames) for a single Unicode character. */
169 unicodedata_category(PyObject
*self
, PyObject
*args
)
174 if (!PyArg_ParseTuple(args
, "O!:category",
175 &PyUnicode_Type
, &v
))
177 if (PyUnicode_GET_SIZE(v
) != 1) {
178 PyErr_SetString(PyExc_TypeError
,
179 "need a single Unicode character as parameter");
182 index
= (int) _getrecord(v
)->category
;
183 return PyString_FromString(_PyUnicode_CategoryNames
[index
]);
186 PyDoc_STRVAR(unicodedata_bidirectional__doc__
,
187 "bidirectional(unichr)\n\
189 Returns the bidirectional category assigned to the Unicode character\n\
190 unichr as string. If no such value is defined, an empty string is\n\
/* bidirectional(unichr): bidi-category name (index into
   _PyUnicode_BidirectionalNames) for a single Unicode character. */
194 unicodedata_bidirectional(PyObject
*self
, PyObject
*args
)
199 if (!PyArg_ParseTuple(args
, "O!:bidirectional",
200 &PyUnicode_Type
, &v
))
202 if (PyUnicode_GET_SIZE(v
) != 1) {
203 PyErr_SetString(PyExc_TypeError
,
204 "need a single Unicode character as parameter");
207 index
= (int) _getrecord(v
)->bidirectional
;
208 return PyString_FromString(_PyUnicode_BidirectionalNames
[index
]);
211 PyDoc_STRVAR(unicodedata_combining__doc__
,
212 "combining(unichr)\n\
214 Returns the canonical combining class assigned to the Unicode\n\
215 character unichr as integer. Returns 0 if no combining class is\n\
/* combining(unichr): canonical combining class (0-255) of a single
   Unicode character, straight from the database record. */
219 unicodedata_combining(PyObject
*self
, PyObject
*args
)
223 if (!PyArg_ParseTuple(args
, "O!:combining",
224 &PyUnicode_Type
, &v
))
226 if (PyUnicode_GET_SIZE(v
) != 1) {
227 PyErr_SetString(PyExc_TypeError
,
228 "need a single Unicode character as parameter");
231 return PyInt_FromLong((int) _getrecord(v
)->combining
);
234 PyDoc_STRVAR(unicodedata_mirrored__doc__
,
237 Returns the mirrored property assigned to the Unicode character\n\
238 unichr as integer. Returns 1 if the character has been identified as\n\
239 a \"mirrored\" character in bidirectional text, 0 otherwise.");
/* mirrored(unichr): 1 if the character is mirrored in bidirectional
   text, else 0 (the `mirrored` field of the database record). */
242 unicodedata_mirrored(PyObject
*self
, PyObject
*args
)
246 if (!PyArg_ParseTuple(args
, "O!:mirrored",
247 &PyUnicode_Type
, &v
))
249 if (PyUnicode_GET_SIZE(v
) != 1) {
250 PyErr_SetString(PyExc_TypeError
,
251 "need a single Unicode character as parameter");
254 return PyInt_FromLong((int) _getrecord(v
)->mirrored
);
257 PyDoc_STRVAR(unicodedata_east_asian_width__doc__
,
258 "east_asian_width(unichr)\n\
260 Returns the east asian width assigned to the Unicode character\n\
/* east_asian_width(unichr): East Asian width name (index into
   _PyUnicode_EastAsianWidthNames) for a single Unicode character. */
264 unicodedata_east_asian_width(PyObject
*self
, PyObject
*args
)
269 if (!PyArg_ParseTuple(args
, "O!:east_asian_width",
270 &PyUnicode_Type
, &v
))
272 if (PyUnicode_GET_SIZE(v
) != 1) {
273 PyErr_SetString(PyExc_TypeError
,
274 "need a single Unicode character as parameter");
277 index
= (int) _getrecord(v
)->east_asian_width
;
278 return PyString_FromString(_PyUnicode_EastAsianWidthNames
[index
]);
281 PyDoc_STRVAR(unicodedata_decomposition__doc__
,
282 "decomposition(unichr)\n\
284 Returns the character decomposition mapping assigned to the Unicode\n\
285 character unichr as string. An empty string is returned in case no\n\
286 such mapping is defined.");
/* decomposition(unichr): render the raw decomposition mapping of a
   single character as a string: optional <prefix> tag followed by the
   decomposed code points formatted as %04X, built in the local `decomp`
   buffer (declaration not visible in this extract). */
289 unicodedata_decomposition(PyObject
*self
, PyObject
*args
)
293 int code
, index
, count
, i
;
295 if (!PyArg_ParseTuple(args
, "O!:decomposition",
296 &PyUnicode_Type
, &v
))
298 if (PyUnicode_GET_SIZE(v
) != 1) {
299 PyErr_SetString(PyExc_TypeError
,
300 "need a single Unicode character as parameter");
304 code
= (int) *PyUnicode_AS_UNICODE(v
);
306 if (code
< 0 || code
>= 0x110000)
309 index
= decomp_index1
[(code
>>DECOMP_SHIFT
)];
310 index
= decomp_index2
[(index
<<DECOMP_SHIFT
)+
311 (code
&((1<<DECOMP_SHIFT
)-1))];
314 /* high byte is number of hex bytes (usually one or two), low byte
315 is prefix code (index into decomp_prefix) */
316 count
= decomp_data
[index
] >> 8;
318 /* XXX: could allocate the PyString up front instead
319 (strlen(prefix) + 5 * count + 1 bytes) */
/* Copy the prefix tag, then append each decomposed code point. */
322 i
= strlen(decomp_prefix
[decomp_data
[index
] & 255]);
323 memcpy(decomp
, decomp_prefix
[decomp_data
[index
] & 255], i
);
325 while (count
-- > 0) {
328 assert((size_t)i
< sizeof(decomp
));
329 PyOS_snprintf(decomp
+ i
, sizeof(decomp
) - i
, "%04X",
330 decomp_data
[++index
]);
331 i
+= strlen(decomp
+ i
);
336 return PyString_FromString(decomp
);
/* Fetch the decomposition record for `code`: on return, *index points
   into decomp_data, *prefix is the compatibility-tag code (0 means a
   canonical decomposition), and *count is the number of code points.
   Out-of-range code points yield an empty record (the early branch's
   body is not visible in this extract). */
340 get_decomp_record(Py_UCS4 code
, int *index
, int *prefix
, int *count
)
342 if (code
>= 0x110000) {
346 *index
= decomp_index1
[(code
>>DECOMP_SHIFT
)];
347 *index
= decomp_index2
[(*index
<<DECOMP_SHIFT
)+
348 (code
&((1<<DECOMP_SHIFT
)-1))];
351 /* high byte is number of hex bytes (usually one or two), low byte
352 is prefix code (index into decomp_prefix) */
353 *count
= decomp_data
[*index
] >> 8;
354 *prefix
= decomp_data
[*index
] & 255;
/* Hangul block arithmetic: NCount syllables per leading consonant,
   SCount total precomposed syllables. */
366 #define NCount (VCount*TCount)
367 #define SCount (LCount*NCount)
/* Decompose `input` to NFD (k == 0) or NFKD (k != 0): recursively expand
   decompositions via a small explicit stack, handle Hangul algorithmically,
   then bubble-sort combining marks into canonical order.  Returns a new
   unicode object. */
370 nfd_nfkd(PyObject
*input
, int k
)
373 Py_UNICODE
*i
, *end
, *o
;
374 /* Longest decomposition in Unicode 3.2: U+FDFA */
375 Py_UNICODE stack
[20];
376 int space
, stackptr
, isize
;
377 int index
, prefix
, count
;
378 unsigned char prev
, cur
;
381 isize
= PyUnicode_GET_SIZE(input
);
382 /* Overallocate at most 10 characters. */
383 space
= (isize
> 10 ? 10 : isize
) + isize
;
384 result
= PyUnicode_FromUnicode(NULL
, space
);
387 i
= PyUnicode_AS_UNICODE(input
);
389 o
= PyUnicode_AS_UNICODE(result
);
392 stack
[stackptr
++] = *i
++;
394 Py_UNICODE code
= stack
[--stackptr
];
395 /* Hangul Decomposition adds three characters in
396 a single step, so we need at least that much room. */
/* NOTE(review): PyString_GET_SIZE is applied to a *unicode* object
   here — presumably it works only because ob_size sits at the same
   offset; confirm against the Python 2.x object layouts. */
398 int newsize
= PyString_GET_SIZE(result
) + 10;
400 if (PyUnicode_Resize(&result
, newsize
) == -1)
402 o
= PyUnicode_AS_UNICODE(result
) + newsize
- space
;
404 /* Hangul Decomposition. */
405 if (SBase
<= code
&& code
< (SBase
+SCount
)) {
406 int SIndex
= code
- SBase
;
407 int L
= LBase
+ SIndex
/ NCount
;
408 int V
= VBase
+ (SIndex
% NCount
) / TCount
;
409 int T
= TBase
+ SIndex
% TCount
;
419 /* Other decompositions. */
420 get_decomp_record(code
, &index
, &prefix
, &count
);
422 /* Copy character if it is not decomposable, or has a
423 compatibility decomposition, but we do NFD. */
424 if (!count
|| (prefix
&& !k
)) {
429 /* Copy decomposition onto the stack, in reverse order. */
432 code
= decomp_data
[index
+ (--count
)];
433 stack
[stackptr
++] = code
;
438 /* Drop overallocation. Cannot fail. */
439 PyUnicode_Resize(&result
, PyUnicode_GET_SIZE(result
) - space
);
441 /* Sort canonically. */
442 i
= PyUnicode_AS_UNICODE(result
);
443 prev
= _getrecord_ex(*i
)->combining
;
444 end
= i
+ PyUnicode_GET_SIZE(result
);
445 for (i
++; i
< end
; i
++) {
446 cur
= _getrecord_ex(*i
)->combining
;
447 if (prev
== 0 || cur
== 0 || prev
<= cur
) {
451 /* Non-canonical order. Need to switch *i with previous. */
454 Py_UNICODE tmp
= o
[1];
458 if (o
< PyUnicode_AS_UNICODE(result
))
460 prev
= _getrecord_ex(*o
)->combining
;
461 if (prev
== 0 || prev
<= cur
)
464 prev
= _getrecord_ex(*i
)->combining
;
/* Linear scan of a reindex table (nfc_first or nfc_last): map `code`
   to its dense composition-table index, or fall through (return value
   for a miss is not visible in this extract — presumably -1). */
470 find_nfc_index(struct reindex
* nfc
, Py_UNICODE code
)
473 for (index
= 0; nfc
[index
].start
; index
++) {
474 int start
= nfc
[index
].start
;
477 if (code
<= start
+ nfc
[index
].count
) {
478 int delta
= code
- start
;
479 return nfc
[index
].index
+ delta
;
/* Compose `input` to NFC (k == 0) or NFKC (k != 0): first decompose via
   nfd_nfkd, then combine pairs in-place — Hangul <L,V(,T)> algorithmically,
   everything else through the comp_index/comp_data tables.  Characters
   consumed by a composition are remembered in `skipped` and dropped. */
486 nfc_nfkc(PyObject
*input
, int k
)
489 Py_UNICODE
*i
, *i1
, *o
, *end
;
490 int f
,l
,index
,index1
,comb
;
492 Py_UNICODE
*skipped
[20];
495 result
= nfd_nfkd(input
, k
);
499 /* We are going to modify result in-place.
500 If nfd_nfkd is changed to sometimes return the input,
501 this code needs to be reviewed. */
502 assert(result
!= input
);
504 i
= PyUnicode_AS_UNICODE(result
);
505 end
= i
+ PyUnicode_GET_SIZE(result
);
506 o
= PyUnicode_AS_UNICODE(result
);
510 for (index
= 0; index
< cskipped
; index
++) {
511 if (skipped
[index
] == i
) {
512 /* *i character is skipped. Remove it from the skipped list. */
514 skipped
[index
] = skipped
[cskipped
-1];
517 goto again
; /* continue while */
520 /* Hangul Composition. We don't need to check for <LV,T>
521 pairs, since we always have decomposed data. */
/* NOTE(review): the L test below uses `< (LBase+LCount)` but the V and T
   tests use `<=` — asymmetric bounds; verify the `<=` is not off-by-one. */
522 if (LBase
<= *i
&& *i
< (LBase
+LCount
) &&
524 VBase
<= i
[1] && i
[1] <= (VBase
+VCount
)) {
526 LIndex
= i
[0] - LBase
;
527 VIndex
= i
[1] - VBase
;
528 code
= SBase
+ (LIndex
*VCount
+VIndex
)*TCount
;
531 TBase
<= *i
&& *i
<= (TBase
+TCount
)) {
539 f
= find_nfc_index(nfc_first
, *i
);
544 /* Find next unblocked character. */
548 int comb1
= _getrecord_ex(*i1
)->combining
;
549 if (comb1
&& comb
== comb1
) {
550 /* Character is blocked. */
554 l
= find_nfc_index(nfc_last
, *i1
);
555 /* *i1 cannot be combined with *i. If *i1
556 is a starter, we don't need to look further.
557 Otherwise, record the combining class. */
566 index
= f
*TOTAL_LAST
+ l
;
567 index1
= comp_index
[index
>> COMP_SHIFT
];
568 code
= comp_data
[(index1
<<COMP_SHIFT
)+
569 (index
&((1<<COMP_SHIFT
)-1))];
573 /* Replace the original character. */
575 /* Mark the second character unused. */
576 skipped
[cskipped
++] = i1
;
578 f
= find_nfc_index(nfc_first
, *i
);
585 PyUnicode_Resize(&result
, o
- PyUnicode_AS_UNICODE(result
));
589 PyDoc_STRVAR(unicodedata_normalize__doc__
,
590 "normalize(form, unistr)\n\
592 Return the normal form 'form' for the Unicode string unistr. Valid\n\
593 values for form are 'NFC', 'NFKC', 'NFD', and 'NFKD'.");
/* normalize(form, unistr): dispatch on the form name to nfc_nfkc /
   nfd_nfkd.  Empty strings are returned as-is (resizing empty unicode
   objects would cause internal errors); unknown forms raise ValueError. */
596 unicodedata_normalize(PyObject
*self
, PyObject
*args
)
601 if(!PyArg_ParseTuple(args
, "sO!:normalize",
602 &form
, &PyUnicode_Type
, &input
))
605 if (PyUnicode_GetSize(input
) == 0) {
606 /* Special case empty input strings, since resizing
607 them later would cause internal errors. */
612 if (strcmp(form
, "NFC") == 0)
613 return nfc_nfkc(input
, 0);
614 if (strcmp(form
, "NFKC") == 0)
615 return nfc_nfkc(input
, 1);
616 if (strcmp(form
, "NFD") == 0)
617 return nfd_nfkd(input
, 0);
618 if (strcmp(form
, "NFKD") == 0)
619 return nfd_nfkd(input
, 1);
620 PyErr_SetString(PyExc_ValueError
, "invalid normalization form");
624 /* -------------------------------------------------------------------- */
625 /* unicode character name tables */
627 /* data file generated by Tools/unicode/makeunicodedata.py */
628 #include "unicodename_db.h"
630 /* -------------------------------------------------------------------- */
631 /* database code (cut and pasted from the unidb package) */
/* Case-insensitive multiplicative hash of `s[0..len)` with multiplier
   `scale`, folded to 24 bits — must match the hash used by
   Tools/unicode/makeunicodedata.py when building the name table.
   NOTE(review): toupper() is applied to a plain char (s[i]); for bytes
   >= 0x80 on signed-char platforms that is undefined per <ctype.h> —
   the cast here applies to toupper's *result*, not its argument. */
634 _gethash(const char *s
, int len
, int scale
)
639 for (i
= 0; i
< len
; i
++) {
640 h
= (h
* scale
) + (unsigned char) toupper(s
[i
]);
643 h
= (h
^ ((ix
>>24) & 0xff)) & 0x00ffffff;
/* Hangul jamo name fragments: column 0 = leading consonant (L),
   column 1 = vowel (V), column 2 = trailing consonant (T); indexed by
   jamo index in _getucname/find_syllable.  NOTE(review): most rows of
   this table are missing from this extract. */
648 static char *hangul_syllables
[][3] = {
652 { "D", "YAE", "GS" },
653 { "DD", "EO", "N", },
655 { "M", "YEO", "NH" },
659 { "SS", "WAE", "LM" },
663 { "C", "WEO", "LP" },
/* True iff `code` lies in one of the CJK Unified Ideograph ranges
   (Ext A, base block, Ext B) — these have algorithmic names. */
680 is_unified_ideograph(Py_UCS4 code
)
683 (0x3400 <= code
&& code
<= 0x4DB5) || /* CJK Ideograph Extension A */
684 (0x4E00 <= code
&& code
<= 0x9FA5) || /* CJK Ideograph */
685 (0x20000 <= code
&& code
<= 0x2A6D6));/* CJK Ideograph Extension B */
/* Write the character name of `code` into buffer[0..buflen); returns 0
   on failure (unknown code point or buffer too small), nonzero on
   success.  Hangul syllables and CJK unified ideographs are generated
   algorithmically; everything else is decoded from the compressed
   phrasebook/lexicon in unicodename_db.h.
   NOTE(review): the sprintf into `buffer` below is preceded by a buflen
   check on a line not visible in this extract — confirm it. */
689 _getucname(Py_UCS4 code
, char* buffer
, int buflen
)
696 if (SBase
<= code
&& code
< SBase
+SCount
) {
697 /* Hangul syllable. */
698 int SIndex
= code
- SBase
;
699 int L
= SIndex
/ NCount
;
700 int V
= (SIndex
% NCount
) / TCount
;
701 int T
= SIndex
% TCount
;
704 /* Worst case: HANGUL SYLLABLE <10chars>. */
706 strcpy(buffer
, "HANGUL SYLLABLE ");
708 strcpy(buffer
, hangul_syllables
[L
][0]);
709 buffer
+= strlen(hangul_syllables
[L
][0]);
710 strcpy(buffer
, hangul_syllables
[V
][1]);
711 buffer
+= strlen(hangul_syllables
[V
][1]);
712 strcpy(buffer
, hangul_syllables
[T
][2]);
713 buffer
+= strlen(hangul_syllables
[T
][2]);
718 if (is_unified_ideograph(code
)) {
720 /* Worst case: CJK UNIFIED IDEOGRAPH-20000 */
722 sprintf(buffer
, "CJK UNIFIED IDEOGRAPH-%X", code
);
726 if (code
>= 0x110000)
729 /* get offset into phrasebook */
730 offset
= phrasebook_offset1
[(code
>>phrasebook_shift
)];
731 offset
= phrasebook_offset2
[(offset
<<phrasebook_shift
) +
732 (code
&((1<<phrasebook_shift
)-1))];
740 word
= phrasebook
[offset
] - phrasebook_short
;
742 word
= (word
<< 8) + phrasebook
[offset
+1];
745 word
= phrasebook
[offset
++];
748 return 0; /* buffer overflow */
751 /* copy word string from lexicon. the last character in the
752 word has bit 7 set. the last word in a string ends with
a sentinel (continuation of this comment is missing from this extract). */
754 w
= lexicon
+ lexicon_offset
[word
];
757 return 0; /* buffer overflow */
761 return 0; /* buffer overflow */
762 buffer
[i
++] = *w
& 127;
764 break; /* end of word */
/* Case-insensitive check that `name[0..namelen)` is exactly the name of
   code point `code`: regenerate the name with _getucname and compare.
   Returns nonzero on match. */
771 _cmpname(int code
, const char* name
, int namelen
)
773 /* check if code corresponds to the given name */
775 char buffer
[NAME_MAXLEN
];
776 if (!_getucname(code
, buffer
, sizeof(buffer
)))
778 for (i
= 0; i
< namelen
; i
++) {
779 if (toupper(name
[i
]) != buffer
[i
])
782 return buffer
[namelen
] == '\0';
/* Match the longest hangul_syllables[*][column] fragment at the start
   of `str`; on success *len/*pos receive the match length and jamo
   index (the assignment lines are not visible in this extract). */
786 find_syllable(const char *str
, int *len
, int *pos
, int count
, int column
)
790 for (i
= 0; i
< count
; i
++) {
791 char *s
= hangul_syllables
[i
][column
];
795 if (strncmp(str
, s
, len1
) == 0) {
/* Inverse of _getucname: resolve `name[0..namelen)` to a code point in
   *code; returns nonzero on success.  Hangul syllable and CJK unified
   ideograph names are parsed algorithmically; all other names go
   through an open-addressing hash probe over the name table. */
807 _getcode(const char* name
, int namelen
, Py_UCS4
* code
)
810 unsigned int mask
= code_size
-1;
811 unsigned int i
, incr
;
813 /* Check for hangul syllables. */
814 if (strncmp(name
, "HANGUL SYLLABLE ", 16) == 0) {
816 const char *pos
= name
+ 16;
817 find_syllable(pos
, &len
, &L
, LCount
, 0);
819 find_syllable(pos
, &len
, &V
, VCount
, 1);
821 find_syllable(pos
, &len
, &T
, TCount
, 2);
823 if (L
!= -1 && V
!= -1 && T
!= -1 && pos
-name
== namelen
) {
824 *code
= SBase
+ (L
*VCount
+V
)*TCount
+ T
;
827 /* Otherwise, it's an illegal syllable name. */
831 /* Check for unified ideographs. */
832 if (strncmp(name
, "CJK UNIFIED IDEOGRAPH-", 22) == 0) {
833 /* Four or five hexdigits must follow. */
837 if (namelen
!= 4 && namelen
!= 5)
841 if (*name
>= '0' && *name
<= '9')
843 else if (*name
>= 'A' && *name
<= 'F')
844 v
+= *name
- 'A' + 10;
849 if (!is_unified_ideograph(v
))
855 /* the following is the same as python's dictionary lookup, with
856 only minor changes. see the makeunicodedata script for more
details (end of this comment is missing from this extract). */
859 h
= (unsigned int) _gethash(name
, namelen
, code_magic
);
864 if (_cmpname(v
, name
, namelen
)) {
868 incr
= (h
^ (h
>> 3)) & mask
;
872 i
= (i
+ incr
) & mask
;
876 if (_cmpname(v
, name
, namelen
)) {
882 incr
= incr
^ code_poly
;
/* C API capsule exported as unicodedata.ucnhash_CAPI so unicodeobject.c
   can resolve \N{...} escapes (initializer body partly missing from
   this extract). */
886 static const _PyUnicode_Name_CAPI hashAPI
=
888 sizeof(_PyUnicode_Name_CAPI
),
893 /* -------------------------------------------------------------------- */
894 /* Python bindings */
896 PyDoc_STRVAR(unicodedata_name__doc__
,
897 "name(unichr[, default])\n\
898 Returns the name assigned to the Unicode character unichr as a\n\
899 string. If no name is defined, default is returned, or, if not\n\
900 given, ValueError is raised.");
/* name(unichr[, default]): character name via _getucname into a
   stack buffer; ValueError (or `default`) when the character has no
   name.  TypeError unless the argument is exactly one character. */
903 unicodedata_name(PyObject
* self
, PyObject
* args
)
905 char name
[NAME_MAXLEN
];
908 PyObject
* defobj
= NULL
;
909 if (!PyArg_ParseTuple(args
, "O!|O:name", &PyUnicode_Type
, &v
, &defobj
))
912 if (PyUnicode_GET_SIZE(v
) != 1) {
913 PyErr_SetString(PyExc_TypeError
,
914 "need a single Unicode character as parameter");
918 if (!_getucname((Py_UCS4
) *PyUnicode_AS_UNICODE(v
),
919 name
, sizeof(name
))) {
920 if (defobj
== NULL
) {
921 PyErr_SetString(PyExc_ValueError
, "no such name");
930 return Py_BuildValue("s", name
);
933 PyDoc_STRVAR(unicodedata_lookup__doc__
,
936 Look up character by name. If a character with the\n\
937 given name is found, return the corresponding Unicode\n\
938 character. If not found, KeyError is raised.");
/* lookup(name): resolve a character name to a 1-character unicode
   string via _getcode; unknown names raise KeyError.
   NOTE(review): PyMem_MALLOC's result is used by sprintf without a NULL
   check, and no PyMem_FREE(buf) is visible after PyErr_SetString in
   this extract — check for an OOM crash and a memory leak here. */
941 unicodedata_lookup(PyObject
* self
, PyObject
* args
)
948 if (!PyArg_ParseTuple(args
, "s#:lookup", &name
, &namelen
))
951 if (!_getcode(name
, namelen
, &code
)) {
952 char fmt
[] = "undefined character name '%s'";
953 char *buf
= PyMem_MALLOC(sizeof(fmt
) + namelen
);
954 sprintf(buf
, fmt
, name
);
955 PyErr_SetString(PyExc_KeyError
, buf
);
960 str
[0] = (Py_UNICODE
) code
;
961 return PyUnicode_FromUnicode(str
, 1);
964 /* XXX Add doc strings. */
/* Method table mapping the module's public names to the C handlers
   above; all entries take METH_VARARGS. */
966 static PyMethodDef unicodedata_functions
[] = {
967 {"decimal", unicodedata_decimal
, METH_VARARGS
, unicodedata_decimal__doc__
},
968 {"digit", unicodedata_digit
, METH_VARARGS
, unicodedata_digit__doc__
},
969 {"numeric", unicodedata_numeric
, METH_VARARGS
, unicodedata_numeric__doc__
},
970 {"category", unicodedata_category
, METH_VARARGS
,
971 unicodedata_category__doc__
},
972 {"bidirectional", unicodedata_bidirectional
, METH_VARARGS
,
973 unicodedata_bidirectional__doc__
},
974 {"combining", unicodedata_combining
, METH_VARARGS
,
975 unicodedata_combining__doc__
},
976 {"mirrored", unicodedata_mirrored
, METH_VARARGS
,
977 unicodedata_mirrored__doc__
},
978 {"east_asian_width", unicodedata_east_asian_width
, METH_VARARGS
,
979 unicodedata_east_asian_width__doc__
},
980 {"decomposition", unicodedata_decomposition
, METH_VARARGS
,
981 unicodedata_decomposition__doc__
},
982 {"name", unicodedata_name
, METH_VARARGS
, unicodedata_name__doc__
},
983 {"lookup", unicodedata_lookup
, METH_VARARGS
, unicodedata_lookup__doc__
},
984 {"normalize", unicodedata_normalize
, METH_VARARGS
,
985 unicodedata_normalize__doc__
},
986 {NULL
, NULL
} /* sentinel */
989 PyDoc_STRVAR(unicodedata_docstring
,
990 "This module provides access to the Unicode Character Database which\n\
991 defines character properties for all Unicode characters. The data in\n\
992 this database is based on the UnicodeData.txt file version\n\
993 3.2.0 which is publically available from ftp://ftp.unicode.org/.\n\
995 The module uses the same names and symbols as defined by the\n\
996 UnicodeData File Format 3.2.0 (see\n\
997 http://www.unicode.org/Public/3.2-Update/UnicodeData-3.2.0.html).");
/* Module init: register the method table and docstring, publish the
   UNIDATA_VERSION constant, and export the name-lookup C API (hashAPI)
   as a CObject under "ucnhash_CAPI". */
1000 initunicodedata(void)
1005 "unicodedata", unicodedata_functions
, unicodedata_docstring
);
1009 PyModule_AddStringConstant(m
, "unidata_version", UNIDATA_VERSION
);
1012 v
= PyCObject_FromVoidPtr((void *) &hashAPI
, NULL
);
1014 PyModule_AddObject(m
, "ucnhash_CAPI", v
);
1020 indent-tabs-mode: nil