Point users to the subprocess module in the docs for os.system, os.spawn*, os.popen2...
[pytest.git] / Objects / unicodectype.c
blob 73def09dbb88cc887fedc73ae97433abfe791d87
1 /*
2 Unicode character type helpers.
4 Written by Marc-Andre Lemburg (mal@lemburg.com).
5 Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
7 Copyright (c) Corporation for National Research Initiatives.
9 */
11 #include "Python.h"
12 #include "unicodeobject.h"
/* Single-bit property flags.  The classification helpers in this file
   test them against the `flags` member of a type record with a
   bitwise AND. */
#define ALPHA_MASK      (1 << 0)
#define DECIMAL_MASK    (1 << 1)
#define DIGIT_MASK      (1 << 2)
#define LOWER_MASK      (1 << 3)
#define LINEBREAK_MASK  (1 << 4)
#define SPACE_MASK      (1 << 5)
#define TITLE_MASK      (1 << 6)
#define UPPER_MASK      (1 << 7)
23 typedef struct {
24 const Py_UNICODE upper;
25 const Py_UNICODE lower;
26 const Py_UNICODE title;
27 const unsigned char decimal;
28 const unsigned char digit;
29 const unsigned short flags;
30 } _PyUnicode_TypeRecord;
32 #include "unicodetype_db.h"
34 static const _PyUnicode_TypeRecord *
35 gettyperecord(Py_UNICODE code)
37 int index;
39 #ifdef Py_UNICODE_WIDE
40 if (code >= 0x110000)
41 index = 0;
42 else
43 #endif
45 index = index1[(code>>SHIFT)];
46 index = index2[(index<<SHIFT)+(code&((1<<SHIFT)-1))];
49 return &_PyUnicode_TypeRecords[index];
52 /* Returns 1 for Unicode characters having the category 'Zl', 'Zp' or
53 type 'B', 0 otherwise. */
55 int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)
57 switch (ch) {
58 case 0x000A: /* LINE FEED */
59 case 0x000D: /* CARRIAGE RETURN */
60 case 0x001C: /* FILE SEPARATOR */
61 case 0x001D: /* GROUP SEPARATOR */
62 case 0x001E: /* RECORD SEPARATOR */
63 case 0x0085: /* NEXT LINE */
64 case 0x2028: /* LINE SEPARATOR */
65 case 0x2029: /* PARAGRAPH SEPARATOR */
66 return 1;
67 default:
68 return 0;
72 /* Returns the titlecase Unicode characters corresponding to ch or just
73 ch if no titlecase mapping is known. */
75 Py_UNICODE _PyUnicode_ToTitlecase(register Py_UNICODE ch)
77 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
78 int delta;
80 if (ctype->title)
81 delta = ctype->title;
82 else
83 delta = ctype->upper;
85 if (delta >= 32768)
86 delta -= 65536;
88 return ch + delta;
91 /* Returns 1 for Unicode characters having the category 'Lt', 0
92 otherwise. */
94 int _PyUnicode_IsTitlecase(Py_UNICODE ch)
96 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
98 return (ctype->flags & TITLE_MASK) != 0;
101 /* Returns the integer decimal (0-9) for Unicode characters having
102 this property, -1 otherwise. */
104 int _PyUnicode_ToDecimalDigit(Py_UNICODE ch)
106 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
108 return (ctype->flags & DECIMAL_MASK) ? ctype->decimal : -1;
111 int _PyUnicode_IsDecimalDigit(Py_UNICODE ch)
113 if (_PyUnicode_ToDecimalDigit(ch) < 0)
114 return 0;
115 return 1;
118 /* Returns the integer digit (0-9) for Unicode characters having
119 this property, -1 otherwise. */
121 int _PyUnicode_ToDigit(Py_UNICODE ch)
123 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
125 return (ctype->flags & DIGIT_MASK) ? ctype->digit : -1;
128 int _PyUnicode_IsDigit(Py_UNICODE ch)
130 if (_PyUnicode_ToDigit(ch) < 0)
131 return 0;
132 return 1;
135 /* Returns the numeric value as double for Unicode characters having
136 this property, -1.0 otherwise. */
138 /* TODO: replace with unicodetype_db.h table */
140 double _PyUnicode_ToNumeric(Py_UNICODE ch)
142 switch (ch) {
143 case 0x0F33:
144 return (double) -1 / 2;
145 case 0x17F0:
146 case 0x3007:
147 #ifdef Py_UNICODE_WIDE
148 case 0x1018A:
149 #endif
150 return (double) 0;
151 case 0x09F4:
152 case 0x17F1:
153 case 0x215F:
154 case 0x2160:
155 case 0x2170:
156 case 0x3021:
157 case 0x3192:
158 case 0x3220:
159 case 0x3280:
160 #ifdef Py_UNICODE_WIDE
161 case 0x10107:
162 case 0x10142:
163 case 0x10158:
164 case 0x10159:
165 case 0x1015A:
166 case 0x10320:
167 case 0x103D1:
168 #endif
169 return (double) 1;
170 case 0x00BD:
171 case 0x0F2A:
172 case 0x2CFD:
173 #ifdef Py_UNICODE_WIDE
174 case 0x10141:
175 case 0x10175:
176 case 0x10176:
177 #endif
178 return (double) 1 / 2;
179 case 0x2153:
180 return (double) 1 / 3;
181 case 0x00BC:
182 #ifdef Py_UNICODE_WIDE
183 case 0x10140:
184 #endif
185 return (double) 1 / 4;
186 case 0x2155:
187 return (double) 1 / 5;
188 case 0x2159:
189 return (double) 1 / 6;
190 case 0x215B:
191 return (double) 1 / 8;
192 case 0x0BF0:
193 case 0x1372:
194 case 0x2169:
195 case 0x2179:
196 case 0x2469:
197 case 0x247D:
198 case 0x2491:
199 case 0x24FE:
200 case 0x277F:
201 case 0x2789:
202 case 0x2793:
203 case 0x3038:
204 case 0x3229:
205 case 0x3289:
206 #ifdef Py_UNICODE_WIDE
207 case 0x10110:
208 case 0x10149:
209 case 0x10150:
210 case 0x10157:
211 case 0x10160:
212 case 0x10161:
213 case 0x10162:
214 case 0x10163:
215 case 0x10164:
216 case 0x10322:
217 case 0x103D3:
218 case 0x10A44:
219 #endif
220 return (double) 10;
221 case 0x0BF1:
222 case 0x137B:
223 case 0x216D:
224 case 0x217D:
225 #ifdef Py_UNICODE_WIDE
226 case 0x10119:
227 case 0x1014B:
228 case 0x10152:
229 case 0x1016A:
230 case 0x103D5:
231 case 0x10A46:
232 #endif
233 return (double) 100;
234 case 0x0BF2:
235 case 0x216F:
236 case 0x217F:
237 case 0x2180:
238 #ifdef Py_UNICODE_WIDE
239 case 0x10122:
240 case 0x1014D:
241 case 0x10154:
242 case 0x10171:
243 case 0x10A47:
244 #endif
245 return (double) 1000;
246 case 0x137C:
247 case 0x2182:
248 #ifdef Py_UNICODE_WIDE
249 case 0x1012B:
250 case 0x10155:
251 #endif
252 return (double) 10000;
253 case 0x216A:
254 case 0x217A:
255 case 0x246A:
256 case 0x247E:
257 case 0x2492:
258 case 0x24EB:
259 return (double) 11;
260 case 0x0F2F:
261 return (double) 11 / 2;
262 case 0x216B:
263 case 0x217B:
264 case 0x246B:
265 case 0x247F:
266 case 0x2493:
267 case 0x24EC:
268 return (double) 12;
269 case 0x246C:
270 case 0x2480:
271 case 0x2494:
272 case 0x24ED:
273 return (double) 13;
274 case 0x0F30:
275 return (double) 13 / 2;
276 case 0x246D:
277 case 0x2481:
278 case 0x2495:
279 case 0x24EE:
280 return (double) 14;
281 case 0x246E:
282 case 0x2482:
283 case 0x2496:
284 case 0x24EF:
285 return (double) 15;
286 case 0x0F31:
287 return (double) 15 / 2;
288 case 0x09F9:
289 case 0x246F:
290 case 0x2483:
291 case 0x2497:
292 case 0x24F0:
293 return (double) 16;
294 case 0x16EE:
295 case 0x2470:
296 case 0x2484:
297 case 0x2498:
298 case 0x24F1:
299 return (double) 17;
300 case 0x0F32:
301 return (double) 17 / 2;
302 case 0x16EF:
303 case 0x2471:
304 case 0x2485:
305 case 0x2499:
306 case 0x24F2:
307 return (double) 18;
308 case 0x16F0:
309 case 0x2472:
310 case 0x2486:
311 case 0x249A:
312 case 0x24F3:
313 return (double) 19;
314 case 0x09F5:
315 case 0x17F2:
316 case 0x2161:
317 case 0x2171:
318 case 0x3022:
319 case 0x3193:
320 case 0x3221:
321 case 0x3281:
322 #ifdef Py_UNICODE_WIDE
323 case 0x10108:
324 case 0x1015B:
325 case 0x1015C:
326 case 0x1015D:
327 case 0x1015E:
328 case 0x103D2:
329 #endif
330 return (double) 2;
331 case 0x2154:
332 #ifdef Py_UNICODE_WIDE
333 case 0x10177:
334 #endif
335 return (double) 2 / 3;
336 case 0x2156:
337 return (double) 2 / 5;
338 case 0x1373:
339 case 0x2473:
340 case 0x2487:
341 case 0x249B:
342 case 0x24F4:
343 case 0x3039:
344 #ifdef Py_UNICODE_WIDE
345 case 0x10111:
346 case 0x103D4:
347 case 0x10A45:
348 #endif
349 return (double) 20;
350 #ifdef Py_UNICODE_WIDE
351 case 0x1011A:
352 return (double) 200;
353 case 0x10123:
354 return (double) 2000;
355 case 0x1012C:
356 return (double) 20000;
357 #endif
358 case 0x3251:
359 return (double) 21;
360 case 0x3252:
361 return (double) 22;
362 case 0x3253:
363 return (double) 23;
364 case 0x3254:
365 return (double) 24;
366 case 0x3255:
367 return (double) 25;
368 case 0x3256:
369 return (double) 26;
370 case 0x3257:
371 return (double) 27;
372 case 0x3258:
373 return (double) 28;
374 case 0x3259:
375 return (double) 29;
376 case 0x09F6:
377 case 0x17F3:
378 case 0x2162:
379 case 0x2172:
380 case 0x3023:
381 case 0x3194:
382 case 0x3222:
383 case 0x3282:
384 #ifdef Py_UNICODE_WIDE
385 case 0x10109:
386 #endif
387 return (double) 3;
388 case 0x0F2B:
389 return (double) 3 / 2;
390 case 0x00BE:
391 #ifdef Py_UNICODE_WIDE
392 case 0x10178:
393 #endif
394 return (double) 3 / 4;
395 case 0x2157:
396 return (double) 3 / 5;
397 case 0x215C:
398 return (double) 3 / 8;
399 case 0x1374:
400 case 0x303A:
401 case 0x325A:
402 #ifdef Py_UNICODE_WIDE
403 case 0x10112:
404 case 0x10165:
405 #endif
406 return (double) 30;
407 #ifdef Py_UNICODE_WIDE
408 case 0x1011B:
409 case 0x1016B:
410 return (double) 300;
411 case 0x10124:
412 return (double) 3000;
413 case 0x1012D:
414 return (double) 30000;
415 #endif
416 case 0x325B:
417 return (double) 31;
418 case 0x325C:
419 return (double) 32;
420 case 0x325D:
421 return (double) 33;
422 case 0x325E:
423 return (double) 34;
424 case 0x325F:
425 return (double) 35;
426 case 0x32B1:
427 return (double) 36;
428 case 0x32B2:
429 return (double) 37;
430 case 0x32B3:
431 return (double) 38;
432 case 0x32B4:
433 return (double) 39;
434 case 0x09F7:
435 case 0x17F4:
436 case 0x2163:
437 case 0x2173:
438 case 0x3024:
439 case 0x3195:
440 case 0x3223:
441 case 0x3283:
442 #ifdef Py_UNICODE_WIDE
443 case 0x1010A:
444 #endif
445 return (double) 4;
446 case 0x2158:
447 return (double) 4 / 5;
448 case 0x1375:
449 case 0x32B5:
450 #ifdef Py_UNICODE_WIDE
451 case 0x10113:
452 #endif
453 return (double) 40;
454 #ifdef Py_UNICODE_WIDE
455 case 0x1011C:
456 return (double) 400;
457 case 0x10125:
458 return (double) 4000;
459 case 0x1012E:
460 return (double) 40000;
461 #endif
462 case 0x32B6:
463 return (double) 41;
464 case 0x32B7:
465 return (double) 42;
466 case 0x32B8:
467 return (double) 43;
468 case 0x32B9:
469 return (double) 44;
470 case 0x32BA:
471 return (double) 45;
472 case 0x32BB:
473 return (double) 46;
474 case 0x32BC:
475 return (double) 47;
476 case 0x32BD:
477 return (double) 48;
478 case 0x32BE:
479 return (double) 49;
480 case 0x17F5:
481 case 0x2164:
482 case 0x2174:
483 case 0x3025:
484 case 0x3224:
485 case 0x3284:
486 #ifdef Py_UNICODE_WIDE
487 case 0x1010B:
488 case 0x10143:
489 case 0x10148:
490 case 0x1014F:
491 case 0x1015F:
492 case 0x10173:
493 case 0x10321:
494 #endif
495 return (double) 5;
496 case 0x0F2C:
497 return (double) 5 / 2;
498 case 0x215A:
499 return (double) 5 / 6;
500 case 0x215D:
501 return (double) 5 / 8;
502 case 0x1376:
503 case 0x216C:
504 case 0x217C:
505 case 0x32BF:
506 #ifdef Py_UNICODE_WIDE
507 case 0x10114:
508 case 0x10144:
509 case 0x1014A:
510 case 0x10151:
511 case 0x10166:
512 case 0x10167:
513 case 0x10168:
514 case 0x10169:
515 case 0x10174:
516 case 0x10323:
517 #endif
518 return (double) 50;
519 case 0x216E:
520 case 0x217E:
521 #ifdef Py_UNICODE_WIDE
522 case 0x1011D:
523 case 0x10145:
524 case 0x1014C:
525 case 0x10153:
526 case 0x1016C:
527 case 0x1016D:
528 case 0x1016E:
529 case 0x1016F:
530 case 0x10170:
531 #endif
532 return (double) 500;
533 case 0x2181:
534 #ifdef Py_UNICODE_WIDE
535 case 0x10126:
536 case 0x10146:
537 case 0x1014E:
538 case 0x10172:
539 #endif
540 return (double) 5000;
541 #ifdef Py_UNICODE_WIDE
542 case 0x1012F:
543 case 0x10147:
544 case 0x10156:
545 return (double) 50000;
546 #endif
547 case 0x17F6:
548 case 0x2165:
549 case 0x2175:
550 case 0x3026:
551 case 0x3225:
552 case 0x3285:
553 #ifdef Py_UNICODE_WIDE
554 case 0x1010C:
555 #endif
556 return (double) 6;
557 case 0x1377:
558 #ifdef Py_UNICODE_WIDE
559 case 0x10115:
560 #endif
561 return (double) 60;
562 #ifdef Py_UNICODE_WIDE
563 case 0x1011E:
564 return (double) 600;
565 case 0x10127:
566 return (double) 6000;
567 case 0x10130:
568 return (double) 60000;
569 #endif
570 case 0x17F7:
571 case 0x2166:
572 case 0x2176:
573 case 0x3027:
574 case 0x3226:
575 case 0x3286:
576 #ifdef Py_UNICODE_WIDE
577 case 0x1010D:
578 #endif
579 return (double) 7;
580 case 0x0F2D:
581 return (double) 7 / 2;
582 case 0x215E:
583 return (double) 7 / 8;
584 case 0x1378:
585 #ifdef Py_UNICODE_WIDE
586 case 0x10116:
587 #endif
588 return (double) 70;
589 #ifdef Py_UNICODE_WIDE
590 case 0x1011F:
591 return (double) 700;
592 case 0x10128:
593 return (double) 7000;
594 case 0x10131:
595 return (double) 70000;
596 #endif
597 case 0x17F8:
598 case 0x2167:
599 case 0x2177:
600 case 0x3028:
601 case 0x3227:
602 case 0x3287:
603 #ifdef Py_UNICODE_WIDE
604 case 0x1010E:
605 #endif
606 return (double) 8;
607 case 0x1379:
608 #ifdef Py_UNICODE_WIDE
609 case 0x10117:
610 #endif
611 return (double) 80;
612 #ifdef Py_UNICODE_WIDE
613 case 0x10120:
614 return (double) 800;
615 case 0x10129:
616 return (double) 8000;
617 case 0x10132:
618 return (double) 80000;
619 #endif
620 case 0x17F9:
621 case 0x2168:
622 case 0x2178:
623 case 0x3029:
624 case 0x3228:
625 case 0x3288:
626 #ifdef Py_UNICODE_WIDE
627 case 0x1010F:
628 #endif
629 return (double) 9;
630 case 0x0F2E:
631 return (double) 9 / 2;
632 case 0x137A:
633 #ifdef Py_UNICODE_WIDE
634 case 0x10118:
635 #endif
636 return (double) 90;
637 #ifdef Py_UNICODE_WIDE
638 case 0x10121:
639 case 0x1034A:
640 return (double) 900;
641 case 0x1012A:
642 return (double) 9000;
643 case 0x10133:
644 return (double) 90000;
645 #endif
646 default:
647 return (double) _PyUnicode_ToDigit(ch);
651 int _PyUnicode_IsNumeric(Py_UNICODE ch)
653 return _PyUnicode_ToNumeric(ch) != -1.0;
656 #ifndef WANT_WCTYPE_FUNCTIONS
658 /* Returns 1 for Unicode characters having the bidirectional type
659 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. */
661 int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
663 switch (ch) {
664 case 0x0009: /* HORIZONTAL TABULATION */
665 case 0x000A: /* LINE FEED */
666 case 0x000B: /* VERTICAL TABULATION */
667 case 0x000C: /* FORM FEED */
668 case 0x000D: /* CARRIAGE RETURN */
669 case 0x001C: /* FILE SEPARATOR */
670 case 0x001D: /* GROUP SEPARATOR */
671 case 0x001E: /* RECORD SEPARATOR */
672 case 0x001F: /* UNIT SEPARATOR */
673 case 0x0020: /* SPACE */
674 case 0x0085: /* NEXT LINE */
675 case 0x00A0: /* NO-BREAK SPACE */
676 case 0x1680: /* OGHAM SPACE MARK */
677 case 0x2000: /* EN QUAD */
678 case 0x2001: /* EM QUAD */
679 case 0x2002: /* EN SPACE */
680 case 0x2003: /* EM SPACE */
681 case 0x2004: /* THREE-PER-EM SPACE */
682 case 0x2005: /* FOUR-PER-EM SPACE */
683 case 0x2006: /* SIX-PER-EM SPACE */
684 case 0x2007: /* FIGURE SPACE */
685 case 0x2008: /* PUNCTUATION SPACE */
686 case 0x2009: /* THIN SPACE */
687 case 0x200A: /* HAIR SPACE */
688 case 0x200B: /* ZERO WIDTH SPACE */
689 case 0x2028: /* LINE SEPARATOR */
690 case 0x2029: /* PARAGRAPH SEPARATOR */
691 case 0x202F: /* NARROW NO-BREAK SPACE */
692 case 0x205F: /* MEDIUM MATHEMATICAL SPACE */
693 case 0x3000: /* IDEOGRAPHIC SPACE */
694 return 1;
695 default:
696 return 0;
700 /* Returns 1 for Unicode characters having the category 'Ll', 0
701 otherwise. */
703 int _PyUnicode_IsLowercase(Py_UNICODE ch)
705 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
707 return (ctype->flags & LOWER_MASK) != 0;
710 /* Returns 1 for Unicode characters having the category 'Lu', 0
711 otherwise. */
713 int _PyUnicode_IsUppercase(Py_UNICODE ch)
715 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
717 return (ctype->flags & UPPER_MASK) != 0;
720 /* Returns the uppercase Unicode characters corresponding to ch or just
721 ch if no uppercase mapping is known. */
723 Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
725 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
726 int delta = ctype->upper;
727 if (delta >= 32768)
728 delta -= 65536;
729 return ch + delta;
732 /* Returns the lowercase Unicode characters corresponding to ch or just
733 ch if no lowercase mapping is known. */
735 Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
737 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
738 int delta = ctype->lower;
739 if (delta >= 32768)
740 delta -= 65536;
741 return ch + delta;
744 /* Returns 1 for Unicode characters having the category 'Ll', 'Lu', 'Lt',
745 'Lo' or 'Lm', 0 otherwise. */
747 int _PyUnicode_IsAlpha(Py_UNICODE ch)
749 const _PyUnicode_TypeRecord *ctype = gettyperecord(ch);
751 return (ctype->flags & ALPHA_MASK) != 0;
754 #else
756 /* Export the interfaces using the wchar_t type for portability
757 reasons: */
759 int _PyUnicode_IsWhitespace(Py_UNICODE ch)
761 return iswspace(ch);
764 int _PyUnicode_IsLowercase(Py_UNICODE ch)
766 return iswlower(ch);
769 int _PyUnicode_IsUppercase(Py_UNICODE ch)
771 return iswupper(ch);
774 Py_UNICODE _PyUnicode_ToLowercase(Py_UNICODE ch)
776 return towlower(ch);
779 Py_UNICODE _PyUnicode_ToUppercase(Py_UNICODE ch)
781 return towupper(ch);
784 int _PyUnicode_IsAlpha(Py_UNICODE ch)
786 return iswalpha(ch);
789 #endif