2 * This code handles decoding UTF strings for foreach_reverse loops. There are
3 * 6 combinations of conversions between char, wchar, and dchar, and 2 of each (a 1-argument and a 2-argument delegate version).
6 * Copyright: Copyright Digital Mars 2004 - 2010.
7 * License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
8 * Authors: Walter Bright, Sean Kelly
9 * Source: $(DRUNTIMESRC rt/_aApplyR.d)
13 import core
.internal
.utf
;
15 /**********************************************/
16 /* 1 argument versions */
18 // dg is D, but _aApplyRcd1() is C
19 extern (D
) alias int delegate(void *) dg_t
;
21 extern (C
) int _aApplyRcd1(in char[] aa
, dg_t dg
)
24 debug(apply
) printf("_aApplyRcd1(), len = %d\n", aa
.length
);
25 for (size_t i
= aa
.length
; i
!= 0; )
31 { char c
= cast(char)d
;
35 while ((c
& 0xC0) != 0xC0)
37 onUnicodeError("Invalid UTF-8 sequence", 0);
46 result
= dg(cast(void *)&d
);
55 debug(apply
) printf("_aApplyRcd1.unittest\n");
60 foreach_reverse (dchar d
; s
)
64 case 0: assert(d
== 'o'); break;
65 case 1: assert(d
== 'l'); break;
66 case 2: assert(d
== 'l'); break;
67 case 3: assert(d
== 'e'); break;
68 case 4: assert(d
== 'h'); break;
75 s
= "a\u1234\U000A0456b";
77 foreach_reverse (dchar d
; s
)
79 //printf("i = %d, d = %x\n", i, d);
82 case 0: assert(d
== 'b'); break;
83 case 1: assert(d
== '\U000A0456'); break;
84 case 2: assert(d
== '\u1234'); break;
85 case 3: assert(d
== 'a'); break;
93 /*****************************/
95 extern (C
) int _aApplyRwd1(in wchar[] aa
, dg_t dg
)
98 debug(apply
) printf("_aApplyRwd1(), len = %d\n", aa
.length
);
99 for (size_t i
= aa
.length
; i
!= 0; )
104 if (d
>= 0xDC00 && d
<= 0xDFFF)
106 onUnicodeError("Invalid UTF-16 sequence", 0);
108 d
= ((aa
[i
] - 0xD7C0) << 10) + (d
- 0xDC00);
110 result
= dg(cast(void *)&d
);
119 debug(apply
) printf("_aApplyRwd1.unittest\n");
124 foreach_reverse (dchar d
; s
)
128 case 0: assert(d
== 'o'); break;
129 case 1: assert(d
== 'l'); break;
130 case 2: assert(d
== 'l'); break;
131 case 3: assert(d
== 'e'); break;
132 case 4: assert(d
== 'h'); break;
139 s
= "a\u1234\U000A0456b";
141 foreach_reverse (dchar d
; s
)
143 //printf("i = %d, d = %x\n", i, d);
146 case 0: assert(d
== 'b'); break;
147 case 1: assert(d
== '\U000A0456'); break;
148 case 2: assert(d
== '\u1234'); break;
149 case 3: assert(d
== 'a'); break;
157 /*****************************/
159 extern (C
) int _aApplyRcw1(in char[] aa
, dg_t dg
)
162 debug(apply
) printf("_aApplyRcw1(), len = %d\n", aa
.length
);
163 for (size_t i
= aa
.length
; i
!= 0; )
170 { char c
= cast(char)w
;
174 while ((c
& 0xC0) != 0xC0)
176 onUnicodeError("Invalid UTF-8 sequence", 0);
178 d |
= (c
& 0x3F) << j
;
189 w
= cast(wchar) ((((d
- 0x10000) >> 10) & 0x3FF) + 0xD800);
190 result
= dg(cast(void *)&w
);
193 w
= cast(wchar) (((d
- 0x10000) & 0x3FF) + 0xDC00);
196 result
= dg(cast(void *)&w
);
205 debug(apply
) printf("_aApplyRcw1.unittest\n");
210 foreach_reverse (wchar d
; s
)
214 case 0: assert(d
== 'o'); break;
215 case 1: assert(d
== 'l'); break;
216 case 2: assert(d
== 'l'); break;
217 case 3: assert(d
== 'e'); break;
218 case 4: assert(d
== 'h'); break;
225 s
= "a\u1234\U000A0456b";
227 foreach_reverse (wchar d
; s
)
229 //printf("i = %d, d = %x\n", i, d);
232 case 0: assert(d
== 'b'); break;
233 case 1: assert(d
== 0xDA41); break;
234 case 2: assert(d
== 0xDC56); break;
235 case 3: assert(d
== 0x1234); break;
236 case 4: assert(d
== 'a'); break;
244 /*****************************/
246 extern (C
) int _aApplyRwc1(in wchar[] aa
, dg_t dg
)
249 debug(apply
) printf("_aApplyRwc1(), len = %d\n", aa
.length
);
250 for (size_t i
= aa
.length
; i
!= 0; )
256 if (d
>= 0xDC00 && d
<= 0xDFFF)
258 onUnicodeError("Invalid UTF-16 sequence", 0);
260 d
= ((aa
[i
] - 0xD7C0) << 10) + (d
- 0xDC00);
267 auto b
= toUTF8(buf
, d
);
270 result
= dg(cast(void *)&c2
);
277 result
= dg(cast(void *)&c
);
286 debug(apply
) printf("_aApplyRwc1.unittest\n");
291 foreach_reverse (char d
; s
)
295 case 0: assert(d
== 'o'); break;
296 case 1: assert(d
== 'l'); break;
297 case 2: assert(d
== 'l'); break;
298 case 3: assert(d
== 'e'); break;
299 case 4: assert(d
== 'h'); break;
306 s
= "a\u1234\U000A0456b";
308 foreach_reverse (char d
; s
)
310 //printf("i = %d, d = %x\n", i, d);
313 case 0: assert(d
== 'b'); break;
314 case 1: assert(d
== 0xF2); break;
315 case 2: assert(d
== 0xA0); break;
316 case 3: assert(d
== 0x91); break;
317 case 4: assert(d
== 0x96); break;
318 case 5: assert(d
== 0xE1); break;
319 case 6: assert(d
== 0x88); break;
320 case 7: assert(d
== 0xB4); break;
321 case 8: assert(d
== 'a'); break;
329 /*****************************/
331 extern (C
) int _aApplyRdc1(in dchar[] aa
, dg_t dg
)
334 debug(apply
) printf("_aApplyRdc1(), len = %d\n", aa
.length
);
335 for (size_t i
= aa
.length
; i
!= 0;)
343 auto b
= toUTF8(buf
, d
);
346 result
= dg(cast(void *)&c2
);
356 result
= dg(cast(void *)&c
);
365 debug(apply
) printf("_aApplyRdc1.unittest\n");
370 foreach_reverse (char d
; s
)
374 case 0: assert(d
== 'o'); break;
375 case 1: assert(d
== 'l'); break;
376 case 2: assert(d
== 'l'); break;
377 case 3: assert(d
== 'e'); break;
378 case 4: assert(d
== 'h'); break;
385 s
= "a\u1234\U000A0456b";
387 foreach_reverse (char d
; s
)
389 //printf("i = %d, d = %x\n", i, d);
392 case 0: assert(d
== 'b'); break;
393 case 1: assert(d
== 0xF2); break;
394 case 2: assert(d
== 0xA0); break;
395 case 3: assert(d
== 0x91); break;
396 case 4: assert(d
== 0x96); break;
397 case 5: assert(d
== 0xE1); break;
398 case 6: assert(d
== 0x88); break;
399 case 7: assert(d
== 0xB4); break;
400 case 8: assert(d
== 'a'); break;
408 /*****************************/
410 extern (C
) int _aApplyRdw1(in dchar[] aa
, dg_t dg
)
413 debug(apply
) printf("_aApplyRdw1(), len = %d\n", aa
.length
);
414 for (size_t i
= aa
.length
; i
!= 0; )
422 w
= cast(wchar) ((((d
- 0x10000) >> 10) & 0x3FF) + 0xD800);
423 result
= dg(cast(void *)&w
);
426 w
= cast(wchar) (((d
- 0x10000) & 0x3FF) + 0xDC00);
428 result
= dg(cast(void *)&w
);
437 debug(apply
) printf("_aApplyRdw1.unittest\n");
442 foreach_reverse (wchar d
; s
)
446 case 0: assert(d
== 'o'); break;
447 case 1: assert(d
== 'l'); break;
448 case 2: assert(d
== 'l'); break;
449 case 3: assert(d
== 'e'); break;
450 case 4: assert(d
== 'h'); break;
457 s
= "a\u1234\U000A0456b";
459 foreach_reverse (wchar d
; s
)
461 //printf("i = %d, d = %x\n", i, d);
464 case 0: assert(d
== 'b'); break;
465 case 1: assert(d
== 0xDA41); break;
466 case 2: assert(d
== 0xDC56); break;
467 case 3: assert(d
== 0x1234); break;
468 case 4: assert(d
== 'a'); break;
477 /****************************************************************************/
478 /* 2 argument versions */
480 // dg is D, but _aApplyRcd2() is C
481 extern (D
) alias int delegate(void *, void *) dg2_t
;
483 extern (C
) int _aApplyRcd2(in char[] aa
, dg2_t dg
)
486 size_t len
= aa
.length
;
488 debug(apply
) printf("_aApplyRcd2(), len = %d\n", len
);
489 for (i
= len
; i
!= 0; )
495 { char c
= cast(char)d
;
499 while ((c
& 0xC0) != 0xC0)
501 onUnicodeError("Invalid UTF-8 sequence", 0);
503 d |
= (c
& 0x3F) << j
;
510 result
= dg(&i
, cast(void *)&d
);
519 debug(apply
) printf("_aApplyRcd2.unittest\n");
524 foreach_reverse (k
, dchar d
; s
)
529 case 0: assert(d
== 'o'); break;
530 case 1: assert(d
== 'l'); break;
531 case 2: assert(d
== 'l'); break;
532 case 3: assert(d
== 'e'); break;
533 case 4: assert(d
== 'h'); break;
540 s
= "a\u1234\U000A0456b";
542 foreach_reverse (k
, dchar d
; s
)
544 //printf("i = %d, k = %d, d = %x\n", i, k, d);
547 case 0: assert(d
== 'b'); assert(k
== 8); break;
548 case 1: assert(d
== '\U000A0456'); assert(k
== 4); break;
549 case 2: assert(d
== '\u1234'); assert(k
== 1); break;
550 case 3: assert(d
== 'a'); assert(k
== 0); break;
558 /*****************************/
560 extern (C
) int _aApplyRwd2(in wchar[] aa
, dg2_t dg
)
563 debug(apply
) printf("_aApplyRwd2(), len = %d\n", aa
.length
);
564 for (size_t i
= aa
.length
; i
!= 0; )
569 if (d
>= 0xDC00 && d
<= 0xDFFF)
571 onUnicodeError("Invalid UTF-16 sequence", 0);
573 d
= ((aa
[i
] - 0xD7C0) << 10) + (d
- 0xDC00);
575 result
= dg(&i
, cast(void *)&d
);
584 debug(apply
) printf("_aApplyRwd2.unittest\n");
589 foreach_reverse (k
, dchar d
; s
)
591 //printf("i = %d, k = %d, d = %x\n", i, k, d);
595 case 0: assert(d
== 'o'); break;
596 case 1: assert(d
== 'l'); break;
597 case 2: assert(d
== 'l'); break;
598 case 3: assert(d
== 'e'); break;
599 case 4: assert(d
== 'h'); break;
606 s
= "a\u1234\U000A0456b";
608 foreach_reverse (k
, dchar d
; s
)
610 //printf("i = %d, k = %d, d = %x\n", i, k, d);
613 case 0: assert(k
== 4); assert(d
== 'b'); break;
614 case 1: assert(k
== 2); assert(d
== '\U000A0456'); break;
615 case 2: assert(k
== 1); assert(d
== '\u1234'); break;
616 case 3: assert(k
== 0); assert(d
== 'a'); break;
624 /*****************************/
626 extern (C
) int _aApplyRcw2(in char[] aa
, dg2_t dg
)
629 debug(apply
) printf("_aApplyRcw2(), len = %d\n", aa
.length
);
630 for (size_t i
= aa
.length
; i
!= 0; )
637 { char c
= cast(char)w
;
641 while ((c
& 0xC0) != 0xC0)
643 onUnicodeError("Invalid UTF-8 sequence", 0);
645 d |
= (c
& 0x3F) << j
;
656 w
= cast(wchar) ((((d
- 0x10000) >> 10) & 0x3FF) + 0xD800);
657 result
= dg(&i
, cast(void *)&w
);
660 w
= cast(wchar) (((d
- 0x10000) & 0x3FF) + 0xDC00);
663 result
= dg(&i
, cast(void *)&w
);
672 debug(apply
) printf("_aApplyRcw2.unittest\n");
677 foreach_reverse (k
, wchar d
; s
)
679 //printf("i = %d, k = %d, d = %x\n", i, k, d);
683 case 0: assert(d
== 'o'); break;
684 case 1: assert(d
== 'l'); break;
685 case 2: assert(d
== 'l'); break;
686 case 3: assert(d
== 'e'); break;
687 case 4: assert(d
== 'h'); break;
694 s
= "a\u1234\U000A0456b";
696 foreach_reverse (k
, wchar d
; s
)
698 //printf("i = %d, k = %d, d = %x\n", i, k, d);
701 case 0: assert(k
== 8); assert(d
== 'b'); break;
702 case 1: assert(k
== 4); assert(d
== 0xDA41); break;
703 case 2: assert(k
== 4); assert(d
== 0xDC56); break;
704 case 3: assert(k
== 1); assert(d
== 0x1234); break;
705 case 4: assert(k
== 0); assert(d
== 'a'); break;
713 /*****************************/
715 extern (C
) int _aApplyRwc2(in wchar[] aa
, dg2_t dg
)
718 debug(apply
) printf("_aApplyRwc2(), len = %d\n", aa
.length
);
719 for (size_t i
= aa
.length
; i
!= 0; )
725 if (d
>= 0xDC00 && d
<= 0xDFFF)
727 onUnicodeError("Invalid UTF-16 sequence", 0);
729 d
= ((aa
[i
] - 0xD7C0) << 10) + (d
- 0xDC00);
736 auto b
= toUTF8(buf
, d
);
739 result
= dg(&i
, cast(void *)&c2
);
746 result
= dg(&i
, cast(void *)&c
);
755 debug(apply
) printf("_aApplyRwc2.unittest\n");
760 foreach_reverse (k
, char d
; s
)
762 //printf("i = %d, k = %d, d = %x\n", i, k, d);
766 case 0: assert(d
== 'o'); break;
767 case 1: assert(d
== 'l'); break;
768 case 2: assert(d
== 'l'); break;
769 case 3: assert(d
== 'e'); break;
770 case 4: assert(d
== 'h'); break;
777 s
= "a\u1234\U000A0456b";
779 foreach_reverse (k
, char d
; s
)
781 //printf("i = %d, k = %d, d = %x\n", i, k, d);
784 case 0: assert(k
== 4); assert(d
== 'b'); break;
785 case 1: assert(k
== 2); assert(d
== 0xF2); break;
786 case 2: assert(k
== 2); assert(d
== 0xA0); break;
787 case 3: assert(k
== 2); assert(d
== 0x91); break;
788 case 4: assert(k
== 2); assert(d
== 0x96); break;
789 case 5: assert(k
== 1); assert(d
== 0xE1); break;
790 case 6: assert(k
== 1); assert(d
== 0x88); break;
791 case 7: assert(k
== 1); assert(d
== 0xB4); break;
792 case 8: assert(k
== 0); assert(d
== 'a'); break;
800 /*****************************/
802 extern (C
) int _aApplyRdc2(in dchar[] aa
, dg2_t dg
)
805 debug(apply
) printf("_aApplyRdc2(), len = %d\n", aa
.length
);
806 for (size_t i
= aa
.length
; i
!= 0; )
814 auto b
= toUTF8(buf
, d
);
817 result
= dg(&i
, cast(void *)&c2
);
826 result
= dg(&i
, cast(void *)&c
);
835 debug(apply
) printf("_aApplyRdc2.unittest\n");
840 foreach_reverse (k
, char d
; s
)
842 //printf("i = %d, k = %d, d = %x\n", i, k, d);
846 case 0: assert(d
== 'o'); break;
847 case 1: assert(d
== 'l'); break;
848 case 2: assert(d
== 'l'); break;
849 case 3: assert(d
== 'e'); break;
850 case 4: assert(d
== 'h'); break;
857 s
= "a\u1234\U000A0456b";
859 foreach_reverse (k
, char d
; s
)
861 //printf("i = %d, k = %d, d = %x\n", i, k, d);
864 case 0: assert(k
== 3); assert(d
== 'b'); break;
865 case 1: assert(k
== 2); assert(d
== 0xF2); break;
866 case 2: assert(k
== 2); assert(d
== 0xA0); break;
867 case 3: assert(k
== 2); assert(d
== 0x91); break;
868 case 4: assert(k
== 2); assert(d
== 0x96); break;
869 case 5: assert(k
== 1); assert(d
== 0xE1); break;
870 case 6: assert(k
== 1); assert(d
== 0x88); break;
871 case 7: assert(k
== 1); assert(d
== 0xB4); break;
872 case 8: assert(k
== 0); assert(d
== 'a'); break;
880 /*****************************/
882 extern (C
) int _aApplyRdw2(in dchar[] aa
, dg2_t dg
)
885 debug(apply
) printf("_aApplyRdw2(), len = %d\n", aa
.length
);
886 for (size_t i
= aa
.length
; i
!= 0; )
894 w
= cast(wchar) ((((d
- 0x10000) >> 10) & 0x3FF) + 0xD800);
895 result
= dg(&i
, cast(void *)&w
);
898 w
= cast(wchar) (((d
- 0x10000) & 0x3FF) + 0xDC00);
900 result
= dg(&i
, cast(void *)&w
);
909 debug(apply
) printf("_aApplyRdw2.unittest\n");
914 foreach_reverse (k
, wchar d
; s
)
916 //printf("i = %d, k = %d, d = %x\n", i, k, d);
920 case 0: assert(d
== 'o'); break;
921 case 1: assert(d
== 'l'); break;
922 case 2: assert(d
== 'l'); break;
923 case 3: assert(d
== 'e'); break;
924 case 4: assert(d
== 'h'); break;
931 s
= "a\u1234\U000A0456b";
933 foreach_reverse (k
, wchar d
; s
)
935 //printf("i = %d, k = %d, d = %x\n", i, k, d);
938 case 0: assert(k
== 3); assert(d
== 'b'); break;
939 case 1: assert(k
== 2); assert(d
== 0xDA41); break;
940 case 2: assert(k
== 2); assert(d
== 0xDC56); break;
941 case 3: assert(k
== 1); assert(d
== 0x1234); break;
942 case 4: assert(k
== 0); assert(d
== 'a'); break;