2 * This code handles decoding UTF strings for foreach_reverse loops. There are
3 * 6 combinations of conversions between char, wchar, and dchar, and 2 of each of those (a 1-argument version and a 2-argument version).
6 * Copyright: Copyright Digital Mars 2004 - 2010.
7 * License: $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
8 * Authors: Walter Bright, Sean Kelly
11 /* Copyright Digital Mars 2004 - 2010.
12 * Distributed under the Boost Software License, Version 1.0.
13 * (See accompanying file LICENSE or copy at
14 * http://www.boost.org/LICENSE_1_0.txt)
18 /* This code handles decoding UTF strings for foreach_reverse loops.
19 * There are 6 combinations of conversions between char, wchar,
20 * and dchar, and 2 of each of those.
23 private import rt
.util
.utf
;
25 /**********************************************/
26 /* 1 argument versions */
28 // dg is D, but _aApplyRcd1() is C
// dg_t is the delegate type (D linkage) that the extern (C) 1-argument
// _aApplyR*1 functions in this file accept as their `dg` parameter.
// Every visible call site passes it the address of a variable (d, w, c or c2)
// cast to void * and stores the int it returns in `result`.
29 extern (D
) alias int delegate(void *) dg_t
;
31 extern (C
) int _aApplyRcd1(in char[] aa
, dg_t dg
)
34 debug(apply
) printf("_aApplyRcd1(), len = %d\n", aa
.length
);
35 for (size_t i
= aa
.length
; i
!= 0; )
41 { char c
= cast(char)d
;
45 while ((c
& 0xC0) != 0xC0)
47 onUnicodeError("Invalid UTF-8 sequence", 0);
56 result
= dg(cast(void *)&d
);
65 debug(apply
) printf("_aApplyRcd1.unittest\n");
70 foreach_reverse (dchar d
; s
)
74 case 0: assert(d
== 'o'); break;
75 case 1: assert(d
== 'l'); break;
76 case 2: assert(d
== 'l'); break;
77 case 3: assert(d
== 'e'); break;
78 case 4: assert(d
== 'h'); break;
85 s
= "a\u1234\U000A0456b";
87 foreach_reverse (dchar d
; s
)
89 //printf("i = %d, d = %x\n", i, d);
92 case 0: assert(d
== 'b'); break;
93 case 1: assert(d
== '\U000A0456'); break;
94 case 2: assert(d
== '\u1234'); break;
95 case 3: assert(d
== 'a'); break;
103 /*****************************/
105 extern (C
) int _aApplyRwd1(in wchar[] aa
, dg_t dg
)
108 debug(apply
) printf("_aApplyRwd1(), len = %d\n", aa
.length
);
109 for (size_t i
= aa
.length
; i
!= 0; )
114 if (d
>= 0xDC00 && d
<= 0xDFFF)
116 onUnicodeError("Invalid UTF-16 sequence", 0);
118 d
= ((aa
[i
] - 0xD7C0) << 10) + (d
- 0xDC00);
120 result
= dg(cast(void *)&d
);
129 debug(apply
) printf("_aApplyRwd1.unittest\n");
134 foreach_reverse (dchar d
; s
)
138 case 0: assert(d
== 'o'); break;
139 case 1: assert(d
== 'l'); break;
140 case 2: assert(d
== 'l'); break;
141 case 3: assert(d
== 'e'); break;
142 case 4: assert(d
== 'h'); break;
149 s
= "a\u1234\U000A0456b";
151 foreach_reverse (dchar d
; s
)
153 //printf("i = %d, d = %x\n", i, d);
156 case 0: assert(d
== 'b'); break;
157 case 1: assert(d
== '\U000A0456'); break;
158 case 2: assert(d
== '\u1234'); break;
159 case 3: assert(d
== 'a'); break;
167 /*****************************/
169 extern (C
) int _aApplyRcw1(in char[] aa
, dg_t dg
)
172 debug(apply
) printf("_aApplyRcw1(), len = %d\n", aa
.length
);
173 for (size_t i
= aa
.length
; i
!= 0; )
180 { char c
= cast(char)w
;
184 while ((c
& 0xC0) != 0xC0)
186 onUnicodeError("Invalid UTF-8 sequence", 0);
188 d |
= (c
& 0x3F) << j
;
199 w
= cast(wchar) ((((d
- 0x10000) >> 10) & 0x3FF) + 0xD800);
200 result
= dg(cast(void *)&w
);
203 w
= cast(wchar) (((d
- 0x10000) & 0x3FF) + 0xDC00);
206 result
= dg(cast(void *)&w
);
215 debug(apply
) printf("_aApplyRcw1.unittest\n");
220 foreach_reverse (wchar d
; s
)
224 case 0: assert(d
== 'o'); break;
225 case 1: assert(d
== 'l'); break;
226 case 2: assert(d
== 'l'); break;
227 case 3: assert(d
== 'e'); break;
228 case 4: assert(d
== 'h'); break;
235 s
= "a\u1234\U000A0456b";
237 foreach_reverse (wchar d
; s
)
239 //printf("i = %d, d = %x\n", i, d);
242 case 0: assert(d
== 'b'); break;
243 case 1: assert(d
== 0xDA41); break;
244 case 2: assert(d
== 0xDC56); break;
245 case 3: assert(d
== 0x1234); break;
246 case 4: assert(d
== 'a'); break;
254 /*****************************/
256 extern (C
) int _aApplyRwc1(in wchar[] aa
, dg_t dg
)
259 debug(apply
) printf("_aApplyRwc1(), len = %d\n", aa
.length
);
260 for (size_t i
= aa
.length
; i
!= 0; )
266 if (d
>= 0xDC00 && d
<= 0xDFFF)
268 onUnicodeError("Invalid UTF-16 sequence", 0);
270 d
= ((aa
[i
] - 0xD7C0) << 10) + (d
- 0xDC00);
277 auto b
= toUTF8(buf
, d
);
280 result
= dg(cast(void *)&c2
);
287 result
= dg(cast(void *)&c
);
296 debug(apply
) printf("_aApplyRwc1.unittest\n");
301 foreach_reverse (char d
; s
)
305 case 0: assert(d
== 'o'); break;
306 case 1: assert(d
== 'l'); break;
307 case 2: assert(d
== 'l'); break;
308 case 3: assert(d
== 'e'); break;
309 case 4: assert(d
== 'h'); break;
316 s
= "a\u1234\U000A0456b";
318 foreach_reverse (char d
; s
)
320 //printf("i = %d, d = %x\n", i, d);
323 case 0: assert(d
== 'b'); break;
324 case 1: assert(d
== 0xF2); break;
325 case 2: assert(d
== 0xA0); break;
326 case 3: assert(d
== 0x91); break;
327 case 4: assert(d
== 0x96); break;
328 case 5: assert(d
== 0xE1); break;
329 case 6: assert(d
== 0x88); break;
330 case 7: assert(d
== 0xB4); break;
331 case 8: assert(d
== 'a'); break;
339 /*****************************/
341 extern (C
) int _aApplyRdc1(in dchar[] aa
, dg_t dg
)
344 debug(apply
) printf("_aApplyRdc1(), len = %d\n", aa
.length
);
345 for (size_t i
= aa
.length
; i
!= 0;)
353 auto b
= toUTF8(buf
, d
);
356 result
= dg(cast(void *)&c2
);
366 result
= dg(cast(void *)&c
);
375 debug(apply
) printf("_aApplyRdc1.unittest\n");
380 foreach_reverse (char d
; s
)
384 case 0: assert(d
== 'o'); break;
385 case 1: assert(d
== 'l'); break;
386 case 2: assert(d
== 'l'); break;
387 case 3: assert(d
== 'e'); break;
388 case 4: assert(d
== 'h'); break;
395 s
= "a\u1234\U000A0456b";
397 foreach_reverse (char d
; s
)
399 //printf("i = %d, d = %x\n", i, d);
402 case 0: assert(d
== 'b'); break;
403 case 1: assert(d
== 0xF2); break;
404 case 2: assert(d
== 0xA0); break;
405 case 3: assert(d
== 0x91); break;
406 case 4: assert(d
== 0x96); break;
407 case 5: assert(d
== 0xE1); break;
408 case 6: assert(d
== 0x88); break;
409 case 7: assert(d
== 0xB4); break;
410 case 8: assert(d
== 'a'); break;
418 /*****************************/
420 extern (C
) int _aApplyRdw1(in dchar[] aa
, dg_t dg
)
423 debug(apply
) printf("_aApplyRdw1(), len = %d\n", aa
.length
);
424 for (size_t i
= aa
.length
; i
!= 0; )
432 w
= cast(wchar) ((((d
- 0x10000) >> 10) & 0x3FF) + 0xD800);
433 result
= dg(cast(void *)&w
);
436 w
= cast(wchar) (((d
- 0x10000) & 0x3FF) + 0xDC00);
438 result
= dg(cast(void *)&w
);
447 debug(apply
) printf("_aApplyRdw1.unittest\n");
452 foreach_reverse (wchar d
; s
)
456 case 0: assert(d
== 'o'); break;
457 case 1: assert(d
== 'l'); break;
458 case 2: assert(d
== 'l'); break;
459 case 3: assert(d
== 'e'); break;
460 case 4: assert(d
== 'h'); break;
467 s
= "a\u1234\U000A0456b";
469 foreach_reverse (wchar d
; s
)
471 //printf("i = %d, d = %x\n", i, d);
474 case 0: assert(d
== 'b'); break;
475 case 1: assert(d
== 0xDA41); break;
476 case 2: assert(d
== 0xDC56); break;
477 case 3: assert(d
== 0x1234); break;
478 case 4: assert(d
== 'a'); break;
487 /****************************************************************************/
488 /* 2 argument versions */
490 // dg is D, but _aApplyRcd2() is C
// dg2_t is the delegate type (D linkage) that the extern (C) 2-argument
// _aApplyR*2 functions in this file accept as their `dg` parameter.
// Every visible call site passes &i (the loop index) as the first argument and
// the address of a variable (d, w, c or c2) cast to void * as the second, and
// stores the int it returns in `result`.
491 extern (D
) alias int delegate(void *, void *) dg2_t
;
493 extern (C
) int _aApplyRcd2(in char[] aa
, dg2_t dg
)
496 size_t len
= aa
.length
;
498 debug(apply
) printf("_aApplyRcd2(), len = %d\n", len
);
499 for (i
= len
; i
!= 0; )
505 { char c
= cast(char)d
;
509 while ((c
& 0xC0) != 0xC0)
511 onUnicodeError("Invalid UTF-8 sequence", 0);
513 d |
= (c
& 0x3F) << j
;
520 result
= dg(&i
, cast(void *)&d
);
529 debug(apply
) printf("_aApplyRcd2.unittest\n");
534 foreach_reverse (k
, dchar d
; s
)
539 case 0: assert(d
== 'o'); break;
540 case 1: assert(d
== 'l'); break;
541 case 2: assert(d
== 'l'); break;
542 case 3: assert(d
== 'e'); break;
543 case 4: assert(d
== 'h'); break;
550 s
= "a\u1234\U000A0456b";
552 foreach_reverse (k
, dchar d
; s
)
554 //printf("i = %d, k = %d, d = %x\n", i, k, d);
557 case 0: assert(d
== 'b'); assert(k
== 8); break;
558 case 1: assert(d
== '\U000A0456'); assert(k
== 4); break;
559 case 2: assert(d
== '\u1234'); assert(k
== 1); break;
560 case 3: assert(d
== 'a'); assert(k
== 0); break;
568 /*****************************/
570 extern (C
) int _aApplyRwd2(in wchar[] aa
, dg2_t dg
)
573 debug(apply
) printf("_aApplyRwd2(), len = %d\n", aa
.length
);
574 for (size_t i
= aa
.length
; i
!= 0; )
579 if (d
>= 0xDC00 && d
<= 0xDFFF)
581 onUnicodeError("Invalid UTF-16 sequence", 0);
583 d
= ((aa
[i
] - 0xD7C0) << 10) + (d
- 0xDC00);
585 result
= dg(&i
, cast(void *)&d
);
594 debug(apply
) printf("_aApplyRwd2.unittest\n");
599 foreach_reverse (k
, dchar d
; s
)
601 //printf("i = %d, k = %d, d = %x\n", i, k, d);
605 case 0: assert(d
== 'o'); break;
606 case 1: assert(d
== 'l'); break;
607 case 2: assert(d
== 'l'); break;
608 case 3: assert(d
== 'e'); break;
609 case 4: assert(d
== 'h'); break;
616 s
= "a\u1234\U000A0456b";
618 foreach_reverse (k
, dchar d
; s
)
620 //printf("i = %d, k = %d, d = %x\n", i, k, d);
623 case 0: assert(k
== 4); assert(d
== 'b'); break;
624 case 1: assert(k
== 2); assert(d
== '\U000A0456'); break;
625 case 2: assert(k
== 1); assert(d
== '\u1234'); break;
626 case 3: assert(k
== 0); assert(d
== 'a'); break;
634 /*****************************/
636 extern (C
) int _aApplyRcw2(in char[] aa
, dg2_t dg
)
639 debug(apply
) printf("_aApplyRcw2(), len = %d\n", aa
.length
);
640 for (size_t i
= aa
.length
; i
!= 0; )
647 { char c
= cast(char)w
;
651 while ((c
& 0xC0) != 0xC0)
653 onUnicodeError("Invalid UTF-8 sequence", 0);
655 d |
= (c
& 0x3F) << j
;
666 w
= cast(wchar) ((((d
- 0x10000) >> 10) & 0x3FF) + 0xD800);
667 result
= dg(&i
, cast(void *)&w
);
670 w
= cast(wchar) (((d
- 0x10000) & 0x3FF) + 0xDC00);
673 result
= dg(&i
, cast(void *)&w
);
682 debug(apply
) printf("_aApplyRcw2.unittest\n");
687 foreach_reverse (k
, wchar d
; s
)
689 //printf("i = %d, k = %d, d = %x\n", i, k, d);
693 case 0: assert(d
== 'o'); break;
694 case 1: assert(d
== 'l'); break;
695 case 2: assert(d
== 'l'); break;
696 case 3: assert(d
== 'e'); break;
697 case 4: assert(d
== 'h'); break;
704 s
= "a\u1234\U000A0456b";
706 foreach_reverse (k
, wchar d
; s
)
708 //printf("i = %d, k = %d, d = %x\n", i, k, d);
711 case 0: assert(k
== 8); assert(d
== 'b'); break;
712 case 1: assert(k
== 4); assert(d
== 0xDA41); break;
713 case 2: assert(k
== 4); assert(d
== 0xDC56); break;
714 case 3: assert(k
== 1); assert(d
== 0x1234); break;
715 case 4: assert(k
== 0); assert(d
== 'a'); break;
723 /*****************************/
725 extern (C
) int _aApplyRwc2(in wchar[] aa
, dg2_t dg
)
728 debug(apply
) printf("_aApplyRwc2(), len = %d\n", aa
.length
);
729 for (size_t i
= aa
.length
; i
!= 0; )
735 if (d
>= 0xDC00 && d
<= 0xDFFF)
737 onUnicodeError("Invalid UTF-16 sequence", 0);
739 d
= ((aa
[i
] - 0xD7C0) << 10) + (d
- 0xDC00);
746 auto b
= toUTF8(buf
, d
);
749 result
= dg(&i
, cast(void *)&c2
);
756 result
= dg(&i
, cast(void *)&c
);
765 debug(apply
) printf("_aApplyRwc2.unittest\n");
770 foreach_reverse (k
, char d
; s
)
772 //printf("i = %d, k = %d, d = %x\n", i, k, d);
776 case 0: assert(d
== 'o'); break;
777 case 1: assert(d
== 'l'); break;
778 case 2: assert(d
== 'l'); break;
779 case 3: assert(d
== 'e'); break;
780 case 4: assert(d
== 'h'); break;
787 s
= "a\u1234\U000A0456b";
789 foreach_reverse (k
, char d
; s
)
791 //printf("i = %d, k = %d, d = %x\n", i, k, d);
794 case 0: assert(k
== 4); assert(d
== 'b'); break;
795 case 1: assert(k
== 2); assert(d
== 0xF2); break;
796 case 2: assert(k
== 2); assert(d
== 0xA0); break;
797 case 3: assert(k
== 2); assert(d
== 0x91); break;
798 case 4: assert(k
== 2); assert(d
== 0x96); break;
799 case 5: assert(k
== 1); assert(d
== 0xE1); break;
800 case 6: assert(k
== 1); assert(d
== 0x88); break;
801 case 7: assert(k
== 1); assert(d
== 0xB4); break;
802 case 8: assert(k
== 0); assert(d
== 'a'); break;
810 /*****************************/
812 extern (C
) int _aApplyRdc2(in dchar[] aa
, dg2_t dg
)
815 debug(apply
) printf("_aApplyRdc2(), len = %d\n", aa
.length
);
816 for (size_t i
= aa
.length
; i
!= 0; )
824 auto b
= toUTF8(buf
, d
);
827 result
= dg(&i
, cast(void *)&c2
);
836 result
= dg(&i
, cast(void *)&c
);
845 debug(apply
) printf("_aApplyRdc2.unittest\n");
850 foreach_reverse (k
, char d
; s
)
852 //printf("i = %d, k = %d, d = %x\n", i, k, d);
856 case 0: assert(d
== 'o'); break;
857 case 1: assert(d
== 'l'); break;
858 case 2: assert(d
== 'l'); break;
859 case 3: assert(d
== 'e'); break;
860 case 4: assert(d
== 'h'); break;
867 s
= "a\u1234\U000A0456b";
869 foreach_reverse (k
, char d
; s
)
871 //printf("i = %d, k = %d, d = %x\n", i, k, d);
874 case 0: assert(k
== 3); assert(d
== 'b'); break;
875 case 1: assert(k
== 2); assert(d
== 0xF2); break;
876 case 2: assert(k
== 2); assert(d
== 0xA0); break;
877 case 3: assert(k
== 2); assert(d
== 0x91); break;
878 case 4: assert(k
== 2); assert(d
== 0x96); break;
879 case 5: assert(k
== 1); assert(d
== 0xE1); break;
880 case 6: assert(k
== 1); assert(d
== 0x88); break;
881 case 7: assert(k
== 1); assert(d
== 0xB4); break;
882 case 8: assert(k
== 0); assert(d
== 'a'); break;
890 /*****************************/
892 extern (C
) int _aApplyRdw2(in dchar[] aa
, dg2_t dg
)
895 debug(apply
) printf("_aApplyRdw2(), len = %d\n", aa
.length
);
896 for (size_t i
= aa
.length
; i
!= 0; )
904 w
= cast(wchar) ((((d
- 0x10000) >> 10) & 0x3FF) + 0xD800);
905 result
= dg(&i
, cast(void *)&w
);
908 w
= cast(wchar) (((d
- 0x10000) & 0x3FF) + 0xDC00);
910 result
= dg(&i
, cast(void *)&w
);
919 debug(apply
) printf("_aApplyRdw2.unittest\n");
924 foreach_reverse (k
, wchar d
; s
)
926 //printf("i = %d, k = %d, d = %x\n", i, k, d);
930 case 0: assert(d
== 'o'); break;
931 case 1: assert(d
== 'l'); break;
932 case 2: assert(d
== 'l'); break;
933 case 3: assert(d
== 'e'); break;
934 case 4: assert(d
== 'h'); break;
941 s
= "a\u1234\U000A0456b";
943 foreach_reverse (k
, wchar d
; s
)
945 //printf("i = %d, k = %d, d = %x\n", i, k, d);
948 case 0: assert(k
== 3); assert(d
== 'b'); break;
949 case 1: assert(k
== 2); assert(d
== 0xDA41); break;
950 case 2: assert(k
== 2); assert(d
== 0xDC56); break;
951 case 3: assert(k
== 1); assert(d
== 0x1234); break;
952 case 4: assert(k
== 0); assert(d
== 'a'); break;