2 // Copyright (c) 1999-2006 by Digital Mars
4 // written by Walter Bright
6 // License for redistribution is by either the Artistic License
7 // in artistic.txt, or the GNU General Public License in gnu.txt.
8 // See the included readme.txt for details.
21 // Converts a char string to Unicode
23 dchar
*Dchar::dup(char *p
)
31 s
= (dchar
*)mem
.malloc((len
+ 1) * sizeof(dchar
));
32 for (unsigned i
= 0; i
< len
; i
++)
34 s
[i
] = (dchar
)(p
[i
] & 0xFF);
40 dchar
*Dchar::memchr(dchar
*p
, int c
, int count
)
44 for (u
= 0; u
< count
; u
++)
54 unsigned Dchar::calcHash(const dchar
*str
, unsigned len
)
86 hash_t
Dchar::calcHash(const dchar
*str
, size_t len
)
98 hash
+= *(const uint16_t *)str
;
102 hash
+= *(const uint32_t *)str
;
106 hash
+= *(const uint32_t *)str
;
116 hash_t
Dchar::icalcHash(const dchar
*str
, size_t len
)
128 hash
+= *(const uint16_t *)str
| 0x20;
132 hash
+= *(const uint32_t *)str
| 0x200020;
136 hash
+= *(const uint32_t *)str
| 0x200020;
147 hash_t
Dchar::calcHash(const dchar
*str
, size_t len
)
160 hash
+= *(const uint8_t *)str
;
165 hash
+= *(const uint16_t *)str
;
170 hash
+= (*(const uint16_t *)str
<< 8) +
171 ((const uint8_t *)str
)[2];
176 hash
+= *(const uint32_t *)str
;
186 // Specification is: http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335
188 char Dchar::mblen
[256] =
190 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
191 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
192 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
193 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
194 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
195 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
196 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
197 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
198 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
199 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
200 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
201 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
202 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
203 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
204 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
205 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
208 dchar
*Dchar::dec(dchar
*pstart
, dchar
*p
)
210 while ((p
[-1] & 0xC0) == 0x80)
215 int Dchar::get(dchar
*p
)
218 unsigned char *q
= (unsigned char *)p
;
224 c
= ((c
- 0xC0) << 6) |
229 c
= ((c
- 0xE0) << 12) |
230 ((q
[1] - 0x80) << 6) |
235 c
= ((c
- 0xF0) << 18) |
236 ((q
[1] - 0x80) << 12) |
237 ((q
[2] - 0x80) << 6) |
242 c
= ((c
- 0xF8) << 24) |
243 ((q
[1] - 0x80) << 18) |
244 ((q
[2] - 0x80) << 12) |
245 ((q
[3] - 0x80) << 6) |
250 c
= ((c
- 0xFC) << 30) |
251 ((q
[1] - 0x80) << 24) |
252 ((q
[2] - 0x80) << 18) |
253 ((q
[3] - 0x80) << 12) |
254 ((q
[4] - 0x80) << 6) |
261 dchar
*Dchar::put(dchar
*p
, unsigned c
)
269 p
[0] = 0xC0 + (c
>> 6);
270 p
[1] = 0x80 + (c
& 0x3F);
273 else if (c
<= 0xFFFF)
275 p
[0] = 0xE0 + (c
>> 12);
276 p
[1] = 0x80 + ((c
>> 6) & 0x3F);
277 p
[2] = 0x80 + (c
& 0x3F);
280 else if (c
<= 0x1FFFFF)
282 p
[0] = 0xF0 + (c
>> 18);
283 p
[1] = 0x80 + ((c
>> 12) & 0x3F);
284 p
[2] = 0x80 + ((c
>> 6) & 0x3F);
285 p
[3] = 0x80 + (c
& 0x3F);
288 else if (c
<= 0x3FFFFFF)
290 p
[0] = 0xF8 + (c
>> 24);
291 p
[1] = 0x80 + ((c
>> 18) & 0x3F);
292 p
[2] = 0x80 + ((c
>> 12) & 0x3F);
293 p
[3] = 0x80 + ((c
>> 6) & 0x3F);
294 p
[4] = 0x80 + (c
& 0x3F);
297 else if (c
<= 0x7FFFFFFF)
299 p
[0] = 0xFC + (c
>> 30);
300 p
[1] = 0x80 + ((c
>> 24) & 0x3F);
301 p
[2] = 0x80 + ((c
>> 18) & 0x3F);
302 p
[3] = 0x80 + ((c
>> 12) & 0x3F);
303 p
[4] = 0x80 + ((c
>> 6) & 0x3F);
304 p
[5] = 0x80 + (c
& 0x3F);
308 assert(0); // not a UCS-4 character
312 hash_t
Dchar::calcHash(const dchar
*str
, size_t len
)
325 hash
+= *(const uint8_t *)str
;
331 hash
+= *(const uint16_t *)str
;
333 hash
+= str
[0] * 256 + str
[1];
340 hash
+= (*(const uint16_t *)str
<< 8) +
341 ((const uint8_t *)str
)[2];
343 hash
+= (str
[0] * 256 + str
[1]) * 256 + str
[2];
350 hash
+= *(const uint32_t *)str
;
352 hash
+= ((str
[0] * 256 + str
[1]) * 256 + str
[2]) * 256 + str
[3];
364 hash_t
Dchar::calcHash(const dchar
*str
, size_t len
)
377 hash
+= *(const uint8_t *)str
;
383 hash
+= *(const uint16_t *)str
;
385 hash
+= str
[0] * 256 + str
[1];
392 hash
+= (*(const uint16_t *)str
<< 8) +
393 ((const uint8_t *)str
)[2];
395 hash
+= (str
[0] * 256 + str
[1]) * 256 + str
[2];
402 hash
+= *(const uint32_t *)str
;
404 hash
+= ((str
[0] * 256 + str
[1]) * 256 + str
[2]) * 256 + str
[3];
413 hash_t
Dchar::icalcHash(const dchar
*str
, size_t len
)
426 hash
+= *(const uint8_t *)str
| 0x20;
431 hash
+= *(const uint16_t *)str
| 0x2020;
436 hash
+= ((*(const uint16_t *)str
<< 8) +
437 ((const uint8_t *)str
)[2]) | 0x202020;
442 hash
+= *(const uint32_t *)str
| 0x20202020;
457 // Print out values to hardcode into Dchar::mblen[]
461 for (c
= 0; c
< 256; c
++)
464 if (c
>= 0xC0 && c
<= 0xDF)
466 if (c
>= 0xE0 && c
<= 0xEF)
468 if (c
>= 0xF0 && c
<= 0xF7)
470 if (c
>= 0xF8 && c
<= 0xFB)
472 if (c
>= 0xFC && c
<= 0xFD)