Cope with Main not being a class.
[delight/core.git] / dmd2 / dchar.c
blob7f0ed19baac581c77d694c68808817c15efa649f
2 // Copyright (c) 1999-2006 by Digital Mars
3 // All Rights Reserved
4 // written by Walter Bright
5 // www.digitalmars.com
6 // License for redistribution is by either the Artistic License
7 // in artistic.txt, or the GNU General Public License in gnu.txt.
8 // See the included readme.txt for details.
11 #include <stdio.h>
12 #include <stdlib.h>
13 #include <stdint.h>
14 #include <assert.h>
16 #include "dchar.h"
17 #include "mem.h"
19 #if M_UNICODE
21 // Converts a char string to Unicode
23 dchar *Dchar::dup(char *p)
25 dchar *s;
26 size_t len;
28 if (!p)
29 return NULL;
30 len = strlen(p);
31 s = (dchar *)mem.malloc((len + 1) * sizeof(dchar));
32 for (unsigned i = 0; i < len; i++)
34 s[i] = (dchar)(p[i] & 0xFF);
36 s[len] = 0;
37 return s;
40 dchar *Dchar::memchr(dchar *p, int c, int count)
42 int u;
44 for (u = 0; u < count; u++)
46 if (p[u] == c)
47 return p + u;
49 return NULL;
52 #if _WIN32 && __DMC__
53 __declspec(naked)
54 unsigned Dchar::calcHash(const dchar *str, unsigned len)
56 __asm
58 mov ECX,4[ESP]
59 mov EDX,8[ESP]
60 xor EAX,EAX
61 test EDX,EDX
62 je L92
64 LC8: cmp EDX,1
65 je L98
66 cmp EDX,2
67 je LAE
69 add EAX,[ECX]
70 // imul EAX,EAX,025h
71 lea EAX,[EAX][EAX*8]
72 add ECX,4
73 sub EDX,2
74 jmp LC8
76 L98: mov DX,[ECX]
77 and EDX,0FFFFh
78 add EAX,EDX
79 ret
81 LAE: add EAX,[ECX]
82 L92: ret
85 #else
86 hash_t Dchar::calcHash(const dchar *str, size_t len)
88 unsigned hash = 0;
90 for (;;)
92 switch (len)
94 case 0:
95 return hash;
97 case 1:
98 hash += *(const uint16_t *)str;
99 return hash;
101 case 2:
102 hash += *(const uint32_t *)str;
103 return hash;
105 default:
106 hash += *(const uint32_t *)str;
107 hash *= 37;
108 str += 2;
109 len -= 2;
110 break;
114 #endif
116 hash_t Dchar::icalcHash(const dchar *str, size_t len)
118 hash_t hash = 0;
120 for (;;)
122 switch (len)
124 case 0:
125 return hash;
127 case 1:
128 hash += *(const uint16_t *)str | 0x20;
129 return hash;
131 case 2:
132 hash += *(const uint32_t *)str | 0x200020;
133 return hash;
135 default:
136 hash += *(const uint32_t *)str | 0x200020;
137 hash *= 37;
138 str += 2;
139 len -= 2;
140 break;
145 #elif MCBS
147 hash_t Dchar::calcHash(const dchar *str, size_t len)
149 hash_t hash = 0;
151 while (1)
153 switch (len)
155 case 0:
156 return hash;
158 case 1:
159 hash *= 37;
160 hash += *(const uint8_t *)str;
161 return hash;
163 case 2:
164 hash *= 37;
165 hash += *(const uint16_t *)str;
166 return hash;
168 case 3:
169 hash *= 37;
170 hash += (*(const uint16_t *)str << 8) +
171 ((const uint8_t *)str)[2];
172 return hash;
174 default:
175 hash *= 37;
176 hash += *(const uint32_t *)str;
177 str += 4;
178 len -= 4;
179 break;
184 #elif UTF8
186 // Specification is: http://anubis.dkuug.dk/JTC1/SC2/WG2/docs/n1335
188 char Dchar::mblen[256] =
190 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
191 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
192 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
193 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
194 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
195 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
196 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
197 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
198 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
199 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
200 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
201 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
202 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
203 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
204 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
205 4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1,
208 dchar *Dchar::dec(dchar *pstart, dchar *p)
210 while ((p[-1] & 0xC0) == 0x80)
211 p--;
212 return p;
215 int Dchar::get(dchar *p)
217 unsigned c;
218 unsigned char *q = (unsigned char *)p;
220 c = q[0];
221 switch (mblen[c])
223 case 2:
224 c = ((c - 0xC0) << 6) |
225 (q[1] - 0x80);
226 break;
228 case 3:
229 c = ((c - 0xE0) << 12) |
230 ((q[1] - 0x80) << 6) |
231 (q[2] - 0x80);
232 break;
234 case 4:
235 c = ((c - 0xF0) << 18) |
236 ((q[1] - 0x80) << 12) |
237 ((q[2] - 0x80) << 6) |
238 (q[3] - 0x80);
239 break;
241 case 5:
242 c = ((c - 0xF8) << 24) |
243 ((q[1] - 0x80) << 18) |
244 ((q[2] - 0x80) << 12) |
245 ((q[3] - 0x80) << 6) |
246 (q[4] - 0x80);
247 break;
249 case 6:
250 c = ((c - 0xFC) << 30) |
251 ((q[1] - 0x80) << 24) |
252 ((q[2] - 0x80) << 18) |
253 ((q[3] - 0x80) << 12) |
254 ((q[4] - 0x80) << 6) |
255 (q[5] - 0x80);
256 break;
258 return c;
261 dchar *Dchar::put(dchar *p, unsigned c)
263 if (c <= 0x7F)
265 *p++ = c;
267 else if (c <= 0x7FF)
269 p[0] = 0xC0 + (c >> 6);
270 p[1] = 0x80 + (c & 0x3F);
271 p += 2;
273 else if (c <= 0xFFFF)
275 p[0] = 0xE0 + (c >> 12);
276 p[1] = 0x80 + ((c >> 6) & 0x3F);
277 p[2] = 0x80 + (c & 0x3F);
278 p += 3;
280 else if (c <= 0x1FFFFF)
282 p[0] = 0xF0 + (c >> 18);
283 p[1] = 0x80 + ((c >> 12) & 0x3F);
284 p[2] = 0x80 + ((c >> 6) & 0x3F);
285 p[3] = 0x80 + (c & 0x3F);
286 p += 4;
288 else if (c <= 0x3FFFFFF)
290 p[0] = 0xF8 + (c >> 24);
291 p[1] = 0x80 + ((c >> 18) & 0x3F);
292 p[2] = 0x80 + ((c >> 12) & 0x3F);
293 p[3] = 0x80 + ((c >> 6) & 0x3F);
294 p[4] = 0x80 + (c & 0x3F);
295 p += 5;
297 else if (c <= 0x7FFFFFFF)
299 p[0] = 0xFC + (c >> 30);
300 p[1] = 0x80 + ((c >> 24) & 0x3F);
301 p[2] = 0x80 + ((c >> 18) & 0x3F);
302 p[3] = 0x80 + ((c >> 12) & 0x3F);
303 p[4] = 0x80 + ((c >> 6) & 0x3F);
304 p[5] = 0x80 + (c & 0x3F);
305 p += 6;
307 else
308 assert(0); // not a UCS-4 character
309 return p;
312 hash_t Dchar::calcHash(const dchar *str, size_t len)
314 hash_t hash = 0;
316 while (1)
318 switch (len)
320 case 0:
321 return hash;
323 case 1:
324 hash *= 37;
325 hash += *(const uint8_t *)str;
326 return hash;
328 case 2:
329 hash *= 37;
330 #if __I86__
331 hash += *(const uint16_t *)str;
332 #else
333 hash += str[0] * 256 + str[1];
334 #endif
335 return hash;
337 case 3:
338 hash *= 37;
339 #if __I86__
340 hash += (*(const uint16_t *)str << 8) +
341 ((const uint8_t *)str)[2];
342 #else
343 hash += (str[0] * 256 + str[1]) * 256 + str[2];
344 #endif
345 return hash;
347 default:
348 hash *= 37;
349 #if __I86__
350 hash += *(const uint32_t *)str;
351 #else
352 hash += ((str[0] * 256 + str[1]) * 256 + str[2]) * 256 + str[3];
353 #endif
355 str += 4;
356 len -= 4;
357 break;
362 #else // ascii
364 hash_t Dchar::calcHash(const dchar *str, size_t len)
366 hash_t hash = 0;
368 while (1)
370 switch (len)
372 case 0:
373 return hash;
375 case 1:
376 hash *= 37;
377 hash += *(const uint8_t *)str;
378 return hash;
380 case 2:
381 hash *= 37;
382 #if __I86__
383 hash += *(const uint16_t *)str;
384 #else
385 hash += str[0] * 256 + str[1];
386 #endif
387 return hash;
389 case 3:
390 hash *= 37;
391 #if __I86__
392 hash += (*(const uint16_t *)str << 8) +
393 ((const uint8_t *)str)[2];
394 #else
395 hash += (str[0] * 256 + str[1]) * 256 + str[2];
396 #endif
397 return hash;
399 default:
400 hash *= 37;
401 #if __I86__
402 hash += *(const uint32_t *)str;
403 #else
404 hash += ((str[0] * 256 + str[1]) * 256 + str[2]) * 256 + str[3];
405 #endif
406 str += 4;
407 len -= 4;
408 break;
413 hash_t Dchar::icalcHash(const dchar *str, size_t len)
415 hash_t hash = 0;
417 while (1)
419 switch (len)
421 case 0:
422 return hash;
424 case 1:
425 hash *= 37;
426 hash += *(const uint8_t *)str | 0x20;
427 return hash;
429 case 2:
430 hash *= 37;
431 hash += *(const uint16_t *)str | 0x2020;
432 return hash;
434 case 3:
435 hash *= 37;
436 hash += ((*(const uint16_t *)str << 8) +
437 ((const uint8_t *)str)[2]) | 0x202020;
438 return hash;
440 default:
441 hash *= 37;
442 hash += *(const uint32_t *)str | 0x20202020;
443 str += 4;
444 len -= 4;
445 break;
450 #endif
452 #if 0
453 #include <stdio.h>
// Generator for the Dchar::mblen[] table: prints, for every byte value
// 0..255, the sequence length implied by that byte when it appears first,
// sixteen comma-separated values per output line.
//
// Declared as `int main()` (the previous `void main()` is not valid C++).
int main()
{
    // Print out values to hardcode into Dchar::mblen[]
    for (int c = 0; c < 256; c++)
    {
        int s;

        if (c >= 0xC0 && c <= 0xDF)
            s = 2;
        else if (c >= 0xE0 && c <= 0xEF)
            s = 3;
        else if (c >= 0xF0 && c <= 0xF7)
            s = 4;
        else if (c >= 0xF8 && c <= 0xFB)
            s = 5;
        else if (c >= 0xFC && c <= 0xFD)
            s = 6;
        else
            s = 1;

        printf("%d", s);
        if ((c & 15) == 15)
            printf(",\n");      // end of a 16-entry row
        else
            printf(",");
    }
    return 0;
}
482 #endif