beta-0.89.2
[luatex.git] / source / texk / web2c / luatexdir / unilib / ustring.c
blob5ac9ce15c2272309e569e8456d1826ca576ae740
1 /* Copyright (C) 2000-2012 by George Williams */
2 /*
3 * Redistribution and use in source and binary forms, with or without
4 * modification, are permitted provided that the following conditions are met:
6 * Redistributions of source code must retain the above copyright notice, this
7 * list of conditions and the following disclaimer.
9 * Redistributions in binary form must reproduce the above copyright notice,
10 * this list of conditions and the following disclaimer in the documentation
11 * and/or other materials provided with the distribution.
13 * The name of the author may not be used to endorse or promote products
14 * derived from this software without specific prior written permission.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "ustring.h"
28 #include "utype.h"
29 #include <stddef.h>
31 long uc_strcmp(const unichar_t *str1,const char *str2) {
32 long ch1, ch2;
33 for (;;) {
34 ch1 = *str1++; ch2 = *(unsigned char *) str2++ ;
35 if ( ch1!=ch2 || ch1=='\0' )
36 return(ch1-ch2);
40 long uc_strncmp(const unichar_t *str1,const char *str2,int n) {
41 long ch1, ch2;
42 while ( --n>=0 ) {
43 ch1 = *str1++; ch2 = *(unsigned char *) str2++ ;
44 if ( ch1!=ch2 || ch1=='\0' )
45 return(ch1-ch2);
47 return( 0 );
50 long uc_strmatch(const unichar_t *str1, const char *str2) {
51 long ch1, ch2;
52 for (;;) {
53 ch1 = *str1++; ch2 = *(unsigned char *) str2++ ;
54 ch1 = tolower(ch1);
55 ch2 = tolower(ch2);
56 if ( ch1!=ch2 || ch1=='\0' )
57 return(ch1-ch2);
61 long uc_strnmatch(const unichar_t *str1, const char *str2, int len) {
62 long ch1, ch2;
63 for (;--len>=0;) {
64 ch1 = *str1++; ch2 = *(unsigned char *) str2++ ;
65 ch1 = tolower(ch1);
66 ch2 = tolower(ch2);
67 if ( ch1!=ch2 || ch1=='\0' || len<=0 )
68 return(ch1-ch2);
70 return( 0 );
73 long u_strnmatch(const unichar_t *str1, const unichar_t *str2, int len) {
74 long ch1, ch2;
75 for (;--len>=0;) {
76 ch1 = *str1++; ch2 = *str2++ ;
77 ch1 = tolower(ch1);
78 ch2 = tolower(ch2);
79 if ( ch1!=ch2 || ch1=='\0' || len<=0 )
80 return(ch1-ch2);
82 return( 0 );
85 long u_strcmp(const unichar_t *str1,const unichar_t *str2) {
86 long ch1, ch2;
87 for (;;) {
88 ch1 = *str1++; ch2 = *str2++ ;
89 if ( ch1!=ch2 || ch1=='\0' )
90 return(ch1-ch2);
94 long u_strncmp(const unichar_t *str1,const unichar_t *str2,int n) {
95 long ch1, ch2;
96 while ( --n>=0 ) {
97 ch1 = *str1++; ch2 = *str2++ ;
98 if ( ch1!=ch2 || ch1=='\0' )
99 return(ch1-ch2);
101 return( 0 );
104 long u_strmatch(const unichar_t *str1, const unichar_t *str2) {
105 long ch1, ch2;
106 for (;;) {
107 ch1 = *str1++; ch2 = *str2++ ;
108 ch1 = tolower(ch1);
109 ch2 = tolower(ch2);
110 if ( ch1!=ch2 || ch1=='\0' )
111 return(ch1-ch2);
115 void cu_strcpy(char *to, const unichar_t *from) {
116 register unichar_t ch;
117 while ( (ch = *from++) != '\0' )
118 *(to++) = ch;
119 *to = 0;
122 void uc_strcpy(unichar_t *to, const char *from) {
123 register unichar_t ch;
124 while ( (ch = *(unsigned char *) from++) != '\0' )
125 *(to++) = ch;
126 *to = 0;
129 void u_strcpy(unichar_t *to, const unichar_t *from) {
130 register unichar_t ch;
131 while ( (ch = *from++) != '\0' )
132 *(to++) = ch;
133 *to = 0;
136 void u_strncpy(register unichar_t *to, const unichar_t *from, int len) {
137 register unichar_t ch;
138 while ( (ch = *from++) != '\0' && --len>=0 )
139 *(to++) = ch;
140 *to = 0;
143 void cu_strncpy(register char *to, const unichar_t *from, int len) {
144 register unichar_t ch;
145 while ( (ch = *from++) != '\0' && --len>=0 )
146 *(to++) = ch;
147 *to = 0;
150 void uc_strncpy(register unichar_t *to, const char *from, int len) {
151 register unichar_t ch;
152 while ( (ch = *(unsigned char *) from++) != '\0' && --len>=0 )
153 *(to++) = ch;
154 *to = 0;
157 void uc_strcat(unichar_t *to, const char *from) {
158 uc_strcpy(to+u_strlen(to),from);
161 void uc_strncat(unichar_t *to, const char *from,int len) {
162 uc_strncpy(to+u_strlen(to),from,len);
165 void cu_strcat(char *to, const unichar_t *from) {
166 cu_strcpy(to+strlen(to),from);
169 void cu_strncat(char *to, const unichar_t *from, int len) {
170 cu_strncpy(to+strlen(to),from,len);
173 void u_strcat(unichar_t *to, const unichar_t *from) {
174 u_strcpy(to+u_strlen(to),from);
177 void u_strncat(unichar_t *to, const unichar_t *from, int len) {
178 u_strncpy(to+u_strlen(to),from,len);
181 int u_strlen(register const unichar_t *str) {
182 register int len = 0;
184 while ( *str++!='\0' )
185 ++len;
186 return( len );
189 unichar_t *u_strchr(const unichar_t *str ,unichar_t ch) {
190 register unichar_t test;
192 while ( (test=*(str++))!='\0' )
193 if ( test==ch )
194 return( (unichar_t *) str-1 );
196 return( NULL );
199 unichar_t *u_strrchr(const unichar_t *str ,unichar_t ch) {
200 register unichar_t test, *last = NULL;
202 while ( (test=*(str++))!='\0' )
203 if ( test==ch )
204 last = (unichar_t *) str-1;
206 return( last );
209 unichar_t *uc_strstr(const unichar_t *longer, const char *substr) {
210 long ch1, ch2;
211 const unichar_t *lpt, *str1; const char *str2;
213 for ( lpt=longer; *lpt!='\0'; ++lpt ) {
214 str1 = lpt; str2 = substr;
215 for (;;) {
216 ch1 = *str1++; ch2 = *(unsigned char *) str2++ ;
217 if ( ch2=='\0' )
218 return((unichar_t *) lpt);
219 if ( ch1!=ch2 )
220 break;
223 return( NULL );
226 unichar_t *u_strstr(const unichar_t *longer, const unichar_t *substr) {
227 long ch1, ch2;
228 const unichar_t *lpt, *str1, *str2;
230 for ( lpt=longer; *lpt!='\0'; ++lpt ) {
231 str1 = lpt; str2 = substr;
232 for (;;) {
233 ch1 = *str1++; ch2 = *str2++ ;
234 if ( ch2=='\0' )
235 return((unichar_t *) lpt);
236 if ( ch1!=ch2 )
237 break;
240 return( NULL );
243 unichar_t *uc_strstrmatch(const unichar_t *longer, const char *substr) {
244 long ch1, ch2;
245 const unichar_t *lpt, *str1; const unsigned char *str2;
247 for ( lpt=longer; *lpt!='\0'; ++lpt ) {
248 str1 = lpt; str2 = (unsigned char *) substr;
249 for (;;) {
250 ch1 = *str1++; ch2 = *str2++ ;
251 ch1 = tolower(ch1);
252 ch2 = tolower(ch2);
253 if ( ch2=='\0' )
254 return((unichar_t *) lpt);
255 if ( ch1!=ch2 )
256 break;
259 return( NULL );
262 unichar_t *u_strstrmatch(const unichar_t *longer, const unichar_t *substr) {
263 long ch1, ch2;
264 const unichar_t *lpt, *str1, *str2;
266 for ( lpt=longer; *lpt!='\0'; ++lpt ) {
267 str1 = lpt; str2 = substr;
268 for (;;) {
269 ch1 = *str1++; ch2 = *str2++ ;
270 ch1 = tolower(ch1);
271 ch2 = tolower(ch2);
272 if ( ch2=='\0' )
273 return((unichar_t *) lpt);
274 if ( ch1!=ch2 )
275 break;
278 return( NULL );
281 unichar_t *u_copyn(const unichar_t *pt, long n) {
282 unichar_t *res;
283 #ifdef MEMORY_MASK
284 if ( n*sizeof(unichar_t)>=MEMORY_MASK )
285 n = MEMORY_MASK/sizeof(unichar_t)-1;
286 #endif
287 res = (unichar_t *) xmalloc((n+1)*sizeof(unichar_t));
288 memcpy(res,pt,n*sizeof(unichar_t));
289 res[n]='\0';
290 return(res);
293 unichar_t *u_copynallocm(const unichar_t *pt, long n, long m) {
294 unichar_t *res;
295 #ifdef MEMORY_MASK
296 if ( n*sizeof(unichar_t)>=MEMORY_MASK )
297 n = MEMORY_MASK/sizeof(unichar_t)-1;
298 #endif
299 res = xmalloc((m+1)*sizeof(unichar_t));
300 memcpy(res,pt,n*sizeof(unichar_t));
301 res[n]='\0';
302 return(res);
305 unichar_t *u_copy(const unichar_t *pt) {
306 if(pt)
307 return u_copyn(pt,u_strlen(pt));
309 return((unichar_t *)0);
312 unichar_t *u_concat(const unichar_t *s1, const unichar_t *s2) {
313 long len1, len2;
314 unichar_t *pt;
316 if ( s1==NULL )
317 return( u_copy( s2 ));
318 else if ( s2==NULL )
319 return( u_copy( s1 ));
320 len1 = u_strlen(s1); len2 = u_strlen(s2);
321 pt = (unichar_t *) xmalloc((len1+len2+1)*sizeof(unichar_t));
322 u_strcpy(pt,s1);
323 u_strcpy(pt+len1,s2);
324 return( pt );
327 unichar_t *uc_copyn(const char *pt,int len) {
328 unichar_t *res, *rpt;
330 if(!pt)
331 return((unichar_t *)0);
333 #ifdef MEMORY_MASK
334 if ( (len+1)*sizeof(unichar_t)>=MEMORY_MASK )
335 len = MEMORY_MASK/sizeof(unichar_t)-1;
336 #endif
337 res = (unichar_t *) xmalloc((len+1)*sizeof(unichar_t));
338 for ( rpt=res; --len>=0 ; *rpt++ = *(unsigned char *) pt++ );
339 *rpt = '\0';
340 return(res);
343 unichar_t *uc_copy(const char *pt) {
344 unichar_t *res, *rpt;
345 int n;
347 if(!pt)
348 return((unichar_t *)0);
350 n = strlen(pt);
351 #ifdef MEMORY_MASK
352 if ( (n+1)*sizeof(unichar_t)>=MEMORY_MASK )
353 n = MEMORY_MASK/sizeof(unichar_t)-1;
354 #endif
355 res = (unichar_t *) xmalloc((n+1)*sizeof(unichar_t));
356 for ( rpt=res; --n>=0 ; *rpt++ = *(unsigned char *) pt++ );
357 *rpt = '\0';
358 return(res);
361 char *cu_copyn(const unichar_t *pt,int len) {
362 char *res, *rpt;
364 if(!pt)
365 return(NULL);
367 #ifdef MEMORY_MASK
368 if ( (len+1)>=MEMORY_MASK )
369 len = MEMORY_MASK-1;
370 #endif
371 res = (char *) xmalloc(len+1);
372 for ( rpt=res; --len>=0 ; *rpt++ = *pt++ );
373 *rpt = '\0';
374 return(res);
377 char *cu_copy(const unichar_t *pt) {
378 char *res, *rpt;
379 int n;
381 if(!pt)
382 return((char *)0);
384 n = u_strlen(pt);
385 #ifdef MEMORY_MASK
386 if ( (n+1)>=MEMORY_MASK )
387 n = MEMORY_MASK/sizeof(unichar_t)-1;
388 #endif
389 res = (char *) xmalloc(n+1);
390 for ( rpt=res; --n>=0 ; *rpt++ = *pt++ );
391 *rpt = '\0';
392 return(res);
395 double u_strtod(const unichar_t *str, unichar_t **ptr) {
396 char buf[60], *pt, *ret;
397 const unichar_t *upt;
398 double val;
399 extern double strtod(); /* Please don't delete this, not all of us have good ansi headers */
401 for ( upt=str, pt=buf; *upt<128 && *upt!='\0' && pt-buf<sizeof(buf)-1; )
402 *pt++ = *upt++;
403 *pt = '\0';
404 val = strtod(buf,&ret);
405 if ( ptr!=NULL ) {
406 if ( pt==ret )
407 *ptr = (unichar_t *) upt;
408 else
409 *ptr = (unichar_t *) (str + (ret-buf));
411 return( val );
414 long u_strtol(const unichar_t *str, unichar_t **ptr, int base) {
415 char buf[60], *pt, *ret;
416 const unichar_t *upt;
417 long val;
418 extern long strtol(); /* Please don't delete this, not all of us have good ansi headers */
420 for ( upt=str, pt=buf; *upt<128 && *upt!='\0' && pt<buf+sizeof(buf)-1; )
421 *pt++ = *upt++;
422 *pt = '\0';
423 val = strtol(buf,&ret,base);
424 if ( ptr!=NULL ) {
425 if ( pt==ret )
426 *ptr = (unichar_t *) upt;
427 else
428 *ptr = (unichar_t *) (str + (ret-buf));
430 return( val );
433 unsigned long u_strtoul(const unichar_t *str, unichar_t **ptr, int base) {
434 char buf[60], *pt, *ret;
435 const unichar_t *upt;
436 unsigned long val;
438 for ( upt=str, pt=buf; *upt<128 && *upt!='\0' && pt<buf+sizeof(buf)-1; )
439 *pt++ = *upt++;
440 *pt = '\0';
441 val = strtoul(buf,&ret,base);
442 if ( ptr!=NULL ) {
443 if ( pt==ret )
444 *ptr = (unichar_t *) upt;
445 else
446 *ptr = (unichar_t *) (str + (ret-buf));
448 return( val );
451 unichar_t *cu_strstartmatch(const char *key,const unichar_t *str) {
452 if ( key && str ) {
453 while( *key ) {
454 if(tolower(*key) != tolower(*str))
455 return 0;
456 key++;
457 str++;
460 return (unichar_t *)str;
463 unichar_t *u_strstartmatch(const unichar_t *initial, const unichar_t *full) {
464 int ch1, ch2;
465 for (;;) {
466 ch1 = *initial++; ch2 = *full++ ;
467 if ( ch1=='\0' )
468 return( (unichar_t *) full );
469 ch1 = tolower(ch1);
470 ch2 = tolower(ch2);
471 if ( ch1!=ch2 || ch1=='\0' )
472 return(NULL);
476 char *u_to_c(const unichar_t *ubuf) {
477 static char buf[400];
478 cu_strncpy(buf,ubuf,sizeof(buf));
479 return( buf );
482 unichar_t *c_to_u(const char *buf) {
483 static unichar_t ubuf[400];
484 uc_strncpy(ubuf,buf,sizeof(ubuf));
485 return( ubuf );
488 unichar_t *utf82u_strncpy(unichar_t *ubuf,const char *utf8buf,int len) {
489 unichar_t *upt=ubuf, *uend=ubuf+len-1;
490 const uint8 *pt = (const uint8 *) utf8buf, *end = pt+strlen(utf8buf);
491 int w, w2;
493 while ( pt<end && *pt!='\0' && upt<uend ) {
494 if ( *pt<=127 )
495 *upt = *pt++;
496 else if ( *pt<=0xdf ) {
497 *upt = ((*pt&0x1f)<<6) | (pt[1]&0x3f);
498 pt += 2;
499 } else if ( *pt<=0xef ) {
500 *upt = ((*pt&0xf)<<12) | ((pt[1]&0x3f)<<6) | (pt[2]&0x3f);
501 pt += 3;
502 } else {
503 w = ( ((*pt&0x7)<<2) | ((pt[1]&0x30)>>4) )-1;
504 w = (w<<6) | ((pt[1]&0xf)<<2) | ((pt[2]&0x30)>>4);
505 w2 = ((pt[2]&0xf)<<6) | (pt[3]&0x3f);
506 *upt = w*0x400 + w2 + 0x10000;
507 pt += 4;
509 ++upt;
511 *upt = '\0';
512 return( ubuf );
515 unichar_t *utf82u_strcpy(unichar_t *ubuf,const char *utf8buf) {
516 return( utf82u_strncpy(ubuf,utf8buf,strlen(utf8buf)+1));
519 unichar_t *utf82u_copyn(const char *utf8buf,int len) {
520 unichar_t *ubuf = (unichar_t *) xmalloc((len+1)*sizeof(unichar_t));
521 return( utf82u_strncpy(ubuf,utf8buf,len+1));
524 unichar_t *utf82u_copy(const char *utf8buf) {
525 int len;
526 unichar_t *ubuf;
528 if ( utf8buf==NULL )
529 return( NULL );
531 len = strlen(utf8buf);
532 ubuf = (unichar_t *) xmalloc((len+1)*sizeof(unichar_t));
533 return( utf82u_strncpy(ubuf,utf8buf,len+1));
536 void utf82u_strcat(unichar_t *to,const char *from) {
537 utf82u_strcpy(to+u_strlen(to),from);
540 char *u2utf8_strcpy(char *utf8buf,const unichar_t *ubuf) {
541 char *pt = utf8buf;
543 while ( *ubuf ) {
544 if ( *ubuf<0x80 )
545 *pt++ = *ubuf;
546 else if ( *ubuf<0x800 ) {
547 *pt++ = 0xc0 | (*ubuf>>6);
548 *pt++ = 0x80 | (*ubuf&0x3f);
549 } else if ( *ubuf < 0x10000 ) {
550 *pt++ = 0xe0 | (*ubuf>>12);
551 *pt++ = 0x80 | ((*ubuf>>6)&0x3f);
552 *pt++ = 0x80 | (*ubuf&0x3f);
553 } else {
554 uint32 val = *ubuf-0x10000;
555 int u = ((val&0xf0000)>>16)+1, z=(val&0x0f000)>>12, y=(val&0x00fc0)>>6, x=val&0x0003f;
556 *pt++ = 0xf0 | (u>>2);
557 *pt++ = 0x80 | ((u&3)<<4) | z;
558 *pt++ = 0x80 | y;
559 *pt++ = 0x80 | x;
561 ++ubuf;
563 *pt = '\0';
564 return( utf8buf );
/* Return a pointer to the start of the first character in the UTF-8
 * string str whose decoded value (per utf8_ildb) equals search, or NULL
 * if the end of the string is reached first. */
char *utf8_strchr(const char *str, int search) {
    for (;;) {
        const char *start = str;
        int ch = utf8_ildb(&str);

        if ( ch==0 )
            return NULL;
        if ( ch==search )
            return (char *) start;
    }
}
/* Convert the Latin-1 string lbuf to UTF-8 in utf8buf, terminate it and
 * return utf8buf. Bytes below 0x80 are copied unchanged; every other byte
 * becomes a two byte sequence, so utf8buf needs up to 2*strlen(lbuf)+1
 * bytes. */
char *latin1_2_utf8_strcpy(char *utf8buf,const char *lbuf) {
    const unsigned char *src;
    char *dst = utf8buf;

    for ( src=(const unsigned char *) lbuf; *src!='\0'; ++src ) {
        unsigned char c = *src;

        if ( c>=0x80 ) {
            *dst++ = 0xc0 | (c>>6);     /* lead byte */
            c = 0x80 | (c&0x3f);        /* continuation byte */
        }
        *dst++ = c;
    }
    *dst = '\0';
    return utf8buf;
}
/* Return a newly allocated UTF-8 copy of the Latin-1 string lbuf, or NULL
 * when lbuf is NULL. The buffer allows two bytes per input byte. */
char *latin1_2_utf8_copy(const char *lbuf) {
    char *utf8buf;
    int bytes;

    if ( lbuf==NULL )
        return NULL;

    bytes = strlen(lbuf);
    utf8buf = (char *) xmalloc(2*bytes+1);
    return latin1_2_utf8_strcpy(utf8buf,lbuf);
}
/* Return a newly allocated Latin-1 copy of the UTF-8 string utf8buf, or
 * NULL when utf8buf is NULL.
 * Code points U+0001..U+00FF are stored as a single byte. Anything above
 * U+00FF, and any sequence utf8_ildb() reports as invalid (-1), becomes
 * '?'. Earlier U+00FF itself was replaced by '?' and the -1 error value
 * was stored as byte 0xFF.
 * The output never needs more bytes than the input, because every decoded
 * character consumes at least one input byte. */
char *utf8_2_latin1_copy(const char *utf8buf) {
    int len;
    int ch;
    char *lbuf, *pt;
    const char *upt;

    if ( utf8buf==NULL )
        return NULL;

    len = strlen(utf8buf);
    pt = lbuf = (char *) xmalloc(len+1);
    for ( upt=utf8buf; (ch=utf8_ildb(&upt))!='\0'; ) {
        if ( ch<0 || ch>0xff )
            *pt++ = '?';
        else
            *pt++ = ch;
    }
    *pt = '\0';
    return lbuf;
}
627 char *u2utf8_copy(const unichar_t *ubuf) {
628 int len;
629 char *utf8buf;
631 if ( ubuf==NULL )
632 return( NULL );
634 len = u_strlen(ubuf);
635 utf8buf = (char *) xmalloc((len+1)*4);
636 return( u2utf8_strcpy(utf8buf,ubuf));
639 char *u2utf8_copyn(const unichar_t *ubuf,int len) {
640 int i;
641 char *utf8buf, *pt;
643 if ( ubuf==NULL )
644 return( NULL );
646 utf8buf = pt = (char *) xmalloc((len+1)*4);
647 for ( i=0; i<len && *ubuf!='\0'; ++i )
648 pt = utf8_idpb(pt, *ubuf++);
649 *pt = '\0';
650 return( utf8buf );
653 int32 utf8_ildb(const char **_text) {
654 int32 val= -1;
655 int ch;
656 const uint8 *text = (const uint8 *) *_text;
657 /* Increment and load character */
659 if ( (ch = *text++)<0x80 ) {
660 val = ch;
661 } else if ( ch<=0xbf ) {
662 /* error */
663 } else if ( ch<=0xdf ) {
664 if ( *text>=0x80 && *text<0xc0 )
665 val = ((ch&0x1f)<<6) | (*text++&0x3f);
666 } else if ( ch<=0xef ) {
667 if ( *text>=0x80 && *text<0xc0 && text[1]>=0x80 && text[1]<0xc0 ) {
668 val = ((ch&0xf)<<12) | ((text[0]&0x3f)<<6) | (text[1]&0x3f);
669 text += 2;
671 } else {
672 int w = ( ((ch&0x7)<<2) | ((text[0]&0x30)>>4) )-1, w2;
673 w = (w<<6) | ((text[0]&0xf)<<2) | ((text[1]&0x30)>>4);
674 w2 = ((text[1]&0xf)<<6) | (text[2]&0x3f);
675 val = w*0x400 + w2 + 0x10000;
676 if ( *text<0x80 || text[1]<0x80 || text[2]<0x80 ||
677 *text>=0xc0 || text[1]>=0xc0 || text[2]>=0xc0 )
678 val = -1;
679 else
680 text += 3;
682 *_text = (const char *) text;
683 return( val );
686 char *utf8_idpb(char *utf8_text,uint32 ch) {
687 /* Increment and deposit character */
688 if ( ch>=17*65536 )
689 return( utf8_text );
691 if ( ch<=127 )
692 *utf8_text++ = ch;
693 else if ( ch<=0x7ff ) {
694 *utf8_text++ = 0xc0 | (ch>>6);
695 *utf8_text++ = 0x80 | (ch&0x3f);
696 } else if ( ch<=0xffff ) {
697 *utf8_text++ = 0xe0 | (ch>>12);
698 *utf8_text++ = 0x80 | ((ch>>6)&0x3f);
699 *utf8_text++ = 0x80 | (ch&0x3f);
700 } else {
701 uint32 val = ch-0x10000;
702 int u = ((val&0xf0000)>>16)+1, z=(val&0x0f000)>>12, y=(val&0x00fc0)>>6, x=val&0x0003f;
703 *utf8_text++ = 0xf0 | (u>>2);
704 *utf8_text++ = 0x80 | ((u&3)<<4) | z;
705 *utf8_text++ = 0x80 | y;
706 *utf8_text++ = 0x80 | x;
708 return( utf8_text );
/* Increment character: return a pointer to the character that follows the
 * one starting at utf8_text. The step (1 to 4 bytes) is chosen from the
 * lead byte alone; at the terminator the pointer is returned unchanged.
 * The lead byte is read as unsigned char. Through a plain char, bytes
 * >=0x80 are negative where char is signed, so every multi-byte character
 * was advanced by a single byte. */
char *utf8_ib(char *utf8_text) {
    unsigned char lead = *(unsigned char *) utf8_text;

    if ( lead=='\0' )
        return utf8_text;
    if ( lead<0x80 )
        return utf8_text+1;
    if ( lead<0xe0 )
        return utf8_text+2;
    if ( lead<0xf0 )
        return utf8_text+3;
    return utf8_text+4;
}
/* Is this a valid utf8 string? Decodes str with utf8_ildb until the
 * terminator and returns false as soon as a sequence is reported as
 * invalid (-1), true otherwise. */
int utf8_valid(const char *str) {
    for (;;) {
        int ch = utf8_ildb(&str);

        if ( ch==-1 )
            return false;
        if ( ch=='\0' )
            return true;
    }
}
/* A string cut to fit a fixed amount of space may end in a half completed
 * utf8 byte sequence. Scan str with utf8_ildb and, at the first sequence
 * reported as invalid, terminate the string right before it. A fully
 * valid string is left untouched. */
void utf8_truncatevalid(char *str) {
    const char *scan = str;

    for (;;) {
        char *start = (char *) scan;    /* where this character begins */
        int ch = utf8_ildb(&scan);

        if ( ch=='\0' )
            return;
        if ( ch==-1 ) {
            *start = '\0';
            return;
        }
    }
}
/* Decrement utf8 pointer: return a pointer to the start of the character
 * that precedes utf8_text. Steps back one byte, then up to three more
 * while the byte reached is a continuation byte (0x80..0xbf). The caller
 * must make sure utf8_text is not at the start of the buffer. */
char *utf8_db(char *utf8_text) {
    unsigned char *pt = (unsigned char *) utf8_text;
    int extra;

    --pt;
    /* A lead byte (>=0xc0) or an ASCII byte ends the walk at once */
    for ( extra=0; extra<3 && *pt>=0x80 && *pt<0xc0; ++extra )
        --pt;
    return (char *) pt;
}
/* How many characters are in the string, NOT bytes. Counting stops at the
 * terminator or at the first sequence utf8_ildb reports as invalid. */
int utf8_strlen(const char *utf8_str) {
    int count;

    for ( count=0; utf8_ildb(&utf8_str)>0; ++count )
        ;
    return count;
}
/* How many 16-bit units are needed to represent the UTF-8 string in
 * UTF-16. Code points from U+10000 upward need a surrogate pair and count
 * as two; all others count as one. Counting stops at the terminator or at
 * the first sequence utf8_ildb reports as invalid.
 * The comparison is >=0x10000. With > the first supplementary code point,
 * U+10000, was counted as a single unit. */
int utf82u_strlen(const char *utf8_str) {
    int ch;
    int len = 0;

    while ( (ch = utf8_ildb(&utf8_str))>0 )
        len += ( ch>=0x10000 ) ? 2 : 1;
    return len;
}
/* Copy n characters, NOT bytes: walk at most len characters of `from`
 * with utf8_ildb, copy the bytes they occupy into `to` and terminate the
 * result. The destination size is not checked. */
void utf8_strncpy(register char *to, const char *from, int len) {
    const char *stop = from;
    int left;

    for ( left=len; left!=0 && *stop!='\0'; --left )
        utf8_ildb(&stop);
    strncpy(to, from, stop-from);
    to[stop-from] = 0;
}
814 #include <chardata.h>
815 char *StripToASCII(const char *utf8_str) {
816 /* Remove any non-ascii characters: Special case, convert the copyright symbol to (c) */
817 char *newcr, *pt, *end;
818 int len, ch;
819 const unichar_t *alt;
821 len = strlen(utf8_str);
822 pt = newcr = (char *) xmalloc(len+1);
823 end = pt+len;
824 while ( (ch= utf8_ildb(&utf8_str))!='\0' ) {
825 if ( pt>=end ) {
826 int off = pt-newcr;
827 newcr = (char *) xrealloc(newcr,(off+10)+1);
828 pt = newcr+off;
829 end = pt+10;
831 if ( (ch>=' ' && ch<'\177' ) || ch=='\n' || ch=='\t' )
832 *pt++ = ch;
833 else if ( ch=='\r' && *utf8_str!='\n' )
834 *pt++ = '\n';
835 else if ( ch==0xa9 /* Copyright sign */ ) {
836 char *str = "(c)";
837 if ( pt+strlen(str)>=end ) {
838 int off = pt-newcr;
839 newcr = (char *) xrealloc(newcr,(off+10+strlen(str))+1);
840 pt = newcr+off;
841 end = pt+10;
843 while ( *str )
844 *pt++ = *str++;
845 } else if ( unicode_alternates[ch>>8]!=NULL &&
846 (alt = unicode_alternates[ch>>8][ch&0xff])!=NULL ) {
847 while ( *alt!='\0' ) {
848 if ( pt>=end ) {
849 int off = pt-newcr;
850 newcr = (char *) xrealloc(newcr,(off+10)+1);
851 pt = newcr+off;
852 end = pt+10;
854 if ( *alt>=' ' && *alt<'\177' )
855 *pt++ = *alt;
856 else if ( *alt==0x300 )
857 *pt++ = '`';
858 else if ( *alt==0x301 )
859 *pt++ = '\'';
860 else if ( *alt==0x302 )
861 *pt++ = '^';
862 else if ( *alt==0x303 )
863 *pt++ = '~';
864 else if ( *alt==0x308 )
865 *pt++ = ':';
866 ++alt;
870 *pt = '\0';
871 return( newcr );
/* Return true when every byte of txt is a tab, newline, carriage return
 * or a printable ASCII character (' ' up to but excluding '\177'), false
 * otherwise. An empty string yields true. */
int AllAscii(const char *txt) {
    const char *p;

    for ( p=txt; *p!='\0'; ++p ) {
        if ( *p=='\t' || *p=='\n' || *p=='\r' )
            continue;           /* allowed control characters */
        if ( *p<' ' || *p>='\177' )
            return false;
    }
    return true;
}
884 int uAllAscii(const unichar_t *txt) {
885 for ( ; *txt!='\0'; ++txt ) {
886 if ( *txt=='\t' || *txt=='\n' || *txt=='\r' )
887 /* All right */;
888 else if ( *txt<' ' || *txt>='\177' )
889 return( false );
891 return( true );
/* Strip one trailing '\n' and then one trailing '\r' from line, in place,
 * and return line. A NULL argument is returned unchanged.
 * The length is checked before the last character is inspected. Without
 * the check an empty string (or "\n" once its newline was removed) made
 * the code index line[strlen(line)-1] with a wrapped-around length. */
char* chomp( char* line ) {
    size_t len;

    if( !line )
        return line;
    len = strlen(line);
    if ( len>0 && line[len-1]=='\n' )
        line[--len] = '\0';
    if ( len>0 && line[len-1]=='\r' )
        line[--len] = '\0';
    return line;
}
/* Return a newly allocated copy (xstrdup) of input with every byte passed
 * through tolower(). The caller owns the result.
 * NOTE(review): bytes are handed to tolower() as plain char, so values
 * >=0x80 are negative where char is signed - confirm that the tolower in
 * scope accepts such values. */
char *copytolower(const char *input)
{
    char *ret = xstrdup(input);
    char *p = ret;

    while ( *p ) {
        *p = tolower(*p);
        ++p;
    }
    return ret;
}
/* Return 1 when haystack ends with needle (byte for byte), 0 otherwise.
 * An empty needle matches every haystack. */
int endswith(const char *haystack,const char *needle) {
    int haylen = strlen( haystack );
    int nedlen = strlen( needle );
    const char *tail;

    if ( haylen<nedlen )
        return 0;
    /* the tail has exactly nedlen characters, so equality decides */
    tail = haystack + haylen - nedlen;
    return strcmp( tail, needle )==0;
}
/* Case-insensitive endswith(): both strings are lowered into temporary
 * copies (copytolower), compared with endswith(), and the copies are
 * freed. */
int endswithi(const char *haystackZ,const char *needleZ) {
    char *hay = copytolower(haystackZ);
    char *ned = copytolower(needleZ);
    int matched;

    matched = endswith( hay, ned );
    free( hay );
    free( ned );
    return matched;
}
/* Case-insensitively test whether haystackZ ends with needleZ or with any
 * non-empty leading part of needleZ (for example ".sf" or "." when the
 * needle is ".sfd"). Returns non-zero on a match, 0 otherwise; an empty
 * needle never matches.
 * The needle is shortened one character at a time but never down to the
 * empty string. endswith() accepts an empty needle for every haystack, so
 * shortening that far made every non-empty needle match. */
int endswithi_partialExtension( const char *haystackZ,const char *needleZ) {
    int nedlen = strlen(needleZ);
    char *haystack;
    char *needle;
    int ret;
    int i;

    if( nedlen == 0 ) {
        return 0;
    }
    haystack = copytolower(haystackZ);
    needle   = copytolower(needleZ);
    ret = endswith( haystack, needle );
    for( i = nedlen-1; i>0 && !ret ; --i ) {
        needle[i] = '\0';       /* keep the first i characters */
        ret = endswith( haystack, needle );
    }
    free( haystack );
    free( needle );
    return ret;
}
957 int u_endswith(const unichar_t *haystack,const unichar_t *needle) {
958 int haylen = u_strlen( haystack );
959 int nedlen = u_strlen( needle );
960 unichar_t* p;
961 if( haylen < nedlen )
962 return 0;
963 p = u_strstr( haystack + haylen - nedlen, needle );
964 return p == ( haystack + haylen - nedlen );