Implementation of 'system' based on Qt
[texmacs.git] / src / src / Data / String / analyze.cpp
blobf1ca0569e7da11b94804e111e0a3217532a160ed
2 /******************************************************************************
3 * MODULE : analyze.cpp
4 * DESCRIPTION: Properties of characters and strings
5 * COPYRIGHT : (C) 1999 Joris van der Hoeven
6 *******************************************************************************
7 * This software falls under the GNU general public license version 3 or later.
8 * It comes WITHOUT ANY WARRANTY WHATSOEVER. For details, see the file LICENSE
9 * in the root directory or <http://www.gnu.org/licenses/gpl-3.0.html>.
10 ******************************************************************************/
12 #include "analyze.hpp"
13 #include "merge_sort.hpp"
14 #include "converter.hpp"
15 #include "Scheme/object.hpp"
17 /******************************************************************************
18 * Tests for characters
19 ******************************************************************************/
bool
is_alpha (char c) {
  // True iff c is an ASCII letter ('a'..'z' or 'A'..'Z').
  // The 'register' specifier was dropped: it has no effect on the result
  // and is no longer accepted by C++17.
  bool lower= (c >= 'a') && (c <= 'z');
  bool upper= (c >= 'A') && (c <= 'Z');
  return lower || upper;
}
bool
is_iso_alpha (char c) {
  // True iff c is an ASCII letter, or an 8-bit code >= 128 other than
  // 159, 189, 190 and 191.
  // The 'register' specifier was dropped (not accepted by C++17).
  int code= (int) ((unsigned char) c);
  if ((c >= 'a') && (c <= 'z')) return true;
  if ((c >= 'A') && (c <= 'Z')) return true;
  if (code < 128) return false;
  return (code != 159) && (code != 189) && (code != 190) && (code != 191);
}
bool
is_locase (char c) {
  // True iff c is 'a'..'z', or its unsigned code lies in [160,189)
  // or is >= 224 (the 8-bit ranges this file treats as lower case).
  // The 'register' specifier was dropped (not accepted by C++17).
  int code= (int) ((unsigned char) c);
  if ((c >= 'a') && (c <= 'z')) return true;
  if ((code >= 160) && (code < 189)) return true;
  return code >= 224;
}
bool
is_upcase (char c) {
  // True iff c is 'A'..'Z', or its unsigned code lies in [128,159)
  // or in [192,224) (the 8-bit ranges this file treats as upper case).
  // The 'register' specifier was dropped (not accepted by C++17).
  int code= (int) ((unsigned char) c);
  if ((c >= 'A') && (c <= 'Z')) return true;
  if ((code >= 128) && (code < 159)) return true;
  return (code >= 192) && (code < 224);
}
bool
is_digit (char c) {
  // True iff c is an ASCII decimal digit '0'..'9'.
  // The 'register' specifier was dropped (not accepted by C++17).
  return ('0' <= c) && (c <= '9');
}
bool
is_numeric (char c) {
  // True iff c is an ASCII decimal digit or the decimal point '.'.
  // The 'register' specifier was dropped (not accepted by C++17).
  if (c == '.') return true;
  return ('0' <= c) && (c <= '9');
}
bool
is_punctuation (char c) {
  // True iff c is one of  . , : ' ` ; ! ?
  // The 'register' specifier was dropped (not accepted by C++17).
  switch (c) {
  case '.': case ',': case ':': case '\'': case '`':
  case ';': case '!': case '?':
    return true;
  default:
    return false;
  }
}
bool
is_space (char c) {
  // True iff c is a blank, horizontal tab (octal 11), line feed (octal 12)
  // or carriage return (octal 15).
  // Cleanup: the stray line-continuation backslash after the return
  // statement and the 'register' specifier (not accepted by C++17) are gone.
  return (c == ' ') || (c == '\t') || (c == '\n') || (c == '\r');
}
75 /******************************************************************************
76 * Tests for strings
77 ******************************************************************************/
79 bool
80 is_alpha (string s) {
81 int i;
82 if (N(s)==0) return false;
83 for (i=0; i<N(s); i++)
84 if (!is_alpha (s[i])) return false;
85 return true;
88 bool
89 is_locase_alpha (string s) {
90 int i;
91 if (N(s)==0) return false;
92 for (i=0; i<N(s); i++)
93 if (s[i]<'a' || s[i]>'z') return false;
94 return true;
97 bool
98 is_iso_alpha (string s) {
99 int i;
100 if (N(s)==0) return false;
101 for (i=0; i<N(s); i++)
102 if (!is_iso_alpha (s[i])) return false;
103 return true;
106 bool
107 is_numeric (string s) {
108 int i;
109 if (N(s)==0) return false;
110 for (i=0; i<N(s); i++)
111 if (!is_numeric (s[i])) return false;
112 return true;
115 /******************************************************************************
116 * Changing cases
117 ******************************************************************************/
119 char
120 upcase (char c) {
121 if (is_locase (c))
122 return (char) (((int) ((unsigned char) c)) - 32);
123 else return c;
126 char
127 locase (char c) {
128 if (is_upcase (c))
129 return (char) (((int) ((unsigned char) c)) + 32);
130 else return c;
133 string
134 upcase_first (string s) {
135 if ((N(s)==0) || (!is_locase (s[0]))) return s;
136 return string ((char) (((int) ((unsigned char) s[0]))-32)) * s (1, N(s));
139 string
140 locase_first (string s) {
141 if ((N(s)==0) || (!is_upcase (s[0]))) return s;
142 return string ((char) (((int) ((unsigned char) s[0]))+32)) * s (1, N(s));
145 string
146 upcase_all (string s) {
147 int i;
148 string r (N(s));
149 for (i=0; i<N(s); i++)
150 if (!is_locase (s[i])) r[i]= s[i];
151 else r[i]= (char) (((int) ((unsigned char) s[i]))-32);
152 return r;
155 string
156 locase_all (string s) {
157 int i;
158 string r (N(s));
159 for (i=0; i<N(s); i++)
160 if (!is_upcase (s[i])) r[i]= s[i];
161 else r[i]= (char) (((int) ((unsigned char) s[i]))+32);
162 return r;
165 /******************************************************************************
166 * Inserting or removing a character into a string as a set of characters
167 ******************************************************************************/
169 string
170 string_union (string s1, string s2) {
171 return string_minus (s1, s2) * s2;
174 string
175 string_minus (string s1, string s2) {
176 string r;
177 int i1, n1= N(s1), i2, n2= N(s2);
178 for (i1=0; i1<n1; i1++) {
179 for (i2=0; i2<n2; i2++)
180 if (s1[i1] == s2[i2]) break;
181 if (i2==n2) r << s1[i1];
183 return r;
186 /******************************************************************************
187 * Spanish in relation with ispell
188 ******************************************************************************/
190 string
191 ispanish_to_spanish (string s) {
192 int i, n= N(s);
193 string r;
194 for (i=0; i<n; i++)
195 if ((s[i] == '\'') && ((i+1)<n)) {
196 switch (s[i+1]) {
197 case 'A': r << 'Á'; break;
198 case 'E': r << 'É'; break;
199 case 'I': r << 'Í'; break;
200 case 'N': r << 'Ñ'; break;
201 case 'O': r << 'Ó'; break;
202 case 'U': r << 'Ú'; break;
203 case 'Y': r << 'Ý'; break;
204 case 'a': r << 'á'; break;
205 case 'e': r << 'é'; break;
206 case 'i': r << 'í'; break;
207 case 'n': r << 'ñ'; break;
208 case 'o': r << 'ó'; break;
209 case 'u': r << 'ú'; break;
210 case 'y': r << 'ý'; break;
211 default : r << '\'' << s[i+1];
213 i++;
215 else r << s[i];
216 return r;
219 string
220 spanish_to_ispanish (string s) {
221 int i, n= N(s);
222 string r;
223 for (i=0; i<n; i++)
224 switch (s[i]) {
225 case 'Á': r << "'A"; break;
226 case 'É': r << "'E"; break;
227 case 'Í': r << "'I"; break;
228 case 'Ñ': r << "'N"; break;
229 case 'Ó': r << "'O"; break;
230 case 'Ú': r << "'U"; break;
231 case 'Ý': r << "'Y"; break;
232 case 'á': r << "'a"; break;
233 case 'é': r << "'e"; break;
234 case 'í': r << "'i"; break;
235 case 'ñ': r << "'n"; break;
236 case 'ó': r << "'o"; break;
237 case 'ú': r << "'u"; break;
238 case 'ý': r << "'y"; break;
239 default : r << s[i];
241 return r;
244 string
245 igerman_to_german (string s) {
246 int i, n= N(s);
247 string r;
248 for (i=0; i<n; i++)
249 if (s[i] == 'ß') r << 'ÿ';
250 else r << s[i];
251 return r;
254 string
255 german_to_igerman (string s) {
256 int i, n= N(s);
257 string r;
258 for (i=0; i<n; i++)
259 if (s[i] == 'ÿ') r << 'ß';
260 else r << s[i];
261 return r;
264 /******************************************************************************
265 * Iso latin 2 encoding for polish and czech
266 ******************************************************************************/
// Lookup tables used by il2_to_cork (char) and cork_to_il2 (char) below,
// which index them with (code - 128) for codes >= 128.
// NOTE(review): both literals look damaged by a character-set conversion
// (replacement characters and spelled-out control escapes are visible);
// presumably each table should hold exactly 128 single-byte entries --
// verify against the pristine file before relying on them.
268 static string il2_to_cork_string=
269 "€�‚ƒ„…†‡ˆ‰Š‹Œ�Ž��‘’“”•–—˜™š›œ�žŸ �\bŠ ‰‘Ÿ\x04’“”™\x7fš› ¡\fª\x01©±\a\v²³´¹\x05º»�Á€Ĉ‚ǃɆ˅Í΄ЋŒÓÔŽÖ.�—Ú–ÜÝ•ÿ¯áâ ä¨¢ç£é¦ë¥í«¬óô®ö/°·ú¶üýµ ";
270 static string cork_to_il2_string=
271 "áÆÈÏÌÊGÅ¥£ÑÒ ÕÀئ©ª«ÞÛÙY¬®¯IIð§ã±æèïìêgåµ³ñò õàø¶¹º»þûùy¼¾¿i!?LAÁÂAÄAAÇEÉEËIÍÎIÐNOÓÔOÖOOUÚUÜÝ Saáâaäaaçeéeëiíîiðnoóôoöoouúuüý ß";
273 static char
274 il2_to_cork (char c) {
275 int i= (int) ((unsigned char) c);
276 if (i<128) return c;
277 return il2_to_cork_string [i-128];
280 static char
281 cork_to_il2 (char c) {
282 int i= (int) ((unsigned char) c);
283 if (i<128) return c;
284 return cork_to_il2_string [i-128];
287 string
288 il2_to_cork (string s) {
289 int i, n= N(s);
290 string r (n);
291 for (i=0; i<n; i++)
292 r[i]= il2_to_cork (s[i]);
293 return r;
296 string
297 cork_to_il2 (string s) {
298 int i, n= N(s);
299 string r (n);
300 for (i=0; i<n; i++)
301 r[i]= cork_to_il2 (s[i]);
302 return r;
305 /******************************************************************************
306 * Koi8 encoding for russian
307 ******************************************************************************/
// Lookup tables used by koi8_to_iso (char, bool) and iso_to_koi8 (char, bool)
// below, which index them with (code - 192) for codes >= 192.
// NOTE(review): the entries are meant to be single 8-bit characters; the
// literals only have the intended length when this file is stored in a
// single-byte encoding -- confirm the file encoding before editing them.
309 static string koi8_to_iso_string=
310 "áâ÷çäåöúéêëìíîïðòóôõæèãþûýÿùøüàñÁÂ×ÇÄÅÖÚÉÊËÌÍÎÏÐÒÓÔÕÆÈÃÞÛÝßÙØÜÀÑ";
311 static string iso_to_koi8_string=
312 "þàáöäåôãõèéêëìíîïÿðñòóæâüûçøýù÷úÞÀÁÖÄÅÔÃÕÈÉÊËÌÍÎÏßÐÑÒÓÆÂÜÛÇØÝÙ×Ú";
314 static char
315 koi8_to_iso (char c, bool ukrainian) {
316 int i= (int) ((unsigned char) c);
317 if (i==156) return '³';
318 if (i==188) return '£';
319 if (ukrainian)
321 switch(c)
323 case 'I':return '¶';
324 case 'ˆ':return '·';
325 case '™':return '´';
326 case '€':return '½';
327 case 'i':return '¦';
328 case '¨':return '§';
329 case '¹':return '¤';
330 case ' ':return '­';
333 if (i<192) return c;
334 return koi8_to_iso_string [i-192];
337 static char
338 iso_to_koi8 (char c, bool ukrainian) {
339 int i= (int) ((unsigned char) c);
340 if (c=='³') return (char) 156;
341 if (c=='£') return (char) 188;
342 if (ukrainian)
344 switch(c)
346 case '¶':return 'I';
347 case '·':return 'ˆ';
348 case '´':return '™';
349 case '½':return '€';
350 case '¦':return 'i';
351 case '§':return '¨';
352 case '¤':return '¹';
353 case '­':return ' ';
356 if (i<192) return c;
357 return iso_to_koi8_string [i-192];
360 string
361 koi8_to_iso (string s) {
362 int i, n= N(s);
363 string r (n);
364 for (i=0; i<n; i++)
365 r[i]= koi8_to_iso (s[i], false);
366 return r;
369 string
370 iso_to_koi8 (string s) {
371 int i, n= N(s);
372 string r (n);
373 for (i=0; i<n; i++)
374 r[i]= iso_to_koi8 (s[i], false);
375 return r;
378 string
379 koi8uk_to_iso (string s) {
380 int i, n= N(s);
381 string r (n);
382 for (i=0; i<n; i++)
383 r[i]= koi8_to_iso (s[i], true);
384 return r;
387 string
388 iso_to_koi8uk (string s) {
389 int i, n= N(s);
390 string r (n);
391 for (i=0; i<n; i++)
392 r[i]= iso_to_koi8 (s[i], true);
393 return r;
396 /******************************************************************************
397 * Convert between TeXmacs and XML strings
398 ******************************************************************************/
400 static bool
401 is_xml_name (char c) {
402 return
403 is_alpha (c) || is_numeric (c) ||
404 (c == '.') || (c == '-') || (c == ':');
407 string
408 tm_to_xml_name (string s) {
409 string r;
410 int i, n= N(s);
411 for (i=0; i<n; i++)
412 if (is_xml_name (s[i])) r << s[i];
413 else r << "_" << as_string ((int) ((unsigned char) s[i])) << "_";
414 return r;
417 string
418 xml_name_to_tm (string s) {
419 string r;
420 int i, n= N(s);
421 for (i=0; i<n; i++)
422 if (s[i] != '_') r << s[i];
423 else {
424 int start= ++i;
425 while ((i<n) && (s[i]!='_')) i++;
426 r << (char) ((unsigned char) as_int (s (start, i)));
428 return r;
431 string
432 old_tm_to_xml_cdata (string s) {
433 string r;
434 int i, n= N(s);
435 for (i=0; i<n; i++)
436 if (s[i] == '&') r << "&amp;";
437 else if (s[i] == '>') r << "&gt;";
438 else if (s[i] != '<') r << s[i];
439 else {
440 int start= ++i;
441 while ((i<n) && (s[i]!='>')) i++;
442 r << "&" << tm_to_xml_name (s (start, i)) << ";";
444 return r;
447 object
448 tm_to_xml_cdata (string s) {
449 array<object> a;
450 a << symbol_object ("!concat");
451 string r;
452 int i, n= N(s);
453 for (i=0; i<n; i++)
454 if (s[i] == '&') r << "&amp;";
455 else if (s[i] == '>') r << "&gt;";
456 else if (s[i] == '\\') r << "\\";
457 else if (s[i] != '<') r << cork_to_utf8 (s (i, i+1));
458 else {
459 int start= i++;
460 while ((i<n) && (s[i]!='>')) i++;
461 string ss= s (start, i+1);
462 string rr= cork_to_utf8 (ss);
463 string qq= utf8_to_cork (rr);
464 if (rr != ss && qq == ss) r << rr;
465 else {
466 if (r != "") a << object (r);
467 a << cons (symbol_object ("tm-sym"),
468 cons (ss (1, N(ss)-1),
469 null_object ()));
470 r= "";
473 if (r != "") a << object (r);
474 if (N(a) == 1) return object ("");
475 else if (N(a) == 2) return a[1];
476 else return call ("list", a);
479 string
480 old_xml_cdata_to_tm (string s) {
481 string r;
482 int i, n= N(s);
483 for (i=0; i<n; i++)
484 if (s[i] == '<') r << "<less>";
485 else if (s[i] == '>') r << "<gtr>";
486 else if (s[i] != '&') r << s[i];
487 else {
488 int start= ++i;
489 while ((i<n) && (s[i]!=';')) i++;
490 string x= "<" * xml_name_to_tm (s (start, i)) * ">";
491 if (x == "<amp>") r << "&";
492 else r << x;
494 return r;
497 string
498 xml_unspace (string s, bool first, bool last) {
499 string r;
500 int i= 0, n= N(s);
501 if (first) while ((i<n) && is_space (s[i])) i++;
502 while (i<n)
503 if (!is_space (s[i])) r << s[i++];
504 else {
505 while ((i<n) && is_space (s[i])) i++;
506 if ((i<n) || (!last)) r << ' ';
508 return r;
511 bool
512 contains_unicode_char (string s) {
513 int i= 0, n= N(s);
514 while (i+1<n) {
515 if (s[i] == '<' && s[i+1] == '#') return true;
516 tm_char_forwards (s, i);
518 return false;
521 /******************************************************************************
522 * Roman and alpha numbers
523 ******************************************************************************/
525 static string ones[10]= {
526 "", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix" };
527 static string tens[10]= {
528 "", "x", "xx", "xxx", "xl", "l", "lx", "lxx", "lxxx", "xc" };
529 static string hundreds[10]= {
530 "", "c", "cc", "ccc", "cd", "d", "dc", "dcc", "dccc", "cm" };
532 string
533 roman_nr (int nr) {
534 if (nr<0) return "-" * roman_nr (nr);
535 if (nr==0) return "o";
536 if (nr>1000) return "m" * roman_nr (nr-1000);
537 if (nr==1000) return "m";
538 if (nr==999) return "im";
539 if (nr==499) return "id";
540 if ((nr%100)==99) return hundreds[nr/100] * "ic";
541 if ((nr%100)==49) return hundreds[nr/100] * "il";
542 return hundreds[nr/100] * tens[(nr%100)/10] * ones[nr%10];
545 string
546 Roman_nr (int nr) {
547 return upcase_all (roman_nr (nr));
550 string
551 alpha_nr (int nr) {
552 if (nr<0) return "-" * alpha_nr (nr);
553 if (nr==0) return "0";
554 if (nr<=26) return string ((char) (((int) 'a')+ nr-1));
555 return alpha_nr ((nr-1)/26) * alpha_nr (((nr-1)%26)+1);
558 string
559 Alpha_nr (int nr) {
560 return upcase_all (alpha_nr (nr));
563 string
564 fnsymbol_nr (int nr) {
565 string sym, r;
566 int i, m= (nr-1)%3, n= ((nr-1)/3)+1;
567 switch (m) {
568 case 0: sym= "<ast>"; break;
569 case 1: sym= "<dag>"; break;
570 case 2: sym= "<ddag>"; break;
572 for (i=0; i<n; i++) r << sym;
573 return r;
576 /******************************************************************************
577 * Conversions to and from hexadecimal
578 ******************************************************************************/
580 static const char* hex_string= "0123456789ABCDEF";
582 string
583 as_hexadecimal (int i) {
584 if (i<0) return "-" * as_hexadecimal (-i);
585 if (i<16) return hex_string [i & 15];
586 return as_hexadecimal (i >> 4) * hex_string [i & 15];
589 string
590 as_hexadecimal (pointer ptr) {
591 intptr_t i= (intptr_t) ptr;
592 if (i<0) return "-" * as_hexadecimal (-i);
593 if (i<16) return hex_string [i & 15];
594 return as_hexadecimal (i >> 4) * hex_string [i & 15];
597 string
598 as_hexadecimal (int i, int len) {
599 if (len==1) return hex_string [i & 15];
600 else return as_hexadecimal (i >> 4, len-1) * hex_string [i & 15];
604 from_hexadecimal (string s) {
605 int i, n= N(s), res= 0;
606 if ((n>0) && (s[0]=='-'))
607 return -from_hexadecimal (s (1, n));
608 for (i=0; i<n; i++) {
609 res= res << 4;
610 if ((s[i] >= '0') && (s[i] <= '9')) res += (int) (s[i] - '0');
611 if ((s[i] >= 'A') && (s[i] <= 'F')) res += (int) (s[i] + 10 - 'A');
612 if ((s[i] >= 'a') && (s[i] <= 'f')) res += (int) (s[i] + 10 - 'a');
614 return res;
617 /******************************************************************************
618 * Routines for the TeXmacs encoding
619 ******************************************************************************/
621 string
622 tm_encode (string s) {
623 // verbatim to TeXmacs encoding
624 register int i;
625 string r;
626 for (i=0; i<N(s); i++) {
627 if (s[i]=='<') r << "<less>";
628 else if (s[i]=='>') r << "<gtr>";
629 else r << s[i];
631 return r;
634 string
635 tm_decode (string s) {
636 // TeXmacs encoding to verbatim
637 register int i;
638 string r;
639 for (i=0; i<N(s); i++) {
640 if (s[i]=='<') {
641 register int j;
642 for (j=i+1; j<N(s); j++)
643 if (s[j]=='>') break;
644 if (j<N(s)) j++;
645 if (s(i,j) == "<less>") r << "<";
646 else if (s(i,j) == "<gtr>") r << ">";
647 i=j-1;
648 if (s[i]!='>') return r;
650 else if (s[i]!='>') r << s[i];
652 return r;
655 string
656 tm_var_encode (string s) {
657 register int i, n= N(s);
658 string r;
659 for (i=0; i<n; i++) {
660 if (s[i]=='<') {
661 if (i+1 < n && s[i+1] == '#') {
662 while (i<n && s[i] != '>') r << s[i++];
663 if (i<n) r << s[i];
665 else r << "<less>";
667 else if (s[i]=='>') r << "<gtr>";
668 else r << s[i];
670 return r;
673 string
674 tm_correct (string s) {
675 register int i;
676 string r;
677 for (i=0; i<N(s); i++) {
678 if (s[i]=='<') {
679 register bool flag= true;
680 register int j, k;
681 for (j=i+1; j<N(s); j++)
682 if (s[j]=='>') break;
683 if (j==N(s)) return r;
684 for (k=i+1; k<j; k++)
685 if (s[k]=='<') flag= false;
686 if (flag) r << s(i,j+1);
687 i=j;
689 else if (s[i]!='>') r << s[i];
691 return r;
694 void
695 tm_char_forwards (string s, int& pos) {
696 ASSERT (pos >= 0 && pos <= N(s), "out of range");
697 int n= N(s);
698 if (pos == n);
699 else if (s[pos] != '<') pos++;
700 else {
701 while (pos<n && s[pos] != '>') pos++;
702 if (pos<n) pos++;
706 void
707 tm_char_backwards (string s, int& pos) {
708 ASSERT (pos >= 0 && pos <= N(s), "out of range");
709 if (pos == 0);
710 else if (s[pos-1] != '>') pos--;
711 else {
712 while (pos>0 && s[pos-1] != '<') pos--;
713 if (pos>0) pos--;
717 /******************************************************************************
718 * Quoting
719 ******************************************************************************/
721 string
722 scm_quote (string s) {
723 // R5RS compliant external string representation.
724 int i, n= N(s);
725 string r;
726 r << '"';
727 for (i=0; i<n; i++)
728 switch (s[i]) {
729 case '\"':
730 case '\\':
731 r << '\\' << s[i];
732 break;
733 default:
734 r << s[i];
736 r << '"';
737 return r;
740 string
741 scm_unquote (string s) {
742 if ((N(s)>=2) && (s[0]=='\"') && (s[N(s)-1]=='\"')) {
743 int i, n= N(s);
744 string r;
745 for (i=1; i<n-1; i++)
746 if (s[i] == '\\' && (s[i+1] == '\"' || s[i+1] == '\\')) r << s[++i];
747 else r << s[i];
748 return r;
750 else return s;
753 string
754 raw_quote (string s) {
755 // Mark the label of a STRING tree as representing a string and not a symbol.
756 return "\"" * s * "\"";
759 string
760 raw_unquote (string s) {
761 // Get the string value of a STRING tree label representing a string.
762 if ((N(s)>=2) && (s[0]=='\"') && (s[N(s)-1]=='\"'))
763 return s (1, N(s)-1);
764 else return s;
767 /******************************************************************************
768 * Handling escape characters
769 ******************************************************************************/
771 string
772 escape_sh (string s) {
773 #if defined (__MINGW__) || defined (__MINGW32__) || defined (OS_WIN32)
774 return raw_quote (s);
775 #else
776 int i, n= N(s);
777 string r;
778 for (i=0; i<n; i++)
779 switch (s[i]) {
780 case '?':
781 case '&':
782 case '$':
783 case '`':
784 case '\"':
785 case '\\':
786 case ' ':
787 r << '\\' << s[i];
788 break;
789 default:
790 r << s[i];
792 return r;
793 #endif
796 string
797 escape_generic (string s) {
798 int i, n= N(s);
799 string r;
800 for (i=0; i<n; i++) {
801 if ((s[i] == '\2') || (s[i] == '\5') || (s[i] == '\33')) r << '\33';
802 r << s[i];
804 return r;
807 string
808 escape_verbatim (string s) {
809 int i, n= N(s);
810 string r;
811 for (i=0; i<n; i++) {
812 unsigned char c= (unsigned char) s[i];
813 if ((c == '\n') || (c == '\t')) r << ' ';
814 else if (((int) c) >= 32) r << s[i];
816 return r;
819 string
820 escape_spaces (string s) {
821 int i, n= N(s);
822 string r;
823 for (i=0; i<n; i++) {
824 unsigned char c= (unsigned char) s[i];
825 if (c == ' ') r << '\\';
826 r << c;
828 return r;
831 string
832 dos_to_better (string s) {
833 int i, n= N(s);
834 string r;
835 for (i=0; i<n; i++)
836 if (s[i] == '\015');
837 else r << s[i];
838 return r;
841 /******************************************************************************
842 * Reading input from a string
843 ******************************************************************************/
845 bool
846 test (string s, int i, const char* test) {
847 int n= N(s), j=0;
848 while (test[j]!='\0') {
849 if (i>=n) return false;
850 if (s[i]!=test[j]) return false;
851 i++; j++;
853 return true;
856 bool
857 test (string s, int i, string test) {
858 int n= N(s), m= N(test), j=0;
859 while (j<m) {
860 if (i>=n) return false;
861 if (s[i]!=test[j]) return false;
862 i++; j++;
864 return true;
867 bool
868 starts (string s, const char* what) {
869 return test (s, 0, what);
872 bool
873 starts (string s, const string what) {
874 return test (s, 0, what);
877 bool
878 ends (string s, const char* what) {
879 string r (what);
880 if (N(r) > N(s)) return false;
881 return s (N(s)-N(r), N(s)) == r;
884 bool
885 ends (string s, const string r) {
886 if (N(r) > N(s)) return false;
887 return s (N(s)-N(r), N(s)) == r;
890 bool
891 read (string s, int& i, const char* test) {
892 int n= N(s), j=0, k=i;
893 while (test[j]!='\0') {
894 if (k>=n) return false;
895 if (s[k]!=test[j]) return false;
896 j++; k++;
898 i=k;
899 return true;
902 bool
903 read (string s, int& i, string test) {
904 int n= N(s), m= N(test), j=0, k=i;
905 while (j<m) {
906 if (k>=n) return false;
907 if (s[k]!=test[j]) return false;
908 j++; k++;
910 i=k;
911 return true;
914 bool
915 read_line (string s, int& i, string& result) {
916 int start= i;
917 for (; i<N(s); i++) {
918 if (s[i]=='\n') {
919 result= s(start,i++);
920 return true;
923 result= s(start,i);
924 return false;
927 bool
928 read_int (string s, int& i, int& result) {
929 int n= N(s), start= i;
930 result= 0;
931 if (i==n) return false;
932 if (s[i]=='-') {
933 if (i+1==n) return false;
934 if (!is_digit (s[i+1])) return false;
935 i++;
937 else if (!is_digit (s[i])) return false;
938 while ((i<n) && is_digit (s[i])) i++;
939 result= as_int (s(start,i));
940 return true;
943 bool
944 read_double (string s, int& i, double& result) {
945 int n= N(s), start= i;
946 result= 0.0;
947 if (i==n) return false;
948 if (s[i]=='-') {
949 if (i+1==n) return false;
950 if (!is_numeric (s[i+1])) return false;
951 i++;
953 else if (!is_numeric (s[i])) return false;
954 while ((i<n) && is_digit (s[i])) i++;
955 if ((i<n) && (s[i]=='.')) i++;
956 while ((i<n) && is_digit (s[i])) i++;
957 if ((i<n) && ((s[i]=='e') || (s[i]=='E'))) {
958 i++;
959 if ((i<n) && (s[i]=='-')) i++;
960 if ((i==n) || (!is_digit (s[i]))) { i=start; return false; }
961 while ((i<n) && is_digit (s[i])) i++;
963 result= as_double (s(start,i));
964 return true;
967 void
968 skip_spaces (string s, int& i) {
969 int n=N(s);
970 while ((i<n) && ((s[i]==' ') || (s[i]=='\t'))) i++;
973 void
974 skip_line (string s, int& i) {
975 int n=N(s);
976 while ((i<n) && (s[i]!='\n')) i++;
977 if (i<n) i++;
980 void
981 skip_symbol (string s, int& i) {
982 int n=N(s);
983 if (i<n) {
984 if (s[i]=='<') {
985 for (i++; i<n; i++)
986 if (s[i-1]=='>') break;
988 else i++;
992 /******************************************************************************
993 * Parsing binary data
994 ******************************************************************************/
996 void
997 parse (string s, int& pos, QI& ret) {
998 ret= (QI) s[pos++];
1001 void
1002 parse (string s, int& pos, QN& ret) {
1003 ret= (QN) s[pos++];
1006 void
1007 parse (string s, int& pos, HI& ret) {
1008 QI c1= (QI) s[pos++];
1009 QN c2= (QN) s[pos++];
1010 ret= (((HI) c1)<<8)+ c2;
1013 void
1014 parse (string s, int& pos, HN& ret) {
1015 QN c1= (QN) s[pos++];
1016 QN c2= (QN) s[pos++];
1017 ret= (((HN) c1)<<8)+ c2;
1020 void
1021 parse (string s, int& pos, SI& ret) {
1022 QI c1= (QI) s[pos++];
1023 QN c2= (QN) s[pos++];
1024 QN c3= (QN) s[pos++];
1025 QN c4= (QN) s[pos++];
1026 ret= (((((((SI) c1)<<8)+ ((SI) c2))<<8)+ ((SI) c3))<<8)+ c4;
1029 void
1030 parse (string s, int& pos, SI*& a, int len) {
1031 int i;
1032 a= tm_new_array<int> (len);
1033 for (i=0; i<len; i++) parse (s, pos, a[i]);
1036 /******************************************************************************
1037 * Searching, replacing and pattern matching
1038 ******************************************************************************/
1041 search_forwards (string s, int pos, string in) {
1042 int k= N(s), n= N(in);
1043 if (k == 0) return pos;
1044 char c= s[0];
1045 while (pos+k <= n) {
1046 if (in[pos] == c && test (in, pos, s)) return pos;
1047 pos++;
1049 return -1;
1053 search_forwards (string s, string in) {
1054 return search_forwards (s, 0, in);
1058 search_backwards (string s, int pos, string in) {
1059 while (pos >= 0) {
1060 if (test (in, pos, s)) return pos;
1061 pos--;
1063 return -1;
1067 search_backwards (string s, string in) {
1068 return search_backwards (s, N(in)-N(s), in);
1072 count_occurrences (string s, string in) {
1073 int count= 0;
1074 int i=0, next, n= N(s);
1075 while (i<n) {
1076 next= search_forwards (s, i, in);
1077 if (next == -1) break;
1078 count++;
1079 i= next+1;
1081 return count;
1084 string
1085 replace (string s, string what, string by) {
1086 int i, n= N(s);
1087 string r;
1088 for (i=0; i<n; )
1089 if (test (s, i, what)) {
1090 r << by;
1091 i += N(what);
1093 else {
1094 r << s[i];
1095 i++;
1097 return r;
1100 static bool
1101 match_wildcard (string s, int spos, string w, int wpos) {
1102 if (wpos == N(w)) return spos == N(s);
1103 if (w[wpos] != '*')
1104 return (spos < N(s)) && (s[spos] == w[wpos]) &&
1105 match_wildcard (s, spos+1, w, wpos+1);
1106 while ((wpos<N(w)) && (w[wpos]=='*')) wpos++;
1107 while (spos <= N(s)) {
1108 if (match_wildcard (s, spos, w, wpos)) return true;
1109 spos++;
1111 return false;
1114 bool
1115 match_wildcard (string s, string w) {
1116 return match_wildcard (s, 0, w, 0);
1119 /******************************************************************************
1120 * Computations with completions
1121 ******************************************************************************/
1123 array<string>
1124 as_completions (hashset<string> h) {
1125 tree t= (tree) h;
1126 int i, n= N(t);
1127 array<string> a (n);
1128 for (i=0; i<n; i++) a[i]= t[i]->label;
1129 merge_sort (a);
1130 return a;
#if 0
// Earlier variant of close_completions, disabled: it defines
// close_completions (array<string>) a second time and therefore clashes
// with the definition that follows in this file.
// NOTE(review): the gaps in the embedded line numbers suggest this code
// was commented out in the pristine file -- confirm before deleting it.

static void
close_completions (hashset<string>& h) {
  // Insert into h the common prefix of each pair of neighbouring sorted
  // completions that differ before either of them ends.
  array<string> a= as_completions (h);
  int i, j, n= N(a);
  for (i=1; i<n; i++) {
    for (j=0; j < min (N(a[i-1]), N(a[i])); j++)
      if (a[i-1][j] != a[i][j]) break;
    if (j < min (N(a[i-1]), N(a[i])))
      h->insert (a[i](0,j));
  }
}

array<string>
close_completions (array<string> a) {
  int i, n= N(a);
  hashset<string> h;
  for (i=0; i<n; i++) h->insert (a[i]);
  close_completions (h);
  return as_completions (h);
}
#endif
1156 array<string>
1157 close_completions (array<string> a) {
1158 if (N(a) == 0) return a;
1159 merge_sort (a);
1160 int i, j, n= N(a), l= N(a[0]);
1161 for (i=1; i<n; i++) {
1162 for (j=0; j<l && j<N(a[i]); j++)
1163 if (a[i-1][j] != a[i][j]) break;
1164 l= j;
1166 array<string> r;
1167 r << a[0] (0, l);
1168 for (i=0; i<n; i++)
1169 if (a[i] != r[N(r)-1])
1170 r << a[i];
1171 return r;
1174 array<string>
1175 strip_completions (array<string> a, string prefix) {
1176 int i, n= N(a);
1177 array<string> b;
1178 for (i=0; i<n; i++)
1179 if (starts (a[i], prefix))
1180 b << a[i] (N(prefix), N(a[i]));
1181 return b;