Updating ChangeLog for 4.22.10
[centerim.git] / kkstrtext / kkstrtext.cc
bloba2fbfd80e0f2c7e7665fbd667ea26c729f3df381
/*
 *
 * kkstrtext string related and text processing routines
 * $Id: kkstrtext.cc,v 1.45 2005/02/01 00:13:24 konst Exp $
 *
 * Copyright (C) 1999-2004 by Konstantin Klyagin <k@thekonst.net>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 */
25 #include "kkstrtext.h"
26 #include <errno.h>
28 #ifdef HAVE_STDINT_H
29 #include <stdint.h> /* for intptr_t */
30 #endif
/*
 * Removes up to `count` characters from `strin`, starting at index
 * `frompos`, shifting the tail (including the terminator) to the left.
 * The string is edited in place and returned.
 *
 * Nothing is removed when `count` is not positive, when `frompos` is
 * negative, or when `frompos` lies at or beyond the end of the string.
 * `count` is clamped to the number of characters available.
 */
char *strcut(char *strin, int frompos, int count) {
    if(!strin || frompos < 0 || count <= 0) return strin;

    const size_t len = strlen(strin);
    const size_t from = (size_t) frompos;
    if(from >= len) return strin;

    size_t cut = (size_t) count;
    if(cut > len - from) cut = len - from;

    /* memmove copes with the overlapping source and destination */
    memmove(strin + from, strin + from + cut, len - from - cut + 1);
    return strin;
}
45 char *strimlead(char *str) { return trimlead(str, " \t"); }
46 char *strimtrail(char *str) { return trimtrail(str, " \t"); }
47 char *strim(char *str) { return trim(str, " \t"); }
/*
 * Removes every leading character of `str` that occurs in `chr`.
 * The string is edited in place and returned.
 *
 * The prefix length is measured first and the tail is shifted once with
 * memmove(): the buffers overlap, which strcpy() does not permit.
 */
char *trimlead(char *str, char *chr) {
    size_t skip = 0;

    /* stop at the terminator explicitly: strchr() would match it */
    while(str[skip] && strchr(chr, str[skip])) skip++;

    if(skip) memmove(str, str + skip, strlen(str + skip) + 1);
    return str;
}
/*
 * Removes every trailing character of `str` that occurs in `chr`.
 * The string is edited in place and returned.
 */
char *trimtrail(char *str, char *chr) {
    size_t end = strlen(str);

    while(end > 0 && strchr(chr, str[end-1])) {
        end--;
        str[end] = 0;
    }

    return str;
}
60 char *trim(char *str, char *chr) {
61 return trimlead(trimtrail(str, chr), chr);
/*
 * Formats the local time of *t into `sout` following `mask`.
 *
 * In the mask a run of one of the letters D, M, Y, h, m, s is replaced
 * by day, month, year, hour, minute or second, zero padded to the
 * length of the run. A year run of three letters or fewer has its first
 * two characters dropped (so "YY" gives "05" for 2005). Every other
 * mask character is copied verbatim.
 *
 * `sout` must be large enough for the result; it is returned. When
 * localtime() fails an empty string is produced.
 */
char *time2str(const time_t *t, char *mask, char *sout) {
    char *out = sout;
    *out = 0;

    const struct tm *s = localtime(t);
    if(!s) return sout;

    for(size_t i = 0; mask[i]; ) {
        const char ch = mask[i];

        if(!strchr("DMYhms", ch)) {
            *out++ = ch;
            i++;
            continue;
        }

        /* measure the run of identical field letters */
        int len = 0;
        while(mask[i] == ch) {
            len++;
            i++;
        }

        int value = 0;
        switch(ch) {
            case 'D': value = s->tm_mday; break;
            case 'M': value = s->tm_mon+1; break;
            case 'Y': value = s->tm_year+1900; break;
            case 'h': value = s->tm_hour; break;
            case 'm': value = s->tm_min; break;
            case 's': value = s->tm_sec; break;
        }

        /* bounded formatting: an over-long run is truncated, not overflowed */
        char field[64];
        snprintf(field, sizeof(field), "%0*d", len, value);

        const char *p = field;
        if(ch == 'Y' && len <= 3) {
            /* short year: drop the two leading (century) characters */
            const size_t flen = strlen(field);
            p += (flen < 2) ? flen : 2;
        }

        const size_t plen = strlen(p);
        memcpy(out, p, plen);
        out += plen;
    }

    *out = 0;
    return sout;
}
/*
 * Parses `sdate` according to `mask` and stores the resulting time in
 * *t, which is also returned.
 *
 * A run of one of the letters D, M, Y, h, m, s in the mask marks the
 * columns of `sdate` holding day, month, year, hour, minute or second.
 * Fields absent from the mask default to 1970-01-01 00:00:00 instead of
 * being read from uninitialised memory. Columns beyond the end of
 * `sdate` are ignored.
 *
 * NOTE(review): the parsed hour is decremented by one before mktime(),
 * as in the original code. time2str() prints tm_hour unchanged, so a
 * round trip loses an hour; kept because callers may rely on it.
 */
time_t str2time(char *sdate, char *mask, time_t *t) {
    struct tm s;
    memset(&s, 0, sizeof(s));
    s.tm_year = 70;
    s.tm_mday = 1;

    const size_t datelen = strlen(sdate);

    for(size_t i = 0; mask[i]; ) {
        const char ch = mask[i];

        if(!strchr("DMYhms", ch)) {
            i++;
            continue;
        }

        const size_t start = i;
        while(mask[i] == ch) i++;

        /* copy the matching columns, bounded by both buffers */
        char b[16];
        size_t k = 0;
        for(size_t j = start; j < i && j < datelen && k < sizeof(b)-1; j++)
            b[k++] = sdate[j];
        b[k] = 0;

        const int value = atoi(b);

        switch(ch) {
            case 'D': s.tm_mday = value; break;
            case 'M': s.tm_mon = value-1; break;
            case 'Y': s.tm_year = value-1900; break;
            case 'h': s.tm_hour = value-1; break;
            case 'm': s.tm_min = value; break;
            case 's': s.tm_sec = value; break;
        }
    }

    s.tm_isdst = -1;
    *t = mktime(&s);
    return *t;
}
/*
 * Decodes a URL-style encoded string. The text is copied into a
 * scratch buffer which the char* overload of unmime() decodes in place.
 */
string unmime(const string &text) {
    vector<char> work(text.c_str(), text.c_str() + text.size() + 1);
    return unmime(&work[0]);
}
151 string mime(const string &text) {
152 string r;
153 char *buf = new char[text.size()*3+1];
154 r = mime(buf, text.c_str());
155 delete[] buf;
156 return r;
159 string fromutf8(const string &text) {
160 string r;
161 char *buf = (char *) utf8_to_str(text.c_str());
162 r = buf;
163 delete buf;
164 return r;
167 string toutf8(const string &text) {
168 string r;
169 char *buf = str_to_utf8(text.c_str());
170 r = buf;
171 delete buf;
172 return r;
/*
 * Decodes URL-style encoded text in place and returns `text`.
 *
 * '+' becomes a space and "%XX" (two hex digits) becomes the byte with
 * that value. A '%' that is not followed by two hex digits is copied
 * unchanged, so a truncated escape at the end of the input can no
 * longer make the scan run past the terminator.
 */
char *unmime(char *text) {
    int s = 0, d = 0;

    while(text[s]) {
        const char c = text[s];

        if(c == '+') {
            text[d++] = ' ';
            s++;
        } else if(c == '%'
        && isxdigit((unsigned char) text[s+1])
        && isxdigit((unsigned char) text[s+2])) {
            const char hex[3] = { text[s+1], text[s+2], 0 };
            text[d++] = (char) strtol(hex, 0, 16);
            s += 3;
        } else {
            text[d++] = c;
            s++;
        }
    }

    text[d] = 0;
    return(text);
}
/*
 * URL-style encodes `src` into `dst` and returns `dst`.
 *
 * ASCII letters and digits are copied, a space becomes '+', and any
 * other byte becomes '%' followed by two upper-case hex digits.
 * `dst` must provide room for three bytes per input byte plus the
 * terminator.
 */
char *mime(char *dst, const char *src) {
    static const char hexdigits[] = "0123456789ABCDEF";
    char *out = dst;

    for(const char *in = src; *in; ++in) {
        const char ch = *in;
        const bool plain = (ch >= 'a' && ch <= 'z')
            || (ch >= 'A' && ch <= 'Z')
            || (ch >= '0' && ch <= '9');

        if(plain) {
            *out++ = ch;
        } else if(ch == ' ') {
            *out++ = '+';
        } else {
            *out++ = '%';
            *out++ = hexdigits[(ch >> 4) & 0x0F];
            *out++ = hexdigits[ch & 0x0F];
        }
    }

    *out = '\0';
    return(dst);
}
/*
 * Appends the single character `c` to the C string `dest`, which must
 * have room for one more byte. Returns `dest`.
 */
char *strccat(char *dest, char c) {
    char *end = dest + strlen(dest);
    end[0] = c;
    end[1] = 0;
    return dest;
}
/*
 * Returns the positions in `haystack` of the quote characters that
 * open and close quoted sections, in order of appearance.
 *
 * `qs` lists the quote characters and `aescs` the escape characters;
 * when both are equal no escaping is applied. A quote directly preceded
 * by an escape character is skipped. Inside a quoted section an escape
 * directly preceded by another escape is neutralised, so the pair does
 * not escape the character that follows. A section is closed only by
 * the same quote character that opened it.
 *
 * The "previous character" state starts out as 0 so that the first
 * match is never compared against an uninitialised value.
 */
vector<int> getquotelayout(const string &haystack, const string &qs, const string &aescs) {
    vector<int> r;
    const string escs = (qs == aescs) ? string() : aescs;
    const string needle = qs + escs;

    char qchar = 0, prevchar = 0;
    int prevpos = 0;
    string::size_type at = 0;

    while((at = haystack.find_first_of(needle, at)) != string::npos) {
        const int curpos = (int) at;
        char cchar = haystack[at];

        /* was the previous interesting character an escape right before us? */
        const bool afteresc = (prevpos == curpos-1)
            && (escs.find(prevchar) != string::npos);

        if(escs.find(cchar) != string::npos) {
            if(qchar && afteresc) {
                /* Neutralize previous esc char */
                cchar = 0;
            }
        } else if(qs.find(cchar) != string::npos) {
            if(!afteresc) {
                /* Wasn't an escape (right before this quote char) */
                if(!qchar || (qchar == cchar)) {
                    qchar = qchar ? 0 : cchar;
                    r.push_back(curpos);
                }
            }
        }

        prevpos = curpos;
        prevchar = cchar;
        at++;
    }

    return r;
}
263 vector<int> getsymbolpositions(const string &haystack, const string &needles, const string &qoutes, const string &esc) {
264 vector<int> r, qp, nr;
265 vector<int>::iterator iq, ir;
266 int pos, st, ed, cpos;
268 for(cpos = 0; (pos = haystack.substr(cpos).find_first_of(needles)) != -1; ) {
269 r.push_back(cpos+pos);
270 cpos += pos+1;
273 qp = getquotelayout(haystack, qoutes, esc);
274 for(iq = qp.begin(); iq != qp.end(); iq++) {
275 if(!((iq-qp.begin()) % 2)) {
276 st = *iq;
277 ed = iq+1 != qp.end() ? *(iq+1) : haystack.size();
278 nr.clear();
280 for(ir = r.begin(); ir != r.end(); ir++) {
281 if(!(*ir > st && *ir < ed)) {
282 nr.push_back(*ir);
286 r = nr;
290 return r;
/*
 * Loop helper: when the character at `curpos` is preceded by exactly
 * one escape character (the character before is in `esc`, the one two
 * back is not), advance `curpos` and `continue` the enclosing loop.
 * Only applies from the third character of the string onwards.
 *
 * The arguments are pasted textually and `curpos` is incremented, so it
 * must be an expression on which a trailing ++ is valid.
 */
#define CHECKESC(curpos, startpos, esc) \
    if((curpos > startpos+1) \
    && strchr(esc, *(curpos-1)) \
    && !strchr(esc, *(curpos-2))) { \
        curpos++; \
        continue; \
    }
/*
 * Like strpbrk(), but ignores characters inside quoted sections.
 *
 * Returns a pointer to the first character of `s` at index `offset` or
 * later that occurs in `accept` and is not inside quotes, or 0 when
 * there is none (also for a null or empty `s`). `q` lists the quote
 * characters; a section is closed by the same character that opened
 * it. Unless `esc` equals `q`, a quote preceded by exactly one escape
 * character is skipped.
 */
const char *strqpbrk(const char *s, int offset, const char *accept, const char *q, const char *esc) {
    if(!s || !s[0]) return 0;

    /* characters of interest: accepted ones plus the quotes */
    string cset(accept);
    cset += q;

    char qchar = 0;

    for(const char *p = strpbrk(s, cset.c_str()); p; p = strpbrk(p+1, cset.c_str())) {
        if(strchr(q, *p)) {
            if(strcmp(esc, q)
            && (p > s+1) && strchr(esc, p[-1]) && !strchr(esc, p[-2])) {
                continue;   /* escaped quote */
            }

            if(!qchar) qchar = *p;
            else if(*p == qchar) qchar = 0;

        } else if((p-s >= offset) && !qchar) {
            return p;
        }
    }

    return 0;
}
/*
 * Case-insensitive search for `str` in `s` that skips quoted text.
 *
 * Returns a pointer to the first occurrence of `str` that starts
 * outside a quoted section, or 0 when there is none. Any character of
 * `q` toggles the quoted state. Unless `esc` equals `q`, a quote
 * preceded by exactly one escape character does not toggle it (this
 * check only applies from the third character onwards).
 *
 * An escaped quote now skips only the quote itself. Previously the
 * index was advanced twice, so a match beginning directly after an
 * escaped quote was missed.
 */
const char *strqcasestr(const char *s, const char *str, const char *q, const char *esc) {
    const size_t slen = strlen(s);
    const size_t nlen = strlen(str);
    const bool useesc = strcmp(esc, q) != 0;
    bool quote = false;

    for(size_t i = 0; i < slen; i++) {
        if(strchr(q, s[i])) {
            if(useesc && i > 1 && strchr(esc, s[i-1]) && !strchr(esc, s[i-2]))
                continue;   /* escaped quote: state unchanged */

            quote = !quote;
        }

        if(!quote && !strncasecmp(s + i, str, nlen)) return s + i;
    }

    return 0;
}
/*
 * Like strstr(), but skips occurrences inside quoted text.
 *
 * For every occurrence of `str` in `s` the quote characters (`q`) at
 * or before it are counted; an odd count means the occurrence is
 * quoted and the search resumes after it. Unless `esc` equals `q`,
 * a quote preceded by exactly one escape character is not counted.
 * Returns the first unquoted occurrence or 0.
 *
 * The search always advances by at least one character and stops at
 * the terminator, so an empty `str` can no longer loop forever.
 */
const char *strqstr(const char *s, const char *str, const char *q, const char *esc) {
    const size_t nlen = strlen(str);
    const bool useesc = strcmp(esc, q) != 0;
    const char *from = s;
    const char *hit;

    while((hit = strstr(from, str)) != 0) {
        bool quote = false;

        for(const char *r = strpbrk(s, q); r && r <= hit; r = strpbrk(r+1, q)) {
            if(useesc && (r > s+1) && strchr(esc, r[-1]) && !strchr(esc, r[-2]))
                continue;   /* escaped quote */

            quote = !quote;
        }

        if(!quote) return hit;
        if(!*hit) break;    /* empty needle matched at the terminator */

        from = hit + (nlen ? nlen : 1);
    }

    return 0;
}
/*
 * Inserts the string `ins` into `buf` at index `pos`, moving the rest
 * of the text to the right. `buf` must have room for the longer
 * string. Returns `buf`.
 */
char *strinsert(char *buf, int pos, char *ins) {
    const size_t inslen = strlen(ins);
    char *at = buf + pos;

    memmove(at + inslen, at, strlen(at) + 1);
    memcpy(at, ins, inslen);
    return buf;
}
/*
 * Inserts the character `ins` into `buf` at index `pos`, moving the
 * rest of the text one place to the right. `buf` must have room for
 * one more byte. Returns `buf`.
 */
char *strcinsert(char *buf, int pos, char ins) {
    char *at = buf + pos;

    memmove(at + 1, at, strlen(at) + 1);
    *at = ins;
    return buf;
}
/*
 * Counts the characters of `s` that occur in the set `accept`.
 */
int strchcount(char *s, char *accept) {
    int hits = 0;

    for(char *p = strpbrk(s, accept); p; p = strpbrk(p+1, accept))
        hits++;

    return hits;
}
/*
 * Tells whether the word of `wordlen` characters starting at
 * `startword` inside `buf` stands alone: it must begin at the start of
 * `buf` or after a character from `delim`, and be followed by the end
 * of the string or a character from `delim`. Returns 1 or 0.
 */
int stralone(char *buf, char *startword, int wordlen, char *delim) {
    const char *after = startword + wordlen;

    const bool leftok = (startword == buf)
        || (strchr(delim, startword[-1]) != 0);
    const bool rightok = (*after == 0)
        || (strchr(delim, *after) != 0);

    return leftok && rightok;
}
/*
 * Returns the part of `fname` after its last '/', or the whole name
 * when it contains no '/'.
 */
string justfname(const string &fname) {
    const string::size_type slash = fname.rfind('/');

    if(slash == string::npos) return fname;
    return fname.substr(slash + 1);
}
/*
 * Returns the part of `fname` before its last '/', or an empty string
 * when it contains no '/'.
 */
string justpathname(const string &fname) {
    const string::size_type slash = fname.rfind('/');

    return (slash == string::npos) ? string() : fname.substr(0, slash);
}
/*
 * Releases a block obtained from malloc() and friends. A null pointer
 * is accepted and ignored (free() is a no-op for it).
 */
void charpointerfree(void *p) {
    free(static_cast<char *>(p));
}
/*
 * Deliberately does nothing with `p`; it has the same signature as
 * charpointerfree() for callers that must not release the pointer.
 */
void nothingfree(void *p) {
    (void) p;
}
/*
 * Compares two C strings passed as void pointers with strcmp().
 * When either pointer is null the result is 0 only if both are null,
 * otherwise 1.
 */
int stringcompare(void *s1, void *s2) {
    if(s1 && s2) {
        return strcmp(static_cast<const char *>(s1), static_cast<const char *>(s2));
    }

    return s1 != s2;
}
/*
 * Compares two integers carried in void pointers: returns 0 when they
 * are equal and 1 otherwise (no ordering is reported).
 */
int intcompare(void *s1, void *s2) {
    const intptr_t left = (intptr_t) s1;
    const intptr_t right = (intptr_t) s2;

    return (left == right) ? 0 : 1;
}
/* Returns the decimal representation of the signed integer `i`. */
string i2str(int i) {
    char digits[64];

    snprintf(digits, sizeof(digits), "%d", i);
    return string(digits);
}
/*
 * Returns the decimal representation of `i` interpreted as an
 * unsigned integer.
 *
 * The format used to be "%du", which printed the value as signed and
 * appended a literal 'u'.
 */
string ui2str(int i) {
    char buf[64];

    snprintf(buf, sizeof(buf), "%u", (unsigned int) i);
    return (string) buf;
}
/*
 * Returns a copy of `text` in which every character that is not
 * alphanumeric is preceded by a backslash.
 *
 * Characters are passed to isalnum() as unsigned char; handing it a
 * negative char value is undefined behaviour.
 */
string textscreen(const string &text) {
    string r;
    r.reserve(text.size()*2);

    for(string::size_type i = 0; i < text.size(); i++) {
        const char c = text[i];

        if(!isalnum((unsigned char) c)) r += '\\';
        r += c;
    }

    return r;
}
/*
 * Returns `base` without its leading characters that occur in `delim`;
 * an empty string when `base` consists of such characters only.
 */
string leadcut(const string &base, const string &delim) {
    const string::size_type first = base.find_first_not_of(delim);

    if(first == string::npos) return string();
    return base.substr(first);
}
/*
 * Returns `base` without its trailing characters that occur in
 * `delim`; an empty string when `base` consists of such characters
 * only.
 */
string trailcut(const string &base, const string &delim) {
    const string::size_type last = base.find_last_not_of(delim);

    if(last == string::npos) return string();
    return base.substr(0, last + 1);
}
/*
 * Cuts the first word off `base` and returns it.
 *
 * Leading delimiter characters are dropped first. The word ends at the
 * first character found in `delim`; `base` is left holding the text
 * after the word with its leading delimiters removed. When no
 * delimiter follows, the whole remaining text is the word and `base`
 * becomes empty.
 */
string getword(string &base, const string &delim) {
    /* find_first_not_of() yields npos for an all-delimiter string,
       which makes erase() clear it completely */
    base.erase(0, base.find_first_not_of(delim));

    const char *seps = delim.c_str();
    string::size_type cut = 0;
    while(cut < base.size() && !strchr(seps, base[cut])) cut++;

    if(cut == base.size()) {
        /* no delimiter: everything left is the word */
        string word;
        word.swap(base);
        return word;
    }

    const string word = base.substr(0, cut);
    base.erase(0, cut);
    base.erase(0, base.find_first_not_of(delim));
    return word;
}
/*
 * Cuts the first word off `base` like getword(), except that
 * delimiters inside quotes do not end the word. Every character from
 * `quote` toggles the quoted state; the quote characters stay part of
 * the returned word.
 *
 * When the returned word equals what is left in `base`, `base` is
 * cleared.
 */
const string getwordquote(string &base, string quote, string delim) {
    base.erase(0, base.find_first_not_of(delim));

    string word = base;
    bool quoted = false;

    for(string::size_type k = 0; k < word.size(); k++) {
        const char c = word[k];

        if(strchr(quote.c_str(), c)) {
            quoted = !quoted;
            continue;
        }

        if(quoted || !strchr(delim.c_str(), c)) continue;

        word.resize(k);
        base.erase(0, k);
        base.erase(0, base.find_first_not_of(delim));
        break;
    }

    if(word == base) base = "";
    return word;
}
/*
 * Cuts the last word off `base` and returns it.
 *
 * Trailing delimiter characters are dropped first. The word starts
 * after the last character found in `delim`; `base` keeps the text
 * before it with trailing delimiters removed. When the returned word
 * equals what is left in `base` (in particular when no delimiter was
 * found), `base` is cleared.
 */
string getrword(string &base, const string &delim) {
    /* npos + 1 wraps to 0, so an all-delimiter string is cleared */
    base.erase(base.find_last_not_of(delim) + 1);

    string word = base;

    for(int k = (int) base.size() - 1; k >= 0; k--) {
        if(!strchr(delim.c_str(), base[k])) continue;

        word = base.substr(k+1);
        base.erase(k);
        base.erase(base.find_last_not_of(delim) + 1);
        break;
    }

    if(word == base) base = "";
    return word;
}
/*
 * Cuts the last word off `base` like getrword(), except that
 * delimiters inside quotes do not separate words. Scanning runs from
 * the end; every character from `quote` toggles the quoted state and
 * stays part of the returned word.
 *
 * When the returned word equals what is left in `base`, `base` is
 * cleared.
 */
string getrwordquote(string &base, const string &quote, const string &delim) {
    base.erase(base.find_last_not_of(delim) + 1);

    string word = base;
    bool quoted = false;

    for(int k = (int) base.size() - 1; k >= 0; k--) {
        const char c = base[k];

        if(strchr(quote.c_str(), c)) {
            quoted = !quoted;
            continue;
        }

        if(quoted || !strchr(delim.c_str(), c)) continue;

        word = base.substr(k+1);
        base.erase(k);
        base.erase(base.find_last_not_of(delim) + 1);
        break;
    }

    if(word == base) base = "";
    return word;
}
557 int rtabmargin(bool fake, int curpos, const char *p) {
558 int ret = -1, n, near;
560 if(p && (curpos != strlen(p))) {
561 n = strspn(p+curpos, " ");
563 if(fake) {
564 near = ((curpos/(TAB_SIZE/2))+1)*(TAB_SIZE/2);
565 if(n >= near-curpos) ret = near;
568 near = ((curpos/TAB_SIZE)+1)*TAB_SIZE;
569 if(n >= near-curpos) ret = near;
570 } else {
571 if(p && fake) fake = (strspn(p, " ") == strlen(p));
572 if(fake) ret = ((curpos/(TAB_SIZE/2))+1)*(TAB_SIZE/2);
573 else ret = ((curpos/TAB_SIZE)+1)*TAB_SIZE;
576 return ret;
579 int ltabmargin(bool fake, int curpos, const char *p) {
580 int ret = -1, near, n = 0;
581 const char *cp;
583 if(p) {
584 cp = p+curpos;
586 if(curpos) {
587 if(*(--cp) == ' ') n++;
588 for(; (*cp == ' ') && (cp != p); cp--) n++;
591 if(fake) {
592 near = (curpos/(TAB_SIZE/2))*(TAB_SIZE/2);
593 if(near <= curpos-n)
594 if((ret = curpos-n) != 0) ret++;
597 near = (curpos/TAB_SIZE)*TAB_SIZE;
598 if(near <= curpos-n) {
599 if((ret = curpos-n) != 0) ret++;
600 } else ret = near;
602 } else {
603 if(fake) ret = (curpos/(TAB_SIZE/2))*(TAB_SIZE/2);
604 else ret = (curpos/TAB_SIZE)*TAB_SIZE;
607 return ret;
610 void breakintolines(string text, vector<string> &lst, int linelen) {
611 int dpos, nlen;
612 string sub;
613 vector<string>::iterator i;
615 breakintolines(text, lst);
617 if(linelen > 0) {
618 for(i = lst.begin(); i != lst.end(); i++) {
619 if(i->size() > linelen) {
620 sub = i->substr(0, nlen = linelen);
622 if((dpos = sub.rfind(" ")) != -1) {
623 if(dpos) nlen = dpos; else nlen = 1;
626 if(dpos != -1)
627 nlen++;
629 sub = i->substr(nlen);
630 i->erase(nlen);
631 lst.insert(i+1, sub);
632 i = lst.begin();
638 void breakintolines(const string &text, vector<string> &lst) {
639 int npos, dpos, tpos;
640 string sub;
642 tpos = 0;
643 lst.clear();
645 while(tpos < text.size()) {
646 if((npos = text.find("\n", tpos)) != -1) {
647 sub = text.substr(tpos, npos-tpos);
648 } else {
649 sub = text.substr(tpos);
650 npos = text.size();
653 tpos += npos-tpos+1;
655 for(dpos = 0; (dpos = sub.find("\r", dpos)) != -1; ) {
656 sub.erase(dpos, 1);
659 for(dpos = 0; (dpos = sub.find("\t", dpos)) != -1; ) {
660 sub.erase(dpos, 1);
661 sub.insert(dpos, string(rtabmargin(false, dpos)-dpos, ' '));
664 lst.push_back(sub);
668 void find_gather_quoted(vector<quotedblock> &lst, const string &str,
669 const string &quote, const string &escape) {
670 bool inquote = false;
671 int npos = 0, qch;
672 quotedblock qb;
674 while((npos = str.find_first_of(quote, npos)) != -1) {
675 if(npos)
676 if(escape.find(str[npos-1]) == -1) {
677 inquote = !inquote;
679 if(inquote) {
680 qb.begin = npos;
681 qch = str[npos];
682 } else {
683 if(str[npos] == qch) {
684 qb.end = npos;
685 lst.push_back(qb);
686 } else {
687 inquote = true;
691 npos++;
695 int find_quoted(const string &str, const string &needle, int offs,
696 const string &quote, const string &escape) {
697 vector<quotedblock> positions;
698 vector<quotedblock>::iterator qi;
699 int npos = offs;
700 bool found;
702 find_gather_quoted(positions, str, quote, escape);
704 while((npos = str.find(needle, npos)) != -1) {
705 for(found = false, qi = positions.begin(); qi != positions.end() && !found; qi++)
706 if((npos > qi->begin) && (npos < qi->end)) found = true;
708 if(!found) break;
709 npos++;
712 return !found ? npos : -1;
715 int find_quoted_first_of(const string &str, const string &needle, int offs,
716 const string &quote, const string &escape) {
717 vector<quotedblock> positions;
718 vector<quotedblock>::iterator qi;
719 int npos = offs;
720 bool found;
722 find_gather_quoted(positions, str, quote, escape);
724 while((npos = str.find_first_of(needle, npos)) != -1) {
725 for(found = false, qi = positions.begin(); qi != positions.end() && !found; qi++)
726 if((npos > qi->begin) && (npos < qi->end)) found = true;
728 if(!found) break;
729 npos++;
732 return !found ? npos : -1;
/*
 * Splits `text` into pieces for `lst` (which is cleared first).
 *
 * Each piece takes at most `size` minus the length of `cont`
 * characters of text, preferably ending at the last space or tab in
 * that window, and gets `cont` appended while more than cont.size()
 * characters remain. A shorter remainder is appended to the current
 * piece instead. Whitespace at the start of the remaining text is
 * dropped between pieces.
 *
 * Degenerate sizes are guarded so the loop always terminates: with no
 * room at all one character is taken per piece, and an iteration that
 * consumed nothing drops one (whitespace) character.
 */
void splitlongtext(string text, vector<string> &lst, int size, const string cont) {
    lst.clear();

    /* unsigned on purpose: mirrors the original size-cont.size() */
    const string::size_type room = (string::size_type) size - cont.size();

    while(!text.empty()) {
        const string::size_type before = text.size();
        string::size_type cut;

        if(text.size() <= room) {
            cut = text.size();
        } else {
            cut = text.substr(0, room).find_last_of(" \t");
            if(cut == string::npos) cut = room ? room : 1;
        }

        string sub = text.substr(0, cut);
        text.erase(0, cut);

        if(text.size() > cont.size()) {
            sub += cont;
        } else {
            sub += text;
            text = "";
        }

        const string::size_type first = text.find_first_not_of(" \t");
        if(first != string::npos) text.erase(0, first);

        lst.push_back(sub);

        /* no progress is only possible on leading whitespace: drop it */
        if(text.size() == before) text.erase(0, 1);
    }
}
763 string strdateandtime(time_t stamp, const string &fmt) {
764 return strdateandtime(localtime(&stamp), fmt);
/*
 * Formats the broken-down time `tms` with strftime() using `fmt`, or
 * "%b %e %Y %H:%M" when `fmt` is empty.
 *
 * Returns an empty string when `tms` is null or when strftime()
 * reports 0 (the result did not fit or was empty); the buffer contents
 * are not defined in that case and must not be returned.
 */
string strdateandtime(struct tm *tms, const string &fmt) {
    char buf[512];
    /*time_t current_time = time(0);*/
    /*time_t when = mktime(tms);*/
    string afmt = fmt;

    if(afmt.empty()) {
        afmt = "%b %e %Y %H:%M";
        /*
         * Disabled: pick the default format by the age of the stamp.
         *
        if(current_time > when + 6L * 30L * 24L * 60L * 60L // Old.
        || current_time < when - 60L * 60L) { // Future.
            afmt = "%b %e %Y";
        } else {
            afmt = "%b %e %H:%M";
        }
         */
    }

    if(!tms) return "";
    if(!strftime(buf, sizeof(buf), afmt.c_str(), tms)) return "";

    return buf;
}
/*
 * Tells whether the span of `s` from index `so` to index `eo` is
 * bounded by word delimiters.
 *
 * The left side is fine when `so` is 0 or the character before it is
 * a delimiter; the right side when `eo` is the index of the last
 * character, the character at `eo` is a delimiter, or `eo` is the
 * length of the string.
 */
bool iswholeword(const string &s, int so, int eo) {
    const string wdelims = "[](),.; <>-+{}=|&%~*/:?@";

    const bool leftok = (so == 0)
        || (wdelims.find(s.substr(so-1, 1)) != string::npos);
    const bool rightok = (eo == s.size()-1)
        || (wdelims.find(s.substr(eo, 1)) != string::npos);

    return rightok && leftok;
}
/*
 * Converts a hexadecimal string of at most two digits to its value.
 * Longer strings (and the empty string) yield 0.
 *
 * Digits are accumulated as r = r*16 + digit, so a single digit such
 * as "a" gives 10; it used to be multiplied by 16 as if a second digit
 * followed. Characters are not validated: anything that is not a
 * decimal digit is treated as a hex letter.
 */
int hex2int(const string &ahex) {
    if(ahex.size() > 2) return 0;

    int r = 0;

    for(string::size_type i = 0; i < ahex.size(); i++) {
        const unsigned char c = (unsigned char) ahex[i];
        const int digit = isdigit(c) ? c - '0' : toupper(c) - 'A' + 10;

        r = r*16 + digit;
    }

    return r;
}
814 bool getconf(string &st, string &buf, ifstream &f, bool passemptylines) {
815 bool ret = false;
816 static string sect;
818 while(!f.eof() && !ret) {
819 getstring(f, buf);
821 if(buf.size()) {
822 switch(buf[0]) {
823 case '%':
824 sect = buf.substr(1);
825 break;
826 case '#':
827 if(buf[1] != '!') break;
828 default:
829 ret = buf.size();
830 break;
832 } else if(passemptylines) {
833 ret = 1;
837 st = sect;
838 return ret;
/*
 * Reads one line of any length from `f` into `sbuf`.
 *
 * The line is read in chunks of up to 2047 characters. When getline()
 * stops because a chunk was full, the stream state is cleared and
 * reading continues until the stream is good again or at end of file.
 *
 * Returns false, leaving `sbuf` untouched, when the stream is already
 * at end of file.
 */
bool getstring(istream &f, string &sbuf) {
    if(f.eof()) return false;

    char chunk[2048];
    sbuf = "";

    do {
        f.clear();
        f.getline(chunk, sizeof(chunk));
        sbuf += chunk;
    } while(!f.good() && !f.eof());

    return true;
}
/*
 * Changes the case of `s`; `mode` is "tolower" or "toupper", any other
 * value returns `s` unchanged.
 *
 * Characters found in one of the two tables below are replaced by the
 * character at the same index of the other table. All remaining
 * characters go through tolower() or toupper().
 *
 * NOTE(review): the tables look like single-byte Cyrillic text shown in
 * a Latin-1 rendering; confirm the source file encoding before editing
 * these literals.
 */
string ruscase(const string &s, const string &mode) {
    static const string lower = "ÁÂ×ÇÄÅÖÚÉÊËÌÍÎÏÐÒÓÔÕÆÈÃÞÛÝØßÙÜÀÑ";
    static const string upper = "áâ÷çäåöúéêëìíîïðòóôõæèãþûýøÿùüàñ";

    const bool tolow = (mode == "tolower");
    const bool toup = (mode == "toupper");

    if(!tolow && !toup) return s;

    const string &tfrom = tolow ? upper : lower;
    const string &tto = tolow ? lower : upper;

    string r = s;

    /* table driven part */
    for(string::size_type pos = r.find_first_of(tfrom);
    pos != string::npos;
    pos = r.find_first_of(tfrom, pos+1)) {
        r[pos] = tto[tfrom.find(r[pos])];
    }

    /* everything else is left to the C library */
    for(string::size_type pos = r.find_first_not_of(tfrom);
    pos != string::npos;
    pos = r.find_first_not_of(tfrom, pos+1)) {
        r[pos] = tolow ? tolower(r[pos]) : toupper(r[pos]);
    }

    return r;
}
/*
 * Converts `atext` from character set `fromcs` to `tocs` with iconv().
 *
 * Returns `atext` unchanged when iconv support is not compiled in
 * (HAVE_ICONV undefined) or the conversion descriptor cannot be
 * opened. An input byte that iconv() rejects with EILSEQ is skipped
 * and the conversion continues; any other error stops it and the text
 * converted so far is returned.
 *
 * The converted bytes are appended with an explicit length. Before,
 * a terminator was written at index `outleft` of a buffer of exactly
 * that size when the output filled it, and output containing NUL
 * bytes was cut short.
 */
string siconv(const string &atext, const string &fromcs, const string &tocs) {
#ifdef HAVE_ICONV
    iconv_t cd = iconv_open(tocs.c_str(), fromcs.c_str());

    if(cd != ((iconv_t) -1)) {
        string r;

        /* iconv() wants a writable input pointer */
        vector<char> in(atext.begin(), atext.end());
        in.push_back(0);

        char *inbuf = &in[0];
        size_t inleft = atext.size();

        //from iconv.c (libiconv)
        iconv(cd,NULL,NULL,NULL,NULL);

        while (inleft > 0) {
            vector<char> out(inleft*4);
            char *outbuf = &out[0];
            size_t outleft = out.size();

            const size_t res = iconv(cd, (ICONV_CONST char **) &inbuf, &inleft,
                &outbuf, &outleft);
            const int err = errno;  /* save before anything can change it */

            r.append(&out[0], out.size() - outleft);

            if ((res == (size_t)(-1)) && (err != EILSEQ)) {
                break;
            }

            if (inleft>0) {
                /* skip the byte that could not be converted */
                inbuf++;
                inleft--;
            }
        }

        iconv_close(cd);
        return r;
    }
#endif

    return atext;
}
943 string cuthtml(const string &html, int flags) {
944 string r, tag, buf, token;
945 int npos, pos, tpos;
947 for(pos = 0; (npos = html.find("<", pos)) != -1; pos = npos) {
948 tpos = npos;
949 r += html.substr(pos, npos-pos);
951 if((npos = html.find(">", ++npos)) != -1) {
952 npos++;
954 tag = html.substr(tpos+1, npos-tpos-2);
955 if(tag.substr(0, 1) == "/") tag.erase(0, 1);
956 tag = leadcut(trailcut(tag, "/ \n\r"), "/ \n\r");
958 buf = ruscase(tag, "toupper");
959 token = getword(buf);
961 if(token == "BR") r += (flags & chCutBR) ? "\n" : "<br>";
962 else if((flags & chCutBR) && token == "P") r += "\n\n";
964 if(flags & chLeaveLinks) {
965 getword(tag);
967 if(token == "A") {
968 if((tpos = buf.find("HREF")) != -1)
969 if((tpos = buf.substr(tpos).find("\"")) != -1) {
970 tag.erase(0, tpos+1);
971 r += "[ href: " + getword(tag, "\"") + " ] ";
974 } else if(token == "IMG") {
975 if((tpos = buf.find("SRC")) != -1)
976 if((tpos = buf.substr(tpos).find("\"")) != -1) {
977 tag.erase(0, tpos+1);
978 r += " [ img: " + getword(tag, "\"") + " ]";
984 } else {
985 r += html.substr(tpos);
986 npos = html.size();
990 if(pos < html.size())
991 r += html.substr(pos);
993 return r;
/*
 * Converts the UTF-8 string `pin` to a single-byte string and returns
 * it in a buffer allocated with new[] (release it with delete[]).
 * Returns NULL for a null `pin`.
 *
 * ASCII bytes are copied. A two-byte sequence yields one byte built
 * from the low two bits of the lead byte and the six payload bits of
 * the continuation byte. Sequences of three or more bytes cannot be
 * represented and produce a single '?'. Stray continuation bytes are
 * dropped; a lead byte without its continuation byte produces '?'.
 *
 * Lead bytes are classified by range. The former test
 * (c & 0xC0) == 0xC0 also matched the lead bytes of longer sequences,
 * so those were decoded as two-byte sequences and the branches meant
 * for them could never run.
 */
char *utf8_to_str(const char *pin) {
    if(!pin) return NULL;

    const unsigned char *in = (const unsigned char *) pin;
    const size_t inlen = strlen(pin);
    char *result = new char[inlen + 1];
    size_t n = 0, i = 0;

    while(n < inlen) {
        const unsigned int c = in[n++];

        if(c < 0x80) {
            result[i++] = (char) c;
            continue;
        }

        if(c < 0xC0) continue;  /* continuation byte without a lead byte */

        /* number of continuation bytes announced by the lead byte */
        int extra;
        if(c >= 0xFC) extra = 5;
        else if(c >= 0xF8) extra = 4;
        else if(c >= 0xF0) extra = 3;
        else if(c >= 0xE0) extra = 2;
        else extra = 1;

        if(extra == 1 && n < inlen && (in[n] & 0xC0) == 0x80) {
            result[i++] = (char)(((c & 0x03) << 6) | (in[n++] & 0x3F));
            continue;
        }

        /* not representable or truncated: swallow what belongs to it */
        while(extra-- > 0 && n < inlen && (in[n] & 0xC0) == 0x80) n++;
        result[i++] = '?';
    }

    result[i] = '\0';
    return result;
}
/*
 * Converts the single-byte string `pin` to UTF-8 and returns it in a
 * buffer allocated with new[] (release it with delete[]). Returns NULL
 * for a null `pin`.
 *
 * Bytes below 128 are copied; any other byte becomes a two-byte
 * sequence carrying its value. An ESC byte (27) starts a sequence that
 * is dropped: ESC and the next byte, an optional 'x', an optional '3',
 * and two more bytes.
 *
 * Skipping an ESC sequence can move the index past the end of the
 * input, so every look-ahead is bounds-checked.
 */
char *str_to_utf8(const char *pin) {
    if(!pin) return NULL;

    const unsigned char *in = (const unsigned char *) pin;
    const size_t inlen = strlen(pin);
    char *result = new char[inlen * 2 + 1];
    size_t n = 0, i = 0;

    while(n < inlen) {
        const unsigned int c = in[n];

        if(c == 27) {
            n += 2;
            if(n < inlen && in[n] == 'x') n++;
            if(n < inlen && in[n] == '3') n++;
            n += 2;
            continue;
        }

        if(c < 128) {
            result[i++] = (char) c;
        } else {
            result[i++] = (char)((c >> 6) | 192);
            result[i++] = (char)((c & 63) | 128);
        }

        n++;
    }

    result[i] = '\0';
    return result;
}
1066 string striprtf(const string &s, const string &charset) {
1067 string r, spec, unichar, tmp;
1068 char pre = 0;
1069 bool bprint, bspec, bunicode;
1070 int bparen = -1;
1072 bprint = true;
1073 bspec = bunicode = false;
1075 for(string::const_iterator i = s.begin(); i != s.end(); ++i) {
1076 if(!isalpha(*i) && !isdigit(*i)) bprint = true;
1078 if(bspec) {
1079 spec += *i;
1081 if(spec.size() == 2) {
1082 r += (char) hex2int(spec);
1083 bspec = false;
1084 bprint = true;
1087 } else switch(*i) {
1088 case '{':
1089 if(pre != '\\') {
1090 bparen++;
1091 bprint = false;
1092 } else {
1093 bprint = true;
1094 r += *i;
1096 break;
1098 case '}':
1099 if(pre != '\\') {
1100 bprint = false;
1101 bparen--;
1102 } else {
1103 bprint = true;
1104 r += *i;
1107 break;
1109 case '\\':
1110 if(pre != '\\') {
1111 bprint = false;
1112 } else {
1113 bprint = true;
1114 r += *i;
1115 pre = 0;
1116 continue;
1118 break;
1120 case '\'':
1121 if(!bparen && bprint && pre == '\\') {
1122 spec = "";
1123 bspec = true;
1124 } else {
1125 r += *i;
1127 break;
1129 case 'u':
1130 if(!bparen) {
1131 if(pre == '\\' && isdigit(*(i+1))) {
1132 unichar = "";
1133 bunicode = true;
1134 } else if(bprint) {
1135 r += *i;
1138 break;
1139 default:
1140 if(!bparen) {
1141 if(bunicode) {
1142 unichar += *i;
1144 if(unichar.size() == 5) {
1145 bunicode = false;
1146 if(unichar.substr(0, 4).find_first_not_of("0123456789") == -1) {
1147 long l = strtol(unichar.substr(0, 4).c_str(), 0, 0);
1148 char ubuf[sizeof(long)+4];
1149 #ifdef HAVE_ICONV
1150 memcpy(ubuf, "\xff\xfe", 2);
1151 memcpy(ubuf+2, &l, sizeof(long));
1152 memcpy(ubuf+sizeof(long)+2, "\x0a\x00", 2);
1153 #else
1154 strcpy(ubuf, unichar.substr(4).c_str());
1155 #endif
1156 r += siconv(ubuf , "utf-16", charset);
1160 } else if(bprint) {
1161 r += *i;
1166 pre = *i;
1169 return leadcut(trailcut(r));