Recoded russian messages to UTF-8
[centerim.git] / kkstrtext / kkstrtext.cc
blob5a1d95185ba930b5055f6b160dbe2e2e9eaf2fbc
1 /*
3 * kkstrtext string related and text processing routines
4 * $Id: kkstrtext.cc,v 1.45 2005/02/01 00:13:24 konst Exp $
6 * Copyright (C) 1999-2004 by Konstantin Klyagin <k@thekonst.net>
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or (at
11 * your option) any later version.
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * General Public License for more details.
18 * You should have received a copy of the GNU General Public License
19 * along with this program; if not, write to the Free Software
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
21 * USA
25 #include "kkstrtext.h"
26 #include <errno.h>
27 #include <stdint.h> /* for intptr_t */
/*
 * Removes up to `count` characters from `strin`, starting at index
 * `frompos`, by shifting the tail of the string left in place.
 *
 * strin   - NUL-terminated, writable string (modified in place)
 * frompos - index of the first character to remove
 * count   - number of characters to remove; clamped to the characters
 *           available after `frompos`
 *
 * Returns `strin`. Nothing is changed when `count` is not positive, or
 * when `frompos` is negative or lies at/after the terminator.
 */
char *strcut(char *strin, int frompos, int count) {
    if(!strin || frompos < 0 || count <= 0)
        return strin;

    const size_t len = strlen(strin);
    const size_t from = (size_t) frompos;

    /* Guard against unsigned underflow in len - from below. */
    if(from >= len)
        return strin;

    size_t cut = (size_t) count;
    if(cut > len - from)
        cut = len - from;

    /* Source and destination overlap, hence memmove; +1 carries the NUL. */
    memmove(strin + from, strin + from + cut, len - from - cut + 1);
    return strin;
}
42 char *strimlead(char *str) { return trimlead(str, " \t"); }
43 char *strimtrail(char *str) { return trimtrail(str, " \t"); }
44 char *strim(char *str) { return trim(str, " \t"); }
/*
 * Removes every leading character of `str` that occurs in `chr`,
 * shifting the remainder (with its terminator) to the front in place.
 *
 * str - NUL-terminated, writable string
 * chr - NUL-terminated set of characters to strip
 *
 * Returns `str`.
 */
char *trimlead(char *str, char *chr) {
    if(!str || !chr)
        return str;

    /* strspn stops at the terminator, so an all-`chr` string becomes "". */
    const size_t skip = strspn(str, chr);

    /* The ranges overlap: memmove is required (strcpy here is undefined). */
    if(skip)
        memmove(str, str + skip, strlen(str + skip) + 1);

    return str;
}
/*
 * Cuts off every trailing character of `str` that occurs in `chr` by
 * moving the terminator backwards. Returns `str`.
 */
char *trimtrail(char *str, char *chr) {
    size_t len = strlen(str);

    while(len > 0 && strchr(chr, str[len-1]))
        str[--len] = 0;

    return str;
}
57 char *trim(char *str, char *chr) {
58 return trimlead(trimtrail(str, chr), chr);
/*
 * Formats the local time `*t` according to `mask` into `sout`.
 *
 * A run of one of the letters D, M, Y, h, m, s in `mask` is replaced by
 * day, month, year, hour, minute or second, zero-padded to the length of
 * the run. A year run of three letters or fewer drops the first two
 * digits of the year (so "YY" yields "05" for 2005). Any other mask
 * character is copied verbatim.
 *
 * sout must be large enough for the expanded mask; its size cannot be
 * checked here. Returns `sout` (left empty if localtime() fails).
 */
char *time2str(const time_t *t, char *mask, char *sout) {
    size_t out = 0;

    sout[0] = 0;

    const struct tm *s = localtime(t);
    if(!s)
        return sout;

    for(size_t i = 0; mask[i]; ) {
        const char ch = mask[i];

        if(!strchr("DMYhms", ch)) {
            /* Literal character. */
            sout[out++] = ch;
            sout[out] = 0;
            i++;
            continue;
        }

        /* Length of the run of identical field letters. */
        int len = 0;
        while(mask[i] == ch) {
            len++;
            i++;
        }

        int value = 0;

        switch(ch) {
            case 'D': value = s->tm_mday; break;
            case 'M': value = s->tm_mon+1; break;
            case 'Y': value = s->tm_year + 1900; break;
            case 'h': value = s->tm_hour; break;
            case 'm': value = s->tm_min; break;
            case 's': value = s->tm_sec; break;
        }

        /* Written straight into the output, so no fixed-size scratch
           buffer can overflow on long runs. */
        int written = sprintf(sout + out, "%0*d", len, value);

        if(ch == 'Y' && len <= 3) {
            /* Short year: discard the two leading digits. */
            const int drop = written < 2 ? written : 2;
            memmove(sout + out, sout + out + drop, written - drop + 1);
            written -= drop;
        }

        out += written;
    }

    return sout;
}
/*
 * Parses `sdate` according to `mask` and converts it to a time_t in the
 * local time zone.
 *
 * A run of one of the letters D, M, Y, h, m, s in `mask` marks the
 * columns of `sdate` holding day, month, four-digit year, hour, minute
 * or second; other mask characters are skipped. Fields absent from the
 * mask default to 0 (day of month to 1) instead of being left
 * uninitialised.
 *
 * The hour is stored as parsed: time2str() prints tm_hour unchanged, so
 * decrementing it here would break the round trip between the two.
 *
 * The result is stored in `*t` (when `t` is non-NULL) and returned.
 */
time_t str2time(char *sdate, char *mask, time_t *t) {
    struct tm s;
    memset(&s, 0, sizeof(s));
    s.tm_mday = 1;

    const size_t datelen = strlen(sdate);

    for(size_t i = 0; mask[i]; ) {
        const char ch = mask[i];

        if(!strchr("DMYhms", ch)) {
            i++;
            continue;
        }

        const size_t start = i;
        while(mask[i] == ch) i++;

        /* Copy the matching columns, bounded by both the scratch buffer
           and the actual length of the date string. */
        char b[16];
        size_t k = 0;

        for(size_t j = start; j < i && j < datelen && k < sizeof(b)-1; j++)
            b[k++] = sdate[j];

        b[k] = 0;

        const int value = atoi(b);

        switch(ch) {
            case 'D': s.tm_mday = value; break;
            case 'M': s.tm_mon = value-1; break;
            case 'Y': s.tm_year = value-1900; break;
            case 'h': s.tm_hour = value; break;
            case 'm': s.tm_min = value; break;
            case 's': s.tm_sec = value; break;
        }
    }

    /* Let mktime decide whether daylight saving time is in effect. */
    s.tm_isdst = -1;

    const time_t result = mktime(&s);
    if(t) *t = result;
    return result;
}
/*
 * std::string front end for the in-place unmime(char *) decoder: works
 * on a private, NUL-terminated copy of `text` and returns the decoded
 * result. The copy lives in a vector so it is released even if building
 * the result string throws.
 */
string unmime(const string &text) {
    vector<char> buf(text.begin(), text.end());
    buf.push_back('\0');

    return string(unmime(&buf[0]));
}
148 string mime(const string &text) {
149 string r;
150 char *buf = new char[text.size()*3+1];
151 r = mime(buf, text.c_str());
152 delete[] buf;
153 return r;
156 string fromutf8(const string &text) {
157 string r;
158 char *buf = (char *) utf8_to_str(text.c_str());
159 r = buf;
160 delete buf;
161 return r;
164 string toutf8(const string &text) {
165 string r;
166 char *buf = str_to_utf8(text.c_str());
167 r = buf;
168 delete buf;
169 return r;
/* Value of one hexadecimal digit; the caller guarantees isxdigit(c). */
static int unmime_hexval(char c) {
    if(c >= '0' && c <= '9') return c - '0';
    return toupper((unsigned char) c) - 'A' + 10;
}

/*
 * Decodes URL-style escapes in `text` in place: '+' becomes a space and
 * "%XX" (two hex digits) becomes the byte with that value. A '%' that
 * is not followed by two hex digits is copied unchanged, so a truncated
 * escape at the end of the string can no longer push the scan past the
 * terminator.
 *
 * Returns `text`.
 */
char *unmime(char *text) {
    int s, d;

    for(s = 0, d = 0; text[s] != 0; s++) {
        if(text[s] == '+') {
            text[d++] = ' ';
        } else if(text[s] == '%'
        && isxdigit((unsigned char) text[s+1])
        && isxdigit((unsigned char) text[s+2])) {
            /* text[s+2] is only inspected once text[s+1] is known to be
               a non-NUL hex digit, so both reads stay inside the string. */
            text[d++] = (char) (unmime_hexval(text[s+1])*16 + unmime_hexval(text[s+2]));
            s += 2;
        } else {
            text[d++] = text[s];
        }
    }

    text[d] = 0;
    return text;
}
/*
 * URL-style encoder: ASCII letters and digits are copied as they are,
 * a space becomes '+', and every other byte becomes "%XX" with
 * upper-case hex digits.
 *
 * dst - output buffer; needs room for 3*strlen(src)+1 bytes
 * src - NUL-terminated input
 *
 * Returns `dst`.
 */
char *mime(char *dst, const char *src) {
    static const char hexdigits[] = "0123456789ABCDEF";
    char *out = dst;

    for(const char *in = src; *in; ++in) {
        const unsigned char ch = (unsigned char) *in;
        const bool plain = (ch >= 'a' && ch <= 'z')
            || (ch >= 'A' && ch <= 'Z')
            || (ch >= '0' && ch <= '9');

        if(plain) {
            *out++ = (char) ch;
        } else if(ch == ' ') {
            *out++ = '+';
        } else {
            *out++ = '%';
            *out++ = hexdigits[ch >> 4];
            *out++ = hexdigits[ch & 0x0F];
        }
    }

    *out = '\0';
    return dst;
}
/*
 * Appends the single character `c` to the NUL-terminated string `dest`,
 * which must have room for one more byte. Returns `dest`.
 */
char *strccat(char *dest, char c) {
    char *end = dest + strlen(dest);

    end[0] = c;
    end[1] = 0;

    return dest;
}
/*
 * Returns the positions in `haystack` of the quote characters that open
 * or close a quoted region, in order of appearance (even indexes open,
 * odd indexes close).
 *
 * haystack - text to scan
 * qs       - set of quote characters
 * aescs    - set of escape characters; ignored when identical to `qs`
 *
 * A quote that directly follows an escape character is not counted. An
 * escape that directly follows another escape inside a quoted region
 * neutralises it, so the next quote counts again. A region is closed
 * only by the same quote character that opened it.
 */
vector<int> getquotelayout(const string &haystack, const string &qs, const string &aescs) {
    vector<int> r;
    const string escs = (qs == aescs) ? string() : aescs;
    const string needle = qs + escs;

    char qchar = 0;     /* quote char of the open region, 0 outside quotes */
    char prevchar = 0;  /* previous special char; initialised so the first
                           comparison never reads an indeterminate value */
    int prevpos = 0;
    string::size_type curpos = 0, pos;

    /* Search from curpos directly instead of copying a substring each time. */
    while((pos = haystack.find_first_of(needle, curpos)) != string::npos) {
        curpos = pos;
        char cchar = haystack[curpos];

        const bool adjacent = (prevpos == (int) curpos - 1);
        const bool prevesc = escs.find(prevchar) != string::npos;

        if(escs.find(cchar) != string::npos) {
            if(qchar && adjacent && prevesc) {
                /* Neutralize previous esc char */
                cchar = 0;
            }
        } else if(qs.find(cchar) != string::npos) {
            if(!(prevesc && adjacent)) {
                /* Wasn't an escape (right before this quote char) */
                if(!qchar || (qchar == cchar)) {
                    qchar = qchar ? 0 : cchar;
                    r.push_back(curpos);
                }
            }
        }

        prevpos = curpos++;
        prevchar = cchar;
    }

    return r;
}
260 vector<int> getsymbolpositions(const string &haystack, const string &needles, const string &qoutes, const string &esc) {
261 vector<int> r, qp, nr;
262 vector<int>::iterator iq, ir;
263 int pos, st, ed, cpos;
265 for(cpos = 0; (pos = haystack.substr(cpos).find_first_of(needles)) != -1; ) {
266 r.push_back(cpos+pos);
267 cpos += pos+1;
270 qp = getquotelayout(haystack, qoutes, esc);
271 for(iq = qp.begin(); iq != qp.end(); iq++) {
272 if(!((iq-qp.begin()) % 2)) {
273 st = *iq;
274 ed = iq+1 != qp.end() ? *(iq+1) : haystack.size();
275 nr.clear();
277 for(ir = r.begin(); ir != r.end(); ir++) {
278 if(!(*ir > st && *ir < ed)) {
279 nr.push_back(*ir);
283 r = nr;
287 return r;
/*
 * Loop helper for the quote-aware scanners below. When the character
 * just before `curpos` is in the set `esc` and the one before that is
 * not, `curpos` is advanced by one and the enclosing loop is continued.
 * Nothing happens unless `curpos` is at least two characters past
 * `startpos`.
 *
 * The arguments are pasted without parentheses and `curpos` is modified
 * with ++, so it must be an expression on which ++ is meaningful.
 */
#define CHECKESC(curpos, startpos, esc) \
    if(curpos > startpos+1) \
        if(strchr(esc, *(curpos-1))) \
            if(!strchr(esc, *(curpos-2))) { \
                curpos++; \
                continue; \
            }
/*
 * Quote-aware strpbrk: returns a pointer to the first character of `s`
 * at index >= `offset` that is in `accept` and not inside a quoted
 * region, or 0 when there is none (or `s` is NULL/empty).
 *
 * q   - set of quote characters; a region is closed by the character
 *       that opened it
 * esc - set of escape characters; unless `esc` equals `q`, a quote that
 *       follows a single (not doubled) escape is ignored, provided it
 *       sits at least two characters into the string
 *
 * A character listed in both `accept` and `q` is treated as a quote.
 */
const char *strqpbrk(const char *s, int offset, const char *accept, const char *q, const char *esc) {
    if(!s || !s[0])
        return 0;

    /* std::string instead of an unchecked malloc'ed scratch buffer. */
    const string cset = string(accept) + q;
    const bool useesc = strcmp(esc, q) != 0;
    char qchar = 0;

    for(const char *p = s; (p = strpbrk(p, cset.c_str())) != 0; p++) {
        if(strchr(q, *p)) {
            /* Escaped quote: step over it without touching the state. */
            if(useesc && p > s+1 && strchr(esc, p[-1]) && !strchr(esc, p[-2]))
                continue;

            if(!qchar) {
                qchar = *p;
            } else if(*p == qchar) {
                qchar = 0;
            }
        } else if((p-s >= offset) && !qchar) {
            return p;
        }
    }

    return 0;
}
/*
 * Quote-aware, case-insensitive substring search: returns a pointer to
 * the first occurrence of `str` in `s` that starts outside a quoted
 * region, or 0 when there is none.
 *
 * q   - set of quote characters; every unescaped one toggles the
 *       "inside quotes" state
 * esc - set of escape characters; unless `esc` equals `q`, a quote that
 *       follows a single (not doubled) escape is ignored, provided it
 *       sits at least two characters into the string
 *
 * An escaped quote advances the scan by exactly one character, so the
 * character right after it is examined like any other.
 */
const char *strqcasestr(const char *s, const char *str, const char *q, const char *esc) {
    const size_t needlelen = strlen(str);
    const bool useesc = strcmp(esc, q) != 0;
    bool quote = false;

    for(const char *p = s; *p; p++) {
        if(strchr(q, *p)) {
            if(useesc && p > s+1 && strchr(esc, p[-1]) && !strchr(esc, p[-2]))
                continue;

            quote = !quote;
        }

        if(!quote && !strncasecmp(p, str, needlelen))
            return p;
    }

    return 0;
}
/*
 * Quote-aware strstr: returns a pointer to the first occurrence of
 * `str` in `s` that is not preceded by an odd number of unescaped quote
 * characters, or 0 when there is none.
 *
 * q   - set of quote characters
 * esc - set of escape characters; unless `esc` equals `q`, a quote that
 *       follows a single (not doubled) escape is ignored, provided it
 *       sits at least two characters into the string
 *
 * Quotes are counted from the start of `s` for every candidate; a quote
 * located exactly at the candidate position is counted too.
 */
const char *strqstr(const char *s, const char *str, const char *q, const char *esc) {
    const size_t needlelen = strlen(str);
    const bool useesc = strcmp(esc, q) != 0;
    const char *ss = s, *p;

    while((p = strstr(ss, str)) != 0) {
        bool quote = false;

        for(const char *r = s; (r = strpbrk(r, q)) != 0 && r <= p; r++) {
            if(useesc && r > s+1 && strchr(esc, r[-1]) && !strchr(esc, r[-2]))
                continue;

            quote = !quote;
        }

        if(!quote)
            return p;

        /* Always move forward: an empty needle would otherwise be found
           at the same spot forever. */
        if(!*p)
            break;

        ss = p + (needlelen ? needlelen : 1);
    }

    return 0;
}
/*
 * Inserts the string `ins` into `buf` at index `pos`, shifting the rest
 * of `buf` (terminator included) to the right.
 *
 * buf - NUL-terminated, writable buffer with room for strlen(ins) more
 *       bytes; `pos` must not exceed strlen(buf) (not checked)
 *
 * Returns `buf`. The tail is moved with memmove, so no temporary heap
 * copy (and no unchecked allocation) is involved.
 */
char *strinsert(char *buf, int pos, char *ins) {
    const size_t inslen = strlen(ins);
    char *at = buf + pos;

    memmove(at + inslen, at, strlen(at) + 1);
    memcpy(at, ins, inslen);

    return buf;
}
/*
 * Inserts the character `ins` into `buf` at index `pos`, shifting the
 * rest of `buf` (terminator included) one byte to the right.
 *
 * buf - NUL-terminated, writable buffer with room for one more byte;
 *       `pos` must not exceed strlen(buf) (not checked)
 *
 * Returns `buf`. The tail is moved with memmove, so no temporary heap
 * copy (and no unchecked allocation) is involved.
 */
char *strcinsert(char *buf, int pos, char ins) {
    char *at = buf + pos;

    memmove(at + 1, at, strlen(at) + 1);
    *at = ins;

    return buf;
}
/* Counts the characters of `s` that occur in the set `accept`. */
int strchcount(char *s, char *accept) {
    int hits = 0;

    for(char *p = strpbrk(s, accept); p; p = strpbrk(p+1, accept))
        hits++;

    return hits;
}
/*
 * Tells whether the `wordlen` characters at `startword` stand alone
 * within `buf`: the character before must be in `delim` (or the word
 * starts the buffer) and the character after must be in `delim` (or be
 * the terminator). Returns 1 if so, 0 otherwise.
 */
int stralone(char *buf, char *startword, int wordlen, char *delim) {
    const char after = startword[wordlen];
    bool leftok, rightok;

    if(buf == startword)
        leftok = true;
    else
        leftok = strchr(delim, startword[-1]) != 0;

    rightok = (after == 0) || (strchr(delim, after) != 0);

    return leftok && rightok;
}
/*
 * Returns the part of `fname` after the last '/', or the whole string
 * when it contains no '/'.
 */
string justfname(const string &fname) {
    const string::size_type slash = fname.rfind("/");

    if(slash == string::npos)
        return fname;

    return fname.substr(slash+1);
}
/*
 * Returns the part of `fname` before the last '/', or an empty string
 * when it contains no '/'.
 */
string justpathname(const string &fname) {
    const string::size_type slash = fname.rfind("/");

    if(slash == string::npos)
        return "";

    return fname.substr(0, slash);
}
/*
 * Releases a malloc-family block passed as an untyped pointer. A null
 * pointer is accepted: free() itself ignores it.
 */
void charpointerfree(void *p) {
    free((char *) p);
}
/* Deliberately does nothing with the pointer it is given. */
void nothingfree(void *p) {
    (void) p;
}
/*
 * Compares two untyped pointers as C strings. With both pointers set
 * the strcmp() result is returned; if either is null the result is 0
 * when both are null and 1 otherwise.
 */
int stringcompare(void *s1, void *s2) {
    if(s1 && s2)
        return strcmp((char *) s1, (char *) s2);

    return s1 != s2;
}
/*
 * Compares two pointer-sized integer values passed as void pointers.
 * Returns 0 when they are equal and 1 otherwise (no ordering).
 */
int intcompare(void *s1, void *s2) {
    const intptr_t left = (intptr_t) s1;
    const intptr_t right = (intptr_t) s2;

    return (left == right) ? 0 : 1;
}
/* Returns the decimal text form of the signed integer `i`. */
string i2str(int i) {
    char digits[64];

    sprintf(digits, "%d", i);

    return string(digits);
}
/*
 * Returns the decimal text form of `i` interpreted as an unsigned
 * integer. The conversion is "%u": "%du" would print the value as
 * signed and then append a literal 'u'.
 */
string ui2str(int i) {
    char buf[64];

    sprintf(buf, "%u", (unsigned int) i);

    return (string) buf;
}
/*
 * Returns a copy of `text` in which every character that is not
 * alphanumeric (per isalnum in the current locale) is preceded by a
 * backslash.
 */
string textscreen(const string &text) {
    string r;
    r.reserve(text.size()*2);

    for(string::const_iterator c = text.begin(); c != text.end(); ++c) {
        /* isalnum needs an unsigned char value; a plain char may be
           negative for bytes above 0x7F. */
        if(!isalnum((unsigned char) *c))
            r += '\\';

        r += *c;
    }

    return r;
}
/*
 * Returns `base` without its leading run of characters from `delim`;
 * an empty string when `base` consists only of such characters.
 */
string leadcut(const string &base, const string &delim) {
    const string::size_type first = base.find_first_not_of(delim);

    if(first == string::npos)
        return "";

    return base.substr(first);
}
/*
 * Returns `base` without its trailing run of characters from `delim`;
 * an empty string when `base` consists only of such characters.
 */
string trailcut(const string &base, const string &delim) {
    const string::size_type last = base.find_last_not_of(delim);

    if(last == string::npos)
        return "";

    return base.substr(0, last+1);
}
/*
 * Splits the first word off `base`.
 *
 * Leading delimiters are dropped from `base`, then everything up to the
 * first character found in `delim` is returned. `base` keeps what comes
 * after that word with its leading delimiters removed, or becomes empty
 * when no delimiter followed the word.
 */
string getword(string &base, const string &delim) {
    /* Drop leading delimiters. */
    string::size_type first = base.find_first_not_of(delim);
    if(first == string::npos) base.clear(); else base.erase(0, first);

    string word = base;
    bool found = false;

    for(string::size_type k = 0; k < word.size(); k++) {
        if(!strchr(delim.c_str(), word[k]))
            continue;

        word.resize(k);
        base.erase(0, k);

        first = base.find_first_not_of(delim);
        if(first == string::npos) base.clear(); else base.erase(0, first);

        found = true;
        break;
    }

    if(!found) base = "";
    return word;
}
/*
 * Splits the first word off `base`, treating quoted text as part of the
 * word.
 *
 * Leading delimiters are dropped from `base`; the word then runs up to
 * the first character from `delim` that is not inside quotes (each
 * character from `quote` toggles the quoted state; quotes stay in the
 * returned word). `base` keeps what follows with its leading delimiters
 * removed, or becomes empty when no delimiter followed the word.
 *
 * Whether a delimiter was found is tracked explicitly, as in getword();
 * comparing the word with the remainder wrongly emptied `base` for
 * input such as "a a".
 */
const string getwordquote(string &base, string quote, string delim) {
    string::size_type first = base.find_first_not_of(delim);
    if(first == string::npos) base.clear(); else base.erase(0, first);

    string sub = base;
    bool inquote = false, found = false;

    for(string::size_type i = 0; i < sub.size(); i++) {
        if(strchr(quote.c_str(), sub[i])) {
            inquote = !inquote;
        } else if(!inquote && strchr(delim.c_str(), sub[i])) {
            sub.resize(i);
            base.erase(0, i);

            first = base.find_first_not_of(delim);
            if(first == string::npos) base.clear(); else base.erase(0, first);

            found = true;
            break;
        }
    }

    if(!found) base = "";
    return sub;
}
/*
 * Splits the last word off `base`.
 *
 * Trailing delimiters are dropped from `base`, then everything after
 * the last character found in `delim` is returned. `base` keeps what
 * precedes that word with its trailing delimiters removed, or becomes
 * empty when no delimiter preceded the word.
 *
 * Whether a delimiter was found is tracked explicitly, as in getword();
 * comparing the word with the remainder wrongly emptied `base` for
 * input such as "a a".
 */
string getrword(string &base, const string &delim) {
    string::size_type last = base.find_last_not_of(delim);
    if(last == string::npos) base.clear(); else base.erase(last+1);

    string sub = base;
    bool found = false;

    for(string::size_type i = base.size(); i-- > 0; ) {
        if(strchr(delim.c_str(), base[i])) {
            sub = base.substr(i+1);
            base.resize(i);

            last = base.find_last_not_of(delim);
            if(last == string::npos) base.clear(); else base.erase(last+1);

            found = true;
            break;
        }
    }

    if(!found) base = "";
    return sub;
}
/*
 * Splits the last word off `base`, treating quoted text as part of the
 * word.
 *
 * Trailing delimiters are dropped from `base`; scanning backwards, the
 * word starts after the last character from `delim` that is not inside
 * quotes (each character from `quote` toggles the quoted state; quotes
 * stay in the returned word). `base` keeps what precedes the word with
 * its trailing delimiters removed, or becomes empty when no delimiter
 * preceded the word.
 *
 * Whether a delimiter was found is tracked explicitly, as in getword();
 * comparing the word with the remainder wrongly emptied `base` for
 * input such as "a a".
 */
string getrwordquote(string &base, const string &quote, const string &delim) {
    string::size_type last = base.find_last_not_of(delim);
    if(last == string::npos) base.clear(); else base.erase(last+1);

    string sub = base;
    bool inquote = false, found = false;

    for(string::size_type i = base.size(); i-- > 0; ) {
        if(strchr(quote.c_str(), base[i])) {
            inquote = !inquote;
        } else if(!inquote && strchr(delim.c_str(), base[i])) {
            sub = base.substr(i+1);
            base.resize(i);

            last = base.find_last_not_of(delim);
            if(last == string::npos) base.clear(); else base.erase(last+1);

            found = true;
            break;
        }
    }

    if(!found) base = "";
    return sub;
}
554 int rtabmargin(bool fake, int curpos, const char *p) {
555 int ret = -1, n, near;
557 if(p && (curpos != strlen(p))) {
558 n = strspn(p+curpos, " ");
560 if(fake) {
561 near = ((curpos/(TAB_SIZE/2))+1)*(TAB_SIZE/2);
562 if(n >= near-curpos) ret = near;
565 near = ((curpos/TAB_SIZE)+1)*TAB_SIZE;
566 if(n >= near-curpos) ret = near;
567 } else {
568 if(p && fake) fake = (strspn(p, " ") == strlen(p));
569 if(fake) ret = ((curpos/(TAB_SIZE/2))+1)*(TAB_SIZE/2);
570 else ret = ((curpos/TAB_SIZE)+1)*TAB_SIZE;
573 return ret;
576 int ltabmargin(bool fake, int curpos, const char *p) {
577 int ret = -1, near, n = 0;
578 const char *cp;
580 if(p) {
581 cp = p+curpos;
583 if(curpos) {
584 if(*(--cp) == ' ') n++;
585 for(; (*cp == ' ') && (cp != p); cp--) n++;
588 if(fake) {
589 near = (curpos/(TAB_SIZE/2))*(TAB_SIZE/2);
590 if(near <= curpos-n)
591 if((ret = curpos-n) != 0) ret++;
594 near = (curpos/TAB_SIZE)*TAB_SIZE;
595 if(near <= curpos-n) {
596 if((ret = curpos-n) != 0) ret++;
597 } else ret = near;
599 } else {
600 if(fake) ret = (curpos/(TAB_SIZE/2))*(TAB_SIZE/2);
601 else ret = (curpos/TAB_SIZE)*TAB_SIZE;
604 return ret;
607 void breakintolines(string text, vector<string> &lst, int linelen) {
608 int dpos, nlen;
609 string sub;
610 vector<string>::iterator i;
612 breakintolines(text, lst);
614 if(linelen > 0) {
615 for(i = lst.begin(); i != lst.end(); i++) {
616 if(i->size() > linelen) {
617 sub = i->substr(0, nlen = linelen);
619 if((dpos = sub.rfind(" ")) != -1) {
620 if(dpos) nlen = dpos; else nlen = 1;
623 if(dpos != -1)
624 nlen++;
626 sub = i->substr(nlen);
627 i->erase(nlen);
628 lst.insert(i+1, sub);
629 i = lst.begin();
635 void breakintolines(const string &text, vector<string> &lst) {
636 int npos, dpos, tpos;
637 string sub;
639 tpos = 0;
640 lst.clear();
642 while(tpos < text.size()) {
643 if((npos = text.find("\n", tpos)) != -1) {
644 sub = text.substr(tpos, npos-tpos);
645 } else {
646 sub = text.substr(tpos);
647 npos = text.size();
650 tpos += npos-tpos+1;
652 for(dpos = 0; (dpos = sub.find("\r", dpos)) != -1; ) {
653 sub.erase(dpos, 1);
656 for(dpos = 0; (dpos = sub.find("\t", dpos)) != -1; ) {
657 sub.erase(dpos, 1);
658 sub.insert(dpos, string(rtabmargin(false, dpos)-dpos, ' '));
661 lst.push_back(sub);
665 void find_gather_quoted(vector<quotedblock> &lst, const string &str,
666 const string &quote, const string &escape) {
667 bool inquote = false;
668 int npos = 0, qch;
669 quotedblock qb;
671 while((npos = str.find_first_of(quote, npos)) != -1) {
672 if(npos)
673 if(escape.find(str[npos-1]) == -1) {
674 inquote = !inquote;
676 if(inquote) {
677 qb.begin = npos;
678 qch = str[npos];
679 } else {
680 if(str[npos] == qch) {
681 qb.end = npos;
682 lst.push_back(qb);
683 } else {
684 inquote = true;
688 npos++;
692 int find_quoted(const string &str, const string &needle, int offs,
693 const string &quote, const string &escape) {
694 vector<quotedblock> positions;
695 vector<quotedblock>::iterator qi;
696 int npos = offs;
697 bool found;
699 find_gather_quoted(positions, str, quote, escape);
701 while((npos = str.find(needle, npos)) != -1) {
702 for(found = false, qi = positions.begin(); qi != positions.end() && !found; qi++)
703 if((npos > qi->begin) && (npos < qi->end)) found = true;
705 if(!found) break;
706 npos++;
709 return !found ? npos : -1;
712 int find_quoted_first_of(const string &str, const string &needle, int offs,
713 const string &quote, const string &escape) {
714 vector<quotedblock> positions;
715 vector<quotedblock>::iterator qi;
716 int npos = offs;
717 bool found;
719 find_gather_quoted(positions, str, quote, escape);
721 while((npos = str.find_first_of(needle, npos)) != -1) {
722 for(found = false, qi = positions.begin(); qi != positions.end() && !found; qi++)
723 if((npos > qi->begin) && (npos < qi->end)) found = true;
725 if(!found) break;
726 npos++;
729 return !found ? npos : -1;
/*
 * Splits `text` into pieces of at most `size` characters and stores
 * them in `lst` (which is cleared first).
 *
 * Each piece except the last ends with the continuation marker `cont`,
 * so size - cont.size() characters of text fit per piece. A piece is
 * cut at the last space or tab inside that window when there is one,
 * otherwise at the window boundary; blanks at the start of the
 * remaining text are skipped. A remainder no longer than `cont` is
 * appended to the current piece instead of starting a new one.
 *
 * When `size` leaves no room for text next to `cont`, the whole text is
 * returned as a single piece.
 */
void splitlongtext(string text, vector<string> &lst, int size, const string cont) {
    lst.clear();

    if(text.empty())
        return;

    /* Signed arithmetic: size may be smaller than the marker. */
    const long limit = (long) size - (long) cont.size();

    if(limit <= 0) {
        lst.push_back(text);
        return;
    }

    const string::size_type room = limit;

    while(!text.empty()) {
        string::size_type cut;

        if(text.size() <= room) {
            cut = text.size();
        } else {
            cut = text.find_last_of(" \t", room-1);

            if(cut == string::npos) {
                cut = room;
            } else if(cut == 0 && text.find_first_not_of(" \t") == string::npos) {
                /* Only blanks left and a one-column window: cut hard so
                   the loop is guaranteed to make progress. */
                cut = room;
            }
        }

        string sub = text.substr(0, cut);
        text.erase(0, cut);

        if(text.size() > cont.size()) {
            sub += cont;
        } else {
            sub += text;
            text = "";
        }

        const string::size_type skip = text.find_first_not_of(" \t");
        if(skip != string::npos)
            text.erase(0, skip);

        lst.push_back(sub);
    }
}
760 string strdateandtime(time_t stamp, const string &fmt) {
761 return strdateandtime(localtime(&stamp), fmt);
/*
 * Formats the broken-down time `tms` with the strftime() format `fmt`;
 * an empty `fmt` selects "%b %e %Y %H:%M".
 *
 * Returns an empty string when `tms` is NULL or when the result does
 * not fit into the 512-byte work buffer.
 *
 * The heuristic that picked a shorter format for old or future dates
 * stays disabled, as in the original, where the variables it depends on
 * (current_time, when) are commented out.
 */
string strdateandtime(struct tm *tms, const string &fmt) {
    char buf[512];
    string afmt = fmt;

    if(!tms)
        return "";

    if(afmt.empty()) {
        afmt = "%b %e %Y %H:%M";
    }

    /* strftime() returns 0 and leaves the buffer unspecified when the
       output does not fit, so start from a known-empty buffer. */
    buf[0] = 0;

    if(!strftime(buf, sizeof(buf), afmt.c_str(), tms))
        return "";

    return buf;
}
/*
 * Tells whether the span of `s` that starts at `so` and ends before
 * `eo` is delimited on both sides.
 *
 * The left side is fine when `so` is 0 or the character before it is a
 * word delimiter. The right side is fine when `eo` is the index of the
 * last character of `s`, when `eo` equals the length of `s`, or when
 * the character at `eo` is a word delimiter.
 */
bool iswholeword(const string &s, int so, int eo) {
    const string wdelims = "[](),.; <>-+{}=|&%~*/:?@";
    bool leftok = true, rightok = true;

    if(so)
        leftok = wdelims.find(s.substr(so-1, 1)) != -1;

    /* substr() yields "" at the very end of s, which find() locates at
       index 0, so the end of the string counts as a delimiter. */
    if(eo != s.size()-1)
        rightok = wdelims.find(s.substr(eo, 1)) != -1;

    return rightok && leftok;
}
/*
 * Converts a hexadecimal string of at most two digits to its value.
 *
 * Returns 0 for an empty string, for strings longer than two
 * characters and for strings containing a non-hex character. A single
 * digit is taken at face value ("A" is 10).
 */
int hex2int(const string &ahex) {
    if(ahex.size() > 2)
        return 0;

    int r = 0;

    for(string::size_type i = 0; i < ahex.size(); i++) {
        const unsigned char c = ahex[i];

        if(!isxdigit(c))
            return 0;

        r = r*16 + (isdigit(c) ? c-'0' : toupper(c)-'A'+10);
    }

    return r;
}
811 bool getconf(string &st, string &buf, ifstream &f, bool passemptylines) {
812 bool ret = false;
813 static string sect;
815 while(!f.eof() && !ret) {
816 getstring(f, buf);
818 if(buf.size()) {
819 switch(buf[0]) {
820 case '%':
821 sect = buf.substr(1);
822 break;
823 case '#':
824 if(buf[1] != '!') break;
825 default:
826 ret = buf.size();
827 break;
829 } else if(passemptylines) {
830 ret = 1;
834 st = sect;
835 return ret;
/*
 * Reads one line of any length from `f` into `sbuf` (without the line
 * terminator).
 *
 * Returns false, leaving `sbuf` untouched, when the stream is already
 * at end of file; true otherwise. Error flags left over from an earlier
 * operation are cleared before reading.
 *
 * std::getline reads the whole line in one go, so no shared static
 * buffer is needed and a stream that keeps failing without reaching end
 * of file cannot keep the function spinning.
 */
bool getstring(istream &f, string &sbuf) {
    if(f.eof())
        return false;

    sbuf.clear();
    f.clear();
    getline(f, sbuf);

    return true;
}
/*
 * Changes the case of `s`. `mode` is "tolower" or "toupper"; any other
 * value returns `s` unchanged.
 *
 * Bytes listed in the two tables below are mapped through them; the
 * tables hold the 32 Russian letters as single KOI8-R bytes (lower case
 * 0xC0-0xDF, upper case 0xE0-0xFF, same order in both). They are
 * spelled as hex escapes so that re-encoding this source file cannot
 * alter them. Every other byte goes through tolower()/toupper().
 */
string ruscase(const string &s, const string &mode) {
    static const string lower =
        "\xC1\xC2\xD7\xC7\xC4\xC5\xD6\xDA\xC9\xCA\xCB\xCC\xCD\xCE\xCF\xD0"
        "\xD2\xD3\xD4\xD5\xC6\xC8\xC3\xDE\xDB\xDD\xD8\xDF\xD9\xDC\xC0\xD1";
    static const string upper =
        "\xE1\xE2\xF7\xE7\xE4\xE5\xF6\xFA\xE9\xEA\xEB\xEC\xED\xEE\xEF\xF0"
        "\xF2\xF3\xF4\xF5\xE6\xE8\xE3\xFE\xFB\xFD\xF8\xFF\xF9\xFC\xE0\xF1";

    const bool tolow = (mode == "tolower");

    if(!tolow && mode != "toupper")
        return s;

    const string &tfrom = tolow ? upper : lower;
    const string &tto = tolow ? lower : upper;

    string r = s;

    for(string::size_type pos = 0; pos < r.size(); pos++) {
        const string::size_type tpos = tfrom.find(r[pos]);

        if(tpos != string::npos) {
            r[pos] = tto[tpos];
        } else {
            /* ctype functions need an unsigned char value; bytes that
               were mapped through the table are not converted again. */
            const unsigned char c = r[pos];
            r[pos] = (char) (tolow ? tolower(c) : toupper(c));
        }
    }

    return r;
}
/*
 * Converts `atext` from character set `fromcs` to `tocs` with iconv().
 *
 * Input that cannot be converted (EILSEQ) is skipped one byte at a time
 * and replaced by a space in the output; any other conversion error
 * stops the conversion and returns what was converted so far. When
 * iconv support is not compiled in (HAVE_ICONV undefined) or the
 * conversion cannot be opened, `atext` is returned unchanged.
 */
string siconv(const string &atext, const string &fromcs, const string &tocs) {
#ifdef HAVE_ICONV
    iconv_t cd = iconv_open(tocs.c_str(), fromcs.c_str());

    if(cd != ((iconv_t) -1)) {
        string r, text(atext);

        //from iconv.c (libiconv)
        iconv(cd,NULL,NULL,NULL,NULL);

        while(!text.empty()) {
            char *sinbuf = strdup(text.c_str());
            if(!sinbuf)
                break;

            char *inbuf = sinbuf;
            size_t inleft = strlen(inbuf);

            const size_t soutleft = inleft*4;
            size_t outleft = soutleft;
            /* One spare byte, so a completely filled buffer is still
               inside the allocation. */
            char *soutbuf = new char[soutleft+1];
            char *outbuf = soutbuf;

            const size_t res = iconv(cd, (ICONV_CONST char **) &inbuf, &inleft,
                &outbuf, &outleft);
            /* Saved right away: the cleanup calls below may change errno. */
            const int err = errno;

            /* Append by length, so output containing NUL bytes (for
               instance UTF-16) is not cut short. */
            r.append(soutbuf, soutleft-outleft);
            text.erase(0, text.size()-inleft);

            delete[] soutbuf;
            free(sinbuf);

            if(res == (size_t) -1 && err != EILSEQ)
                break;

            if(!text.empty()) {
                text.erase(0, 1);
                r += " ";
            }
        }

        iconv_close(cd);
        return r;
    }
#endif

    return atext;
}
934 string cuthtml(const string &html, int flags) {
935 string r, tag, buf, token;
936 int npos, pos, tpos;
938 for(pos = 0; (npos = html.find("<", pos)) != -1; pos = npos) {
939 tpos = npos;
940 r += html.substr(pos, npos-pos);
942 if((npos = html.find(">", ++npos)) != -1) {
943 npos++;
945 tag = html.substr(tpos+1, npos-tpos-2);
946 if(tag.substr(0, 1) == "/") tag.erase(0, 1);
947 tag = leadcut(trailcut(tag, "/ \n\r"), "/ \n\r");
949 buf = ruscase(tag, "toupper");
950 token = getword(buf);
952 if(token == "BR") r += (flags & chCutBR) ? "\n" : "<br>";
953 else if((flags & chCutBR) && token == "P") r += "\n\n";
955 if(flags & chLeaveLinks) {
956 getword(tag);
958 if(token == "A") {
959 if((tpos = buf.find("HREF")) != -1)
960 if((tpos = buf.substr(tpos).find("\"")) != -1) {
961 tag.erase(0, tpos+1);
962 r += "[ href: " + getword(tag, "\"") + " ] ";
965 } else if(token == "IMG") {
966 if((tpos = buf.find("SRC")) != -1)
967 if((tpos = buf.substr(tpos).find("\"")) != -1) {
968 tag.erase(0, tpos+1);
969 r += " [ img: " + getword(tag, "\"") + " ]";
975 } else {
976 r += html.substr(tpos);
977 npos = html.size();
981 if(pos < html.size())
982 r += html.substr(pos);
984 return r;
/*
 * Converts the UTF-8 string `pin` to a single-byte string in which each
 * byte holds a code point in the range 0-255.
 *
 * Code points above 255 cannot be represented and are replaced by '?'.
 * Stray continuation bytes, invalid lead bytes and lead bytes followed
 * by a malformed sequence are dropped; a sequence cut off by the end of
 * the input ends the conversion.
 *
 * Returns a NUL-terminated array allocated with new[] (release it with
 * delete[]), or NULL when `pin` is NULL.
 */
char *utf8_to_str(const char *pin) {
    if(!pin) return NULL;

    const unsigned char *in = (const unsigned char *) pin;
    const size_t inlen = strlen(pin);
    /* Every sequence yields at most one byte, so inlen+1 is enough. */
    char *result = new char[inlen + 1];
    size_t n = 0, i = 0;

    while(n < inlen) {
        const unsigned int c = in[n];
        unsigned long cp;
        size_t seqlen;

        /* Lead bytes are tested from the shortest to the longest form
           with exact masks, so no form can shadow another. */
        if(c < 0x80) { seqlen = 1; cp = c; }
        else if((c & 0xE0) == 0xC0) { seqlen = 2; cp = c & 0x1F; }
        else if((c & 0xF0) == 0xE0) { seqlen = 3; cp = c & 0x0F; }
        else if((c & 0xF8) == 0xF0) { seqlen = 4; cp = c & 0x07; }
        else if((c & 0xFC) == 0xF8) { seqlen = 5; cp = c & 0x03; }
        else if((c & 0xFE) == 0xFC) { seqlen = 6; cp = c & 0x01; }
        else {
            /* Continuation byte without a lead, or 0xFE/0xFF. */
            n++;
            continue;
        }

        if(n + seqlen > inlen)
            break;

        bool valid = true;

        for(size_t k = 1; k < seqlen; k++) {
            const unsigned int cont = in[n+k];

            if((cont & 0xC0) != 0x80) {
                valid = false;
                break;
            }

            cp = (cp << 6) | (cont & 0x3F);
        }

        if(!valid) {
            /* Drop the lead byte and resynchronise on the next one. */
            n++;
            continue;
        }

        result[i++] = (cp <= 0xFF) ? (char) cp : '?';
        n += seqlen;
    }

    result[i] = '\0';
    return result;
}
/*
 * Converts the single-byte string `pin` to UTF-8, treating each byte
 * value as a code point: bytes below 128 are copied, all others become
 * a two-byte sequence.
 *
 * An ESC byte (27) starts a sequence that is skipped rather than
 * converted: ESC plus the next character, an optional 'x', an optional
 * '3', and two further characters.
 *
 * Returns a NUL-terminated array allocated with new[] (release it with
 * delete[]), or NULL when `pin` is NULL.
 */
char *str_to_utf8(const char *pin) {
    if(!pin)
        return NULL;

    const unsigned char *in = (const unsigned char *) pin;
    const int inlen = strlen(pin);
    /* Each input byte expands to at most two output bytes. */
    char *result = new char[inlen * 2 + 1];
    int n = 0, i = 0;

    while(n < inlen) {
        const long c = (long) in[n];

        if(c == 27) {
            n += 2;
            /* Only look at bytes that are really part of the input. */
            if(n < inlen && in[n] == 'x')
                n++;
            if(n < inlen && in[n] == '3')
                n++;
            n += 2;
            continue;
        }

        if(c < 128) {
            result[i++] = (char) c;
        } else {
            result[i++] = (char) ((c >> 6) | 192);
            result[i++] = (char) ((c & 63) | 128);
        }

        n++;
    }

    result[i] = '\0';
    return result;
}
1057 string striprtf(const string &s, const string &charset) {
1058 string r, spec, unichar, tmp;
1059 char pre = 0;
1060 bool bprint, bspec, bunicode;
1061 int bparen = -1;
1063 bprint = true;
1064 bspec = bunicode = false;
1066 for(string::const_iterator i = s.begin(); i != s.end(); ++i) {
1067 if(!isalpha(*i) && !isdigit(*i)) bprint = true;
1069 if(bspec) {
1070 spec += *i;
1072 if(spec.size() == 2) {
1073 r += (char) hex2int(spec);
1074 bspec = false;
1075 bprint = true;
1078 } else switch(*i) {
1079 case '{':
1080 if(pre != '\\') {
1081 bparen++;
1082 bprint = false;
1083 } else {
1084 bprint = true;
1085 r += *i;
1087 break;
1089 case '}':
1090 if(pre != '\\') {
1091 bprint = false;
1092 bparen--;
1093 } else {
1094 bprint = true;
1095 r += *i;
1098 break;
1100 case '\\':
1101 if(pre != '\\') {
1102 bprint = false;
1103 } else {
1104 bprint = true;
1105 r += *i;
1106 pre = 0;
1107 continue;
1109 break;
1111 case '\'':
1112 if(!bparen && bprint && pre == '\\') {
1113 spec = "";
1114 bspec = true;
1115 } else {
1116 r += *i;
1118 break;
1120 case 'u':
1121 if(!bparen) {
1122 if(pre == '\\' && isdigit(*(i+1))) {
1123 unichar = "";
1124 bunicode = true;
1125 } else if(bprint) {
1126 r += *i;
1129 break;
1130 default:
1131 if(!bparen) {
1132 if(bunicode) {
1133 unichar += *i;
1135 if(unichar.size() == 5) {
1136 bunicode = false;
1137 if(unichar.substr(0, 4).find_first_not_of("0123456789") == -1) {
1138 long l = strtol(unichar.substr(0, 4).c_str(), 0, 0);
1139 char ubuf[sizeof(long)+4];
1140 #ifdef HAVE_ICONV
1141 memcpy(ubuf, "\xff\xfe", 2);
1142 memcpy(ubuf+2, &l, sizeof(long));
1143 memcpy(ubuf+sizeof(long)*2, "\x0a\x00", 2);
1144 #else
1145 strcpy(ubuf, unichar.substr(4).c_str());
1146 #endif
1147 r += siconv(ubuf , "utf-16", charset);
1151 } else if(bprint) {
1152 r += *i;
1157 pre = *i;
1160 return leadcut(trailcut(r));