Generate lowercase MIME charset=/RFC 2231 charset strings
[s-mailx.git] / ui_str.c
blobaa9cacae23b8e7f4bda56ec6f0a982ae34690b2f
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ UserInterface: string related operations.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
6 */
7 /*
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
32 #undef n_FILE
33 #define n_FILE ui_str
35 #ifndef HAVE_AMALGAMATION
36 # include "nail.h"
37 #endif
39 #include <ctype.h>
41 FL bool_t
42 n_visual_info(struct n_visual_info_ctx *vicp, enum n_visual_info_flags vif){
43 #ifdef HAVE_C90AMEND1
44 mbstate_t *mbp;
45 #endif
46 size_t il;
47 char const *ib;
48 bool_t rv;
49 NYD2_ENTER;
51 assert(vicp != NULL);
52 assert(vicp->vic_inlen == 0 || vicp->vic_indat != NULL);
53 assert(!(vif & n__VISUAL_INFO_FLAGS) || !(vif & n_VISUAL_INFO_ONE_CHAR));
55 rv = TRU1;
56 ib = vicp->vic_indat;
57 if((il = vicp->vic_inlen) == UIZ_MAX)
58 il = vicp->vic_inlen = strlen(ib);
60 if((vif & (n_VISUAL_INFO_WIDTH_QUERY | n_VISUAL_INFO_WOUT_PRINTABLE)) ==
61 n_VISUAL_INFO_WOUT_PRINTABLE)
62 vif |= n_VISUAL_INFO_WIDTH_QUERY;
64 vicp->vic_chars_seen = vicp->vic_vi_width = 0;
65 if(vif & n_VISUAL_INFO_WOUT_CREATE){
66 if(vif & n_VISUAL_INFO_WOUT_SALLOC)
67 vicp->vic_woudat = salloc(sizeof(*vicp->vic_woudat) * (il +1));
68 vicp->vic_woulen = 0;
70 #ifdef HAVE_C90AMEND1
71 if((mbp = vicp->vic_mbstate) == NULL)
72 mbp = &vicp->vic_mbs_def;
73 #endif
75 if(il > 0){
76 do/* while(!(vif & n_VISUAL_INFO_ONE_CHAR) && il > 0) */{
77 #ifdef HAVE_C90AMEND1
78 size_t i = mbrtowc(&vicp->vic_waccu, ib, il, mbp);
80 if(i == (size_t)-2){
81 rv = FAL0;
82 break;
83 }else if(i == (size_t)-1){
84 if(!(vif & n_VISUAL_INFO_SKIP_ERRORS)){
85 rv = FAL0;
86 break;
88 memset(mbp, 0, sizeof *mbp);
89 vicp->vic_waccu = (options & OPT_UNICODE ? 0xFFFD : '?');
90 i = 1;
91 }else if(i == 0){
92 il = 0;
93 break;
96 ++vicp->vic_chars_seen;
97 vicp->vic_bytes_seen += i;
98 ib += i;
99 il -= i;
101 if(vif & n_VISUAL_INFO_WIDTH_QUERY){
102 int w;
103 wchar_t wc = vicp->vic_waccu;
105 # ifdef HAVE_WCWIDTH
106 w = (wc == '\t' ? 1 : wcwidth(wc));
107 # else
108 if(wc == '\t' || iswprint(wc))
109 w = 1 + (wc >= 0x1100u); /* S-CText isfullwidth() */
110 else
111 w = -1;
112 # endif
113 if(w > 0)
114 vicp->vic_vi_width += w;
115 else if(vif & n_VISUAL_INFO_WOUT_PRINTABLE)
116 continue;
118 #else /* HAVE_C90AMEND1 */
119 char c = *ib;
121 if(c == '\0'){
122 il = 0;
123 break;
126 ++vicp->vic_chars_seen;
127 ++vicp->vic_bytes_seen;
128 vicp->vic_waccu = c;
129 if(vif & n_VISUAL_INFO_WIDTH_QUERY)
130 vicp->vic_vi_width += (c == '\t' || isprint(c)); /* XXX */
132 ++ib;
133 --il;
134 #endif
136 if(vif & n_VISUAL_INFO_WOUT_CREATE)
137 vicp->vic_woudat[vicp->vic_woulen++] = vicp->vic_waccu;
138 }while(!(vif & n_VISUAL_INFO_ONE_CHAR) && il > 0);
141 if(vif & n_VISUAL_INFO_WOUT_CREATE)
142 vicp->vic_woudat[vicp->vic_woulen] = L'\0';
143 vicp->vic_oudat = ib;
144 vicp->vic_oulen = il;
145 vicp->vic_flags = vif;
146 NYD2_LEAVE;
147 return rv;
150 FL size_t
151 field_detect_clip(size_t maxlen, char const *buf, size_t blen)/*TODO mbrtowc()*/
153 size_t rv;
154 NYD_ENTER;
156 #ifdef HAVE_NATCH_CHAR
157 maxlen = MIN(maxlen, blen);
158 for (rv = 0; maxlen > 0;) {
159 int ml = mblen(buf, maxlen);
160 if (ml <= 0) {
161 mblen(NULL, 0);
162 break;
164 buf += ml;
165 rv += ml;
166 maxlen -= ml;
168 #else
169 rv = MIN(blen, maxlen);
170 #endif
171 NYD_LEAVE;
172 return rv;
175 FL size_t
176 field_put_bidi_clip(char *store, size_t maxlen, char const *buf, size_t blen)
178 NATCH_CHAR( struct bidi_info bi; )
179 size_t rv NATCH_CHAR( COMMA i );
180 NYD_ENTER;
182 rv = 0;
183 if (maxlen-- == 0)
184 goto j_leave;
186 #ifdef HAVE_NATCH_CHAR
187 bidi_info_create(&bi);
188 if (bi.bi_start.l == 0 || !bidi_info_needed(buf, blen)) {
189 bi.bi_end.l = 0;
190 goto jnobidi;
193 if (maxlen >= (i = bi.bi_pad + bi.bi_end.l + bi.bi_start.l))
194 maxlen -= i;
195 else
196 goto jleave;
198 if ((i = bi.bi_start.l) > 0) {
199 memcpy(store, bi.bi_start.s, i);
200 store += i;
201 rv += i;
204 jnobidi:
205 while (maxlen > 0) {
206 int ml = mblen(buf, blen);
207 if (ml <= 0) {
208 mblen(NULL, 0);
209 break;
211 if (UICMP(z, maxlen, <, ml))
212 break;
213 if (ml == 1)
214 *store = *buf;
215 else
216 memcpy(store, buf, ml);
217 store += ml;
218 buf += ml;
219 rv += ml;
220 maxlen -= ml;
223 if ((i = bi.bi_end.l) > 0) {
224 memcpy(store, bi.bi_end.s, i);
225 store += i;
226 rv += i;
228 jleave:
229 *store = '\0';
231 #else
232 rv = MIN(blen, maxlen);
233 memcpy(store, buf, rv);
234 store[rv] = '\0';
235 #endif
236 j_leave:
237 NYD_LEAVE;
238 return rv;
241 FL char *
242 colalign(char const *cp, int col, int fill, int *cols_decr_used_or_null)
244 NATCH_CHAR( struct bidi_info bi; )
245 int col_orig = col, n, sz;
246 bool_t isbidi, isuni, istab, isrepl;
247 char *nb, *np;
248 NYD_ENTER;
250 /* Bidi only on request and when there is 8-bit data */
251 isbidi = isuni = FAL0;
252 #ifdef HAVE_NATCH_CHAR
253 isuni = ((options & OPT_UNICODE) != 0);
254 bidi_info_create(&bi);
255 if (bi.bi_start.l == 0)
256 goto jnobidi;
257 if (!(isbidi = bidi_info_needed(cp, strlen(cp))))
258 goto jnobidi;
260 if ((size_t)col >= bi.bi_pad)
261 col -= bi.bi_pad;
262 else
263 col = 0;
264 jnobidi:
265 #endif
267 np = nb = salloc(mb_cur_max * strlen(cp) +
268 ((fill ? col : 0)
269 NATCH_CHAR( + (isbidi ? bi.bi_start.l + bi.bi_end.l : 0) )
270 +1));
272 #ifdef HAVE_NATCH_CHAR
273 if (isbidi) {
274 memcpy(np, bi.bi_start.s, bi.bi_start.l);
275 np += bi.bi_start.l;
277 #endif
279 while (*cp != '\0') {
280 istab = FAL0;
281 #ifdef HAVE_C90AMEND1
282 if (mb_cur_max > 1) {
283 wchar_t wc;
285 n = 1;
286 isrepl = TRU1;
287 if ((sz = mbtowc(&wc, cp, mb_cur_max)) == -1)
288 sz = 1;
289 else if (wc == L'\t') {
290 cp += sz - 1; /* Silly, no such charset known (.. until S-Ctext) */
291 isrepl = FAL0;
292 istab = TRU1;
293 } else if (iswprint(wc)) {
294 # ifndef HAVE_WCWIDTH
295 n = 1 + (wc >= 0x1100u); /* TODO use S-CText isfullwidth() */
296 # else
297 if ((n = wcwidth(wc)) == -1)
298 n = 1;
299 else
300 # endif
301 isrepl = FAL0;
303 } else
304 #endif
306 n = sz = 1;
307 istab = (*cp == '\t');
308 isrepl = !(istab || isprint((uc_i)*cp));
311 if (n > col)
312 break;
313 col -= n;
315 if (isrepl) {
316 if (isuni) {
317 np[0] = (char)0xEFu;
318 np[1] = (char)0xBFu;
319 np[2] = (char)0xBDu;
320 np += 3;
321 } else
322 *np++ = '?';
323 cp += sz;
324 } else if (istab || (sz == 1 && spacechar(*cp))) {
325 *np++ = ' ';
326 ++cp;
327 } else
328 while (sz--)
329 *np++ = *cp++;
332 if (fill && col != 0) {
333 if (fill > 0) {
334 memmove(nb + col, nb, PTR2SIZE(np - nb));
335 memset(nb, ' ', col);
336 } else
337 memset(np, ' ', col);
338 np += col;
339 col = 0;
342 #ifdef HAVE_NATCH_CHAR
343 if (isbidi) {
344 memcpy(np, bi.bi_end.s, bi.bi_end.l);
345 np += bi.bi_end.l;
347 #endif
349 *np = '\0';
350 if (cols_decr_used_or_null != NULL)
351 *cols_decr_used_or_null -= col_orig - col;
352 NYD_LEAVE;
353 return nb;
356 FL void
357 makeprint(struct str const *in, struct str *out)
359 char const *inp, *maxp;
360 char *outp;
361 DBG( size_t msz; )
362 NYD_ENTER;
364 out->s = outp = smalloc(DBG( msz = ) in->l*mb_cur_max + 2u*mb_cur_max +1);
365 inp = in->s;
366 maxp = inp + in->l;
368 #ifdef HAVE_NATCH_CHAR
369 if (mb_cur_max > 1) {
370 char mbb[MB_LEN_MAX + 1];
371 wchar_t wc;
372 int i, n;
373 bool_t isuni = ((options & OPT_UNICODE) != 0);
375 out->l = 0;
376 while (inp < maxp) {
377 if (*inp & 0200)
378 n = mbtowc(&wc, inp, PTR2SIZE(maxp - inp));
379 else {
380 wc = *inp;
381 n = 1;
383 if (n == -1) {
384 /* FIXME Why mbtowc() resetting here?
385 * FIXME what about ISO 2022-JP plus -- those
386 * FIXME will loose shifts, then!
387 * FIXME THUS - we'd need special "known points"
388 * FIXME to do so - say, after a newline!!
389 * FIXME WE NEED TO CHANGE ALL USES +MBLEN! */
390 mbtowc(&wc, NULL, mb_cur_max);
391 wc = isuni ? 0xFFFD : '?';
392 n = 1;
393 } else if (n == 0)
394 n = 1;
395 inp += n;
396 if (!iswprint(wc) && wc != '\n' && wc != '\r' && wc != '\b' &&
397 wc != '\t') {
398 if ((wc & ~(wchar_t)037) == 0)
399 wc = isuni ? 0x2400 | wc : '?';
400 else if (wc == 0177)
401 wc = isuni ? 0x2421 : '?';
402 else
403 wc = isuni ? 0x2426 : '?';
404 }else if(isuni){ /* TODO ctext */
405 /* We need to actively filter out L-TO-R and R-TO-R marks TODO ctext */
406 if(wc == 0x200E || wc == 0x200F || (wc >= 0x202A && wc <= 0x202E))
407 continue;
408 /* And some zero-width messes */
409 if(wc == 0x00AD || (wc >= 0x200B && wc <= 0x200D))
410 continue;
411 /* Oh about the ISO C wide character interfaces, baby! */
412 if(wc == 0xFEFF)
413 continue;
415 if ((n = wctomb(mbb, wc)) <= 0)
416 continue;
417 out->l += n;
418 assert(out->l < msz);
419 for (i = 0; i < n; ++i)
420 *outp++ = mbb[i];
422 } else
423 #endif /* NATCH_CHAR */
425 int c;
426 while (inp < maxp) {
427 c = *inp++ & 0377;
428 if (!isprint(c) && c != '\n' && c != '\r' && c != '\b' && c != '\t')
429 c = '?';
430 *outp++ = c;
432 out->l = in->l;
434 out->s[out->l] = '\0';
435 NYD_LEAVE;
438 FL size_t
439 delctrl(char *cp, size_t len)
441 size_t x, y;
442 NYD_ENTER;
444 for (x = y = 0; x < len; ++x)
445 if (!cntrlchar(cp[x]))
446 cp[y++] = cp[x];
447 cp[y] = '\0';
448 NYD_LEAVE;
449 return y;
452 FL char *
453 prstr(char const *s)
455 struct str in, out;
456 char *rp;
457 NYD_ENTER;
459 in.s = UNCONST(s);
460 in.l = strlen(s);
461 makeprint(&in, &out);
462 rp = savestrbuf(out.s, out.l);
463 free(out.s);
464 NYD_LEAVE;
465 return rp;
468 FL int
469 prout(char const *s, size_t sz, FILE *fp)
471 struct str in, out;
472 int n;
473 NYD_ENTER;
475 in.s = UNCONST(s);
476 in.l = sz;
477 makeprint(&in, &out);
478 n = fwrite(out.s, 1, out.l, fp);
479 free(out.s);
480 NYD_LEAVE;
481 return n;
484 FL size_t
485 putuc(int u, int c, FILE *fp)
487 size_t rv;
488 NYD_ENTER;
489 UNUSED(u);
491 #ifdef HAVE_NATCH_CHAR
492 if ((options & OPT_UNICODE) && (u & ~(wchar_t)0177)) {
493 char mbb[MB_LEN_MAX];
494 int i, n;
496 if ((n = wctomb(mbb, u)) > 0) {
497 rv = wcwidth(u);
498 for (i = 0; i < n; ++i)
499 if (putc(mbb[i] & 0377, fp) == EOF) {
500 rv = 0;
501 break;
503 } else if (n == 0)
504 rv = (putc('\0', fp) != EOF);
505 else
506 rv = 0;
507 } else
508 #endif
509 rv = (putc(c, fp) != EOF);
510 NYD_LEAVE;
511 return rv;
514 FL bool_t
515 bidi_info_needed(char const *bdat, size_t blen)
517 bool_t rv = FAL0;
518 NYD_ENTER;
520 #ifdef HAVE_NATCH_CHAR
521 if (options & OPT_UNICODE)
522 while (blen > 0) {
523 /* TODO Checking for BIDI character: use S-CText fromutf8
524 * TODO plus isrighttoleft (or whatever there will be)! */
525 ui32_t c = n_utf8_to_utf32(&bdat, &blen);
526 if (c == UI32_MAX)
527 break;
529 if (c <= 0x05BE)
530 continue;
532 /* (Very very fuzzy, awaiting S-CText for good) */
533 if ((c >= 0x05BE && c <= 0x08E3) ||
534 (c >= 0xFB1D && c <= 0xFE00) /* No: variation selectors */ ||
535 (c >= 0xFE70 && c <= 0xFEFC) ||
536 (c >= 0x10800 && c <= 0x10C48) ||
537 (c >= 0x1EE00 && c <= 0x1EEF1)) {
538 rv = TRU1;
539 break;
542 #endif /* HAVE_NATCH_CHAR */
543 NYD_LEAVE;
544 return rv;
547 FL void
548 bidi_info_create(struct bidi_info *bip)
550 /* Unicode: how to isolate RIGHT-TO-LEFT scripts via *headline-bidi*
551 * 1.1 (Jun 1993): U+200E (E2 80 8E) LEFT-TO-RIGHT MARK
552 * 6.3 (Sep 2013): U+2068 (E2 81 A8) FIRST STRONG ISOLATE,
553 * U+2069 (E2 81 A9) POP DIRECTIONAL ISOLATE
554 * Worse results seen for: U+202D "\xE2\x80\xAD" U+202C "\xE2\x80\xAC" */
555 NATCH_CHAR( char const *hb; )
556 NYD_ENTER;
558 memset(bip, 0, sizeof *bip);
559 bip->bi_start.s = bip->bi_end.s = UNCONST("");
561 #ifdef HAVE_NATCH_CHAR
562 if ((options & OPT_UNICODE) && (hb = ok_vlook(headline_bidi)) != NULL) {
563 switch (*hb) {
564 case '3':
565 bip->bi_pad = 2;
566 /* FALLTHRU */
567 case '2':
568 bip->bi_start.s = bip->bi_end.s = UNCONST("\xE2\x80\x8E");
569 break;
570 case '1':
571 bip->bi_pad = 2;
572 /* FALLTHRU */
573 default:
574 bip->bi_start.s = UNCONST("\xE2\x81\xA8");
575 bip->bi_end.s = UNCONST("\xE2\x81\xA9");
576 break;
578 bip->bi_start.l = bip->bi_end.l = 3;
580 #endif
581 NYD_LEAVE;
584 /* s-it-mode */