cc-test.sh: t_behave_iconv_mainbody() should compile test instead, sigh!
[s-mailx.git] / ui-str.c
blob0d788d89afd94c94a0f78869e4d1c95ecdedbbec
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ UserInterface: string related operations.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6 */
7 /*
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
32 #undef n_FILE
33 #define n_FILE ui_str
35 #ifndef HAVE_AMALGAMATION
36 # include "nail.h"
37 #endif
39 #include <ctype.h>
/* Inspect the input vicp->vic_indat of vicp->vic_inlen bytes as directed by
 * the flags vif, and store the findings in *vicp:
 * - vic_chars_seen / vic_bytes_seen count the characters / bytes consumed;
 * - with n_VISUAL_INFO_WIDTH_QUERY vic_vi_width sums up the visual widths;
 * - with n_VISUAL_INFO_WOUT_CREATE the characters are also stored in the
 *   (terminated) wide character buffer vic_woudat, vic_woulen entries long;
 *   it is salloc()ed here if n_VISUAL_INFO_WOUT_SALLOC is set;
 * - vic_oudat / vic_oulen describe the rest of the input that has not been
 *   consumed, vic_flags stores the effective flags.
 * With n_VISUAL_INFO_ONE_CHAR at most one character is consumed.
 * Returns TRU1 on success.  FAL0 is returned for an incomplete multibyte
 * sequence, and for an invalid one unless n_VISUAL_INFO_SKIP_ERRORS is set */
41 FL bool_t
42 n_visual_info(struct n_visual_info_ctx *vicp, enum n_visual_info_flags vif){
43 #ifdef HAVE_C90AMEND1
44 mbstate_t *mbp;
45 #endif
46 size_t il;
47 char const *ib;
48 bool_t rv;
49 NYD2_ENTER;
51 assert(vicp != NULL);
52 assert(vicp->vic_inlen == 0 || vicp->vic_indat != NULL);
53 assert(!(vif & n__VISUAL_INFO_FLAGS) || !(vif & n_VISUAL_INFO_ONE_CHAR));
55 rv = TRU1;
56 ib = vicp->vic_indat;
/* An input length of UIZ_MAX requests that strlen() determines the length */
57 if((il = vicp->vic_inlen) == UIZ_MAX)
58 il = vicp->vic_inlen = strlen(ib);
/* n_VISUAL_INFO_WOUT_PRINTABLE alone: also turn on the width query, the width
 * is what decides whether a character is stored (see below) */
60 if((vif & (n_VISUAL_INFO_WIDTH_QUERY | n_VISUAL_INFO_WOUT_PRINTABLE)) ==
61 n_VISUAL_INFO_WOUT_PRINTABLE)
62 vif |= n_VISUAL_INFO_WIDTH_QUERY;
64 vicp->vic_chars_seen = vicp->vic_bytes_seen = vicp->vic_vi_width = 0;
/* Output buffer: one slot per input byte plus one for the terminator */
65 if(vif & n_VISUAL_INFO_WOUT_CREATE){
66 if(vif & n_VISUAL_INFO_WOUT_SALLOC)
67 vicp->vic_woudat = salloc(sizeof(*vicp->vic_woudat) * (il +1));
68 vicp->vic_woulen = 0;
/* Use the conversion state of the caller if there is one, otherwise the one
 * that is embedded in *vicp */
70 #ifdef HAVE_C90AMEND1
71 if((mbp = vicp->vic_mbstate) == NULL)
72 mbp = &vicp->vic_mbs_def;
73 #endif
75 if(il > 0){
76 do/* while(!(vif & n_VISUAL_INFO_ONE_CHAR) && il > 0) */{
77 #ifdef HAVE_C90AMEND1
78 size_t i = mbrtowc(&vicp->vic_waccu, ib, il, mbp);
/* (size_t)-2: mbrtowc() saw an incomplete multibyte sequence: give up */
80 if(i == (size_t)-2){
81 rv = FAL0;
82 break;
/* (size_t)-1: invalid sequence: give up unless errors shall be skipped */
83 }else if(i == (size_t)-1){
84 if(!(vif & n_VISUAL_INFO_SKIP_ERRORS)){
85 rv = FAL0;
86 break;
/* Skipping: reset the conversion state, substitute a replacement character
 * and step over a single input byte */
88 memset(mbp, 0, sizeof *mbp);
89 vicp->vic_waccu = (n_psonce & n_PSO_UNICODE) ? 0xFFFD : '?';
90 i = 1;
/* 0: a NUL character was converted: stop, and report no remaining input */
91 }else if(i == 0){
92 il = 0;
93 break;
96 ++vicp->vic_chars_seen;
97 vicp->vic_bytes_seen += i;
98 ib += i;
99 il -= i;
/* Sum up the visual width; a tabulator is counted as one column */
101 if(vif & n_VISUAL_INFO_WIDTH_QUERY){
102 int w;
103 wchar_t wc = vicp->vic_waccu;
105 # ifdef HAVE_WCWIDTH
106 w = (wc == '\t' ? 1 : wcwidth(wc));
107 # else
108 if(wc == '\t' || iswprint(wc))
109 w = 1 + (wc >= 0x1100u); /* S-CText isfullwidth() */
110 else
111 w = -1;
112 # endif
113 if(w > 0)
114 vicp->vic_vi_width += w;
/* No positive width: with n_VISUAL_INFO_WOUT_PRINTABLE the character is not
 * stored in the output buffer: directly go for the loop condition */
115 else if(vif & n_VISUAL_INFO_WOUT_PRINTABLE)
116 continue;
118 #else /* HAVE_C90AMEND1 */
/* Without HAVE_C90AMEND1 each byte is treated as one character */
119 char c = *ib;
121 if(c == '\0'){
122 il = 0;
123 break;
126 ++vicp->vic_chars_seen;
127 ++vicp->vic_bytes_seen;
128 vicp->vic_waccu = c;
129 if(vif & n_VISUAL_INFO_WIDTH_QUERY)
130 vicp->vic_vi_width += (c == '\t' || isprint(c)); /* XXX */
132 ++ib;
133 --il;
134 #endif
136 if(vif & n_VISUAL_INFO_WOUT_CREATE)
137 vicp->vic_woudat[vicp->vic_woulen++] = vicp->vic_waccu;
138 }while(!(vif & n_VISUAL_INFO_ONE_CHAR) && il > 0);
141 if(vif & n_VISUAL_INFO_WOUT_CREATE)
142 vicp->vic_woudat[vicp->vic_woulen] = L'\0';
/* Report the unconsumed rest of the input and the effective flags */
143 vicp->vic_oudat = ib;
144 vicp->vic_oulen = il;
145 vicp->vic_flags = vif;
146 NYD2_LEAVE;
147 return rv;
150 FL size_t
151 field_detect_clip(size_t maxlen, char const *buf, size_t blen)/*TODO mbrtowc()*/
153 size_t rv;
154 NYD_ENTER;
156 #ifdef HAVE_NATCH_CHAR
157 maxlen = n_MIN(maxlen, blen);
158 for (rv = 0; maxlen > 0;) {
159 int ml = mblen(buf, maxlen);
160 if (ml <= 0) {
161 mblen(NULL, 0);
162 break;
164 buf += ml;
165 rv += ml;
166 maxlen -= ml;
168 #else
169 rv = n_MIN(blen, maxlen);
170 #endif
171 NYD_LEAVE;
172 return rv;
/* Create a salloc()ed, NUL terminated copy of cp which is meant to fit in a
 * field of col screen columns:
 * - copying stops before a character would exceed the remaining columns;
 * - what cannot be converted or is not printable is replaced (by n_unirepl in
 *   Unicode mode, by a question mark otherwise);
 * - tabulators and single byte whitespace become a space character;
 * - if fill is not 0 the columns left over are filled with spaces: in front
 *   of the text if fill is greater than 0, after the text otherwise;
 * - if bidi_info_needed() says so the text is enclosed in the start and end
 *   sequences of bidi_info_create(), and col is reduced by bi_pad upfront.
 * If cols_decr_used_or_null is not NULL it is decremented by the difference
 * of the original col and the columns which are left over in the end */
175 FL char *
176 colalign(char const *cp, int col, int fill, int *cols_decr_used_or_null)
178 n_NATCH_CHAR( struct bidi_info bi; )
179 int col_orig = col, n, sz;
180 bool_t isbidi, isuni, istab, isrepl;
181 char *nb, *np;
182 NYD_ENTER;
184 /* Bidi only on request and when there is 8-bit data */
185 isbidi = isuni = FAL0;
186 #ifdef HAVE_NATCH_CHAR
187 isuni = ((n_psonce & n_PSO_UNICODE) != 0);
188 bidi_info_create(&bi);
/* A start sequence of length 0: nothing to be enclosed in */
189 if (bi.bi_start.l == 0)
190 goto jnobidi;
191 if (!(isbidi = bidi_info_needed(cp, strlen(cp))))
192 goto jnobidi;
/* Reserve the padding columns, as far as available */
194 if ((size_t)col >= bi.bi_pad)
195 col -= bi.bi_pad;
196 else
197 col = 0;
198 jnobidi:
199 #endif
/* Room for n_mb_cur_max bytes per input byte, plus fill padding, plus the
 * bidi sequences, plus the terminating NUL */
201 np = nb = salloc(n_mb_cur_max * strlen(cp) +
202 ((fill ? col : 0)
203 n_NATCH_CHAR( + (isbidi ? bi.bi_start.l + bi.bi_end.l : 0) )
204 +1));
206 #ifdef HAVE_NATCH_CHAR
207 if (isbidi) {
208 memcpy(np, bi.bi_start.s, bi.bi_start.l);
209 np += bi.bi_start.l;
211 #endif
/* Per character: determine the byte length sz and the column width n, and
 * whether it is a tabulator (istab) or has to be replaced (isrepl) */
213 while (*cp != '\0') {
214 istab = FAL0;
215 #ifdef HAVE_C90AMEND1
216 if (n_mb_cur_max > 1) {
217 wchar_t wc;
219 n = 1;
220 isrepl = TRU1;
/* Conversion failure: replace a single byte */
221 if ((sz = mbtowc(&wc, cp, n_mb_cur_max)) == -1)
222 sz = 1;
223 else if (wc == L'\t') {
/* The tabulator branch below advances cp by one byte only, so skip anything
 * in excess of that right here */
224 cp += sz - 1; /* Silly, no such charset known (.. until S-Ctext) */
225 isrepl = FAL0;
226 istab = TRU1;
227 } else if (iswprint(wc)) {
228 # ifndef HAVE_WCWIDTH
229 n = 1 + (wc >= 0x1100u); /* TODO use S-CText isfullwidth() */
230 # else
/* Unknown width: count one column and keep isrepl set */
231 if ((n = wcwidth(wc)) == -1)
232 n = 1;
233 else
234 # endif
/* (With HAVE_WCWIDTH this is the else branch of the wcwidth() test) */
235 isrepl = FAL0;
237 } else
238 #endif
/* Single byte case: one byte occupies one column */
240 n = sz = 1;
241 istab = (*cp == '\t');
242 isrepl = !(istab || isprint((uc_i)*cp));
/* Stop if the character does not fit in the remaining columns */
245 if (n > col)
246 break;
247 col -= n;
249 if (isrepl) {
250 if (isuni) {
251 /* Contained in n_mb_cur_max, then */
252 memcpy(np, n_unirepl, sizeof(n_unirepl) -1);
253 np += sizeof(n_unirepl) -1;
254 } else
255 *np++ = '?';
256 cp += sz;
257 } else if (istab || (sz == 1 && spacechar(*cp))) {
258 *np++ = ' ';
259 ++cp;
260 } else
261 while (sz--)
262 *np++ = *cp++;
/* Fill up the remaining columns with spaces: for fill greater than 0 the text
 * is moved behind the padding, otherwise the padding is appended */
265 if (fill && col != 0) {
266 if (fill > 0) {
267 memmove(nb + col, nb, PTR2SIZE(np - nb));
268 memset(nb, ' ', col);
269 } else
270 memset(np, ' ', col);
271 np += col;
272 col = 0;
275 #ifdef HAVE_NATCH_CHAR
276 if (isbidi) {
277 memcpy(np, bi.bi_end.s, bi.bi_end.l);
278 np += bi.bi_end.l;
280 #endif
282 *np = '\0';
/* Tell the caller how many columns have been used up */
283 if (cols_decr_used_or_null != NULL)
284 *cols_decr_used_or_null -= col_orig - col;
285 NYD_LEAVE;
286 return nb;
/* Store in *out a NUL terminated copy of the in->l bytes at in->s in which
 * everything that is not printable has been replaced.
 * out->s is allocated via smalloc() and is not released by this function,
 * out->l is the length of the result without the terminating NUL.
 * - With HAVE_NATCH_CHAR and n_mb_cur_max greater than 1 the input is decoded
 *   via mbtowc(3): invalid sequences become U+FFFD in Unicode mode and a
 *   question mark otherwise; non-printable characters except newline and
 *   tabulator become Unicode control pictures in Unicode mode and a question
 *   mark otherwise.  In Unicode mode directional marks, some zero width
 *   characters, the soft hyphen and U+FEFF are removed from the output.
 * - Otherwise each byte which is neither printable nor one of newline,
 *   carriage return, backspace or tabulator is replaced by a question mark */
289 FL void
290 makeprint(struct str const *in, struct str *out) /* TODO <-> TTYCHARSET!! */
292 /* TODO: makeprint() should honour *ttycharset*. This of course does not
293 * TODO work with ISO C / POSIX since mbrtowc() do know about locales, not
294 * TODO charsets, and ditto iswprint() etc. do work with the locale too.
295 * TODO I hope S-CText can do something about that, and/or otherwise add
296 * TODO some special treatment for UTF-8 (take it from S-CText too then) */
297 char const *inp, *maxp;
298 char *outp;
299 DBG( size_t msz; )
300 NYD_ENTER;
/* Room for n_mb_cur_max bytes per input byte, some slack, and the NUL */
302 out->s =
303 outp = smalloc(DBG( msz = ) in->l*n_mb_cur_max + 2u*n_mb_cur_max +1);
304 inp = in->s;
305 maxp = inp + in->l;
307 #ifdef HAVE_NATCH_CHAR
308 if (n_mb_cur_max > 1) {
309 char mbb[MB_LEN_MAX + 1];
310 wchar_t wc;
311 int i, n;
312 bool_t isuni = ((n_psonce & n_PSO_UNICODE) != 0);
314 out->l = 0;
315 while (inp < maxp) {
/* Only bytes which have the high bit set are passed through mbtowc(), all
 * other bytes are taken over as they are */
316 if (*inp & 0200)
317 n = mbtowc(&wc, inp, PTR2SIZE(maxp - inp));
318 else {
319 wc = *inp;
320 n = 1;
322 if (n == -1) {
323 /* FIXME Why mbtowc() resetting here?
324 * FIXME what about ISO 2022-JP plus -- those
325 * FIXME will lose shifts, then!
326 * FIXME THUS - we'd need special "known points"
327 * FIXME to do so - say, after a newline!!
328 * FIXME WE NEED TO CHANGE ALL USES +MBLEN! */
329 mbtowc(&wc, NULL, n_mb_cur_max);
330 wc = isuni ? 0xFFFD : '?';
331 n = 1;
/* (mbtowc() returns 0 for the NUL character, which occupies one byte) */
332 } else if (n == 0)
333 n = 1;
334 inp += n;
335 if (!iswprint(wc) && wc != '\n' /*&& wc != '\r' && wc != '\b'*/ &&
336 wc != '\t') {
/* Not printable.  In Unicode mode values up to 037 map to the control
 * pictures starting at U+2400, 0177 (DEL) maps to U+2421, and everything else
 * to U+2426; otherwise a question mark is used */
337 if ((wc & ~(wchar_t)037) == 0)
338 wc = isuni ? 0x2400 | wc : '?';
339 else if (wc == 0177)
340 wc = isuni ? 0x2421 : '?';
341 else
342 wc = isuni ? 0x2426 : '?';
343 }else if(isuni){ /* TODO ctext */
344 /* Need to filter out L-TO-R and R-TO-L marks TODO ctext */
345 if(wc == 0x200E || wc == 0x200F || (wc >= 0x202A && wc <= 0x202E))
346 continue;
347 /* And some zero-width messes */
348 if(wc == 0x00AD || (wc >= 0x200B && wc <= 0x200D))
349 continue;
350 /* Oh about the ISO C wide character interfaces, baby! */
351 if(wc == 0xFEFF)
352 continue;
/* Characters which cannot be converted back to multibyte are dropped */
354 if ((n = wctomb(mbb, wc)) <= 0)
355 continue;
356 out->l += n;
357 assert(out->l < msz);
358 for (i = 0; i < n; ++i)
359 *outp++ = mbb[i];
361 } else
362 #endif /* NATCH_CHAR */
/* Single byte case: input and output are of the same length */
364 int c;
365 while (inp < maxp) {
366 c = *inp++ & 0377;
367 if (!isprint(c) && c != '\n' && c != '\r' && c != '\b' && c != '\t')
368 c = '?';
369 *outp++ = c;
371 out->l = in->l;
373 out->s[out->l] = '\0';
374 NYD_LEAVE;
377 FL size_t
378 delctrl(char *cp, size_t len)
380 size_t x, y;
381 NYD_ENTER;
383 for (x = y = 0; x < len; ++x)
384 if (!cntrlchar(cp[x]))
385 cp[y++] = cp[x];
386 cp[y] = '\0';
387 NYD_LEAVE;
388 return y;
391 FL char *
392 prstr(char const *s)
394 struct str in, out;
395 char *rp;
396 NYD_ENTER;
398 in.s = n_UNCONST(s);
399 in.l = strlen(s);
400 makeprint(&in, &out);
401 rp = savestrbuf(out.s, out.l);
402 free(out.s);
403 NYD_LEAVE;
404 return rp;
407 FL int
408 prout(char const *s, size_t sz, FILE *fp)
410 struct str in, out;
411 int n;
412 NYD_ENTER;
414 in.s = n_UNCONST(s);
415 in.l = sz;
416 makeprint(&in, &out);
417 n = fwrite(out.s, 1, out.l, fp);
418 free(out.s);
419 NYD_LEAVE;
420 return n;
423 FL size_t
424 putuc(int u, int c, FILE *fp)
426 size_t rv;
427 NYD_ENTER;
428 n_UNUSED(u);
430 #ifdef HAVE_NATCH_CHAR
431 if ((n_psonce & n_PSO_UNICODE) && (u & ~(wchar_t)0177)) {
432 char mbb[MB_LEN_MAX];
433 int i, n;
435 if ((n = wctomb(mbb, u)) > 0) {
436 rv = wcwidth(u);
437 for (i = 0; i < n; ++i)
438 if (putc(mbb[i] & 0377, fp) == EOF) {
439 rv = 0;
440 break;
442 } else if (n == 0)
443 rv = (putc('\0', fp) != EOF);
444 else
445 rv = 0;
446 } else
447 #endif
448 rv = (putc(c, fp) != EOF);
449 NYD_LEAVE;
450 return rv;
453 FL bool_t
454 bidi_info_needed(char const *bdat, size_t blen)
456 bool_t rv = FAL0;
457 NYD_ENTER;
459 #ifdef HAVE_NATCH_CHAR
460 if (n_psonce & n_PSO_UNICODE)
461 while (blen > 0) {
462 /* TODO Checking for BIDI character: use S-CText fromutf8
463 * TODO plus isrighttoleft (or whatever there will be)! */
464 ui32_t c = n_utf8_to_utf32(&bdat, &blen);
465 if (c == UI32_MAX)
466 break;
468 if (c <= 0x05BE)
469 continue;
471 /* (Very very fuzzy, awaiting S-CText for good) */
472 if ((c >= 0x05BE && c <= 0x08E3) ||
473 (c >= 0xFB1D && c <= 0xFE00) /* No: variation selectors */ ||
474 (c >= 0xFE70 && c <= 0xFEFC) ||
475 (c >= 0x10800 && c <= 0x10C48) ||
476 (c >= 0x1EE00 && c <= 0x1EEF1)) {
477 rv = TRU1;
478 break;
481 #endif /* HAVE_NATCH_CHAR */
482 NYD_LEAVE;
483 return rv;
486 FL void
487 bidi_info_create(struct bidi_info *bip)
489 /* Unicode: how to isolate RIGHT-TO-LEFT scripts via *headline-bidi*
490 * 1.1 (Jun 1993): U+200E (E2 80 8E) LEFT-TO-RIGHT MARK
491 * 6.3 (Sep 2013): U+2068 (E2 81 A8) FIRST STRONG ISOLATE,
492 * U+2069 (E2 81 A9) POP DIRECTIONAL ISOLATE
493 * Worse results seen for: U+202D "\xE2\x80\xAD" U+202C "\xE2\x80\xAC" */
494 n_NATCH_CHAR( char const *hb; )
495 NYD_ENTER;
497 memset(bip, 0, sizeof *bip);
498 bip->bi_start.s = bip->bi_end.s = n_UNCONST(n_empty);
500 #ifdef HAVE_NATCH_CHAR
501 if ((n_psonce & n_PSO_UNICODE) && (hb = ok_vlook(headline_bidi)) != NULL) {
502 switch (*hb) {
503 case '3':
504 bip->bi_pad = 2;
505 /* FALLTHRU */
506 case '2':
507 bip->bi_start.s = bip->bi_end.s = n_UNCONST("\xE2\x80\x8E");
508 break;
509 case '1':
510 bip->bi_pad = 2;
511 /* FALLTHRU */
512 default:
513 bip->bi_start.s = n_UNCONST("\xE2\x81\xA8");
514 bip->bi_end.s = n_UNCONST("\xE2\x81\xA9");
515 break;
517 bip->bi_start.l = bip->bi_end.l = 3;
519 #endif
520 NYD_LEAVE;
523 /* s-it-mode */