make-config.in: complete path (leftover of [807f64e2], 2015-12-26!)
[s-mailx.git] / ui-str.c
blob68dc34387b5722bda7be30fcd0ec8b233eaa0ea9
/*@ S-nail - a mail user agent derived from Berkeley Mail.
 *@ UserInterface: string related operations.
 *
 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
 * SPDX-License-Identifier: BSD-3-Clause TODO ISC
 */
/*
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
33 #undef n_FILE
34 #define n_FILE ui_str
36 #ifndef HAVE_AMALGAMATION
37 # include "nail.h"
38 #endif
40 #include <ctype.h>
42 FL bool_t
43 n_visual_info(struct n_visual_info_ctx *vicp, enum n_visual_info_flags vif){
44 #ifdef HAVE_C90AMEND1
45 mbstate_t *mbp;
46 #endif
47 size_t il;
48 char const *ib;
49 bool_t rv;
50 NYD2_ENTER;
52 assert(vicp != NULL);
53 assert(vicp->vic_inlen == 0 || vicp->vic_indat != NULL);
54 assert(!(vif & n__VISUAL_INFO_FLAGS) || !(vif & n_VISUAL_INFO_ONE_CHAR));
56 rv = TRU1;
57 ib = vicp->vic_indat;
58 if((il = vicp->vic_inlen) == UIZ_MAX)
59 il = vicp->vic_inlen = strlen(ib);
61 if((vif & (n_VISUAL_INFO_WIDTH_QUERY | n_VISUAL_INFO_WOUT_PRINTABLE)) ==
62 n_VISUAL_INFO_WOUT_PRINTABLE)
63 vif |= n_VISUAL_INFO_WIDTH_QUERY;
65 vicp->vic_chars_seen = vicp->vic_bytes_seen = vicp->vic_vi_width = 0;
66 if(vif & n_VISUAL_INFO_WOUT_CREATE){
67 if(vif & n_VISUAL_INFO_WOUT_SALLOC)
68 vicp->vic_woudat =
69 n_autorec_alloc(sizeof(*vicp->vic_woudat) * (il +1));
70 vicp->vic_woulen = 0;
72 #ifdef HAVE_C90AMEND1
73 if((mbp = vicp->vic_mbstate) == NULL)
74 mbp = &vicp->vic_mbs_def;
75 #endif
77 if(il > 0){
78 do/* while(!(vif & n_VISUAL_INFO_ONE_CHAR) && il > 0) */{
79 #ifdef HAVE_C90AMEND1
80 size_t i = mbrtowc(&vicp->vic_waccu, ib, il, mbp);
82 if(i == (size_t)-2){
83 rv = FAL0;
84 break;
85 }else if(i == (size_t)-1){
86 if(!(vif & n_VISUAL_INFO_SKIP_ERRORS)){
87 rv = FAL0;
88 break;
90 memset(mbp, 0, sizeof *mbp);
91 vicp->vic_waccu = (n_psonce & n_PSO_UNICODE) ? 0xFFFD : '?';
92 i = 1;
93 }else if(i == 0){
94 il = 0;
95 break;
98 ++vicp->vic_chars_seen;
99 vicp->vic_bytes_seen += i;
100 ib += i;
101 il -= i;
103 if(vif & n_VISUAL_INFO_WIDTH_QUERY){
104 int w;
105 wchar_t wc = vicp->vic_waccu;
107 # ifdef HAVE_WCWIDTH
108 w = (wc == '\t' ? 1 : wcwidth(wc));
109 # else
110 if(wc == '\t' || iswprint(wc))
111 w = 1 + (wc >= 0x1100u); /* S-CText isfullwidth() */
112 else
113 w = -1;
114 # endif
115 if(w > 0)
116 vicp->vic_vi_width += w;
117 else if(vif & n_VISUAL_INFO_WOUT_PRINTABLE)
118 continue;
120 #else /* HAVE_C90AMEND1 */
121 char c = *ib;
123 if(c == '\0'){
124 il = 0;
125 break;
128 ++vicp->vic_chars_seen;
129 ++vicp->vic_bytes_seen;
130 vicp->vic_waccu = c;
131 if(vif & n_VISUAL_INFO_WIDTH_QUERY)
132 vicp->vic_vi_width += (c == '\t' || isprint(c)); /* XXX */
134 ++ib;
135 --il;
136 #endif
138 if(vif & n_VISUAL_INFO_WOUT_CREATE)
139 vicp->vic_woudat[vicp->vic_woulen++] = vicp->vic_waccu;
140 }while(!(vif & n_VISUAL_INFO_ONE_CHAR) && il > 0);
143 if(vif & n_VISUAL_INFO_WOUT_CREATE)
144 vicp->vic_woudat[vicp->vic_woulen] = L'\0';
145 vicp->vic_oudat = ib;
146 vicp->vic_oulen = il;
147 vicp->vic_flags = vif;
148 NYD2_LEAVE;
149 return rv;
152 FL size_t
153 field_detect_clip(size_t maxlen, char const *buf, size_t blen)/*TODO mbrtowc()*/
155 size_t rv;
156 NYD_ENTER;
158 #ifdef HAVE_NATCH_CHAR
159 maxlen = n_MIN(maxlen, blen);
160 for (rv = 0; maxlen > 0;) {
161 int ml = mblen(buf, maxlen);
162 if (ml <= 0) {
163 mblen(NULL, 0);
164 break;
166 buf += ml;
167 rv += ml;
168 maxlen -= ml;
170 #else
171 rv = n_MIN(blen, maxlen);
172 #endif
173 NYD_LEAVE;
174 return rv;
177 FL char *
178 colalign(char const *cp, int col, int fill, int *cols_decr_used_or_null)
180 n_NATCH_CHAR( struct bidi_info bi; )
181 int col_orig = col, n, sz;
182 bool_t isbidi, isuni, istab, isrepl;
183 char *nb, *np;
184 NYD_ENTER;
186 /* Bidi only on request and when there is 8-bit data */
187 isbidi = isuni = FAL0;
188 #ifdef HAVE_NATCH_CHAR
189 isuni = ((n_psonce & n_PSO_UNICODE) != 0);
190 bidi_info_create(&bi);
191 if (bi.bi_start.l == 0)
192 goto jnobidi;
193 if (!(isbidi = bidi_info_needed(cp, strlen(cp))))
194 goto jnobidi;
196 if ((size_t)col >= bi.bi_pad)
197 col -= bi.bi_pad;
198 else
199 col = 0;
200 jnobidi:
201 #endif
203 np = nb = n_autorec_alloc(n_mb_cur_max * strlen(cp) +
204 ((fill ? col : 0)
205 n_NATCH_CHAR( + (isbidi ? bi.bi_start.l + bi.bi_end.l : 0) )
206 +1));
208 #ifdef HAVE_NATCH_CHAR
209 if (isbidi) {
210 memcpy(np, bi.bi_start.s, bi.bi_start.l);
211 np += bi.bi_start.l;
213 #endif
215 while (*cp != '\0') {
216 istab = FAL0;
217 #ifdef HAVE_C90AMEND1
218 if (n_mb_cur_max > 1) {
219 wchar_t wc;
221 n = 1;
222 isrepl = TRU1;
223 if ((sz = mbtowc(&wc, cp, n_mb_cur_max)) == -1)
224 sz = 1;
225 else if (wc == L'\t') {
226 cp += sz - 1; /* Silly, no such charset known (.. until S-Ctext) */
227 isrepl = FAL0;
228 istab = TRU1;
229 } else if (iswprint(wc)) {
230 # ifndef HAVE_WCWIDTH
231 n = 1 + (wc >= 0x1100u); /* TODO use S-CText isfullwidth() */
232 # else
233 if ((n = wcwidth(wc)) == -1)
234 n = 1;
235 else
236 # endif
237 isrepl = FAL0;
239 } else
240 #endif
242 n = sz = 1;
243 istab = (*cp == '\t');
244 isrepl = !(istab || isprint((uc_i)*cp));
247 if (n > col)
248 break;
249 col -= n;
251 if (isrepl) {
252 if (isuni) {
253 /* Contained in n_mb_cur_max, then */
254 memcpy(np, n_unirepl, sizeof(n_unirepl) -1);
255 np += sizeof(n_unirepl) -1;
256 } else
257 *np++ = '?';
258 cp += sz;
259 } else if (istab || (sz == 1 && spacechar(*cp))) {
260 *np++ = ' ';
261 ++cp;
262 } else
263 while (sz--)
264 *np++ = *cp++;
267 if (fill && col != 0) {
268 if (fill > 0) {
269 memmove(nb + col, nb, PTR2SIZE(np - nb));
270 memset(nb, ' ', col);
271 } else
272 memset(np, ' ', col);
273 np += col;
274 col = 0;
277 #ifdef HAVE_NATCH_CHAR
278 if (isbidi) {
279 memcpy(np, bi.bi_end.s, bi.bi_end.l);
280 np += bi.bi_end.l;
282 #endif
284 *np = '\0';
285 if (cols_decr_used_or_null != NULL)
286 *cols_decr_used_or_null -= col_orig - col;
287 NYD_LEAVE;
288 return nb;
291 FL void
292 makeprint(struct str const *in, struct str *out) /* TODO <-> TTYCHARSET!! */
294 /* TODO: makeprint() should honour *ttycharset*. This of course does not
295 * TODO work with ISO C / POSIX since mbrtowc() do know about locales, not
296 * TODO charsets, and ditto iswprint() etc. do work with the locale too.
297 * TODO I hope S-CText can do something about that, and/or otherwise add
298 * TODO some special treatment for UTF-8 (take it from S-CText too then) */
299 char const *inp, *maxp;
300 char *outp;
301 DBG( size_t msz; )
302 NYD_ENTER;
304 out->s =
305 outp = n_alloc(DBG( msz = ) in->l*n_mb_cur_max + 2u*n_mb_cur_max +1);
306 inp = in->s;
307 maxp = inp + in->l;
309 #ifdef HAVE_NATCH_CHAR
310 if (n_mb_cur_max > 1) {
311 char mbb[MB_LEN_MAX + 1];
312 wchar_t wc;
313 int i, n;
314 bool_t isuni = ((n_psonce & n_PSO_UNICODE) != 0);
316 out->l = 0;
317 while (inp < maxp) {
318 if (*inp & 0200)
319 n = mbtowc(&wc, inp, PTR2SIZE(maxp - inp));
320 else {
321 wc = *inp;
322 n = 1;
324 if (n == -1) {
325 /* FIXME Why mbtowc() resetting here?
326 * FIXME what about ISO 2022-JP plus -- those
327 * FIXME will loose shifts, then!
328 * FIXME THUS - we'd need special "known points"
329 * FIXME to do so - say, after a newline!!
330 * FIXME WE NEED TO CHANGE ALL USES +MBLEN! */
331 mbtowc(&wc, NULL, n_mb_cur_max);
332 wc = isuni ? 0xFFFD : '?';
333 n = 1;
334 } else if (n == 0)
335 n = 1;
336 inp += n;
337 if (!iswprint(wc) && wc != '\n' /*&& wc != '\r' && wc != '\b'*/ &&
338 wc != '\t') {
339 if ((wc & ~(wchar_t)037) == 0)
340 wc = isuni ? 0x2400 | wc : '?';
341 else if (wc == 0177)
342 wc = isuni ? 0x2421 : '?';
343 else
344 wc = isuni ? 0x2426 : '?';
345 }else if(isuni){ /* TODO ctext */
346 /* Need to filter out L-TO-R and R-TO-R marks TODO ctext */
347 if(wc == 0x200E || wc == 0x200F || (wc >= 0x202A && wc <= 0x202E))
348 continue;
349 /* And some zero-width messes */
350 if(wc == 0x00AD || (wc >= 0x200B && wc <= 0x200D))
351 continue;
352 /* Oh about the ISO C wide character interfaces, baby! */
353 if(wc == 0xFEFF)
354 continue;
356 if ((n = wctomb(mbb, wc)) <= 0)
357 continue;
358 out->l += n;
359 assert(out->l < msz);
360 for (i = 0; i < n; ++i)
361 *outp++ = mbb[i];
363 } else
364 #endif /* NATCH_CHAR */
366 int c;
367 while (inp < maxp) {
368 c = *inp++ & 0377;
369 if (!isprint(c) && c != '\n' && c != '\r' && c != '\b' && c != '\t')
370 c = '?';
371 *outp++ = c;
373 out->l = in->l;
375 out->s[out->l] = '\0';
376 NYD_LEAVE;
379 FL size_t
380 delctrl(char *cp, size_t len)
382 size_t x, y;
383 NYD_ENTER;
385 for (x = y = 0; x < len; ++x)
386 if (!cntrlchar(cp[x]))
387 cp[y++] = cp[x];
388 cp[y] = '\0';
389 NYD_LEAVE;
390 return y;
393 FL char *
394 prstr(char const *s)
396 struct str in, out;
397 char *rp;
398 NYD_ENTER;
400 in.s = n_UNCONST(s);
401 in.l = strlen(s);
402 makeprint(&in, &out);
403 rp = savestrbuf(out.s, out.l);
404 n_free(out.s);
405 NYD_LEAVE;
406 return rp;
409 FL int
410 prout(char const *s, size_t sz, FILE *fp)
412 struct str in, out;
413 int n;
414 NYD_ENTER;
416 in.s = n_UNCONST(s);
417 in.l = sz;
418 makeprint(&in, &out);
419 n = fwrite(out.s, 1, out.l, fp);
420 n_free(out.s);
421 NYD_LEAVE;
422 return n;
425 FL bool_t
426 bidi_info_needed(char const *bdat, size_t blen)
428 bool_t rv = FAL0;
429 NYD_ENTER;
431 #ifdef HAVE_NATCH_CHAR
432 if (n_psonce & n_PSO_UNICODE)
433 while (blen > 0) {
434 /* TODO Checking for BIDI character: use S-CText fromutf8
435 * TODO plus isrighttoleft (or whatever there will be)! */
436 ui32_t c = n_utf8_to_utf32(&bdat, &blen);
437 if (c == UI32_MAX)
438 break;
440 if (c <= 0x05BE)
441 continue;
443 /* (Very very fuzzy, awaiting S-CText for good) */
444 if ((c >= 0x05BE && c <= 0x08E3) ||
445 (c >= 0xFB1D && c <= 0xFE00) /* No: variation selectors */ ||
446 (c >= 0xFE70 && c <= 0xFEFC) ||
447 (c >= 0x10800 && c <= 0x10C48) ||
448 (c >= 0x1EE00 && c <= 0x1EEF1)) {
449 rv = TRU1;
450 break;
453 #endif /* HAVE_NATCH_CHAR */
454 NYD_LEAVE;
455 return rv;
458 FL void
459 bidi_info_create(struct bidi_info *bip)
461 /* Unicode: how to isolate RIGHT-TO-LEFT scripts via *headline-bidi*
462 * 1.1 (Jun 1993): U+200E (E2 80 8E) LEFT-TO-RIGHT MARK
463 * 6.3 (Sep 2013): U+2068 (E2 81 A8) FIRST STRONG ISOLATE,
464 * U+2069 (E2 81 A9) POP DIRECTIONAL ISOLATE
465 * Worse results seen for: U+202D "\xE2\x80\xAD" U+202C "\xE2\x80\xAC" */
466 n_NATCH_CHAR( char const *hb; )
467 NYD_ENTER;
469 memset(bip, 0, sizeof *bip);
470 bip->bi_start.s = bip->bi_end.s = n_UNCONST(n_empty);
472 #ifdef HAVE_NATCH_CHAR
473 if ((n_psonce & n_PSO_UNICODE) && (hb = ok_vlook(headline_bidi)) != NULL) {
474 switch (*hb) {
475 case '3':
476 bip->bi_pad = 2;
477 /* FALLTHRU */
478 case '2':
479 bip->bi_start.s = bip->bi_end.s = n_UNCONST("\xE2\x80\x8E");
480 break;
481 case '1':
482 bip->bi_pad = 2;
483 /* FALLTHRU */
484 default:
485 bip->bi_start.s = n_UNCONST("\xE2\x81\xA8");
486 bip->bi_end.s = n_UNCONST("\xE2\x81\xA9");
487 break;
489 bip->bi_start.l = bip->bi_end.l = 3;
491 #endif
492 NYD_LEAVE;
/* s-it-mode */