Tweak previous, it added a bad memory access
[s-mailx.git] / ui-str.c
blobf607df8ab0988a203362474218a345658282c66b
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ UserInterface: string related operations.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6 */
7 /*
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
32 #undef n_FILE
33 #define n_FILE ui_str
35 #ifndef HAVE_AMALGAMATION
36 # include "nail.h"
37 #endif
39 #include <ctype.h>
41 FL bool_t
42 n_visual_info(struct n_visual_info_ctx *vicp, enum n_visual_info_flags vif){
43 #ifdef HAVE_C90AMEND1
44 mbstate_t *mbp;
45 #endif
46 size_t il;
47 char const *ib;
48 bool_t rv;
49 NYD2_ENTER;
51 assert(vicp != NULL);
52 assert(vicp->vic_inlen == 0 || vicp->vic_indat != NULL);
53 assert(!(vif & n__VISUAL_INFO_FLAGS) || !(vif & n_VISUAL_INFO_ONE_CHAR));
55 rv = TRU1;
56 ib = vicp->vic_indat;
57 if((il = vicp->vic_inlen) == UIZ_MAX)
58 il = vicp->vic_inlen = strlen(ib);
60 if((vif & (n_VISUAL_INFO_WIDTH_QUERY | n_VISUAL_INFO_WOUT_PRINTABLE)) ==
61 n_VISUAL_INFO_WOUT_PRINTABLE)
62 vif |= n_VISUAL_INFO_WIDTH_QUERY;
64 vicp->vic_chars_seen = vicp->vic_bytes_seen = vicp->vic_vi_width = 0;
65 if(vif & n_VISUAL_INFO_WOUT_CREATE){
66 if(vif & n_VISUAL_INFO_WOUT_SALLOC)
67 vicp->vic_woudat = salloc(sizeof(*vicp->vic_woudat) * (il +1));
68 vicp->vic_woulen = 0;
70 #ifdef HAVE_C90AMEND1
71 if((mbp = vicp->vic_mbstate) == NULL)
72 mbp = &vicp->vic_mbs_def;
73 #endif
75 if(il > 0){
76 do/* while(!(vif & n_VISUAL_INFO_ONE_CHAR) && il > 0) */{
77 #ifdef HAVE_C90AMEND1
78 size_t i = mbrtowc(&vicp->vic_waccu, ib, il, mbp);
80 if(i == (size_t)-2){
81 rv = FAL0;
82 break;
83 }else if(i == (size_t)-1){
84 if(!(vif & n_VISUAL_INFO_SKIP_ERRORS)){
85 rv = FAL0;
86 break;
88 memset(mbp, 0, sizeof *mbp);
89 vicp->vic_waccu = (n_psonce & n_PSO_UNICODE) ? 0xFFFD : '?';
90 i = 1;
91 }else if(i == 0){
92 il = 0;
93 break;
96 ++vicp->vic_chars_seen;
97 vicp->vic_bytes_seen += i;
98 ib += i;
99 il -= i;
101 if(vif & n_VISUAL_INFO_WIDTH_QUERY){
102 int w;
103 wchar_t wc = vicp->vic_waccu;
105 # ifdef HAVE_WCWIDTH
106 w = (wc == '\t' ? 1 : wcwidth(wc));
107 # else
108 if(wc == '\t' || iswprint(wc))
109 w = 1 + (wc >= 0x1100u); /* S-CText isfullwidth() */
110 else
111 w = -1;
112 # endif
113 if(w > 0)
114 vicp->vic_vi_width += w;
115 else if(vif & n_VISUAL_INFO_WOUT_PRINTABLE)
116 continue;
118 #else /* HAVE_C90AMEND1 */
119 char c = *ib;
121 if(c == '\0'){
122 il = 0;
123 break;
126 ++vicp->vic_chars_seen;
127 ++vicp->vic_bytes_seen;
128 vicp->vic_waccu = c;
129 if(vif & n_VISUAL_INFO_WIDTH_QUERY)
130 vicp->vic_vi_width += (c == '\t' || isprint(c)); /* XXX */
132 ++ib;
133 --il;
134 #endif
136 if(vif & n_VISUAL_INFO_WOUT_CREATE)
137 vicp->vic_woudat[vicp->vic_woulen++] = vicp->vic_waccu;
138 }while(!(vif & n_VISUAL_INFO_ONE_CHAR) && il > 0);
141 if(vif & n_VISUAL_INFO_WOUT_CREATE)
142 vicp->vic_woudat[vicp->vic_woulen] = L'\0';
143 vicp->vic_oudat = ib;
144 vicp->vic_oulen = il;
145 vicp->vic_flags = vif;
146 NYD2_LEAVE;
147 return rv;
150 FL size_t
151 field_detect_clip(size_t maxlen, char const *buf, size_t blen)/*TODO mbrtowc()*/
153 size_t rv;
154 NYD_ENTER;
156 #ifdef HAVE_NATCH_CHAR
157 maxlen = n_MIN(maxlen, blen);
158 for (rv = 0; maxlen > 0;) {
159 int ml = mblen(buf, maxlen);
160 if (ml <= 0) {
161 mblen(NULL, 0);
162 break;
164 buf += ml;
165 rv += ml;
166 maxlen -= ml;
168 #else
169 rv = n_MIN(blen, maxlen);
170 #endif
171 NYD_LEAVE;
172 return rv;
175 FL char *
176 colalign(char const *cp, int col, int fill, int *cols_decr_used_or_null)
178 n_NATCH_CHAR( struct bidi_info bi; )
179 int col_orig = col, n, sz;
180 bool_t isbidi, isuni, istab, isrepl;
181 char *nb, *np;
182 NYD_ENTER;
184 /* Bidi only on request and when there is 8-bit data */
185 isbidi = isuni = FAL0;
186 #ifdef HAVE_NATCH_CHAR
187 isuni = ((n_psonce & n_PSO_UNICODE) != 0);
188 bidi_info_create(&bi);
189 if (bi.bi_start.l == 0)
190 goto jnobidi;
191 if (!(isbidi = bidi_info_needed(cp, strlen(cp))))
192 goto jnobidi;
194 if ((size_t)col >= bi.bi_pad)
195 col -= bi.bi_pad;
196 else
197 col = 0;
198 jnobidi:
199 #endif
201 np = nb = salloc(n_mb_cur_max * strlen(cp) +
202 ((fill ? col : 0)
203 n_NATCH_CHAR( + (isbidi ? bi.bi_start.l + bi.bi_end.l : 0) )
204 +1));
206 #ifdef HAVE_NATCH_CHAR
207 if (isbidi) {
208 memcpy(np, bi.bi_start.s, bi.bi_start.l);
209 np += bi.bi_start.l;
211 #endif
213 while (*cp != '\0') {
214 istab = FAL0;
215 #ifdef HAVE_C90AMEND1
216 if (n_mb_cur_max > 1) {
217 wchar_t wc;
219 n = 1;
220 isrepl = TRU1;
221 if ((sz = mbtowc(&wc, cp, n_mb_cur_max)) == -1)
222 sz = 1;
223 else if (wc == L'\t') {
224 cp += sz - 1; /* Silly, no such charset known (.. until S-Ctext) */
225 isrepl = FAL0;
226 istab = TRU1;
227 } else if (iswprint(wc)) {
228 # ifndef HAVE_WCWIDTH
229 n = 1 + (wc >= 0x1100u); /* TODO use S-CText isfullwidth() */
230 # else
231 if ((n = wcwidth(wc)) == -1)
232 n = 1;
233 else
234 # endif
235 isrepl = FAL0;
237 } else
238 #endif
240 n = sz = 1;
241 istab = (*cp == '\t');
242 isrepl = !(istab || isprint((uc_i)*cp));
245 if (n > col)
246 break;
247 col -= n;
249 if (isrepl) {
250 if (isuni) {
251 /* Contained in n_mb_cur_max, then */
252 memcpy(np, n_unirepl, sizeof(n_unirepl) -1);
253 np += sizeof(n_unirepl) -1;
254 } else
255 *np++ = '?';
256 cp += sz;
257 } else if (istab || (sz == 1 && spacechar(*cp))) {
258 *np++ = ' ';
259 ++cp;
260 } else
261 while (sz--)
262 *np++ = *cp++;
265 if (fill && col != 0) {
266 if (fill > 0) {
267 memmove(nb + col, nb, PTR2SIZE(np - nb));
268 memset(nb, ' ', col);
269 } else
270 memset(np, ' ', col);
271 np += col;
272 col = 0;
275 #ifdef HAVE_NATCH_CHAR
276 if (isbidi) {
277 memcpy(np, bi.bi_end.s, bi.bi_end.l);
278 np += bi.bi_end.l;
280 #endif
282 *np = '\0';
283 if (cols_decr_used_or_null != NULL)
284 *cols_decr_used_or_null -= col_orig - col;
285 NYD_LEAVE;
286 return nb;
289 FL void
290 makeprint(struct str const *in, struct str *out)
292 char const *inp, *maxp;
293 char *outp;
294 DBG( size_t msz; )
295 NYD_ENTER;
297 out->s =
298 outp = smalloc(DBG( msz = ) in->l*n_mb_cur_max + 2u*n_mb_cur_max +1);
299 inp = in->s;
300 maxp = inp + in->l;
302 #ifdef HAVE_NATCH_CHAR
303 if (n_mb_cur_max > 1) {
304 char mbb[MB_LEN_MAX + 1];
305 wchar_t wc;
306 int i, n;
307 bool_t isuni = ((n_psonce & n_PSO_UNICODE) != 0);
309 out->l = 0;
310 while (inp < maxp) {
311 if (*inp & 0200)
312 n = mbtowc(&wc, inp, PTR2SIZE(maxp - inp));
313 else {
314 wc = *inp;
315 n = 1;
317 if (n == -1) {
318 /* FIXME Why mbtowc() resetting here?
319 * FIXME what about ISO 2022-JP plus -- those
320 * FIXME will loose shifts, then!
321 * FIXME THUS - we'd need special "known points"
322 * FIXME to do so - say, after a newline!!
323 * FIXME WE NEED TO CHANGE ALL USES +MBLEN! */
324 mbtowc(&wc, NULL, n_mb_cur_max);
325 wc = isuni ? 0xFFFD : '?';
326 n = 1;
327 } else if (n == 0)
328 n = 1;
329 inp += n;
330 if (!iswprint(wc) && wc != '\n' && wc != '\r' && wc != '\b' &&
331 wc != '\t') {
332 if ((wc & ~(wchar_t)037) == 0)
333 wc = isuni ? 0x2400 | wc : '?';
334 else if (wc == 0177)
335 wc = isuni ? 0x2421 : '?';
336 else
337 wc = isuni ? 0x2426 : '?';
338 }else if(isuni){ /* TODO ctext */
339 /* We need to actively filter out L-TO-R and R-TO-R marks TODO ctext */
340 if(wc == 0x200E || wc == 0x200F || (wc >= 0x202A && wc <= 0x202E))
341 continue;
342 /* And some zero-width messes */
343 if(wc == 0x00AD || (wc >= 0x200B && wc <= 0x200D))
344 continue;
345 /* Oh about the ISO C wide character interfaces, baby! */
346 if(wc == 0xFEFF)
347 continue;
349 if ((n = wctomb(mbb, wc)) <= 0)
350 continue;
351 out->l += n;
352 assert(out->l < msz);
353 for (i = 0; i < n; ++i)
354 *outp++ = mbb[i];
356 } else
357 #endif /* NATCH_CHAR */
359 int c;
360 while (inp < maxp) {
361 c = *inp++ & 0377;
362 if (!isprint(c) && c != '\n' && c != '\r' && c != '\b' && c != '\t')
363 c = '?';
364 *outp++ = c;
366 out->l = in->l;
368 out->s[out->l] = '\0';
369 NYD_LEAVE;
372 FL size_t
373 delctrl(char *cp, size_t len)
375 size_t x, y;
376 NYD_ENTER;
378 for (x = y = 0; x < len; ++x)
379 if (!cntrlchar(cp[x]))
380 cp[y++] = cp[x];
381 cp[y] = '\0';
382 NYD_LEAVE;
383 return y;
386 FL char *
387 prstr(char const *s)
389 struct str in, out;
390 char *rp;
391 NYD_ENTER;
393 in.s = n_UNCONST(s);
394 in.l = strlen(s);
395 makeprint(&in, &out);
396 rp = savestrbuf(out.s, out.l);
397 free(out.s);
398 NYD_LEAVE;
399 return rp;
402 FL int
403 prout(char const *s, size_t sz, FILE *fp)
405 struct str in, out;
406 int n;
407 NYD_ENTER;
409 in.s = n_UNCONST(s);
410 in.l = sz;
411 makeprint(&in, &out);
412 n = fwrite(out.s, 1, out.l, fp);
413 free(out.s);
414 NYD_LEAVE;
415 return n;
418 FL size_t
419 putuc(int u, int c, FILE *fp)
421 size_t rv;
422 NYD_ENTER;
423 n_UNUSED(u);
425 #ifdef HAVE_NATCH_CHAR
426 if ((n_psonce & n_PSO_UNICODE) && (u & ~(wchar_t)0177)) {
427 char mbb[MB_LEN_MAX];
428 int i, n;
430 if ((n = wctomb(mbb, u)) > 0) {
431 rv = wcwidth(u);
432 for (i = 0; i < n; ++i)
433 if (putc(mbb[i] & 0377, fp) == EOF) {
434 rv = 0;
435 break;
437 } else if (n == 0)
438 rv = (putc('\0', fp) != EOF);
439 else
440 rv = 0;
441 } else
442 #endif
443 rv = (putc(c, fp) != EOF);
444 NYD_LEAVE;
445 return rv;
448 FL bool_t
449 bidi_info_needed(char const *bdat, size_t blen)
451 bool_t rv = FAL0;
452 NYD_ENTER;
454 #ifdef HAVE_NATCH_CHAR
455 if (n_psonce & n_PSO_UNICODE)
456 while (blen > 0) {
457 /* TODO Checking for BIDI character: use S-CText fromutf8
458 * TODO plus isrighttoleft (or whatever there will be)! */
459 ui32_t c = n_utf8_to_utf32(&bdat, &blen);
460 if (c == UI32_MAX)
461 break;
463 if (c <= 0x05BE)
464 continue;
466 /* (Very very fuzzy, awaiting S-CText for good) */
467 if ((c >= 0x05BE && c <= 0x08E3) ||
468 (c >= 0xFB1D && c <= 0xFE00) /* No: variation selectors */ ||
469 (c >= 0xFE70 && c <= 0xFEFC) ||
470 (c >= 0x10800 && c <= 0x10C48) ||
471 (c >= 0x1EE00 && c <= 0x1EEF1)) {
472 rv = TRU1;
473 break;
476 #endif /* HAVE_NATCH_CHAR */
477 NYD_LEAVE;
478 return rv;
481 FL void
482 bidi_info_create(struct bidi_info *bip)
484 /* Unicode: how to isolate RIGHT-TO-LEFT scripts via *headline-bidi*
485 * 1.1 (Jun 1993): U+200E (E2 80 8E) LEFT-TO-RIGHT MARK
486 * 6.3 (Sep 2013): U+2068 (E2 81 A8) FIRST STRONG ISOLATE,
487 * U+2069 (E2 81 A9) POP DIRECTIONAL ISOLATE
488 * Worse results seen for: U+202D "\xE2\x80\xAD" U+202C "\xE2\x80\xAC" */
489 n_NATCH_CHAR( char const *hb; )
490 NYD_ENTER;
492 memset(bip, 0, sizeof *bip);
493 bip->bi_start.s = bip->bi_end.s = n_UNCONST(n_empty);
495 #ifdef HAVE_NATCH_CHAR
496 if ((n_psonce & n_PSO_UNICODE) && (hb = ok_vlook(headline_bidi)) != NULL) {
497 switch (*hb) {
498 case '3':
499 bip->bi_pad = 2;
500 /* FALLTHRU */
501 case '2':
502 bip->bi_start.s = bip->bi_end.s = n_UNCONST("\xE2\x80\x8E");
503 break;
504 case '1':
505 bip->bi_pad = 2;
506 /* FALLTHRU */
507 default:
508 bip->bi_start.s = n_UNCONST("\xE2\x81\xA8");
509 bip->bi_end.s = n_UNCONST("\xE2\x81\xA9");
510 break;
512 bip->bi_start.l = bip->bi_end.l = 3;
514 #endif
515 NYD_LEAVE;
518 /* s-it-mode */