n_lex_input(): truly support line continuation via backslash..
[s-mailx.git] / ui_str.c
blob9fd90eb90ac03abf6cc8ddc155b02b6a01d39673
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ UserInterface: string related operations.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
6 */
7 /*
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
32 #undef n_FILE
33 #define n_FILE ui_str
35 #ifndef HAVE_AMALGAMATION
36 # include "nail.h"
37 #endif
39 #include <ctype.h>
41 FL size_t
42 field_detect_width(char const *buf, size_t blen){
43 size_t rv;
44 NYD2_ENTER;
46 if(blen == UIZ_MAX)
47 blen = (buf == NULL) ? 0 : strlen(buf);
48 assert(blen == 0 || buf != NULL);
50 if((rv = blen) > 0){
51 #ifdef HAVE_C90AMEND1
52 mbstate_t mbs;
53 wchar_t wc;
55 memset(&mbs, 0, sizeof mbs);
57 for(rv = 0; blen > 0;){
58 size_t i = mbrtowc(&wc, buf, blen, &mbs);
60 switch(i){
61 case (size_t)-2:
62 case (size_t)-1:
63 rv = (size_t)-1;
64 /* FALLTHRU */
65 case 0:
66 blen = 0;
67 break;
68 default:
69 buf += i;
70 blen -= i;
71 # ifdef HAVE_WCWIDTH
72 /* C99 */{
73 int w = wcwidth(wc);
75 if(w > 0)
76 rv += w;
77 else if(wc == '\t')
78 ++rv;
80 # else
81 if(iswprint(wc))
82 rv += 1 + (wc >= 0x1100u); /* TODO use S-CText isfullwidth() */
83 else if(wc == '\t')
84 ++rv;
85 # endif
86 break;
89 #endif /* HAVE_C90AMEND1 */
91 NYD2_LEAVE;
92 return rv;
95 FL size_t
96 field_detect_clip(size_t maxlen, char const *buf, size_t blen)/*TODO mbrtowc()*/
98 size_t rv;
99 NYD_ENTER;
101 #ifdef HAVE_NATCH_CHAR
102 maxlen = MIN(maxlen, blen);
103 for (rv = 0; maxlen > 0;) {
104 int ml = mblen(buf, maxlen);
105 if (ml <= 0) {
106 mblen(NULL, 0);
107 break;
109 buf += ml;
110 rv += ml;
111 maxlen -= ml;
113 #else
114 rv = MIN(blen, maxlen);
115 #endif
116 NYD_LEAVE;
117 return rv;
120 FL size_t
121 field_put_bidi_clip(char *store, size_t maxlen, char const *buf, size_t blen)
123 NATCH_CHAR( struct bidi_info bi; )
124 size_t rv NATCH_CHAR( COMMA i );
125 NYD_ENTER;
127 rv = 0;
128 if (maxlen-- == 0)
129 goto j_leave;
131 #ifdef HAVE_NATCH_CHAR
132 bidi_info_create(&bi);
133 if (bi.bi_start.l == 0 || !bidi_info_needed(buf, blen)) {
134 bi.bi_end.l = 0;
135 goto jnobidi;
138 if (maxlen >= (i = bi.bi_pad + bi.bi_end.l + bi.bi_start.l))
139 maxlen -= i;
140 else
141 goto jleave;
143 if ((i = bi.bi_start.l) > 0) {
144 memcpy(store, bi.bi_start.s, i);
145 store += i;
146 rv += i;
149 jnobidi:
150 while (maxlen > 0) {
151 int ml = mblen(buf, blen);
152 if (ml <= 0) {
153 mblen(NULL, 0);
154 break;
156 if (UICMP(z, maxlen, <, ml))
157 break;
158 if (ml == 1)
159 *store = *buf;
160 else
161 memcpy(store, buf, ml);
162 store += ml;
163 buf += ml;
164 rv += ml;
165 maxlen -= ml;
168 if ((i = bi.bi_end.l) > 0) {
169 memcpy(store, bi.bi_end.s, i);
170 store += i;
171 rv += i;
173 jleave:
174 *store = '\0';
176 #else
177 rv = MIN(blen, maxlen);
178 memcpy(store, buf, rv);
179 store[rv] = '\0';
180 #endif
181 j_leave:
182 NYD_LEAVE;
183 return rv;
186 FL char *
187 colalign(char const *cp, int col, int fill, int *cols_decr_used_or_null)
189 NATCH_CHAR( struct bidi_info bi; )
190 int col_orig = col, n, sz;
191 bool_t isbidi, isuni, istab, isrepl;
192 char *nb, *np;
193 NYD_ENTER;
195 /* Bidi only on request and when there is 8-bit data */
196 isbidi = isuni = FAL0;
197 #ifdef HAVE_NATCH_CHAR
198 isuni = ((options & OPT_UNICODE) != 0);
199 bidi_info_create(&bi);
200 if (bi.bi_start.l == 0)
201 goto jnobidi;
202 if (!(isbidi = bidi_info_needed(cp, strlen(cp))))
203 goto jnobidi;
205 if ((size_t)col >= bi.bi_pad)
206 col -= bi.bi_pad;
207 else
208 col = 0;
209 jnobidi:
210 #endif
212 np = nb = salloc(mb_cur_max * strlen(cp) +
213 ((fill ? col : 0)
214 NATCH_CHAR( + (isbidi ? bi.bi_start.l + bi.bi_end.l : 0) )
215 +1));
217 #ifdef HAVE_NATCH_CHAR
218 if (isbidi) {
219 memcpy(np, bi.bi_start.s, bi.bi_start.l);
220 np += bi.bi_start.l;
222 #endif
224 while (*cp != '\0') {
225 istab = FAL0;
226 #ifdef HAVE_C90AMEND1
227 if (mb_cur_max > 1) {
228 wchar_t wc;
230 n = 1;
231 isrepl = TRU1;
232 if ((sz = mbtowc(&wc, cp, mb_cur_max)) == -1)
233 sz = 1;
234 else if (wc == L'\t') {
235 cp += sz - 1; /* Silly, no such charset known (.. until S-Ctext) */
236 isrepl = FAL0;
237 istab = TRU1;
238 } else if (iswprint(wc)) {
239 # ifndef HAVE_WCWIDTH
240 n = 1 + (wc >= 0x1100u); /* TODO use S-CText isfullwidth() */
241 # else
242 if ((n = wcwidth(wc)) == -1)
243 n = 1;
244 else
245 # endif
246 isrepl = FAL0;
248 } else
249 #endif
251 n = sz = 1;
252 istab = (*cp == '\t');
253 isrepl = !(istab || isprint((uc_i)*cp));
256 if (n > col)
257 break;
258 col -= n;
260 if (isrepl) {
261 if (isuni) {
262 np[0] = (char)0xEFu;
263 np[1] = (char)0xBFu;
264 np[2] = (char)0xBDu;
265 np += 3;
266 } else
267 *np++ = '?';
268 cp += sz;
269 } else if (istab || (sz == 1 && spacechar(*cp))) {
270 *np++ = ' ';
271 ++cp;
272 } else
273 while (sz--)
274 *np++ = *cp++;
277 if (fill && col != 0) {
278 if (fill > 0) {
279 memmove(nb + col, nb, PTR2SIZE(np - nb));
280 memset(nb, ' ', col);
281 } else
282 memset(np, ' ', col);
283 np += col;
284 col = 0;
287 #ifdef HAVE_NATCH_CHAR
288 if (isbidi) {
289 memcpy(np, bi.bi_end.s, bi.bi_end.l);
290 np += bi.bi_end.l;
292 #endif
294 *np = '\0';
295 if (cols_decr_used_or_null != NULL)
296 *cols_decr_used_or_null -= col_orig - col;
297 NYD_LEAVE;
298 return nb;
301 FL void
302 makeprint(struct str const *in, struct str *out)
304 char const *inp, *maxp;
305 char *outp;
306 DBG( size_t msz; )
307 NYD_ENTER;
309 out->s = outp = smalloc(DBG( msz = ) in->l*mb_cur_max + 2u*mb_cur_max +1);
310 inp = in->s;
311 maxp = inp + in->l;
313 #ifdef HAVE_NATCH_CHAR
314 if (mb_cur_max > 1) {
315 char mbb[MB_LEN_MAX + 1];
316 wchar_t wc;
317 int i, n;
318 bool_t isuni = ((options & OPT_UNICODE) != 0);
320 out->l = 0;
321 while (inp < maxp) {
322 if (*inp & 0200)
323 n = mbtowc(&wc, inp, PTR2SIZE(maxp - inp));
324 else {
325 wc = *inp;
326 n = 1;
328 if (n == -1) {
329 /* FIXME Why mbtowc() resetting here?
330 * FIXME what about ISO 2022-JP plus -- those
331 * FIXME will loose shifts, then!
332 * FIXME THUS - we'd need special "known points"
333 * FIXME to do so - say, after a newline!!
334 * FIXME WE NEED TO CHANGE ALL USES +MBLEN! */
335 mbtowc(&wc, NULL, mb_cur_max);
336 wc = isuni ? 0xFFFD : '?';
337 n = 1;
338 } else if (n == 0)
339 n = 1;
340 inp += n;
341 if (!iswprint(wc) && wc != '\n' && wc != '\r' && wc != '\b' &&
342 wc != '\t') {
343 if ((wc & ~(wchar_t)037) == 0)
344 wc = isuni ? 0x2400 | wc : '?';
345 else if (wc == 0177)
346 wc = isuni ? 0x2421 : '?';
347 else
348 wc = isuni ? 0x2426 : '?';
349 }else if(isuni){ /* TODO ctext */
350 /* We need to actively filter out L-TO-R and R-TO-R marks TODO ctext */
351 if(wc == 0x200E || wc == 0x200F || (wc >= 0x202A && wc <= 0x202E))
352 continue;
353 /* And some zero-width messes */
354 if(wc == 0x00AD || (wc >= 0x200B && wc <= 0x200D))
355 continue;
356 /* Oh about the ISO C wide character interfaces, baby! */
357 if(wc == 0xFEFF)
358 continue;
360 if ((n = wctomb(mbb, wc)) <= 0)
361 continue;
362 out->l += n;
363 assert(out->l < msz);
364 for (i = 0; i < n; ++i)
365 *outp++ = mbb[i];
367 } else
368 #endif /* NATCH_CHAR */
370 int c;
371 while (inp < maxp) {
372 c = *inp++ & 0377;
373 if (!isprint(c) && c != '\n' && c != '\r' && c != '\b' && c != '\t')
374 c = '?';
375 *outp++ = c;
377 out->l = in->l;
379 out->s[out->l] = '\0';
380 NYD_LEAVE;
383 FL size_t
384 delctrl(char *cp, size_t len)
386 size_t x, y;
387 NYD_ENTER;
389 for (x = y = 0; x < len; ++x)
390 if (!cntrlchar(cp[x]))
391 cp[y++] = cp[x];
392 cp[y] = '\0';
393 NYD_LEAVE;
394 return y;
397 FL char *
398 prstr(char const *s)
400 struct str in, out;
401 char *rp;
402 NYD_ENTER;
404 in.s = UNCONST(s);
405 in.l = strlen(s);
406 makeprint(&in, &out);
407 rp = savestrbuf(out.s, out.l);
408 free(out.s);
409 NYD_LEAVE;
410 return rp;
413 FL int
414 prout(char const *s, size_t sz, FILE *fp)
416 struct str in, out;
417 int n;
418 NYD_ENTER;
420 in.s = UNCONST(s);
421 in.l = sz;
422 makeprint(&in, &out);
423 n = fwrite(out.s, 1, out.l, fp);
424 free(out.s);
425 NYD_LEAVE;
426 return n;
429 FL size_t
430 putuc(int u, int c, FILE *fp)
432 size_t rv;
433 NYD_ENTER;
434 UNUSED(u);
436 #ifdef HAVE_NATCH_CHAR
437 if ((options & OPT_UNICODE) && (u & ~(wchar_t)0177)) {
438 char mbb[MB_LEN_MAX];
439 int i, n;
441 if ((n = wctomb(mbb, u)) > 0) {
442 rv = wcwidth(u);
443 for (i = 0; i < n; ++i)
444 if (putc(mbb[i] & 0377, fp) == EOF) {
445 rv = 0;
446 break;
448 } else if (n == 0)
449 rv = (putc('\0', fp) != EOF);
450 else
451 rv = 0;
452 } else
453 #endif
454 rv = (putc(c, fp) != EOF);
455 NYD_LEAVE;
456 return rv;
459 FL bool_t
460 bidi_info_needed(char const *bdat, size_t blen)
462 bool_t rv = FAL0;
463 NYD_ENTER;
465 #ifdef HAVE_NATCH_CHAR
466 if (options & OPT_UNICODE)
467 while (blen > 0) {
468 /* TODO Checking for BIDI character: use S-CText fromutf8
469 * TODO plus isrighttoleft (or whatever there will be)! */
470 ui32_t c = n_utf8_to_utf32(&bdat, &blen);
471 if (c == UI32_MAX)
472 break;
474 if (c <= 0x05BE)
475 continue;
477 /* (Very very fuzzy, awaiting S-CText for good) */
478 if ((c >= 0x05BE && c <= 0x08E3) ||
479 (c >= 0xFB1D && c <= 0xFE00) /* No: variation selectors */ ||
480 (c >= 0xFE70 && c <= 0xFEFC) ||
481 (c >= 0x10800 && c <= 0x10C48) ||
482 (c >= 0x1EE00 && c <= 0x1EEF1)) {
483 rv = TRU1;
484 break;
487 #endif /* HAVE_NATCH_CHAR */
488 NYD_LEAVE;
489 return rv;
492 FL void
493 bidi_info_create(struct bidi_info *bip)
495 /* Unicode: how to isolate RIGHT-TO-LEFT scripts via *headline-bidi*
496 * 1.1 (Jun 1993): U+200E (E2 80 8E) LEFT-TO-RIGHT MARK
497 * 6.3 (Sep 2013): U+2068 (E2 81 A8) FIRST STRONG ISOLATE,
498 * U+2069 (E2 81 A9) POP DIRECTIONAL ISOLATE
499 * Worse results seen for: U+202D "\xE2\x80\xAD" U+202C "\xE2\x80\xAC" */
500 NATCH_CHAR( char const *hb; )
501 NYD_ENTER;
503 memset(bip, 0, sizeof *bip);
504 bip->bi_start.s = bip->bi_end.s = UNCONST("");
506 #ifdef HAVE_NATCH_CHAR
507 if ((options & OPT_UNICODE) && (hb = ok_vlook(headline_bidi)) != NULL) {
508 switch (*hb) {
509 case '3':
510 bip->bi_pad = 2;
511 /* FALLTHRU */
512 case '2':
513 bip->bi_start.s = bip->bi_end.s = UNCONST("\xE2\x80\x8E");
514 break;
515 case '1':
516 bip->bi_pad = 2;
517 /* FALLTHRU */
518 default:
519 bip->bi_start.s = UNCONST("\xE2\x81\xA8");
520 bip->bi_end.s = UNCONST("\xE2\x81\xA9");
521 break;
523 bip->bi_start.l = bip->bi_end.l = 3;
525 #endif
526 NYD_LEAVE;
529 /* s-it-mode */