Add n_hy[] ("-"), use it
[s-mailx.git] / ui-str.c
blob2468fcaaf1848cbfcd365ff8cf27464e8cae0ab9
1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ UserInterface: string related operations.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6 */
/*
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
32 #undef n_FILE
33 #define n_FILE ui_str
35 #ifndef HAVE_AMALGAMATION
36 # include "nail.h"
37 #endif
39 #include <ctype.h>
41 FL bool_t
42 n_visual_info(struct n_visual_info_ctx *vicp, enum n_visual_info_flags vif){
43 #ifdef HAVE_C90AMEND1
44 mbstate_t *mbp;
45 #endif
46 size_t il;
47 char const *ib;
48 bool_t rv;
49 NYD2_ENTER;
51 assert(vicp != NULL);
52 assert(vicp->vic_inlen == 0 || vicp->vic_indat != NULL);
53 assert(!(vif & n__VISUAL_INFO_FLAGS) || !(vif & n_VISUAL_INFO_ONE_CHAR));
55 rv = TRU1;
56 ib = vicp->vic_indat;
57 if((il = vicp->vic_inlen) == UIZ_MAX)
58 il = vicp->vic_inlen = strlen(ib);
60 if((vif & (n_VISUAL_INFO_WIDTH_QUERY | n_VISUAL_INFO_WOUT_PRINTABLE)) ==
61 n_VISUAL_INFO_WOUT_PRINTABLE)
62 vif |= n_VISUAL_INFO_WIDTH_QUERY;
64 vicp->vic_chars_seen = vicp->vic_bytes_seen = vicp->vic_vi_width = 0;
65 if(vif & n_VISUAL_INFO_WOUT_CREATE){
66 if(vif & n_VISUAL_INFO_WOUT_SALLOC)
67 vicp->vic_woudat =
68 n_autorec_alloc(sizeof(*vicp->vic_woudat) * (il +1));
69 vicp->vic_woulen = 0;
71 #ifdef HAVE_C90AMEND1
72 if((mbp = vicp->vic_mbstate) == NULL)
73 mbp = &vicp->vic_mbs_def;
74 #endif
76 if(il > 0){
77 do/* while(!(vif & n_VISUAL_INFO_ONE_CHAR) && il > 0) */{
78 #ifdef HAVE_C90AMEND1
79 size_t i = mbrtowc(&vicp->vic_waccu, ib, il, mbp);
81 if(i == (size_t)-2){
82 rv = FAL0;
83 break;
84 }else if(i == (size_t)-1){
85 if(!(vif & n_VISUAL_INFO_SKIP_ERRORS)){
86 rv = FAL0;
87 break;
89 memset(mbp, 0, sizeof *mbp);
90 vicp->vic_waccu = (n_psonce & n_PSO_UNICODE) ? 0xFFFD : '?';
91 i = 1;
92 }else if(i == 0){
93 il = 0;
94 break;
97 ++vicp->vic_chars_seen;
98 vicp->vic_bytes_seen += i;
99 ib += i;
100 il -= i;
102 if(vif & n_VISUAL_INFO_WIDTH_QUERY){
103 int w;
104 wchar_t wc = vicp->vic_waccu;
106 # ifdef HAVE_WCWIDTH
107 w = (wc == '\t' ? 1 : wcwidth(wc));
108 # else
109 if(wc == '\t' || iswprint(wc))
110 w = 1 + (wc >= 0x1100u); /* S-CText isfullwidth() */
111 else
112 w = -1;
113 # endif
114 if(w > 0)
115 vicp->vic_vi_width += w;
116 else if(vif & n_VISUAL_INFO_WOUT_PRINTABLE)
117 continue;
119 #else /* HAVE_C90AMEND1 */
120 char c = *ib;
122 if(c == '\0'){
123 il = 0;
124 break;
127 ++vicp->vic_chars_seen;
128 ++vicp->vic_bytes_seen;
129 vicp->vic_waccu = c;
130 if(vif & n_VISUAL_INFO_WIDTH_QUERY)
131 vicp->vic_vi_width += (c == '\t' || isprint(c)); /* XXX */
133 ++ib;
134 --il;
135 #endif
137 if(vif & n_VISUAL_INFO_WOUT_CREATE)
138 vicp->vic_woudat[vicp->vic_woulen++] = vicp->vic_waccu;
139 }while(!(vif & n_VISUAL_INFO_ONE_CHAR) && il > 0);
142 if(vif & n_VISUAL_INFO_WOUT_CREATE)
143 vicp->vic_woudat[vicp->vic_woulen] = L'\0';
144 vicp->vic_oudat = ib;
145 vicp->vic_oulen = il;
146 vicp->vic_flags = vif;
147 NYD2_LEAVE;
148 return rv;
151 FL size_t
152 field_detect_clip(size_t maxlen, char const *buf, size_t blen)/*TODO mbrtowc()*/
154 size_t rv;
155 NYD_ENTER;
157 #ifdef HAVE_NATCH_CHAR
158 maxlen = n_MIN(maxlen, blen);
159 for (rv = 0; maxlen > 0;) {
160 int ml = mblen(buf, maxlen);
161 if (ml <= 0) {
162 mblen(NULL, 0);
163 break;
165 buf += ml;
166 rv += ml;
167 maxlen -= ml;
169 #else
170 rv = n_MIN(blen, maxlen);
171 #endif
172 NYD_LEAVE;
173 return rv;
176 FL char *
177 colalign(char const *cp, int col, int fill, int *cols_decr_used_or_null)
179 n_NATCH_CHAR( struct bidi_info bi; )
180 int col_orig = col, n, sz;
181 bool_t isbidi, isuni, istab, isrepl;
182 char *nb, *np;
183 NYD_ENTER;
185 /* Bidi only on request and when there is 8-bit data */
186 isbidi = isuni = FAL0;
187 #ifdef HAVE_NATCH_CHAR
188 isuni = ((n_psonce & n_PSO_UNICODE) != 0);
189 bidi_info_create(&bi);
190 if (bi.bi_start.l == 0)
191 goto jnobidi;
192 if (!(isbidi = bidi_info_needed(cp, strlen(cp))))
193 goto jnobidi;
195 if ((size_t)col >= bi.bi_pad)
196 col -= bi.bi_pad;
197 else
198 col = 0;
199 jnobidi:
200 #endif
202 np = nb = n_autorec_alloc(n_mb_cur_max * strlen(cp) +
203 ((fill ? col : 0)
204 n_NATCH_CHAR( + (isbidi ? bi.bi_start.l + bi.bi_end.l : 0) )
205 +1));
207 #ifdef HAVE_NATCH_CHAR
208 if (isbidi) {
209 memcpy(np, bi.bi_start.s, bi.bi_start.l);
210 np += bi.bi_start.l;
212 #endif
214 while (*cp != '\0') {
215 istab = FAL0;
216 #ifdef HAVE_C90AMEND1
217 if (n_mb_cur_max > 1) {
218 wchar_t wc;
220 n = 1;
221 isrepl = TRU1;
222 if ((sz = mbtowc(&wc, cp, n_mb_cur_max)) == -1)
223 sz = 1;
224 else if (wc == L'\t') {
225 cp += sz - 1; /* Silly, no such charset known (.. until S-Ctext) */
226 isrepl = FAL0;
227 istab = TRU1;
228 } else if (iswprint(wc)) {
229 # ifndef HAVE_WCWIDTH
230 n = 1 + (wc >= 0x1100u); /* TODO use S-CText isfullwidth() */
231 # else
232 if ((n = wcwidth(wc)) == -1)
233 n = 1;
234 else
235 # endif
236 isrepl = FAL0;
238 } else
239 #endif
241 n = sz = 1;
242 istab = (*cp == '\t');
243 isrepl = !(istab || isprint((uc_i)*cp));
246 if (n > col)
247 break;
248 col -= n;
250 if (isrepl) {
251 if (isuni) {
252 /* Contained in n_mb_cur_max, then */
253 memcpy(np, n_unirepl, sizeof(n_unirepl) -1);
254 np += sizeof(n_unirepl) -1;
255 } else
256 *np++ = '?';
257 cp += sz;
258 } else if (istab || (sz == 1 && spacechar(*cp))) {
259 *np++ = ' ';
260 ++cp;
261 } else
262 while (sz--)
263 *np++ = *cp++;
266 if (fill && col != 0) {
267 if (fill > 0) {
268 memmove(nb + col, nb, PTR2SIZE(np - nb));
269 memset(nb, ' ', col);
270 } else
271 memset(np, ' ', col);
272 np += col;
273 col = 0;
276 #ifdef HAVE_NATCH_CHAR
277 if (isbidi) {
278 memcpy(np, bi.bi_end.s, bi.bi_end.l);
279 np += bi.bi_end.l;
281 #endif
283 *np = '\0';
284 if (cols_decr_used_or_null != NULL)
285 *cols_decr_used_or_null -= col_orig - col;
286 NYD_LEAVE;
287 return nb;
290 FL void
291 makeprint(struct str const *in, struct str *out) /* TODO <-> TTYCHARSET!! */
293 /* TODO: makeprint() should honour *ttycharset*. This of course does not
294 * TODO work with ISO C / POSIX since mbrtowc() do know about locales, not
295 * TODO charsets, and ditto iswprint() etc. do work with the locale too.
296 * TODO I hope S-CText can do something about that, and/or otherwise add
297 * TODO some special treatment for UTF-8 (take it from S-CText too then) */
298 char const *inp, *maxp;
299 char *outp;
300 DBG( size_t msz; )
301 NYD_ENTER;
303 out->s =
304 outp = n_alloc(DBG( msz = ) in->l*n_mb_cur_max + 2u*n_mb_cur_max +1);
305 inp = in->s;
306 maxp = inp + in->l;
308 #ifdef HAVE_NATCH_CHAR
309 if (n_mb_cur_max > 1) {
310 char mbb[MB_LEN_MAX + 1];
311 wchar_t wc;
312 int i, n;
313 bool_t isuni = ((n_psonce & n_PSO_UNICODE) != 0);
315 out->l = 0;
316 while (inp < maxp) {
317 if (*inp & 0200)
318 n = mbtowc(&wc, inp, PTR2SIZE(maxp - inp));
319 else {
320 wc = *inp;
321 n = 1;
323 if (n == -1) {
324 /* FIXME Why mbtowc() resetting here?
325 * FIXME what about ISO 2022-JP plus -- those
326 * FIXME will loose shifts, then!
327 * FIXME THUS - we'd need special "known points"
328 * FIXME to do so - say, after a newline!!
329 * FIXME WE NEED TO CHANGE ALL USES +MBLEN! */
330 mbtowc(&wc, NULL, n_mb_cur_max);
331 wc = isuni ? 0xFFFD : '?';
332 n = 1;
333 } else if (n == 0)
334 n = 1;
335 inp += n;
336 if (!iswprint(wc) && wc != '\n' /*&& wc != '\r' && wc != '\b'*/ &&
337 wc != '\t') {
338 if ((wc & ~(wchar_t)037) == 0)
339 wc = isuni ? 0x2400 | wc : '?';
340 else if (wc == 0177)
341 wc = isuni ? 0x2421 : '?';
342 else
343 wc = isuni ? 0x2426 : '?';
344 }else if(isuni){ /* TODO ctext */
345 /* Need to filter out L-TO-R and R-TO-R marks TODO ctext */
346 if(wc == 0x200E || wc == 0x200F || (wc >= 0x202A && wc <= 0x202E))
347 continue;
348 /* And some zero-width messes */
349 if(wc == 0x00AD || (wc >= 0x200B && wc <= 0x200D))
350 continue;
351 /* Oh about the ISO C wide character interfaces, baby! */
352 if(wc == 0xFEFF)
353 continue;
355 if ((n = wctomb(mbb, wc)) <= 0)
356 continue;
357 out->l += n;
358 assert(out->l < msz);
359 for (i = 0; i < n; ++i)
360 *outp++ = mbb[i];
362 } else
363 #endif /* NATCH_CHAR */
365 int c;
366 while (inp < maxp) {
367 c = *inp++ & 0377;
368 if (!isprint(c) && c != '\n' && c != '\r' && c != '\b' && c != '\t')
369 c = '?';
370 *outp++ = c;
372 out->l = in->l;
374 out->s[out->l] = '\0';
375 NYD_LEAVE;
378 FL size_t
379 delctrl(char *cp, size_t len)
381 size_t x, y;
382 NYD_ENTER;
384 for (x = y = 0; x < len; ++x)
385 if (!cntrlchar(cp[x]))
386 cp[y++] = cp[x];
387 cp[y] = '\0';
388 NYD_LEAVE;
389 return y;
392 FL char *
393 prstr(char const *s)
395 struct str in, out;
396 char *rp;
397 NYD_ENTER;
399 in.s = n_UNCONST(s);
400 in.l = strlen(s);
401 makeprint(&in, &out);
402 rp = savestrbuf(out.s, out.l);
403 n_free(out.s);
404 NYD_LEAVE;
405 return rp;
408 FL int
409 prout(char const *s, size_t sz, FILE *fp)
411 struct str in, out;
412 int n;
413 NYD_ENTER;
415 in.s = n_UNCONST(s);
416 in.l = sz;
417 makeprint(&in, &out);
418 n = fwrite(out.s, 1, out.l, fp);
419 n_free(out.s);
420 NYD_LEAVE;
421 return n;
424 FL bool_t
425 bidi_info_needed(char const *bdat, size_t blen)
427 bool_t rv = FAL0;
428 NYD_ENTER;
430 #ifdef HAVE_NATCH_CHAR
431 if (n_psonce & n_PSO_UNICODE)
432 while (blen > 0) {
433 /* TODO Checking for BIDI character: use S-CText fromutf8
434 * TODO plus isrighttoleft (or whatever there will be)! */
435 ui32_t c = n_utf8_to_utf32(&bdat, &blen);
436 if (c == UI32_MAX)
437 break;
439 if (c <= 0x05BE)
440 continue;
442 /* (Very very fuzzy, awaiting S-CText for good) */
443 if ((c >= 0x05BE && c <= 0x08E3) ||
444 (c >= 0xFB1D && c <= 0xFE00) /* No: variation selectors */ ||
445 (c >= 0xFE70 && c <= 0xFEFC) ||
446 (c >= 0x10800 && c <= 0x10C48) ||
447 (c >= 0x1EE00 && c <= 0x1EEF1)) {
448 rv = TRU1;
449 break;
452 #endif /* HAVE_NATCH_CHAR */
453 NYD_LEAVE;
454 return rv;
457 FL void
458 bidi_info_create(struct bidi_info *bip)
460 /* Unicode: how to isolate RIGHT-TO-LEFT scripts via *headline-bidi*
461 * 1.1 (Jun 1993): U+200E (E2 80 8E) LEFT-TO-RIGHT MARK
462 * 6.3 (Sep 2013): U+2068 (E2 81 A8) FIRST STRONG ISOLATE,
463 * U+2069 (E2 81 A9) POP DIRECTIONAL ISOLATE
464 * Worse results seen for: U+202D "\xE2\x80\xAD" U+202C "\xE2\x80\xAC" */
465 n_NATCH_CHAR( char const *hb; )
466 NYD_ENTER;
468 memset(bip, 0, sizeof *bip);
469 bip->bi_start.s = bip->bi_end.s = n_UNCONST(n_empty);
471 #ifdef HAVE_NATCH_CHAR
472 if ((n_psonce & n_PSO_UNICODE) && (hb = ok_vlook(headline_bidi)) != NULL) {
473 switch (*hb) {
474 case '3':
475 bip->bi_pad = 2;
476 /* FALLTHRU */
477 case '2':
478 bip->bi_start.s = bip->bi_end.s = n_UNCONST("\xE2\x80\x8E");
479 break;
480 case '1':
481 bip->bi_pad = 2;
482 /* FALLTHRU */
483 default:
484 bip->bi_start.s = n_UNCONST("\xE2\x81\xA8");
485 bip->bi_end.s = n_UNCONST("\xE2\x81\xA9");
486 break;
488 bip->bi_start.l = bip->bi_end.l = 3;
490 #endif
491 NYD_LEAVE;
494 /* s-it-mode */