1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ UserInterface: string related operations.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
6 * SPDX-License-Identifier: BSD-3-Clause TODO ISC
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 #ifndef HAVE_AMALGAMATION
43 n_visual_info(struct n_visual_info_ctx
*vicp
, enum n_visual_info_flags vif
){
53 assert(vicp
->vic_inlen
== 0 || vicp
->vic_indat
!= NULL
);
54 assert(!(vif
& n__VISUAL_INFO_FLAGS
) || !(vif
& n_VISUAL_INFO_ONE_CHAR
));
58 if((il
= vicp
->vic_inlen
) == UIZ_MAX
)
59 il
= vicp
->vic_inlen
= strlen(ib
);
61 if((vif
& (n_VISUAL_INFO_WIDTH_QUERY
| n_VISUAL_INFO_WOUT_PRINTABLE
)) ==
62 n_VISUAL_INFO_WOUT_PRINTABLE
)
63 vif
|= n_VISUAL_INFO_WIDTH_QUERY
;
65 vicp
->vic_chars_seen
= vicp
->vic_bytes_seen
= vicp
->vic_vi_width
= 0;
66 if(vif
& n_VISUAL_INFO_WOUT_CREATE
){
67 if(vif
& n_VISUAL_INFO_WOUT_SALLOC
)
69 n_autorec_alloc(sizeof(*vicp
->vic_woudat
) * (il
+1));
73 if((mbp
= vicp
->vic_mbstate
) == NULL
)
74 mbp
= &vicp
->vic_mbs_def
;
78 do/* while(!(vif & n_VISUAL_INFO_ONE_CHAR) && il > 0) */{
80 size_t i
= mbrtowc(&vicp
->vic_waccu
, ib
, il
, mbp
);
85 }else if(i
== (size_t)-1){
86 if(!(vif
& n_VISUAL_INFO_SKIP_ERRORS
)){
90 memset(mbp
, 0, sizeof *mbp
);
91 vicp
->vic_waccu
= (n_psonce
& n_PSO_UNICODE
) ? 0xFFFD : '?';
98 ++vicp
->vic_chars_seen
;
99 vicp
->vic_bytes_seen
+= i
;
103 if(vif
& n_VISUAL_INFO_WIDTH_QUERY
){
105 wchar_t wc
= vicp
->vic_waccu
;
108 w
= (wc
== '\t' ? 1 : wcwidth(wc
));
110 if(wc
== '\t' || iswprint(wc
))
111 w
= 1 + (wc
>= 0x1100u
); /* S-CText isfullwidth() */
116 vicp
->vic_vi_width
+= w
;
117 else if(vif
& n_VISUAL_INFO_WOUT_PRINTABLE
)
120 #else /* HAVE_C90AMEND1 */
128 ++vicp
->vic_chars_seen
;
129 ++vicp
->vic_bytes_seen
;
131 if(vif
& n_VISUAL_INFO_WIDTH_QUERY
)
132 vicp
->vic_vi_width
+= (c
== '\t' || isprint(c
)); /* XXX */
138 if(vif
& n_VISUAL_INFO_WOUT_CREATE
)
139 vicp
->vic_woudat
[vicp
->vic_woulen
++] = vicp
->vic_waccu
;
140 }while(!(vif
& n_VISUAL_INFO_ONE_CHAR
) && il
> 0);
143 if(vif
& n_VISUAL_INFO_WOUT_CREATE
)
144 vicp
->vic_woudat
[vicp
->vic_woulen
] = L
'\0';
145 vicp
->vic_oudat
= ib
;
146 vicp
->vic_oulen
= il
;
147 vicp
->vic_flags
= vif
;
153 field_detect_clip(size_t maxlen
, char const *buf
, size_t blen
)/*TODO mbrtowc()*/
158 #ifdef HAVE_NATCH_CHAR
159 maxlen
= n_MIN(maxlen
, blen
);
160 for (rv
= 0; maxlen
> 0;) {
161 int ml
= mblen(buf
, maxlen
);
171 rv
= n_MIN(blen
, maxlen
);
178 colalign(char const *cp
, int col
, int fill
, int *cols_decr_used_or_null
)
180 n_NATCH_CHAR( struct bidi_info bi
; )
181 int col_orig
= col
, n
, sz
;
182 bool_t isbidi
, isuni
, istab
, isrepl
;
186 /* Bidi only on request and when there is 8-bit data */
187 isbidi
= isuni
= FAL0
;
188 #ifdef HAVE_NATCH_CHAR
189 isuni
= ((n_psonce
& n_PSO_UNICODE
) != 0);
190 bidi_info_create(&bi
);
191 if (bi
.bi_start
.l
== 0)
193 if (!(isbidi
= bidi_info_needed(cp
, strlen(cp
))))
196 if ((size_t)col
>= bi
.bi_pad
)
203 np
= nb
= n_autorec_alloc(n_mb_cur_max
* strlen(cp
) +
205 n_NATCH_CHAR( + (isbidi
? bi
.bi_start
.l
+ bi
.bi_end
.l
: 0) )
208 #ifdef HAVE_NATCH_CHAR
210 memcpy(np
, bi
.bi_start
.s
, bi
.bi_start
.l
);
215 while (*cp
!= '\0') {
217 #ifdef HAVE_C90AMEND1
218 if (n_mb_cur_max
> 1) {
223 if ((sz
= mbtowc(&wc
, cp
, n_mb_cur_max
)) == -1)
225 else if (wc
== L
'\t') {
226 cp
+= sz
- 1; /* Silly, no such charset known (.. until S-Ctext) */
229 } else if (iswprint(wc
)) {
230 # ifndef HAVE_WCWIDTH
231 n
= 1 + (wc
>= 0x1100u
); /* TODO use S-CText isfullwidth() */
233 if ((n
= wcwidth(wc
)) == -1)
243 istab
= (*cp
== '\t');
244 isrepl
= !(istab
|| isprint((uc_i
)*cp
));
253 /* Contained in n_mb_cur_max, then */
254 memcpy(np
, n_unirepl
, sizeof(n_unirepl
) -1);
255 np
+= sizeof(n_unirepl
) -1;
259 } else if (istab
|| (sz
== 1 && spacechar(*cp
))) {
267 if (fill
&& col
!= 0) {
269 memmove(nb
+ col
, nb
, PTR2SIZE(np
- nb
));
270 memset(nb
, ' ', col
);
272 memset(np
, ' ', col
);
277 #ifdef HAVE_NATCH_CHAR
279 memcpy(np
, bi
.bi_end
.s
, bi
.bi_end
.l
);
285 if (cols_decr_used_or_null
!= NULL
)
286 *cols_decr_used_or_null
-= col_orig
- col
;
292 makeprint(struct str
const *in
, struct str
*out
) /* TODO <-> TTYCHARSET!! */
294 /* TODO: makeprint() should honour *ttycharset*. This of course does not
295 * TODO work with ISO C / POSIX since mbrtowc() knows about locales, not
296 * TODO charsets, and likewise iswprint() etc. work with the locale too.
297 * TODO I hope S-CText can do something about that, and/or otherwise add
298 * TODO some special treatment for UTF-8 (take it from S-CText too then) */
299 char const *inp
, *maxp
;
305 outp
= n_alloc(DBG( msz
= ) in
->l
*n_mb_cur_max
+ 2u*n_mb_cur_max
+1);
309 #ifdef HAVE_NATCH_CHAR
310 if (n_mb_cur_max
> 1) {
311 char mbb
[MB_LEN_MAX
+ 1];
314 bool_t isuni
= ((n_psonce
& n_PSO_UNICODE
) != 0);
319 n
= mbtowc(&wc
, inp
, PTR2SIZE(maxp
- inp
));
325 /* FIXME Why is mbtowc() being reset here?
326 * FIXME what about ISO 2022-JP plus -- those
327 * FIXME will lose shifts, then!
328 * FIXME THUS - we'd need special "known points"
329 * FIXME to do so - say, after a newline!!
330 * FIXME WE NEED TO CHANGE ALL USES +MBLEN! */
331 mbtowc(&wc
, NULL
, n_mb_cur_max
);
332 wc
= isuni
? 0xFFFD : '?';
337 if (!iswprint(wc
) && wc
!= '\n' /*&& wc != '\r' && wc != '\b'*/ &&
339 if ((wc
& ~(wchar_t)037) == 0)
340 wc
= isuni
? 0x2400 | wc
: '?';
342 wc
= isuni
? 0x2421 : '?';
344 wc
= isuni
? 0x2426 : '?';
345 }else if(isuni
){ /* TODO ctext */
346 /* Need to filter out L-TO-R and R-TO-L marks TODO ctext */
347 if(wc
== 0x200E || wc
== 0x200F || (wc
>= 0x202A && wc
<= 0x202E))
349 /* And some zero-width messes */
350 if(wc
== 0x00AD || (wc
>= 0x200B && wc
<= 0x200D))
352 /* Oh about the ISO C wide character interfaces, baby! */
356 if ((n
= wctomb(mbb
, wc
)) <= 0)
359 assert(out
->l
< msz
);
360 for (i
= 0; i
< n
; ++i
)
364 #endif /* NATCH_CHAR */
369 if (!isprint(c
) && c
!= '\n' && c
!= '\r' && c
!= '\b' && c
!= '\t')
375 out
->s
[out
->l
] = '\0';
380 delctrl(char *cp
, size_t len
)
385 for (x
= y
= 0; x
< len
; ++x
)
386 if (!cntrlchar(cp
[x
]))
402 makeprint(&in
, &out
);
403 rp
= savestrbuf(out
.s
, out
.l
);
410 prout(char const *s
, size_t sz
, FILE *fp
)
418 makeprint(&in
, &out
);
419 n
= fwrite(out
.s
, 1, out
.l
, fp
);
426 bidi_info_needed(char const *bdat
, size_t blen
)
431 #ifdef HAVE_NATCH_CHAR
432 if (n_psonce
& n_PSO_UNICODE
)
434 /* TODO Checking for BIDI character: use S-CText fromutf8
435 * TODO plus isrighttoleft (or whatever there will be)! */
436 ui32_t c
= n_utf8_to_utf32(&bdat
, &blen
);
443 /* (Very very fuzzy, awaiting S-CText for good) */
444 if ((c
>= 0x05BE && c
<= 0x08E3) ||
445 (c
>= 0xFB1D && c
<= 0xFE00) /* No: variation selectors */ ||
446 (c
>= 0xFE70 && c
<= 0xFEFC) ||
447 (c
>= 0x10800 && c
<= 0x10C48) ||
448 (c
>= 0x1EE00 && c
<= 0x1EEF1)) {
453 #endif /* HAVE_NATCH_CHAR */
459 bidi_info_create(struct bidi_info
*bip
)
461 /* Unicode: how to isolate RIGHT-TO-LEFT scripts via *headline-bidi*
462 * 1.1 (Jun 1993): U+200E (E2 80 8E) LEFT-TO-RIGHT MARK
463 * 6.3 (Sep 2013): U+2068 (E2 81 A8) FIRST STRONG ISOLATE,
464 * U+2069 (E2 81 A9) POP DIRECTIONAL ISOLATE
465 * Worse results seen for: U+202D "\xE2\x80\xAD" U+202C "\xE2\x80\xAC" */
466 n_NATCH_CHAR( char const *hb
; )
469 memset(bip
, 0, sizeof *bip
);
470 bip
->bi_start
.s
= bip
->bi_end
.s
= n_UNCONST(n_empty
);
472 #ifdef HAVE_NATCH_CHAR
473 if ((n_psonce
& n_PSO_UNICODE
) && (hb
= ok_vlook(headline_bidi
)) != NULL
) {
479 bip
->bi_start
.s
= bip
->bi_end
.s
= n_UNCONST("\xE2\x80\x8E");
485 bip
->bi_start
.s
= n_UNCONST("\xE2\x81\xA8");
486 bip
->bi_end
.s
= n_UNCONST("\xE2\x81\xA9");
489 bip
->bi_start
.l
= bip
->bi_end
.l
= 3;