1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ UserInterface: string related operations.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #ifndef HAVE_AMALGAMATION
42 n_visual_info(struct n_visual_info_ctx
*vicp
, enum n_visual_info_flags vif
){
52 assert(vicp
->vic_inlen
== 0 || vicp
->vic_indat
!= NULL
);
53 assert(!(vif
& n__VISUAL_INFO_FLAGS
) || !(vif
& n_VISUAL_INFO_ONE_CHAR
));
57 if((il
= vicp
->vic_inlen
) == UIZ_MAX
)
58 il
= vicp
->vic_inlen
= strlen(ib
);
60 if((vif
& (n_VISUAL_INFO_WIDTH_QUERY
| n_VISUAL_INFO_WOUT_PRINTABLE
)) ==
61 n_VISUAL_INFO_WOUT_PRINTABLE
)
62 vif
|= n_VISUAL_INFO_WIDTH_QUERY
;
64 vicp
->vic_chars_seen
= vicp
->vic_bytes_seen
= vicp
->vic_vi_width
= 0;
65 if(vif
& n_VISUAL_INFO_WOUT_CREATE
){
66 if(vif
& n_VISUAL_INFO_WOUT_SALLOC
)
68 n_autorec_alloc(sizeof(*vicp
->vic_woudat
) * (il
+1));
72 if((mbp
= vicp
->vic_mbstate
) == NULL
)
73 mbp
= &vicp
->vic_mbs_def
;
77 do/* while(!(vif & n_VISUAL_INFO_ONE_CHAR) && il > 0) */{
79 size_t i
= mbrtowc(&vicp
->vic_waccu
, ib
, il
, mbp
);
84 }else if(i
== (size_t)-1){
85 if(!(vif
& n_VISUAL_INFO_SKIP_ERRORS
)){
89 memset(mbp
, 0, sizeof *mbp
);
90 vicp
->vic_waccu
= (n_psonce
& n_PSO_UNICODE
) ? 0xFFFD : '?';
97 ++vicp
->vic_chars_seen
;
98 vicp
->vic_bytes_seen
+= i
;
102 if(vif
& n_VISUAL_INFO_WIDTH_QUERY
){
104 wchar_t wc
= vicp
->vic_waccu
;
107 w
= (wc
== '\t' ? 1 : wcwidth(wc
));
109 if(wc
== '\t' || iswprint(wc
))
110 w
= 1 + (wc
>= 0x1100u
); /* S-CText isfullwidth() */
115 vicp
->vic_vi_width
+= w
;
116 else if(vif
& n_VISUAL_INFO_WOUT_PRINTABLE
)
119 #else /* HAVE_C90AMEND1 */
127 ++vicp
->vic_chars_seen
;
128 ++vicp
->vic_bytes_seen
;
130 if(vif
& n_VISUAL_INFO_WIDTH_QUERY
)
131 vicp
->vic_vi_width
+= (c
== '\t' || isprint(c
)); /* XXX */
137 if(vif
& n_VISUAL_INFO_WOUT_CREATE
)
138 vicp
->vic_woudat
[vicp
->vic_woulen
++] = vicp
->vic_waccu
;
139 }while(!(vif
& n_VISUAL_INFO_ONE_CHAR
) && il
> 0);
142 if(vif
& n_VISUAL_INFO_WOUT_CREATE
)
143 vicp
->vic_woudat
[vicp
->vic_woulen
] = L
'\0';
144 vicp
->vic_oudat
= ib
;
145 vicp
->vic_oulen
= il
;
146 vicp
->vic_flags
= vif
;
152 field_detect_clip(size_t maxlen
, char const *buf
, size_t blen
)/*TODO mbrtowc()*/
157 #ifdef HAVE_NATCH_CHAR
158 maxlen
= n_MIN(maxlen
, blen
);
159 for (rv
= 0; maxlen
> 0;) {
160 int ml
= mblen(buf
, maxlen
);
170 rv
= n_MIN(blen
, maxlen
);
177 colalign(char const *cp
, int col
, int fill
, int *cols_decr_used_or_null
)
179 n_NATCH_CHAR( struct bidi_info bi
; )
180 int col_orig
= col
, n
, sz
;
181 bool_t isbidi
, isuni
, istab
, isrepl
;
185 /* Bidi only on request and when there is 8-bit data */
186 isbidi
= isuni
= FAL0
;
187 #ifdef HAVE_NATCH_CHAR
188 isuni
= ((n_psonce
& n_PSO_UNICODE
) != 0);
189 bidi_info_create(&bi
);
190 if (bi
.bi_start
.l
== 0)
192 if (!(isbidi
= bidi_info_needed(cp
, strlen(cp
))))
195 if ((size_t)col
>= bi
.bi_pad
)
202 np
= nb
= n_autorec_alloc(n_mb_cur_max
* strlen(cp
) +
204 n_NATCH_CHAR( + (isbidi
? bi
.bi_start
.l
+ bi
.bi_end
.l
: 0) )
207 #ifdef HAVE_NATCH_CHAR
209 memcpy(np
, bi
.bi_start
.s
, bi
.bi_start
.l
);
214 while (*cp
!= '\0') {
216 #ifdef HAVE_C90AMEND1
217 if (n_mb_cur_max
> 1) {
222 if ((sz
= mbtowc(&wc
, cp
, n_mb_cur_max
)) == -1)
224 else if (wc
== L
'\t') {
225 cp
+= sz
- 1; /* Silly, no such charset known (.. until S-Ctext) */
228 } else if (iswprint(wc
)) {
229 # ifndef HAVE_WCWIDTH
230 n
= 1 + (wc
>= 0x1100u
); /* TODO use S-CText isfullwidth() */
232 if ((n
= wcwidth(wc
)) == -1)
242 istab
= (*cp
== '\t');
243 isrepl
= !(istab
|| isprint((uc_i
)*cp
));
252 /* Contained in n_mb_cur_max, then */
253 memcpy(np
, n_unirepl
, sizeof(n_unirepl
) -1);
254 np
+= sizeof(n_unirepl
) -1;
258 } else if (istab
|| (sz
== 1 && spacechar(*cp
))) {
266 if (fill
&& col
!= 0) {
268 memmove(nb
+ col
, nb
, PTR2SIZE(np
- nb
));
269 memset(nb
, ' ', col
);
271 memset(np
, ' ', col
);
276 #ifdef HAVE_NATCH_CHAR
278 memcpy(np
, bi
.bi_end
.s
, bi
.bi_end
.l
);
284 if (cols_decr_used_or_null
!= NULL
)
285 *cols_decr_used_or_null
-= col_orig
- col
;
291 makeprint(struct str
const *in
, struct str
*out
) /* TODO <-> TTYCHARSET!! */
293 /* TODO: makeprint() should honour *ttycharset*. This of course does not
294 * TODO work with ISO C / POSIX since mbrtowc() knows about locales, not
295 * TODO charsets, and ditto iswprint() etc. do work with the locale too.
296 * TODO I hope S-CText can do something about that, and/or otherwise add
297 * TODO some special treatment for UTF-8 (take it from S-CText too then) */
298 char const *inp
, *maxp
;
304 outp
= n_alloc(DBG( msz
= ) in
->l
*n_mb_cur_max
+ 2u*n_mb_cur_max
+1);
308 #ifdef HAVE_NATCH_CHAR
309 if (n_mb_cur_max
> 1) {
310 char mbb
[MB_LEN_MAX
+ 1];
313 bool_t isuni
= ((n_psonce
& n_PSO_UNICODE
) != 0);
318 n
= mbtowc(&wc
, inp
, PTR2SIZE(maxp
- inp
));
324 /* FIXME Why mbtowc() resetting here?
325 * FIXME what about ISO 2022-JP plus -- those
326 * FIXME will lose shifts, then!
327 * FIXME THUS - we'd need special "known points"
328 * FIXME to do so - say, after a newline!!
329 * FIXME WE NEED TO CHANGE ALL USES +MBLEN! */
330 mbtowc(&wc
, NULL
, n_mb_cur_max
);
331 wc
= isuni
? 0xFFFD : '?';
336 if (!iswprint(wc
) && wc
!= '\n' /*&& wc != '\r' && wc != '\b'*/ &&
338 if ((wc
& ~(wchar_t)037) == 0)
339 wc
= isuni
? 0x2400 | wc
: '?';
341 wc
= isuni
? 0x2421 : '?';
343 wc
= isuni
? 0x2426 : '?';
344 }else if(isuni
){ /* TODO ctext */
345 /* Need to filter out L-TO-R and R-TO-L marks TODO ctext */
346 if(wc
== 0x200E || wc
== 0x200F || (wc
>= 0x202A && wc
<= 0x202E))
348 /* And some zero-width messes */
349 if(wc
== 0x00AD || (wc
>= 0x200B && wc
<= 0x200D))
351 /* Oh about the ISO C wide character interfaces, baby! */
355 if ((n
= wctomb(mbb
, wc
)) <= 0)
358 assert(out
->l
< msz
);
359 for (i
= 0; i
< n
; ++i
)
363 #endif /* NATCH_CHAR */
368 if (!isprint(c
) && c
!= '\n' && c
!= '\r' && c
!= '\b' && c
!= '\t')
374 out
->s
[out
->l
] = '\0';
379 delctrl(char *cp
, size_t len
)
384 for (x
= y
= 0; x
< len
; ++x
)
385 if (!cntrlchar(cp
[x
]))
401 makeprint(&in
, &out
);
402 rp
= savestrbuf(out
.s
, out
.l
);
409 prout(char const *s
, size_t sz
, FILE *fp
)
417 makeprint(&in
, &out
);
418 n
= fwrite(out
.s
, 1, out
.l
, fp
);
425 bidi_info_needed(char const *bdat
, size_t blen
)
430 #ifdef HAVE_NATCH_CHAR
431 if (n_psonce
& n_PSO_UNICODE
)
433 /* TODO Checking for BIDI character: use S-CText fromutf8
434 * TODO plus isrighttoleft (or whatever there will be)! */
435 ui32_t c
= n_utf8_to_utf32(&bdat
, &blen
);
442 /* (Very very fuzzy, awaiting S-CText for good) */
443 if ((c
>= 0x05BE && c
<= 0x08E3) ||
444 (c
>= 0xFB1D && c
<= 0xFE00) /* No: variation selectors */ ||
445 (c
>= 0xFE70 && c
<= 0xFEFC) ||
446 (c
>= 0x10800 && c
<= 0x10C48) ||
447 (c
>= 0x1EE00 && c
<= 0x1EEF1)) {
452 #endif /* HAVE_NATCH_CHAR */
458 bidi_info_create(struct bidi_info
*bip
)
460 /* Unicode: how to isolate RIGHT-TO-LEFT scripts via *headline-bidi*
461 * 1.1 (Jun 1993): U+200E (E2 80 8E) LEFT-TO-RIGHT MARK
462 * 6.3 (Sep 2013): U+2068 (E2 81 A8) FIRST STRONG ISOLATE,
463 * U+2069 (E2 81 A9) POP DIRECTIONAL ISOLATE
464 * Worse results seen for: U+202D "\xE2\x80\xAD" U+202C "\xE2\x80\xAC" */
465 n_NATCH_CHAR( char const *hb
; )
468 memset(bip
, 0, sizeof *bip
);
469 bip
->bi_start
.s
= bip
->bi_end
.s
= n_UNCONST(n_empty
);
471 #ifdef HAVE_NATCH_CHAR
472 if ((n_psonce
& n_PSO_UNICODE
) && (hb
= ok_vlook(headline_bidi
)) != NULL
) {
478 bip
->bi_start
.s
= bip
->bi_end
.s
= n_UNCONST("\xE2\x80\x8E");
484 bip
->bi_start
.s
= n_UNCONST("\xE2\x81\xA8");
485 bip
->bi_end
.s
= n_UNCONST("\xE2\x81\xA9");
488 bip
->bi_start
.l
= bip
->bi_end
.l
= 3;