1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ UserInterface: string related operations.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2016 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #ifndef HAVE_AMALGAMATION
42 n_visual_info(struct n_visual_info_ctx
*vicp
, enum n_visual_info_flags vif
){
52 assert(vicp
->vic_inlen
== 0 || vicp
->vic_indat
!= NULL
);
53 assert(!(vif
& n__VISUAL_INFO_FLAGS
) || !(vif
& n_VISUAL_INFO_ONE_CHAR
));
57 if((il
= vicp
->vic_inlen
) == UIZ_MAX
)
58 il
= vicp
->vic_inlen
= strlen(ib
);
60 if((vif
& (n_VISUAL_INFO_WIDTH_QUERY
| n_VISUAL_INFO_WOUT_PRINTABLE
)) ==
61 n_VISUAL_INFO_WOUT_PRINTABLE
)
62 vif
|= n_VISUAL_INFO_WIDTH_QUERY
;
64 vicp
->vic_chars_seen
= vicp
->vic_vi_width
= 0;
65 if(vif
& n_VISUAL_INFO_WOUT_CREATE
){
66 if(vif
& n_VISUAL_INFO_WOUT_SALLOC
)
67 vicp
->vic_woudat
= salloc(sizeof(*vicp
->vic_woudat
) * (il
+1));
71 if((mbp
= vicp
->vic_mbstate
) == NULL
)
72 mbp
= &vicp
->vic_mbs_def
;
76 do/* while(!(vif & n_VISUAL_INFO_ONE_CHAR) && il > 0) */{
78 size_t i
= mbrtowc(&vicp
->vic_waccu
, ib
, il
, mbp
);
83 }else if(i
== (size_t)-1){
84 if(!(vif
& n_VISUAL_INFO_SKIP_ERRORS
)){
88 memset(mbp
, 0, sizeof *mbp
);
89 vicp
->vic_waccu
= (options
& OPT_UNICODE
? 0xFFFD : '?');
96 ++vicp
->vic_chars_seen
;
97 vicp
->vic_bytes_seen
+= i
;
101 if(vif
& n_VISUAL_INFO_WIDTH_QUERY
){
103 wchar_t wc
= vicp
->vic_waccu
;
106 w
= (wc
== '\t' ? 1 : wcwidth(wc
));
108 if(wc
== '\t' || iswprint(wc
))
109 w
= 1 + (wc
>= 0x1100u
); /* S-CText isfullwidth() */
114 vicp
->vic_vi_width
+= w
;
115 else if(vif
& n_VISUAL_INFO_WOUT_PRINTABLE
)
118 #else /* HAVE_C90AMEND1 */
126 ++vicp
->vic_chars_seen
;
127 ++vicp
->vic_bytes_seen
;
129 if(vif
& n_VISUAL_INFO_WIDTH_QUERY
)
130 vicp
->vic_vi_width
+= (c
== '\t' || isprint(c
)); /* XXX */
136 if(vif
& n_VISUAL_INFO_WOUT_CREATE
)
137 vicp
->vic_woudat
[vicp
->vic_woulen
++] = vicp
->vic_waccu
;
138 }while(!(vif
& n_VISUAL_INFO_ONE_CHAR
) && il
> 0);
141 if(vif
& n_VISUAL_INFO_WOUT_CREATE
)
142 vicp
->vic_woudat
[vicp
->vic_woulen
] = L
'\0';
143 vicp
->vic_oudat
= ib
;
144 vicp
->vic_oulen
= il
;
145 vicp
->vic_flags
= vif
;
151 field_detect_clip(size_t maxlen
, char const *buf
, size_t blen
)/*TODO mbrtowc()*/
156 #ifdef HAVE_NATCH_CHAR
157 maxlen
= MIN(maxlen
, blen
);
158 for (rv
= 0; maxlen
> 0;) {
159 int ml
= mblen(buf
, maxlen
);
169 rv
= MIN(blen
, maxlen
);
176 field_put_bidi_clip(char *store
, size_t maxlen
, char const *buf
, size_t blen
)
178 NATCH_CHAR( struct bidi_info bi
; )
179 size_t rv
NATCH_CHAR( COMMA i
);
186 #ifdef HAVE_NATCH_CHAR
187 bidi_info_create(&bi
);
188 if (bi
.bi_start
.l
== 0 || !bidi_info_needed(buf
, blen
)) {
193 if (maxlen
>= (i
= bi
.bi_pad
+ bi
.bi_end
.l
+ bi
.bi_start
.l
))
198 if ((i
= bi
.bi_start
.l
) > 0) {
199 memcpy(store
, bi
.bi_start
.s
, i
);
206 int ml
= mblen(buf
, blen
);
211 if (UICMP(z
, maxlen
, <, ml
))
216 memcpy(store
, buf
, ml
);
223 if ((i
= bi
.bi_end
.l
) > 0) {
224 memcpy(store
, bi
.bi_end
.s
, i
);
232 rv
= MIN(blen
, maxlen
);
233 memcpy(store
, buf
, rv
);
242 colalign(char const *cp
, int col
, int fill
, int *cols_decr_used_or_null
)
244 NATCH_CHAR( struct bidi_info bi
; )
245 int col_orig
= col
, n
, sz
;
246 bool_t isbidi
, isuni
, istab
, isrepl
;
250 /* Bidi only on request and when there is 8-bit data */
251 isbidi
= isuni
= FAL0
;
252 #ifdef HAVE_NATCH_CHAR
253 isuni
= ((options
& OPT_UNICODE
) != 0);
254 bidi_info_create(&bi
);
255 if (bi
.bi_start
.l
== 0)
257 if (!(isbidi
= bidi_info_needed(cp
, strlen(cp
))))
260 if ((size_t)col
>= bi
.bi_pad
)
267 np
= nb
= salloc(mb_cur_max
* strlen(cp
) +
269 NATCH_CHAR( + (isbidi
? bi
.bi_start
.l
+ bi
.bi_end
.l
: 0) )
272 #ifdef HAVE_NATCH_CHAR
274 memcpy(np
, bi
.bi_start
.s
, bi
.bi_start
.l
);
279 while (*cp
!= '\0') {
281 #ifdef HAVE_C90AMEND1
282 if (mb_cur_max
> 1) {
287 if ((sz
= mbtowc(&wc
, cp
, mb_cur_max
)) == -1)
289 else if (wc
== L
'\t') {
290 cp
+= sz
- 1; /* Silly, no such charset known (.. until S-Ctext) */
293 } else if (iswprint(wc
)) {
294 # ifndef HAVE_WCWIDTH
295 n
= 1 + (wc
>= 0x1100u
); /* TODO use S-CText isfullwidth() */
297 if ((n
= wcwidth(wc
)) == -1)
307 istab
= (*cp
== '\t');
308 isrepl
= !(istab
|| isprint((uc_i
)*cp
));
324 } else if (istab
|| (sz
== 1 && spacechar(*cp
))) {
332 if (fill
&& col
!= 0) {
334 memmove(nb
+ col
, nb
, PTR2SIZE(np
- nb
));
335 memset(nb
, ' ', col
);
337 memset(np
, ' ', col
);
342 #ifdef HAVE_NATCH_CHAR
344 memcpy(np
, bi
.bi_end
.s
, bi
.bi_end
.l
);
350 if (cols_decr_used_or_null
!= NULL
)
351 *cols_decr_used_or_null
-= col_orig
- col
;
357 makeprint(struct str
const *in
, struct str
*out
)
359 char const *inp
, *maxp
;
364 out
->s
= outp
= smalloc(DBG( msz
= ) in
->l
*mb_cur_max
+ 2u*mb_cur_max
+1);
368 #ifdef HAVE_NATCH_CHAR
369 if (mb_cur_max
> 1) {
370 char mbb
[MB_LEN_MAX
+ 1];
373 bool_t isuni
= ((options
& OPT_UNICODE
) != 0);
378 n
= mbtowc(&wc
, inp
, PTR2SIZE(maxp
- inp
));
384 /* FIXME Why is the mbtowc() state being reset here?
385 * FIXME what about ISO 2022-JP plus -- those
386 * FIXME will lose shifts, then!
387 * FIXME THUS - we'd need special "known points"
388 * FIXME to do so - say, after a newline!!
389 * FIXME WE NEED TO CHANGE ALL USES +MBLEN! */
390 mbtowc(&wc
, NULL
, mb_cur_max
);
391 wc
= isuni
? 0xFFFD : '?';
396 if (!iswprint(wc
) && wc
!= '\n' && wc
!= '\r' && wc
!= '\b' &&
398 if ((wc
& ~(wchar_t)037) == 0)
399 wc
= isuni
? 0x2400 | wc
: '?';
401 wc
= isuni
? 0x2421 : '?';
403 wc
= isuni
? 0x2426 : '?';
404 }else if(isuni
){ /* TODO ctext */
405 /* We need to actively filter out L-TO-R and R-TO-L marks TODO ctext */
406 if(wc
== 0x200E || wc
== 0x200F || (wc
>= 0x202A && wc
<= 0x202E))
408 /* And some zero-width messes */
409 if(wc
== 0x00AD || (wc
>= 0x200B && wc
<= 0x200D))
411 /* Oh about the ISO C wide character interfaces, baby! */
415 if ((n
= wctomb(mbb
, wc
)) <= 0)
418 assert(out
->l
< msz
);
419 for (i
= 0; i
< n
; ++i
)
423 #endif /* NATCH_CHAR */
428 if (!isprint(c
) && c
!= '\n' && c
!= '\r' && c
!= '\b' && c
!= '\t')
434 out
->s
[out
->l
] = '\0';
439 delctrl(char *cp
, size_t len
)
444 for (x
= y
= 0; x
< len
; ++x
)
445 if (!cntrlchar(cp
[x
]))
461 makeprint(&in
, &out
);
462 rp
= savestrbuf(out
.s
, out
.l
);
469 prout(char const *s
, size_t sz
, FILE *fp
)
477 makeprint(&in
, &out
);
478 n
= fwrite(out
.s
, 1, out
.l
, fp
);
485 putuc(int u
, int c
, FILE *fp
)
491 #ifdef HAVE_NATCH_CHAR
492 if ((options
& OPT_UNICODE
) && (u
& ~(wchar_t)0177)) {
493 char mbb
[MB_LEN_MAX
];
496 if ((n
= wctomb(mbb
, u
)) > 0) {
498 for (i
= 0; i
< n
; ++i
)
499 if (putc(mbb
[i
] & 0377, fp
) == EOF
) {
504 rv
= (putc('\0', fp
) != EOF
);
509 rv
= (putc(c
, fp
) != EOF
);
515 bidi_info_needed(char const *bdat
, size_t blen
)
520 #ifdef HAVE_NATCH_CHAR
521 if (options
& OPT_UNICODE
)
523 /* TODO Checking for BIDI character: use S-CText fromutf8
524 * TODO plus isrighttoleft (or whatever there will be)! */
525 ui32_t c
= n_utf8_to_utf32(&bdat
, &blen
);
532 /* (Very very fuzzy, awaiting S-CText for good) */
533 if ((c
>= 0x05BE && c
<= 0x08E3) ||
534 (c
>= 0xFB1D && c
<= 0xFE00) /* No: variation selectors */ ||
535 (c
>= 0xFE70 && c
<= 0xFEFC) ||
536 (c
>= 0x10800 && c
<= 0x10C48) ||
537 (c
>= 0x1EE00 && c
<= 0x1EEF1)) {
542 #endif /* HAVE_NATCH_CHAR */
548 bidi_info_create(struct bidi_info
*bip
)
550 /* Unicode: how to isolate RIGHT-TO-LEFT scripts via *headline-bidi*
551 * 1.1 (Jun 1993): U+200E (E2 80 8E) LEFT-TO-RIGHT MARK
552 * 6.3 (Sep 2013): U+2068 (E2 81 A8) FIRST STRONG ISOLATE,
553 * U+2069 (E2 81 A9) POP DIRECTIONAL ISOLATE
554 * Worse results seen for: U+202D "\xE2\x80\xAD" U+202C "\xE2\x80\xAC" */
555 NATCH_CHAR( char const *hb
; )
558 memset(bip
, 0, sizeof *bip
);
559 bip
->bi_start
.s
= bip
->bi_end
.s
= UNCONST("");
561 #ifdef HAVE_NATCH_CHAR
562 if ((options
& OPT_UNICODE
) && (hb
= ok_vlook(headline_bidi
)) != NULL
) {
568 bip
->bi_start
.s
= bip
->bi_end
.s
= UNCONST("\xE2\x80\x8E");
574 bip
->bi_start
.s
= UNCONST("\xE2\x81\xA8");
575 bip
->bi_end
.s
= UNCONST("\xE2\x81\xA9");
578 bip
->bi_start
.l
= bip
->bi_end
.l
= 3;