1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ UserInterface: string related operations.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2018 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #ifndef HAVE_AMALGAMATION
42 n_visual_info(struct n_visual_info_ctx
*vicp
, enum n_visual_info_flags vif
){
52 assert(vicp
->vic_inlen
== 0 || vicp
->vic_indat
!= NULL
);
53 assert(!(vif
& n__VISUAL_INFO_FLAGS
) || !(vif
& n_VISUAL_INFO_ONE_CHAR
));
57 if((il
= vicp
->vic_inlen
) == UIZ_MAX
)
58 il
= vicp
->vic_inlen
= strlen(ib
);
60 if((vif
& (n_VISUAL_INFO_WIDTH_QUERY
| n_VISUAL_INFO_WOUT_PRINTABLE
)) ==
61 n_VISUAL_INFO_WOUT_PRINTABLE
)
62 vif
|= n_VISUAL_INFO_WIDTH_QUERY
;
64 vicp
->vic_chars_seen
= vicp
->vic_bytes_seen
= vicp
->vic_vi_width
= 0;
65 if(vif
& n_VISUAL_INFO_WOUT_CREATE
){
66 if(vif
& n_VISUAL_INFO_WOUT_SALLOC
)
67 vicp
->vic_woudat
= salloc(sizeof(*vicp
->vic_woudat
) * (il
+1));
71 if((mbp
= vicp
->vic_mbstate
) == NULL
)
72 mbp
= &vicp
->vic_mbs_def
;
76 do/* while(!(vif & n_VISUAL_INFO_ONE_CHAR) && il > 0) */{
78 size_t i
= mbrtowc(&vicp
->vic_waccu
, ib
, il
, mbp
);
83 }else if(i
== (size_t)-1){
84 if(!(vif
& n_VISUAL_INFO_SKIP_ERRORS
)){
88 memset(mbp
, 0, sizeof *mbp
);
89 vicp
->vic_waccu
= (n_psonce
& n_PSO_UNICODE
) ? 0xFFFD : '?';
96 ++vicp
->vic_chars_seen
;
97 vicp
->vic_bytes_seen
+= i
;
101 if(vif
& n_VISUAL_INFO_WIDTH_QUERY
){
103 wchar_t wc
= vicp
->vic_waccu
;
106 w
= (wc
== '\t' ? 1 : wcwidth(wc
));
108 if(wc
== '\t' || iswprint(wc
))
109 w
= 1 + (wc
>= 0x1100u
); /* S-CText isfullwidth() */
114 vicp
->vic_vi_width
+= w
;
115 else if(vif
& n_VISUAL_INFO_WOUT_PRINTABLE
)
118 #else /* HAVE_C90AMEND1 */
126 ++vicp
->vic_chars_seen
;
127 ++vicp
->vic_bytes_seen
;
129 if(vif
& n_VISUAL_INFO_WIDTH_QUERY
)
130 vicp
->vic_vi_width
+= (c
== '\t' || isprint(c
)); /* XXX */
136 if(vif
& n_VISUAL_INFO_WOUT_CREATE
)
137 vicp
->vic_woudat
[vicp
->vic_woulen
++] = vicp
->vic_waccu
;
138 }while(!(vif
& n_VISUAL_INFO_ONE_CHAR
) && il
> 0);
141 if(vif
& n_VISUAL_INFO_WOUT_CREATE
)
142 vicp
->vic_woudat
[vicp
->vic_woulen
] = L
'\0';
143 vicp
->vic_oudat
= ib
;
144 vicp
->vic_oulen
= il
;
145 vicp
->vic_flags
= vif
;
151 field_detect_clip(size_t maxlen
, char const *buf
, size_t blen
)/*TODO mbrtowc()*/
156 #ifdef HAVE_NATCH_CHAR
157 maxlen
= n_MIN(maxlen
, blen
);
158 for (rv
= 0; maxlen
> 0;) {
159 int ml
= mblen(buf
, maxlen
);
169 rv
= n_MIN(blen
, maxlen
);
176 colalign(char const *cp
, int col
, int fill
, int *cols_decr_used_or_null
)
178 n_NATCH_CHAR( struct bidi_info bi
; )
179 int col_orig
= col
, n
, sz
;
180 bool_t isbidi
, isuni
, istab
, isrepl
;
184 /* Bidi only on request and when there is 8-bit data */
185 isbidi
= isuni
= FAL0
;
186 #ifdef HAVE_NATCH_CHAR
187 isuni
= ((n_psonce
& n_PSO_UNICODE
) != 0);
188 bidi_info_create(&bi
);
189 if (bi
.bi_start
.l
== 0)
191 if (!(isbidi
= bidi_info_needed(cp
, strlen(cp
))))
194 if ((size_t)col
>= bi
.bi_pad
)
201 np
= nb
= salloc(n_mb_cur_max
* strlen(cp
) +
203 n_NATCH_CHAR( + (isbidi
? bi
.bi_start
.l
+ bi
.bi_end
.l
: 0) )
206 #ifdef HAVE_NATCH_CHAR
208 memcpy(np
, bi
.bi_start
.s
, bi
.bi_start
.l
);
213 while (*cp
!= '\0') {
215 #ifdef HAVE_C90AMEND1
216 if (n_mb_cur_max
> 1) {
221 if ((sz
= mbtowc(&wc
, cp
, n_mb_cur_max
)) == -1)
223 else if (wc
== L
'\t') {
224 cp
+= sz
- 1; /* Silly, no such charset known (.. until S-Ctext) */
227 } else if (iswprint(wc
)) {
228 # ifndef HAVE_WCWIDTH
229 n
= 1 + (wc
>= 0x1100u
); /* TODO use S-CText isfullwidth() */
231 if ((n
= wcwidth(wc
)) == -1)
241 istab
= (*cp
== '\t');
242 isrepl
= !(istab
|| isprint((uc_i
)*cp
));
251 /* Contained in n_mb_cur_max, then */
252 memcpy(np
, n_unirepl
, sizeof(n_unirepl
) -1);
253 np
+= sizeof(n_unirepl
) -1;
257 } else if (istab
|| (sz
== 1 && spacechar(*cp
))) {
265 if (fill
&& col
!= 0) {
267 memmove(nb
+ col
, nb
, PTR2SIZE(np
- nb
));
268 memset(nb
, ' ', col
);
270 memset(np
, ' ', col
);
275 #ifdef HAVE_NATCH_CHAR
277 memcpy(np
, bi
.bi_end
.s
, bi
.bi_end
.l
);
283 if (cols_decr_used_or_null
!= NULL
)
284 *cols_decr_used_or_null
-= col_orig
- col
;
290 makeprint(struct str
const *in
, struct str
*out
) /* TODO <-> TTYCHARSET!! */
292 /* TODO: makeprint() should honour *ttycharset*. This of course does not
293 * TODO work with ISO C / POSIX since mbrtowc() knows about locales, not
294 * TODO charsets, and ditto iswprint() etc. do work with the locale too.
295 * TODO I hope S-CText can do something about that, and/or otherwise add
296 * TODO some special treatment for UTF-8 (take it from S-CText too then) */
297 char const *inp
, *maxp
;
303 outp
= smalloc(DBG( msz
= ) in
->l
*n_mb_cur_max
+ 2u*n_mb_cur_max
+1);
307 #ifdef HAVE_NATCH_CHAR
308 if (n_mb_cur_max
> 1) {
309 char mbb
[MB_LEN_MAX
+ 1];
312 bool_t isuni
= ((n_psonce
& n_PSO_UNICODE
) != 0);
317 n
= mbtowc(&wc
, inp
, PTR2SIZE(maxp
- inp
));
323 /* FIXME Why mbtowc() resetting here?
324 * FIXME what about ISO 2022-JP plus -- those
325 * FIXME will lose shifts, then!
326 * FIXME THUS - we'd need special "known points"
327 * FIXME to do so - say, after a newline!!
328 * FIXME WE NEED TO CHANGE ALL USES +MBLEN! */
329 mbtowc(&wc
, NULL
, n_mb_cur_max
);
330 wc
= isuni
? 0xFFFD : '?';
335 if (!iswprint(wc
) && wc
!= '\n' /*&& wc != '\r' && wc != '\b'*/ &&
337 if ((wc
& ~(wchar_t)037) == 0)
338 wc
= isuni
? 0x2400 | wc
: '?';
340 wc
= isuni
? 0x2421 : '?';
342 wc
= isuni
? 0x2426 : '?';
343 }else if(isuni
){ /* TODO ctext */
344 /* Need to filter out L-TO-R and R-TO-L marks TODO ctext */
345 if(wc
== 0x200E || wc
== 0x200F || (wc
>= 0x202A && wc
<= 0x202E))
347 /* And some zero-width messes */
348 if(wc
== 0x00AD || (wc
>= 0x200B && wc
<= 0x200D))
350 /* Oh about the ISO C wide character interfaces, baby! */
354 if ((n
= wctomb(mbb
, wc
)) <= 0)
357 assert(out
->l
< msz
);
358 for (i
= 0; i
< n
; ++i
)
362 #endif /* NATCH_CHAR */
367 if (!isprint(c
) && c
!= '\n' && c
!= '\r' && c
!= '\b' && c
!= '\t')
373 out
->s
[out
->l
] = '\0';
378 delctrl(char *cp
, size_t len
)
383 for (x
= y
= 0; x
< len
; ++x
)
384 if (!cntrlchar(cp
[x
]))
400 makeprint(&in
, &out
);
401 rp
= savestrbuf(out
.s
, out
.l
);
408 prout(char const *s
, size_t sz
, FILE *fp
)
416 makeprint(&in
, &out
);
417 n
= fwrite(out
.s
, 1, out
.l
, fp
);
424 bidi_info_needed(char const *bdat
, size_t blen
)
429 #ifdef HAVE_NATCH_CHAR
430 if (n_psonce
& n_PSO_UNICODE
)
432 /* TODO Checking for BIDI character: use S-CText fromutf8
433 * TODO plus isrighttoleft (or whatever there will be)! */
434 ui32_t c
= n_utf8_to_utf32(&bdat
, &blen
);
441 /* (Very very fuzzy, awaiting S-CText for good) */
442 if ((c
>= 0x05BE && c
<= 0x08E3) ||
443 (c
>= 0xFB1D && c
<= 0xFE00) /* No: variation selectors */ ||
444 (c
>= 0xFE70 && c
<= 0xFEFC) ||
445 (c
>= 0x10800 && c
<= 0x10C48) ||
446 (c
>= 0x1EE00 && c
<= 0x1EEF1)) {
451 #endif /* HAVE_NATCH_CHAR */
457 bidi_info_create(struct bidi_info
*bip
)
459 /* Unicode: how to isolate RIGHT-TO-LEFT scripts via *headline-bidi*
460 * 1.1 (Jun 1993): U+200E (E2 80 8E) LEFT-TO-RIGHT MARK
461 * 6.3 (Sep 2013): U+2068 (E2 81 A8) FIRST STRONG ISOLATE,
462 * U+2069 (E2 81 A9) POP DIRECTIONAL ISOLATE
463 * Worse results seen for: U+202D "\xE2\x80\xAD" U+202C "\xE2\x80\xAC" */
464 n_NATCH_CHAR( char const *hb
; )
467 memset(bip
, 0, sizeof *bip
);
468 bip
->bi_start
.s
= bip
->bi_end
.s
= n_UNCONST(n_empty
);
470 #ifdef HAVE_NATCH_CHAR
471 if ((n_psonce
& n_PSO_UNICODE
) && (hb
= ok_vlook(headline_bidi
)) != NULL
) {
477 bip
->bi_start
.s
= bip
->bi_end
.s
= n_UNCONST("\xE2\x80\x8E");
483 bip
->bi_start
.s
= n_UNCONST("\xE2\x81\xA8");
484 bip
->bi_end
.s
= n_UNCONST("\xE2\x81\xA9");
487 bip
->bi_start
.l
= bip
->bi_end
.l
= 3;