1 /*@ S-nail - a mail user agent derived from Berkeley Mail.
2 *@ UserInterface: string related operations.
4 * Copyright (c) 2000-2004 Gunnar Ritter, Freiburg i. Br., Germany.
5 * Copyright (c) 2012 - 2015 Steffen (Daode) Nurpmeso <sdaoden@users.sf.net>.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 #ifndef HAVE_AMALGAMATION
42 field_detect_width(char const *buf
, size_t blen
){
47 blen
= (buf
== NULL
) ? 0 : strlen(buf
);
48 assert(blen
== 0 || buf
!= NULL
);
55 memset(&mbs
, 0, sizeof mbs
);
57 for(rv
= 0; blen
> 0;){
58 size_t i
= mbrtowc(&wc
, buf
, blen
, &mbs
);
82 rv
+= 1 + (wc
>= 0x1100u
); /* TODO use S-CText isfullwidth() */
89 #endif /* HAVE_C90AMEND1 */
96 field_detect_clip(size_t maxlen
, char const *buf
, size_t blen
)/*TODO mbrtowc()*/
101 #ifdef HAVE_NATCH_CHAR
102 maxlen
= MIN(maxlen
, blen
);
103 for (rv
= 0; maxlen
> 0;) {
104 int ml
= mblen(buf
, maxlen
);
114 rv
= MIN(blen
, maxlen
);
121 field_put_bidi_clip(char *store
, size_t maxlen
, char const *buf
, size_t blen
)
123 NATCH_CHAR( struct bidi_info bi
; )
124 size_t rv
NATCH_CHAR( COMMA i
);
131 #ifdef HAVE_NATCH_CHAR
132 bidi_info_create(&bi
);
133 if (bi
.bi_start
.l
== 0 || !bidi_info_needed(buf
, blen
)) {
138 if (maxlen
>= (i
= bi
.bi_pad
+ bi
.bi_end
.l
+ bi
.bi_start
.l
))
143 if ((i
= bi
.bi_start
.l
) > 0) {
144 memcpy(store
, bi
.bi_start
.s
, i
);
151 int ml
= mblen(buf
, blen
);
156 if (UICMP(z
, maxlen
, <, ml
))
161 memcpy(store
, buf
, ml
);
168 if ((i
= bi
.bi_end
.l
) > 0) {
169 memcpy(store
, bi
.bi_end
.s
, i
);
177 rv
= MIN(blen
, maxlen
);
178 memcpy(store
, buf
, rv
);
187 colalign(char const *cp
, int col
, int fill
, int *cols_decr_used_or_null
)
189 NATCH_CHAR( struct bidi_info bi
; )
190 int col_orig
= col
, n
, sz
;
191 bool_t isbidi
, isuni
, istab
, isrepl
;
195 /* Bidi only on request and when there is 8-bit data */
196 isbidi
= isuni
= FAL0
;
197 #ifdef HAVE_NATCH_CHAR
198 isuni
= ((options
& OPT_UNICODE
) != 0);
199 bidi_info_create(&bi
);
200 if (bi
.bi_start
.l
== 0)
202 if (!(isbidi
= bidi_info_needed(cp
, strlen(cp
))))
205 if ((size_t)col
>= bi
.bi_pad
)
212 np
= nb
= salloc(mb_cur_max
* strlen(cp
) +
214 NATCH_CHAR( + (isbidi
? bi
.bi_start
.l
+ bi
.bi_end
.l
: 0) )
217 #ifdef HAVE_NATCH_CHAR
219 memcpy(np
, bi
.bi_start
.s
, bi
.bi_start
.l
);
224 while (*cp
!= '\0') {
226 #ifdef HAVE_C90AMEND1
227 if (mb_cur_max
> 1) {
232 if ((sz
= mbtowc(&wc
, cp
, mb_cur_max
)) == -1)
234 else if (wc
== L
'\t') {
235 cp
+= sz
- 1; /* Silly, no such charset known (.. until S-Ctext) */
238 } else if (iswprint(wc
)) {
239 # ifndef HAVE_WCWIDTH
240 n
= 1 + (wc
>= 0x1100u
); /* TODO use S-CText isfullwidth() */
242 if ((n
= wcwidth(wc
)) == -1)
252 istab
= (*cp
== '\t');
253 isrepl
= !(istab
|| isprint((uc_i
)*cp
));
269 } else if (istab
|| (sz
== 1 && spacechar(*cp
))) {
277 if (fill
&& col
!= 0) {
279 memmove(nb
+ col
, nb
, PTR2SIZE(np
- nb
));
280 memset(nb
, ' ', col
);
282 memset(np
, ' ', col
);
287 #ifdef HAVE_NATCH_CHAR
289 memcpy(np
, bi
.bi_end
.s
, bi
.bi_end
.l
);
295 if (cols_decr_used_or_null
!= NULL
)
296 *cols_decr_used_or_null
-= col_orig
- col
;
302 makeprint(struct str
const *in
, struct str
*out
)
304 char const *inp
, *maxp
;
309 out
->s
= outp
= smalloc(DBG( msz
= ) in
->l
*mb_cur_max
+ 2u*mb_cur_max
+1);
313 #ifdef HAVE_NATCH_CHAR
314 if (mb_cur_max
> 1) {
315 char mbb
[MB_LEN_MAX
+ 1];
318 bool_t isuni
= ((options
& OPT_UNICODE
) != 0);
323 n
= mbtowc(&wc
, inp
, PTR2SIZE(maxp
- inp
));
329 /* FIXME Why mbtowc() resetting here?
330 * FIXME what about ISO 2022-JP plus -- those
331 * FIXME will lose shifts, then!
332 * FIXME THUS - we'd need special "known points"
333 * FIXME to do so - say, after a newline!!
334 * FIXME WE NEED TO CHANGE ALL USES +MBLEN! */
335 mbtowc(&wc
, NULL
, mb_cur_max
);
336 wc
= isuni
? 0xFFFD : '?';
341 if (!iswprint(wc
) && wc
!= '\n' && wc
!= '\r' && wc
!= '\b' &&
343 if ((wc
& ~(wchar_t)037) == 0)
344 wc
= isuni
? 0x2400 | wc
: '?';
346 wc
= isuni
? 0x2421 : '?';
348 wc
= isuni
? 0x2426 : '?';
349 }else if(isuni
){ /* TODO ctext */
350 /* We need to actively filter out L-TO-R and R-TO-L marks TODO ctext */
351 if(wc
== 0x200E || wc
== 0x200F || (wc
>= 0x202A && wc
<= 0x202E))
353 /* And some zero-width messes */
354 if(wc
== 0x00AD || (wc
>= 0x200B && wc
<= 0x200D))
356 /* Oh about the ISO C wide character interfaces, baby! */
360 if ((n
= wctomb(mbb
, wc
)) <= 0)
363 assert(out
->l
< msz
);
364 for (i
= 0; i
< n
; ++i
)
368 #endif /* NATCH_CHAR */
373 if (!isprint(c
) && c
!= '\n' && c
!= '\r' && c
!= '\b' && c
!= '\t')
379 out
->s
[out
->l
] = '\0';
384 delctrl(char *cp
, size_t len
)
389 for (x
= y
= 0; x
< len
; ++x
)
390 if (!cntrlchar(cp
[x
]))
406 makeprint(&in
, &out
);
407 rp
= savestrbuf(out
.s
, out
.l
);
414 prout(char const *s
, size_t sz
, FILE *fp
)
422 makeprint(&in
, &out
);
423 n
= fwrite(out
.s
, 1, out
.l
, fp
);
430 putuc(int u
, int c
, FILE *fp
)
436 #ifdef HAVE_NATCH_CHAR
437 if ((options
& OPT_UNICODE
) && (u
& ~(wchar_t)0177)) {
438 char mbb
[MB_LEN_MAX
];
441 if ((n
= wctomb(mbb
, u
)) > 0) {
443 for (i
= 0; i
< n
; ++i
)
444 if (putc(mbb
[i
] & 0377, fp
) == EOF
) {
449 rv
= (putc('\0', fp
) != EOF
);
454 rv
= (putc(c
, fp
) != EOF
);
460 bidi_info_needed(char const *bdat
, size_t blen
)
465 #ifdef HAVE_NATCH_CHAR
466 if (options
& OPT_UNICODE
)
468 /* TODO Checking for BIDI character: use S-CText fromutf8
469 * TODO plus isrighttoleft (or whatever there will be)! */
470 ui32_t c
= n_utf8_to_utf32(&bdat
, &blen
);
477 /* (Very very fuzzy, awaiting S-CText for good) */
478 if ((c
>= 0x05BE && c
<= 0x08E3) ||
479 (c
>= 0xFB1D && c
<= 0xFE00) /* No: variation selectors */ ||
480 (c
>= 0xFE70 && c
<= 0xFEFC) ||
481 (c
>= 0x10800 && c
<= 0x10C48) ||
482 (c
>= 0x1EE00 && c
<= 0x1EEF1)) {
487 #endif /* HAVE_NATCH_CHAR */
493 bidi_info_create(struct bidi_info
*bip
)
495 /* Unicode: how to isolate RIGHT-TO-LEFT scripts via *headline-bidi*
496 * 1.1 (Jun 1993): U+200E (E2 80 8E) LEFT-TO-RIGHT MARK
497 * 6.3 (Sep 2013): U+2068 (E2 81 A8) FIRST STRONG ISOLATE,
498 * U+2069 (E2 81 A9) POP DIRECTIONAL ISOLATE
499 * Worse results seen for: U+202D "\xE2\x80\xAD" U+202C "\xE2\x80\xAC" */
500 NATCH_CHAR( char const *hb
; )
503 memset(bip
, 0, sizeof *bip
);
504 bip
->bi_start
.s
= bip
->bi_end
.s
= UNCONST("");
506 #ifdef HAVE_NATCH_CHAR
507 if ((options
& OPT_UNICODE
) && (hb
= ok_vlook(headline_bidi
)) != NULL
) {
513 bip
->bi_start
.s
= bip
->bi_end
.s
= UNCONST("\xE2\x80\x8E");
519 bip
->bi_start
.s
= UNCONST("\xE2\x81\xA8");
520 bip
->bi_end
.s
= UNCONST("\xE2\x81\xA9");
523 bip
->bi_start
.l
= bip
->bi_end
.l
= 3;