1 /* Copyright (c) 2006-2015 Jonas Fonseca <jonas.fonseca@gmail.com>
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU General Public License as
5 * published by the Free Software Foundation; either version 2 of
6 * the License, or (at your option) any later version.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
15 #include "tig/string.h"
/* Walks str up to its terminating NUL and tests every character with
 * isdigit().
 * NOTE(review): this listing omits the return type, the declaration of pos,
 * the statement guarded by the isdigit() test and the final return, so the
 * result for empty or non-numeric input cannot be confirmed from here. */
22 string_isnumber(const char *str
)
26 for (pos
= 0; str
[pos
]; pos
++) {
/* NOTE(review): str[pos] is a plain char; the <ctype.h> functions expect a
 * value representable as unsigned char (or EOF). Consider casting to
 * unsigned char before the call. */
27 if (!isdigit(str
[pos
]))
/* Walks str up to its terminating NUL, testing every character with
 * isxdigit(), and finally checks the scanned length: the visible return
 * yields true only when 7 <= pos < SIZEOF_REV.
 * NOTE(review): the return type, the declaration of pos and the statement
 * guarded by the isxdigit() test are missing from this listing; presumably a
 * non-hexadecimal character rejects the string -- confirm. */
35 iscommit(const char *str
)
39 for (pos
= 0; str
[pos
]; pos
++) {
/* NOTE(review): str[pos] is a plain char; the <ctype.h> functions expect a
 * value representable as unsigned char (or EOF). Consider casting to
 * unsigned char before the call. */
40 if (!isxdigit(str
[pos
]))
/* When the loop runs to the NUL, pos equals the length of str. SIZEOF_REV
 * is defined outside this section of the file. */
44 return 7 <= pos
&& pos
< SIZEOF_REV
;
/* Compare the tail of a string against a suffix.
 *
 * str    - the string to inspect.
 * slen   - number of bytes of str to consider, or a negative value to use
 *          strlen(str).
 * suffix - NUL-terminated suffix to look for.
 *
 * Returns 0 when the considered part of str is strictly longer than suffix
 * and ends with it, -1 when it is not longer than suffix, and otherwise the
 * non-zero result of comparing the tail with suffix. */
int
suffixcmp(const char *str, int slen, const char *suffix)
{
	size_t len = slen >= 0 ? (size_t) slen : strlen(str);
	size_t suffixlen = strlen(suffix);

	/* A string that is not strictly longer than the suffix never matches. */
	if (suffixlen >= len)
		return -1;

	/* Bound the comparison to suffixlen bytes so that an explicit slen
	 * shorter than the NUL-terminated length of str is honoured instead of
	 * comparing everything up to the terminating NUL. */
	return strncmp(str + len - suffixlen, suffix, suffixlen);
}
/* Copies srclen bytes from src into dst with strncpy() after comparing srclen
 * against the capacity dstlen - 1.
 * NOTE(review): the return type, the statement guarded by the size test and
 * whatever follows the strncpy() call are missing from this listing.
 * strncpy() does not NUL-terminate dst when src holds srclen or more bytes,
 * so termination has to come from code not visible here -- confirm. */
57 string_ncopy_do(char *dst
, size_t dstlen
, const char *src
, size_t srclen
)
/* dstlen is a size_t, so dstlen - 1 wraps around when dstlen is 0. */
59 if (srclen
> dstlen
- 1)
62 strncpy(dst
, src
, srclen
);
/* Measures the leading run of src that contains neither a NUL nor a
 * whitespace character, looking at no more than SIZEOF_REV bytes, and hands
 * that length to string_ncopy_do() with SIZEOF_REV as the destination size.
 * NOTE(review): the return type, the declaration of srclen, any code before
 * the loop and the statement guarded by the test inside the loop (presumably
 * a break) are missing from this listing. */
67 string_copy_rev(char *dst
, const char *src
)
74 for (srclen
= 0; srclen
< SIZEOF_REV
; srclen
++)
/* NOTE(review): src[srclen] is a plain char passed to isspace(); consider
 * casting to unsigned char as <ctype.h> requires. */
75 if (!src
[srclen
] || isspace(src
[srclen
]))
78 string_ncopy_do(dst
, SIZEOF_REV
, src
, srclen
);
/* Forwards to string_copy_rev() with src advanced by STRING_SIZE("commit ").
 * NOTE(review): STRING_SIZE is defined elsewhere; presumably it is the length
 * of the literal without its NUL, so that the revision following a "commit "
 * prefix is copied -- confirm. Nothing visible here checks that src actually
 * starts with that prefix or is at least that long. */
82 string_copy_rev_from_commit_line(char *dst
, const char *src
)
84 string_copy_rev(dst
, src
+ STRING_SIZE("commit "));
/* Walks at most srclen bytes of src while the running column count `size`
 * stays below max_size. For a tab character the distance to the next
 * multiple of tabsize is computed.
 * NOTE(review): the return type, the declarations of size and pos, the
 * updates of size and the return statement are missing from this listing, so
 * the returned quantity cannot be confirmed from here. */
88 string_expanded_length(const char *src
, size_t srclen
, size_t tabsize
, size_t max_size
)
92 for (size
= pos
= 0; pos
< srclen
&& size
< max_size
; pos
++) {
93 if (src
[pos
] == '\t') {
/* Columns needed to reach the next tab stop; a tabsize of 0 would make the
 * modulo undefined. */
94 size_t expanded
= tabsize
- (size
% tabsize
);
/* Copies src into dst (capacity dstlen). The visible code pads up to the next
 * multiple of tabsize with bytes taken from a string literal, has a separate
 * branch for whitespace/control characters, and stores other bytes unchanged.
 * A srclen of -1 means "no explicit length"; the loop then stops at the NUL.
 * NOTE(review): the return type, the declarations of size and pos, the
 * condition opening the tab branch, the body of the isspace()/iscntrl()
 * branch, the update of size after the memcpy(), the termination of dst and
 * the return statement are missing from this listing. */
106 string_expand(char *dst
, size_t dstlen
, const char *src
, int srclen
, int tabsize
)
/* One byte of dst is kept in reserve (dstlen - 1); dstlen is a size_t, so the
 * subtraction wraps around when dstlen is 0. pos < srclen compares against a
 * signed int; the srclen == -1 test is evaluated first. */
110 for (size
= pos
= 0; size
< dstlen
- 1 && (srclen
== -1 || pos
< srclen
) && src
[pos
]; pos
++) {
111 const char c
= src
[pos
];
/* Columns needed to reach the next tab stop; tabsize == 0 would make the
 * modulo undefined. */
114 size_t expanded
= tabsize
- (size
% tabsize
);
/* Clamp the padding so that it never runs past the reserved last byte. */
116 if (expanded
+ size
>= dstlen
- 1)
117 expanded
= dstlen
- size
- 1;
/* NOTE(review): memcpy() reads `expanded` bytes from the literal, so the
 * literal must be at least as long as the largest possible expansion. It is
 * shown here as a single space -- confirm its real length against the
 * original file. */
118 memcpy(dst
+ size
, " ", expanded
);
/* NOTE(review): c is a plain char passed to isspace()/iscntrl(); consider
 * casting to unsigned char as <ctype.h> requires. */
120 } else if (isspace(c
) || iscntrl(c
)) {
123 dst
[size
++] = src
[pos
];
/* Tests the leading character of name for whitespace in a loop, then scans
 * backwards from the last character of name while it is whitespace.
 * NOTE(review): the return type, the declaration of namelen, both loop
 * bodies and the return statement are missing from this listing; presumably
 * leading and trailing whitespace are stripped -- confirm. */
132 chomp_string(char *name
)
/* NOTE(review): *name and name[namelen] are plain chars passed to isspace();
 * consider casting to unsigned char as <ctype.h> requires. */
136 while (isspace(*name
))
/* For an empty name, strlen(name) - 1 is computed as a size_t and wraps; what
 * namelen then holds depends on its type, which is not visible here. */
139 namelen
= strlen(name
) - 1;
140 while (namelen
> 0 && isspace(name
[namelen
]))
/* Formats into buf, starting at offset *bufpos (or 0 when bufpos is NULL),
 * using the FORMAT_BUFFER macro, and advances *bufpos by retval when retval
 * is positive.
 *
 * buf     - destination buffer.
 * bufsize - total size of buf.
 * bufpos  - optional in/out write offset into buf; may be NULL.
 * fmt     - printf-style format followed by its arguments.
 *
 * Returns false when the starting offset is at or beyond bufsize, true
 * otherwise.
 * NOTE(review): the declaration of retval is missing from this listing, and
 * FORMAT_BUFFER is defined elsewhere, so how it reports errors or truncation
 * through retval cannot be confirmed from here. */
146 bool PRINTF_LIKE(4, 5)
147 string_nformat(char *buf
, size_t bufsize
, size_t *bufpos
, const char *fmt
, ...)
149 size_t pos
= bufpos
? *bufpos
: 0;
/* NOTE(review): bufsize - pos is a size_t expression and wraps around when
 * pos > bufsize; the pos >= bufsize test below only happens after this call. */
152 FORMAT_BUFFER(buf
+ pos
, bufsize
- pos
, fmt
, retval
, false);
153 if (bufpos
&& retval
> 0)
154 *bufpos
= pos
+ retval
;
/* NOTE(review): the result depends only on the starting offset, not on
 * retval -- confirm that a failed or truncated format is meant to return
 * true. */
156 return pos
>= bufsize
? false : true;
/* Compares two strings, with a dedicated path that does not dereference the
 * pointers.
 * The first visible return yields the difference of the "is non-NULL" flags
 * of s1 and s2 (-1, 0 or 1); the second defers to strcmp().
 * NOTE(review): the return type and the condition selecting between the two
 * returns are missing from this listing; presumably the first return is
 * taken when either pointer is NULL -- confirm. */
160 strcmp_null(const char *s1
, const char *s2
)
163 return (!!s1
) - (!!s2
);
166 return strcmp(s1
, s2
);
/* Compares s1 and s2 with special treatment of numbers.
 * The visible code advances both pointers over their common prefix while
 * accumulating a decimal value in `number`, derives num1 and num2 from that
 * value and atoi() of the remaining text, and has a final return based only
 * on which string still has characters left.
 * NOTE(review): the return type, all local declarations, the conditions
 * guarding the accumulation and the num1/num2 computation, and the return
 * that uses num1/num2 are missing from this listing, so the exact ordering
 * rules cannot be confirmed from here. */
170 strcmp_numeric(const char *s1
, const char *s2
)
/* Skip the part on which both strings agree. */
175 for (; *s1
&& *s2
&& *s1
== *s2
; s1
++, s2
++) {
179 number
= 10 * number
+ (c
- '0');
/* NOTE(review): atoi() reports neither conversion errors nor overflow. */
185 num1
= number
* 10 + atoi(s1
);
186 num2
= number
* 10 + atoi(s2
);
/* Evaluates to 1 when only s2 has characters left, -1 when only s1 has, and
 * 0 when both or neither do. */
192 return !!*s2
- !!*s1
;
197 * Unicode / UTF-8 handling
199 * NOTE: Much of the following code for dealing with Unicode is derived from
200 * ELinks' UTF-8 code developed by Scrool <scroolik@gmail.com>. The original
201 * file is src/intl/charset.c from the UTF-8 branch commit elinks-0.11.0-g31f2c28.
/* Classifies the code point c by display width using two groups of range
 * tests: a list of East Asian / fullwidth ranges and a list of combining
 * mark ranges.
 * NOTE(review): the return type, the opening of the first condition, every
 * return statement and any use of tab_size are missing from this listing;
 * presumably the first group yields width 2 and the second width 0 --
 * confirm. */
205 unicode_width(unsigned long c
, int tab_size
)
208 (c
<= 0x115f /* Hangul Jamo */
/* CJK Radicals Supplement through Yi Radicals, excluding U+303F. */
211 || (c
>= 0x2e80 && c
<= 0xa4cf && c
!= 0x303f)
213 || (c
>= 0xac00 && c
<= 0xd7a3) /* Hangul Syllables */
214 || (c
>= 0xf900 && c
<= 0xfaff) /* CJK Compatibility Ideographs */
215 || (c
>= 0xfe30 && c
<= 0xfe6f) /* CJK Compatibility Forms */
216 || (c
>= 0xff00 && c
<= 0xff60) /* Fullwidth Forms */
217 || (c
>= 0xffe0 && c
<= 0xffe6)
/* Planes 2 and 3 (supplementary ideographs). */
218 || (c
>= 0x20000 && c
<= 0x2fffd)
219 || (c
>= 0x30000 && c
<= 0x3fffd)))
222 if ((c
>= 0x0300 && c
<= 0x036f) /* combining diacritical marks */
223 || (c
>= 0x1dc0 && c
<= 0x1dff) /* combining diacritical marks supplement */
224 || (c
>= 0x20d0 && c
<= 0x20ff) /* combining diacritical marks for symbols */
225 || (c
>= 0xfe20 && c
<= 0xfe2f)) /* combining half marks */
234 /* Number of bytes used for encoding a UTF-8 character, indexed by first byte.
235 * Illegal first bytes are set to one. */
236 static const unsigned char utf8_bytes
[256] = {
/* 0x00-0xBF: one byte (ASCII, plus 0x80-0xBF, which cannot start a sequence). */
237 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
238 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
239 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
240 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
241 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
242 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
/* 0xC0-0xDF: two bytes. */
243 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
/* 0xE0-0xEF: three bytes; 0xF0-0xF7: four; 0xF8-0xFB: five; 0xFC-0xFD: six;
 * 0xFE-0xFF: one. */
244 3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,1,1,
/* Looks up, in the utf8_bytes table, the encoded length associated with the
 * first byte of string.
 * NOTE(review): the return type is missing from this listing. */
248 utf8_char_length(const char *string
)
/* Read the byte as unsigned so that it is a valid index into the 256-entry
 * table. The cast also drops the const qualifier; a cast to
 * (const unsigned char *) would avoid that. */
250 int c
= *(unsigned char *) string
;
252 return utf8_bytes
[c
];
255 /* Decode UTF-8 multi-byte representation into a Unicode character. */
/* string points at the first byte of the sequence and length is its size in
 * bytes. The visible statements assemble the code point from the payload
 * bits of a lead byte and of 1 to 5 continuation bytes (6 bits each).
 * Returns the code point, or 0 when the assembled value exceeds 0xffff.
 * NOTE(review): the return type, the construct selecting a group by length
 * (presumably a switch on length), the single-byte case and the handling of
 * unsupported lengths are missing from this listing. */
257 utf8_to_unicode(const char *string
, size_t length
)
259 unsigned long unicode
;
/* Two bytes: 5 payload bits in the lead byte. */
266 unicode
= (string
[0] & 0x1f) << 6;
267 unicode
+= (string
[1] & 0x3f);
/* Three bytes: 4 payload bits in the lead byte. */
270 unicode
= (string
[0] & 0x0f) << 12;
271 unicode
+= ((string
[1] & 0x3f) << 6);
272 unicode
+= (string
[2] & 0x3f);
/* Four bytes. NOTE(review): a four-byte lead byte (11110xxx) carries 3
 * payload bits, so the mask would normally be 0x07; 0x0f additionally keeps
 * the bit that is 0 in a well-formed lead byte. */
275 unicode
= (string
[0] & 0x0f) << 18;
276 unicode
+= ((string
[1] & 0x3f) << 12);
277 unicode
+= ((string
[2] & 0x3f) << 6);
278 unicode
+= (string
[3] & 0x3f);
/* Five bytes. NOTE(review): a five-byte lead byte (111110xx) carries 2
 * payload bits, so the mask would normally be 0x03; 0x0f also keeps a marker
 * bit that is always set, adding 0x08 << 24 to the result. */
281 unicode
= (string
[0] & 0x0f) << 24;
282 unicode
+= ((string
[1] & 0x3f) << 18);
283 unicode
+= ((string
[2] & 0x3f) << 12);
284 unicode
+= ((string
[3] & 0x3f) << 6);
285 unicode
+= (string
[4] & 0x3f);
/* Six bytes: 1 payload bit in the lead byte. */
288 unicode
= (string
[0] & 0x01) << 30;
289 unicode
+= ((string
[1] & 0x3f) << 24);
290 unicode
+= ((string
[2] & 0x3f) << 18);
291 unicode
+= ((string
[3] & 0x3f) << 12);
292 unicode
+= ((string
[4] & 0x3f) << 6);
293 unicode
+= (string
[5] & 0x3f);
299 /* Invalid characters could return the special 0xfffd value but NUL
300 * should be just as good. */
/* NOTE(review): the 0xffff limit turns every code point outside the Basic
 * Multilingual Plane into 0, although unicode_width() in this file tests
 * ranges from 0x20000 upwards; Unicode's upper bound is 0x10ffff -- confirm
 * which limit is intended. */
301 return unicode
> 0xffff ? 0 : unicode
;
304 /* Calculates how much of string can be shown within the given maximum width
305 * and sets trimmed parameter to non-zero value if all of string could not be
306 * shown. If the reserve flag is true, it will reserve at least one
307 * trailing character, which can be useful when drawing a delimiter.
309 * Returns the number of bytes to output from string to satisfy max_width. */
/* start     - in: address of the pointer to the text to measure.
 * max_chars - number of bytes of *start to consider (it is added to the
 *             pointer directly); a negative value means "up to the NUL".
 * skip      - display columns to skip; reduced by each character's width.
 * width     - in/out running display width.
 * max_width - width limit; 0 disables the limit test.
 * tab_size  - forwarded to unicode_width().
 * NOTE(review): the return type, the declaration of ucwidth, the statements
 * guarded by several of the tests below, the updates of *width, *trimmed,
 * last_bytes and string, and any modification of *start are missing from
 * this listing, so the description above could not be fully checked against
 * the code. */
311 utf8_length(const char **start
, int max_chars
, size_t skip
, int *width
, size_t max_width
, int *trimmed
, bool reserve
, int tab_size
)
313 const char *string
= *start
;
314 const char *end
= max_chars
< 0 ? strchr(string
, '\0') : string
+ max_chars
;
/* Size and width of the previously processed character, used by the reserve
 * handling below to step back by one character. */
315 unsigned char last_bytes
= 0;
316 size_t last_ucwidth
= 0;
321 while (string
< end
) {
322 unsigned char bytes
= utf8_char_length(string
);
324 unsigned long unicode
;
/* The sequence announced by the lead byte would extend past end. */
326 if (string
+ bytes
> end
)
329 /* Change representation to figure out whether
330 * it is a single- or double-width character. */
332 unicode
= utf8_to_unicode(string
, bytes
);
333 /* FIXME: Graceful handling of invalid Unicode character. */
337 ucwidth
= unicode_width(unicode
, tab_size
);
/* Consume up to ucwidth columns of the skip budget without going below 0. */
339 skip
-= ucwidth
<= skip
? ucwidth
: skip
;
/* NOTE(review): *width is an int and max_width a size_t, so both this
 * comparison and the equality test below are performed after conversion to
 * an unsigned type. */
343 if (max_width
> 0 && *width
> max_width
) {
346 if (reserve
&& *width
== max_width
) {
347 string
-= last_bytes
;
348 *width
-= last_ucwidth
;
356 last_ucwidth
= ucwidth
;
/* Byte distance between the final position and *start. */
362 return string
- *start
;
/* Runs utf8_length() over text with no columns skipped, reserve disabled and
 * a tab size of 1, limited to max_bytes bytes and max_width columns.
 * NOTE(review): the return type, the declarations of text_width and trimmed
 * and the return statement are missing from this listing; presumably the
 * accumulated text_width is returned -- confirm. */
366 utf8_width_of(const char *text
, int max_bytes
, int max_width
)
/* Work on a copy because utf8_length() takes the address of the pointer. */
369 const char *tmp
= text
;
372 utf8_length(&tmp
, max_bytes
, 0, &text_width
, max_width
, &trimmed
, false, 1);
376 /* vim: set ts=8 sw=8 noexpandtab: */