1 /* Copyright (c) 2006-2014 Jonas Fonseca <jonas.fonseca@gmail.com>
3 * This program is free software; you can redistribute it and/or
4 * modify it under the terms of the GNU General Public License as
5 * published by the Free Software Foundation; either version 2 of
6 * the License, or (at your option) any later version.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
15 #include "tig/string.h"
/* Check whether str is a non-empty string made up solely of decimal digits.
 *
 * Returns true when every character of str is a digit and there is at least
 * one of them; an empty string is not a number. */
bool
string_isnumber(const char *str)
{
	size_t pos = 0;

	/* The <ctype.h> classifiers are only defined for values representable
	 * as unsigned char, so never hand them a (possibly negative) char. */
	while (isdigit((unsigned char) str[pos]))
		pos++;

	/* The scan stops at the first non-digit; accept only if that was the
	 * terminator and at least one digit was seen. */
	return pos > 0 && str[pos] == '\0';
}
35 iscommit(const char *str
)
39 for (pos
= 0; str
[pos
]; pos
++) {
40 if (!isxdigit(str
[pos
]))
44 return 7 <= pos
&& pos
< SIZEOF_REV
;
/* Compare the tail of str against suffix.
 *
 * str:    the string to inspect.
 * slen:   number of leading bytes of str to consider, or a negative value
 *         to use strlen(str).
 * suffix: the NUL-terminated suffix to look for.
 *
 * Returns 0 when the considered part of str ends with suffix, a non-zero
 * strcmp()-style value when it does not, and -1 when the considered part is
 * not strictly longer than suffix. */
int
suffixcmp(const char *str, int slen, const char *suffix)
{
	size_t len = slen >= 0 ? (size_t) slen : strlen(str);
	size_t suffixlen = strlen(suffix);

	if (suffixlen >= len)
		return -1;

	/* Bound the comparison by the suffix length so that an explicit slen
	 * shorter than the actual string is honored instead of comparing all
	 * the way to the terminating NUL. */
	return strncmp(str + len - suffixlen, suffix, suffixlen);
}
/* Copy at most srclen bytes of src into dst and NUL-terminate the result.
 *
 * dst:    destination buffer.
 * dstlen: total size of dst in bytes, including room for the terminator.
 * src:    source string; copying stops early at its NUL terminator.
 * srclen: maximum number of bytes to take from src.
 *
 * The copy is truncated to dstlen - 1 bytes when it would not fit. Nothing
 * is written when dstlen is zero. */
void
string_ncopy_do(char *dst, size_t dstlen, const char *src, size_t srclen)
{
	/* Without this guard dstlen - 1 wraps around to SIZE_MAX, which
	 * disables the clamp below and lets the copy overflow dst. */
	if (dstlen == 0)
		return;

	if (srclen > dstlen - 1)
		srclen = dstlen - 1;

	/* strncpy() does not terminate on truncation, so do it explicitly. */
	strncpy(dst, src, srclen);
	dst[srclen] = 0;
}
67 string_copy_rev(char *dst
, const char *src
)
74 for (srclen
= 0; srclen
< SIZEOF_REV
; srclen
++)
75 if (isspace(src
[srclen
]))
78 string_ncopy_do(dst
, SIZEOF_REV
, src
, srclen
);
/* Copy the revision that follows a leading "commit " prefix in src into dst.
 *
 * The prefix is skipped by length only; it is not verified.
 * NOTE(review): callers are presumably expected to pass a line that is at
 * least as long as the prefix -- confirm against call sites. */
void
string_copy_rev_from_commit_line(char *dst, const char *src)
{
	string_copy_rev(dst, src + STRING_SIZE("commit "));
}
/* Count how many bytes of src fit within max_size columns once tabs are
 * expanded to the next multiple of tabsize.
 *
 * src:      text to measure; only the first srclen bytes are examined.
 * srclen:   number of bytes available in src.
 * tabsize:  tab stop distance; zero is treated as one.
 * max_size: expanded size at which scanning stops.
 *
 * Returns the number of bytes of src consumed before the expanded size
 * reached max_size or src was exhausted. */
size_t
string_expanded_length(const char *src, size_t srclen, size_t tabsize, size_t max_size)
{
	/* Guard the modulo below against division by zero. */
	size_t tab = tabsize ? tabsize : 1;
	size_t size, pos;

	for (size = pos = 0; pos < srclen && size < max_size; pos++) {
		if (src[pos] == '\t') {
			size_t expanded = tab - (size % tab);

			size += expanded;
		} else {
			size++;
		}
	}

	return pos;
}
/* Copy src into dst, replacing each tab with spaces up to the next multiple
 * of tabsize.
 *
 * dst:     destination buffer; always NUL-terminated unless dstlen is zero.
 * dstlen:  total size of dst in bytes, including room for the terminator.
 * src:     NUL-terminated source text.
 * tabsize: tab stop distance; values below one are treated as one.
 *
 * Returns the number of bytes of src that were consumed, which is less than
 * strlen(src) when dst was too small to hold the expanded text. */
size_t
string_expand(char *dst, size_t dstlen, const char *src, int tabsize)
{
	size_t tab = tabsize > 0 ? (size_t) tabsize : 1;
	size_t size, pos;

	/* dstlen - 1 would wrap around below; there is no room for anything. */
	if (dstlen == 0)
		return 0;

	for (size = pos = 0; size < dstlen - 1 && src[pos]; pos++) {
		if (src[pos] == '\t') {
			size_t expanded = tab - (size % tab);

			/* Clamp the padding so the terminator still fits. */
			if (expanded + size >= dstlen - 1)
				expanded = dstlen - size - 1;
			/* Fill in place rather than copying from a fixed
			 * string of spaces, which would be over-read for
			 * tab sizes longer than that string. */
			memset(dst + size, ' ', expanded);
			size += expanded;
		} else {
			dst[size++] = src[pos];
		}
	}

	dst[size] = 0;
	return pos;
}
/* Strip leading and trailing whitespace from name.
 *
 * Trailing whitespace is removed in place by overwriting it with NUL bytes;
 * leading whitespace is skipped by advancing the pointer.
 *
 * Returns a pointer into name at the first non-whitespace character. */
char *
chomp_string(char *name)
{
	size_t namelen;

	/* <ctype.h> classifiers need unsigned char values; a negative char
	 * would be undefined behavior. */
	while (isspace((unsigned char) *name))
		name++;

	/* After skipping, the first byte is never whitespace, so stop before
	 * reaching it. */
	namelen = strlen(name);
	while (namelen > 1 && isspace((unsigned char) name[namelen - 1]))
		name[--namelen] = 0;

	return name;
}
142 bool PRINTF_LIKE(4, 5)
143 string_nformat(char *buf
, size_t bufsize
, size_t *bufpos
, const char *fmt
, ...)
145 size_t pos
= bufpos
? *bufpos
: 0;
148 FORMAT_BUFFER(buf
+ pos
, bufsize
- pos
, fmt
, retval
, FALSE
);
149 if (bufpos
&& retval
> 0)
150 *bufpos
= pos
+ retval
;
152 return pos
>= bufsize
? FALSE
: TRUE
;
/* NULL-tolerant variant of strcmp().
 *
 * A NULL string sorts before any non-NULL string and two NULL strings
 * compare equal. When both strings are non-NULL the result is that of
 * strcmp(s1, s2). */
int
strcmp_null(const char *s1, const char *s2)
{
	if (!s1 || !s2) {
		/* -1, 0 or 1 depending on which side is missing. */
		return (!!s1) - (!!s2);
	}

	return strcmp(s1, s2);
}
/* Compare two strings, treating the digit run at the first point of
 * difference as a number.
 *
 * The common prefix is skipped while remembering the value of the digit run
 * that leads up to the first difference, so that e.g. "v12" and "v13" are
 * compared as 12 and 13. When the numbers differ, the string holding the
 * larger number sorts first. When they are equal, a string that has ended
 * sorts after one that has not; otherwise the differing characters decide.
 *
 * Returns a value less than, equal to or greater than zero. */
int
strcmp_numeric(const char *s1, const char *s2)
{
	int number = 0;
	int num1, num2;

	for (; *s1 && *s2 && *s1 == *s2; s1++, s2++) {
		/* Go through unsigned char: isdigit() on a negative value is
		 * undefined behavior. */
		int c = (unsigned char) *s1;

		if (isdigit(c)) {
			number = 10 * number + (c - '0');
		} else {
			number = 0;
		}
	}

	/* Continue the shared digit run with each string's own digits. */
	num1 = number * 10 + atoi(s1);
	num2 = number * 10 + atoi(s2);

	if (num1 != num2)
		return num2 - num1;

	if (!!*s1 != !!*s2)
		return !!*s2 - !!*s1;
	return *s1 - *s2;
}
193 * Unicode / UTF-8 handling
195 * NOTE: Much of the following code for dealing with Unicode is derived from
196 * ELinks' UTF-8 code developed by Scrool <scroolik@gmail.com>. Origin file is
197 * src/intl/charset.c from the UTF-8 branch commit elinks-0.11.0-g31f2c28.
/* Return the number of display columns occupied by the Unicode character c.
 *
 * c:        Unicode code point.
 * tab_size: width to report for a tab character.
 *
 * Returns 2 for the listed double-width (mostly East Asian) ranges, 0 for
 * the listed combining mark ranges, tab_size for a tab and 1 for everything
 * else. */
int
unicode_width(unsigned long c, int tab_size)
{
	if (c >= 0x1100 &&
	   (c <= 0x115f				/* Hangul Jamo */
	    || c == 0x2329
	    || c == 0x232a
	    || (c >= 0x2e80  && c <= 0xa4cf && c != 0x303f)
						/* CJK ... Yi */
	    || (c >= 0xac00  && c <= 0xd7a3)	/* Hangul Syllables */
	    || (c >= 0xf900  && c <= 0xfaff)	/* CJK Compatibility Ideographs */
	    || (c >= 0xfe30  && c <= 0xfe6f)	/* CJK Compatibility Forms */
	    || (c >= 0xff00  && c <= 0xff60)	/* Fullwidth Forms */
	    || (c >= 0xffe0  && c <= 0xffe6)
	    || (c >= 0x20000 && c <= 0x2fffd)
	    || (c >= 0x30000 && c <= 0x3fffd)))
		return 2;

	if ((c >= 0x0300 && c <= 0x036f)	/* combining diacritical marks */
	    || (c >= 0x1dc0 && c <= 0x1dff)	/* combining diacritical marks supplement */
	    || (c >= 0x20d0 && c <= 0x20ff)	/* combining diacritical marks for symbols */
	    || (c >= 0xfe20 && c <= 0xfe2f))	/* combining half marks */
		return 0;

	if (c == '\t')
		return tab_size;

	return 1;
}
/* Number of bytes used for encoding a UTF-8 character, indexed by its first
 * byte. Illegal first bytes are set to one so that a scanner always makes
 * progress. */
static const unsigned char utf8_bytes[256] = {
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
	3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,1,1,
};

/* Return the length in bytes of the UTF-8 sequence that starts at string,
 * as announced by its first byte. Only that first byte is inspected. */
unsigned char
utf8_char_length(const char *string)
{
	/* Index with the unsigned byte value; a plain char may be negative. */
	int c = *(const unsigned char *) string;

	return utf8_bytes[c];
}
/* Decode UTF-8 multi-byte representation into a Unicode character.
 *
 * string: start of the encoded sequence; at least length bytes are read.
 * length: number of bytes in the sequence.
 *
 * Returns the decoded code point, or 0 when the sequence is not accepted:
 * a lone byte outside the ASCII range, a length other than 1 to 4 (the
 * historic 5- and 6-byte forms are not valid UTF-8) or a value beyond
 * U+10FFFF. Continuation bytes are not validated. */
unsigned long
utf8_to_unicode(const char *string, size_t length)
{
	/* Work on unsigned bytes so that the result never depends on whether
	 * plain char is signed. */
	const unsigned char *bytes = (const unsigned char *) string;
	unsigned long unicode;

	switch (length) {
	case 1:
		/* A single byte with the high bit set is a stray continuation
		 * or otherwise illegal byte. */
		if (bytes[0] >= 0x80)
			return 0;
		unicode  =   bytes[0];
		break;
	case 2:
		unicode  =  (unsigned long) (bytes[0] & 0x1f) << 6;
		unicode +=  (unsigned long) (bytes[1] & 0x3f);
		break;
	case 3:
		unicode  =  (unsigned long) (bytes[0] & 0x0f) << 12;
		unicode += ((unsigned long) (bytes[1] & 0x3f) << 6);
		unicode +=  (unsigned long) (bytes[2] & 0x3f);
		break;
	case 4:
		unicode  =  (unsigned long) (bytes[0] & 0x07) << 18;
		unicode += ((unsigned long) (bytes[1] & 0x3f) << 12);
		unicode += ((unsigned long) (bytes[2] & 0x3f) << 6);
		unicode +=  (unsigned long) (bytes[3] & 0x3f);
		break;
	default:
		return 0;
	}

	/* Invalid characters could return the special 0xfffd value but NUL
	 * should be just as good. */
	return unicode > 0x10ffff ? 0 : unicode;
}
/* Calculates how much of string can be shown within the given maximum width
 * and sets trimmed parameter to non-zero value if all of string could not be
 * shown. If the reserve flag is TRUE, it will reserve at least one
 * trailing character, which can be useful when drawing a delimiter.
 *
 * start:     in/out pointer to the text; advanced past the characters that
 *            are consumed by skip.
 * skip:      number of leading display columns to skip.
 * width:     out parameter receiving the accumulated display width.
 * max_width: maximum display width, or zero for no limit.
 * trimmed:   out parameter set to non-zero when the text did not fit.
 * reserve:   when the text was trimmed exactly at max_width, give up the
 *            last displayed character (and any zero-width characters
 *            attached to it).
 * tab_size:  width to use for tab characters.
 *
 * Scanning stops early at a truncated sequence or at a character that
 * utf8_to_unicode() does not accept.
 *
 * Returns the number of bytes to output from string to satisfy max_width. */
size_t
utf8_length(const char **start, size_t skip, int *width, size_t max_width, int *trimmed, bool reserve, int tab_size)
{
	const char *string = *start;
	const char *end = strchr(string, '\0');
	unsigned char last_bytes = 0;
	size_t last_ucwidth = 0;

	*width = 0;
	*trimmed = 0;

	while (string < end) {
		unsigned char bytes = utf8_char_length(string);
		size_t ucwidth;
		unsigned long unicode;

		/* The sequence is cut short by the end of the string. */
		if (string + bytes > end)
			break;

		/* Change representation to figure out whether
		 * it is a single- or double-width character. */

		unicode = utf8_to_unicode(string, bytes);
		/* FIXME: Graceful handling of invalid Unicode character. */
		if (!unicode)
			break;

		ucwidth = unicode_width(unicode, tab_size);
		if (skip > 0) {
			/* Never let the remaining skip count wrap around when
			 * a wide character straddles the skip boundary. */
			skip -= ucwidth <= skip ? ucwidth : skip;
			*start += bytes;
		}
		*width  += ucwidth;
		if (max_width > 0 && *width > max_width) {
			*trimmed = 1;
			*width -= ucwidth;
			if (reserve && *width == max_width) {
				string -= last_bytes;
				*width -= last_ucwidth;
			}
			break;
		}

		string  += bytes;
		if (ucwidth) {
			last_bytes = bytes;
			last_ucwidth = ucwidth;
		} else {
			/* Zero-width characters belong to the preceding
			 * character and are given up together with it. */
			last_bytes += bytes;
		}
	}

	return string - *start;
}
362 utf8_width_max(const char *text
, int max
)
365 const char *tmp
= text
;
368 utf8_length(&tmp
, 0, &text_width
, max
, &trimmed
, FALSE
, 1);
372 /* vim: set ts=8 sw=8 noexpandtab: */