Merge pull request #530 from jjlin/master
[tig.git] / src / string.c
blob7428986e86e0bb36f2bf3cdd8d606f3c9ee8fc00
/* Copyright (c) 2006-2015 Jonas Fonseca <jonas.fonseca@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
14 #include "tig/tig.h"
15 #include "tig/string.h"
/*
 * Strings.
 */
/* Returns true when str is non-empty and consists solely of decimal
 * digits; returns false for the empty string. */
bool
string_isnumber(const char *str)
{
	int pos;

	for (pos = 0; str[pos]; pos++) {
		/* The <ctype.h> classifiers require a value representable as
		 * unsigned char; a negative plain char is undefined behavior. */
		if (!isdigit((unsigned char) str[pos]))
			return false;
	}

	return pos > 0;
}
34 bool
35 iscommit(const char *str)
37 int pos;
39 for (pos = 0; str[pos]; pos++) {
40 if (!isxdigit(str[pos]))
41 return false;
44 return 7 <= pos && pos < SIZEOF_REV;
/* Compares the tail of str against suffix.
 *
 * str:    the string to inspect.
 * slen:   number of bytes of str to consider, or a negative value to use
 *         strlen(str).
 * suffix: NUL-terminated suffix to look for.
 *
 * Returns 0 when the considered part of str ends with suffix, and a
 * non-zero value otherwise.  The string must be strictly longer than the
 * suffix; a string that is equal to (or shorter than) suffix yields -1. */
int
suffixcmp(const char *str, int slen, const char *suffix)
{
	size_t len = slen >= 0 ? (size_t) slen : strlen(str);
	size_t suffixlen = strlen(suffix);

	if (suffixlen >= len)
		return -1;

	/* Bound the comparison to the suffix length so that bytes following
	 * an explicit slen are never taken into account. */
	return strncmp(str + len - suffixlen, suffix, suffixlen);
}
/* Copies at most srclen bytes of src into dst and always NUL-terminates
 * the result when dstlen is non-zero.
 *
 * dst:    destination buffer.
 * dstlen: total size of dst in bytes, including room for the NUL.
 * src:    source string; copying stops early at its NUL terminator.
 * srclen: maximum number of bytes to take from src.
 *
 * The copy is truncated to dstlen - 1 bytes.  Nothing is written when
 * dstlen is zero. */
void
string_ncopy_do(char *dst, size_t dstlen, const char *src, size_t srclen)
{
	/* Without this guard dstlen - 1 wraps around to SIZE_MAX and the
	 * terminator would be written outside of dst. */
	if (!dstlen)
		return;

	if (srclen > dstlen - 1)
		srclen = dstlen - 1;

	strncpy(dst, src, srclen);
	dst[srclen] = 0;
}
66 void
67 string_copy_rev(char *dst, const char *src)
69 size_t srclen;
71 if (!*src)
72 return;
74 for (srclen = 0; srclen < SIZEOF_REV; srclen++)
75 if (!src[srclen] || isspace(src[srclen]))
76 break;
78 string_ncopy_do(dst, SIZEOF_REV, src, srclen);
/* Copies the revision that follows the "commit " prefix of src into dst
 * using string_copy_rev().  The prefix itself is not checked; src is
 * simply advanced by STRING_SIZE("commit "). */
void
string_copy_rev_from_commit_line(char *dst, const char *src)
{
	string_copy_rev(dst, src + STRING_SIZE("commit "));
}
/* Determines how many bytes of src can be consumed before the
 * tab-expanded width reaches max_size.
 *
 * src:      input bytes.
 * srclen:   number of bytes available in src.
 * tabsize:  tab stop distance; a tab advances to the next multiple of it.
 * max_size: expanded width at which scanning stops.
 *
 * Returns the number of bytes of src that were scanned. */
size_t
string_expanded_length(const char *src, size_t srclen, size_t tabsize, size_t max_size)
{
	size_t size, pos;

	for (size = pos = 0; pos < srclen && size < max_size; pos++) {
		if (src[pos] == '\t' && tabsize > 0) {
			size_t expanded = tabsize - (size % tabsize);

			size += expanded;
		} else {
			/* Also taken for a tab when tabsize is zero, which
			 * would otherwise be a modulo by zero. */
			size++;
		}
	}

	return pos;
}
/* Copies src into dst while expanding tabs to spaces and replacing other
 * whitespace and control characters with a single space.
 *
 * dst:     destination buffer; always NUL-terminated when dstlen > 0.
 * dstlen:  size of dst in bytes, including room for the NUL.
 * src:     input string; processing stops at its NUL terminator.
 * srclen:  maximum number of bytes to read from src, or -1 for no limit.
 * tabsize: tab stop distance; values below 1 are treated as 1.
 *
 * Returns the number of bytes consumed from src. */
size_t
string_expand(char *dst, size_t dstlen, const char *src, int srclen, int tabsize)
{
	size_t size, pos;

	/* dstlen - 1 would wrap around for an empty destination. */
	if (!dstlen)
		return 0;
	/* Guard the modulo below against a zero or negative tab size. */
	if (tabsize < 1)
		tabsize = 1;

	for (size = pos = 0; size < dstlen - 1 && (srclen < 0 || pos < (size_t) srclen) && src[pos]; pos++) {
		/* unsigned char keeps the <ctype.h> calls well defined. */
		const unsigned char c = src[pos];

		if (c == '\t') {
			size_t expanded = tabsize - (size % tabsize);

			/* Clip the padding so that the terminator still fits. */
			if (expanded + size >= dstlen - 1)
				expanded = dstlen - size - 1;
			/* memset() instead of copying from a fixed-length
			 * literal, which is over-read for wide tab stops. */
			memset(dst + size, ' ', expanded);
			size += expanded;
		} else if (isspace(c) || iscntrl(c)) {
			dst[size++] = ' ';
		} else {
			dst[size++] = src[pos];
		}
	}

	dst[size] = 0;
	return pos;
}
/* Strips leading and trailing whitespace from name.
 *
 * Trailing whitespace is overwritten with NUL bytes in place.  Returns a
 * pointer to the first non-whitespace byte inside name. */
char *
chomp_string(char *name)
{
	size_t namelen;

	/* Casts to unsigned char keep isspace() well defined for bytes
	 * with the high bit set. */
	while (isspace((unsigned char) *name))
		name++;

	/* name[0] is either NUL or non-whitespace here, so it is never a
	 * candidate for trimming. */
	namelen = strlen(name);
	while (namelen > 1 && isspace((unsigned char) name[namelen - 1]))
		name[--namelen] = 0;

	return name;
}
146 bool PRINTF_LIKE(4, 5)
147 string_nformat(char *buf, size_t bufsize, size_t *bufpos, const char *fmt, ...)
149 size_t pos = bufpos ? *bufpos : 0;
150 int retval;
152 FORMAT_BUFFER(buf + pos, bufsize - pos, fmt, retval, false);
153 if (bufpos && retval > 0)
154 *bufpos = pos + retval;
156 return pos >= bufsize ? false : true;
/* NULL-tolerant strcmp().
 *
 * A NULL pointer orders before any non-NULL string and two NULL pointers
 * compare equal; otherwise the result is that of strcmp(s1, s2). */
int
strcmp_null(const char *s1, const char *s2)
{
	if (!s1 || !s2) {
		return (!!s1) - (!!s2);
	}

	return strcmp(s1, s2);
}
/* Compares two strings, treating the digit run at the first point of
 * difference as a number.
 *
 * The common prefix is skipped while remembering where its trailing run
 * of digits (if any) starts, so that "a100" versus "a11" compares 100
 * with 11 rather than only the differing tails.
 *
 * Returns:
 *  - when the numbers differ: positive if s1 holds the smaller number,
 *    negative if it holds the larger one (larger numbers order first);
 *  - otherwise, when exactly one string is exhausted: 1 if it is s1 and
 *    -1 if it is s2;
 *  - otherwise the difference of the first differing characters, which
 *    is 0 for equal strings. */
int
strcmp_numeric(const char *s1, const char *s2)
{
	const char *digits1 = s1;
	const char *digits2 = s2;
	long num1, num2;

	for (; *s1 && *s2 && *s1 == *s2; s1++, s2++) {
		/* A non-digit ends the current digit run; the next run can
		 * start no earlier than the following character. */
		if (!isdigit((unsigned char) *s1)) {
			digits1 = s1 + 1;
			digits2 = s2 + 1;
		}
	}

	/* Parse from the start of the digit run rather than combining a
	 * partial prefix value with atoi() of the tail, which mis-ordered
	 * numbers of different length and could overflow an int. */
	num1 = strtol(digits1, NULL, 10);
	num2 = strtol(digits2, NULL, 10);

	if (num1 != num2)
		return num1 < num2 ? 1 : -1;

	if (!!*s1 != !!*s2)
		return !!*s2 - !!*s1;
	return *s1 - *s2;
}
/*
 * Unicode / UTF-8 handling
 *
 * NOTE: Much of the following code for dealing with Unicode is derived from
 * ELinks' UTF-8 code developed by Scrool <scroolik@gmail.com>. Origin file is
 * src/intl/charset.c from the UTF-8 branch commit elinks-0.11.0-g31f2c28.
 */
/* Returns the number of display columns used by the Unicode character c:
 * 2 for the wide ranges listed below, 0 for the listed combining marks,
 * tab_size for a tab and 1 for everything else. */
int
unicode_width(unsigned long c, int tab_size)
{
	if (c >= 0x1100 &&
	   (c <= 0x115f				/* Hangul Jamo */
	    || c == 0x2329
	    || c == 0x232a
	    || (c >= 0x2e80  && c <= 0xa4cf && c != 0x303f)
						/* CJK ... Yi */
	    || (c >= 0xac00  && c <= 0xd7a3)	/* Hangul Syllables */
	    || (c >= 0xf900  && c <= 0xfaff)	/* CJK Compatibility Ideographs */
	    || (c >= 0xfe30  && c <= 0xfe6f)	/* CJK Compatibility Forms */
	    || (c >= 0xff00  && c <= 0xff60)	/* Fullwidth Forms */
	    || (c >= 0xffe0  && c <= 0xffe6)
	    || (c >= 0x20000 && c <= 0x2fffd)
	    || (c >= 0x30000 && c <= 0x3fffd)))
		return 2;

	if ((c >= 0x0300 && c <= 0x036f)	/* combining diacritical marks */
	    || (c >= 0x1dc0 && c <= 0x1dff)	/* combining diacritical marks supplement */
	    || (c >= 0x20d0 && c <= 0x20ff)	/* combining diacritical marks for symbols */
	    || (c >= 0xfe20 && c <= 0xfe2f))	/* combining half marks */
		return 0;

	if (c == '\t')
		return tab_size;

	return 1;
}
/* Number of bytes used for encoding a UTF-8 character, indexed by its
 * first byte.  Illegal first bytes are set to one. */
static const unsigned char utf8_bytes[256] = {
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
	3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,1,1,
};

/* Returns the encoded length in bytes of the UTF-8 character starting at
 * string, looked up from its first byte only. */
unsigned char
utf8_char_length(const char *string)
{
	/* Read the byte as unsigned so that it is a valid table index. */
	int c = *(unsigned char *) string;

	return utf8_bytes[c];
}
/* Decode UTF-8 multi-byte representation into a Unicode character.
 *
 * string: points at the first byte of the sequence.
 * length: number of bytes in the sequence (1 to 6).
 *
 * Returns the decoded code point, or 0 when length is out of range, a
 * single byte is not ASCII, or the result lies beyond U+10FFFF. */
unsigned long
utf8_to_unicode(const char *string, size_t length)
{
	/* Work on unsigned bytes so that the result does not depend on
	 * whether plain char is signed on the target platform. */
	const unsigned char *bytes = (const unsigned char *) string;
	unsigned long unicode;

	switch (length) {
	case 1:
		/* A lone byte with the high bit set is a stray continuation
		 * byte or an illegal lead byte. */
		if (bytes[0] >= 0x80)
			return 0;
		unicode  = bytes[0];
		break;
	case 2:
		unicode  = (unsigned long) (bytes[0] & 0x1f) << 6;
		unicode +=                 (bytes[1] & 0x3f);
		break;
	case 3:
		unicode  = (unsigned long) (bytes[0] & 0x0f) << 12;
		unicode += (unsigned long) (bytes[1] & 0x3f) << 6;
		unicode +=                 (bytes[2] & 0x3f);
		break;
	case 4:
		/* A four byte lead (11110xxx) carries three payload bits. */
		unicode  = (unsigned long) (bytes[0] & 0x07) << 18;
		unicode += (unsigned long) (bytes[1] & 0x3f) << 12;
		unicode += (unsigned long) (bytes[2] & 0x3f) << 6;
		unicode +=                 (bytes[3] & 0x3f);
		break;
	case 5:
		/* A five byte lead (111110xx) carries two payload bits. */
		unicode  = (unsigned long) (bytes[0] & 0x03) << 24;
		unicode += (unsigned long) (bytes[1] & 0x3f) << 18;
		unicode += (unsigned long) (bytes[2] & 0x3f) << 12;
		unicode += (unsigned long) (bytes[3] & 0x3f) << 6;
		unicode +=                 (bytes[4] & 0x3f);
		break;
	case 6:
		unicode  = (unsigned long) (bytes[0] & 0x01) << 30;
		unicode += (unsigned long) (bytes[1] & 0x3f) << 24;
		unicode += (unsigned long) (bytes[2] & 0x3f) << 18;
		unicode += (unsigned long) (bytes[3] & 0x3f) << 12;
		unicode += (unsigned long) (bytes[4] & 0x3f) << 6;
		unicode +=                 (bytes[5] & 0x3f);
		break;
	default:
		return 0;
	}

	/* Invalid characters could return the special 0xfffd value but NUL
	 * should be just as good.  U+10FFFF is the last valid code point, so
	 * characters outside the Basic Multilingual Plane are accepted. */
	return unicode > 0x10ffff ? 0 : unicode;
}
/* Calculates how much of string can be shown within the given maximum width
 * and sets trimmed parameter to non-zero value if all of string could not be
 * shown. If the reserve flag is true, it will reserve at least one
 * trailing character, which can be useful when drawing a delimiter.
 *
 * start:     in/out pointer to the text; advanced past skipped characters.
 * max_chars: number of bytes of *start to consider, or negative to scan up
 *            to the NUL terminator.
 * skip:      display width to skip at the beginning of the text.
 * width:     out; accumulated display width of the scanned characters.
 * max_width: maximum display width, or 0 for no limit.
 * trimmed:   out; set to 1 when the text did not fit within max_width.
 * reserve:   see above.
 * tab_size:  display width passed to unicode_width() for tab characters.
 *
 * Returns the number of bytes to output from string to satisfy max_width. */
size_t
utf8_length(const char **start, int max_chars, size_t skip, int *width, size_t max_width, int *trimmed, bool reserve, int tab_size)
{
	const char *string = *start;
	const char *end = max_chars < 0 ? strchr(string, '\0') : string + max_chars;
	/* Byte count and width of the last visible character, including any
	 * zero-width characters that followed it. */
	unsigned char last_bytes = 0;
	size_t last_ucwidth = 0;

	*width = 0;
	*trimmed = 0;

	while (string < end) {
		unsigned char bytes = utf8_char_length(string);
		size_t ucwidth;
		unsigned long unicode;

		/* Stop at a multi-byte character cut off by the end. */
		if (string + bytes > end)
			break;

		/* Change representation to figure out whether
		 * it is a single- or double-width character. */

		unicode = utf8_to_unicode(string, bytes);
		/* FIXME: Graceful handling of invalid Unicode character. */
		if (!unicode)
			break;

		ucwidth = unicode_width(unicode, tab_size);
		if (skip > 0) {
			skip -= ucwidth <= skip ? ucwidth : skip;
			*start += bytes;
		}
		*width  += ucwidth;
		if (max_width > 0 && *width > max_width) {
			*trimmed = 1;
			*width -= ucwidth;
			/* The text fills max_width exactly; give up the last
			 * visible character to make room. */
			if (reserve && *width == max_width) {
				string -= last_bytes;
				*width -= last_ucwidth;
			}
			break;
		}

		string  += bytes;
		if (ucwidth) {
			last_bytes = bytes;
			last_ucwidth = ucwidth;
		} else {
			/* Zero-width characters belong to the preceding
			 * visible character. */
			last_bytes += bytes;
		}
	}

	return string - *start;
}
/* Returns the display width of text as computed by utf8_length().
 *
 * text:      the string to measure.
 * max_bytes: passed as utf8_length()'s max_chars (negative scans up to
 *            the NUL terminator).
 * max_width: passed as utf8_length()'s max_width (0 means no limit).
 *
 * Tabs are counted with a width of 1 and nothing is skipped or reserved. */
int
utf8_width_of(const char *text, int max_bytes, int max_width)
{
	int text_width = 0;
	const char *tmp = text;
	int trimmed = 0;

	utf8_length(&tmp, max_bytes, 0, &text_width, max_width, &trimmed, false, 1);
	return text_width;
}
376 /* vim: set ts=8 sw=8 noexpandtab: */