Highlight search results
[tig.git] / src / string.c
blob9d3b986bac19f5683a402239197a717528cc7df1
/* Copyright (c) 2006-2015 Jonas Fonseca <jonas.fonseca@gmail.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of
 * the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 */
14 #include "tig/tig.h"
15 #include "tig/string.h"
/*
 * Strings.
 */
/* Check whether str is a non-empty run of decimal digits.
 *
 * Returns true only when every byte of str is a digit and str holds at
 * least one character; the empty string yields false. */
bool
string_isnumber(const char *str)
{
	int pos;

	for (pos = 0; str[pos]; pos++) {
		/* <ctype.h> classifiers require a value representable as
		 * unsigned char; a negative plain char is undefined. */
		if (!isdigit((unsigned char) str[pos]))
			return false;
	}

	return pos > 0;
}
34 bool
35 iscommit(const char *str)
37 int pos;
39 for (pos = 0; str[pos]; pos++) {
40 if (!isxdigit(str[pos]))
41 return false;
44 return 7 <= pos && pos < SIZEOF_REV;
/* Compare the tail of str against suffix.
 *
 * str:    string to inspect.
 * slen:   number of bytes of str to consider, or a negative value to use
 *         strlen(str).
 * suffix: NUL-terminated suffix to look for.
 *
 * Returns 0 when the considered part of str ends with suffix. Returns -1
 * when suffix is not strictly shorter than the considered part (so a string
 * that merely equals suffix does not match). Otherwise returns a nonzero
 * value with the sign of the byte-wise comparison. */
int
suffixcmp(const char *str, int slen, const char *suffix)
{
	size_t len = slen >= 0 ? (size_t) slen : strlen(str);
	size_t suffixlen = strlen(suffix);

	if (suffixlen >= len)
		return -1;

	/* Bound the comparison to the suffix length so that an explicit slen
	 * shorter than the whole string is honored instead of comparing on
	 * through to the terminating NUL of str. */
	return strncmp(str + len - suffixlen, suffix, suffixlen);
}
/* Copy at most srclen bytes of src into dst and NUL-terminate the result.
 *
 * dst:    destination buffer.
 * dstlen: total size of dst in bytes, including room for the terminator.
 * src:    source bytes; copying stops early at a NUL byte (strncpy rules).
 * srclen: maximum number of bytes to take from src.
 *
 * The copy is truncated to dstlen - 1 bytes. A zero-sized destination is
 * left untouched. */
void
string_ncopy_do(char *dst, size_t dstlen, const char *src, size_t srclen)
{
	/* Without room for even the terminator, dstlen - 1 would wrap around
	 * and the clamp below would no longer limit the copy. */
	if (dstlen == 0)
		return;

	if (srclen > dstlen - 1)
		srclen = dstlen - 1;

	strncpy(dst, src, srclen);
	dst[srclen] = 0;
}
66 void
67 string_copy_rev(char *dst, const char *src)
69 size_t srclen;
71 if (!*src)
72 return;
74 for (srclen = 0; srclen < SIZEOF_REV; srclen++)
75 if (isspace(src[srclen]))
76 break;
78 string_ncopy_do(dst, SIZEOF_REV, src, srclen);
/* Copy the revision that follows the leading "commit " of src into dst.
 *
 * The first STRING_SIZE("commit ") bytes of src are skipped unconditionally
 * and the remainder is handed to string_copy_rev().
 * NOTE(review): src is not checked to actually start with "commit " or to be
 * at least that long -- callers presumably guarantee it; confirm. */
void
string_copy_rev_from_commit_line(char *dst, const char *src)
{
	string_copy_rev(dst, src + STRING_SIZE("commit "));
}
/* Find how many bytes of src fit once tabs are expanded.
 *
 * src:      text to measure (need not be NUL-terminated).
 * srclen:   number of bytes available in src.
 * tabsize:  distance between tab stops; 0 makes tabs take no columns.
 * max_size: scanning stops once the expanded size reaches this value.
 *
 * Returns the number of bytes of src consumed before either srclen bytes
 * were read or the expanded size reached max_size. */
size_t
string_expanded_length(const char *src, size_t srclen, size_t tabsize, size_t max_size)
{
	size_t size, pos;

	for (size = pos = 0; pos < srclen && size < max_size; pos++) {
		if (src[pos] == '\t') {
			/* Advance to the next tab stop. A tab size of zero
			 * must not reach the modulo (division by zero). */
			size_t expanded = tabsize ? tabsize - (size % tabsize) : 0;

			size += expanded;
		} else {
			size++;
		}
	}

	return pos;
}
/* Copy src into dst, replacing each tab with spaces up to the next tab stop.
 *
 * dst:     destination buffer; always NUL-terminated when dstlen > 0.
 * dstlen:  size of dst in bytes, including room for the terminator.
 * src:     NUL-terminated input text.
 * tabsize: distance between tab stops; values <= 0 drop tabs entirely.
 *
 * Output is truncated to dstlen - 1 bytes. Returns the number of bytes of
 * src that were consumed. A zero-sized dst is left untouched and 0 is
 * returned. */
size_t
string_expand(char *dst, size_t dstlen, const char *src, int tabsize)
{
	size_t size, pos;

	/* dstlen - 1 below would wrap around for an empty buffer. */
	if (dstlen == 0)
		return 0;

	for (size = pos = 0; size < dstlen - 1 && src[pos]; pos++) {
		if (src[pos] == '\t') {
			size_t expanded = tabsize > 0
				? (size_t) tabsize - (size % (size_t) tabsize)
				: 0;

			/* Never pad beyond the space left for the terminator. */
			if (expanded + size >= dstlen - 1)
				expanded = dstlen - size - 1;
			/* Fill in place so any tab width works; copying from
			 * a fixed-size blank literal would over-read it. */
			memset(dst + size, ' ', expanded);
			size += expanded;
		} else {
			dst[size++] = src[pos];
		}
	}

	dst[size] = 0;
	return pos;
}
/* Strip leading and trailing whitespace from name.
 *
 * Trailing whitespace is overwritten with NUL bytes in place; leading
 * whitespace is skipped by advancing the pointer. Returns a pointer into
 * the original buffer at the first non-whitespace byte. */
char *
chomp_string(char *name)
{
	int namelen;

	/* Casts keep isspace() defined for bytes >= 0x80 where plain char
	 * is signed. */
	while (isspace((unsigned char) *name))
		name++;

	/* Index 0 is never whitespace here: it is either the first
	 * non-blank byte or the terminator of an all-blank string. */
	namelen = strlen(name) - 1;
	while (namelen > 0 && isspace((unsigned char) name[namelen]))
		name[namelen--] = 0;

	return name;
}
142 bool PRINTF_LIKE(4, 5)
143 string_nformat(char *buf, size_t bufsize, size_t *bufpos, const char *fmt, ...)
145 size_t pos = bufpos ? *bufpos : 0;
146 int retval;
148 FORMAT_BUFFER(buf + pos, bufsize - pos, fmt, retval, false);
149 if (bufpos && retval > 0)
150 *bufpos = pos + retval;
152 return pos >= bufsize ? false : true;
/* strcmp() variant that tolerates NULL arguments.
 *
 * A NULL string sorts before any non-NULL string and two NULLs compare
 * equal: the result is -1, 0 or 1 when at least one argument is NULL.
 * Otherwise the result is that of strcmp(s1, s2). */
int
strcmp_null(const char *s1, const char *s2)
{
	if (!s1 || !s2) {
		/* !! maps each pointer to 0 (NULL) or 1 (set). */
		return (!!s1) - (!!s2);
	}

	return strcmp(s1, s2);
}
/* Compare two strings, ordering embedded numbers by value.
 *
 * The common prefix is skipped while tracking the digits that immediately
 * precede the first difference; the numbers starting at the point of
 * difference are then compared. When those differ, the result is
 * num2 - num1, i.e. the string holding the larger number compares as
 * smaller. Otherwise a string that ends first compares as greater, and
 * finally the differing bytes are compared directly. */
int
strcmp_numeric(const char *s1, const char *s2)
{
	int number = 0;
	int num1, num2;

	for (; *s1 && *s2 && *s1 == *s2; s1++, s2++) {
		/* Go through unsigned char so that isdigit() never sees a
		 * negative value for bytes >= 0x80. */
		int c = (unsigned char) *s1;

		if (isdigit(c)) {
			number = 10 * number + (c - '0');
		} else {
			number = 0;
		}
	}

	num1 = number * 10 + atoi(s1);
	num2 = number * 10 + atoi(s2);

	if (num1 != num2)
		return num2 - num1;

	if (!!*s1 != !!*s2)
		return !!*s2 - !!*s1;
	return *s1 - *s2;
}
/*
 * Unicode / UTF-8 handling
 *
 * NOTE: Much of the following code for dealing with Unicode is derived from
 * ELinks' UTF-8 code developed by Scrool <scroolik@gmail.com>. Origin file is
 * src/intl/charset.c from the UTF-8 branch commit elinks-0.11.0-g31f2c28.
 */
/* Return the number of display columns occupied by code point c.
 *
 * Returns 2 for the listed wide (mostly East Asian) ranges, 0 for the
 * listed combining-mark ranges, tab_size for a tab character and 1 for
 * everything else. */
int
unicode_width(unsigned long c, int tab_size)
{
	if (c >= 0x1100 &&
	   (c <= 0x115f				/* Hangul Jamo */
	    || c == 0x2329
	    || c == 0x232a
	    || (c >= 0x2e80  && c <= 0xa4cf && c != 0x303f)
						/* CJK ... Yi */
	    || (c >= 0xac00  && c <= 0xd7a3)	/* Hangul Syllables */
	    || (c >= 0xf900  && c <= 0xfaff)	/* CJK Compatibility Ideographs */
	    || (c >= 0xfe30  && c <= 0xfe6f)	/* CJK Compatibility Forms */
	    || (c >= 0xff00  && c <= 0xff60)	/* Fullwidth Forms */
	    || (c >= 0xffe0  && c <= 0xffe6)
	    || (c >= 0x20000 && c <= 0x2fffd)
	    || (c >= 0x30000 && c <= 0x3fffd)))
		return 2;

	if ((c >= 0x0300 && c <= 0x036f)	/* combining diacritical marks */
	    || (c >= 0x1dc0 && c <= 0x1dff)	/* combining diacritical marks supplement */
	    || (c >= 0x20d0 && c <= 0x20ff)	/* combining diacritical marks for symbols */
	    || (c >= 0xfe20 && c <= 0xfe2f))	/* combining half marks */
		return 0;

	if (c == '\t')
		return tab_size;

	return 1;
}
/* Number of bytes used for encoding a UTF-8 character, indexed by its first
 * byte. Bytes that cannot start a sequence (continuation bytes 0x80-0xbf and
 * 0xfe/0xff) are set to one so that scanning always makes progress. */
static const unsigned char utf8_bytes[256] = {
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
	2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
	3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,1,1,
};

/* Return the encoded length in bytes of the UTF-8 character that starts at
 * string, as determined by its first byte alone. */
unsigned char
utf8_char_length(const char *string)
{
	/* Read the byte as unsigned so it is a valid table index. */
	int c = *(unsigned char *) string;

	return utf8_bytes[c];
}
/* Decode a UTF-8 multi-byte representation into a Unicode character.
 *
 * string: first byte of the encoded character.
 * length: number of bytes in the sequence.
 *
 * Returns the decoded code point, or 0 when the input is not accepted: a
 * length outside 1-4, a single byte >= 0x80 (a stray continuation or illegal
 * byte), or a value beyond U+10FFFF. Continuation bytes are masked but not
 * otherwise validated. */
unsigned long
utf8_to_unicode(const char *string, size_t length)
{
	/* Work on unsigned bytes so the result does not depend on whether
	 * plain char is signed on the target platform. */
	const unsigned char *bytes = (const unsigned char *) string;
	unsigned long unicode;

	switch (length) {
	case 1:
		if (bytes[0] >= 0x80)
			return 0;
		unicode  = bytes[0];
		break;
	case 2:
		unicode  = (unsigned long) (bytes[0] & 0x1f) << 6;
		unicode +=                 (bytes[1] & 0x3f);
		break;
	case 3:
		unicode  = (unsigned long) (bytes[0] & 0x0f) << 12;
		unicode += (unsigned long) (bytes[1] & 0x3f) << 6;
		unicode +=                 (bytes[2] & 0x3f);
		break;
	case 4:
		unicode  = (unsigned long) (bytes[0] & 0x07) << 18;
		unicode += (unsigned long) (bytes[1] & 0x3f) << 12;
		unicode += (unsigned long) (bytes[2] & 0x3f) << 6;
		unicode +=                 (bytes[3] & 0x3f);
		break;
	default:
		/* Five- and six-byte forms can only encode values above
		 * U+10FFFF or overlong duplicates of shorter sequences. */
		return 0;
	}

	/* Invalid characters could return the special 0xfffd value but NUL
	 * should be just as good. */
	return unicode > 0x10ffff ? 0 : unicode;
}
/* Calculates how much of string can be shown within the given maximum width
 * and sets trimmed parameter to non-zero value if all of string could not be
 * shown. If the reserve flag is true, it will reserve at least one
 * trailing character, which can be useful when drawing a delimiter.
 *
 * start:     in/out; points at the text and is advanced past every character
 *            consumed while skip is still positive.
 * max_chars: limit in bytes on how much of the text is examined; a negative
 *            value means up to the terminating NUL.
 * skip:      number of display columns to skip at the beginning.
 * width:     out; sum of the widths of all characters consumed, including
 *            those that were skipped.
 * max_width: maximum display width; 0 disables the limit.
 * trimmed:   out; set to 1 when the scan stopped because of max_width.
 * reserve:   when the text fills max_width exactly and is then cut, also
 *            drop the last visible character (plus any zero-width
 *            characters following it).
 * tab_size:  width reported for tab characters.
 *
 * Scanning also stops at a character that would extend past the byte limit
 * and at anything utf8_to_unicode() decodes to 0.
 *
 * Returns the number of bytes to output from string to satisfy max_width,
 * counted from the updated *start. */
size_t
utf8_length(const char **start, int max_chars, size_t skip, int *width, size_t max_width, int *trimmed, bool reserve, int tab_size)
{
	const char *string = *start;
	const char *end = max_chars < 0 ? strchr(string, '\0') : string + max_chars;
	unsigned char last_bytes = 0;
	size_t last_ucwidth = 0;

	*width = 0;
	*trimmed = 0;

	while (string < end) {
		unsigned char bytes = utf8_char_length(string);
		size_t ucwidth;
		unsigned long unicode;

		if (string + bytes > end)
			break;

		/* Change representation to figure out whether
		 * it is a single- or double-width character. */

		unicode = utf8_to_unicode(string, bytes);
		/* FIXME: Graceful handling of invalid Unicode character. */
		if (!unicode)
			break;

		ucwidth = unicode_width(unicode, tab_size);
		if (skip > 0) {
			/* Consume the skip budget without going below zero. */
			skip -= ucwidth <= skip ? ucwidth : skip;
			*start += bytes;
		}
		*width  += ucwidth;
		if (max_width > 0 && *width > max_width) {
			*trimmed = 1;
			*width -= ucwidth;
			if (reserve && *width == max_width) {
				/* Back out the last visible character together
				 * with the zero-width ones trailing it. */
				string -= last_bytes;
				*width -= last_ucwidth;
			}
			break;
		}

		string  += bytes;
		if (ucwidth) {
			last_bytes = bytes;
			last_ucwidth = ucwidth;
		} else {
			/* Zero-width characters stay attached to the
			 * preceding visible character. */
			last_bytes += bytes;
		}
	}

	return string - *start;
}
/* Return the display width of text as computed by utf8_length().
 *
 * text:      text to measure.
 * max_bytes: passed to utf8_length() as its byte limit (negative means up
 *            to the terminating NUL).
 * max_width: passed to utf8_length() as its width limit (0 for no limit).
 *
 * Nothing is skipped, no trailing character is reserved and tabs are counted
 * as one column wide. */
int
utf8_width_of(const char *text, int max_bytes, int max_width)
{
	int text_width = 0;
	const char *tmp = text;
	int trimmed = false;

	utf8_length(&tmp, max_bytes, 0, &text_width, max_width, &trimmed, false, 1);
	return text_width;
}
372 /* vim: set ts=8 sw=8 noexpandtab: */