src/string.c

   1 /* Copyright (c) 2006-2015 Jonas Fonseca <jonas.fonseca@gmail.com>
   2  *
   3  * This program is free software; you can redistribute it and/or
   4  * modify it under the terms of the GNU General Public License as
   5  * published by the Free Software Foundation; either version 2 of
   6  * the License, or (at your option) any later version.
   7  *
   8  * This program is distributed in the hope that it will be useful,
   9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11  * GNU General Public License for more details.
  12  */
  13
  14 #include "tig/tig.h"
  15 #include "tig/string.h"
  16
  17 /*
  18  * Strings.
  19  */
  20
  21 bool
  22 string_isnumber(const char *str)
  23 {
  24         int pos;
  25
  26         for (pos = 0; str[pos]; pos++) {
  27                 if (!isdigit(str[pos]))
  28                         return false;
  29         }
  30
  31         return pos > 0;
  32 }
  33
  34 bool
  35 iscommit(const char *str)
  36 {
  37         int pos;
  38
  39         for (pos = 0; str[pos]; pos++) {
  40                 if (!isxdigit(str[pos]))
  41                         return false;
  42         }
  43
  44         return 7 <= pos && pos < SIZEOF_REV;
  45 }
  46
  47 int
  48 suffixcmp(const char *str, int slen, const char *suffix)
  49 {
  50         size_t len = slen >= 0 ? slen : strlen(str);
  51         size_t suffixlen = strlen(suffix);
  52
  53         return suffixlen < len ? strcmp(str + len - suffixlen, suffix) : -1;
  54 }
  55
  56 void
  57 string_ncopy_do(char *dst, size_t dstlen, const char *src, size_t srclen)
  58 {
  59         if (srclen > dstlen - 1)
  60                 srclen = dstlen - 1;
  61
  62         strncpy(dst, src, srclen);
  63         dst[srclen] = 0;
  64 }
  65
  66 void
  67 string_copy_rev(char *dst, const char *src)
  68 {
  69         size_t srclen;
  70
  71         if (!*src)
  72                 return;
  73
  74         for (srclen = 0; srclen < SIZEOF_REV; srclen++)
  75                 if (isspace(src[srclen]))
  76                         break;
  77
  78         string_ncopy_do(dst, SIZEOF_REV, src, srclen);
  79 }
  80
  81 void
  82 string_copy_rev_from_commit_line(char *dst, const char *src)
  83 {
  84         string_copy_rev(dst, src + STRING_SIZE("commit "));
  85 }
  86
  87 size_t
  88 string_expanded_length(const char *src, size_t srclen, size_t tabsize, size_t max_size)
  89 {
  90         size_t size, pos;
  91
  92         for (size = pos = 0; pos < srclen && size < max_size; pos++) {
  93                 if (src[pos] == '\t') {
  94                         size_t expanded = tabsize - (size % tabsize);
  95
  96                         size += expanded;
  97                 } else {
  98                         size++;
  99                 }
 100         }
 101
 102         return pos;
 103 }
 104
 105 size_t
 106 string_expand(char *dst, size_t dstlen, const char *src, int tabsize)
 107 {
 108         size_t size, pos;
 109
 110         for (size = pos = 0; size < dstlen - 1 && src[pos]; pos++) {
 111                 if (src[pos] == '\t') {
 112                         size_t expanded = tabsize - (size % tabsize);
 113
 114                         if (expanded + size >= dstlen - 1)
 115                                 expanded = dstlen - size - 1;
 116                         memcpy(dst + size, "        ", expanded);
 117                         size += expanded;
 118                 } else {
 119                         dst[size++] = src[pos];
 120                 }
 121         }
 122
 123         dst[size] = 0;
 124         return pos;
 125 }
 126
 127 char *
 128 chomp_string(char *name)
 129 {
 130         int namelen;
 131
 132         while (isspace(*name))
 133                 name++;
 134
 135         namelen = strlen(name) - 1;
 136         while (namelen > 0 && isspace(name[namelen]))
 137                 name[namelen--] = 0;
 138
 139         return name;
 140 }
 141
 142 bool PRINTF_LIKE(4, 5)
 143 string_nformat(char *buf, size_t bufsize, size_t *bufpos, const char *fmt, ...)
 144 {
 145         size_t pos = bufpos ? *bufpos : 0;
 146         int retval;
 147
 148         FORMAT_BUFFER(buf + pos, bufsize - pos, fmt, retval, false);
 149         if (bufpos && retval > 0)
 150                 *bufpos = pos + retval;
 151
 152         return pos >= bufsize ? false : true;
 153 }
 154
 155 int
 156 strcmp_null(const char *s1, const char *s2)
 157 {
 158         if (!s1 || !s2) {
 159                 return (!!s1) - (!!s2);
 160         }
 161
 162         return strcmp(s1, s2);
 163 }
 164
 165 int
 166 strcmp_numeric(const char *s1, const char *s2)
 167 {
 168         int number = 0;
 169         int num1, num2;
 170
 171         for (; *s1 && *s2 && *s1 == *s2; s1++, s2++) {
 172                 int c = *s1;
 173
 174                 if (isdigit(c)) {
 175                         number = 10 * number + (c - '0');
 176                 } else {
 177                         number = 0;
 178                 }
 179         }
 180
 181         num1 = number * 10 + atoi(s1);
 182         num2 = number * 10 + atoi(s2);
 183
 184         if (num1 != num2)
 185                 return num2 - num1;
 186
 187         if (!!*s1 != !!*s2)
 188                 return !!*s2 - !!*s1;
 189         return *s1 - *s2;
 190 }
 191
 192 /*
 193  * Unicode / UTF-8 handling
 194  *
 195  * NOTE: Much of the following code for dealing with Unicode is derived from
 196  * ELinks' UTF-8 code developed by Scrool <scroolik@gmail.com>. Origin file is
 197  * src/intl/charset.c from the UTF-8 branch commit elinks-0.11.0-g31f2c28.
 198  */
 199
 200 int
 201 unicode_width(unsigned long c, int tab_size)
 202 {
 203         if (c >= 0x1100 &&
 204            (c <= 0x115f                         /* Hangul Jamo */
 205             || c == 0x2329
 206             || c == 0x232a
 207             || (c >= 0x2e80  && c <= 0xa4cf && c != 0x303f)
 208                                                 /* CJK ... Yi */
 209             || (c >= 0xac00  && c <= 0xd7a3)    /* Hangul Syllables */
 210             || (c >= 0xf900  && c <= 0xfaff)    /* CJK Compatibility Ideographs */
 211             || (c >= 0xfe30  && c <= 0xfe6f)    /* CJK Compatibility Forms */
 212             || (c >= 0xff00  && c <= 0xff60)    /* Fullwidth Forms */
 213             || (c >= 0xffe0  && c <= 0xffe6)
 214             || (c >= 0x20000 && c <= 0x2fffd)
 215             || (c >= 0x30000 && c <= 0x3fffd)))
 216                 return 2;
 217
 218         if ((c >= 0x0300 && c <= 0x036f)        /* combining diacretical marks */
 219             || (c >= 0x1dc0 && c <= 0x1dff)     /* combining diacretical marks supplement */
 220             || (c >= 0x20d0 && c <= 0x20ff)     /* combining diacretical marks for symbols */
 221             || (c >= 0xfe20 && c <= 0xfe2f))    /* combining half marks */
 222                 return 0;
 223
 224         if (c == '\t')
 225                 return tab_size;
 226
 227         return 1;
 228 }
 229
/* Number of bytes used for encoding a UTF-8 character, indexed by its first
 * byte. Bytes 0x00-0xbf map to 1, 0xc0-0xdf to 2, 0xe0-0xef to 3, 0xf0-0xf7
 * to 4, 0xf8-0xfb to 5 and 0xfc-0xfd to 6. Illegal first bytes (0xfe and
 * 0xff, as well as the continuation bytes 0x80-0xbf) are set to one. */
static const unsigned char utf8_bytes[256] = {
        1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
        1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
        1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
        1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
        1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
        1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
        2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
        3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,1,1,
};
 242
 243 unsigned char
 244 utf8_char_length(const char *string)
 245 {
 246         int c = *(unsigned char *) string;
 247
 248         return utf8_bytes[c];
 249 }
 250
 251 /* Decode UTF-8 multi-byte representation into a Unicode character. */
 252 unsigned long
 253 utf8_to_unicode(const char *string, size_t length)
 254 {
 255         unsigned long unicode;
 256
 257         switch (length) {
 258         case 1:
 259                 unicode  =   string[0];
 260                 break;
 261         case 2:
 262                 unicode  =  (string[0] & 0x1f) << 6;
 263                 unicode +=  (string[1] & 0x3f);
 264                 break;
 265         case 3:
 266                 unicode  =  (string[0] & 0x0f) << 12;
 267                 unicode += ((string[1] & 0x3f) << 6);
 268                 unicode +=  (string[2] & 0x3f);
 269                 break;
 270         case 4:
 271                 unicode  =  (string[0] & 0x0f) << 18;
 272                 unicode += ((string[1] & 0x3f) << 12);
 273                 unicode += ((string[2] & 0x3f) << 6);
 274                 unicode +=  (string[3] & 0x3f);
 275                 break;
 276         case 5:
 277                 unicode  =  (string[0] & 0x0f) << 24;
 278                 unicode += ((string[1] & 0x3f) << 18);
 279                 unicode += ((string[2] & 0x3f) << 12);
 280                 unicode += ((string[3] & 0x3f) << 6);
 281                 unicode +=  (string[4] & 0x3f);
 282                 break;
 283         case 6:
 284                 unicode  =  (string[0] & 0x01) << 30;
 285                 unicode += ((string[1] & 0x3f) << 24);
 286                 unicode += ((string[2] & 0x3f) << 18);
 287                 unicode += ((string[3] & 0x3f) << 12);
 288                 unicode += ((string[4] & 0x3f) << 6);
 289                 unicode +=  (string[5] & 0x3f);
 290                 break;
 291         default:
 292                 return 0;
 293         }
 294
 295         /* Invalid characters could return the special 0xfffd value but NUL
 296          * should be just as good. */
 297         return unicode > 0xffff ? 0 : unicode;
 298 }
 299
/* Calculates how much of string can be shown within the given maximum width
 * and sets trimmed parameter to non-zero value if all of string could not be
 * shown. If the reserve flag is true, it will reserve at least one
 * trailing character, which can be useful when drawing a delimiter.
 *
 * Parameters:
 *  - start: in/out pointer to the text. It is advanced past every character
 *    that is consumed while skip is still positive.
 *  - max_chars: number of bytes of the text to consider, or a negative value
 *    to scan up to the terminating NUL.
 *  - skip: number of display columns to skip at the beginning.
 *  - width: set to the accumulated display width. The width of skipped
 *    characters is included in it.
 *  - max_width: maximum display width; zero disables the limit.
 *  - trimmed: set to 1 when a character did not fit into max_width, else 0.
 *  - reserve: when a character did not fit and the width up to that point is
 *    exactly max_width, also drop the last character that was accepted
 *    (together with any zero-width characters following it).
 *  - tab_size: display width to use for a tab character.
 *
 * Scanning also stops at a character that would extend past the end of the
 * considered bytes and at one that utf8_to_unicode() decodes to 0.
 *
 * Returns the number of bytes to output from string to satisfy max_width,
 * counted from the (possibly advanced) *start. */
size_t
utf8_length(const char **start, int max_chars, size_t skip, int *width, size_t max_width, int *trimmed, bool reserve, int tab_size)
{
        const char *string = *start;
        const char *end = max_chars < 0 ? strchr(string, '\0') : string + max_chars;
        /* Byte length and width of the most recently accepted character that
         * has a non-zero width; last_bytes also covers any zero-width
         * characters accepted after it. Used to undo it for reserve. */
        unsigned char last_bytes = 0;
        size_t last_ucwidth = 0;

        *width = 0;
        *trimmed = 0;

        while (string < end) {
                unsigned char bytes = utf8_char_length(string);
                size_t ucwidth;
                unsigned long unicode;

                /* Do not read a sequence that is cut off by the end. */
                if (string + bytes > end)
                        break;

                /* Change representation to figure out whether
                 * it is a single- or double-width character. */

                unicode = utf8_to_unicode(string, bytes);
                /* FIXME: Graceful handling of invalid Unicode character. */
                if (!unicode)
                        break;

                ucwidth = unicode_width(unicode, tab_size);
                if (skip > 0) {
                        /* Consume the skip budget without going below zero
                         * and move the start of the output past this
                         * character. */
                        skip -= ucwidth <= skip ? ucwidth : skip;
                        *start += bytes;
                }
                *width  += ucwidth;
                if (max_width > 0 && *width > max_width) {
                        /* This character does not fit: take its width back
                         * out and stop without advancing past it. */
                        *trimmed = 1;
                        *width -= ucwidth;
                        if (reserve && *width == max_width) {
                                /* The text fills the width exactly; give up
                                 * the last accepted character as well. */
                                string -= last_bytes;
                                *width -= last_ucwidth;
                        }
                        break;
                }

                string  += bytes;
                if (ucwidth) {
                        last_bytes = bytes;
                        last_ucwidth = ucwidth;
                } else {
                        /* Zero-width characters are grouped with the
                         * character before them. */
                        last_bytes += bytes;
                }
        }

        return string - *start;
}
 360
 361 int
 362 utf8_width_of(const char *text, int max_bytes, int max_width)
 363 {
 364         int text_width = 0;
 365         const char *tmp = text;
 366         int trimmed = false;
 367
 368         utf8_length(&tmp, max_bytes, 0, &text_width, max_width, &trimmed, false, 1);
 369         return text_width;
 370 }
 371
 372 /* vim: set ts=8 sw=8 noexpandtab: */