src/string.c

   1 /* Copyright (c) 2006-2015 Jonas Fonseca <jonas.fonseca@gmail.com>
   2  *
   3  * This program is free software; you can redistribute it and/or
   4  * modify it under the terms of the GNU General Public License as
   5  * published by the Free Software Foundation; either version 2 of
   6  * the License, or (at your option) any later version.
   7  *
   8  * This program is distributed in the hope that it will be useful,
   9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11  * GNU General Public License for more details.
  12  */
  13
  14 #include "tig/tig.h"
  15 #include "tig/string.h"
  16
  17 /*
  18  * Strings.
  19  */
  20
  21 bool
  22 string_isnumber(const char *str)
  23 {
  24         int pos;
  25
  26         for (pos = 0; str[pos]; pos++) {
  27                 if (!isdigit(str[pos]))
  28                         return false;
  29         }
  30
  31         return pos > 0;
  32 }
  33
  34 bool
  35 iscommit(const char *str)
  36 {
  37         int pos;
  38
  39         for (pos = 0; str[pos]; pos++) {
  40                 if (!isxdigit(str[pos]))
  41                         return false;
  42         }
  43
  44         return 7 <= pos && pos < SIZEOF_REV;
  45 }
  46
  47 int
  48 suffixcmp(const char *str, int slen, const char *suffix)
  49 {
  50         size_t len = slen >= 0 ? slen : strlen(str);
  51         size_t suffixlen = strlen(suffix);
  52
  53         return suffixlen < len ? strcmp(str + len - suffixlen, suffix) : -1;
  54 }
  55
  56 void
  57 string_ncopy_do(char *dst, size_t dstlen, const char *src, size_t srclen)
  58 {
  59         if (srclen > dstlen - 1)
  60                 srclen = dstlen - 1;
  61
  62         strncpy(dst, src, srclen);
  63         dst[srclen] = 0;
  64 }
  65
  66 void
  67 string_copy_rev(char *dst, const char *src)
  68 {
  69         size_t srclen;
  70
  71         if (!*src)
  72                 return;
  73
  74         for (srclen = 0; srclen < SIZEOF_REV; srclen++)
  75                 if (!src[srclen] || isspace(src[srclen]))
  76                         break;
  77
  78         string_ncopy_do(dst, SIZEOF_REV, src, srclen);
  79 }
  80
  81 void
  82 string_copy_rev_from_commit_line(char *dst, const char *src)
  83 {
  84         string_copy_rev(dst, src + STRING_SIZE("commit "));
  85 }
  86
  87 size_t
  88 string_expanded_length(const char *src, size_t srclen, size_t tabsize, size_t max_size)
  89 {
  90         size_t size, pos;
  91
  92         for (size = pos = 0; pos < srclen && size < max_size; pos++) {
  93                 if (src[pos] == '\t') {
  94                         size_t expanded = tabsize - (size % tabsize);
  95
  96                         size += expanded;
  97                 } else {
  98                         size++;
  99                 }
 100         }
 101
 102         return pos;
 103 }
 104
 105 size_t
 106 string_expand(char *dst, size_t dstlen, const char *src, int srclen, int tabsize)
 107 {
 108         size_t size, pos;
 109
 110         for (size = pos = 0; size < dstlen - 1 && (srclen == -1 || pos < srclen) && src[pos]; pos++) {
 111                 const char c = src[pos];
 112
 113                 if (c == '\t') {
 114                         size_t expanded = tabsize - (size % tabsize);
 115
 116                         if (expanded + size >= dstlen - 1)
 117                                 expanded = dstlen - size - 1;
 118                         memcpy(dst + size, "        ", expanded);
 119                         size += expanded;
 120                 } else if (isspace(c) || iscntrl(c)) {
 121                         dst[size++] = ' ';
 122                 } else {
 123                         dst[size++] = src[pos];
 124                 }
 125         }
 126
 127         dst[size] = 0;
 128         return pos;
 129 }
 130
 131 char *
 132 chomp_string(char *name)
 133 {
 134         int namelen;
 135
 136         while (isspace(*name))
 137                 name++;
 138
 139         namelen = strlen(name) - 1;
 140         while (namelen > 0 && isspace(name[namelen]))
 141                 name[namelen--] = 0;
 142
 143         return name;
 144 }
 145
 146 bool PRINTF_LIKE(4, 5)
 147 string_nformat(char *buf, size_t bufsize, size_t *bufpos, const char *fmt, ...)
 148 {
 149         size_t pos = bufpos ? *bufpos : 0;
 150         int retval;
 151
 152         FORMAT_BUFFER(buf + pos, bufsize - pos, fmt, retval, false);
 153         if (bufpos && retval > 0)
 154                 *bufpos = pos + retval;
 155
 156         return pos >= bufsize ? false : true;
 157 }
 158
 159 int
 160 strcmp_null(const char *s1, const char *s2)
 161 {
 162         if (!s1 || !s2) {
 163                 return (!!s1) - (!!s2);
 164         }
 165
 166         return strcmp(s1, s2);
 167 }
 168
 169 int
 170 strcmp_numeric(const char *s1, const char *s2)
 171 {
 172         int number = 0;
 173         int num1, num2;
 174
 175         for (; *s1 && *s2 && *s1 == *s2; s1++, s2++) {
 176                 int c = *s1;
 177
 178                 if (isdigit(c)) {
 179                         number = 10 * number + (c - '0');
 180                 } else {
 181                         number = 0;
 182                 }
 183         }
 184
 185         num1 = number * 10 + atoi(s1);
 186         num2 = number * 10 + atoi(s2);
 187
 188         if (num1 != num2)
 189                 return num2 - num1;
 190
 191         if (!!*s1 != !!*s2)
 192                 return !!*s2 - !!*s1;
 193         return *s1 - *s2;
 194 }
 195
 196 /*
 197  * Unicode / UTF-8 handling
 198  *
 199  * NOTE: Much of the following code for dealing with Unicode is derived from
 200  * ELinks' UTF-8 code developed by Scrool <scroolik@gmail.com>. Origin file is
 201  * src/intl/charset.c from the UTF-8 branch commit elinks-0.11.0-g31f2c28.
 202  */
 203
 204 int
 205 unicode_width(unsigned long c, int tab_size)
 206 {
 207         if (c >= 0x1100 &&
 208            (c <= 0x115f                         /* Hangul Jamo */
 209             || c == 0x2329
 210             || c == 0x232a
 211             || (c >= 0x2e80  && c <= 0xa4cf && c != 0x303f)
 212                                                 /* CJK ... Yi */
 213             || (c >= 0xac00  && c <= 0xd7a3)    /* Hangul Syllables */
 214             || (c >= 0xf900  && c <= 0xfaff)    /* CJK Compatibility Ideographs */
 215             || (c >= 0xfe30  && c <= 0xfe6f)    /* CJK Compatibility Forms */
 216             || (c >= 0xff00  && c <= 0xff60)    /* Fullwidth Forms */
 217             || (c >= 0xffe0  && c <= 0xffe6)
 218             || (c >= 0x20000 && c <= 0x2fffd)
 219             || (c >= 0x30000 && c <= 0x3fffd)))
 220                 return 2;
 221
 222         if ((c >= 0x0300 && c <= 0x036f)        /* combining diacretical marks */
 223             || (c >= 0x1dc0 && c <= 0x1dff)     /* combining diacretical marks supplement */
 224             || (c >= 0x20d0 && c <= 0x20ff)     /* combining diacretical marks for symbols */
 225             || (c >= 0xfe20 && c <= 0xfe2f))    /* combining half marks */
 226                 return 0;
 227
 228         if (c == '\t')
 229                 return tab_size;
 230
 231         return 1;
 232 }
 233
 234 /* Number of bytes used for encoding a UTF-8 character indexed by first byte.
 235  * Illegal bytes are set one. */
 236 static const unsigned char utf8_bytes[256] = {
 237         1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
 238         1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
 239         1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
 240         1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
 241         1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
 242         1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
 243         2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
 244         3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 5,5,5,5,6,6,1,1,
 245 };
 246
 247 unsigned char
 248 utf8_char_length(const char *string)
 249 {
 250         int c = *(unsigned char *) string;
 251
 252         return utf8_bytes[c];
 253 }
 254
 255 /* Decode UTF-8 multi-byte representation into a Unicode character. */
 256 unsigned long
 257 utf8_to_unicode(const char *string, size_t length)
 258 {
 259         unsigned long unicode;
 260
 261         switch (length) {
 262         case 1:
 263                 unicode  =   string[0];
 264                 break;
 265         case 2:
 266                 unicode  =  (string[0] & 0x1f) << 6;
 267                 unicode +=  (string[1] & 0x3f);
 268                 break;
 269         case 3:
 270                 unicode  =  (string[0] & 0x0f) << 12;
 271                 unicode += ((string[1] & 0x3f) << 6);
 272                 unicode +=  (string[2] & 0x3f);
 273                 break;
 274         case 4:
 275                 unicode  =  (string[0] & 0x0f) << 18;
 276                 unicode += ((string[1] & 0x3f) << 12);
 277                 unicode += ((string[2] & 0x3f) << 6);
 278                 unicode +=  (string[3] & 0x3f);
 279                 break;
 280         case 5:
 281                 unicode  =  (string[0] & 0x0f) << 24;
 282                 unicode += ((string[1] & 0x3f) << 18);
 283                 unicode += ((string[2] & 0x3f) << 12);
 284                 unicode += ((string[3] & 0x3f) << 6);
 285                 unicode +=  (string[4] & 0x3f);
 286                 break;
 287         case 6:
 288                 unicode  =  (string[0] & 0x01) << 30;
 289                 unicode += ((string[1] & 0x3f) << 24);
 290                 unicode += ((string[2] & 0x3f) << 18);
 291                 unicode += ((string[3] & 0x3f) << 12);
 292                 unicode += ((string[4] & 0x3f) << 6);
 293                 unicode +=  (string[5] & 0x3f);
 294                 break;
 295         default:
 296                 return 0;
 297         }
 298
 299         /* Invalid characters could return the special 0xfffd value but NUL
 300          * should be just as good. */
 301         return unicode > 0xffff ? 0 : unicode;
 302 }
 303
 304 /* Calculates how much of string can be shown within the given maximum width
 305  * and sets trimmed parameter to non-zero value if all of string could not be
 306  * shown. If the reserve flag is true, it will reserve at least one
 307  * trailing character, which can be useful when drawing a delimiter.
 308  *
 309  * Returns the number of bytes to output from string to satisfy max_width. */
 310 size_t
 311 utf8_length(const char **start, int max_chars, size_t skip, int *width, size_t max_width, int *trimmed, bool reserve, int tab_size)
 312 {
 313         const char *string = *start;
 314         const char *end = max_chars < 0 ? strchr(string, '\0') : string + max_chars;
 315         unsigned char last_bytes = 0;
 316         size_t last_ucwidth = 0;
 317
 318         *width = 0;
 319         *trimmed = 0;
 320
 321         while (string < end) {
 322                 unsigned char bytes = utf8_char_length(string);
 323                 size_t ucwidth;
 324                 unsigned long unicode;
 325
 326                 if (string + bytes > end)
 327                         break;
 328
 329                 /* Change representation to figure out whether
 330                  * it is a single- or double-width character. */
 331
 332                 unicode = utf8_to_unicode(string, bytes);
 333                 /* FIXME: Graceful handling of invalid Unicode character. */
 334                 if (!unicode)
 335                         break;
 336
 337                 ucwidth = unicode_width(unicode, tab_size);
 338                 if (skip > 0) {
 339                         skip -= ucwidth <= skip ? ucwidth : skip;
 340                         *start += bytes;
 341                 }
 342                 *width  += ucwidth;
 343                 if (max_width > 0 && *width > max_width) {
 344                         *trimmed = 1;
 345                         *width -= ucwidth;
 346                         if (reserve && *width == max_width) {
 347                                 string -= last_bytes;
 348                                 *width -= last_ucwidth;
 349                         }
 350                         break;
 351                 }
 352
 353                 string  += bytes;
 354                 if (ucwidth) {
 355                         last_bytes = bytes;
 356                         last_ucwidth = ucwidth;
 357                 } else {
 358                         last_bytes += bytes;
 359                 }
 360         }
 361
 362         return string - *start;
 363 }
 364
 365 int
 366 utf8_width_of(const char *text, int max_bytes, int max_width)
 367 {
 368         int text_width = 0;
 369         const char *tmp = text;
 370         int trimmed = false;
 371
 372         utf8_length(&tmp, max_bytes, 0, &text_width, max_width, &trimmed, false, 1);
 373         return text_width;
 374 }
 375
 376 /* vim: set ts=8 sw=8 noexpandtab: */