release/src/router/minidlna/tagutils/textutils.c

   1 //=========================================================================
   2 // FILENAME     : textutils.c
   3 // DESCRIPTION  : Misc. text utilities
   4 //=========================================================================
   5 // Copyright (c) 2008- NETGEAR, Inc. All Rights Reserved.
   6 //=========================================================================
   7
   8 /* This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program. If not, see <http://www.gnu.org/licenses/>.
  20  */
  21
  22
#include <ctype.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
  27
  28 #include "misc.h"
  29 #include "textutils.h"
  30 #include "../log.h"
  31
  32 static unsigned int
  33 _char_htoi(char h)
  34 {
  35   if (h<'0')
  36     return 0;
  37   if (h<='9')
  38     return h-'0';
  39   if (h<'A')
  40     return 0;
  41   if (h<='F')
  42     return h-'A'+10;
  43   if (h<'a')
  44     return 0;
  45   if (h<='f')
  46     return h-'a'+10;
  47   return 0;
  48 }
  49
  50 void
  51 urldecode(char *src)
  52 {
  53   char c, *s, *d;
  54
  55   for (d=s=src; *s; s++, d++) {
  56     c = *s;
  57     if (c=='%') {
  58       c = *++s;
  59       if (c=='%')
  60         c = '%';
  61       else {
  62         c = _char_htoi(c)<<4 | _char_htoi(*++s);
  63       }
  64       *d = c;
  65     }
  66     else {
  67       *d = c;
  68     }
  69   }
  70   *d = '\0';
  71 }
  72
  73 #if 0
  74 static int
  75 is_ignoredword(const char *str)
  76 {
  77   int i;
  78
  79   if (!prefs.ignoredwords)
  80     return 0;
  81
  82   for (i=0; prefs.ignoredwords[i].n; i++) {
  83     if (!(strncasecmp(prefs.ignoredwords[i].word, str, prefs.ignoredwords[i].n))) {
  84       char next_char = str[prefs.ignoredwords[i].n];
  85       if (isalnum(next_char))
  86         continue;
  87       return prefs.ignoredwords[i].n;
  88     }
  89   }
  90   return 0;
  91 }
  92 #endif
  93
  94 char *
  95 skipspaces(const char *str)
  96 {
  97   while (isspace(*str)) str++;
  98   return (char*) str;
  99 }
 100
 101 /*
 102 U+0040 (40): @ A B C  D E F G  H I J K  L M N O
 103 U+0050 (50): P Q R S  T U V W  X Y Z [  \ ] ^ _
 104 U+0060 (60): ` a b c  d e f g  h i j k  l m n o
 105 U+0070 (70): p q r s  t u v w  x y z {  | } ~
 106
 107 U+00c0 (c3 80):  À Á Â Ã  Ä Å Æ Ç  È É Ê Ë  Ì Í Î Ï
 108 U+00d0 (c3 90):  Ð Ñ Ò Ó  Ô Õ Ö ×  Ø Ù Ú Û  Ü Ý Þ ß
 109 U+00e0 (c3 a0):  à á â ã  ä å æ ç  è é ê ë  ì í î ï
 110 U+00f0 (c3 b0):  ð ñ ò ó  ô õ ö ÷  ø ù ú û  ü ý þ ÿ
 111 U+0100 (c4 80):  Ā ā Ă ă  Ą ą Ć ć  Ĉ ĉ Ċ ċ  Č č Ď ď
 112 U+0110 (c4 90):  Đ đ Ē ē  Ĕ ĕ Ė ė  Ę ę Ě ě  Ĝ ĝ Ğ ğ
 113 U+0120 (c4 a0):  Ġ ġ Ģ ģ  Ĥ ĥ Ħ ħ  Ĩ ĩ Ī ī  Ĭ ĭ Į į
 114 U+0130 (c4 b0):  İ ı Ĳ ĳ  Ĵ ĵ Ķ ķ  ĸ Ĺ ĺ Ļ  ļ Ľ ľ Ŀ
 115 U+0140 (c5 80):  ŀ Ł ł Ń  ń Ņ ņ Ň  ň ŉ Ŋ ŋ  Ō ō Ŏ ŏ
 116 U+0150 (c5 90):  Ő ő Œ œ  Ŕ ŕ Ŗ ŗ  Ř ř Ś ś  Ŝ ŝ Ş ş
 117 U+0160 (c5 a0):  Š š Ţ ţ  Ť ť Ŧ ŧ  Ũ ũ Ū ū  Ŭ ŭ Ů ů
 118 U+0170 (c5 b0):  Ű ű Ų ų  Ŵ ŵ Ŷ ŷ  Ÿ Ź ź Ż  ż Ž ž ſ
 119  */
 120
 121 // conversion table for latin diacritical char to ascii one char or two chars.
 122 unsigned short UtoAscii[] = {
 123   // U+00c0
 124   0x0041,0x0041,0x0041,0x0041, 0x0041,0x0041,0x4145,0x0043, 0x0045,0x0045,0x0045,0x0045, 0x0049,0x0049,0x0049,0x0049,
 125   0x0044,0x004e,0x004f,0x004f, 0x004f,0x004f,0x004f,0xc397, 0xc398,0x0055,0x0055,0x0055, 0x0055,0x0059,0x0050,0x5353,
 126   // U+00e0
 127   0x0041,0x0041,0x0041,0x0041, 0x0041,0x0041,0x4145,0x0043, 0x0045,0x0045,0x0045,0x0045, 0x0049,0x0049,0x0049,0x0049,
 128   0x0044,0x004e,0x004f,0x004f, 0x004f,0x004f,0x004f,0xc397, 0xc398,0x0055,0x0055,0x0055, 0x0055,0x0059,0x0050,0x5353,
 129   // U+0100
 130   0x0041,0x0041,0x0041,0x0041, 0x0041,0x0041,0x0043,0x0043, 0x0043,0x0043,0x0043,0x0043, 0x0043,0x0043,0x0044,0x0044,
 131   0x0044,0x0044,0x0045,0x0045, 0x0045,0x0045,0x0045,0x0045, 0x0045,0x0045,0x0045,0x0045, 0x0047,0x0047,0x0047,0x0047,
 132   // U+0120
 133   0x0047,0x0047,0x0047,0x0047, 0x0048,0x0048,0x0048,0x0048, 0x0049,0x0049,0x0049,0x0049, 0x0049,0x0049,0x0049,0x0049,
 134   0x0049,0x0049,0x494a,0x494a, 0x004a,0x004a,0x004b,0x004b, 0x004b,0x004c,0x004c,0x004c, 0x004c,0x004c,0x004c,0x004c,
 135   // U+0140
 136   0x004c,0x004c,0x004c,0x004e, 0x004e,0x004e,0x004e,0x004e, 0x004e,0x004e,0x004e,0x004e, 0x004f,0x004f,0x004f,0x004f,
 137   0x004f,0x004f,0x4f45,0x4f45, 0x0052,0x0052,0x0052,0x0052, 0x0052,0x0052,0x0053,0x0053, 0x0053,0x0053,0x0053,0x0053,
 138   // U+0160
 139   0x0053,0x0053,0x0054,0x0054, 0x0054,0x0054,0x0054,0x0054, 0x0055,0x0055,0x0055,0x0055, 0x0055,0x0055,0x0055,0x0055,
 140   0x0055,0x0055,0x0055,0x0055, 0x0057,0x0057,0x0059,0x0059, 0x0059,0x005a,0x005a,0x005a, 0x005a,0x005a,0x005a,0xc5bf
 141 };
 142
 143 // conversion table for toupper() function for latin diacritical char
 144 unsigned short UtoUpper[] = {
 145   // U+00c0
 146   0xc380,0xc381,0xc382,0xc383, 0xc384,0xc385,0xc386,0xc387, 0xc388,0xc389,0xc38a,0xc38b, 0xc38c,0xc38d,0xc38e,0xc38f,
 147   0xc390,0xc391,0xc392,0xc393, 0xc394,0xc395,0xc396,0xc397, 0xc398,0xc399,0xc39a,0xc39b, 0xc39c,0xc39d,0xc39e,0x5353,
 148   // U+00e0
 149   0xc380,0xc381,0xc382,0xc383, 0xc384,0xc385,0xc386,0xc387, 0xc388,0xc389,0xc38a,0xc38b, 0xc38c,0xc38d,0xc38e,0xc38f,
 150   0xc390,0xc391,0xc392,0xc393, 0xc394,0xc395,0xc396,0xc397, 0xc398,0xc399,0xc39a,0xc39b, 0xc39c,0xc39d,0xc39e,0xc39f,
 151   // U+0100
 152   0xc480,0xc480,0xc482,0xc482, 0xc484,0xc484,0xc486,0xc486, 0xc488,0xc488,0xc48a,0xc48a, 0xc48c,0xc48c,0xc48e,0xc48e,
 153   0xc490,0xc490,0xc492,0xc492, 0xc494,0xc494,0xc496,0xc496, 0xc498,0xc498,0xc49a,0xc49a, 0xc49c,0xc49c,0xc49e,0xc49e,
 154   // U+0120
 155   0xc4a0,0xc4a0,0xc4a2,0xc4a2, 0xc4a4,0xc4a4,0xc4a6,0xc4a6, 0xc4a8,0xc4a8,0xc4aa,0xc4aa, 0xc4ac,0xc4ac,0xc4ae,0xc4ae,
 156   0xc4b0,0xc4b0,0xc4b2,0xc4b2, 0xc4b4,0xc4b4,0xc4b6,0xc4b6, 0xc4b8,0xc4b9,0xc4b9,0xc4bb, 0xc4bb,0xc4bd,0xc4bd,0xc4bf,
 157   // U+0140
 158   0xc4bf,0xc581,0xc581,0xc583, 0xc583,0xc585,0xc585,0xc587, 0xc587,0xc589,0xc58a,0xc58a, 0xc58c,0xc58c,0xc58e,0xc58e,
 159   0xc590,0xc591,0xc592,0xc593, 0xc594,0xc595,0xc596,0xc597, 0xc598,0xc599,0xc59a,0xc59b, 0xc59c,0xc59d,0xc59e,0xc59f,
 160   // U+0160
 161   0xc5a0,0xc5a0,0xc5a2,0xc5a2, 0xc5a4,0xc5a4,0xc5a6,0xc5a6, 0xc5a8,0xc5a8,0xc5aa,0xc5aa, 0xc5ac,0xc5ac,0xc5ae,0xc5ae,
 162   0xc5b0,0xc5b1,0xc5b2,0xc5b3, 0xc5b4,0xc5b5,0xc5b6,0xc5b7, 0xc5b8,0xc5b9,0xc5b9,0xc5bb, 0xc5bc,0xc5bd,0xc5bd,0xc5bf,
 163 };
 164
 165
 166 int
 167 safe_atoi(char *s)
 168 {
 169   if (!s)
 170     return 0;
 171   if ((s[0]>='0' && s[0]<='9') || s[0]=='-' || s[0]=='+')
 172     return atoi(s);
 173   return 0;
 174 }
 175
 176 // NOTE: support U+0000 ~ U+FFFF only.
 177 int
 178 utf16le_to_utf8(char *dst, int n, __u16 utf16le)
 179 {
 180   __u16 wc = le16_to_cpu(utf16le);
 181   if (wc < 0x80) {
 182     if (n<1) return 0;
 183     *dst++ = wc & 0xff;
 184     return 1;
 185   }
 186   else if (wc < 0x800) {
 187     if (n<2) return 0;
 188     *dst++ = 0xc0 | (wc>>6);
 189     *dst++ = 0x80 | (wc & 0x3f);
 190     return 2;
 191   }
 192   else {
 193     if (n<3) return 0;
 194     *dst++ = 0xe0 | (wc>>12);
 195     *dst++ = 0x80 | ((wc>>6) & 0x3f);
 196     *dst++ = 0x80 | (wc & 0x3f);
 197     return 3;
 198   }
 199 }
 200
 201 void
 202 fetch_string_txt(char *fname, char *lang, int n, ...)
 203 {
 204   va_list args;
 205   char **keys;
 206   char ***strs;
 207   char **defstr;
 208   int i;
 209   FILE *fp;
 210   char buf[4096];
 211   int state;
 212   char *p;
 213   char *langid;
 214   const char *lang_en = "EN";
 215
 216   if (!(keys = malloc(sizeof(keys) * n))) {
 217     DPRINTF(E_FATAL, L_SCANNER, "Out of memory\n");
 218   }
 219   if (!(strs = malloc(sizeof(strs) * n))) {
 220     DPRINTF(E_FATAL, L_SCANNER, "Out of memory\n");
 221   }
 222   if (!(defstr = malloc(sizeof(defstr) * n))) {
 223     DPRINTF(E_FATAL, L_SCANNER, "Out of memory\n");
 224   }
 225
 226   va_start(args, n);
 227   for (i=0; i<n; i++) {
 228     keys[i] = va_arg(args, char *);
 229     strs[i] = va_arg(args, char **);
 230     defstr[i] = va_arg(args, char *);
 231   }
 232   va_end(args);
 233
 234   if (!(fp = fopen(fname, "rb"))) {
 235     DPRINTF(E_ERROR, L_SCANNER, "Cannot open <%s>\n", fname);
 236     goto _exit;
 237   }
 238
 239   state = -1;
 240   while (fgets(buf, sizeof(buf), fp)) {
 241     int len = strlen(buf);
 242
 243     if (buf[len-1]=='\n') buf[len-1] = '\0';
 244
 245     if (state<0) {
 246       if (isalpha(buf[0])) {
 247         for (i=0; i<n; i++) {
 248           if (!(strcmp(keys[i], buf))) {
 249             state = i;
 250             break;
 251           }
 252         }
 253       }
 254     }
 255     else {
 256       int found = 0;
 257
 258       if (isalpha(buf[0]) || buf[0]=='\0') {
 259         state = -1;
 260         continue;
 261       }
 262
 263       p = buf;
 264       while (isspace(*p)) p++;
 265       if (*p == '\0') {
 266         state = -1;
 267         continue;
 268       }
 269       langid = p;
 270       while (!isspace(*p)) p++;
 271       *p++ = '\0';
 272
 273       if (!strcmp(lang, langid))
 274         found = 1;
 275       else if (strcmp(lang_en, langid))
 276         continue;
 277
 278       while (isspace(*p)) p++;
 279       if (*strs[state])
 280         free(*strs[state]);
 281       *strs[state] = strdup(p);
 282
 283       if (found)
 284         state = -1;
 285     }
 286   }
 287
 288   for (i=0; i<n; i++) {
 289     if (!*strs[i])
 290       *strs[i] = defstr[i];
 291   }
 292   fclose(fp);
 293
 294  _exit:
 295   free(keys);
 296   free(strs);
 297   free(defstr);
 298 }