release/src-rt-6.x.4708/linux/linux-2.6.36/fs/udf/unicode.c

   1
   2
   3 #include "udfdecl.h"
   4
   5 #include <linux/kernel.h>
   6 #include <linux/string.h>       /* for memset */
   7 #include <linux/nls.h>
   8 #include <linux/crc-itu-t.h>
   9 #include <linux/slab.h>
  10
  11 #include "udf_sb.h"
  12
  13 static int udf_translate_to_linux(uint8_t *, uint8_t *, int, uint8_t *, int);
  14
  15 static int udf_char_to_ustr(struct ustr *dest, const uint8_t *src, int strlen)
  16 {
  17         if ((!dest) || (!src) || (!strlen) || (strlen > UDF_NAME_LEN - 2))
  18                 return 0;
  19
  20         memset(dest, 0, sizeof(struct ustr));
  21         memcpy(dest->u_name, src, strlen);
  22         dest->u_cmpID = 0x08;
  23         dest->u_len = strlen;
  24
  25         return strlen;
  26 }
  27
  28 /*
  29  * udf_build_ustr
  30  */
  31 int udf_build_ustr(struct ustr *dest, dstring *ptr, int size)
  32 {
  33         int usesize;
  34
  35         if (!dest || !ptr || !size)
  36                 return -1;
  37         BUG_ON(size < 2);
  38
  39         usesize = min_t(size_t, ptr[size - 1], sizeof(dest->u_name));
  40         usesize = min(usesize, size - 2);
  41         dest->u_cmpID = ptr[0];
  42         dest->u_len = usesize;
  43         memcpy(dest->u_name, ptr + 1, usesize);
  44         memset(dest->u_name + usesize, 0, sizeof(dest->u_name) - usesize);
  45
  46         return 0;
  47 }
  48
  49 /*
  50  * udf_build_ustr_exact
  51  */
  52 static int udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
  53 {
  54         if ((!dest) || (!ptr) || (!exactsize))
  55                 return -1;
  56
  57         memset(dest, 0, sizeof(struct ustr));
  58         dest->u_cmpID = ptr[0];
  59         dest->u_len = exactsize - 1;
  60         memcpy(dest->u_name, ptr + 1, exactsize - 1);
  61
  62         return 0;
  63 }
  64
  65 /*
  66  * udf_ocu_to_utf8
  67  *
  68  * PURPOSE
  69  *      Convert OSTA Compressed Unicode to the UTF-8 equivalent.
  70  *
  71  * PRE-CONDITIONS
  72  *      utf                     Pointer to UTF-8 output buffer.
  73  *      ocu                     Pointer to OSTA Compressed Unicode input buffer
  74  *                              of size UDF_NAME_LEN bytes.
  75  *                              both of type "struct ustr *"
  76  *
  77  * POST-CONDITIONS
  78  *      <return>                Zero on success.
  79  *
  80  * HISTORY
  81  *      November 12, 1997 - Andrew E. Mileski
  82  *      Written, tested, and released.
  83  */
  84 int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
  85 {
  86         const uint8_t *ocu;
  87         uint8_t cmp_id, ocu_len;
  88         int i;
  89
  90         ocu_len = ocu_i->u_len;
  91         if (ocu_len == 0) {
  92                 memset(utf_o, 0, sizeof(struct ustr));
  93                 return 0;
  94         }
  95
  96         cmp_id = ocu_i->u_cmpID;
  97         if (cmp_id != 8 && cmp_id != 16) {
  98                 memset(utf_o, 0, sizeof(struct ustr));
  99                 printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n",
 100                        cmp_id, ocu_i->u_name);
 101                 return 0;
 102         }
 103
 104         ocu = ocu_i->u_name;
 105         utf_o->u_len = 0;
 106         for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
 107
 108                 /* Expand OSTA compressed Unicode to Unicode */
 109                 uint32_t c = ocu[i++];
 110                 if (cmp_id == 16)
 111                         c = (c << 8) | ocu[i++];
 112
 113                 /* Compress Unicode to UTF-8 */
 114                 if (c < 0x80U)
 115                         utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
 116                 else if (c < 0x800U) {
 117                         utf_o->u_name[utf_o->u_len++] =
 118                                                 (uint8_t)(0xc0 | (c >> 6));
 119                         utf_o->u_name[utf_o->u_len++] =
 120                                                 (uint8_t)(0x80 | (c & 0x3f));
 121                 } else {
 122                         utf_o->u_name[utf_o->u_len++] =
 123                                                 (uint8_t)(0xe0 | (c >> 12));
 124                         utf_o->u_name[utf_o->u_len++] =
 125                                                 (uint8_t)(0x80 |
 126                                                           ((c >> 6) & 0x3f));
 127                         utf_o->u_name[utf_o->u_len++] =
 128                                                 (uint8_t)(0x80 | (c & 0x3f));
 129                 }
 130         }
 131         utf_o->u_cmpID = 8;
 132
 133         return utf_o->u_len;
 134 }
 135
 136 /*
 137  *
 138  * udf_utf8_to_ocu
 139  *
 140  * PURPOSE
 141  *      Convert UTF-8 to the OSTA Compressed Unicode equivalent.
 142  *
 143  * DESCRIPTION
 144  *      This routine is only called by udf_lookup().
 145  *
 146  * PRE-CONDITIONS
 147  *      ocu                     Pointer to OSTA Compressed Unicode output
 148  *                              buffer of size UDF_NAME_LEN bytes.
 149  *      utf                     Pointer to UTF-8 input buffer.
 150  *      utf_len                 Length of UTF-8 input buffer in bytes.
 151  *
 152  * POST-CONDITIONS
 153  *      <return>                Zero on success.
 154  *
 155  * HISTORY
 156  *      November 12, 1997 - Andrew E. Mileski
 157  *      Written, tested, and released.
 158  */
 159 static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
 160 {
 161         unsigned c, i, max_val, utf_char;
 162         int utf_cnt, u_len;
 163
 164         memset(ocu, 0, sizeof(dstring) * length);
 165         ocu[0] = 8;
 166         max_val = 0xffU;
 167
 168 try_again:
 169         u_len = 0U;
 170         utf_char = 0U;
 171         utf_cnt = 0U;
 172         for (i = 0U; i < utf->u_len; i++) {
 173                 c = (uint8_t)utf->u_name[i];
 174
 175                 /* Complete a multi-byte UTF-8 character */
 176                 if (utf_cnt) {
 177                         utf_char = (utf_char << 6) | (c & 0x3fU);
 178                         if (--utf_cnt)
 179                                 continue;
 180                 } else {
 181                         /* Check for a multi-byte UTF-8 character */
 182                         if (c & 0x80U) {
 183                                 /* Start a multi-byte UTF-8 character */
 184                                 if ((c & 0xe0U) == 0xc0U) {
 185                                         utf_char = c & 0x1fU;
 186                                         utf_cnt = 1;
 187                                 } else if ((c & 0xf0U) == 0xe0U) {
 188                                         utf_char = c & 0x0fU;
 189                                         utf_cnt = 2;
 190                                 } else if ((c & 0xf8U) == 0xf0U) {
 191                                         utf_char = c & 0x07U;
 192                                         utf_cnt = 3;
 193                                 } else if ((c & 0xfcU) == 0xf8U) {
 194                                         utf_char = c & 0x03U;
 195                                         utf_cnt = 4;
 196                                 } else if ((c & 0xfeU) == 0xfcU) {
 197                                         utf_char = c & 0x01U;
 198                                         utf_cnt = 5;
 199                                 } else {
 200                                         goto error_out;
 201                                 }
 202                                 continue;
 203                         } else {
 204                                 /* Single byte UTF-8 character (most common) */
 205                                 utf_char = c;
 206                         }
 207                 }
 208
 209                 /* Choose no compression if necessary */
 210                 if (utf_char > max_val) {
 211                         if (max_val == 0xffU) {
 212                                 max_val = 0xffffU;
 213                                 ocu[0] = (uint8_t)0x10U;
 214                                 goto try_again;
 215                         }
 216                         goto error_out;
 217                 }
 218
 219                 if (max_val == 0xffffU)
 220                         ocu[++u_len] = (uint8_t)(utf_char >> 8);
 221                 ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
 222         }
 223
 224         if (utf_cnt) {
 225 error_out:
 226                 ocu[++u_len] = '?';
 227                 printk(KERN_DEBUG "udf: bad UTF-8 character\n");
 228         }
 229
 230         ocu[length - 1] = (uint8_t)u_len + 1;
 231
 232         return u_len + 1;
 233 }
 234
 235 static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
 236                         const struct ustr *ocu_i)
 237 {
 238         const uint8_t *ocu;
 239         uint8_t cmp_id, ocu_len;
 240         int i, len;
 241
 242
 243         ocu_len = ocu_i->u_len;
 244         if (ocu_len == 0) {
 245                 memset(utf_o, 0, sizeof(struct ustr));
 246                 return 0;
 247         }
 248
 249         cmp_id = ocu_i->u_cmpID;
 250         if (cmp_id != 8 && cmp_id != 16) {
 251                 memset(utf_o, 0, sizeof(struct ustr));
 252                 printk(KERN_ERR "udf: unknown compression code (%d) stri=%s\n",
 253                        cmp_id, ocu_i->u_name);
 254                 return 0;
 255         }
 256
 257         ocu = ocu_i->u_name;
 258         utf_o->u_len = 0;
 259         for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
 260                 /* Expand OSTA compressed Unicode to Unicode */
 261                 uint32_t c = ocu[i++];
 262                 if (cmp_id == 16)
 263                         c = (c << 8) | ocu[i++];
 264
 265                 len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
 266                                     UDF_NAME_LEN - utf_o->u_len);
 267                 /* Valid character? */
 268                 if (len >= 0)
 269                         utf_o->u_len += len;
 270                 else
 271                         utf_o->u_name[utf_o->u_len++] = '?';
 272         }
 273         utf_o->u_cmpID = 8;
 274
 275         return utf_o->u_len;
 276 }
 277
 278 static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni,
 279                         int length)
 280 {
 281         int len;
 282         unsigned i, max_val;
 283         uint16_t uni_char;
 284         int u_len;
 285
 286         memset(ocu, 0, sizeof(dstring) * length);
 287         ocu[0] = 8;
 288         max_val = 0xffU;
 289
 290 try_again:
 291         u_len = 0U;
 292         for (i = 0U; i < uni->u_len; i++) {
 293                 len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char);
 294                 if (!len)
 295                         continue;
 296                 /* Invalid character, deal with it */
 297                 if (len < 0) {
 298                         len = 1;
 299                         uni_char = '?';
 300                 }
 301
 302                 if (uni_char > max_val) {
 303                         max_val = 0xffffU;
 304                         ocu[0] = (uint8_t)0x10U;
 305                         goto try_again;
 306                 }
 307
 308                 if (max_val == 0xffffU)
 309                         ocu[++u_len] = (uint8_t)(uni_char >> 8);
 310                 ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
 311                 i += len - 1;
 312         }
 313
 314         ocu[length - 1] = (uint8_t)u_len + 1;
 315         return u_len + 1;
 316 }
 317
 318 int udf_get_filename(struct super_block *sb, uint8_t *sname, uint8_t *dname,
 319                      int flen)
 320 {
 321         struct ustr *filename, *unifilename;
 322         int len = 0;
 323
 324         filename = kmalloc(sizeof(struct ustr), GFP_NOFS);
 325         if (!filename)
 326                 return 0;
 327
 328         unifilename = kmalloc(sizeof(struct ustr), GFP_NOFS);
 329         if (!unifilename)
 330                 goto out1;
 331
 332         if (udf_build_ustr_exact(unifilename, sname, flen))
 333                 goto out2;
 334
 335         if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
 336                 if (!udf_CS0toUTF8(filename, unifilename)) {
 337                         udf_debug("Failed in udf_get_filename: sname = %s\n",
 338                                   sname);
 339                         goto out2;
 340                 }
 341         } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
 342                 if (!udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename,
 343                                   unifilename)) {
 344                         udf_debug("Failed in udf_get_filename: sname = %s\n",
 345                                   sname);
 346                         goto out2;
 347                 }
 348         } else
 349                 goto out2;
 350
 351         len = udf_translate_to_linux(dname, filename->u_name, filename->u_len,
 352                                      unifilename->u_name, unifilename->u_len);
 353 out2:
 354         kfree(unifilename);
 355 out1:
 356         kfree(filename);
 357         return len;
 358 }
 359
 360 int udf_put_filename(struct super_block *sb, const uint8_t *sname,
 361                      uint8_t *dname, int flen)
 362 {
 363         struct ustr unifilename;
 364         int namelen;
 365
 366         if (!udf_char_to_ustr(&unifilename, sname, flen))
 367                 return 0;
 368
 369         if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
 370                 namelen = udf_UTF8toCS0(dname, &unifilename, UDF_NAME_LEN);
 371                 if (!namelen)
 372                         return 0;
 373         } else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
 374                 namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname,
 375                                         &unifilename, UDF_NAME_LEN);
 376                 if (!namelen)
 377                         return 0;
 378         } else
 379                 return 0;
 380
 381         return namelen;
 382 }
 383
 384 #define ILLEGAL_CHAR_MARK       '_'
 385 #define EXT_MARK                '.'
 386 #define CRC_MARK                '#'
 387 #define EXT_SIZE                5
 388
 389 static int udf_translate_to_linux(uint8_t *newName, uint8_t *udfName,
 390                                   int udfLen, uint8_t *fidName,
 391                                   int fidNameLen)
 392 {
 393         int index, newIndex = 0, needsCRC = 0;
 394         int extIndex = 0, newExtIndex = 0, hasExt = 0;
 395         unsigned short valueCRC;
 396         uint8_t curr;
 397         const uint8_t hexChar[] = "0123456789ABCDEF";
 398
 399         if (udfName[0] == '.' &&
 400             (udfLen == 1 || (udfLen == 2 && udfName[1] == '.'))) {
 401                 needsCRC = 1;
 402                 newIndex = udfLen;
 403                 memcpy(newName, udfName, udfLen);
 404         } else {
 405                 for (index = 0; index < udfLen; index++) {
 406                         curr = udfName[index];
 407                         if (curr == '/' || curr == 0) {
 408                                 needsCRC = 1;
 409                                 curr = ILLEGAL_CHAR_MARK;
 410                                 while (index + 1 < udfLen &&
 411                                                 (udfName[index + 1] == '/' ||
 412                                                  udfName[index + 1] == 0))
 413                                         index++;
 414                         }
 415                         if (curr == EXT_MARK &&
 416                                         (udfLen - index - 1) <= EXT_SIZE) {
 417                                 if (udfLen == index + 1)
 418                                         hasExt = 0;
 419                                 else {
 420                                         hasExt = 1;
 421                                         extIndex = index;
 422                                         newExtIndex = newIndex;
 423                                 }
 424                         }
 425                         if (newIndex < 256)
 426                                 newName[newIndex++] = curr;
 427                         else
 428                                 needsCRC = 1;
 429                 }
 430         }
 431         if (needsCRC) {
 432                 uint8_t ext[EXT_SIZE];
 433                 int localExtIndex = 0;
 434
 435                 if (hasExt) {
 436                         int maxFilenameLen;
 437                         for (index = 0;
 438                              index < EXT_SIZE && extIndex + index + 1 < udfLen;
 439                              index++) {
 440                                 curr = udfName[extIndex + index + 1];
 441
 442                                 if (curr == '/' || curr == 0) {
 443                                         needsCRC = 1;
 444                                         curr = ILLEGAL_CHAR_MARK;
 445                                         while (extIndex + index + 2 < udfLen &&
 446                                               (index + 1 < EXT_SIZE &&
 447                                                 (udfName[extIndex + index + 2] == '/' ||
 448                                                  udfName[extIndex + index + 2] == 0)))
 449                                                 index++;
 450                                 }
 451                                 ext[localExtIndex++] = curr;
 452                         }
 453                         maxFilenameLen = 250 - localExtIndex;
 454                         if (newIndex > maxFilenameLen)
 455                                 newIndex = maxFilenameLen;
 456                         else
 457                                 newIndex = newExtIndex;
 458                 } else if (newIndex > 250)
 459                         newIndex = 250;
 460                 newName[newIndex++] = CRC_MARK;
 461                 valueCRC = crc_itu_t(0, fidName, fidNameLen);
 462                 newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
 463                 newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
 464                 newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
 465                 newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
 466
 467                 if (hasExt) {
 468                         newName[newIndex++] = EXT_MARK;
 469                         for (index = 0; index < localExtIndex; index++)
 470                                 newName[newIndex++] = ext[index];
 471                 }
 472         }
 473
 474         return newIndex;
 475 }