release/src/router/mysql/mysys/charset.c

   1 /*
   2    Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
   3
   4    This program is free software; you can redistribute it and/or modify
   5    it under the terms of the GNU General Public License as published by
   6    the Free Software Foundation; version 2 of the License.
   7
   8    This program is distributed in the hope that it will be useful,
   9    but WITHOUT ANY WARRANTY; without even the implied warranty of
  10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  11    GNU General Public License for more details.
  12
  13    You should have received a copy of the GNU General Public License
  14    along with this program; if not, write to the Free Software
  15    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
  16 */
  17
  18 #include "mysys_priv.h"
  19 #include "mysys_err.h"
  20 #include <m_ctype.h>
  21 #include <m_string.h>
  22 #include <my_dir.h>
  23 #include <my_xml.h>
  24
  25
  26 /*
  27   The code below implements this functionality:
  28
  29     - Initializing charset related structures
  30     - Loading dynamic charsets
  31     - Searching for a proper CHARSET_INFO
  32       using charset name, collation name or collation ID
  33     - Setting server default character set
  34 */
  35
  36 my_bool my_charset_same(CHARSET_INFO *cs1, CHARSET_INFO *cs2)
  37 {
  38   return ((cs1 == cs2) || !strcmp(cs1->csname,cs2->csname));
  39 }
  40
  41
  42 static uint
  43 get_collation_number_internal(const char *name)
  44 {
  45   CHARSET_INFO **cs;
  46   for (cs= all_charsets;
  47        cs < all_charsets+array_elements(all_charsets)-1 ;
  48        cs++)
  49   {
  50     if ( cs[0] && cs[0]->name &&
  51          !my_strcasecmp(&my_charset_latin1, cs[0]->name, name))
  52       return cs[0]->number;
  53   }
  54   return 0;
  55 }
  56
  57
  58 static my_bool init_state_maps(CHARSET_INFO *cs)
  59 {
  60   uint i;
  61   uchar *state_map;
  62   uchar *ident_map;
  63
  64   if (!(cs->state_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
  65     return 1;
  66
  67   if (!(cs->ident_map= (uchar*) my_once_alloc(256, MYF(MY_WME))))
  68     return 1;
  69
  70   state_map= cs->state_map;
  71   ident_map= cs->ident_map;
  72
  73   /* Fill state_map with states to get a faster parser */
  74   for (i=0; i < 256 ; i++)
  75   {
  76     if (my_isalpha(cs,i))
  77       state_map[i]=(uchar) MY_LEX_IDENT;
  78     else if (my_isdigit(cs,i))
  79       state_map[i]=(uchar) MY_LEX_NUMBER_IDENT;
  80 #if defined(USE_MB) && defined(USE_MB_IDENT)
  81     else if (my_mbcharlen(cs, i)>1)
  82       state_map[i]=(uchar) MY_LEX_IDENT;
  83 #endif
  84     else if (my_isspace(cs,i))
  85       state_map[i]=(uchar) MY_LEX_SKIP;
  86     else
  87       state_map[i]=(uchar) MY_LEX_CHAR;
  88   }
  89   state_map[(uchar)'_']=state_map[(uchar)'$']=(uchar) MY_LEX_IDENT;
  90   state_map[(uchar)'\'']=(uchar) MY_LEX_STRING;
  91   state_map[(uchar)'.']=(uchar) MY_LEX_REAL_OR_POINT;
  92   state_map[(uchar)'>']=state_map[(uchar)'=']=state_map[(uchar)'!']= (uchar) MY_LEX_CMP_OP;
  93   state_map[(uchar)'<']= (uchar) MY_LEX_LONG_CMP_OP;
  94   state_map[(uchar)'&']=state_map[(uchar)'|']=(uchar) MY_LEX_BOOL;
  95   state_map[(uchar)'#']=(uchar) MY_LEX_COMMENT;
  96   state_map[(uchar)';']=(uchar) MY_LEX_SEMICOLON;
  97   state_map[(uchar)':']=(uchar) MY_LEX_SET_VAR;
  98   state_map[0]=(uchar) MY_LEX_EOL;
  99   state_map[(uchar)'\\']= (uchar) MY_LEX_ESCAPE;
 100   state_map[(uchar)'/']= (uchar) MY_LEX_LONG_COMMENT;
 101   state_map[(uchar)'*']= (uchar) MY_LEX_END_LONG_COMMENT;
 102   state_map[(uchar)'@']= (uchar) MY_LEX_USER_END;
 103   state_map[(uchar) '`']= (uchar) MY_LEX_USER_VARIABLE_DELIMITER;
 104   state_map[(uchar)'"']= (uchar) MY_LEX_STRING_OR_DELIMITER;
 105
 106   /*
 107     Create a second map to make it faster to find identifiers
 108   */
 109   for (i=0; i < 256 ; i++)
 110   {
 111     ident_map[i]= (uchar) (state_map[i] == MY_LEX_IDENT ||
 112                            state_map[i] == MY_LEX_NUMBER_IDENT);
 113   }
 114
 115   /* Special handling of hex and binary strings */
 116   state_map[(uchar)'x']= state_map[(uchar)'X']= (uchar) MY_LEX_IDENT_OR_HEX;
 117   state_map[(uchar)'b']= state_map[(uchar)'B']= (uchar) MY_LEX_IDENT_OR_BIN;
 118   state_map[(uchar)'n']= state_map[(uchar)'N']= (uchar) MY_LEX_IDENT_OR_NCHAR;
 119   return 0;
 120 }
 121
 122
 123 static void simple_cs_init_functions(CHARSET_INFO *cs)
 124 {
 125   if (cs->state & MY_CS_BINSORT)
 126     cs->coll= &my_collation_8bit_bin_handler;
 127   else
 128     cs->coll= &my_collation_8bit_simple_ci_handler;
 129
 130   cs->cset= &my_charset_8bit_handler;
 131 }
 132
 133
 134
 135 static int cs_copy_data(CHARSET_INFO *to, CHARSET_INFO *from)
 136 {
 137   to->number= from->number ? from->number : to->number;
 138
 139   if (from->csname)
 140     if (!(to->csname= my_once_strdup(from->csname,MYF(MY_WME))))
 141       goto err;
 142
 143   if (from->name)
 144     if (!(to->name= my_once_strdup(from->name,MYF(MY_WME))))
 145       goto err;
 146
 147   if (from->comment)
 148     if (!(to->comment= my_once_strdup(from->comment,MYF(MY_WME))))
 149       goto err;
 150
 151   if (from->ctype)
 152   {
 153     if (!(to->ctype= (uchar*) my_once_memdup((char*) from->ctype,
 154                                              MY_CS_CTYPE_TABLE_SIZE,
 155                                              MYF(MY_WME))))
 156       goto err;
 157     if (init_state_maps(to))
 158       goto err;
 159   }
 160   if (from->to_lower)
 161     if (!(to->to_lower= (uchar*) my_once_memdup((char*) from->to_lower,
 162                                                 MY_CS_TO_LOWER_TABLE_SIZE,
 163                                                 MYF(MY_WME))))
 164       goto err;
 165
 166   if (from->to_upper)
 167     if (!(to->to_upper= (uchar*) my_once_memdup((char*) from->to_upper,
 168                                                 MY_CS_TO_UPPER_TABLE_SIZE,
 169                                                 MYF(MY_WME))))
 170       goto err;
 171   if (from->sort_order)
 172   {
 173     if (!(to->sort_order= (uchar*) my_once_memdup((char*) from->sort_order,
 174                                                   MY_CS_SORT_ORDER_TABLE_SIZE,
 175                                                   MYF(MY_WME))))
 176       goto err;
 177
 178   }
 179   if (from->tab_to_uni)
 180   {
 181     uint sz= MY_CS_TO_UNI_TABLE_SIZE*sizeof(uint16);
 182     if (!(to->tab_to_uni= (uint16*)  my_once_memdup((char*)from->tab_to_uni,
 183                                                     sz, MYF(MY_WME))))
 184       goto err;
 185   }
 186   if (from->tailoring)
 187     if (!(to->tailoring= my_once_strdup(from->tailoring,MYF(MY_WME))))
 188       goto err;
 189
 190   return 0;
 191
 192 err:
 193   return 1;
 194 }
 195
 196
 197
 198 static my_bool simple_cs_is_full(CHARSET_INFO *cs)
 199 {
 200   return ((cs->csname && cs->tab_to_uni && cs->ctype && cs->to_upper &&
 201            cs->to_lower) &&
 202           (cs->number && cs->name &&
 203           (cs->sort_order || (cs->state & MY_CS_BINSORT) )));
 204 }
 205
 206
 207 static void
 208 copy_uca_collation(CHARSET_INFO *to, CHARSET_INFO *from)
 209 {
 210   to->cset= from->cset;
 211   to->coll= from->coll;
 212   to->strxfrm_multiply= from->strxfrm_multiply;
 213   to->min_sort_char= from->min_sort_char;
 214   to->max_sort_char= from->max_sort_char;
 215   to->mbminlen= from->mbminlen;
 216   to->mbmaxlen= from->mbmaxlen;
 217   to->state|= MY_CS_AVAILABLE | MY_CS_LOADED |
 218               MY_CS_STRNXFRM  | MY_CS_UNICODE;
 219 }
 220
 221
 222 static int add_collation(CHARSET_INFO *cs)
 223 {
 224   if (cs->name && (cs->number ||
 225                    (cs->number=get_collation_number_internal(cs->name))) &&
 226       cs->number < array_elements(all_charsets))
 227   {
 228     if (!all_charsets[cs->number])
 229     {
 230       if (!(all_charsets[cs->number]=
 231          (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO),MYF(0))))
 232         return MY_XML_ERROR;
 233       bzero((void*)all_charsets[cs->number],sizeof(CHARSET_INFO));
 234     }
 235
 236     if (cs->primary_number == cs->number)
 237       cs->state |= MY_CS_PRIMARY;
 238
 239     if (cs->binary_number == cs->number)
 240       cs->state |= MY_CS_BINSORT;
 241
 242     all_charsets[cs->number]->state|= cs->state;
 243
 244     if (!(all_charsets[cs->number]->state & MY_CS_COMPILED))
 245     {
 246       CHARSET_INFO *newcs= all_charsets[cs->number];
 247       if (cs_copy_data(all_charsets[cs->number],cs))
 248         return MY_XML_ERROR;
 249
 250       newcs->caseup_multiply= newcs->casedn_multiply= 1;
 251
 252       if (!strcmp(cs->csname,"ucs2") )
 253       {
 254 #if defined(HAVE_CHARSET_ucs2) && defined(HAVE_UCA_COLLATIONS)
 255         copy_uca_collation(newcs, &my_charset_ucs2_unicode_ci);
 256 #endif
 257       }
 258       else if (!strcmp(cs->csname, "utf8"))
 259       {
 260 #if defined (HAVE_CHARSET_utf8) && defined(HAVE_UCA_COLLATIONS)
 261         copy_uca_collation(newcs, &my_charset_utf8_unicode_ci);
 262 #endif
 263       }
 264       else
 265       {
 266         uchar *sort_order= all_charsets[cs->number]->sort_order;
 267         simple_cs_init_functions(all_charsets[cs->number]);
 268         newcs->mbminlen= 1;
 269         newcs->mbmaxlen= 1;
 270         if (simple_cs_is_full(all_charsets[cs->number]))
 271         {
 272           all_charsets[cs->number]->state |= MY_CS_LOADED;
 273         }
 274         all_charsets[cs->number]->state|= MY_CS_AVAILABLE;
 275
 276         /*
 277           Check if case sensitive sort order: A < a < B.
 278           We need MY_CS_FLAG for regex library, and for
 279           case sensitivity flag for 5.0 client protocol,
 280           to support isCaseSensitive() method in JDBC driver
 281         */
 282         if (sort_order && sort_order['A'] < sort_order['a'] &&
 283                           sort_order['a'] < sort_order['B'])
 284           all_charsets[cs->number]->state|= MY_CS_CSSORT;
 285
 286         if (my_charset_is_8bit_pure_ascii(all_charsets[cs->number]))
 287           all_charsets[cs->number]->state|= MY_CS_PUREASCII;
 288       }
 289     }
 290     else
 291     {
 292       /*
 293         We need the below to make get_charset_name()
 294         and get_charset_number() working even if a
 295         character set has not been really incompiled.
 296         The above functions are used for example
 297         in error message compiler extra/comp_err.c.
 298         If a character set was compiled, this information
 299         will get lost and overwritten in add_compiled_collation().
 300       */
 301       CHARSET_INFO *dst= all_charsets[cs->number];
 302       dst->number= cs->number;
 303       if (cs->comment)
 304         if (!(dst->comment= my_once_strdup(cs->comment,MYF(MY_WME))))
 305           return MY_XML_ERROR;
 306       if (cs->csname)
 307         if (!(dst->csname= my_once_strdup(cs->csname,MYF(MY_WME))))
 308           return MY_XML_ERROR;
 309       if (cs->name)
 310         if (!(dst->name= my_once_strdup(cs->name,MYF(MY_WME))))
 311           return MY_XML_ERROR;
 312     }
 313     cs->number= 0;
 314     cs->primary_number= 0;
 315     cs->binary_number= 0;
 316     cs->name= NULL;
 317     cs->state= 0;
 318     cs->sort_order= NULL;
 319     cs->state= 0;
 320   }
 321   return MY_XML_OK;
 322 }
 323
 324
 325 #define MY_MAX_ALLOWED_BUF 1024*1024
 326 #define MY_CHARSET_INDEX "Index.xml"
 327
 328 const char *charsets_dir= NULL;
 329
 330
 331 static my_bool my_read_charset_file(const char *filename, myf myflags)
 332 {
 333   uchar *buf;
 334   int  fd;
 335   size_t len, tmp_len;
 336   MY_STAT stat_info;
 337
 338   if (!my_stat(filename, &stat_info, MYF(myflags)) ||
 339        ((len= (uint)stat_info.st_size) > MY_MAX_ALLOWED_BUF) ||
 340        !(buf= (uchar*) my_malloc(len,myflags)))
 341     return TRUE;
 342
 343   if ((fd=my_open(filename,O_RDONLY,myflags)) < 0)
 344     goto error;
 345   tmp_len=my_read(fd, buf, len, myflags);
 346   my_close(fd,myflags);
 347   if (tmp_len != len)
 348     goto error;
 349
 350   if (my_parse_charset_xml((char*) buf,len,add_collation))
 351   {
 352 #ifdef NOT_YET
 353     printf("ERROR at line %d pos %d '%s'\n",
 354            my_xml_error_lineno(&p)+1,
 355            my_xml_error_pos(&p),
 356            my_xml_error_string(&p));
 357 #endif
 358   }
 359
 360   my_free(buf, myflags);
 361   return FALSE;
 362
 363 error:
 364   my_free(buf, myflags);
 365   return TRUE;
 366 }
 367
 368
 369 char *get_charsets_dir(char *buf)
 370 {
 371   const char *sharedir= SHAREDIR;
 372   char *res;
 373   DBUG_ENTER("get_charsets_dir");
 374
 375   if (charsets_dir != NULL)
 376     strmake(buf, charsets_dir, FN_REFLEN-1);
 377   else
 378   {
 379     if (test_if_hard_path(sharedir) ||
 380         is_prefix(sharedir, DEFAULT_CHARSET_HOME))
 381       strxmov(buf, sharedir, "/", CHARSET_DIR, NullS);
 382     else
 383       strxmov(buf, DEFAULT_CHARSET_HOME, "/", sharedir, "/", CHARSET_DIR,
 384               NullS);
 385   }
 386   res= convert_dirname(buf,buf,NullS);
 387   DBUG_PRINT("info",("charsets dir: '%s'", buf));
 388   DBUG_RETURN(res);
 389 }
 390
 391 CHARSET_INFO *all_charsets[256]={NULL};
 392 CHARSET_INFO *default_charset_info = &my_charset_latin1;
 393
 394 void add_compiled_collation(CHARSET_INFO *cs)
 395 {
 396   all_charsets[cs->number]= cs;
 397   cs->state|= MY_CS_AVAILABLE;
 398 }
 399
 400 static void *cs_alloc(size_t size)
 401 {
 402   return my_once_alloc(size, MYF(MY_WME));
 403 }
 404
 405
 406 static my_pthread_once_t charsets_initialized= MY_PTHREAD_ONCE_INIT;
 407 static my_pthread_once_t charsets_template= MY_PTHREAD_ONCE_INIT;
 408
 409 static void init_available_charsets(void)
 410 {
 411   char fname[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
 412   CHARSET_INFO **cs;
 413
 414   bzero(&all_charsets,sizeof(all_charsets));
 415   init_compiled_charsets(MYF(0));
 416
 417   /* Copy compiled charsets */
 418   for (cs=all_charsets;
 419        cs < all_charsets+array_elements(all_charsets)-1 ;
 420        cs++)
 421   {
 422     if (*cs)
 423     {
 424       if (cs[0]->ctype)
 425         if (init_state_maps(*cs))
 426           *cs= NULL;
 427     }
 428   }
 429
 430   strmov(get_charsets_dir(fname), MY_CHARSET_INDEX);
 431   my_read_charset_file(fname, MYF(0));
 432 }
 433
 434
 435 void free_charsets(void)
 436 {
 437   charsets_initialized= charsets_template;
 438 }
 439
 440 uint get_collation_number(const char *name)
 441 {
 442   my_pthread_once(&charsets_initialized, init_available_charsets);
 443   return get_collation_number_internal(name);
 444 }
 445
 446
 447 uint get_charset_number(const char *charset_name, uint cs_flags)
 448 {
 449   CHARSET_INFO **cs;
 450   my_pthread_once(&charsets_initialized, init_available_charsets);
 451
 452   for (cs= all_charsets;
 453        cs < all_charsets+array_elements(all_charsets)-1 ;
 454        cs++)
 455   {
 456     if ( cs[0] && cs[0]->csname && (cs[0]->state & cs_flags) &&
 457          !my_strcasecmp(&my_charset_latin1, cs[0]->csname, charset_name))
 458       return cs[0]->number;
 459   }
 460   return 0;
 461 }
 462
 463
 464 const char *get_charset_name(uint charset_number)
 465 {
 466   CHARSET_INFO *cs;
 467   my_pthread_once(&charsets_initialized, init_available_charsets);
 468
 469   cs=all_charsets[charset_number];
 470   if (cs && (cs->number == charset_number) && cs->name )
 471     return (char*) cs->name;
 472
 473   return (char*) "?";   /* this mimics find_type() */
 474 }
 475
 476
 477 static CHARSET_INFO *get_internal_charset(uint cs_number, myf flags)
 478 {
 479   char  buf[FN_REFLEN];
 480   CHARSET_INFO *cs;
 481
 482   if ((cs= all_charsets[cs_number]))
 483   {
 484     if (cs->state & MY_CS_READY)  /* if CS is already initialized */
 485         return cs;
 486
 487     /*
 488       To make things thread safe we are not allowing other threads to interfere
 489       while we may changing the cs_info_table
 490     */
 491     pthread_mutex_lock(&THR_LOCK_charset);
 492
 493     if (!(cs->state & (MY_CS_COMPILED|MY_CS_LOADED))) /* if CS is not in memory */
 494     {
 495       strxmov(get_charsets_dir(buf), cs->csname, ".xml", NullS);
 496       my_read_charset_file(buf,flags);
 497     }
 498
 499     if (cs->state & MY_CS_AVAILABLE)
 500     {
 501       if (!(cs->state & MY_CS_READY))
 502       {
 503         if ((cs->cset->init && cs->cset->init(cs, cs_alloc)) ||
 504             (cs->coll->init && cs->coll->init(cs, cs_alloc)))
 505           cs= NULL;
 506         else
 507           cs->state|= MY_CS_READY;
 508       }
 509     }
 510     else
 511       cs= NULL;
 512
 513     pthread_mutex_unlock(&THR_LOCK_charset);
 514   }
 515   return cs;
 516 }
 517
 518
 519 CHARSET_INFO *get_charset(uint cs_number, myf flags)
 520 {
 521   CHARSET_INFO *cs;
 522   if (cs_number == default_charset_info->number)
 523     return default_charset_info;
 524
 525   my_pthread_once(&charsets_initialized, init_available_charsets);
 526
 527   if (!cs_number || cs_number >= array_elements(all_charsets)-1)
 528     return NULL;
 529
 530   cs=get_internal_charset(cs_number, flags);
 531
 532   if (!cs && (flags & MY_WME))
 533   {
 534     char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)], cs_string[23];
 535     strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
 536     cs_string[0]='#';
 537     int10_to_str(cs_number, cs_string+1, 10);
 538     my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file);
 539   }
 540   return cs;
 541 }
 542
 543 CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags)
 544 {
 545   uint cs_number;
 546   CHARSET_INFO *cs;
 547   my_pthread_once(&charsets_initialized, init_available_charsets);
 548
 549   cs_number=get_collation_number(cs_name);
 550   cs= cs_number ? get_internal_charset(cs_number,flags) : NULL;
 551
 552   if (!cs && (flags & MY_WME))
 553   {
 554     char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
 555     strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
 556     my_error(EE_UNKNOWN_COLLATION, MYF(ME_BELL), cs_name, index_file);
 557   }
 558
 559   return cs;
 560 }
 561
 562
 563 CHARSET_INFO *get_charset_by_csname(const char *cs_name,
 564                                     uint cs_flags,
 565                                     myf flags)
 566 {
 567   uint cs_number;
 568   CHARSET_INFO *cs;
 569   DBUG_ENTER("get_charset_by_csname");
 570   DBUG_PRINT("enter",("name: '%s'", cs_name));
 571
 572   my_pthread_once(&charsets_initialized, init_available_charsets);
 573
 574   cs_number= get_charset_number(cs_name, cs_flags);
 575   cs= cs_number ? get_internal_charset(cs_number, flags) : NULL;
 576
 577   if (!cs && (flags & MY_WME))
 578   {
 579     char index_file[FN_REFLEN + sizeof(MY_CHARSET_INDEX)];
 580     strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX);
 581     my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file);
 582   }
 583
 584   DBUG_RETURN(cs);
 585 }
 586
 587
 588 /**
 589   Resolve character set by the character set name (utf8, latin1, ...).
 590
 591   The function tries to resolve character set by the specified name. If
 592   there is character set with the given name, it is assigned to the "cs"
 593   parameter and FALSE is returned. If there is no such character set,
 594   "default_cs" is assigned to the "cs" and TRUE is returned.
 595
 596   @param[in] cs_name    Character set name.
 597   @param[in] default_cs Default character set.
 598   @param[out] cs        Variable to store character set.
 599
 600   @return FALSE if character set was resolved successfully; TRUE if there
 601   is no character set with given name.
 602 */
 603
 604 my_bool resolve_charset(const char *cs_name,
 605                         CHARSET_INFO *default_cs,
 606                         CHARSET_INFO **cs)
 607 {
 608   *cs= get_charset_by_csname(cs_name, MY_CS_PRIMARY, MYF(0));
 609
 610   if (*cs == NULL)
 611   {
 612     *cs= default_cs;
 613     return TRUE;
 614   }
 615
 616   return FALSE;
 617 }
 618
 619
 620 /**
 621   Resolve collation by the collation name (utf8_general_ci, ...).
 622
 623   The function tries to resolve collation by the specified name. If there
 624   is collation with the given name, it is assigned to the "cl" parameter
 625   and FALSE is returned. If there is no such collation, "default_cl" is
 626   assigned to the "cl" and TRUE is returned.
 627
 628   @param[out] cl        Variable to store collation.
 629   @param[in] cl_name    Collation name.
 630   @param[in] default_cl Default collation.
 631
 632   @return FALSE if collation was resolved successfully; TRUE if there is no
 633   collation with given name.
 634 */
 635
 636 my_bool resolve_collation(const char *cl_name,
 637                           CHARSET_INFO *default_cl,
 638                           CHARSET_INFO **cl)
 639 {
 640   *cl= get_charset_by_name(cl_name, MYF(0));
 641
 642   if (*cl == NULL)
 643   {
 644     *cl= default_cl;
 645     return TRUE;
 646   }
 647
 648   return FALSE;
 649 }
 650
 651
 652 /*
 653   Escape string with backslashes (\)
 654
 655   SYNOPSIS
 656     escape_string_for_mysql()
 657     charset_info        Charset of the strings
 658     to                  Buffer for escaped string
 659     to_length           Length of destination buffer, or 0
 660     from                The string to escape
 661     length              The length of the string to escape
 662
 663   DESCRIPTION
 664     This escapes the contents of a string by adding backslashes before special
 665     characters, and turning others into specific escape sequences, such as
 666     turning newlines into \n and null bytes into \0.
 667
 668   NOTE
 669     To maintain compatibility with the old C API, to_length may be 0 to mean
 670     "big enough"
 671
 672   RETURN VALUES
 673     (size_t) -1 The escaped string did not fit in the to buffer
 674     #           The length of the escaped string
 675 */
 676
 677 size_t escape_string_for_mysql(CHARSET_INFO *charset_info,
 678                                char *to, size_t to_length,
 679                                const char *from, size_t length)
 680 {
 681   const char *to_start= to;
 682   const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
 683   my_bool overflow= FALSE;
 684 #ifdef USE_MB
 685   my_bool use_mb_flag= use_mb(charset_info);
 686 #endif
 687   for (end= from + length; from < end; from++)
 688   {
 689     char escape= 0;
 690 #ifdef USE_MB
 691     int tmp_length;
 692     if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
 693     {
 694       if (to + tmp_length > to_end)
 695       {
 696         overflow= TRUE;
 697         break;
 698       }
 699       while (tmp_length--)
 700         *to++= *from++;
 701       from--;
 702       continue;
 703     }
 704     /*
 705      If the next character appears to begin a multi-byte character, we
 706      escape that first byte of that apparent multi-byte character. (The
 707      character just looks like a multi-byte character -- if it were actually
 708      a multi-byte character, it would have been passed through in the test
 709      above.)
 710
 711      Without this check, we can create a problem by converting an invalid
 712      multi-byte character into a valid one. For example, 0xbf27 is not
 713      a valid GBK character, but 0xbf5c is. (0x27 = ', 0x5c = \)
 714     */
 715     if (use_mb_flag && (tmp_length= my_mbcharlen(charset_info, *from)) > 1)
 716       escape= *from;
 717     else
 718 #endif
 719     switch (*from) {
 720     case 0:                             /* Must be escaped for 'mysql' */
 721       escape= '0';
 722       break;
 723     case '\n':                          /* Must be escaped for logs */
 724       escape= 'n';
 725       break;
 726     case '\r':
 727       escape= 'r';
 728       break;
 729     case '\\':
 730       escape= '\\';
 731       break;
 732     case '\'':
 733       escape= '\'';
 734       break;
 735     case '"':                           /* Better safe than sorry */
 736       escape= '"';
 737       break;
 738     case '\032':                        /* This gives problems on Win32 */
 739       escape= 'Z';
 740       break;
 741     }
 742     if (escape)
 743     {
 744       if (to + 2 > to_end)
 745       {
 746         overflow= TRUE;
 747         break;
 748       }
 749       *to++= '\\';
 750       *to++= escape;
 751     }
 752     else
 753     {
 754       if (to + 1 > to_end)
 755       {
 756         overflow= TRUE;
 757         break;
 758       }
 759       *to++= *from;
 760     }
 761   }
 762   *to= 0;
 763   return overflow ? (size_t) -1 : (size_t) (to - to_start);
 764 }
 765
 766
 767 #ifdef BACKSLASH_MBTAIL
 768 static CHARSET_INFO *fs_cset_cache= NULL;
 769
 770 CHARSET_INFO *fs_character_set()
 771 {
 772   if (!fs_cset_cache)
 773   {
 774     char buf[10]= "cp";
 775     GetLocaleInfo(LOCALE_SYSTEM_DEFAULT, LOCALE_IDEFAULTANSICODEPAGE,
 776                   buf+2, sizeof(buf)-3);
 777     /*
 778       We cannot call get_charset_by_name here
 779       because fs_character_set() is executed before
 780       LOCK_THD_charset mutex initialization, which
 781       is used inside get_charset_by_name.
 782       As we're now interested in cp932 only,
 783       let's just detect it using strcmp().
 784     */
 785     fs_cset_cache= !strcmp(buf, "cp932") ?
 786                    &my_charset_cp932_japanese_ci : &my_charset_bin;
 787   }
 788   return fs_cset_cache;
 789 }
 790 #endif
 791
 792 /*
 793   Escape apostrophes by doubling them up
 794
 795   SYNOPSIS
 796     escape_quotes_for_mysql()
 797     charset_info        Charset of the strings
 798     to                  Buffer for escaped string
 799     to_length           Length of destination buffer, or 0
 800     from                The string to escape
 801     length              The length of the string to escape
 802
 803   DESCRIPTION
 804     This escapes the contents of a string by doubling up any apostrophes that
 805     it contains. This is used when the NO_BACKSLASH_ESCAPES SQL_MODE is in
 806     effect on the server.
 807
 808   NOTE
 809     To be consistent with escape_string_for_mysql(), to_length may be 0 to
 810     mean "big enough"
 811
 812   RETURN VALUES
 813     ~0          The escaped string did not fit in the to buffer
 814     >=0         The length of the escaped string
 815 */
 816
 817 size_t escape_quotes_for_mysql(CHARSET_INFO *charset_info,
 818                                char *to, size_t to_length,
 819                                const char *from, size_t length)
 820 {
 821   const char *to_start= to;
 822   const char *end, *to_end=to_start + (to_length ? to_length-1 : 2*length);
 823   my_bool overflow= FALSE;
 824 #ifdef USE_MB
 825   my_bool use_mb_flag= use_mb(charset_info);
 826 #endif
 827   for (end= from + length; from < end; from++)
 828   {
 829 #ifdef USE_MB
 830     int tmp_length;
 831     if (use_mb_flag && (tmp_length= my_ismbchar(charset_info, from, end)))
 832     {
 833       if (to + tmp_length > to_end)
 834       {
 835         overflow= TRUE;
 836         break;
 837       }
 838       while (tmp_length--)
 839         *to++= *from++;
 840       from--;
 841       continue;
 842     }
 843     /*
 844       We don't have the same issue here with a non-multi-byte character being
 845       turned into a multi-byte character by the addition of an escaping
 846       character, because we are only escaping the ' character with itself.
 847      */
 848 #endif
 849     if (*from == '\'')
 850     {
 851       if (to + 2 > to_end)
 852       {
 853         overflow= TRUE;
 854         break;
 855       }
 856       *to++= '\'';
 857       *to++= '\'';
 858     }
 859     else
 860     {
 861       if (to + 1 > to_end)
 862       {
 863         overflow= TRUE;
 864         break;
 865       }
 866       *to++= *from;
 867     }
 868   }
 869   *to= 0;
 870   return overflow ? (ulong)~0 : (ulong) (to - to_start);
 871 }