src/backend/utils/adt/varlena.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * varlena.c
   4  *        Functions for the variable-length built-in types.
   5  *
   6  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $PostgreSQL$
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include <ctype.h>
  18
  19 #include "access/tuptoaster.h"
  20 #include "catalog/pg_type.h"
  21 #include "libpq/md5.h"
  22 #include "libpq/pqformat.h"
  23 #include "miscadmin.h"
  24 #include "parser/scansup.h"
  25 #include "regex/regex.h"
  26 #include "utils/builtins.h"
  27 #include "utils/lsyscache.h"
  28 #include "utils/pg_locale.h"
  29
  30
  /* "unknown" is simply another name for the generic struct varlena. */
  31 typedef struct varlena unknown;
  32
  /*
   * Search state passed to text_position_setup(), text_position_next() and
   * text_position_cleanup().  Depending on use_wchar, either the char
   * pointers (str1/str2) or the pg_wchar pointers (wstr1/wstr2) are the
   * ones in use.
   */
  33 typedef struct
  34 {
  35         bool            use_wchar;              /* T if multibyte encoding */
  36         char       *str1;                       /* use these if not use_wchar */
  37         char       *str2;                       /* note: these point to original texts */
  38         pg_wchar   *wstr1;                      /* use these if use_wchar */
  39         pg_wchar   *wstr2;                      /* note: these are palloc'd */
  40         int                     len1;                   /* string lengths in logical characters */
  41         int                     len2;
  42         /* Skip table for Boyer-Moore-Horspool search algorithm: */
  43         int                     skiptablemask;  /* mask for ANDing with skiptable subscripts */
  44         int                     skiptable[256]; /* skip distance for given mismatched char */
  45 } TextPositionState;
  46
  /*
   * Argument/result access macros for the "unknown" type.  The DatumGet...
   * forms run the Datum through PG_DETOAST_DATUM (or PG_DETOAST_DATUM_COPY
   * for the ...Copy variants) and cast the result to unknown *; the
   * PG_GETARG_... forms apply them to function argument n; and
   * PG_RETURN_UNKNOWN_P returns its argument with PG_RETURN_POINTER.
   */
  47 #define DatumGetUnknownP(X)                     ((unknown *) PG_DETOAST_DATUM(X))
  48 #define DatumGetUnknownPCopy(X)         ((unknown *) PG_DETOAST_DATUM_COPY(X))
  49 #define PG_GETARG_UNKNOWN_P(n)          DatumGetUnknownP(PG_GETARG_DATUM(n))
  50 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
  51 #define PG_RETURN_UNKNOWN_P(x)          PG_RETURN_POINTER(x)
  52
  /* Forward declarations for file-local (static) helper routines. */
  53 static int      text_cmp(text *arg1, text *arg2);
  54 static int32 text_length(Datum str);
  55 static int      text_position(text *t1, text *t2);
  56 static void text_position_setup(text *t1, text *t2, TextPositionState *state);
  57 static int      text_position_next(int start_pos, TextPositionState *state);
  58 static void text_position_cleanup(TextPositionState *state);
  59 static text *text_substring(Datum str,
  60                            int32 start,
  61                            int32 length,
  62                            bool length_not_specified);
  63 static void appendStringInfoText(StringInfo str, const text *t);
  64
  65
  66 /*****************************************************************************
  67  *       CONVERSION ROUTINES EXPORTED FOR USE BY C CODE                                                  *
  68  *****************************************************************************/
  69
  70 /*
  71  * cstring_to_text
  72  *
  73  * Create a text value from a null-terminated C string.
  74  *
  75  * The new text value is freshly palloc'd with a full-size VARHDR.
  76  */
  77 text *
  78 cstring_to_text(const char *s)
  79 {
  80         return cstring_to_text_with_len(s, strlen(s));
  81 }
  82
  83 /*
  84  * cstring_to_text_with_len
  85  *
  86  * Same as cstring_to_text except the caller specifies the string length;
  87  * the string need not be null_terminated.
  88  */
  89 text *
  90 cstring_to_text_with_len(const char *s, int len)
  91 {
  92         text       *result = (text *) palloc(len + VARHDRSZ);
  93
  94         SET_VARSIZE(result, len + VARHDRSZ);
  95         memcpy(VARDATA(result), s, len);
  96
  97         return result;
  98 }
  99
 100 /*
 101  * text_to_cstring
 102  *
 103  * Create a palloc'd, null-terminated C string from a text value.
 104  *
 105  * We support being passed a compressed or toasted text value.
 106  * This is a bit bogus since such values shouldn't really be referred to as
 107  * "text *", but it seems useful for robustness.  If we didn't handle that
 108  * case here, we'd need another routine that did, anyway.
 109  */
 110 char *
 111 text_to_cstring(const text *t)
 112 {
 113         /* must cast away the const, unfortunately */
 114         text       *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
 115         int                     len = VARSIZE_ANY_EXHDR(tunpacked);
 116         char       *result;
 117
 118         result = (char *) palloc(len + 1);
 119         memcpy(result, VARDATA_ANY(tunpacked), len);
 120         result[len] = '\0';
 121
 122         if (tunpacked != t)
 123                 pfree(tunpacked);
 124
 125         return result;
 126 }
 127
 128 /*
 129  * text_to_cstring_buffer
 130  *
 131  * Copy a text value into a caller-supplied buffer of size dst_len.
 132  *
 133  * The text string is truncated if necessary to fit.  The result is
 134  * guaranteed null-terminated (unless dst_len == 0).
 135  *
 136  * We support being passed a compressed or toasted text value.
 137  * This is a bit bogus since such values shouldn't really be referred to as
 138  * "text *", but it seems useful for robustness.  If we didn't handle that
 139  * case here, we'd need another routine that did, anyway.
 140  */
 141 void
 142 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
 143 {
 144         /* must cast away the const, unfortunately */
 145         text       *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
 146         size_t          src_len = VARSIZE_ANY_EXHDR(srcunpacked);
 147
 148         if (dst_len > 0)
 149         {
 150                 dst_len--;
 151                 if (dst_len >= src_len)
 152                         dst_len = src_len;
 153                 else                                    /* ensure truncation is encoding-safe */
 154                         dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
 155                 memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
 156                 dst[dst_len] = '\0';
 157         }
 158
 159         if (srcunpacked != src)
 160                 pfree(srcunpacked);
 161 }
 162
 163
 164 /*****************************************************************************
 165  *       USER I/O ROUTINES                                                                                                               *
 166  *****************************************************************************/
 167
 168
 169 #define VAL(CH)                 ((CH) - '0')
 170 #define DIG(VAL)                ((VAL) + '0')
 171
 172 /*
 173  *              byteain                 - converts from printable representation of byte array
 174  *
 175  *              Non-printable characters must be passed as '\nnn' (octal) and are
 176  *              converted to internal form.  '\' must be passed as '\\'.
 177  *              ereport(ERROR, ...) if bad form.
 178  *
 179  *              BUGS:
 180  *                              The input is scanned twice.
 181  *                              The error checking of input is minimal.
 182  */
 183 Datum
 184 byteain(PG_FUNCTION_ARGS)
 185 {
 186         char       *inputText = PG_GETARG_CSTRING(0);
 187         char       *tp;
 188         char       *rp;
 189         int                     byte;
 190         bytea      *result;
 191
 192         for (byte = 0, tp = inputText; *tp != '\0'; byte++)
 193         {
 194                 if (tp[0] != '\\')
 195                         tp++;
 196                 else if ((tp[0] == '\\') &&
 197                                  (tp[1] >= '0' && tp[1] <= '3') &&
 198                                  (tp[2] >= '0' && tp[2] <= '7') &&
 199                                  (tp[3] >= '0' && tp[3] <= '7'))
 200                         tp += 4;
 201                 else if ((tp[0] == '\\') &&
 202                                  (tp[1] == '\\'))
 203                         tp += 2;
 204                 else
 205                 {
 206                         /*
 207                          * one backslash, not followed by 0 or ### valid octal
 208                          */
 209                         ereport(ERROR,
 210                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 211                                          errmsg("invalid input syntax for type bytea")));
 212                 }
 213         }
 214
 215         byte += VARHDRSZ;
 216         result = (bytea *) palloc(byte);
 217         SET_VARSIZE(result, byte);
 218
 219         tp = inputText;
 220         rp = VARDATA(result);
 221         while (*tp != '\0')
 222         {
 223                 if (tp[0] != '\\')
 224                         *rp++ = *tp++;
 225                 else if ((tp[0] == '\\') &&
 226                                  (tp[1] >= '0' && tp[1] <= '3') &&
 227                                  (tp[2] >= '0' && tp[2] <= '7') &&
 228                                  (tp[3] >= '0' && tp[3] <= '7'))
 229                 {
 230                         byte = VAL(tp[1]);
 231                         byte <<= 3;
 232                         byte += VAL(tp[2]);
 233                         byte <<= 3;
 234                         *rp++ = byte + VAL(tp[3]);
 235                         tp += 4;
 236                 }
 237                 else if ((tp[0] == '\\') &&
 238                                  (tp[1] == '\\'))
 239                 {
 240                         *rp++ = '\\';
 241                         tp += 2;
 242                 }
 243                 else
 244                 {
 245                         /*
 246                          * We should never get here. The first pass should not allow it.
 247                          */
 248                         ereport(ERROR,
 249                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 250                                          errmsg("invalid input syntax for type bytea")));
 251                 }
 252         }
 253
 254         PG_RETURN_BYTEA_P(result);
 255 }
 256
 257 /*
 258  *              byteaout                - converts to printable representation of byte array
 259  *
 260  *              Non-printable characters are inserted as '\nnn' (octal) and '\' as
 261  *              '\\'.
 262  *
 263  *              NULL vlena should be an error--returning string with NULL for now.
 264  */
 265 Datum
 266 byteaout(PG_FUNCTION_ARGS)
 267 {
 268         bytea      *vlena = PG_GETARG_BYTEA_PP(0);
 269         char       *result;
 270         char       *vp;
 271         char       *rp;
 272         int                     val;                    /* holds unprintable chars */
 273         int                     i;
 274         int                     len;
 275
 276         len = 1;                                        /* empty string has 1 char */
 277         vp = VARDATA_ANY(vlena);
 278         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
 279         {
 280                 if (*vp == '\\')
 281                         len += 2;
 282                 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
 283                         len += 4;
 284                 else
 285                         len++;
 286         }
 287         rp = result = (char *) palloc(len);
 288         vp = VARDATA_ANY(vlena);
 289         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
 290         {
 291                 if (*vp == '\\')
 292                 {
 293                         *rp++ = '\\';
 294                         *rp++ = '\\';
 295                 }
 296                 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
 297                 {
 298                         val = *vp;
 299                         rp[0] = '\\';
 300                         rp[3] = DIG(val & 07);
 301                         val >>= 3;
 302                         rp[2] = DIG(val & 07);
 303                         val >>= 3;
 304                         rp[1] = DIG(val & 03);
 305                         rp += 4;
 306                 }
 307                 else
 308                         *rp++ = *vp;
 309         }
 310         *rp = '\0';
 311         PG_RETURN_CSTRING(result);
 312 }
 313
 314 /*
 315  *              bytearecv                       - converts external binary format to bytea
 316  */
 317 Datum
 318 bytearecv(PG_FUNCTION_ARGS)
 319 {
 320         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 321         bytea      *result;
 322         int                     nbytes;
 323
 324         nbytes = buf->len - buf->cursor;
 325         result = (bytea *) palloc(nbytes + VARHDRSZ);
 326         SET_VARSIZE(result, nbytes + VARHDRSZ);
 327         pq_copymsgbytes(buf, VARDATA(result), nbytes);
 328         PG_RETURN_BYTEA_P(result);
 329 }
 330
 331 /*
 332  *              byteasend                       - converts bytea to binary format
 333  *
 334  * This is a special case: just copy the input...
 335  */
 336 Datum
 337 byteasend(PG_FUNCTION_ARGS)
 338 {
 339         bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
 340
 341         PG_RETURN_BYTEA_P(vlena);
 342 }
 343
 344
 345 /*
 346  *              textin                  - converts "..." to internal representation
 347  */
 348 Datum
 349 textin(PG_FUNCTION_ARGS)
 350 {
 351         char       *inputText = PG_GETARG_CSTRING(0);
 352
 353         PG_RETURN_TEXT_P(cstring_to_text(inputText));
 354 }
 355
 356 /*
 357  *              textout                 - converts internal representation to "..."
 358  */
 359 Datum
 360 textout(PG_FUNCTION_ARGS)
 361 {
 362         Datum           txt = PG_GETARG_DATUM(0);
 363
 364         PG_RETURN_CSTRING(TextDatumGetCString(txt));
 365 }
 366
 367 /*
 368  *              textrecv                        - converts external binary format to text
 369  */
 370 Datum
 371 textrecv(PG_FUNCTION_ARGS)
 372 {
 373         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 374         text       *result;
 375         char       *str;
 376         int                     nbytes;
 377
 378         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 379
 380         result = cstring_to_text_with_len(str, nbytes);
 381         pfree(str);
 382         PG_RETURN_TEXT_P(result);
 383 }
 384
 385 /*
 386  *              textsend                        - converts text to binary format
 387  */
 388 Datum
 389 textsend(PG_FUNCTION_ARGS)
 390 {
 391         text       *t = PG_GETARG_TEXT_PP(0);
 392         StringInfoData buf;
 393
 394         pq_begintypsend(&buf);
 395         pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
 396         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 397 }
 398
 399
 400 /*
 401  *              unknownin                       - converts "..." to internal representation
 402  */
 403 Datum
 404 unknownin(PG_FUNCTION_ARGS)
 405 {
 406         char       *str = PG_GETARG_CSTRING(0);
 407
 408         /* representation is same as cstring */
 409         PG_RETURN_CSTRING(pstrdup(str));
 410 }
 411
 412 /*
 413  *              unknownout                      - converts internal representation to "..."
 414  */
 415 Datum
 416 unknownout(PG_FUNCTION_ARGS)
 417 {
 418         /* representation is same as cstring */
 419         char       *str = PG_GETARG_CSTRING(0);
 420
 421         PG_RETURN_CSTRING(pstrdup(str));
 422 }
 423
 424 /*
 425  *              unknownrecv                     - converts external binary format to unknown
 426  */
 427 Datum
 428 unknownrecv(PG_FUNCTION_ARGS)
 429 {
 430         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 431         char       *str;
 432         int                     nbytes;
 433
 434         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 435         /* representation is same as cstring */
 436         PG_RETURN_CSTRING(str);
 437 }
 438
 439 /*
 440  *              unknownsend                     - converts unknown to binary format
 441  */
 442 Datum
 443 unknownsend(PG_FUNCTION_ARGS)
 444 {
 445         /* representation is same as cstring */
 446         char       *str = PG_GETARG_CSTRING(0);
 447         StringInfoData buf;
 448
 449         pq_begintypsend(&buf);
 450         pq_sendtext(&buf, str, strlen(str));
 451         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 452 }
 453
 454
 455 /* ========== PUBLIC ROUTINES ========== */
 456
 457 /*
 458  * textlen -
 459  *        returns the logical length of a text*
 460  *         (which is less than the VARSIZE of the text*)
 461  */
 462 Datum
 463 textlen(PG_FUNCTION_ARGS)
 464 {
 465         Datum           str = PG_GETARG_DATUM(0);
 466
 467         /* try to avoid decompressing argument */
 468         PG_RETURN_INT32(text_length(str));
 469 }
 470
 471 /*
 472  * text_length -
 473  *      Does the real work for textlen()
 474  *
 475  *      This is broken out so it can be called directly by other string processing
 476  *      functions.      Note that the argument is passed as a Datum, to indicate that
 477  *      it may still be in compressed form.  We can avoid decompressing it at all
 478  *      in some cases.
 479  */
 480 static int32
 481 text_length(Datum str)
 482 {
 483         /* fastpath when max encoding length is one */
 484         if (pg_database_encoding_max_length() == 1)
 485                 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 486         else
 487         {
 488                 text       *t = DatumGetTextPP(str);
 489
 490                 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
 491                                                                                          VARSIZE_ANY_EXHDR(t)));
 492         }
 493 }
 494
 495 /*
 496  * textoctetlen -
 497  *        returns the physical length of a text*
 498  *         (which is less than the VARSIZE of the text*)
 499  */
 500 Datum
 501 textoctetlen(PG_FUNCTION_ARGS)
 502 {
 503         Datum           str = PG_GETARG_DATUM(0);
 504
 505         /* We need not detoast the input at all */
 506         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 507 }
 508
 509 /*
 510  * textcat -
 511  *        takes two text* and returns a text* that is the concatenation of
 512  *        the two.
 513  *
 514  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
 515  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
 516  * Allocate space for output in all cases.
 517  * XXX - thomas 1997-07-10
 518  */
 519 Datum
 520 textcat(PG_FUNCTION_ARGS)
 521 {
 522         text       *t1 = PG_GETARG_TEXT_PP(0);
 523         text       *t2 = PG_GETARG_TEXT_PP(1);
 524         int                     len1,
 525                                 len2,
 526                                 len;
 527         text       *result;
 528         char       *ptr;
 529
 530         len1 = VARSIZE_ANY_EXHDR(t1);
 531         if (len1 < 0)
 532                 len1 = 0;
 533
 534         len2 = VARSIZE_ANY_EXHDR(t2);
 535         if (len2 < 0)
 536                 len2 = 0;
 537
 538         len = len1 + len2 + VARHDRSZ;
 539         result = (text *) palloc(len);
 540
 541         /* Set size of result string... */
 542         SET_VARSIZE(result, len);
 543
 544         /* Fill data field of result string... */
 545         ptr = VARDATA(result);
 546         if (len1 > 0)
 547                 memcpy(ptr, VARDATA_ANY(t1), len1);
 548         if (len2 > 0)
 549                 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
 550
 551         PG_RETURN_TEXT_P(result);
 552 }
 553
 554 /*
 555  * charlen_to_bytelen()
 556  *      Compute the number of bytes occupied by n characters starting at *p
 557  *
 558  * It is caller's responsibility that there actually are n characters;
 559  * the string need not be null-terminated.
 560  */
 561 static int
 562 charlen_to_bytelen(const char *p, int n)
 563 {
 564         if (pg_database_encoding_max_length() == 1)
 565         {
 566                 /* Optimization for single-byte encodings */
 567                 return n;
 568         }
 569         else
 570         {
 571                 const char *s;
 572
 573                 for (s = p; n > 0; n--)
 574                         s += pg_mblen(s);
 575
 576                 return s - p;
 577         }
 578 }
 579
 580 /*
 581  * text_substr()
 582  * Return a substring starting at the specified position.
 583  * - thomas 1997-12-31
 584  *
 585  * Input:
 586  *      - string
 587  *      - starting position (is one-based)
 588  *      - string length
 589  *
 590  * If the starting position is zero or less, then return from the start of the string
 591  *      adjusting the length to be consistent with the "negative start" per SQL92.
 592  * If the length is less than zero, return the remaining string.
 593  *
 594  * Added multibyte support.
 595  * - Tatsuo Ishii 1998-4-21
 596  * Changed behavior if starting position is less than one to conform to SQL92 behavior.
 597  * Formerly returned the entire string; now returns a portion.
 598  * - Thomas Lockhart 1998-12-10
 599  * Now uses faster TOAST-slicing interface
 600  * - John Gray 2002-02-22
 601  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
 602  * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
 603  * error; if E < 1, return '', not entire string). Fixed MB related bug when
 604  * S > LC and < LC + 4 sometimes garbage characters are returned.
 605  * - Joe Conway 2002-08-10
 606  */
 607 Datum
 608 text_substr(PG_FUNCTION_ARGS)
 609 {
 610         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 611                                                                         PG_GETARG_INT32(1),
 612                                                                         PG_GETARG_INT32(2),
 613                                                                         false));
 614 }
 615
 616 /*
 617  * text_substr_no_len -
 618  *        Wrapper to avoid opr_sanity failure due to
 619  *        one function accepting a different number of args.
 620  */
 621 Datum
 622 text_substr_no_len(PG_FUNCTION_ARGS)
 623 {
 624         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 625                                                                         PG_GETARG_INT32(1),
 626                                                                         -1, true));
 627 }
 628
 629 /*
 630  * text_substring -
 631  *      Does the real work for text_substr() and text_substr_no_len()
 632  *
 633  *      This is broken out so it can be called directly by other string processing
 634  *      functions.      Note that the argument is passed as a Datum, to indicate that
 635  *      it may still be in compressed/toasted form.  We can avoid detoasting all
 636  *      of it in some cases.
 637  *
 638  *      The result is always a freshly palloc'd datum.
 639  */
 640 static text *
 641 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 642 {
 643         int32           eml = pg_database_encoding_max_length();
 644         int32           S = start;              /* start position */
 645         int32           S1;                             /* adjusted start position */
 646         int32           L1;                             /* adjusted substring length */
 647
 648         /* life is easy if the encoding max length is 1 */
 649         if (eml == 1)
 650         {
 651                 S1 = Max(S, 1);
 652
 653                 if (length_not_specified)               /* special case - get length to end of
 654                                                                                  * string */
 655                         L1 = -1;
 656                 else
 657                 {
 658                         /* end position */
 659                         int                     E = S + length;
 660
 661                         /*
 662                          * A negative value for L is the only way for the end position to
 663                          * be before the start. SQL99 says to throw an error.
 664                          */
 665                         if (E < S)
 666                                 ereport(ERROR,
 667                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
 668                                                  errmsg("negative substring length not allowed")));
 669
 670                         /*
 671                          * A zero or negative value for the end position can happen if the
 672                          * start was negative or one. SQL99 says to return a zero-length
 673                          * string.
 674                          */
 675                         if (E < 1)
 676                                 return cstring_to_text("");
 677
 678                         L1 = E - S1;
 679                 }
 680
 681                 /*
 682                  * If the start position is past the end of the string, SQL99 says to
 683                  * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
 684                  * that for us. Convert to zero-based starting position
 685                  */
 686                 return DatumGetTextPSlice(str, S1 - 1, L1);
 687         }
 688         else if (eml > 1)
 689         {
 690                 /*
 691                  * When encoding max length is > 1, we can't get LC without
 692                  * detoasting, so we'll grab a conservatively large slice now and go
 693                  * back later to do the right thing
 694                  */
 695                 int32           slice_start;
 696                 int32           slice_size;
 697                 int32           slice_strlen;
 698                 text       *slice;
 699                 int32           E1;
 700                 int32           i;
 701                 char       *p;
 702                 char       *s;
 703                 text       *ret;
 704
 705                 /*
 706                  * if S is past the end of the string, the tuple toaster will return a
 707                  * zero-length string to us
 708                  */
 709                 S1 = Max(S, 1);
 710
 711                 /*
 712                  * We need to start at position zero because there is no way to know
 713                  * in advance which byte offset corresponds to the supplied start
 714                  * position.
 715                  */
 716                 slice_start = 0;
 717
 718                 if (length_not_specified)               /* special case - get length to end of
 719                                                                                  * string */
 720                         slice_size = L1 = -1;
 721                 else
 722                 {
 723                         int                     E = S + length;
 724
 725                         /*
 726                          * A negative value for L is the only way for the end position to
 727                          * be before the start. SQL99 says to throw an error.
 728                          */
 729                         if (E < S)
 730                                 ereport(ERROR,
 731                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
 732                                                  errmsg("negative substring length not allowed")));
 733
 734                         /*
 735                          * A zero or negative value for the end position can happen if the
 736                          * start was negative or one. SQL99 says to return a zero-length
 737                          * string.
 738                          */
 739                         if (E < 1)
 740                                 return cstring_to_text("");
 741
 742                         /*
 743                          * if E is past the end of the string, the tuple toaster will
 744                          * truncate the length for us
 745                          */
 746                         L1 = E - S1;
 747
 748                         /*
 749                          * Total slice size in bytes can't be any longer than the start
 750                          * position plus substring length times the encoding max length.
 751                          */
 752                         slice_size = (S1 + L1) * eml;
 753                 }
 754
 755                 /*
 756                  * If we're working with an untoasted source, no need to do an extra
 757                  * copying step.
 758                  */
 759                 if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
 760                         VARATT_IS_EXTERNAL(DatumGetPointer(str)))
 761                         slice = DatumGetTextPSlice(str, slice_start, slice_size);
 762                 else
 763                         slice = (text *) DatumGetPointer(str);
 764
 765                 /* see if we got back an empty string */
 766                 if (VARSIZE_ANY_EXHDR(slice) == 0)
 767                 {
 768                         if (slice != (text *) DatumGetPointer(str))
 769                                 pfree(slice);
 770                         return cstring_to_text("");
 771                 }
 772
 773                 /* Now we can get the actual length of the slice in MB characters */
 774                 slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
 775                                                                                         VARSIZE_ANY_EXHDR(slice));
 776
 777                 /*
 778                  * Check that the start position wasn't > slice_strlen. If so, SQL99
 779                  * says to return a zero-length string.
 780                  */
 781                 if (S1 > slice_strlen)
 782                 {
 783                         if (slice != (text *) DatumGetPointer(str))
 784                                 pfree(slice);
 785                         return cstring_to_text("");
 786                 }
 787
 788                 /*
 789                  * Adjust L1 and E1 now that we know the slice string length. Again
 790                  * remember that S1 is one based, and slice_start is zero based.
 791                  */
 792                 if (L1 > -1)
 793                         E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
 794                 else
 795                         E1 = slice_start + 1 + slice_strlen;
 796
 797                 /*
 798                  * Find the start position in the slice; remember S1 is not zero based
 799                  */
 800                 p = VARDATA_ANY(slice);
 801                 for (i = 0; i < S1 - 1; i++)
 802                         p += pg_mblen(p);
 803
 804                 /* hang onto a pointer to our start position */
 805                 s = p;
 806
 807                 /*
 808                  * Count the actual bytes used by the substring of the requested
 809                  * length.
 810                  */
 811                 for (i = S1; i < E1; i++)
 812                         p += pg_mblen(p);
 813
 814                 ret = (text *) palloc(VARHDRSZ + (p - s));
 815                 SET_VARSIZE(ret, VARHDRSZ + (p - s));
 816                 memcpy(VARDATA(ret), s, (p - s));
 817
 818                 if (slice != (text *) DatumGetPointer(str))
 819                         pfree(slice);
 820
 821                 return ret;
 822         }
 823         else
 824                 elog(ERROR, "invalid backend encoding: encoding max length < 1");
 825
 826         /* not reached: suppress compiler warning */
 827         return NULL;
 828 }
 829
 830 /*
 831  * textpos -
 832  *        Return the position of the specified substring.
 833  *        Implements the SQL92 POSITION() function.
 834  *        Ref: A Guide To The SQL Standard, Date & Darwen, 1997
 835  * - thomas 1997-07-27
 836  */
 837 Datum
 838 textpos(PG_FUNCTION_ARGS)
 839 {
 840         text       *str = PG_GETARG_TEXT_PP(0);
 841         text       *search_str = PG_GETARG_TEXT_PP(1);
 842
 843         PG_RETURN_INT32((int32) text_position(str, search_str));
 844 }
 845
 846 /*
 847  * text_position -
 848  *      Does the real work for textpos()
 849  *
 850  * Inputs:
 851  *              t1 - string to be searched
 852  *              t2 - pattern to match within t1
 853  * Result:
 854  *              Character index of the first matched char, starting from 1,
 855  *              or 0 if no match.
 856  *
 857  *      This is broken out so it can be called directly by other string processing
 858  *      functions.
 859  */
 860 static int
 861 text_position(text *t1, text *t2)
 862 {
 863         TextPositionState state;
 864         int                     result;
 865
 866         text_position_setup(t1, t2, &state);
 867         result = text_position_next(1, &state);
 868         text_position_cleanup(&state);
 869         return result;
 870 }
 871
 872
 873 /*
 874  * text_position_setup, text_position_next, text_position_cleanup -
 875  *      Component steps of text_position()
 876  *
 877  * These are broken out so that a string can be efficiently searched for
 878  * multiple occurrences of the same pattern.  text_position_next may be
 879  * called multiple times with increasing values of start_pos, which is
 880  * the 1-based character position to start the search from.  The "state"
 881  * variable is normally just a local variable in the caller.
 882  */
 883
 884 static void
 885 text_position_setup(text *t1, text *t2, TextPositionState *state)
 886 {
 887         int                     len1 = VARSIZE_ANY_EXHDR(t1);
 888         int                     len2 = VARSIZE_ANY_EXHDR(t2);
 889
 890         if (pg_database_encoding_max_length() == 1)
 891         {
 892                 /* simple case - single byte encoding */
 893                 state->use_wchar = false;
 894                 state->str1 = VARDATA_ANY(t1);
 895                 state->str2 = VARDATA_ANY(t2);
 896                 state->len1 = len1;
 897                 state->len2 = len2;
 898         }
 899         else
 900         {
 901                 /* not as simple - multibyte encoding */
 902                 pg_wchar   *p1,
 903                                    *p2;
 904
 905                 p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
 906                 len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
 907                 p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
 908                 len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
 909
 910                 state->use_wchar = true;
 911                 state->wstr1 = p1;
 912                 state->wstr2 = p2;
 913                 state->len1 = len1;
 914                 state->len2 = len2;
 915         }
 916
 917         /*
 918          * Prepare the skip table for Boyer-Moore-Horspool searching.  In these
 919          * notes we use the terminology that the "haystack" is the string to be
 920          * searched (t1) and the "needle" is the pattern being sought (t2).
 921          *
 922          * If the needle is empty or bigger than the haystack then there is no
 923          * point in wasting cycles initializing the table.  We also choose not
 924          * to use B-M-H for needles of length 1, since the skip table can't
 925          * possibly save anything in that case.
 926          */
 927         if (len1 >= len2 && len2 > 1)
 928         {
 929                 int             searchlength = len1 - len2;
 930                 int     skiptablemask;
 931                 int     last;
 932                 int     i;
 933
 934                 /*
 935                  * First we must determine how much of the skip table to use.  The
 936                  * declaration of TextPositionState allows up to 256 elements, but for
 937                  * short search problems we don't really want to have to initialize so
 938                  * many elements --- it would take too long in comparison to the
 939                  * actual search time.  So we choose a useful skip table size based on
 940                  * the haystack length minus the needle length.  The closer the needle
 941                  * length is to the haystack length the less useful skipping becomes.
 942                  *
 943                  * Note: since we use bit-masking to select table elements, the skip
 944                  * table size MUST be a power of 2, and so the mask must be 2^N-1.
 945                  */
 946                 if (searchlength < 16)
 947                         skiptablemask = 3;
 948                 else if (searchlength < 64)
 949                         skiptablemask = 7;
 950                 else if (searchlength < 128)
 951                         skiptablemask = 15;
 952                 else if (searchlength < 512)
 953                         skiptablemask = 31;
 954                 else if (searchlength < 2048)
 955                         skiptablemask = 63;
 956                 else if (searchlength < 4096)
 957                         skiptablemask = 127;
 958                 else
 959                         skiptablemask = 255;
 960                 state->skiptablemask = skiptablemask;
 961
 962                 /*
 963                  * Initialize the skip table.  We set all elements to the needle
 964                  * length, since this is the correct skip distance for any character
 965                  * not found in the needle.
 966                  */
 967                 for (i = 0; i <= skiptablemask; i++)
 968                         state->skiptable[i] = len2;
 969
 970                 /*
 971                  * Now examine the needle.  For each character except the last one,
 972                  * set the corresponding table element to the appropriate skip
 973                  * distance.  Note that when two characters share the same skip table
 974                  * entry, the one later in the needle must determine the skip distance.
 975                  */
 976                 last = len2 - 1;
 977
 978                 if (!state->use_wchar)
 979                 {
 980                         const char *str2 = state->str2;
 981
 982                         for (i = 0; i < last; i++)
 983                                 state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
 984                 }
 985                 else
 986                 {
 987                         const pg_wchar *wstr2 = state->wstr2;
 988
 989                         for (i = 0; i < last; i++)
 990                                 state->skiptable[wstr2[i] & skiptablemask] = last - i;
 991                 }
 992         }
 993 }
 994
 995 static int
 996 text_position_next(int start_pos, TextPositionState *state)
 997 {
 998         int                     haystack_len = state->len1;
 999         int                     needle_len = state->len2;
1000         int                     skiptablemask = state->skiptablemask;
1001
1002         Assert(start_pos > 0);          /* else caller error */
1003
1004         if (needle_len <= 0)
1005                 return start_pos;               /* result for empty pattern */
1006
1007         start_pos--;                            /* adjust for zero based arrays */
1008
1009         /* Done if the needle can't possibly fit */
1010         if (haystack_len < start_pos + needle_len)
1011                 return 0;
1012
1013         if (!state->use_wchar)
1014         {
1015                 /* simple case - single byte encoding */
1016                 const char *haystack = state->str1;
1017                 const char *needle = state->str2;
1018                 const char *haystack_end = &haystack[haystack_len];
1019                 const char *hptr;
1020
1021                 if (needle_len == 1)
1022                 {
1023                         /* No point in using B-M-H for a one-character needle */
1024                         char    nchar = *needle;
1025
1026                         hptr = &haystack[start_pos];
1027                         while (hptr < haystack_end)
1028                         {
1029                                 if (*hptr == nchar)
1030                                         return hptr - haystack + 1;
1031                                 hptr++;
1032                         }
1033                 }
1034                 else
1035                 {
1036                         const char *needle_last = &needle[needle_len - 1];
1037
1038                         /* Start at startpos plus the length of the needle */
1039                         hptr = &haystack[start_pos + needle_len - 1];
1040                         while (hptr < haystack_end)
1041                         {
1042                                 /* Match the needle scanning *backward* */
1043                                 const char *nptr;
1044                                 const char *p;
1045
1046                                 nptr = needle_last;
1047                                 p = hptr;
1048                                 while (*nptr == *p)
1049                                 {
1050                                         /* Matched it all?  If so, return 1-based position */
1051                                         if (nptr == needle)
1052                                                 return p - haystack + 1;
1053                                         nptr--, p--;
1054                                 }
1055                                 /*
1056                                  * No match, so use the haystack char at hptr to decide how
1057                                  * far to advance.  If the needle had any occurrence of that
1058                                  * character (or more precisely, one sharing the same
1059                                  * skiptable entry) before its last character, then we advance
1060                                  * far enough to align the last such needle character with
1061                                  * that haystack position.  Otherwise we can advance by the
1062                                  * whole needle length.
1063                                  */
1064                                 hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1065                         }
1066                 }
1067         }
1068         else
1069         {
1070                 /* The multibyte char version. This works exactly the same way. */
1071                 const pg_wchar *haystack = state->wstr1;
1072                 const pg_wchar *needle = state->wstr2;
1073                 const pg_wchar *haystack_end = &haystack[haystack_len];
1074                 const pg_wchar *hptr;
1075
1076                 if (needle_len == 1)
1077                 {
1078                         /* No point in using B-M-H for a one-character needle */
1079                         pg_wchar        nchar = *needle;
1080
1081                         hptr = &haystack[start_pos];
1082                         while (hptr < haystack_end)
1083                         {
1084                                 if (*hptr == nchar)
1085                                         return hptr - haystack + 1;
1086                                 hptr++;
1087                         }
1088                 }
1089                 else
1090                 {
1091                         const pg_wchar *needle_last = &needle[needle_len - 1];
1092
1093                         /* Start at startpos plus the length of the needle */
1094                         hptr = &haystack[start_pos + needle_len - 1];
1095                         while (hptr < haystack_end)
1096                         {
1097                                 /* Match the needle scanning *backward* */
1098                                 const pg_wchar *nptr;
1099                                 const pg_wchar *p;
1100
1101                                 nptr = needle_last;
1102                                 p = hptr;
1103                                 while (*nptr == *p)
1104                                 {
1105                                         /* Matched it all?  If so, return 1-based position */
1106                                         if (nptr == needle)
1107                                                 return p - haystack + 1;
1108                                         nptr--, p--;
1109                                 }
1110                                 /*
1111                                  * No match, so use the haystack char at hptr to decide how
1112                                  * far to advance.  If the needle had any occurrence of that
1113                                  * character (or more precisely, one sharing the same
1114                                  * skiptable entry) before its last character, then we advance
1115                                  * far enough to align the last such needle character with
1116                                  * that haystack position.  Otherwise we can advance by the
1117                                  * whole needle length.
1118                                  */
1119                                 hptr += state->skiptable[*hptr & skiptablemask];
1120                         }
1121                 }
1122         }
1123
1124         return 0;                                       /* not found */
1125 }
1126
1127 static void
1128 text_position_cleanup(TextPositionState *state)
1129 {
1130         if (state->use_wchar)
1131         {
1132                 pfree(state->wstr1);
1133                 pfree(state->wstr2);
1134         }
1135 }
1136
/* varstr_cmp()
 * Comparison function for text strings with given lengths.
 * Includes locale support, but must copy strings to temporary memory
 *      to allow null-termination for inputs to strcoll().
 *
 * arg1/arg2 need not be null-terminated; len1/len2 give their lengths
 *      in bytes.
 *
 * Returns an integer less than, equal to, or greater than zero according
 *      to whether arg1 sorts before, equal to, or after arg2.  The value is
 *      passed through from strncmp()/strcoll()/wcscoll(), so callers must
 *      test only its sign; it is not restricted to -1, 0 or 1.
 */
int
varstr_cmp(char *arg1, int len1, char *arg2, int len2)
{
        int                     result;

        /*
         * Unfortunately, there is no strncoll(), so in the non-C locale case we
         * have to do some memory copying.      This turns out to be significantly
         * slower, so we optimize the case where LC_COLLATE is C.  We also try to
         * optimize relatively-short strings by avoiding palloc/pfree overhead.
         */
        if (lc_collate_is_c())
        {
                /* Compare the common prefix; if it ties, the shorter string is less */
                result = strncmp(arg1, arg2, Min(len1, len2));
                if ((result == 0) && (len1 != len2))
                        result = (len1 < len2) ? -1 : 1;
        }
        else
        {
#define STACKBUFLEN             1024

                /* On-stack work buffers, used whenever the inputs are short enough */
                char            a1buf[STACKBUFLEN];
                char            a2buf[STACKBUFLEN];
                char       *a1p,
                                   *a2p;

#ifdef WIN32
                /* Win32 does not have UTF-8, so we need to map to UTF-16 */
                if (GetDatabaseEncoding() == PG_UTF8)
                {
                        int                     a1len;
                        int                     a2len;
                        int                     r;

                        /*
                         * Size each buffer in bytes: two bytes per input byte plus two
                         * for the terminator when palloc'ing, else use the stack buffer.
                         */
                        if (len1 >= STACKBUFLEN / 2)
                        {
                                a1len = len1 * 2 + 2;
                                a1p = palloc(a1len);
                        }
                        else
                        {
                                a1len = STACKBUFLEN;
                                a1p = a1buf;
                        }
                        if (len2 >= STACKBUFLEN / 2)
                        {
                                a2len = len2 * 2 + 2;
                                a2p = palloc(a2len);
                        }
                        else
                        {
                                a2len = STACKBUFLEN;
                                a2p = a2buf;
                        }

                        /* MultiByteToWideChar() does not work for zero-length input */
                        if (len1 == 0)
                                r = 0;
                        else
                        {
                                r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
                                                                                (LPWSTR) a1p, a1len / 2);
                                if (!r)
                                        ereport(ERROR,
                                         (errmsg("could not convert string to UTF-16: error %lu",
                                                         GetLastError())));
                        }
                        /* r is the number of wide characters written; terminate there */
                        ((LPWSTR) a1p)[r] = 0;

                        if (len2 == 0)
                                r = 0;
                        else
                        {
                                r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
                                                                                (LPWSTR) a2p, a2len / 2);
                                if (!r)
                                        ereport(ERROR,
                                         (errmsg("could not convert string to UTF-16: error %lu",
                                                         GetLastError())));
                        }
                        ((LPWSTR) a2p)[r] = 0;

                        /* Clear errno so that %m below reports only a wcscoll() failure */
                        errno = 0;
                        result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
                        if (result == 2147483647)       /* _NLSCMPERROR; missing from mingw
                                                                                 * headers */
                                ereport(ERROR,
                                                (errmsg("could not compare Unicode strings: %m")));

                        /*
                         * In some locales wcscoll() can claim that nonidentical strings
                         * are equal.  Believing that would be bad news for a number of
                         * reasons, so we follow Perl's lead and sort "equal" strings
                         * according to strcmp (on the UTF-8 representation).
                         */
                        if (result == 0)
                        {
                                result = strncmp(arg1, arg2, Min(len1, len2));
                                if ((result == 0) && (len1 != len2))
                                        result = (len1 < len2) ? -1 : 1;
                        }

                        /* Free only what was palloc'd, not the stack buffers */
                        if (a1p != a1buf)
                                pfree(a1p);
                        if (a2p != a2buf)
                                pfree(a2p);

                        return result;
                }
#endif   /* WIN32 */

                /* Need room for len bytes plus a terminator; palloc if too long */
                if (len1 >= STACKBUFLEN)
                        a1p = (char *) palloc(len1 + 1);
                else
                        a1p = a1buf;
                if (len2 >= STACKBUFLEN)
                        a2p = (char *) palloc(len2 + 1);
                else
                        a2p = a2buf;

                /* Make null-terminated copies, as strcoll() requires */
                memcpy(a1p, arg1, len1);
                a1p[len1] = '\0';
                memcpy(a2p, arg2, len2);
                a2p[len2] = '\0';

                result = strcoll(a1p, a2p);

                /*
                 * In some locales strcoll() can claim that nonidentical strings are
                 * equal.  Believing that would be bad news for a number of reasons,
                 * so we follow Perl's lead and sort "equal" strings according to
                 * strcmp().
                 */
                if (result == 0)
                        result = strcmp(a1p, a2p);

                /* Free only what was palloc'd, not the stack buffers */
                if (a1p != a1buf)
                        pfree(a1p);
                if (a2p != a2buf)
                        pfree(a2p);
        }

        return result;
}
1287
1288
1289 /* text_cmp()
1290  * Internal comparison function for text strings.
1291  * Returns -1, 0 or 1
1292  */
1293 static int
1294 text_cmp(text *arg1, text *arg2)
1295 {
1296         char       *a1p,
1297                            *a2p;
1298         int                     len1,
1299                                 len2;
1300
1301         a1p = VARDATA_ANY(arg1);
1302         a2p = VARDATA_ANY(arg2);
1303
1304         len1 = VARSIZE_ANY_EXHDR(arg1);
1305         len2 = VARSIZE_ANY_EXHDR(arg2);
1306
1307         return varstr_cmp(a1p, len1, a2p, len2);
1308 }
1309
1310 /*
1311  * Comparison functions for text strings.
1312  *
1313  * Note: btree indexes need these routines not to leak memory; therefore,
1314  * be careful to free working copies of toasted datums.  Most places don't
1315  * need to be so careful.
1316  */
1317
1318 Datum
1319 texteq(PG_FUNCTION_ARGS)
1320 {
1321         text       *arg1 = PG_GETARG_TEXT_PP(0);
1322         text       *arg2 = PG_GETARG_TEXT_PP(1);
1323         bool            result;
1324
1325         /*
1326          * Since we only care about equality or not-equality, we can avoid all the
1327          * expense of strcoll() here, and just do bitwise comparison.
1328          */
1329         if (VARSIZE_ANY_EXHDR(arg1) != VARSIZE_ANY_EXHDR(arg2))
1330                 result = false;
1331         else
1332                 result = (strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
1333                                                   VARSIZE_ANY_EXHDR(arg1)) == 0);
1334
1335         PG_FREE_IF_COPY(arg1, 0);
1336         PG_FREE_IF_COPY(arg2, 1);
1337
1338         PG_RETURN_BOOL(result);
1339 }
1340
1341 Datum
1342 textne(PG_FUNCTION_ARGS)
1343 {
1344         text       *arg1 = PG_GETARG_TEXT_PP(0);
1345         text       *arg2 = PG_GETARG_TEXT_PP(1);
1346         bool            result;
1347
1348         /*
1349          * Since we only care about equality or not-equality, we can avoid all the
1350          * expense of strcoll() here, and just do bitwise comparison.
1351          */
1352         if (VARSIZE_ANY_EXHDR(arg1) != VARSIZE_ANY_EXHDR(arg2))
1353                 result = true;
1354         else
1355                 result = (strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
1356                                                   VARSIZE_ANY_EXHDR(arg1)) != 0);
1357
1358         PG_FREE_IF_COPY(arg1, 0);
1359         PG_FREE_IF_COPY(arg2, 1);
1360
1361         PG_RETURN_BOOL(result);
1362 }
1363
1364 Datum
1365 text_lt(PG_FUNCTION_ARGS)
1366 {
1367         text       *arg1 = PG_GETARG_TEXT_PP(0);
1368         text       *arg2 = PG_GETARG_TEXT_PP(1);
1369         bool            result;
1370
1371         result = (text_cmp(arg1, arg2) < 0);
1372
1373         PG_FREE_IF_COPY(arg1, 0);
1374         PG_FREE_IF_COPY(arg2, 1);
1375
1376         PG_RETURN_BOOL(result);
1377 }
1378
1379 Datum
1380 text_le(PG_FUNCTION_ARGS)
1381 {
1382         text       *arg1 = PG_GETARG_TEXT_PP(0);
1383         text       *arg2 = PG_GETARG_TEXT_PP(1);
1384         bool            result;
1385
1386         result = (text_cmp(arg1, arg2) <= 0);
1387
1388         PG_FREE_IF_COPY(arg1, 0);
1389         PG_FREE_IF_COPY(arg2, 1);
1390
1391         PG_RETURN_BOOL(result);
1392 }
1393
1394 Datum
1395 text_gt(PG_FUNCTION_ARGS)
1396 {
1397         text       *arg1 = PG_GETARG_TEXT_PP(0);
1398         text       *arg2 = PG_GETARG_TEXT_PP(1);
1399         bool            result;
1400
1401         result = (text_cmp(arg1, arg2) > 0);
1402
1403         PG_FREE_IF_COPY(arg1, 0);
1404         PG_FREE_IF_COPY(arg2, 1);
1405
1406         PG_RETURN_BOOL(result);
1407 }
1408
1409 Datum
1410 text_ge(PG_FUNCTION_ARGS)
1411 {
1412         text       *arg1 = PG_GETARG_TEXT_PP(0);
1413         text       *arg2 = PG_GETARG_TEXT_PP(1);
1414         bool            result;
1415
1416         result = (text_cmp(arg1, arg2) >= 0);
1417
1418         PG_FREE_IF_COPY(arg1, 0);
1419         PG_FREE_IF_COPY(arg2, 1);
1420
1421         PG_RETURN_BOOL(result);
1422 }
1423
1424 Datum
1425 bttextcmp(PG_FUNCTION_ARGS)
1426 {
1427         text       *arg1 = PG_GETARG_TEXT_PP(0);
1428         text       *arg2 = PG_GETARG_TEXT_PP(1);
1429         int32           result;
1430
1431         result = text_cmp(arg1, arg2);
1432
1433         PG_FREE_IF_COPY(arg1, 0);
1434         PG_FREE_IF_COPY(arg2, 1);
1435
1436         PG_RETURN_INT32(result);
1437 }
1438
1439
1440 Datum
1441 text_larger(PG_FUNCTION_ARGS)
1442 {
1443         text       *arg1 = PG_GETARG_TEXT_PP(0);
1444         text       *arg2 = PG_GETARG_TEXT_PP(1);
1445         text       *result;
1446
1447         result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1448
1449         PG_RETURN_TEXT_P(result);
1450 }
1451
1452 Datum
1453 text_smaller(PG_FUNCTION_ARGS)
1454 {
1455         text       *arg1 = PG_GETARG_TEXT_PP(0);
1456         text       *arg2 = PG_GETARG_TEXT_PP(1);
1457         text       *result;
1458
1459         result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1460
1461         PG_RETURN_TEXT_P(result);
1462 }
1463
1464
1465 /*
1466  * The following operators support character-by-character comparison
1467  * of text datums, to allow building indexes suitable for LIKE clauses.
1468  * Note that the regular texteq/textne comparison operators are assumed
1469  * to be compatible with these!
1470  */
1471
1472 static int
1473 internal_text_pattern_compare(text *arg1, text *arg2)
1474 {
1475         int                     result;
1476         int                     len1,
1477                                 len2;
1478
1479         len1 = VARSIZE_ANY_EXHDR(arg1);
1480         len2 = VARSIZE_ANY_EXHDR(arg2);
1481
1482         result = strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1483         if (result != 0)
1484                 return result;
1485         else if (len1 < len2)
1486                 return -1;
1487         else if (len1 > len2)
1488                 return 1;
1489         else
1490                 return 0;
1491 }
1492
1493
1494 Datum
1495 text_pattern_lt(PG_FUNCTION_ARGS)
1496 {
1497         text       *arg1 = PG_GETARG_TEXT_PP(0);
1498         text       *arg2 = PG_GETARG_TEXT_PP(1);
1499         int                     result;
1500
1501         result = internal_text_pattern_compare(arg1, arg2);
1502
1503         PG_FREE_IF_COPY(arg1, 0);
1504         PG_FREE_IF_COPY(arg2, 1);
1505
1506         PG_RETURN_BOOL(result < 0);
1507 }
1508
1509
1510 Datum
1511 text_pattern_le(PG_FUNCTION_ARGS)
1512 {
1513         text       *arg1 = PG_GETARG_TEXT_PP(0);
1514         text       *arg2 = PG_GETARG_TEXT_PP(1);
1515         int                     result;
1516
1517         result = internal_text_pattern_compare(arg1, arg2);
1518
1519         PG_FREE_IF_COPY(arg1, 0);
1520         PG_FREE_IF_COPY(arg2, 1);
1521
1522         PG_RETURN_BOOL(result <= 0);
1523 }
1524
1525
1526 Datum
1527 text_pattern_ge(PG_FUNCTION_ARGS)
1528 {
1529         text       *arg1 = PG_GETARG_TEXT_PP(0);
1530         text       *arg2 = PG_GETARG_TEXT_PP(1);
1531         int                     result;
1532
1533         result = internal_text_pattern_compare(arg1, arg2);
1534
1535         PG_FREE_IF_COPY(arg1, 0);
1536         PG_FREE_IF_COPY(arg2, 1);
1537
1538         PG_RETURN_BOOL(result >= 0);
1539 }
1540
1541
1542 Datum
1543 text_pattern_gt(PG_FUNCTION_ARGS)
1544 {
1545         text       *arg1 = PG_GETARG_TEXT_PP(0);
1546         text       *arg2 = PG_GETARG_TEXT_PP(1);
1547         int                     result;
1548
1549         result = internal_text_pattern_compare(arg1, arg2);
1550
1551         PG_FREE_IF_COPY(arg1, 0);
1552         PG_FREE_IF_COPY(arg2, 1);
1553
1554         PG_RETURN_BOOL(result > 0);
1555 }
1556
1557
1558 Datum
1559 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1560 {
1561         text       *arg1 = PG_GETARG_TEXT_PP(0);
1562         text       *arg2 = PG_GETARG_TEXT_PP(1);
1563         int                     result;
1564
1565         result = internal_text_pattern_compare(arg1, arg2);
1566
1567         PG_FREE_IF_COPY(arg1, 0);
1568         PG_FREE_IF_COPY(arg2, 1);
1569
1570         PG_RETURN_INT32(result);
1571 }
1572
1573
1574 /*-------------------------------------------------------------
1575  * byteaoctetlen
1576  *
1577  * get the number of bytes contained in an instance of type 'bytea'
1578  *-------------------------------------------------------------
1579  */
1580 Datum
1581 byteaoctetlen(PG_FUNCTION_ARGS)
1582 {
1583         Datum           str = PG_GETARG_DATUM(0);
1584
1585         /* We need not detoast the input at all */
1586         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
1587 }
1588
1589 /*
1590  * byteacat -
1591  *        takes two bytea* and returns a bytea* that is the concatenation of
1592  *        the two.
1593  *
1594  * Cloned from textcat and modified as required.
1595  */
1596 Datum
1597 byteacat(PG_FUNCTION_ARGS)
1598 {
1599         bytea      *t1 = PG_GETARG_BYTEA_PP(0);
1600         bytea      *t2 = PG_GETARG_BYTEA_PP(1);
1601         int                     len1,
1602                                 len2,
1603                                 len;
1604         bytea      *result;
1605         char       *ptr;
1606
1607         len1 = VARSIZE_ANY_EXHDR(t1);
1608         if (len1 < 0)
1609                 len1 = 0;
1610
1611         len2 = VARSIZE_ANY_EXHDR(t2);
1612         if (len2 < 0)
1613                 len2 = 0;
1614
1615         len = len1 + len2 + VARHDRSZ;
1616         result = (bytea *) palloc(len);
1617
1618         /* Set size of result string... */
1619         SET_VARSIZE(result, len);
1620
1621         /* Fill data field of result string... */
1622         ptr = VARDATA(result);
1623         if (len1 > 0)
1624                 memcpy(ptr, VARDATA_ANY(t1), len1);
1625         if (len2 > 0)
1626                 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
1627
1628         PG_RETURN_BYTEA_P(result);
1629 }
1630
/*
 * PG_STR_GET_BYTEA -
 *        Build a bytea value from a C string by passing it through byteain.
 */
#define PG_STR_GET_BYTEA(str_) \
        DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1633
1634 /*
1635  * bytea_substr()
1636  * Return a substring starting at the specified position.
1637  * Cloned from text_substr and modified as required.
1638  *
1639  * Input:
1640  *      - string
1641  *      - starting position (is one-based)
1642  *      - string length (optional)
1643  *
1644  * If the starting position is zero or less, then return from the start of the string
1645  * adjusting the length to be consistent with the "negative start" per SQL92.
1646  * If the length is less than zero, an ERROR is thrown. If no third argument
1647  * (length) is provided, the length to the end of the string is assumed.
1648  */
1649 Datum
1650 bytea_substr(PG_FUNCTION_ARGS)
1651 {
1652         int                     S = PG_GETARG_INT32(1); /* start position */
1653         int                     S1;                             /* adjusted start position */
1654         int                     L1;                             /* adjusted substring length */
1655
1656         S1 = Max(S, 1);
1657
1658         if (fcinfo->nargs == 2)
1659         {
1660                 /*
1661                  * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs everything to
1662                  * the end of the string if we pass it a negative value for length.
1663                  */
1664                 L1 = -1;
1665         }
1666         else
1667         {
1668                 /* end position */
1669                 int                     E = S + PG_GETARG_INT32(2);
1670
1671                 /*
1672                  * A negative value for L is the only way for the end position to be
1673                  * before the start. SQL99 says to throw an error.
1674                  */
1675                 if (E < S)
1676                         ereport(ERROR,
1677                                         (errcode(ERRCODE_SUBSTRING_ERROR),
1678                                          errmsg("negative substring length not allowed")));
1679
1680                 /*
1681                  * A zero or negative value for the end position can happen if the
1682                  * start was negative or one. SQL99 says to return a zero-length
1683                  * string.
1684                  */
1685                 if (E < 1)
1686                         PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1687
1688                 L1 = E - S1;
1689         }
1690
1691         /*
1692          * If the start position is past the end of the string, SQL99 says to
1693          * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do that
1694          * for us. Convert to zero-based starting position
1695          */
1696         PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1697 }
1698
1699 /*
1700  * bytea_substr_no_len -
1701  *        Wrapper to avoid opr_sanity failure due to
1702  *        one function accepting a different number of args.
1703  */
1704 Datum
1705 bytea_substr_no_len(PG_FUNCTION_ARGS)
1706 {
1707         return bytea_substr(fcinfo);
1708 }
1709
1710 /*
1711  * byteapos -
1712  *        Return the position of the specified substring.
1713  *        Implements the SQL92 POSITION() function.
1714  * Cloned from textpos and modified as required.
1715  */
1716 Datum
1717 byteapos(PG_FUNCTION_ARGS)
1718 {
1719         bytea      *t1 = PG_GETARG_BYTEA_PP(0);
1720         bytea      *t2 = PG_GETARG_BYTEA_PP(1);
1721         int                     pos;
1722         int                     px,
1723                                 p;
1724         int                     len1,
1725                                 len2;
1726         char       *p1,
1727                            *p2;
1728
1729         len1 = VARSIZE_ANY_EXHDR(t1);
1730         len2 = VARSIZE_ANY_EXHDR(t2);
1731
1732         if (len2 <= 0)
1733                 PG_RETURN_INT32(1);             /* result for empty pattern */
1734
1735         p1 = VARDATA_ANY(t1);
1736         p2 = VARDATA_ANY(t2);
1737
1738         pos = 0;
1739         px = (len1 - len2);
1740         for (p = 0; p <= px; p++)
1741         {
1742                 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1743                 {
1744                         pos = p + 1;
1745                         break;
1746                 };
1747                 p1++;
1748         };
1749
1750         PG_RETURN_INT32(pos);
1751 }
1752
1753 /*-------------------------------------------------------------
1754  * byteaGetByte
1755  *
1756  * this routine treats "bytea" as an array of bytes.
1757  * It returns the Nth byte (a number between 0 and 255).
1758  *-------------------------------------------------------------
1759  */
1760 Datum
1761 byteaGetByte(PG_FUNCTION_ARGS)
1762 {
1763         bytea      *v = PG_GETARG_BYTEA_PP(0);
1764         int32           n = PG_GETARG_INT32(1);
1765         int                     len;
1766         int                     byte;
1767
1768         len = VARSIZE_ANY_EXHDR(v);
1769
1770         if (n < 0 || n >= len)
1771                 ereport(ERROR,
1772                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1773                                  errmsg("index %d out of valid range, 0..%d",
1774                                                 n, len - 1)));
1775
1776         byte = ((unsigned char *) VARDATA_ANY(v))[n];
1777
1778         PG_RETURN_INT32(byte);
1779 }
1780
1781 /*-------------------------------------------------------------
1782  * byteaGetBit
1783  *
1784  * This routine treats a "bytea" type like an array of bits.
1785  * It returns the value of the Nth bit (0 or 1).
1786  *
1787  *-------------------------------------------------------------
1788  */
1789 Datum
1790 byteaGetBit(PG_FUNCTION_ARGS)
1791 {
1792         bytea      *v = PG_GETARG_BYTEA_PP(0);
1793         int32           n = PG_GETARG_INT32(1);
1794         int                     byteNo,
1795                                 bitNo;
1796         int                     len;
1797         int                     byte;
1798
1799         len = VARSIZE_ANY_EXHDR(v);
1800
1801         if (n < 0 || n >= len * 8)
1802                 ereport(ERROR,
1803                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1804                                  errmsg("index %d out of valid range, 0..%d",
1805                                                 n, len * 8 - 1)));
1806
1807         byteNo = n / 8;
1808         bitNo = n % 8;
1809
1810         byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
1811
1812         if (byte & (1 << bitNo))
1813                 PG_RETURN_INT32(1);
1814         else
1815                 PG_RETURN_INT32(0);
1816 }
1817
1818 /*-------------------------------------------------------------
1819  * byteaSetByte
1820  *
1821  * Given an instance of type 'bytea' creates a new one with
1822  * the Nth byte set to the given value.
1823  *
1824  *-------------------------------------------------------------
1825  */
1826 Datum
1827 byteaSetByte(PG_FUNCTION_ARGS)
1828 {
1829         bytea      *v = PG_GETARG_BYTEA_P(0);
1830         int32           n = PG_GETARG_INT32(1);
1831         int32           newByte = PG_GETARG_INT32(2);
1832         int                     len;
1833         bytea      *res;
1834
1835         len = VARSIZE(v) - VARHDRSZ;
1836
1837         if (n < 0 || n >= len)
1838                 ereport(ERROR,
1839                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1840                                  errmsg("index %d out of valid range, 0..%d",
1841                                                 n, len - 1)));
1842
1843         /*
1844          * Make a copy of the original varlena.
1845          */
1846         res = (bytea *) palloc(VARSIZE(v));
1847         memcpy((char *) res, (char *) v, VARSIZE(v));
1848
1849         /*
1850          * Now set the byte.
1851          */
1852         ((unsigned char *) VARDATA(res))[n] = newByte;
1853
1854         PG_RETURN_BYTEA_P(res);
1855 }
1856
1857 /*-------------------------------------------------------------
1858  * byteaSetBit
1859  *
1860  * Given an instance of type 'bytea' creates a new one with
1861  * the Nth bit set to the given value.
1862  *
1863  *-------------------------------------------------------------
1864  */
1865 Datum
1866 byteaSetBit(PG_FUNCTION_ARGS)
1867 {
1868         bytea      *v = PG_GETARG_BYTEA_P(0);
1869         int32           n = PG_GETARG_INT32(1);
1870         int32           newBit = PG_GETARG_INT32(2);
1871         bytea      *res;
1872         int                     len;
1873         int                     oldByte,
1874                                 newByte;
1875         int                     byteNo,
1876                                 bitNo;
1877
1878         len = VARSIZE(v) - VARHDRSZ;
1879
1880         if (n < 0 || n >= len * 8)
1881                 ereport(ERROR,
1882                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1883                                  errmsg("index %d out of valid range, 0..%d",
1884                                                 n, len * 8 - 1)));
1885
1886         byteNo = n / 8;
1887         bitNo = n % 8;
1888
1889         /*
1890          * sanity check!
1891          */
1892         if (newBit != 0 && newBit != 1)
1893                 ereport(ERROR,
1894                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1895                                  errmsg("new bit must be 0 or 1")));
1896
1897         /*
1898          * Make a copy of the original varlena.
1899          */
1900         res = (bytea *) palloc(VARSIZE(v));
1901         memcpy((char *) res, (char *) v, VARSIZE(v));
1902
1903         /*
1904          * Update the byte.
1905          */
1906         oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1907
1908         if (newBit == 0)
1909                 newByte = oldByte & (~(1 << bitNo));
1910         else
1911                 newByte = oldByte | (1 << bitNo);
1912
1913         ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1914
1915         PG_RETURN_BYTEA_P(res);
1916 }
1917
1918
1919 /* text_name()
1920  * Converts a text type to a Name type.
1921  */
1922 Datum
1923 text_name(PG_FUNCTION_ARGS)
1924 {
1925         text       *s = PG_GETARG_TEXT_PP(0);
1926         Name            result;
1927         int                     len;
1928
1929         len = VARSIZE_ANY_EXHDR(s);
1930
1931         /* Truncate oversize input */
1932         if (len >= NAMEDATALEN)
1933                 len = NAMEDATALEN - 1;
1934
1935         result = (Name) palloc(NAMEDATALEN);
1936         memcpy(NameStr(*result), VARDATA_ANY(s), len);
1937
1938         /* now null pad to full length... */
1939         while (len < NAMEDATALEN)
1940         {
1941                 *(NameStr(*result) + len) = '\0';
1942                 len++;
1943         }
1944
1945         PG_RETURN_NAME(result);
1946 }
1947
1948 /* name_text()
1949  * Converts a Name type to a text type.
1950  */
1951 Datum
1952 name_text(PG_FUNCTION_ARGS)
1953 {
1954         Name            s = PG_GETARG_NAME(0);
1955
1956         PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
1957 }
1958
1959
1960 /*
1961  * textToQualifiedNameList - convert a text object to list of names
1962  *
1963  * This implements the input parsing needed by nextval() and other
1964  * functions that take a text parameter representing a qualified name.
1965  * We split the name at dots, downcase if not double-quoted, and
1966  * truncate names if they're too long.
1967  */
1968 List *
1969 textToQualifiedNameList(text *textval)
1970 {
1971         char       *rawname;
1972         List       *result = NIL;
1973         List       *namelist;
1974         ListCell   *l;
1975
1976         /* Convert to C string (handles possible detoasting). */
1977         /* Note we rely on being able to modify rawname below. */
1978         rawname = text_to_cstring(textval);
1979
1980         if (!SplitIdentifierString(rawname, '.', &namelist))
1981                 ereport(ERROR,
1982                                 (errcode(ERRCODE_INVALID_NAME),
1983                                  errmsg("invalid name syntax")));
1984
1985         if (namelist == NIL)
1986                 ereport(ERROR,
1987                                 (errcode(ERRCODE_INVALID_NAME),
1988                                  errmsg("invalid name syntax")));
1989
1990         foreach(l, namelist)
1991         {
1992                 char       *curname = (char *) lfirst(l);
1993
1994                 result = lappend(result, makeString(pstrdup(curname)));
1995         }
1996
1997         pfree(rawname);
1998         list_free(namelist);
1999
2000         return result;
2001 }
2002
2003 /*
2004  * SplitIdentifierString --- parse a string containing identifiers
2005  *
2006  * This is the guts of textToQualifiedNameList, and is exported for use in
2007  * other situations such as parsing GUC variables.      In the GUC case, it's
2008  * important to avoid memory leaks, so the API is designed to minimize the
2009  * amount of stuff that needs to be allocated and freed.
2010  *
2011  * Inputs:
2012  *      rawstring: the input string; must be overwritable!      On return, it's
2013  *                         been modified to contain the separated identifiers.
2014  *      separator: the separator punctuation expected between identifiers
2015  *                         (typically '.' or ',').      Whitespace may also appear around
2016  *                         identifiers.
2017  * Outputs:
2018  *      namelist: filled with a palloc'd list of pointers to identifiers within
2019  *                        rawstring.  Caller should list_free() this even on error return.
2020  *
2021  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
2022  *
2023  * Note that an empty string is considered okay here, though not in
2024  * textToQualifiedNameList.
2025  */
2026 bool
2027 SplitIdentifierString(char *rawstring, char separator,
2028                                           List **namelist)
2029 {
2030         char       *nextp = rawstring;
2031         bool            done = false;
2032
2033         *namelist = NIL;
2034
2035         while (isspace((unsigned char) *nextp))
2036                 nextp++;                                /* skip leading whitespace */
2037
2038         if (*nextp == '\0')
2039                 return true;                    /* allow empty string */
2040
2041         /* At the top of the loop, we are at start of a new identifier. */
2042         do
2043         {
2044                 char       *curname;
2045                 char       *endp;
2046
2047                 if (*nextp == '\"')
2048                 {
2049                         /* Quoted name --- collapse quote-quote pairs, no downcasing */
2050                         curname = nextp + 1;
2051                         for (;;)
2052                         {
2053                                 endp = strchr(nextp + 1, '\"');
2054                                 if (endp == NULL)
2055                                         return false;           /* mismatched quotes */
2056                                 if (endp[1] != '\"')
2057                                         break;          /* found end of quoted name */
2058                                 /* Collapse adjacent quotes into one quote, and look again */
2059                                 memmove(endp, endp + 1, strlen(endp));
2060                                 nextp = endp;
2061                         }
2062                         /* endp now points at the terminating quote */
2063                         nextp = endp + 1;
2064                 }
2065                 else
2066                 {
2067                         /* Unquoted name --- extends to separator or whitespace */
2068                         char       *downname;
2069                         int                     len;
2070
2071                         curname = nextp;
2072                         while (*nextp && *nextp != separator &&
2073                                    !isspace((unsigned char) *nextp))
2074                                 nextp++;
2075                         endp = nextp;
2076                         if (curname == nextp)
2077                                 return false;   /* empty unquoted name not allowed */
2078
2079                         /*
2080                          * Downcase the identifier, using same code as main lexer does.
2081                          *
2082                          * XXX because we want to overwrite the input in-place, we cannot
2083                          * support a downcasing transformation that increases the string
2084                          * length.      This is not a problem given the current implementation
2085                          * of downcase_truncate_identifier, but we'll probably have to do
2086                          * something about this someday.
2087                          */
2088                         len = endp - curname;
2089                         downname = downcase_truncate_identifier(curname, len, false);
2090                         Assert(strlen(downname) <= len);
2091                         strncpy(curname, downname, len);
2092                         pfree(downname);
2093                 }
2094
2095                 while (isspace((unsigned char) *nextp))
2096                         nextp++;                        /* skip trailing whitespace */
2097
2098                 if (*nextp == separator)
2099                 {
2100                         nextp++;
2101                         while (isspace((unsigned char) *nextp))
2102                                 nextp++;                /* skip leading whitespace for next */
2103                         /* we expect another name, so done remains false */
2104                 }
2105                 else if (*nextp == '\0')
2106                         done = true;
2107                 else
2108                         return false;           /* invalid syntax */
2109
2110                 /* Now safe to overwrite separator with a null */
2111                 *endp = '\0';
2112
2113                 /* Truncate name if it's overlength */
2114                 truncate_identifier(curname, strlen(curname), false);
2115
2116                 /*
2117                  * Finished isolating current name --- add it to list
2118                  */
2119                 *namelist = lappend(*namelist, curname);
2120
2121                 /* Loop back if we didn't reach end of string */
2122         } while (!done);
2123
2124         return true;
2125 }
2126
2127
2128 /*****************************************************************************
2129  *      Comparison Functions used for bytea
2130  *
2131  * Note: btree indexes need these routines not to leak memory; therefore,
2132  * be careful to free working copies of toasted datums.  Most places don't
2133  * need to be so careful.
2134  *****************************************************************************/
2135
2136 Datum
2137 byteaeq(PG_FUNCTION_ARGS)
2138 {
2139         bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
2140         bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
2141         int                     len1,
2142                                 len2;
2143         bool            result;
2144
2145         len1 = VARSIZE_ANY_EXHDR(arg1);
2146         len2 = VARSIZE_ANY_EXHDR(arg2);
2147
2148         /* fast path for different-length inputs */
2149         if (len1 != len2)
2150                 result = false;
2151         else
2152                 result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
2153
2154         PG_FREE_IF_COPY(arg1, 0);
2155         PG_FREE_IF_COPY(arg2, 1);
2156
2157         PG_RETURN_BOOL(result);
2158 }
2159
2160 Datum
2161 byteane(PG_FUNCTION_ARGS)
2162 {
2163         bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
2164         bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
2165         int                     len1,
2166                                 len2;
2167         bool            result;
2168
2169         len1 = VARSIZE_ANY_EXHDR(arg1);
2170         len2 = VARSIZE_ANY_EXHDR(arg2);
2171
2172         /* fast path for different-length inputs */
2173         if (len1 != len2)
2174                 result = true;
2175         else
2176                 result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
2177
2178         PG_FREE_IF_COPY(arg1, 0);
2179         PG_FREE_IF_COPY(arg2, 1);
2180
2181         PG_RETURN_BOOL(result);
2182 }
2183
2184 Datum
2185 bytealt(PG_FUNCTION_ARGS)
2186 {
2187         bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
2188         bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
2189         int                     len1,
2190                                 len2;
2191         int                     cmp;
2192
2193         len1 = VARSIZE_ANY_EXHDR(arg1);
2194         len2 = VARSIZE_ANY_EXHDR(arg2);
2195
2196         cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2197
2198         PG_FREE_IF_COPY(arg1, 0);
2199         PG_FREE_IF_COPY(arg2, 1);
2200
2201         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
2202 }
2203
2204 Datum
2205 byteale(PG_FUNCTION_ARGS)
2206 {
2207         bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
2208         bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
2209         int                     len1,
2210                                 len2;
2211         int                     cmp;
2212
2213         len1 = VARSIZE_ANY_EXHDR(arg1);
2214         len2 = VARSIZE_ANY_EXHDR(arg2);
2215
2216         cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2217
2218         PG_FREE_IF_COPY(arg1, 0);
2219         PG_FREE_IF_COPY(arg2, 1);
2220
2221         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
2222 }
2223
2224 Datum
2225 byteagt(PG_FUNCTION_ARGS)
2226 {
2227         bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
2228         bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
2229         int                     len1,
2230                                 len2;
2231         int                     cmp;
2232
2233         len1 = VARSIZE_ANY_EXHDR(arg1);
2234         len2 = VARSIZE_ANY_EXHDR(arg2);
2235
2236         cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2237
2238         PG_FREE_IF_COPY(arg1, 0);
2239         PG_FREE_IF_COPY(arg2, 1);
2240
2241         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
2242 }
2243
2244 Datum
2245 byteage(PG_FUNCTION_ARGS)
2246 {
2247         bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
2248         bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
2249         int                     len1,
2250                                 len2;
2251         int                     cmp;
2252
2253         len1 = VARSIZE_ANY_EXHDR(arg1);
2254         len2 = VARSIZE_ANY_EXHDR(arg2);
2255
2256         cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2257
2258         PG_FREE_IF_COPY(arg1, 0);
2259         PG_FREE_IF_COPY(arg2, 1);
2260
2261         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
2262 }
2263
2264 Datum
2265 byteacmp(PG_FUNCTION_ARGS)
2266 {
2267         bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
2268         bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
2269         int                     len1,
2270                                 len2;
2271         int                     cmp;
2272
2273         len1 = VARSIZE_ANY_EXHDR(arg1);
2274         len2 = VARSIZE_ANY_EXHDR(arg2);
2275
2276         cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2277         if ((cmp == 0) && (len1 != len2))
2278                 cmp = (len1 < len2) ? -1 : 1;
2279
2280         PG_FREE_IF_COPY(arg1, 0);
2281         PG_FREE_IF_COPY(arg2, 1);
2282
2283         PG_RETURN_INT32(cmp);
2284 }
2285
2286 /*
2287  * appendStringInfoText
2288  *
2289  * Append a text to str.
2290  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
2291  */
2292 static void
2293 appendStringInfoText(StringInfo str, const text *t)
2294 {
2295         appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
2296 }
2297
2298 /*
2299  * replace_text
2300  * replace all occurrences of 'old_sub_str' in 'orig_str'
2301  * with 'new_sub_str' to form 'new_str'
2302  *
2303  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
2304  * otherwise returns 'new_str'
2305  */
2306 Datum
2307 replace_text(PG_FUNCTION_ARGS)
2308 {
2309         text       *src_text = PG_GETARG_TEXT_PP(0);
2310         text       *from_sub_text = PG_GETARG_TEXT_PP(1);
2311         text       *to_sub_text = PG_GETARG_TEXT_PP(2);
2312         int                     src_text_len;
2313         int                     from_sub_text_len;
2314         TextPositionState state;
2315         text       *ret_text;
2316         int                     start_posn;
2317         int                     curr_posn;
2318         int                     chunk_len;
2319         char       *start_ptr;
2320         StringInfoData str;
2321
2322         text_position_setup(src_text, from_sub_text, &state);
2323
2324         /*
2325          * Note: we check the converted string length, not the original, because
2326          * they could be different if the input contained invalid encoding.
2327          */
2328         src_text_len = state.len1;
2329         from_sub_text_len = state.len2;
2330
2331         /* Return unmodified source string if empty source or pattern */
2332         if (src_text_len < 1 || from_sub_text_len < 1)
2333         {
2334                 text_position_cleanup(&state);
2335                 PG_RETURN_TEXT_P(src_text);
2336         }
2337
2338         start_posn = 1;
2339         curr_posn = text_position_next(1, &state);
2340
2341         /* When the from_sub_text is not found, there is nothing to do. */
2342         if (curr_posn == 0)
2343         {
2344                 text_position_cleanup(&state);
2345                 PG_RETURN_TEXT_P(src_text);
2346         }
2347
2348         /* start_ptr points to the start_posn'th character of src_text */
2349         start_ptr = VARDATA_ANY(src_text);
2350
2351         initStringInfo(&str);
2352
2353         do
2354         {
2355                 CHECK_FOR_INTERRUPTS();
2356
2357                 /* copy the data skipped over by last text_position_next() */
2358                 chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
2359                 appendBinaryStringInfo(&str, start_ptr, chunk_len);
2360
2361                 appendStringInfoText(&str, to_sub_text);
2362
2363                 start_posn = curr_posn;
2364                 start_ptr += chunk_len;
2365                 start_posn += from_sub_text_len;
2366                 start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
2367
2368                 curr_posn = text_position_next(start_posn, &state);
2369         }
2370         while (curr_posn > 0);
2371
2372         /* copy trailing data */
2373         chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
2374         appendBinaryStringInfo(&str, start_ptr, chunk_len);
2375
2376         text_position_cleanup(&state);
2377
2378         ret_text = cstring_to_text_with_len(str.data, str.len);
2379         pfree(str.data);
2380
2381         PG_RETURN_TEXT_P(ret_text);
2382 }
2383
2384 /*
2385  * check_replace_text_has_escape_char
2386  *
2387  * check whether replace_text contains escape char.
2388  */
2389 static bool
2390 check_replace_text_has_escape_char(const text *replace_text)
2391 {
2392         const char *p = VARDATA_ANY(replace_text);
2393         const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
2394
2395         if (pg_database_encoding_max_length() == 1)
2396         {
2397                 for (; p < p_end; p++)
2398                 {
2399                         if (*p == '\\')
2400                                 return true;
2401                 }
2402         }
2403         else
2404         {
2405                 for (; p < p_end; p += pg_mblen(p))
2406                 {
2407                         if (*p == '\\')
2408                                 return true;
2409                 }
2410         }
2411
2412         return false;
2413 }
2414
2415 /*
2416  * appendStringInfoRegexpSubstr
2417  *
2418  * Append replace_text to str, substituting regexp back references for
2419  * \n escapes.  start_ptr is the start of the match in the source string,
2420  * at logical character position data_pos.
2421  */
2422 static void
2423 appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
2424                                                          regmatch_t *pmatch,
2425                                                          char *start_ptr, int data_pos)
2426 {
2427         const char *p = VARDATA_ANY(replace_text);
2428         const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
2429         int                     eml = pg_database_encoding_max_length();
2430
2431         for (;;)
2432         {
2433                 const char *chunk_start = p;
2434                 int                     so;
2435                 int                     eo;
2436
2437                 /* Find next escape char. */
2438                 if (eml == 1)
2439                 {
2440                         for (; p < p_end && *p != '\\'; p++)
2441                                  /* nothing */ ;
2442                 }
2443                 else
2444                 {
2445                         for (; p < p_end && *p != '\\'; p += pg_mblen(p))
2446                                  /* nothing */ ;
2447                 }
2448
2449                 /* Copy the text we just scanned over, if any. */
2450                 if (p > chunk_start)
2451                         appendBinaryStringInfo(str, chunk_start, p - chunk_start);
2452
2453                 /* Done if at end of string, else advance over escape char. */
2454                 if (p >= p_end)
2455                         break;
2456                 p++;
2457
2458                 if (p >= p_end)
2459                 {
2460                         /* Escape at very end of input.  Treat same as unexpected char */
2461                         appendStringInfoChar(str, '\\');
2462                         break;
2463                 }
2464
2465                 if (*p >= '1' && *p <= '9')
2466                 {
2467                         /* Use the back reference of regexp. */
2468                         int                     idx = *p - '0';
2469
2470                         so = pmatch[idx].rm_so;
2471                         eo = pmatch[idx].rm_eo;
2472                         p++;
2473                 }
2474                 else if (*p == '&')
2475                 {
2476                         /* Use the entire matched string. */
2477                         so = pmatch[0].rm_so;
2478                         eo = pmatch[0].rm_eo;
2479                         p++;
2480                 }
2481                 else if (*p == '\\')
2482                 {
2483                         /* \\ means transfer one \ to output. */
2484                         appendStringInfoChar(str, '\\');
2485                         p++;
2486                         continue;
2487                 }
2488                 else
2489                 {
2490                         /*
2491                          * If escape char is not followed by any expected char, just treat
2492                          * it as ordinary data to copy.  (XXX would it be better to throw
2493                          * an error?)
2494                          */
2495                         appendStringInfoChar(str, '\\');
2496                         continue;
2497                 }
2498
2499                 if (so != -1 && eo != -1)
2500                 {
2501                         /*
2502                          * Copy the text that is back reference of regexp.      Note so and eo
2503                          * are counted in characters not bytes.
2504                          */
2505                         char       *chunk_start;
2506                         int                     chunk_len;
2507
2508                         Assert(so >= data_pos);
2509                         chunk_start = start_ptr;
2510                         chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
2511                         chunk_len = charlen_to_bytelen(chunk_start, eo - so);
2512                         appendBinaryStringInfo(str, chunk_start, chunk_len);
2513                 }
2514         }
2515 }
2516
/*
 * Number of regmatch_t slots handed to the regexp matcher for replacement:
 * slot 0 for the whole match plus one for each of \1 .. \9.
 */
#define REGEXP_REPLACE_BACKREF_CNT 10
2518
2519 /*
2520  * replace_text_regexp
2521  *
2522  * replace text that matches to regexp in src_text to replace_text.
2523  *
2524  * Note: to avoid having to include regex.h in builtins.h, we declare
2525  * the regexp argument as void *, but really it's regex_t *.
2526  */
2527 text *
2528 replace_text_regexp(text *src_text, void *regexp,
2529                                         text *replace_text, bool glob)
2530 {
2531         text       *ret_text;
2532         regex_t    *re = (regex_t *) regexp;
2533         int                     src_text_len = VARSIZE_ANY_EXHDR(src_text);
2534         StringInfoData buf;
2535         regmatch_t      pmatch[REGEXP_REPLACE_BACKREF_CNT];
2536         pg_wchar   *data;
2537         size_t          data_len;
2538         int                     search_start;
2539         int                     data_pos;
2540         char       *start_ptr;
2541         bool            have_escape;
2542
2543         initStringInfo(&buf);
2544
2545         /* Convert data string to wide characters. */
2546         data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
2547         data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
2548
2549         /* Check whether replace_text has escape char. */
2550         have_escape = check_replace_text_has_escape_char(replace_text);
2551
2552         /* start_ptr points to the data_pos'th character of src_text */
2553         start_ptr = (char *) VARDATA_ANY(src_text);
2554         data_pos = 0;
2555
2556         search_start = 0;
2557         while (search_start <= data_len)
2558         {
2559                 int                     regexec_result;
2560
2561                 CHECK_FOR_INTERRUPTS();
2562
2563                 regexec_result = pg_regexec(re,
2564                                                                         data,
2565                                                                         data_len,
2566                                                                         search_start,
2567                                                                         NULL,           /* no details */
2568                                                                         REGEXP_REPLACE_BACKREF_CNT,
2569                                                                         pmatch,
2570                                                                         0);
2571
2572                 if (regexec_result == REG_NOMATCH)
2573                         break;
2574
2575                 if (regexec_result != REG_OKAY)
2576                 {
2577                         char            errMsg[100];
2578
2579                         pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
2580                         ereport(ERROR,
2581                                         (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
2582                                          errmsg("regular expression failed: %s", errMsg)));
2583                 }
2584
2585                 /*
2586                  * Copy the text to the left of the match position.  Note we are given
2587                  * character not byte indexes.
2588                  */
2589                 if (pmatch[0].rm_so - data_pos > 0)
2590                 {
2591                         int                     chunk_len;
2592
2593                         chunk_len = charlen_to_bytelen(start_ptr,
2594                                                                                    pmatch[0].rm_so - data_pos);
2595                         appendBinaryStringInfo(&buf, start_ptr, chunk_len);
2596
2597                         /*
2598                          * Advance start_ptr over that text, to avoid multiple rescans of
2599                          * it if the replace_text contains multiple back-references.
2600                          */
2601                         start_ptr += chunk_len;
2602                         data_pos = pmatch[0].rm_so;
2603                 }
2604
2605                 /*
2606                  * Copy the replace_text. Process back references when the
2607                  * replace_text has escape characters.
2608                  */
2609                 if (have_escape)
2610                         appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
2611                                                                                  start_ptr, data_pos);
2612                 else
2613                         appendStringInfoText(&buf, replace_text);
2614
2615                 /* Advance start_ptr and data_pos over the matched text. */
2616                 start_ptr += charlen_to_bytelen(start_ptr,
2617                                                                                 pmatch[0].rm_eo - data_pos);
2618                 data_pos = pmatch[0].rm_eo;
2619
2620                 /*
2621                  * When global option is off, replace the first instance only.
2622                  */
2623                 if (!glob)
2624                         break;
2625
2626                 /*
2627                  * Search from next character when the matching text is zero width.
2628                  */
2629                 search_start = data_pos;
2630                 if (pmatch[0].rm_so == pmatch[0].rm_eo)
2631                         search_start++;
2632         }
2633
2634         /*
2635          * Copy the text to the right of the last match.
2636          */
2637         if (data_pos < data_len)
2638         {
2639                 int                     chunk_len;
2640
2641                 chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
2642                 appendBinaryStringInfo(&buf, start_ptr, chunk_len);
2643         }
2644
2645         ret_text = cstring_to_text_with_len(buf.data, buf.len);
2646         pfree(buf.data);
2647         pfree(data);
2648
2649         return ret_text;
2650 }
2651
2652 /*
2653  * split_text
2654  * parse input string
2655  * return ord item (1 based)
2656  * based on provided field separator
2657  */
2658 Datum
2659 split_text(PG_FUNCTION_ARGS)
2660 {
2661         text       *inputstring = PG_GETARG_TEXT_PP(0);
2662         text       *fldsep = PG_GETARG_TEXT_PP(1);
2663         int                     fldnum = PG_GETARG_INT32(2);
2664         int                     inputstring_len;
2665         int                     fldsep_len;
2666         TextPositionState state;
2667         int                     start_posn;
2668         int                     end_posn;
2669         text       *result_text;
2670
2671         /* field number is 1 based */
2672         if (fldnum < 1)
2673                 ereport(ERROR,
2674                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2675                                  errmsg("field position must be greater than zero")));
2676
2677         text_position_setup(inputstring, fldsep, &state);
2678
2679         /*
2680          * Note: we check the converted string length, not the original, because
2681          * they could be different if the input contained invalid encoding.
2682          */
2683         inputstring_len = state.len1;
2684         fldsep_len = state.len2;
2685
2686         /* return empty string for empty input string */
2687         if (inputstring_len < 1)
2688         {
2689                 text_position_cleanup(&state);
2690                 PG_RETURN_TEXT_P(cstring_to_text(""));
2691         }
2692
2693         /* empty field separator */
2694         if (fldsep_len < 1)
2695         {
2696                 text_position_cleanup(&state);
2697                 /* if first field, return input string, else empty string */
2698                 if (fldnum == 1)
2699                         PG_RETURN_TEXT_P(inputstring);
2700                 else
2701                         PG_RETURN_TEXT_P(cstring_to_text(""));
2702         }
2703
2704         /* identify bounds of first field */
2705         start_posn = 1;
2706         end_posn = text_position_next(1, &state);
2707
2708         /* special case if fldsep not found at all */
2709         if (end_posn == 0)
2710         {
2711                 text_position_cleanup(&state);
2712                 /* if field 1 requested, return input string, else empty string */
2713                 if (fldnum == 1)
2714                         PG_RETURN_TEXT_P(inputstring);
2715                 else
2716                         PG_RETURN_TEXT_P(cstring_to_text(""));
2717         }
2718
2719         while (end_posn > 0 && --fldnum > 0)
2720         {
2721                 /* identify bounds of next field */
2722                 start_posn = end_posn + fldsep_len;
2723                 end_posn = text_position_next(start_posn, &state);
2724         }
2725
2726         text_position_cleanup(&state);
2727
2728         if (fldnum > 0)
2729         {
2730                 /* N'th field separator not found */
2731                 /* if last field requested, return it, else empty string */
2732                 if (fldnum == 1)
2733                         result_text = text_substring(PointerGetDatum(inputstring),
2734                                                                                  start_posn,
2735                                                                                  -1,
2736                                                                                  true);
2737                 else
2738                         result_text = cstring_to_text("");
2739         }
2740         else
2741         {
2742                 /* non-last field requested */
2743                 result_text = text_substring(PointerGetDatum(inputstring),
2744                                                                          start_posn,
2745                                                                          end_posn - start_posn,
2746                                                                          false);
2747         }
2748
2749         PG_RETURN_TEXT_P(result_text);
2750 }
2751
2752 /*
2753  * text_to_array
2754  * parse input string
2755  * return text array of elements
2756  * based on provided field separator
2757  */
2758 Datum
2759 text_to_array(PG_FUNCTION_ARGS)
2760 {
2761         text       *inputstring = PG_GETARG_TEXT_PP(0);
2762         text       *fldsep = PG_GETARG_TEXT_PP(1);
2763         int                     inputstring_len;
2764         int                     fldsep_len;
2765         TextPositionState state;
2766         int                     fldnum;
2767         int                     start_posn;
2768         int                     end_posn;
2769         int                     chunk_len;
2770         char       *start_ptr;
2771         text       *result_text;
2772         ArrayBuildState *astate = NULL;
2773
2774         text_position_setup(inputstring, fldsep, &state);
2775
2776         /*
2777          * Note: we check the converted string length, not the original, because
2778          * they could be different if the input contained invalid encoding.
2779          */
2780         inputstring_len = state.len1;
2781         fldsep_len = state.len2;
2782
2783         /* return NULL for empty input string */
2784         if (inputstring_len < 1)
2785         {
2786                 text_position_cleanup(&state);
2787                 PG_RETURN_NULL();
2788         }
2789
2790         /*
2791          * empty field separator return one element, 1D, array using the input
2792          * string
2793          */
2794         if (fldsep_len < 1)
2795         {
2796                 text_position_cleanup(&state);
2797                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2798                                                                                    PointerGetDatum(inputstring), 1));
2799         }
2800
2801         start_posn = 1;
2802         /* start_ptr points to the start_posn'th character of inputstring */
2803         start_ptr = VARDATA_ANY(inputstring);
2804
2805         for (fldnum = 1;; fldnum++) /* field number is 1 based */
2806         {
2807                 CHECK_FOR_INTERRUPTS();
2808
2809                 end_posn = text_position_next(start_posn, &state);
2810
2811                 if (end_posn == 0)
2812                 {
2813                         /* fetch last field */
2814                         chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
2815                 }
2816                 else
2817                 {
2818                         /* fetch non-last field */
2819                         chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
2820                 }
2821
2822                 /* must build a temp text datum to pass to accumArrayResult */
2823                 result_text = cstring_to_text_with_len(start_ptr, chunk_len);
2824
2825                 /* stash away this field */
2826                 astate = accumArrayResult(astate,
2827                                                                   PointerGetDatum(result_text),
2828                                                                   false,
2829                                                                   TEXTOID,
2830                                                                   CurrentMemoryContext);
2831
2832                 pfree(result_text);
2833
2834                 if (end_posn == 0)
2835                         break;
2836
2837                 start_posn = end_posn;
2838                 start_ptr += chunk_len;
2839                 start_posn += fldsep_len;
2840                 start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
2841         }
2842
2843         text_position_cleanup(&state);
2844
2845         PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
2846                                                                                   CurrentMemoryContext));
2847 }
2848
2849 /*
2850  * array_to_text
2851  * concatenate Cstring representation of input array elements
2852  * using provided field separator
2853  */
2854 Datum
2855 array_to_text(PG_FUNCTION_ARGS)
2856 {
2857         ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
2858         char       *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
2859         int                     nitems,
2860                            *dims,
2861                                 ndims;
2862         Oid                     element_type;
2863         int                     typlen;
2864         bool            typbyval;
2865         char            typalign;
2866         StringInfoData buf;
2867         bool            printed = false;
2868         char       *p;
2869         bits8      *bitmap;
2870         int                     bitmask;
2871         int                     i;
2872         ArrayMetaState *my_extra;
2873
2874         ndims = ARR_NDIM(v);
2875         dims = ARR_DIMS(v);
2876         nitems = ArrayGetNItems(ndims, dims);
2877
2878         /* if there are no elements, return an empty string */
2879         if (nitems == 0)
2880                 PG_RETURN_TEXT_P(cstring_to_text(""));
2881
2882         element_type = ARR_ELEMTYPE(v);
2883         initStringInfo(&buf);
2884
2885         /*
2886          * We arrange to look up info about element type, including its output
2887          * conversion proc, only once per series of calls, assuming the element
2888          * type doesn't change underneath us.
2889          */
2890         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2891         if (my_extra == NULL)
2892         {
2893                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2894                                                                                                           sizeof(ArrayMetaState));
2895                 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2896                 my_extra->element_type = ~element_type;
2897         }
2898
2899         if (my_extra->element_type != element_type)
2900         {
2901                 /*
2902                  * Get info about element type, including its output conversion proc
2903                  */
2904                 get_type_io_data(element_type, IOFunc_output,
2905                                                  &my_extra->typlen, &my_extra->typbyval,
2906                                                  &my_extra->typalign, &my_extra->typdelim,
2907                                                  &my_extra->typioparam, &my_extra->typiofunc);
2908                 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2909                                           fcinfo->flinfo->fn_mcxt);
2910                 my_extra->element_type = element_type;
2911         }
2912         typlen = my_extra->typlen;
2913         typbyval = my_extra->typbyval;
2914         typalign = my_extra->typalign;
2915
2916         p = ARR_DATA_PTR(v);
2917         bitmap = ARR_NULLBITMAP(v);
2918         bitmask = 1;
2919
2920         for (i = 0; i < nitems; i++)
2921         {
2922                 Datum           itemvalue;
2923                 char       *value;
2924
2925                 /* Get source element, checking for NULL */
2926                 if (bitmap && (*bitmap & bitmask) == 0)
2927                 {
2928                         /* we ignore nulls */
2929                 }
2930                 else
2931                 {
2932                         itemvalue = fetch_att(p, typbyval, typlen);
2933
2934                         value = OutputFunctionCall(&my_extra->proc, itemvalue);
2935
2936                         if (printed)
2937                                 appendStringInfo(&buf, "%s%s", fldsep, value);
2938                         else
2939                                 appendStringInfoString(&buf, value);
2940                         printed = true;
2941
2942                         p = att_addlength_pointer(p, typlen, p);
2943                         p = (char *) att_align_nominal(p, typalign);
2944                 }
2945
2946                 /* advance bitmap pointer if any */
2947                 if (bitmap)
2948                 {
2949                         bitmask <<= 1;
2950                         if (bitmask == 0x100)
2951                         {
2952                                 bitmap++;
2953                                 bitmask = 1;
2954                         }
2955                 }
2956         }
2957
2958         PG_RETURN_TEXT_P(cstring_to_text_with_len(buf.data, buf.len));
2959 }
2960
2961 #define HEXBASE 16
2962 /*
2963  * Convert a int32 to a string containing a base 16 (hex) representation of
2964  * the number.
2965  */
2966 Datum
2967 to_hex32(PG_FUNCTION_ARGS)
2968 {
2969         uint32          value = (uint32) PG_GETARG_INT32(0);
2970         char       *ptr;
2971         const char *digits = "0123456789abcdef";
2972         char            buf[32];                /* bigger than needed, but reasonable */
2973
2974         ptr = buf + sizeof(buf) - 1;
2975         *ptr = '\0';
2976
2977         do
2978         {
2979                 *--ptr = digits[value % HEXBASE];
2980                 value /= HEXBASE;
2981         } while (ptr > buf && value);
2982
2983         PG_RETURN_TEXT_P(cstring_to_text(ptr));
2984 }
2985
2986 /*
2987  * Convert a int64 to a string containing a base 16 (hex) representation of
2988  * the number.
2989  */
2990 Datum
2991 to_hex64(PG_FUNCTION_ARGS)
2992 {
2993         uint64          value = (uint64) PG_GETARG_INT64(0);
2994         char       *ptr;
2995         const char *digits = "0123456789abcdef";
2996         char            buf[32];                /* bigger than needed, but reasonable */
2997
2998         ptr = buf + sizeof(buf) - 1;
2999         *ptr = '\0';
3000
3001         do
3002         {
3003                 *--ptr = digits[value % HEXBASE];
3004                 value /= HEXBASE;
3005         } while (ptr > buf && value);
3006
3007         PG_RETURN_TEXT_P(cstring_to_text(ptr));
3008 }
3009
3010 /*
3011  * Create an md5 hash of a text string and return it as hex
3012  *
3013  * md5 produces a 16 byte (128 bit) hash; double it for hex
3014  */
3015 #define MD5_HASH_LEN  32
3016
3017 Datum
3018 md5_text(PG_FUNCTION_ARGS)
3019 {
3020         text       *in_text = PG_GETARG_TEXT_PP(0);
3021         size_t          len;
3022         char            hexsum[MD5_HASH_LEN + 1];
3023
3024         /* Calculate the length of the buffer using varlena metadata */
3025         len = VARSIZE_ANY_EXHDR(in_text);
3026
3027         /* get the hash result */
3028         if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
3029                 ereport(ERROR,
3030                                 (errcode(ERRCODE_OUT_OF_MEMORY),
3031                                  errmsg("out of memory")));
3032
3033         /* convert to text and return it */
3034         PG_RETURN_TEXT_P(cstring_to_text(hexsum));
3035 }
3036
3037 /*
3038  * Create an md5 hash of a bytea field and return it as a hex string:
3039  * 16-byte md5 digest is represented in 32 hex characters.
3040  */
3041 Datum
3042 md5_bytea(PG_FUNCTION_ARGS)
3043 {
3044         bytea      *in = PG_GETARG_BYTEA_PP(0);
3045         size_t          len;
3046         char            hexsum[MD5_HASH_LEN + 1];
3047
3048         len = VARSIZE_ANY_EXHDR(in);
3049         if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
3050                 ereport(ERROR,
3051                                 (errcode(ERRCODE_OUT_OF_MEMORY),
3052                                  errmsg("out of memory")));
3053
3054         PG_RETURN_TEXT_P(cstring_to_text(hexsum));
3055 }
3056
3057 /*
3058  * Return the size of a datum, possibly compressed
3059  *
3060  * Works on any data type
3061  */
3062 Datum
3063 pg_column_size(PG_FUNCTION_ARGS)
3064 {
3065         Datum           value = PG_GETARG_DATUM(0);
3066         int32           result;
3067         int                     typlen;
3068
3069         /* On first call, get the input type's typlen, and save at *fn_extra */
3070         if (fcinfo->flinfo->fn_extra == NULL)
3071         {
3072                 /* Lookup the datatype of the supplied argument */
3073                 Oid                     argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
3074
3075                 typlen = get_typlen(argtypeid);
3076                 if (typlen == 0)                /* should not happen */
3077                         elog(ERROR, "cache lookup failed for type %u", argtypeid);
3078
3079                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
3080                                                                                                           sizeof(int));
3081                 *((int *) fcinfo->flinfo->fn_extra) = typlen;
3082         }
3083         else
3084                 typlen = *((int *) fcinfo->flinfo->fn_extra);
3085
3086         if (typlen == -1)
3087         {
3088                 /* varlena type, possibly toasted */
3089                 result = toast_datum_size(value);
3090         }
3091         else if (typlen == -2)
3092         {
3093                 /* cstring */
3094                 result = strlen(DatumGetCString(value)) + 1;
3095         }
3096         else
3097         {
3098                 /* ordinary fixed-width type */
3099                 result = typlen;
3100         }
3101
3102         PG_RETURN_INT32(result);
3103 }