1 /*-------------------------------------------------------------------------
4 * Functions for the variable-length built-in types.
6 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
13 *-------------------------------------------------------------------------
19 #include "access/tuptoaster.h"
20 #include "catalog/pg_type.h"
21 #include "libpq/md5.h"
22 #include "libpq/pqformat.h"
23 #include "miscadmin.h"
24 #include "parser/scansup.h"
25 #include "regex/regex.h"
26 #include "utils/builtins.h"
27 #include "utils/lsyscache.h"
28 #include "utils/pg_locale.h"
31 typedef struct varlena unknown
;
35 bool use_wchar
; /* T if multibyte encoding */
36 char *str1
; /* use these if not use_wchar */
37 char *str2
; /* note: these point to original texts */
38 pg_wchar
*wstr1
; /* use these if use_wchar */
39 pg_wchar
*wstr2
; /* note: these are palloc'd */
40 int len1
; /* string lengths in logical characters */
42 /* Skip table for Boyer-Moore-Horspool search algorithm: */
43 int skiptablemask
; /* mask for ANDing with skiptable subscripts */
44 int skiptable
[256]; /* skip distance for given mismatched char */
/*
 * Convenience macros for values of the "unknown" type, mirroring the
 * generic fmgr macros they are built from.
 */
/* Detoast datum X (via PG_DETOAST_DATUM) and cast the result to "unknown *". */
47 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
/* Same as above, but uses PG_DETOAST_DATUM_COPY. */
48 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
/* Fetch function argument n as a Datum and convert it with DatumGetUnknownP. */
49 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
/* Fetch function argument n as a Datum and convert it with DatumGetUnknownPCopy. */
50 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
/* Return x from a function; simply forwards to PG_RETURN_POINTER. */
51 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
53 static int text_cmp(text
*arg1
, text
*arg2
);
54 static int32
text_length(Datum str
);
55 static int text_position(text
*t1
, text
*t2
);
56 static void text_position_setup(text
*t1
, text
*t2
, TextPositionState
*state
);
57 static int text_position_next(int start_pos
, TextPositionState
*state
);
58 static void text_position_cleanup(TextPositionState
*state
);
59 static text
*text_substring(Datum str
,
62 bool length_not_specified
);
63 static void appendStringInfoText(StringInfo str
, const text
*t
);
66 /*****************************************************************************
67 * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
68 *****************************************************************************/
73 * Create a text value from a null-terminated C string.
75 * The new text value is freshly palloc'd with a full-size VARHDR.
78 cstring_to_text(const char *s
)
80 return cstring_to_text_with_len(s
, strlen(s
));
84 * cstring_to_text_with_len
86 * Same as cstring_to_text except the caller specifies the string length;
87 * the string need not be null-terminated.
90 cstring_to_text_with_len(const char *s
, int len
)
92 text
*result
= (text
*) palloc(len
+ VARHDRSZ
);
94 SET_VARSIZE(result
, len
+ VARHDRSZ
);
95 memcpy(VARDATA(result
), s
, len
);
103 * Create a palloc'd, null-terminated C string from a text value.
105 * We support being passed a compressed or toasted text value.
106 * This is a bit bogus since such values shouldn't really be referred to as
107 * "text *", but it seems useful for robustness. If we didn't handle that
108 * case here, we'd need another routine that did, anyway.
111 text_to_cstring(const text
*t
)
113 /* must cast away the const, unfortunately */
114 text
*tunpacked
= pg_detoast_datum_packed((struct varlena
*) t
);
115 int len
= VARSIZE_ANY_EXHDR(tunpacked
);
118 result
= (char *) palloc(len
+ 1);
119 memcpy(result
, VARDATA_ANY(tunpacked
), len
);
129 * text_to_cstring_buffer
131 * Copy a text value into a caller-supplied buffer of size dst_len.
133 * The text string is truncated if necessary to fit. The result is
134 * guaranteed null-terminated (unless dst_len == 0).
136 * We support being passed a compressed or toasted text value.
137 * This is a bit bogus since such values shouldn't really be referred to as
138 * "text *", but it seems useful for robustness. If we didn't handle that
139 * case here, we'd need another routine that did, anyway.
142 text_to_cstring_buffer(const text
*src
, char *dst
, size_t dst_len
)
144 /* must cast away the const, unfortunately */
145 text
*srcunpacked
= pg_detoast_datum_packed((struct varlena
*) src
);
146 size_t src_len
= VARSIZE_ANY_EXHDR(srcunpacked
);
151 if (dst_len
>= src_len
)
153 else /* ensure truncation is encoding-safe */
154 dst_len
= pg_mbcliplen(VARDATA_ANY(srcunpacked
), src_len
, dst_len
);
155 memcpy(dst
, VARDATA_ANY(srcunpacked
), dst_len
);
159 if (srcunpacked
!= src
)
164 /*****************************************************************************
165 * USER I/O ROUTINES *
166 *****************************************************************************/
/* VAL: subtract '0' from character CH, giving the numeric value of a digit character. */
169 #define VAL(CH) ((CH) - '0')
/* DIG: add '0' to a numeric value, giving the corresponding digit character. */
170 #define DIG(VAL) ((VAL) + '0')
173 * byteain - converts from printable representation of byte array
175 * Non-printable characters must be passed as '\nnn' (octal) and are
176 * converted to internal form. '\' must be passed as '\\'.
177 * ereport(ERROR, ...) if bad form.
180 * The input is scanned twice.
181 * The error checking of input is minimal.
184 byteain(PG_FUNCTION_ARGS
)
186 char *inputText
= PG_GETARG_CSTRING(0);
192 for (byte
= 0, tp
= inputText
; *tp
!= '\0'; byte
++)
196 else if ((tp
[0] == '\\') &&
197 (tp
[1] >= '0' && tp
[1] <= '3') &&
198 (tp
[2] >= '0' && tp
[2] <= '7') &&
199 (tp
[3] >= '0' && tp
[3] <= '7'))
201 else if ((tp
[0] == '\\') &&
207 * one backslash, not followed by 0 or ### valid octal
210 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION
),
211 errmsg("invalid input syntax for type bytea")));
216 result
= (bytea
*) palloc(byte
);
217 SET_VARSIZE(result
, byte
);
220 rp
= VARDATA(result
);
225 else if ((tp
[0] == '\\') &&
226 (tp
[1] >= '0' && tp
[1] <= '3') &&
227 (tp
[2] >= '0' && tp
[2] <= '7') &&
228 (tp
[3] >= '0' && tp
[3] <= '7'))
234 *rp
++ = byte
+ VAL(tp
[3]);
237 else if ((tp
[0] == '\\') &&
246 * We should never get here. The first pass should not allow it.
249 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION
),
250 errmsg("invalid input syntax for type bytea")));
254 PG_RETURN_BYTEA_P(result
);
258 * byteaout - converts to printable representation of byte array
260 * Non-printable characters are inserted as '\nnn' (octal) and '\' as
263 * NULL vlena should be an error--returning string with NULL for now.
266 byteaout(PG_FUNCTION_ARGS
)
268 bytea
*vlena
= PG_GETARG_BYTEA_PP(0);
272 int val
; /* holds unprintable chars */
276 len
= 1; /* empty string has 1 char */
277 vp
= VARDATA_ANY(vlena
);
278 for (i
= VARSIZE_ANY_EXHDR(vlena
); i
!= 0; i
--, vp
++)
282 else if ((unsigned char) *vp
< 0x20 || (unsigned char) *vp
> 0x7e)
287 rp
= result
= (char *) palloc(len
);
288 vp
= VARDATA_ANY(vlena
);
289 for (i
= VARSIZE_ANY_EXHDR(vlena
); i
!= 0; i
--, vp
++)
296 else if ((unsigned char) *vp
< 0x20 || (unsigned char) *vp
> 0x7e)
300 rp
[3] = DIG(val
& 07);
302 rp
[2] = DIG(val
& 07);
304 rp
[1] = DIG(val
& 03);
311 PG_RETURN_CSTRING(result
);
315 * bytearecv - converts external binary format to bytea
318 bytearecv(PG_FUNCTION_ARGS
)
320 StringInfo buf
= (StringInfo
) PG_GETARG_POINTER(0);
324 nbytes
= buf
->len
- buf
->cursor
;
325 result
= (bytea
*) palloc(nbytes
+ VARHDRSZ
);
326 SET_VARSIZE(result
, nbytes
+ VARHDRSZ
);
327 pq_copymsgbytes(buf
, VARDATA(result
), nbytes
);
328 PG_RETURN_BYTEA_P(result
);
332 * byteasend - converts bytea to binary format
334 * This is a special case: just copy the input...
337 byteasend(PG_FUNCTION_ARGS
)
339 bytea
*vlena
= PG_GETARG_BYTEA_P_COPY(0);
341 PG_RETURN_BYTEA_P(vlena
);
346 * textin - converts "..." to internal representation
349 textin(PG_FUNCTION_ARGS
)
351 char *inputText
= PG_GETARG_CSTRING(0);
353 PG_RETURN_TEXT_P(cstring_to_text(inputText
));
357 * textout - converts internal representation to "..."
360 textout(PG_FUNCTION_ARGS
)
362 Datum txt
= PG_GETARG_DATUM(0);
364 PG_RETURN_CSTRING(TextDatumGetCString(txt
));
368 * textrecv - converts external binary format to text
371 textrecv(PG_FUNCTION_ARGS
)
373 StringInfo buf
= (StringInfo
) PG_GETARG_POINTER(0);
378 str
= pq_getmsgtext(buf
, buf
->len
- buf
->cursor
, &nbytes
);
380 result
= cstring_to_text_with_len(str
, nbytes
);
382 PG_RETURN_TEXT_P(result
);
386 * textsend - converts text to binary format
389 textsend(PG_FUNCTION_ARGS
)
391 text
*t
= PG_GETARG_TEXT_PP(0);
394 pq_begintypsend(&buf
);
395 pq_sendtext(&buf
, VARDATA_ANY(t
), VARSIZE_ANY_EXHDR(t
));
396 PG_RETURN_BYTEA_P(pq_endtypsend(&buf
));
401 * unknownin - converts "..." to internal representation
404 unknownin(PG_FUNCTION_ARGS
)
406 char *str
= PG_GETARG_CSTRING(0);
408 /* representation is same as cstring */
409 PG_RETURN_CSTRING(pstrdup(str
));
413 * unknownout - converts internal representation to "..."
416 unknownout(PG_FUNCTION_ARGS
)
418 /* representation is same as cstring */
419 char *str
= PG_GETARG_CSTRING(0);
421 PG_RETURN_CSTRING(pstrdup(str
));
425 * unknownrecv - converts external binary format to unknown
428 unknownrecv(PG_FUNCTION_ARGS
)
430 StringInfo buf
= (StringInfo
) PG_GETARG_POINTER(0);
434 str
= pq_getmsgtext(buf
, buf
->len
- buf
->cursor
, &nbytes
);
435 /* representation is same as cstring */
436 PG_RETURN_CSTRING(str
);
440 * unknownsend - converts unknown to binary format
443 unknownsend(PG_FUNCTION_ARGS
)
445 /* representation is same as cstring */
446 char *str
= PG_GETARG_CSTRING(0);
449 pq_begintypsend(&buf
);
450 pq_sendtext(&buf
, str
, strlen(str
));
451 PG_RETURN_BYTEA_P(pq_endtypsend(&buf
));
455 /* ========== PUBLIC ROUTINES ========== */
459 * returns the logical length of a text*
460 * (which is less than the VARSIZE of the text*)
463 textlen(PG_FUNCTION_ARGS
)
465 Datum str
= PG_GETARG_DATUM(0);
467 /* try to avoid decompressing argument */
468 PG_RETURN_INT32(text_length(str
));
473 * Does the real work for textlen()
475 * This is broken out so it can be called directly by other string processing
476 * functions. Note that the argument is passed as a Datum, to indicate that
477 * it may still be in compressed form. We can avoid decompressing it at all
481 text_length(Datum str
)
483 /* fastpath when max encoding length is one */
484 if (pg_database_encoding_max_length() == 1)
485 PG_RETURN_INT32(toast_raw_datum_size(str
) - VARHDRSZ
);
488 text
*t
= DatumGetTextPP(str
);
490 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t
),
491 VARSIZE_ANY_EXHDR(t
)));
497 * returns the physical length of a text*
498 * (which is less than the VARSIZE of the text*)
501 textoctetlen(PG_FUNCTION_ARGS
)
503 Datum str
= PG_GETARG_DATUM(0);
505 /* We need not detoast the input at all */
506 PG_RETURN_INT32(toast_raw_datum_size(str
) - VARHDRSZ
);
511 * takes two text* and returns a text* that is the concatenation of
514 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
515 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
516 * Allocate space for output in all cases.
517 * XXX - thomas 1997-07-10
520 textcat(PG_FUNCTION_ARGS
)
522 text
*t1
= PG_GETARG_TEXT_PP(0);
523 text
*t2
= PG_GETARG_TEXT_PP(1);
530 len1
= VARSIZE_ANY_EXHDR(t1
);
534 len2
= VARSIZE_ANY_EXHDR(t2
);
538 len
= len1
+ len2
+ VARHDRSZ
;
539 result
= (text
*) palloc(len
);
541 /* Set size of result string... */
542 SET_VARSIZE(result
, len
);
544 /* Fill data field of result string... */
545 ptr
= VARDATA(result
);
547 memcpy(ptr
, VARDATA_ANY(t1
), len1
);
549 memcpy(ptr
+ len1
, VARDATA_ANY(t2
), len2
);
551 PG_RETURN_TEXT_P(result
);
555 * charlen_to_bytelen()
556 * Compute the number of bytes occupied by n characters starting at *p
558 * It is caller's responsibility that there actually are n characters;
559 * the string need not be null-terminated.
562 charlen_to_bytelen(const char *p
, int n
)
564 if (pg_database_encoding_max_length() == 1)
566 /* Optimization for single-byte encodings */
573 for (s
= p
; n
> 0; n
--)
582 * Return a substring starting at the specified position.
583 * - thomas 1997-12-31
587 * - starting position (is one-based)
590 * If the starting position is zero or less, then return from the start of the string
591 * adjusting the length to be consistent with the "negative start" per SQL92.
592 * If the length is less than zero, return the remaining string.
594 * Added multibyte support.
595 * - Tatsuo Ishii 1998-4-21
596 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
597 * Formerly returned the entire string; now returns a portion.
598 * - Thomas Lockhart 1998-12-10
599 * Now uses faster TOAST-slicing interface
600 * - John Gray 2002-02-22
601 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
602 * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
603 * error; if E < 1, return '', not entire string). Fixed MB related bug when
604 * S > LC and < LC + 4 sometimes garbage characters are returned.
605 * - Joe Conway 2002-08-10
608 text_substr(PG_FUNCTION_ARGS
)
610 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
617 * text_substr_no_len -
618 * Wrapper to avoid opr_sanity failure due to
619 * one function accepting a different number of args.
622 text_substr_no_len(PG_FUNCTION_ARGS
)
624 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
631 * Does the real work for text_substr() and text_substr_no_len()
633 * This is broken out so it can be called directly by other string processing
634 * functions. Note that the argument is passed as a Datum, to indicate that
635 * it may still be in compressed/toasted form. We can avoid detoasting all
636 * of it in some cases.
638 * The result is always a freshly palloc'd datum.
641 text_substring(Datum str
, int32 start
, int32 length
, bool length_not_specified
)
643 int32 eml
= pg_database_encoding_max_length();
644 int32 S
= start
; /* start position */
645 int32 S1
; /* adjusted start position */
646 int32 L1
; /* adjusted substring length */
648 /* life is easy if the encoding max length is 1 */
653 if (length_not_specified
) /* special case - get length to end of
662 * A negative value for L is the only way for the end position to
663 * be before the start. SQL99 says to throw an error.
667 (errcode(ERRCODE_SUBSTRING_ERROR
),
668 errmsg("negative substring length not allowed")));
671 * A zero or negative value for the end position can happen if the
672 * start was negative or one. SQL99 says to return a zero-length
676 return cstring_to_text("");
682 * If the start position is past the end of the string, SQL99 says to
683 * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
684 * that for us. Convert to zero-based starting position
686 return DatumGetTextPSlice(str
, S1
- 1, L1
);
691 * When encoding max length is > 1, we can't get LC without
692 * detoasting, so we'll grab a conservatively large slice now and go
693 * back later to do the right thing
706 * if S is past the end of the string, the tuple toaster will return a
707 * zero-length string to us
712 * We need to start at position zero because there is no way to know
713 * in advance which byte offset corresponds to the supplied start
718 if (length_not_specified
) /* special case - get length to end of
720 slice_size
= L1
= -1;
726 * A negative value for L is the only way for the end position to
727 * be before the start. SQL99 says to throw an error.
731 (errcode(ERRCODE_SUBSTRING_ERROR
),
732 errmsg("negative substring length not allowed")));
735 * A zero or negative value for the end position can happen if the
736 * start was negative or one. SQL99 says to return a zero-length
740 return cstring_to_text("");
743 * if E is past the end of the string, the tuple toaster will
744 * truncate the length for us
749 * Total slice size in bytes can't be any longer than the start
750 * position plus substring length times the encoding max length.
752 slice_size
= (S1
+ L1
) * eml
;
756 * If we're working with an untoasted source, no need to do an extra
759 if (VARATT_IS_COMPRESSED(DatumGetPointer(str
)) ||
760 VARATT_IS_EXTERNAL(DatumGetPointer(str
)))
761 slice
= DatumGetTextPSlice(str
, slice_start
, slice_size
);
763 slice
= (text
*) DatumGetPointer(str
);
765 /* see if we got back an empty string */
766 if (VARSIZE_ANY_EXHDR(slice
) == 0)
768 if (slice
!= (text
*) DatumGetPointer(str
))
770 return cstring_to_text("");
773 /* Now we can get the actual length of the slice in MB characters */
774 slice_strlen
= pg_mbstrlen_with_len(VARDATA_ANY(slice
),
775 VARSIZE_ANY_EXHDR(slice
));
778 * Check that the start position wasn't > slice_strlen. If so, SQL99
779 * says to return a zero-length string.
781 if (S1
> slice_strlen
)
783 if (slice
!= (text
*) DatumGetPointer(str
))
785 return cstring_to_text("");
789 * Adjust L1 and E1 now that we know the slice string length. Again
790 * remember that S1 is one based, and slice_start is zero based.
793 E1
= Min(S1
+ L1
, slice_start
+ 1 + slice_strlen
);
795 E1
= slice_start
+ 1 + slice_strlen
;
798 * Find the start position in the slice; remember S1 is not zero based
800 p
= VARDATA_ANY(slice
);
801 for (i
= 0; i
< S1
- 1; i
++)
804 /* hang onto a pointer to our start position */
808 * Count the actual bytes used by the substring of the requested
811 for (i
= S1
; i
< E1
; i
++)
814 ret
= (text
*) palloc(VARHDRSZ
+ (p
- s
));
815 SET_VARSIZE(ret
, VARHDRSZ
+ (p
- s
));
816 memcpy(VARDATA(ret
), s
, (p
- s
));
818 if (slice
!= (text
*) DatumGetPointer(str
))
824 elog(ERROR
, "invalid backend encoding: encoding max length < 1");
826 /* not reached: suppress compiler warning */
832 * Return the position of the specified substring.
833 * Implements the SQL92 POSITION() function.
834 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
835 * - thomas 1997-07-27
838 textpos(PG_FUNCTION_ARGS
)
840 text
*str
= PG_GETARG_TEXT_PP(0);
841 text
*search_str
= PG_GETARG_TEXT_PP(1);
843 PG_RETURN_INT32((int32
) text_position(str
, search_str
));
848 * Does the real work for textpos()
851 * t1 - string to be searched
852 * t2 - pattern to match within t1
854 * Character index of the first matched char, starting from 1,
857 * This is broken out so it can be called directly by other string processing
861 text_position(text
*t1
, text
*t2
)
863 TextPositionState state
;
866 text_position_setup(t1
, t2
, &state
);
867 result
= text_position_next(1, &state
);
868 text_position_cleanup(&state
);
874 * text_position_setup, text_position_next, text_position_cleanup -
875 * Component steps of text_position()
877 * These are broken out so that a string can be efficiently searched for
878 * multiple occurrences of the same pattern. text_position_next may be
879 * called multiple times with increasing values of start_pos, which is
880 * the 1-based character position to start the search from. The "state"
881 * variable is normally just a local variable in the caller.
885 text_position_setup(text
*t1
, text
*t2
, TextPositionState
*state
)
887 int len1
= VARSIZE_ANY_EXHDR(t1
);
888 int len2
= VARSIZE_ANY_EXHDR(t2
);
890 if (pg_database_encoding_max_length() == 1)
892 /* simple case - single byte encoding */
893 state
->use_wchar
= false;
894 state
->str1
= VARDATA_ANY(t1
);
895 state
->str2
= VARDATA_ANY(t2
);
901 /* not as simple - multibyte encoding */
905 p1
= (pg_wchar
*) palloc((len1
+ 1) * sizeof(pg_wchar
));
906 len1
= pg_mb2wchar_with_len(VARDATA_ANY(t1
), p1
, len1
);
907 p2
= (pg_wchar
*) palloc((len2
+ 1) * sizeof(pg_wchar
));
908 len2
= pg_mb2wchar_with_len(VARDATA_ANY(t2
), p2
, len2
);
910 state
->use_wchar
= true;
918 * Prepare the skip table for Boyer-Moore-Horspool searching. In these
919 * notes we use the terminology that the "haystack" is the string to be
920 * searched (t1) and the "needle" is the pattern being sought (t2).
922 * If the needle is empty or bigger than the haystack then there is no
923 * point in wasting cycles initializing the table. We also choose not
924 * to use B-M-H for needles of length 1, since the skip table can't
925 * possibly save anything in that case.
927 if (len1
>= len2
&& len2
> 1)
929 int searchlength
= len1
- len2
;
935 * First we must determine how much of the skip table to use. The
936 * declaration of TextPositionState allows up to 256 elements, but for
937 * short search problems we don't really want to have to initialize so
938 * many elements --- it would take too long in comparison to the
939 * actual search time. So we choose a useful skip table size based on
940 * the haystack length minus the needle length. The closer the needle
941 * length is to the haystack length the less useful skipping becomes.
943 * Note: since we use bit-masking to select table elements, the skip
944 * table size MUST be a power of 2, and so the mask must be 2^N-1.
946 if (searchlength
< 16)
948 else if (searchlength
< 64)
950 else if (searchlength
< 128)
952 else if (searchlength
< 512)
954 else if (searchlength
< 2048)
956 else if (searchlength
< 4096)
960 state
->skiptablemask
= skiptablemask
;
963 * Initialize the skip table. We set all elements to the needle
964 * length, since this is the correct skip distance for any character
965 * not found in the needle.
967 for (i
= 0; i
<= skiptablemask
; i
++)
968 state
->skiptable
[i
] = len2
;
971 * Now examine the needle. For each character except the last one,
972 * set the corresponding table element to the appropriate skip
973 * distance. Note that when two characters share the same skip table
974 * entry, the one later in the needle must determine the skip distance.
978 if (!state
->use_wchar
)
980 const char *str2
= state
->str2
;
982 for (i
= 0; i
< last
; i
++)
983 state
->skiptable
[(unsigned char) str2
[i
] & skiptablemask
] = last
- i
;
987 const pg_wchar
*wstr2
= state
->wstr2
;
989 for (i
= 0; i
< last
; i
++)
990 state
->skiptable
[wstr2
[i
] & skiptablemask
] = last
- i
;
996 text_position_next(int start_pos
, TextPositionState
*state
)
998 int haystack_len
= state
->len1
;
999 int needle_len
= state
->len2
;
1000 int skiptablemask
= state
->skiptablemask
;
1002 Assert(start_pos
> 0); /* else caller error */
1004 if (needle_len
<= 0)
1005 return start_pos
; /* result for empty pattern */
1007 start_pos
--; /* adjust for zero based arrays */
1009 /* Done if the needle can't possibly fit */
1010 if (haystack_len
< start_pos
+ needle_len
)
1013 if (!state
->use_wchar
)
1015 /* simple case - single byte encoding */
1016 const char *haystack
= state
->str1
;
1017 const char *needle
= state
->str2
;
1018 const char *haystack_end
= &haystack
[haystack_len
];
1021 if (needle_len
== 1)
1023 /* No point in using B-M-H for a one-character needle */
1024 char nchar
= *needle
;
1026 hptr
= &haystack
[start_pos
];
1027 while (hptr
< haystack_end
)
1030 return hptr
- haystack
+ 1;
1036 const char *needle_last
= &needle
[needle_len
- 1];
1038 /* Start at startpos plus the length of the needle */
1039 hptr
= &haystack
[start_pos
+ needle_len
- 1];
1040 while (hptr
< haystack_end
)
1042 /* Match the needle scanning *backward* */
1050 /* Matched it all? If so, return 1-based position */
1052 return p
- haystack
+ 1;
1056 * No match, so use the haystack char at hptr to decide how
1057 * far to advance. If the needle had any occurrence of that
1058 * character (or more precisely, one sharing the same
1059 * skiptable entry) before its last character, then we advance
1060 * far enough to align the last such needle character with
1061 * that haystack position. Otherwise we can advance by the
1062 * whole needle length.
1064 hptr
+= state
->skiptable
[(unsigned char) *hptr
& skiptablemask
];
1070 /* The multibyte char version. This works exactly the same way. */
1071 const pg_wchar
*haystack
= state
->wstr1
;
1072 const pg_wchar
*needle
= state
->wstr2
;
1073 const pg_wchar
*haystack_end
= &haystack
[haystack_len
];
1074 const pg_wchar
*hptr
;
1076 if (needle_len
== 1)
1078 /* No point in using B-M-H for a one-character needle */
1079 pg_wchar nchar
= *needle
;
1081 hptr
= &haystack
[start_pos
];
1082 while (hptr
< haystack_end
)
1085 return hptr
- haystack
+ 1;
1091 const pg_wchar
*needle_last
= &needle
[needle_len
- 1];
1093 /* Start at startpos plus the length of the needle */
1094 hptr
= &haystack
[start_pos
+ needle_len
- 1];
1095 while (hptr
< haystack_end
)
1097 /* Match the needle scanning *backward* */
1098 const pg_wchar
*nptr
;
1105 /* Matched it all? If so, return 1-based position */
1107 return p
- haystack
+ 1;
1111 * No match, so use the haystack char at hptr to decide how
1112 * far to advance. If the needle had any occurrence of that
1113 * character (or more precisely, one sharing the same
1114 * skiptable entry) before its last character, then we advance
1115 * far enough to align the last such needle character with
1116 * that haystack position. Otherwise we can advance by the
1117 * whole needle length.
1119 hptr
+= state
->skiptable
[*hptr
& skiptablemask
];
1124 return 0; /* not found */
1128 text_position_cleanup(TextPositionState
*state
)
1130 if (state
->use_wchar
)
1132 pfree(state
->wstr1
);
1133 pfree(state
->wstr2
);
1138 * Comparison function for text strings with given lengths.
1139 * Includes locale support, but must copy strings to temporary memory
1140 * to allow null-termination for inputs to strcoll().
1141 * Returns <0, 0 or >0 (not necessarily exactly -1, 0 or 1)
1144 varstr_cmp(char *arg1
, int len1
, char *arg2
, int len2
)
1149 * Unfortunately, there is no strncoll(), so in the non-C locale case we
1150 * have to do some memory copying. This turns out to be significantly
1151 * slower, so we optimize the case where LC_COLLATE is C. We also try to
1152 * optimize relatively-short strings by avoiding palloc/pfree overhead.
1154 if (lc_collate_is_c())
1156 result
= strncmp(arg1
, arg2
, Min(len1
, len2
));
1157 if ((result
== 0) && (len1
!= len2
))
1158 result
= (len1
< len2
) ? -1 : 1;
1162 #define STACKBUFLEN 1024
1164 char a1buf
[STACKBUFLEN
];
1165 char a2buf
[STACKBUFLEN
];
1170 /* Win32 does not have UTF-8, so we need to map to UTF-16 */
1171 if (GetDatabaseEncoding() == PG_UTF8
)
1177 if (len1
>= STACKBUFLEN
/ 2)
1179 a1len
= len1
* 2 + 2;
1180 a1p
= palloc(a1len
);
1184 a1len
= STACKBUFLEN
;
1187 if (len2
>= STACKBUFLEN
/ 2)
1189 a2len
= len2
* 2 + 2;
1190 a2p
= palloc(a2len
);
1194 a2len
= STACKBUFLEN
;
1198 /* MultiByteToWideChar() does not work for zero-length input */
1203 r
= MultiByteToWideChar(CP_UTF8
, 0, arg1
, len1
,
1204 (LPWSTR
) a1p
, a1len
/ 2);
1207 (errmsg("could not convert string to UTF-16: error %lu",
1210 ((LPWSTR
) a1p
)[r
] = 0;
1216 r
= MultiByteToWideChar(CP_UTF8
, 0, arg2
, len2
,
1217 (LPWSTR
) a2p
, a2len
/ 2);
1220 (errmsg("could not convert string to UTF-16: error %lu",
1223 ((LPWSTR
) a2p
)[r
] = 0;
1226 result
= wcscoll((LPWSTR
) a1p
, (LPWSTR
) a2p
);
1227 if (result
== 2147483647) /* _NLSCMPERROR; missing from mingw
1230 (errmsg("could not compare Unicode strings: %m")));
1233 * In some locales wcscoll() can claim that nonidentical strings
1234 * are equal. Believing that would be bad news for a number of
1235 * reasons, so we follow Perl's lead and sort "equal" strings
1236 * according to strcmp (on the UTF-8 representation).
1240 result
= strncmp(arg1
, arg2
, Min(len1
, len2
));
1241 if ((result
== 0) && (len1
!= len2
))
1242 result
= (len1
< len2
) ? -1 : 1;
1254 if (len1
>= STACKBUFLEN
)
1255 a1p
= (char *) palloc(len1
+ 1);
1258 if (len2
>= STACKBUFLEN
)
1259 a2p
= (char *) palloc(len2
+ 1);
1263 memcpy(a1p
, arg1
, len1
);
1265 memcpy(a2p
, arg2
, len2
);
1268 result
= strcoll(a1p
, a2p
);
1271 * In some locales strcoll() can claim that nonidentical strings are
1272 * equal. Believing that would be bad news for a number of reasons,
1273 * so we follow Perl's lead and sort "equal" strings according to
1277 result
= strcmp(a1p
, a2p
);
1290 * Internal comparison function for text strings.
1291 * Returns <0, 0 or >0 (whatever varstr_cmp returns)
1294 text_cmp(text
*arg1
, text
*arg2
)
1301 a1p
= VARDATA_ANY(arg1
);
1302 a2p
= VARDATA_ANY(arg2
);
1304 len1
= VARSIZE_ANY_EXHDR(arg1
);
1305 len2
= VARSIZE_ANY_EXHDR(arg2
);
1307 return varstr_cmp(a1p
, len1
, a2p
, len2
);
1311 * Comparison functions for text strings.
1313 * Note: btree indexes need these routines not to leak memory; therefore,
1314 * be careful to free working copies of toasted datums. Most places don't
1315 * need to be so careful.
1319 texteq(PG_FUNCTION_ARGS
)
1321 text
*arg1
= PG_GETARG_TEXT_PP(0);
1322 text
*arg2
= PG_GETARG_TEXT_PP(1);
1326 * Since we only care about equality or not-equality, we can avoid all the
1327 * expense of strcoll() here, and just do bitwise comparison.
1329 if (VARSIZE_ANY_EXHDR(arg1
) != VARSIZE_ANY_EXHDR(arg2
))
1332 result
= (strncmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
),
1333 VARSIZE_ANY_EXHDR(arg1
)) == 0);
1335 PG_FREE_IF_COPY(arg1
, 0);
1336 PG_FREE_IF_COPY(arg2
, 1);
1338 PG_RETURN_BOOL(result
);
1342 textne(PG_FUNCTION_ARGS
)
1344 text
*arg1
= PG_GETARG_TEXT_PP(0);
1345 text
*arg2
= PG_GETARG_TEXT_PP(1);
1349 * Since we only care about equality or not-equality, we can avoid all the
1350 * expense of strcoll() here, and just do bitwise comparison.
1352 if (VARSIZE_ANY_EXHDR(arg1
) != VARSIZE_ANY_EXHDR(arg2
))
1355 result
= (strncmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
),
1356 VARSIZE_ANY_EXHDR(arg1
)) != 0);
1358 PG_FREE_IF_COPY(arg1
, 0);
1359 PG_FREE_IF_COPY(arg2
, 1);
1361 PG_RETURN_BOOL(result
);
1365 text_lt(PG_FUNCTION_ARGS
)
1367 text
*arg1
= PG_GETARG_TEXT_PP(0);
1368 text
*arg2
= PG_GETARG_TEXT_PP(1);
1371 result
= (text_cmp(arg1
, arg2
) < 0);
1373 PG_FREE_IF_COPY(arg1
, 0);
1374 PG_FREE_IF_COPY(arg2
, 1);
1376 PG_RETURN_BOOL(result
);
1380 text_le(PG_FUNCTION_ARGS
)
1382 text
*arg1
= PG_GETARG_TEXT_PP(0);
1383 text
*arg2
= PG_GETARG_TEXT_PP(1);
1386 result
= (text_cmp(arg1
, arg2
) <= 0);
1388 PG_FREE_IF_COPY(arg1
, 0);
1389 PG_FREE_IF_COPY(arg2
, 1);
1391 PG_RETURN_BOOL(result
);
1395 text_gt(PG_FUNCTION_ARGS
)
1397 text
*arg1
= PG_GETARG_TEXT_PP(0);
1398 text
*arg2
= PG_GETARG_TEXT_PP(1);
1401 result
= (text_cmp(arg1
, arg2
) > 0);
1403 PG_FREE_IF_COPY(arg1
, 0);
1404 PG_FREE_IF_COPY(arg2
, 1);
1406 PG_RETURN_BOOL(result
);
1410 text_ge(PG_FUNCTION_ARGS
)
1412 text
*arg1
= PG_GETARG_TEXT_PP(0);
1413 text
*arg2
= PG_GETARG_TEXT_PP(1);
1416 result
= (text_cmp(arg1
, arg2
) >= 0);
1418 PG_FREE_IF_COPY(arg1
, 0);
1419 PG_FREE_IF_COPY(arg2
, 1);
1421 PG_RETURN_BOOL(result
);
1425 bttextcmp(PG_FUNCTION_ARGS
)
1427 text
*arg1
= PG_GETARG_TEXT_PP(0);
1428 text
*arg2
= PG_GETARG_TEXT_PP(1);
1431 result
= text_cmp(arg1
, arg2
);
1433 PG_FREE_IF_COPY(arg1
, 0);
1434 PG_FREE_IF_COPY(arg2
, 1);
1436 PG_RETURN_INT32(result
);
1441 text_larger(PG_FUNCTION_ARGS
)
1443 text
*arg1
= PG_GETARG_TEXT_PP(0);
1444 text
*arg2
= PG_GETARG_TEXT_PP(1);
1447 result
= ((text_cmp(arg1
, arg2
) > 0) ? arg1
: arg2
);
1449 PG_RETURN_TEXT_P(result
);
1453 text_smaller(PG_FUNCTION_ARGS
)
1455 text
*arg1
= PG_GETARG_TEXT_PP(0);
1456 text
*arg2
= PG_GETARG_TEXT_PP(1);
1459 result
= ((text_cmp(arg1
, arg2
) < 0) ? arg1
: arg2
);
1461 PG_RETURN_TEXT_P(result
);
1466 * The following operators support character-by-character comparison
1467 * of text datums, to allow building indexes suitable for LIKE clauses.
1468 * Note that the regular texteq/textne comparison operators are assumed
1469 * to be compatible with these!
1473 internal_text_pattern_compare(text
*arg1
, text
*arg2
)
1479 len1
= VARSIZE_ANY_EXHDR(arg1
);
1480 len2
= VARSIZE_ANY_EXHDR(arg2
);
1482 result
= strncmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
), Min(len1
, len2
));
1485 else if (len1
< len2
)
1487 else if (len1
> len2
)
1495 text_pattern_lt(PG_FUNCTION_ARGS
)
1497 text
*arg1
= PG_GETARG_TEXT_PP(0);
1498 text
*arg2
= PG_GETARG_TEXT_PP(1);
1501 result
= internal_text_pattern_compare(arg1
, arg2
);
1503 PG_FREE_IF_COPY(arg1
, 0);
1504 PG_FREE_IF_COPY(arg2
, 1);
1506 PG_RETURN_BOOL(result
< 0);
1511 text_pattern_le(PG_FUNCTION_ARGS
)
1513 text
*arg1
= PG_GETARG_TEXT_PP(0);
1514 text
*arg2
= PG_GETARG_TEXT_PP(1);
1517 result
= internal_text_pattern_compare(arg1
, arg2
);
1519 PG_FREE_IF_COPY(arg1
, 0);
1520 PG_FREE_IF_COPY(arg2
, 1);
1522 PG_RETURN_BOOL(result
<= 0);
1527 text_pattern_ge(PG_FUNCTION_ARGS
)
1529 text
*arg1
= PG_GETARG_TEXT_PP(0);
1530 text
*arg2
= PG_GETARG_TEXT_PP(1);
1533 result
= internal_text_pattern_compare(arg1
, arg2
);
1535 PG_FREE_IF_COPY(arg1
, 0);
1536 PG_FREE_IF_COPY(arg2
, 1);
1538 PG_RETURN_BOOL(result
>= 0);
1543 text_pattern_gt(PG_FUNCTION_ARGS
)
1545 text
*arg1
= PG_GETARG_TEXT_PP(0);
1546 text
*arg2
= PG_GETARG_TEXT_PP(1);
1549 result
= internal_text_pattern_compare(arg1
, arg2
);
1551 PG_FREE_IF_COPY(arg1
, 0);
1552 PG_FREE_IF_COPY(arg2
, 1);
1554 PG_RETURN_BOOL(result
> 0);
1559 bttext_pattern_cmp(PG_FUNCTION_ARGS
)
1561 text
*arg1
= PG_GETARG_TEXT_PP(0);
1562 text
*arg2
= PG_GETARG_TEXT_PP(1);
1565 result
= internal_text_pattern_compare(arg1
, arg2
);
1567 PG_FREE_IF_COPY(arg1
, 0);
1568 PG_FREE_IF_COPY(arg2
, 1);
1570 PG_RETURN_INT32(result
);
1574 /*-------------------------------------------------------------
1577 * get the number of bytes contained in an instance of type 'bytea'
1578 *-------------------------------------------------------------
1581 byteaoctetlen(PG_FUNCTION_ARGS
)
1583 Datum str
= PG_GETARG_DATUM(0);
1585 /* We need not detoast the input at all */
1586 PG_RETURN_INT32(toast_raw_datum_size(str
) - VARHDRSZ
);
1591 * takes two bytea* and returns a bytea* that is the concatenation of
1594 * Cloned from textcat and modified as required.
1597 byteacat(PG_FUNCTION_ARGS
)
1599 bytea
*t1
= PG_GETARG_BYTEA_PP(0);
1600 bytea
*t2
= PG_GETARG_BYTEA_PP(1);
1607 len1
= VARSIZE_ANY_EXHDR(t1
);
1611 len2
= VARSIZE_ANY_EXHDR(t2
);
1615 len
= len1
+ len2
+ VARHDRSZ
;
1616 result
= (bytea
*) palloc(len
);
1618 /* Set size of result string... */
1619 SET_VARSIZE(result
, len
);
1621 /* Fill data field of result string... */
1622 ptr
= VARDATA(result
);
1624 memcpy(ptr
, VARDATA_ANY(t1
), len1
);
1626 memcpy(ptr
+ len1
, VARDATA_ANY(t2
), len2
);
1628 PG_RETURN_BYTEA_P(result
);
1631 #define PG_STR_GET_BYTEA(str_) \
1632 DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1636 * Return a substring starting at the specified position.
1637 * Cloned from text_substr and modified as required.
1641 * - starting position (is one-based)
1642 * - string length (optional)
1644 * If the starting position is zero or less, then return from the start of the string
1645 * adjusting the length to be consistent with the "negative start" per SQL92.
1646 * If the length is less than zero, an ERROR is thrown. If no third argument
1647 * (length) is provided, the length to the end of the string is assumed.
1650 bytea_substr(PG_FUNCTION_ARGS
)
1652 int S
= PG_GETARG_INT32(1); /* start position */
1653 int S1
; /* adjusted start position */
1654 int L1
; /* adjusted substring length */
1658 if (fcinfo
->nargs
== 2)
1661 * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs everything to
1662 * the end of the string if we pass it a negative value for length.
1669 int E
= S
+ PG_GETARG_INT32(2);
1672 * A negative value for L is the only way for the end position to be
1673 * before the start. SQL99 says to throw an error.
1677 (errcode(ERRCODE_SUBSTRING_ERROR
),
1678 errmsg("negative substring length not allowed")));
1681 * A zero or negative value for the end position can happen if the
1682 * start was negative or one. SQL99 says to return a zero-length
1686 PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1692 * If the start position is past the end of the string, SQL99 says to
1693 * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do that
1694 * for us. Convert to zero-based starting position
1696 PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1
- 1, L1
));
1700 * bytea_substr_no_len -
1701 * Wrapper to avoid opr_sanity failure due to
1702 * one function accepting a different number of args.
1705 bytea_substr_no_len(PG_FUNCTION_ARGS
)
1707 return bytea_substr(fcinfo
);
1712 * Return the position of the specified substring.
1713 * Implements the SQL92 POSITION() function.
1714 * Cloned from textpos and modified as required.
1717 byteapos(PG_FUNCTION_ARGS
)
1719 bytea
*t1
= PG_GETARG_BYTEA_PP(0);
1720 bytea
*t2
= PG_GETARG_BYTEA_PP(1);
1729 len1
= VARSIZE_ANY_EXHDR(t1
);
1730 len2
= VARSIZE_ANY_EXHDR(t2
);
1733 PG_RETURN_INT32(1); /* result for empty pattern */
1735 p1
= VARDATA_ANY(t1
);
1736 p2
= VARDATA_ANY(t2
);
1740 for (p
= 0; p
<= px
; p
++)
1742 if ((*p2
== *p1
) && (memcmp(p1
, p2
, len2
) == 0))
1750 PG_RETURN_INT32(pos
);
1753 /*-------------------------------------------------------------
1756 * this routine treats "bytea" as an array of bytes.
1757 * It returns the Nth byte (a number between 0 and 255).
1758 *-------------------------------------------------------------
1761 byteaGetByte(PG_FUNCTION_ARGS
)
1763 bytea
*v
= PG_GETARG_BYTEA_PP(0);
1764 int32 n
= PG_GETARG_INT32(1);
1768 len
= VARSIZE_ANY_EXHDR(v
);
1770 if (n
< 0 || n
>= len
)
1772 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR
),
1773 errmsg("index %d out of valid range, 0..%d",
1776 byte
= ((unsigned char *) VARDATA_ANY(v
))[n
];
1778 PG_RETURN_INT32(byte
);
1781 /*-------------------------------------------------------------
1784 * This routine treats a "bytea" type like an array of bits.
1785 * It returns the value of the Nth bit (0 or 1).
1787 *-------------------------------------------------------------
1790 byteaGetBit(PG_FUNCTION_ARGS
)
1792 bytea
*v
= PG_GETARG_BYTEA_PP(0);
1793 int32 n
= PG_GETARG_INT32(1);
1799 len
= VARSIZE_ANY_EXHDR(v
);
1801 if (n
< 0 || n
>= len
* 8)
1803 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR
),
1804 errmsg("index %d out of valid range, 0..%d",
1810 byte
= ((unsigned char *) VARDATA_ANY(v
))[byteNo
];
1812 if (byte
& (1 << bitNo
))
1818 /*-------------------------------------------------------------
1821 * Given an instance of type 'bytea' creates a new one with
1822 * the Nth byte set to the given value.
1824 *-------------------------------------------------------------
1827 byteaSetByte(PG_FUNCTION_ARGS
)
1829 bytea
*v
= PG_GETARG_BYTEA_P(0);
1830 int32 n
= PG_GETARG_INT32(1);
1831 int32 newByte
= PG_GETARG_INT32(2);
1835 len
= VARSIZE(v
) - VARHDRSZ
;
1837 if (n
< 0 || n
>= len
)
1839 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR
),
1840 errmsg("index %d out of valid range, 0..%d",
1844 * Make a copy of the original varlena.
1846 res
= (bytea
*) palloc(VARSIZE(v
));
1847 memcpy((char *) res
, (char *) v
, VARSIZE(v
));
1852 ((unsigned char *) VARDATA(res
))[n
] = newByte
;
1854 PG_RETURN_BYTEA_P(res
);
1857 /*-------------------------------------------------------------
1860 * Given an instance of type 'bytea' creates a new one with
1861 * the Nth bit set to the given value.
1863 *-------------------------------------------------------------
1866 byteaSetBit(PG_FUNCTION_ARGS
)
1868 bytea
*v
= PG_GETARG_BYTEA_P(0);
1869 int32 n
= PG_GETARG_INT32(1);
1870 int32 newBit
= PG_GETARG_INT32(2);
1878 len
= VARSIZE(v
) - VARHDRSZ
;
1880 if (n
< 0 || n
>= len
* 8)
1882 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR
),
1883 errmsg("index %d out of valid range, 0..%d",
1892 if (newBit
!= 0 && newBit
!= 1)
1894 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
1895 errmsg("new bit must be 0 or 1")));
1898 * Make a copy of the original varlena.
1900 res
= (bytea
*) palloc(VARSIZE(v
));
1901 memcpy((char *) res
, (char *) v
, VARSIZE(v
));
1906 oldByte
= ((unsigned char *) VARDATA(res
))[byteNo
];
1909 newByte
= oldByte
& (~(1 << bitNo
));
1911 newByte
= oldByte
| (1 << bitNo
);
1913 ((unsigned char *) VARDATA(res
))[byteNo
] = newByte
;
1915 PG_RETURN_BYTEA_P(res
);
1920 * Converts a text type to a Name type.
1923 text_name(PG_FUNCTION_ARGS
)
1925 text
*s
= PG_GETARG_TEXT_PP(0);
1929 len
= VARSIZE_ANY_EXHDR(s
);
1931 /* Truncate oversize input */
1932 if (len
>= NAMEDATALEN
)
1933 len
= NAMEDATALEN
- 1;
1935 result
= (Name
) palloc(NAMEDATALEN
);
1936 memcpy(NameStr(*result
), VARDATA_ANY(s
), len
);
1938 /* now null pad to full length... */
1939 while (len
< NAMEDATALEN
)
1941 *(NameStr(*result
) + len
) = '\0';
1945 PG_RETURN_NAME(result
);
1949 * Converts a Name type to a text type.
1952 name_text(PG_FUNCTION_ARGS
)
1954 Name s
= PG_GETARG_NAME(0);
1956 PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s
)));
1961 * textToQualifiedNameList - convert a text object to list of names
1963 * This implements the input parsing needed by nextval() and other
1964 * functions that take a text parameter representing a qualified name.
1965 * We split the name at dots, downcase if not double-quoted, and
1966 * truncate names if they're too long.
1969 textToQualifiedNameList(text
*textval
)
1976 /* Convert to C string (handles possible detoasting). */
1977 /* Note we rely on being able to modify rawname below. */
1978 rawname
= text_to_cstring(textval
);
1980 if (!SplitIdentifierString(rawname
, '.', &namelist
))
1982 (errcode(ERRCODE_INVALID_NAME
),
1983 errmsg("invalid name syntax")));
1985 if (namelist
== NIL
)
1987 (errcode(ERRCODE_INVALID_NAME
),
1988 errmsg("invalid name syntax")));
1990 foreach(l
, namelist
)
1992 char *curname
= (char *) lfirst(l
);
1994 result
= lappend(result
, makeString(pstrdup(curname
)));
1998 list_free(namelist
);
2004 * SplitIdentifierString --- parse a string containing identifiers
2006 * This is the guts of textToQualifiedNameList, and is exported for use in
2007 * other situations such as parsing GUC variables. In the GUC case, it's
2008 * important to avoid memory leaks, so the API is designed to minimize the
2009 * amount of stuff that needs to be allocated and freed.
2012 * rawstring: the input string; must be overwritable! On return, it's
2013 * been modified to contain the separated identifiers.
2014 * separator: the separator punctuation expected between identifiers
2015 * (typically '.' or ','). Whitespace may also appear around
2018 * namelist: filled with a palloc'd list of pointers to identifiers within
2019 * rawstring. Caller should list_free() this even on error return.
2021 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
2023 * Note that an empty string is considered okay here, though not in
2024 * textToQualifiedNameList.
2027 SplitIdentifierString(char *rawstring
, char separator
,
2030 char *nextp
= rawstring
;
2035 while (isspace((unsigned char) *nextp
))
2036 nextp
++; /* skip leading whitespace */
2039 return true; /* allow empty string */
2041 /* At the top of the loop, we are at start of a new identifier. */
2049 /* Quoted name --- collapse quote-quote pairs, no downcasing */
2050 curname
= nextp
+ 1;
2053 endp
= strchr(nextp
+ 1, '\"');
2055 return false; /* mismatched quotes */
2056 if (endp
[1] != '\"')
2057 break; /* found end of quoted name */
2058 /* Collapse adjacent quotes into one quote, and look again */
2059 memmove(endp
, endp
+ 1, strlen(endp
));
2062 /* endp now points at the terminating quote */
2067 /* Unquoted name --- extends to separator or whitespace */
2072 while (*nextp
&& *nextp
!= separator
&&
2073 !isspace((unsigned char) *nextp
))
2076 if (curname
== nextp
)
2077 return false; /* empty unquoted name not allowed */
2080 * Downcase the identifier, using same code as main lexer does.
2082 * XXX because we want to overwrite the input in-place, we cannot
2083 * support a downcasing transformation that increases the string
2084 * length. This is not a problem given the current implementation
2085 * of downcase_truncate_identifier, but we'll probably have to do
2086 * something about this someday.
2088 len
= endp
- curname
;
2089 downname
= downcase_truncate_identifier(curname
, len
, false);
2090 Assert(strlen(downname
) <= len
);
2091 strncpy(curname
, downname
, len
);
2095 while (isspace((unsigned char) *nextp
))
2096 nextp
++; /* skip trailing whitespace */
2098 if (*nextp
== separator
)
2101 while (isspace((unsigned char) *nextp
))
2102 nextp
++; /* skip leading whitespace for next */
2103 /* we expect another name, so done remains false */
2105 else if (*nextp
== '\0')
2108 return false; /* invalid syntax */
2110 /* Now safe to overwrite separator with a null */
2113 /* Truncate name if it's overlength */
2114 truncate_identifier(curname
, strlen(curname
), false);
2117 * Finished isolating current name --- add it to list
2119 *namelist
= lappend(*namelist
, curname
);
2121 /* Loop back if we didn't reach end of string */
2128 /*****************************************************************************
2129 * Comparison Functions used for bytea
2131 * Note: btree indexes need these routines not to leak memory; therefore,
2132 * be careful to free working copies of toasted datums. Most places don't
2133 * need to be so careful.
2134 *****************************************************************************/
2137 byteaeq(PG_FUNCTION_ARGS
)
2139 bytea
*arg1
= PG_GETARG_BYTEA_PP(0);
2140 bytea
*arg2
= PG_GETARG_BYTEA_PP(1);
2145 len1
= VARSIZE_ANY_EXHDR(arg1
);
2146 len2
= VARSIZE_ANY_EXHDR(arg2
);
2148 /* fast path for different-length inputs */
2152 result
= (memcmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
), len1
) == 0);
2154 PG_FREE_IF_COPY(arg1
, 0);
2155 PG_FREE_IF_COPY(arg2
, 1);
2157 PG_RETURN_BOOL(result
);
2161 byteane(PG_FUNCTION_ARGS
)
2163 bytea
*arg1
= PG_GETARG_BYTEA_PP(0);
2164 bytea
*arg2
= PG_GETARG_BYTEA_PP(1);
2169 len1
= VARSIZE_ANY_EXHDR(arg1
);
2170 len2
= VARSIZE_ANY_EXHDR(arg2
);
2172 /* fast path for different-length inputs */
2176 result
= (memcmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
), len1
) != 0);
2178 PG_FREE_IF_COPY(arg1
, 0);
2179 PG_FREE_IF_COPY(arg2
, 1);
2181 PG_RETURN_BOOL(result
);
2185 bytealt(PG_FUNCTION_ARGS
)
2187 bytea
*arg1
= PG_GETARG_BYTEA_PP(0);
2188 bytea
*arg2
= PG_GETARG_BYTEA_PP(1);
2193 len1
= VARSIZE_ANY_EXHDR(arg1
);
2194 len2
= VARSIZE_ANY_EXHDR(arg2
);
2196 cmp
= memcmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
), Min(len1
, len2
));
2198 PG_FREE_IF_COPY(arg1
, 0);
2199 PG_FREE_IF_COPY(arg2
, 1);
2201 PG_RETURN_BOOL((cmp
< 0) || ((cmp
== 0) && (len1
< len2
)));
2205 byteale(PG_FUNCTION_ARGS
)
2207 bytea
*arg1
= PG_GETARG_BYTEA_PP(0);
2208 bytea
*arg2
= PG_GETARG_BYTEA_PP(1);
2213 len1
= VARSIZE_ANY_EXHDR(arg1
);
2214 len2
= VARSIZE_ANY_EXHDR(arg2
);
2216 cmp
= memcmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
), Min(len1
, len2
));
2218 PG_FREE_IF_COPY(arg1
, 0);
2219 PG_FREE_IF_COPY(arg2
, 1);
2221 PG_RETURN_BOOL((cmp
< 0) || ((cmp
== 0) && (len1
<= len2
)));
2225 byteagt(PG_FUNCTION_ARGS
)
2227 bytea
*arg1
= PG_GETARG_BYTEA_PP(0);
2228 bytea
*arg2
= PG_GETARG_BYTEA_PP(1);
2233 len1
= VARSIZE_ANY_EXHDR(arg1
);
2234 len2
= VARSIZE_ANY_EXHDR(arg2
);
2236 cmp
= memcmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
), Min(len1
, len2
));
2238 PG_FREE_IF_COPY(arg1
, 0);
2239 PG_FREE_IF_COPY(arg2
, 1);
2241 PG_RETURN_BOOL((cmp
> 0) || ((cmp
== 0) && (len1
> len2
)));
2245 byteage(PG_FUNCTION_ARGS
)
2247 bytea
*arg1
= PG_GETARG_BYTEA_PP(0);
2248 bytea
*arg2
= PG_GETARG_BYTEA_PP(1);
2253 len1
= VARSIZE_ANY_EXHDR(arg1
);
2254 len2
= VARSIZE_ANY_EXHDR(arg2
);
2256 cmp
= memcmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
), Min(len1
, len2
));
2258 PG_FREE_IF_COPY(arg1
, 0);
2259 PG_FREE_IF_COPY(arg2
, 1);
2261 PG_RETURN_BOOL((cmp
> 0) || ((cmp
== 0) && (len1
>= len2
)));
2265 byteacmp(PG_FUNCTION_ARGS
)
2267 bytea
*arg1
= PG_GETARG_BYTEA_PP(0);
2268 bytea
*arg2
= PG_GETARG_BYTEA_PP(1);
2273 len1
= VARSIZE_ANY_EXHDR(arg1
);
2274 len2
= VARSIZE_ANY_EXHDR(arg2
);
2276 cmp
= memcmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
), Min(len1
, len2
));
2277 if ((cmp
== 0) && (len1
!= len2
))
2278 cmp
= (len1
< len2
) ? -1 : 1;
2280 PG_FREE_IF_COPY(arg1
, 0);
2281 PG_FREE_IF_COPY(arg2
, 1);
2283 PG_RETURN_INT32(cmp
);
2287 * appendStringInfoText
2289 * Append a text to str.
2290 * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
2293 appendStringInfoText(StringInfo str
, const text
*t
)
2295 appendBinaryStringInfo(str
, VARDATA_ANY(t
), VARSIZE_ANY_EXHDR(t
));
2300 * replace all occurrences of 'old_sub_str' in 'orig_str'
2301 * with 'new_sub_str' to form 'new_str'
2303 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
2304 * otherwise returns 'new_str'
2307 replace_text(PG_FUNCTION_ARGS
)
2309 text
*src_text
= PG_GETARG_TEXT_PP(0);
2310 text
*from_sub_text
= PG_GETARG_TEXT_PP(1);
2311 text
*to_sub_text
= PG_GETARG_TEXT_PP(2);
2313 int from_sub_text_len
;
2314 TextPositionState state
;
2322 text_position_setup(src_text
, from_sub_text
, &state
);
2325 * Note: we check the converted string length, not the original, because
2326 * they could be different if the input contained invalid encoding.
2328 src_text_len
= state
.len1
;
2329 from_sub_text_len
= state
.len2
;
2331 /* Return unmodified source string if empty source or pattern */
2332 if (src_text_len
< 1 || from_sub_text_len
< 1)
2334 text_position_cleanup(&state
);
2335 PG_RETURN_TEXT_P(src_text
);
2339 curr_posn
= text_position_next(1, &state
);
2341 /* When the from_sub_text is not found, there is nothing to do. */
2344 text_position_cleanup(&state
);
2345 PG_RETURN_TEXT_P(src_text
);
2348 /* start_ptr points to the start_posn'th character of src_text */
2349 start_ptr
= VARDATA_ANY(src_text
);
2351 initStringInfo(&str
);
2355 CHECK_FOR_INTERRUPTS();
2357 /* copy the data skipped over by last text_position_next() */
2358 chunk_len
= charlen_to_bytelen(start_ptr
, curr_posn
- start_posn
);
2359 appendBinaryStringInfo(&str
, start_ptr
, chunk_len
);
2361 appendStringInfoText(&str
, to_sub_text
);
2363 start_posn
= curr_posn
;
2364 start_ptr
+= chunk_len
;
2365 start_posn
+= from_sub_text_len
;
2366 start_ptr
+= charlen_to_bytelen(start_ptr
, from_sub_text_len
);
2368 curr_posn
= text_position_next(start_posn
, &state
);
2370 while (curr_posn
> 0);
2372 /* copy trailing data */
2373 chunk_len
= ((char *) src_text
+ VARSIZE_ANY(src_text
)) - start_ptr
;
2374 appendBinaryStringInfo(&str
, start_ptr
, chunk_len
);
2376 text_position_cleanup(&state
);
2378 ret_text
= cstring_to_text_with_len(str
.data
, str
.len
);
2381 PG_RETURN_TEXT_P(ret_text
);
2385 * check_replace_text_has_escape_char
2387 * check whether replace_text contains escape char.
2390 check_replace_text_has_escape_char(const text
*replace_text
)
2392 const char *p
= VARDATA_ANY(replace_text
);
2393 const char *p_end
= p
+ VARSIZE_ANY_EXHDR(replace_text
);
2395 if (pg_database_encoding_max_length() == 1)
2397 for (; p
< p_end
; p
++)
2405 for (; p
< p_end
; p
+= pg_mblen(p
))
2416 * appendStringInfoRegexpSubstr
2418 * Append replace_text to str, substituting regexp back references for
2419 * \n escapes. start_ptr is the start of the match in the source string,
2420 * at logical character position data_pos.
2423 appendStringInfoRegexpSubstr(StringInfo str
, text
*replace_text
,
2425 char *start_ptr
, int data_pos
)
2427 const char *p
= VARDATA_ANY(replace_text
);
2428 const char *p_end
= p
+ VARSIZE_ANY_EXHDR(replace_text
);
2429 int eml
= pg_database_encoding_max_length();
2433 const char *chunk_start
= p
;
2437 /* Find next escape char. */
2440 for (; p
< p_end
&& *p
!= '\\'; p
++)
2445 for (; p
< p_end
&& *p
!= '\\'; p
+= pg_mblen(p
))
2449 /* Copy the text we just scanned over, if any. */
2450 if (p
> chunk_start
)
2451 appendBinaryStringInfo(str
, chunk_start
, p
- chunk_start
);
2453 /* Done if at end of string, else advance over escape char. */
2460 /* Escape at very end of input. Treat same as unexpected char */
2461 appendStringInfoChar(str
, '\\');
2465 if (*p
>= '1' && *p
<= '9')
2467 /* Use the back reference of regexp. */
2470 so
= pmatch
[idx
].rm_so
;
2471 eo
= pmatch
[idx
].rm_eo
;
2476 /* Use the entire matched string. */
2477 so
= pmatch
[0].rm_so
;
2478 eo
= pmatch
[0].rm_eo
;
2481 else if (*p
== '\\')
2483 /* \\ means transfer one \ to output. */
2484 appendStringInfoChar(str
, '\\');
2491 * If escape char is not followed by any expected char, just treat
2492 * it as ordinary data to copy. (XXX would it be better to throw
2495 appendStringInfoChar(str
, '\\');
2499 if (so
!= -1 && eo
!= -1)
2502 * Copy the text that is back reference of regexp. Note so and eo
2503 * are counted in characters not bytes.
2508 Assert(so
>= data_pos
);
2509 chunk_start
= start_ptr
;
2510 chunk_start
+= charlen_to_bytelen(chunk_start
, so
- data_pos
);
2511 chunk_len
= charlen_to_bytelen(chunk_start
, eo
- so
);
2512 appendBinaryStringInfo(str
, chunk_start
, chunk_len
);
2517 #define REGEXP_REPLACE_BACKREF_CNT 10
2520 * replace_text_regexp
2522 * replace text that matches to regexp in src_text to replace_text.
2524 * Note: to avoid having to include regex.h in builtins.h, we declare
2525 * the regexp argument as void *, but really it's regex_t *.
2528 replace_text_regexp(text
*src_text
, void *regexp
,
2529 text
*replace_text
, bool glob
)
2532 regex_t
*re
= (regex_t
*) regexp
;
2533 int src_text_len
= VARSIZE_ANY_EXHDR(src_text
);
2535 regmatch_t pmatch
[REGEXP_REPLACE_BACKREF_CNT
];
2543 initStringInfo(&buf
);
2545 /* Convert data string to wide characters. */
2546 data
= (pg_wchar
*) palloc((src_text_len
+ 1) * sizeof(pg_wchar
));
2547 data_len
= pg_mb2wchar_with_len(VARDATA_ANY(src_text
), data
, src_text_len
);
2549 /* Check whether replace_text has escape char. */
2550 have_escape
= check_replace_text_has_escape_char(replace_text
);
2552 /* start_ptr points to the data_pos'th character of src_text */
2553 start_ptr
= (char *) VARDATA_ANY(src_text
);
2557 while (search_start
<= data_len
)
2561 CHECK_FOR_INTERRUPTS();
2563 regexec_result
= pg_regexec(re
,
2567 NULL
, /* no details */
2568 REGEXP_REPLACE_BACKREF_CNT
,
2572 if (regexec_result
== REG_NOMATCH
)
2575 if (regexec_result
!= REG_OKAY
)
2579 pg_regerror(regexec_result
, re
, errMsg
, sizeof(errMsg
));
2581 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION
),
2582 errmsg("regular expression failed: %s", errMsg
)));
2586 * Copy the text to the left of the match position. Note we are given
2587 * character not byte indexes.
2589 if (pmatch
[0].rm_so
- data_pos
> 0)
2593 chunk_len
= charlen_to_bytelen(start_ptr
,
2594 pmatch
[0].rm_so
- data_pos
);
2595 appendBinaryStringInfo(&buf
, start_ptr
, chunk_len
);
2598 * Advance start_ptr over that text, to avoid multiple rescans of
2599 * it if the replace_text contains multiple back-references.
2601 start_ptr
+= chunk_len
;
2602 data_pos
= pmatch
[0].rm_so
;
2606 * Copy the replace_text. Process back references when the
2607 * replace_text has escape characters.
2610 appendStringInfoRegexpSubstr(&buf
, replace_text
, pmatch
,
2611 start_ptr
, data_pos
);
2613 appendStringInfoText(&buf
, replace_text
);
2615 /* Advance start_ptr and data_pos over the matched text. */
2616 start_ptr
+= charlen_to_bytelen(start_ptr
,
2617 pmatch
[0].rm_eo
- data_pos
);
2618 data_pos
= pmatch
[0].rm_eo
;
2621 * When global option is off, replace the first instance only.
2627 * Search from next character when the matching text is zero width.
2629 search_start
= data_pos
;
2630 if (pmatch
[0].rm_so
== pmatch
[0].rm_eo
)
2635 * Copy the text to the right of the last match.
2637 if (data_pos
< data_len
)
2641 chunk_len
= ((char *) src_text
+ VARSIZE_ANY(src_text
)) - start_ptr
;
2642 appendBinaryStringInfo(&buf
, start_ptr
, chunk_len
);
2645 ret_text
= cstring_to_text_with_len(buf
.data
, buf
.len
);
2654 * parse input string
2655 * return ord item (1 based)
2656 * based on provided field separator
2659 split_text(PG_FUNCTION_ARGS
)
2661 text
*inputstring
= PG_GETARG_TEXT_PP(0);
2662 text
*fldsep
= PG_GETARG_TEXT_PP(1);
2663 int fldnum
= PG_GETARG_INT32(2);
2664 int inputstring_len
;
2666 TextPositionState state
;
2671 /* field number is 1 based */
2674 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
2675 errmsg("field position must be greater than zero")));
2677 text_position_setup(inputstring
, fldsep
, &state
);
2680 * Note: we check the converted string length, not the original, because
2681 * they could be different if the input contained invalid encoding.
2683 inputstring_len
= state
.len1
;
2684 fldsep_len
= state
.len2
;
2686 /* return empty string for empty input string */
2687 if (inputstring_len
< 1)
2689 text_position_cleanup(&state
);
2690 PG_RETURN_TEXT_P(cstring_to_text(""));
2693 /* empty field separator */
2696 text_position_cleanup(&state
);
2697 /* if first field, return input string, else empty string */
2699 PG_RETURN_TEXT_P(inputstring
);
2701 PG_RETURN_TEXT_P(cstring_to_text(""));
2704 /* identify bounds of first field */
2706 end_posn
= text_position_next(1, &state
);
2708 /* special case if fldsep not found at all */
2711 text_position_cleanup(&state
);
2712 /* if field 1 requested, return input string, else empty string */
2714 PG_RETURN_TEXT_P(inputstring
);
2716 PG_RETURN_TEXT_P(cstring_to_text(""));
2719 while (end_posn
> 0 && --fldnum
> 0)
2721 /* identify bounds of next field */
2722 start_posn
= end_posn
+ fldsep_len
;
2723 end_posn
= text_position_next(start_posn
, &state
);
2726 text_position_cleanup(&state
);
2730 /* N'th field separator not found */
2731 /* if last field requested, return it, else empty string */
2733 result_text
= text_substring(PointerGetDatum(inputstring
),
2738 result_text
= cstring_to_text("");
2742 /* non-last field requested */
2743 result_text
= text_substring(PointerGetDatum(inputstring
),
2745 end_posn
- start_posn
,
2749 PG_RETURN_TEXT_P(result_text
);
2754 * parse input string
2755 * return text array of elements
2756 * based on provided field separator
2759 text_to_array(PG_FUNCTION_ARGS
)
2761 text
*inputstring
= PG_GETARG_TEXT_PP(0);
2762 text
*fldsep
= PG_GETARG_TEXT_PP(1);
2763 int inputstring_len
;
2765 TextPositionState state
;
2772 ArrayBuildState
*astate
= NULL
;
2774 text_position_setup(inputstring
, fldsep
, &state
);
2777 * Note: we check the converted string length, not the original, because
2778 * they could be different if the input contained invalid encoding.
2780 inputstring_len
= state
.len1
;
2781 fldsep_len
= state
.len2
;
2783 /* return NULL for empty input string */
2784 if (inputstring_len
< 1)
2786 text_position_cleanup(&state
);
2791 * empty field separator return one element, 1D, array using the input
2796 text_position_cleanup(&state
);
2797 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo
, TEXTOID
,
2798 PointerGetDatum(inputstring
), 1));
2802 /* start_ptr points to the start_posn'th character of inputstring */
2803 start_ptr
= VARDATA_ANY(inputstring
);
2805 for (fldnum
= 1;; fldnum
++) /* field number is 1 based */
2807 CHECK_FOR_INTERRUPTS();
2809 end_posn
= text_position_next(start_posn
, &state
);
2813 /* fetch last field */
2814 chunk_len
= ((char *) inputstring
+ VARSIZE_ANY(inputstring
)) - start_ptr
;
2818 /* fetch non-last field */
2819 chunk_len
= charlen_to_bytelen(start_ptr
, end_posn
- start_posn
);
2822 /* must build a temp text datum to pass to accumArrayResult */
2823 result_text
= cstring_to_text_with_len(start_ptr
, chunk_len
);
2825 /* stash away this field */
2826 astate
= accumArrayResult(astate
,
2827 PointerGetDatum(result_text
),
2830 CurrentMemoryContext
);
2837 start_posn
= end_posn
;
2838 start_ptr
+= chunk_len
;
2839 start_posn
+= fldsep_len
;
2840 start_ptr
+= charlen_to_bytelen(start_ptr
, fldsep_len
);
2843 text_position_cleanup(&state
);
2845 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate
,
2846 CurrentMemoryContext
));
2851 * concatenate Cstring representation of input array elements
2852 * using provided field separator
2855 array_to_text(PG_FUNCTION_ARGS
)
2857 ArrayType
*v
= PG_GETARG_ARRAYTYPE_P(0);
2858 char *fldsep
= text_to_cstring(PG_GETARG_TEXT_PP(1));
2867 bool printed
= false;
2872 ArrayMetaState
*my_extra
;
2874 ndims
= ARR_NDIM(v
);
2876 nitems
= ArrayGetNItems(ndims
, dims
);
2878 /* if there are no elements, return an empty string */
2880 PG_RETURN_TEXT_P(cstring_to_text(""));
2882 element_type
= ARR_ELEMTYPE(v
);
2883 initStringInfo(&buf
);
2886 * We arrange to look up info about element type, including its output
2887 * conversion proc, only once per series of calls, assuming the element
2888 * type doesn't change underneath us.
2890 my_extra
= (ArrayMetaState
*) fcinfo
->flinfo
->fn_extra
;
2891 if (my_extra
== NULL
)
2893 fcinfo
->flinfo
->fn_extra
= MemoryContextAlloc(fcinfo
->flinfo
->fn_mcxt
,
2894 sizeof(ArrayMetaState
));
2895 my_extra
= (ArrayMetaState
*) fcinfo
->flinfo
->fn_extra
;
2896 my_extra
->element_type
= ~element_type
;
2899 if (my_extra
->element_type
!= element_type
)
2902 * Get info about element type, including its output conversion proc
2904 get_type_io_data(element_type
, IOFunc_output
,
2905 &my_extra
->typlen
, &my_extra
->typbyval
,
2906 &my_extra
->typalign
, &my_extra
->typdelim
,
2907 &my_extra
->typioparam
, &my_extra
->typiofunc
);
2908 fmgr_info_cxt(my_extra
->typiofunc
, &my_extra
->proc
,
2909 fcinfo
->flinfo
->fn_mcxt
);
2910 my_extra
->element_type
= element_type
;
2912 typlen
= my_extra
->typlen
;
2913 typbyval
= my_extra
->typbyval
;
2914 typalign
= my_extra
->typalign
;
2916 p
= ARR_DATA_PTR(v
);
2917 bitmap
= ARR_NULLBITMAP(v
);
2920 for (i
= 0; i
< nitems
; i
++)
2925 /* Get source element, checking for NULL */
2926 if (bitmap
&& (*bitmap
& bitmask
) == 0)
2928 /* we ignore nulls */
2932 itemvalue
= fetch_att(p
, typbyval
, typlen
);
2934 value
= OutputFunctionCall(&my_extra
->proc
, itemvalue
);
2937 appendStringInfo(&buf
, "%s%s", fldsep
, value
);
2939 appendStringInfoString(&buf
, value
);
2942 p
= att_addlength_pointer(p
, typlen
, p
);
2943 p
= (char *) att_align_nominal(p
, typalign
);
2946 /* advance bitmap pointer if any */
2950 if (bitmask
== 0x100)
2958 PG_RETURN_TEXT_P(cstring_to_text_with_len(buf
.data
, buf
.len
));
2963 * Convert a int32 to a string containing a base 16 (hex) representation of
2967 to_hex32(PG_FUNCTION_ARGS
)
2969 uint32 value
= (uint32
) PG_GETARG_INT32(0);
2971 const char *digits
= "0123456789abcdef";
2972 char buf
[32]; /* bigger than needed, but reasonable */
2974 ptr
= buf
+ sizeof(buf
) - 1;
2979 *--ptr
= digits
[value
% HEXBASE
];
2981 } while (ptr
> buf
&& value
);
2983 PG_RETURN_TEXT_P(cstring_to_text(ptr
));
2987 * Convert a int64 to a string containing a base 16 (hex) representation of
2991 to_hex64(PG_FUNCTION_ARGS
)
2993 uint64 value
= (uint64
) PG_GETARG_INT64(0);
2995 const char *digits
= "0123456789abcdef";
2996 char buf
[32]; /* bigger than needed, but reasonable */
2998 ptr
= buf
+ sizeof(buf
) - 1;
3003 *--ptr
= digits
[value
% HEXBASE
];
3005 } while (ptr
> buf
&& value
);
3007 PG_RETURN_TEXT_P(cstring_to_text(ptr
));
3011 * Create an md5 hash of a text string and return it as hex
3013 * md5 produces a 16 byte (128 bit) hash; double it for hex
3015 #define MD5_HASH_LEN 32
3018 md5_text(PG_FUNCTION_ARGS
)
3020 text
*in_text
= PG_GETARG_TEXT_PP(0);
3022 char hexsum
[MD5_HASH_LEN
+ 1];
3024 /* Calculate the length of the buffer using varlena metadata */
3025 len
= VARSIZE_ANY_EXHDR(in_text
);
3027 /* get the hash result */
3028 if (pg_md5_hash(VARDATA_ANY(in_text
), len
, hexsum
) == false)
3030 (errcode(ERRCODE_OUT_OF_MEMORY
),
3031 errmsg("out of memory")));
3033 /* convert to text and return it */
3034 PG_RETURN_TEXT_P(cstring_to_text(hexsum
));
3038 * Create an md5 hash of a bytea field and return it as a hex string:
3039 * 16-byte md5 digest is represented in 32 hex characters.
3042 md5_bytea(PG_FUNCTION_ARGS
)
3044 bytea
*in
= PG_GETARG_BYTEA_PP(0);
3046 char hexsum
[MD5_HASH_LEN
+ 1];
3048 len
= VARSIZE_ANY_EXHDR(in
);
3049 if (pg_md5_hash(VARDATA_ANY(in
), len
, hexsum
) == false)
3051 (errcode(ERRCODE_OUT_OF_MEMORY
),
3052 errmsg("out of memory")));
3054 PG_RETURN_TEXT_P(cstring_to_text(hexsum
));
3058 * Return the size of a datum, possibly compressed
3060 * Works on any data type
3063 pg_column_size(PG_FUNCTION_ARGS
)
3065 Datum value
= PG_GETARG_DATUM(0);
3069 /* On first call, get the input type's typlen, and save at *fn_extra */
3070 if (fcinfo
->flinfo
->fn_extra
== NULL
)
3072 /* Lookup the datatype of the supplied argument */
3073 Oid argtypeid
= get_fn_expr_argtype(fcinfo
->flinfo
, 0);
3075 typlen
= get_typlen(argtypeid
);
3076 if (typlen
== 0) /* should not happen */
3077 elog(ERROR
, "cache lookup failed for type %u", argtypeid
);
3079 fcinfo
->flinfo
->fn_extra
= MemoryContextAlloc(fcinfo
->flinfo
->fn_mcxt
,
3081 *((int *) fcinfo
->flinfo
->fn_extra
) = typlen
;
3084 typlen
= *((int *) fcinfo
->flinfo
->fn_extra
);
3088 /* varlena type, possibly toasted */
3089 result
= toast_datum_size(value
);
3091 else if (typlen
== -2)
3094 result
= strlen(DatumGetCString(value
)) + 1;
3098 /* ordinary fixed-width type */
3102 PG_RETURN_INT32(result
);