1 /*-------------------------------------------------------------------------
4 * like expression handling code.
7 * A big hack of the regexp.c code!! Contributed by
8 * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
10 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
14 * src/backend/utils/adt/like.c
16 *-------------------------------------------------------------------------
22 #include "catalog/pg_collation.h"
23 #include "mb/pg_wchar.h"
24 #include "miscadmin.h"
25 #include "utils/builtins.h"
26 #include "utils/pg_locale.h"
31 #define LIKE_ABORT (-1)
// Forward declarations.
// SB_, MB_ and UTF8_ MatchText plus the two do_like_escape variants are
// generated further down by including like_match.c several times with
// MatchText and do_like_escape #defined to the variant name. SB_IMatchText is
// the single-byte build that has MATCH_LOWER defined. GenericMatchText and
// Generic_Text_IC_like are the file-local dispatchers that pick a variant.
34 static int SB_MatchText(const char *t
, int tlen
, const char *p
, int plen
,
35 pg_locale_t locale
, bool locale_is_c
);
36 static text
*SB_do_like_escape(text
*, text
*);
38 static int MB_MatchText(const char *t
, int tlen
, const char *p
, int plen
,
39 pg_locale_t locale
, bool locale_is_c
);
40 static text
*MB_do_like_escape(text
*, text
*);
42 static int UTF8_MatchText(const char *t
, int tlen
, const char *p
, int plen
,
43 pg_locale_t locale
, bool locale_is_c
);
45 static int SB_IMatchText(const char *t
, int tlen
, const char *p
, int plen
,
46 pg_locale_t locale
, bool locale_is_c
);
48 static int GenericMatchText(const char *s
, int slen
, const char *p
, int plen
, Oid collation
);
49 static int Generic_Text_IC_like(text
*str
, text
*pat
, Oid collation
);
51 /*--------------------
52 * Support routine for MatchText. Compares given multibyte streams
53 * as wide characters. If they match, returns 1 otherwise returns 0.
// wchareq: compare the multibyte character at p1 with the one at p2.
// The visible steps obtain each character length with pg_mblen() and test
// whether the two lengths differ; the code after that check handles
// characters of equal length. Used as CHAREQ in the multibyte build of
// like_match.c.
// NOTE(review): the return statements and the byte-wise comparison are not
// visible in this excerpt; confirm the 1/0 result convention against the
// full file.
57 wchareq(const char *p1
, const char *p2
)
61 /* Optimization: quickly compare the first byte. */
65 p1_len
= pg_mblen(p1
);
66 if (pg_mblen(p2
) != p1_len
)
69 /* They are the same length */
79 * Formerly we had a routine iwchareq() here that tried to do case-insensitive
80 * comparison of multibyte characters. It did not work at all, however,
81 * because it relied on tolower() which has a single-byte API ... and
82 * towlower() wouldn't be much better since we have no suitably cheap way
83 * of getting a single character transformed to the system's wchar_t format.
84 * So now, we just downcase the strings using lower() and apply regular LIKE
85 * comparison. This should be revisited when we install better locale support.
89 * We do handle case-insensitive matching for single-byte encodings using
90 * fold-on-the-fly processing, however.
/*
 * SB_lower_char: lower-case one byte of a single-byte encoding.
 *
 * Three return paths are visible: pg_ascii_tolower(c), tolower_l(c, ...)
 * using locale->info.lt, and pg_tolower(c).  This is the function behind
 * the MATCH_LOWER macro used to build SB_IMatchText.
 *
 * NOTE(review): the conditions choosing between the three paths are not
 * visible in this excerpt; presumably locale_is_c selects the ASCII path
 * and a non-null locale selects tolower_l -- confirm against the full file.
 */
93 SB_lower_char(unsigned char c
, pg_locale_t locale
, bool locale_is_c
)
96 return pg_ascii_tolower(c
);
99 return tolower_l(c
, locale
->info
.lt
);
102 return pg_tolower(c
);
/*
 * like_match.c is used as a template: it is included four times below, and
 * before each inclusion the macros it consumes (CHAREQ, NextChar,
 * CopyAdvChar, MATCH_LOWER, MatchText, do_like_escape) are set for the
 * variant being generated.  MatchText and do_like_escape give the names of
 * the functions produced by that inclusion.
 *
 * NextByte advances a pointer by exactly one byte and decrements the
 * remaining length; the single-byte builds use it as their NextChar.
 *
 * NOTE(review): the macros are redefined between inclusions with no #undef
 * visible here, so like_match.c presumably undefines them at its end --
 * confirm.  The multibyte CopyAdvChar definition is truncated in this view.
 */
106 #define NextByte(p, plen) ((p)++, (plen)--)
108 /* Set up to compile like_match.c for multibyte characters */
109 #define CHAREQ(p1, p2) wchareq((p1), (p2))
110 #define NextChar(p, plen) \
111 do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
112 #define CopyAdvChar(dst, src, srclen) \
113 do { int __l = pg_mblen(src); \
116 *(dst)++ = *(src)++; \
119 #define MatchText MB_MatchText
120 #define do_like_escape MB_do_like_escape
122 #include "like_match.c"
124 /* Set up to compile like_match.c for single-byte characters */
125 #define CHAREQ(p1, p2) (*(p1) == *(p2))
126 #define NextChar(p, plen) NextByte((p), (plen))
127 #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
129 #define MatchText SB_MatchText
130 #define do_like_escape SB_do_like_escape
132 #include "like_match.c"
134 /* setup to compile like_match.c for single byte case insensitive matches */
135 #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
136 #define NextChar(p, plen) NextByte((p), (plen))
137 #define MatchText SB_IMatchText
139 #include "like_match.c"
/*
 * The UTF-8 NextChar below steps one byte, then keeps stepping while the
 * current byte has the form 10xxxxxx (a UTF-8 continuation byte) and input
 * remains, so it lands on the next lead byte without calling pg_mblen().
 * No do_like_escape is defined for this build; only UTF8_MatchText is named.
 */
141 /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
143 #define NextChar(p, plen) \
144 do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
145 #define MatchText UTF8_MatchText
147 #include "like_match.c"
149 /* Generic for all cases not requiring inline case-folding */
/*
 * GenericMatchText: match string s (length slen) against LIKE pattern p
 * (length plen) without any case folding.
 *
 * Collation check: when a collation is given, its ctype is not C and it is
 * not the default collation, the locale is looked up and a nondeterministic
 * collation is reported with ERRCODE_FEATURE_NOT_SUPPORTED.  The locale is
 * not used for anything else here.
 *
 * Dispatch: SB_MatchText when the database encoding has a maximum character
 * length of 1, UTF8_MatchText when the encoding is PG_UTF8, MB_MatchText
 * otherwise.  All three are called with locale 0 and locale_is_c true.
 *
 * Returns the chosen matcher's result unchanged; the callers in this file
 * compare it with LIKE_TRUE.
 */
151 GenericMatchText(const char *s
, int slen
, const char *p
, int plen
, Oid collation
)
153 if (collation
&& !lc_ctype_is_c(collation
) && collation
!= DEFAULT_COLLATION_OID
)
155 pg_locale_t locale
= pg_newlocale_from_collation(collation
);
157 if (locale
&& !locale
->deterministic
)
159 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
160 errmsg("nondeterministic collations are not supported for LIKE")));
163 if (pg_database_encoding_max_length() == 1)
164 return SB_MatchText(s
, slen
, p
, plen
, 0, true);
165 else if (GetDatabaseEncoding() == PG_UTF8
)
166 return UTF8_MatchText(s
, slen
, p
, plen
, 0, true);
168 return MB_MatchText(s
, slen
, p
, plen
, 0, true);
/*
 * Generic_Text_IC_like: case-insensitive (ILIKE) match of str against pat
 * under the given collation.
 *
 * Collation handling: a collation whose ctype is C is tested for first
 * (NOTE(review): the statement in that branch is not visible in this
 * excerpt; presumably it sets locale_is_c -- confirm).  Otherwise, for a
 * non-default collation, an invalid OID is reported with
 * ERRCODE_INDETERMINATE_COLLATION, the locale is looked up, and a
 * nondeterministic collation is reported with
 * ERRCODE_FEATURE_NOT_SUPPORTED.
 *
 * Matching: if the database encoding is multibyte, or the locale provider
 * is ICU, both pat and str are run through lower() under the collation and
 * then matched with UTF8_MatchText (PG_UTF8) or MB_MatchText, called with
 * locale 0 and locale_is_c true.  Otherwise the unmodified pat and str go
 * to SB_IMatchText together with locale and locale_is_c, which folds case
 * one character at a time via SB_lower_char.
 *
 * Returns the matcher's result unchanged; the callers in this file compare
 * it with LIKE_TRUE.
 */
172 Generic_Text_IC_like(text
*str
, text
*pat
, Oid collation
)
178 pg_locale_t locale
= 0;
179 bool locale_is_c
= false;
181 if (lc_ctype_is_c(collation
))
183 else if (collation
!= DEFAULT_COLLATION_OID
)
185 if (!OidIsValid(collation
))
188 * This typically means that the parser could not resolve a
189 * conflict of implicit collations, so report it that way.
192 (errcode(ERRCODE_INDETERMINATE_COLLATION
),
193 errmsg("could not determine which collation to use for ILIKE"),
194 errhint("Use the COLLATE clause to set the collation explicitly.")));
196 locale
= pg_newlocale_from_collation(collation
);
198 if (locale
&& !locale
->deterministic
)
200 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED
),
201 errmsg("nondeterministic collations are not supported for ILIKE")));
205 * For efficiency reasons, in the single byte case we don't call lower()
206 * on the pattern and text, but instead call SB_lower_char on each
207 * character. In the multi-byte case we don't have much choice :-(. Also,
208 * ICU does not support single-character case folding, so we go the long
212 if (pg_database_encoding_max_length() > 1 || (locale
&& locale
->provider
== COLLPROVIDER_ICU
))
214 pat
= DatumGetTextPP(DirectFunctionCall1Coll(lower
, collation
,
215 PointerGetDatum(pat
)));
216 p
= VARDATA_ANY(pat
);
217 plen
= VARSIZE_ANY_EXHDR(pat
);
218 str
= DatumGetTextPP(DirectFunctionCall1Coll(lower
, collation
,
219 PointerGetDatum(str
)));
220 s
= VARDATA_ANY(str
);
221 slen
= VARSIZE_ANY_EXHDR(str
);
222 if (GetDatabaseEncoding() == PG_UTF8
)
223 return UTF8_MatchText(s
, slen
, p
, plen
, 0, true);
225 return MB_MatchText(s
, slen
, p
, plen
, 0, true);
229 p
= VARDATA_ANY(pat
);
230 plen
= VARSIZE_ANY_EXHDR(pat
);
231 s
= VARDATA_ANY(str
);
232 slen
= VARSIZE_ANY_EXHDR(str
);
233 return SB_IMatchText(s
, slen
, p
, plen
, locale
, locale_is_c
);
238 * interface routines called by the function manager
/*
 * namelike: function-manager entry point for LIKE with a Name value
 * (argument 0) and a text pattern (argument 1).
 *
 * The pattern bytes and length are taken from pat; the match is delegated
 * to GenericMatchText with the call's collation.  Returns true only when
 * the matcher reports LIKE_TRUE.
 *
 * NOTE(review): the lines deriving s/slen from str are not visible in this
 * excerpt.
 */
242 namelike(PG_FUNCTION_ARGS
)
244 Name str
= PG_GETARG_NAME(0);
245 text
*pat
= PG_GETARG_TEXT_PP(1);
254 p
= VARDATA_ANY(pat
);
255 plen
= VARSIZE_ANY_EXHDR(pat
);
257 result
= (GenericMatchText(s
, slen
, p
, plen
, PG_GET_COLLATION()) == LIKE_TRUE
);
259 PG_RETURN_BOOL(result
);
/*
 * namenlike: function-manager entry point for NOT LIKE with a Name value
 * (argument 0) and a text pattern (argument 1).
 *
 * Same setup as namelike, but the result is true for every matcher result
 * other than LIKE_TRUE.
 *
 * NOTE(review): the lines deriving s/slen from str are not visible in this
 * excerpt.
 */
263 namenlike(PG_FUNCTION_ARGS
)
265 Name str
= PG_GETARG_NAME(0);
266 text
*pat
= PG_GETARG_TEXT_PP(1);
275 p
= VARDATA_ANY(pat
);
276 plen
= VARSIZE_ANY_EXHDR(pat
);
278 result
= (GenericMatchText(s
, slen
, p
, plen
, PG_GET_COLLATION()) != LIKE_TRUE
);
280 PG_RETURN_BOOL(result
);
/*
 * textlike: function-manager entry point for text LIKE text.
 *
 * Argument 0 is the string and argument 1 the pattern.  The data pointer
 * and header-exclusive size of each are passed to GenericMatchText along
 * with the call's collation.  Returns true only when the matcher reports
 * LIKE_TRUE.
 */
284 textlike(PG_FUNCTION_ARGS
)
286 text
*str
= PG_GETARG_TEXT_PP(0);
287 text
*pat
= PG_GETARG_TEXT_PP(1);
294 s
= VARDATA_ANY(str
);
295 slen
= VARSIZE_ANY_EXHDR(str
);
296 p
= VARDATA_ANY(pat
);
297 plen
= VARSIZE_ANY_EXHDR(pat
);
299 result
= (GenericMatchText(s
, slen
, p
, plen
, PG_GET_COLLATION()) == LIKE_TRUE
);
301 PG_RETURN_BOOL(result
);
/*
 * textnlike: function-manager entry point for text NOT LIKE text.
 *
 * Identical to textlike except for the final test: the result is true for
 * every GenericMatchText result other than LIKE_TRUE.
 */
305 textnlike(PG_FUNCTION_ARGS
)
307 text
*str
= PG_GETARG_TEXT_PP(0);
308 text
*pat
= PG_GETARG_TEXT_PP(1);
315 s
= VARDATA_ANY(str
);
316 slen
= VARSIZE_ANY_EXHDR(str
);
317 p
= VARDATA_ANY(pat
);
318 plen
= VARSIZE_ANY_EXHDR(pat
);
320 result
= (GenericMatchText(s
, slen
, p
, plen
, PG_GET_COLLATION()) != LIKE_TRUE
);
322 PG_RETURN_BOOL(result
);
/*
 * bytealike: function-manager entry point for bytea LIKE bytea.
 *
 * Calls the single-byte matcher SB_MatchText directly, with locale 0 and
 * locale_is_c true, so neither the database encoding nor the call's
 * collation is consulted.  Returns true only when the matcher reports
 * LIKE_TRUE.
 */
326 bytealike(PG_FUNCTION_ARGS
)
328 bytea
*str
= PG_GETARG_BYTEA_PP(0);
329 bytea
*pat
= PG_GETARG_BYTEA_PP(1);
336 s
= VARDATA_ANY(str
);
337 slen
= VARSIZE_ANY_EXHDR(str
);
338 p
= VARDATA_ANY(pat
);
339 plen
= VARSIZE_ANY_EXHDR(pat
);
341 result
= (SB_MatchText(s
, slen
, p
, plen
, 0, true) == LIKE_TRUE
);
343 PG_RETURN_BOOL(result
);
/*
 * byteanlike: function-manager entry point for bytea NOT LIKE bytea.
 *
 * Same direct call to SB_MatchText as bytealike (locale 0, locale_is_c
 * true); the result is true for every matcher result other than LIKE_TRUE.
 */
347 byteanlike(PG_FUNCTION_ARGS
)
349 bytea
*str
= PG_GETARG_BYTEA_PP(0);
350 bytea
*pat
= PG_GETARG_BYTEA_PP(1);
357 s
= VARDATA_ANY(str
);
358 slen
= VARSIZE_ANY_EXHDR(str
);
359 p
= VARDATA_ANY(pat
);
360 plen
= VARSIZE_ANY_EXHDR(pat
);
362 result
= (SB_MatchText(s
, slen
, p
, plen
, 0, true) != LIKE_TRUE
);
364 PG_RETURN_BOOL(result
);
368 * Case-insensitive versions
/*
 * nameiclike: function-manager entry point for case-insensitive LIKE with
 * a Name value (argument 0) and a text pattern (argument 1).
 *
 * The Name is converted to text through name_text, then matched by
 * Generic_Text_IC_like with the call's collation.  Returns true only when
 * the matcher reports LIKE_TRUE.
 *
 * NOTE(review): the argument of the name_text call is cut off in this
 * excerpt.
 */
372 nameiclike(PG_FUNCTION_ARGS
)
374 Name str
= PG_GETARG_NAME(0);
375 text
*pat
= PG_GETARG_TEXT_PP(1);
379 strtext
= DatumGetTextPP(DirectFunctionCall1(name_text
,
381 result
= (Generic_Text_IC_like(strtext
, pat
, PG_GET_COLLATION()) == LIKE_TRUE
);
383 PG_RETURN_BOOL(result
);
/*
 * nameicnlike: function-manager entry point for case-insensitive NOT LIKE
 * with a Name value (argument 0) and a text pattern (argument 1).
 *
 * Same conversion and matcher as nameiclike; the result is true for every
 * Generic_Text_IC_like result other than LIKE_TRUE.
 *
 * NOTE(review): the argument of the name_text call is cut off in this
 * excerpt.
 */
387 nameicnlike(PG_FUNCTION_ARGS
)
389 Name str
= PG_GETARG_NAME(0);
390 text
*pat
= PG_GETARG_TEXT_PP(1);
394 strtext
= DatumGetTextPP(DirectFunctionCall1(name_text
,
396 result
= (Generic_Text_IC_like(strtext
, pat
, PG_GET_COLLATION()) != LIKE_TRUE
);
398 PG_RETURN_BOOL(result
);
/*
 * texticlike: function-manager entry point for case-insensitive LIKE on
 * text.
 *
 * Passes the string (argument 0) and pattern (argument 1) straight to
 * Generic_Text_IC_like with the call's collation.  Returns true only when
 * the matcher reports LIKE_TRUE.
 */
402 texticlike(PG_FUNCTION_ARGS
)
404 text
*str
= PG_GETARG_TEXT_PP(0);
405 text
*pat
= PG_GETARG_TEXT_PP(1);
408 result
= (Generic_Text_IC_like(str
, pat
, PG_GET_COLLATION()) == LIKE_TRUE
);
410 PG_RETURN_BOOL(result
);
/*
 * texticnlike: function-manager entry point for case-insensitive NOT LIKE
 * on text.
 *
 * Same call as texticlike; the result is true for every
 * Generic_Text_IC_like result other than LIKE_TRUE.
 */
414 texticnlike(PG_FUNCTION_ARGS
)
416 text
*str
= PG_GETARG_TEXT_PP(0);
417 text
*pat
= PG_GETARG_TEXT_PP(1);
420 result
= (Generic_Text_IC_like(str
, pat
, PG_GET_COLLATION()) != LIKE_TRUE
);
422 PG_RETURN_BOOL(result
);
426 * like_escape() --- given a pattern and an ESCAPE string,
427 * convert the pattern to use Postgres' standard backslash escape convention.
/*
 * like_escape: function-manager entry point taking a text pattern
 * (argument 0) and a text ESCAPE string (argument 1) and returning the
 * rewritten pattern as text.
 *
 * The work is done by SB_do_like_escape when the database encoding has a
 * maximum character length of 1, and by MB_do_like_escape otherwise.
 */
430 like_escape(PG_FUNCTION_ARGS
)
432 text
*pat
= PG_GETARG_TEXT_PP(0);
433 text
*esc
= PG_GETARG_TEXT_PP(1);
436 if (pg_database_encoding_max_length() == 1)
437 result
= SB_do_like_escape(pat
, esc
);
439 result
= MB_do_like_escape(pat
, esc
);
441 PG_RETURN_TEXT_P(result
);
445 * like_escape_bytea() --- given a pattern and an ESCAPE string,
446 * convert the pattern to use Postgres' standard backslash escape convention.
/*
 * like_escape_bytea: bytea counterpart of like_escape.
 *
 * Takes a bytea pattern (argument 0) and a bytea ESCAPE string (argument
 * 1).  Both are cast to text pointers and always handed to the single-byte
 * SB_do_like_escape, with no check of the database encoding; the result is
 * cast back to bytea for the return.
 */
449 like_escape_bytea(PG_FUNCTION_ARGS
)
451 bytea
*pat
= PG_GETARG_BYTEA_PP(0);
452 bytea
*esc
= PG_GETARG_BYTEA_PP(1);
453 bytea
*result
= SB_do_like_escape((text
*) pat
, (text
*) esc
);
455 PG_RETURN_BYTEA_P((bytea
*) result
);