Update copyright for 2022
[pgsql.git] / src / backend / utils / adt / like.c
blob9f241dc7c664ad22742c18a4696632c7a2891f04
/*-------------------------------------------------------------------------
 *
 * like.c
 *	  like expression handling code.
 *
 *	 NOTES
 *		A big hack of the regexp.c code!! Contributed by
 *		Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
 *
 * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	src/backend/utils/adt/like.c
 *
 *-------------------------------------------------------------------------
 */
18 #include "postgres.h"
20 #include <ctype.h>
22 #include "catalog/pg_collation.h"
23 #include "mb/pg_wchar.h"
24 #include "miscadmin.h"
25 #include "utils/builtins.h"
26 #include "utils/pg_locale.h"
29 #define LIKE_TRUE 1
30 #define LIKE_FALSE 0
31 #define LIKE_ABORT (-1)
34 static int SB_MatchText(const char *t, int tlen, const char *p, int plen,
35 pg_locale_t locale, bool locale_is_c);
36 static text *SB_do_like_escape(text *, text *);
38 static int MB_MatchText(const char *t, int tlen, const char *p, int plen,
39 pg_locale_t locale, bool locale_is_c);
40 static text *MB_do_like_escape(text *, text *);
42 static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
43 pg_locale_t locale, bool locale_is_c);
45 static int SB_IMatchText(const char *t, int tlen, const char *p, int plen,
46 pg_locale_t locale, bool locale_is_c);
48 static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
49 static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
/*--------------------
 * wchareq
 *		Support routine for MatchText: compare the (possibly multibyte)
 *		characters that start at p1 and p2.
 *
 * Returns 1 when both characters have the same byte length, as reported by
 * pg_mblen(), and consist of identical bytes; returns 0 otherwise.
 *--------------------
 */
static inline int
wchareq(const char *p1, const char *p2)
{
	int			nbytes;
	int			i;

	/* Fast path: characters whose first bytes differ can never be equal. */
	if (*p1 != *p2)
		return 0;

	/* Characters of different encoded length cannot be equal either. */
	nbytes = pg_mblen(p1);
	if (nbytes != pg_mblen(p2))
		return 0;

	/* Equal length: every byte has to agree. */
	for (i = 0; i < nbytes; i++)
	{
		if (p1[i] != p2[i])
			return 0;
	}

	return 1;
}
79 * Formerly we had a routine iwchareq() here that tried to do case-insensitive
80 * comparison of multibyte characters. It did not work at all, however,
81 * because it relied on tolower() which has a single-byte API ... and
82 * towlower() wouldn't be much better since we have no suitably cheap way
83 * of getting a single character transformed to the system's wchar_t format.
84 * So now, we just downcase the strings using lower() and apply regular LIKE
85 * comparison. This should be revisited when we install better locale support.
89 * We do handle case-insensitive matching for single-byte encodings using
90 * fold-on-the-fly processing, however.
92 static char
93 SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
95 if (locale_is_c)
96 return pg_ascii_tolower(c);
97 #ifdef HAVE_LOCALE_T
98 else if (locale)
99 return tolower_l(c, locale->info.lt);
100 #endif
101 else
102 return pg_tolower(c);
106 #define NextByte(p, plen) ((p)++, (plen)--)
108 /* Set up to compile like_match.c for multibyte characters */
109 #define CHAREQ(p1, p2) wchareq((p1), (p2))
110 #define NextChar(p, plen) \
111 do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
112 #define CopyAdvChar(dst, src, srclen) \
113 do { int __l = pg_mblen(src); \
114 (srclen) -= __l; \
115 while (__l-- > 0) \
116 *(dst)++ = *(src)++; \
117 } while (0)
119 #define MatchText MB_MatchText
120 #define do_like_escape MB_do_like_escape
122 #include "like_match.c"
124 /* Set up to compile like_match.c for single-byte characters */
125 #define CHAREQ(p1, p2) (*(p1) == *(p2))
126 #define NextChar(p, plen) NextByte((p), (plen))
127 #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
129 #define MatchText SB_MatchText
130 #define do_like_escape SB_do_like_escape
132 #include "like_match.c"
134 /* setup to compile like_match.c for single byte case insensitive matches */
135 #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
136 #define NextChar(p, plen) NextByte((p), (plen))
137 #define MatchText SB_IMatchText
139 #include "like_match.c"
141 /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
143 #define NextChar(p, plen) \
144 do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
145 #define MatchText UTF8_MatchText
147 #include "like_match.c"
149 /* Generic for all cases not requiring inline case-folding */
150 static inline int
151 GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation)
153 if (collation && !lc_ctype_is_c(collation) && collation != DEFAULT_COLLATION_OID)
155 pg_locale_t locale = pg_newlocale_from_collation(collation);
157 if (locale && !locale->deterministic)
158 ereport(ERROR,
159 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
160 errmsg("nondeterministic collations are not supported for LIKE")));
163 if (pg_database_encoding_max_length() == 1)
164 return SB_MatchText(s, slen, p, plen, 0, true);
165 else if (GetDatabaseEncoding() == PG_UTF8)
166 return UTF8_MatchText(s, slen, p, plen, 0, true);
167 else
168 return MB_MatchText(s, slen, p, plen, 0, true);
171 static inline int
172 Generic_Text_IC_like(text *str, text *pat, Oid collation)
174 char *s,
176 int slen,
177 plen;
178 pg_locale_t locale = 0;
179 bool locale_is_c = false;
181 if (lc_ctype_is_c(collation))
182 locale_is_c = true;
183 else if (collation != DEFAULT_COLLATION_OID)
185 if (!OidIsValid(collation))
188 * This typically means that the parser could not resolve a
189 * conflict of implicit collations, so report it that way.
191 ereport(ERROR,
192 (errcode(ERRCODE_INDETERMINATE_COLLATION),
193 errmsg("could not determine which collation to use for ILIKE"),
194 errhint("Use the COLLATE clause to set the collation explicitly.")));
196 locale = pg_newlocale_from_collation(collation);
198 if (locale && !locale->deterministic)
199 ereport(ERROR,
200 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
201 errmsg("nondeterministic collations are not supported for ILIKE")));
205 * For efficiency reasons, in the single byte case we don't call lower()
206 * on the pattern and text, but instead call SB_lower_char on each
207 * character. In the multi-byte case we don't have much choice :-(. Also,
208 * ICU does not support single-character case folding, so we go the long
209 * way.
212 if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU))
214 pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
215 PointerGetDatum(pat)));
216 p = VARDATA_ANY(pat);
217 plen = VARSIZE_ANY_EXHDR(pat);
218 str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
219 PointerGetDatum(str)));
220 s = VARDATA_ANY(str);
221 slen = VARSIZE_ANY_EXHDR(str);
222 if (GetDatabaseEncoding() == PG_UTF8)
223 return UTF8_MatchText(s, slen, p, plen, 0, true);
224 else
225 return MB_MatchText(s, slen, p, plen, 0, true);
227 else
229 p = VARDATA_ANY(pat);
230 plen = VARSIZE_ANY_EXHDR(pat);
231 s = VARDATA_ANY(str);
232 slen = VARSIZE_ANY_EXHDR(str);
233 return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
/*
 *	interface routines called by the function manager
 */
241 Datum
242 namelike(PG_FUNCTION_ARGS)
244 Name str = PG_GETARG_NAME(0);
245 text *pat = PG_GETARG_TEXT_PP(1);
246 bool result;
247 char *s,
249 int slen,
250 plen;
252 s = NameStr(*str);
253 slen = strlen(s);
254 p = VARDATA_ANY(pat);
255 plen = VARSIZE_ANY_EXHDR(pat);
257 result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
259 PG_RETURN_BOOL(result);
262 Datum
263 namenlike(PG_FUNCTION_ARGS)
265 Name str = PG_GETARG_NAME(0);
266 text *pat = PG_GETARG_TEXT_PP(1);
267 bool result;
268 char *s,
270 int slen,
271 plen;
273 s = NameStr(*str);
274 slen = strlen(s);
275 p = VARDATA_ANY(pat);
276 plen = VARSIZE_ANY_EXHDR(pat);
278 result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
280 PG_RETURN_BOOL(result);
283 Datum
284 textlike(PG_FUNCTION_ARGS)
286 text *str = PG_GETARG_TEXT_PP(0);
287 text *pat = PG_GETARG_TEXT_PP(1);
288 bool result;
289 char *s,
291 int slen,
292 plen;
294 s = VARDATA_ANY(str);
295 slen = VARSIZE_ANY_EXHDR(str);
296 p = VARDATA_ANY(pat);
297 plen = VARSIZE_ANY_EXHDR(pat);
299 result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE);
301 PG_RETURN_BOOL(result);
304 Datum
305 textnlike(PG_FUNCTION_ARGS)
307 text *str = PG_GETARG_TEXT_PP(0);
308 text *pat = PG_GETARG_TEXT_PP(1);
309 bool result;
310 char *s,
312 int slen,
313 plen;
315 s = VARDATA_ANY(str);
316 slen = VARSIZE_ANY_EXHDR(str);
317 p = VARDATA_ANY(pat);
318 plen = VARSIZE_ANY_EXHDR(pat);
320 result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE);
322 PG_RETURN_BOOL(result);
325 Datum
326 bytealike(PG_FUNCTION_ARGS)
328 bytea *str = PG_GETARG_BYTEA_PP(0);
329 bytea *pat = PG_GETARG_BYTEA_PP(1);
330 bool result;
331 char *s,
333 int slen,
334 plen;
336 s = VARDATA_ANY(str);
337 slen = VARSIZE_ANY_EXHDR(str);
338 p = VARDATA_ANY(pat);
339 plen = VARSIZE_ANY_EXHDR(pat);
341 result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
343 PG_RETURN_BOOL(result);
346 Datum
347 byteanlike(PG_FUNCTION_ARGS)
349 bytea *str = PG_GETARG_BYTEA_PP(0);
350 bytea *pat = PG_GETARG_BYTEA_PP(1);
351 bool result;
352 char *s,
354 int slen,
355 plen;
357 s = VARDATA_ANY(str);
358 slen = VARSIZE_ANY_EXHDR(str);
359 p = VARDATA_ANY(pat);
360 plen = VARSIZE_ANY_EXHDR(pat);
362 result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
364 PG_RETURN_BOOL(result);
/*
 * Case-insensitive versions
 */
371 Datum
372 nameiclike(PG_FUNCTION_ARGS)
374 Name str = PG_GETARG_NAME(0);
375 text *pat = PG_GETARG_TEXT_PP(1);
376 bool result;
377 text *strtext;
379 strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
380 NameGetDatum(str)));
381 result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE);
383 PG_RETURN_BOOL(result);
386 Datum
387 nameicnlike(PG_FUNCTION_ARGS)
389 Name str = PG_GETARG_NAME(0);
390 text *pat = PG_GETARG_TEXT_PP(1);
391 bool result;
392 text *strtext;
394 strtext = DatumGetTextPP(DirectFunctionCall1(name_text,
395 NameGetDatum(str)));
396 result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE);
398 PG_RETURN_BOOL(result);
401 Datum
402 texticlike(PG_FUNCTION_ARGS)
404 text *str = PG_GETARG_TEXT_PP(0);
405 text *pat = PG_GETARG_TEXT_PP(1);
406 bool result;
408 result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE);
410 PG_RETURN_BOOL(result);
413 Datum
414 texticnlike(PG_FUNCTION_ARGS)
416 text *str = PG_GETARG_TEXT_PP(0);
417 text *pat = PG_GETARG_TEXT_PP(1);
418 bool result;
420 result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE);
422 PG_RETURN_BOOL(result);
426 * like_escape() --- given a pattern and an ESCAPE string,
427 * convert the pattern to use Postgres' standard backslash escape convention.
429 Datum
430 like_escape(PG_FUNCTION_ARGS)
432 text *pat = PG_GETARG_TEXT_PP(0);
433 text *esc = PG_GETARG_TEXT_PP(1);
434 text *result;
436 if (pg_database_encoding_max_length() == 1)
437 result = SB_do_like_escape(pat, esc);
438 else
439 result = MB_do_like_escape(pat, esc);
441 PG_RETURN_TEXT_P(result);
445 * like_escape_bytea() --- given a pattern and an ESCAPE string,
446 * convert the pattern to use Postgres' standard backslash escape convention.
448 Datum
449 like_escape_bytea(PG_FUNCTION_ARGS)
451 bytea *pat = PG_GETARG_BYTEA_PP(0);
452 bytea *esc = PG_GETARG_BYTEA_PP(1);
453 bytea *result = SB_do_like_escape((text *) pat, (text *) esc);
455 PG_RETURN_BYTEA_P((bytea *) result);