Use a safer outfuncs/readfuncs representation for BitStrings.
[pgsql.git] / contrib / ltree / ltree_io.c
blob0a12c77a621f8e44f5a28310f70fc145a2c01b3f
1 /*
2 * in/out function for ltree and lquery
3 * Teodor Sigaev <teodor@stack.net>
4 * contrib/ltree/ltree_io.c
5 */
6 #include "postgres.h"
8 #include <ctype.h>
10 #include "crc32.h"
11 #include "libpq/pqformat.h"
12 #include "ltree.h"
13 #include "utils/memutils.h"
14 #include "varatt.h"
/*
 * Scratch record for one label while parsing an ltree or lquery.
 *
 * "start" points into the caller's input string (nothing is copied);
 * "len" is filled in by finish_nodeitem() once the end of the label
 * is known, while "wlen" is counted up as characters are scanned.
 */
typedef struct
{
	const char *start;			/* first byte of the label in the input */
	int			len;			/* length in bytes */
	int			flag;			/* LVAR_* modifier bits (lquery only) */
	int			wlen;			/* length in characters */
} nodeitem;
/* Scanner states used by parse_ltree() */
enum
{
	LTPRS_WAITNAME = 0,			/* expecting the first character of a label */
	LTPRS_WAITDELIM = 1			/* inside a label: more label chars or '.' */
};
28 static bool finish_nodeitem(nodeitem *lptr, const char *ptr,
29 bool is_lquery, int pos, struct Node *escontext);
33 * expects a null terminated string
34 * returns an ltree
36 static ltree *
37 parse_ltree(const char *buf, struct Node *escontext)
39 const char *ptr;
40 nodeitem *list,
41 *lptr;
42 int num = 0,
43 totallen = 0;
44 int state = LTPRS_WAITNAME;
45 ltree *result;
46 ltree_level *curlevel;
47 int charlen;
48 int pos = 1; /* character position for error messages */
50 #define UNCHAR ereturn(escontext, NULL,\
51 errcode(ERRCODE_SYNTAX_ERROR), \
52 errmsg("ltree syntax error at character %d", \
53 pos))
55 ptr = buf;
56 while (*ptr)
58 charlen = pg_mblen(ptr);
59 if (t_iseq(ptr, '.'))
60 num++;
61 ptr += charlen;
64 if (num + 1 > LTREE_MAX_LEVELS)
65 ereturn(escontext, NULL,
66 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
67 errmsg("number of ltree labels (%d) exceeds the maximum allowed (%d)",
68 num + 1, LTREE_MAX_LEVELS)));
69 list = lptr = (nodeitem *) palloc(sizeof(nodeitem) * (num + 1));
70 ptr = buf;
71 while (*ptr)
73 charlen = pg_mblen(ptr);
75 switch (state)
77 case LTPRS_WAITNAME:
78 if (ISLABEL(ptr))
80 lptr->start = ptr;
81 lptr->wlen = 0;
82 state = LTPRS_WAITDELIM;
84 else
85 UNCHAR;
86 break;
87 case LTPRS_WAITDELIM:
88 if (t_iseq(ptr, '.'))
90 if (!finish_nodeitem(lptr, ptr, false, pos, escontext))
91 return NULL;
92 totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
93 lptr++;
94 state = LTPRS_WAITNAME;
96 else if (!ISLABEL(ptr))
97 UNCHAR;
98 break;
99 default:
100 elog(ERROR, "internal error in ltree parser");
103 ptr += charlen;
104 lptr->wlen++;
105 pos++;
108 if (state == LTPRS_WAITDELIM)
110 if (!finish_nodeitem(lptr, ptr, false, pos, escontext))
111 return NULL;
112 totallen += MAXALIGN(lptr->len + LEVEL_HDRSIZE);
113 lptr++;
115 else if (!(state == LTPRS_WAITNAME && lptr == list))
116 ereturn(escontext, NULL,
117 (errcode(ERRCODE_SYNTAX_ERROR),
118 errmsg("ltree syntax error"),
119 errdetail("Unexpected end of input.")));
121 result = (ltree *) palloc0(LTREE_HDRSIZE + totallen);
122 SET_VARSIZE(result, LTREE_HDRSIZE + totallen);
123 result->numlevel = lptr - list;
124 curlevel = LTREE_FIRST(result);
125 lptr = list;
126 while (lptr - list < result->numlevel)
128 curlevel->len = (uint16) lptr->len;
129 memcpy(curlevel->name, lptr->start, lptr->len);
130 curlevel = LEVEL_NEXT(curlevel);
131 lptr++;
134 pfree(list);
135 return result;
137 #undef UNCHAR
141 * expects an ltree
142 * returns a null terminated string
144 static char *
145 deparse_ltree(const ltree *in)
147 char *buf,
148 *ptr;
149 int i;
150 ltree_level *curlevel;
152 ptr = buf = (char *) palloc(VARSIZE(in));
153 curlevel = LTREE_FIRST(in);
154 for (i = 0; i < in->numlevel; i++)
156 if (i != 0)
158 *ptr = '.';
159 ptr++;
161 memcpy(ptr, curlevel->name, curlevel->len);
162 ptr += curlevel->len;
163 curlevel = LEVEL_NEXT(curlevel);
166 *ptr = '\0';
167 return buf;
171 * Basic ltree I/O functions
173 PG_FUNCTION_INFO_V1(ltree_in);
174 Datum
175 ltree_in(PG_FUNCTION_ARGS)
177 char *buf = (char *) PG_GETARG_POINTER(0);
178 ltree *res;
180 if ((res = parse_ltree(buf, fcinfo->context)) == NULL)
181 PG_RETURN_NULL();
183 PG_RETURN_POINTER(res);
186 PG_FUNCTION_INFO_V1(ltree_out);
187 Datum
188 ltree_out(PG_FUNCTION_ARGS)
190 ltree *in = PG_GETARG_LTREE_P(0);
192 PG_RETURN_POINTER(deparse_ltree(in));
196 * ltree type send function
198 * The type is sent as text in binary mode, so this is almost the same
199 * as the output function, but it's prefixed with a version number so we
200 * can change the binary format sent in future if necessary. For now,
201 * only version 1 is supported.
203 PG_FUNCTION_INFO_V1(ltree_send);
204 Datum
205 ltree_send(PG_FUNCTION_ARGS)
207 ltree *in = PG_GETARG_LTREE_P(0);
208 StringInfoData buf;
209 int version = 1;
210 char *res = deparse_ltree(in);
212 pq_begintypsend(&buf);
213 pq_sendint8(&buf, version);
214 pq_sendtext(&buf, res, strlen(res));
215 pfree(res);
217 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
221 * ltree type recv function
223 * The type is sent as text in binary mode, so this is almost the same
224 * as the input function, but it's prefixed with a version number so we
225 * can change the binary format sent in future if necessary. For now,
226 * only version 1 is supported.
228 PG_FUNCTION_INFO_V1(ltree_recv);
229 Datum
230 ltree_recv(PG_FUNCTION_ARGS)
232 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
233 int version = pq_getmsgint(buf, 1);
234 char *str;
235 int nbytes;
236 ltree *res;
238 if (version != 1)
239 elog(ERROR, "unsupported ltree version number %d", version);
241 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
242 res = parse_ltree(str, NULL);
243 pfree(str);
245 PG_RETURN_POINTER(res);
/* Scanner states used by parse_lquery() */
enum
{
	LQPRS_WAITLEVEL = 0,		/* start of a level: label, '!' or '*' */
	LQPRS_WAITDELIM = 1,		/* inside a label: label char, flag, '|', '{' or '.' */
	LQPRS_WAITOPEN = 2,			/* after '*': '{' or '.' */
	LQPRS_WAITFNUM = 3,			/* after '{': first number or ',' */
	LQPRS_WAITSNUM = 4,			/* after ',': second number or '}' */
	LQPRS_WAITND = 5,			/* in first number: digit, ',' or '}' */
	LQPRS_WAITCLOSE = 6,		/* in second number: digit or '}' */
	LQPRS_WAITEND = 7,			/* after '}': only '.' may follow */
	LQPRS_WAITVAR = 8			/* after '|': next alternative label */
};
/*
 * Helpers for the temporary per-level array built by parse_lquery().
 *
 * Each temporary entry is an lquery_level header followed by a single
 * pointer to that level's nodeitem array (instead of inline variants).
 */

/* the nodeitem pointer stored where the level's variants would start */
#define GETVAR(x)	(*((nodeitem **) LQL_FIRST(x)))

/* size in bytes of one temporary entry: header plus one pointer, aligned */
#define ITEMSIZE	MAXALIGN(LQL_HDRSIZE + sizeof(nodeitem *))

/* step from one temporary entry to the next */
#define NEXTLEV(x)	((lquery_level *) (((char *) (x)) + ITEMSIZE))
265 * expects a null terminated string
266 * returns an lquery
268 static lquery *
269 parse_lquery(const char *buf, struct Node *escontext)
271 const char *ptr;
272 int num = 0,
273 totallen = 0,
274 numOR = 0;
275 int state = LQPRS_WAITLEVEL;
276 lquery *result;
277 nodeitem *lptr = NULL;
278 lquery_level *cur,
279 *curqlevel,
280 *tmpql;
281 lquery_variant *lrptr = NULL;
282 bool hasnot = false;
283 bool wasbad = false;
284 int charlen;
285 int pos = 1; /* character position for error messages */
287 #define UNCHAR ereturn(escontext, NULL,\
288 errcode(ERRCODE_SYNTAX_ERROR), \
289 errmsg("lquery syntax error at character %d", \
290 pos))
292 ptr = buf;
293 while (*ptr)
295 charlen = pg_mblen(ptr);
297 if (t_iseq(ptr, '.'))
298 num++;
299 else if (t_iseq(ptr, '|'))
300 numOR++;
302 ptr += charlen;
305 num++;
306 if (num > LQUERY_MAX_LEVELS)
307 ereturn(escontext, NULL,
308 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
309 errmsg("number of lquery items (%d) exceeds the maximum allowed (%d)",
310 num, LQUERY_MAX_LEVELS)));
311 curqlevel = tmpql = (lquery_level *) palloc0(ITEMSIZE * num);
312 ptr = buf;
313 while (*ptr)
315 charlen = pg_mblen(ptr);
317 switch (state)
319 case LQPRS_WAITLEVEL:
320 if (ISLABEL(ptr))
322 GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1));
323 lptr->start = ptr;
324 state = LQPRS_WAITDELIM;
325 curqlevel->numvar = 1;
327 else if (t_iseq(ptr, '!'))
329 GETVAR(curqlevel) = lptr = (nodeitem *) palloc0(sizeof(nodeitem) * (numOR + 1));
330 lptr->start = ptr + 1;
331 lptr->wlen = -1; /* compensate for counting ! below */
332 state = LQPRS_WAITDELIM;
333 curqlevel->numvar = 1;
334 curqlevel->flag |= LQL_NOT;
335 hasnot = true;
337 else if (t_iseq(ptr, '*'))
338 state = LQPRS_WAITOPEN;
339 else
340 UNCHAR;
341 break;
342 case LQPRS_WAITVAR:
343 if (ISLABEL(ptr))
345 lptr++;
346 lptr->start = ptr;
347 state = LQPRS_WAITDELIM;
348 curqlevel->numvar++;
350 else
351 UNCHAR;
352 break;
353 case LQPRS_WAITDELIM:
354 if (t_iseq(ptr, '@'))
356 lptr->flag |= LVAR_INCASE;
357 curqlevel->flag |= LVAR_INCASE;
359 else if (t_iseq(ptr, '*'))
361 lptr->flag |= LVAR_ANYEND;
362 curqlevel->flag |= LVAR_ANYEND;
364 else if (t_iseq(ptr, '%'))
366 lptr->flag |= LVAR_SUBLEXEME;
367 curqlevel->flag |= LVAR_SUBLEXEME;
369 else if (t_iseq(ptr, '|'))
371 if (!finish_nodeitem(lptr, ptr, true, pos, escontext))
372 return NULL;
373 state = LQPRS_WAITVAR;
375 else if (t_iseq(ptr, '{'))
377 if (!finish_nodeitem(lptr, ptr, true, pos, escontext))
378 return NULL;
379 curqlevel->flag |= LQL_COUNT;
380 state = LQPRS_WAITFNUM;
382 else if (t_iseq(ptr, '.'))
384 if (!finish_nodeitem(lptr, ptr, true, pos, escontext))
385 return NULL;
386 state = LQPRS_WAITLEVEL;
387 curqlevel = NEXTLEV(curqlevel);
389 else if (ISLABEL(ptr))
391 /* disallow more chars after a flag */
392 if (lptr->flag)
393 UNCHAR;
395 else
396 UNCHAR;
397 break;
398 case LQPRS_WAITOPEN:
399 if (t_iseq(ptr, '{'))
400 state = LQPRS_WAITFNUM;
401 else if (t_iseq(ptr, '.'))
403 /* We only get here for '*', so these are correct defaults */
404 curqlevel->low = 0;
405 curqlevel->high = LTREE_MAX_LEVELS;
406 curqlevel = NEXTLEV(curqlevel);
407 state = LQPRS_WAITLEVEL;
409 else
410 UNCHAR;
411 break;
412 case LQPRS_WAITFNUM:
413 if (t_iseq(ptr, ','))
414 state = LQPRS_WAITSNUM;
415 else if (t_isdigit(ptr))
417 int low = atoi(ptr);
419 if (low < 0 || low > LTREE_MAX_LEVELS)
420 ereturn(escontext, NULL,
421 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
422 errmsg("lquery syntax error"),
423 errdetail("Low limit (%d) exceeds the maximum allowed (%d), at character %d.",
424 low, LTREE_MAX_LEVELS, pos)));
426 curqlevel->low = (uint16) low;
427 state = LQPRS_WAITND;
429 else
430 UNCHAR;
431 break;
432 case LQPRS_WAITSNUM:
433 if (t_isdigit(ptr))
435 int high = atoi(ptr);
437 if (high < 0 || high > LTREE_MAX_LEVELS)
438 ereturn(escontext, NULL,
439 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
440 errmsg("lquery syntax error"),
441 errdetail("High limit (%d) exceeds the maximum allowed (%d), at character %d.",
442 high, LTREE_MAX_LEVELS, pos)));
443 else if (curqlevel->low > high)
444 ereturn(escontext, NULL,
445 (errcode(ERRCODE_SYNTAX_ERROR),
446 errmsg("lquery syntax error"),
447 errdetail("Low limit (%d) is greater than high limit (%d), at character %d.",
448 curqlevel->low, high, pos)));
450 curqlevel->high = (uint16) high;
451 state = LQPRS_WAITCLOSE;
453 else if (t_iseq(ptr, '}'))
455 curqlevel->high = LTREE_MAX_LEVELS;
456 state = LQPRS_WAITEND;
458 else
459 UNCHAR;
460 break;
461 case LQPRS_WAITCLOSE:
462 if (t_iseq(ptr, '}'))
463 state = LQPRS_WAITEND;
464 else if (!t_isdigit(ptr))
465 UNCHAR;
466 break;
467 case LQPRS_WAITND:
468 if (t_iseq(ptr, '}'))
470 curqlevel->high = curqlevel->low;
471 state = LQPRS_WAITEND;
473 else if (t_iseq(ptr, ','))
474 state = LQPRS_WAITSNUM;
475 else if (!t_isdigit(ptr))
476 UNCHAR;
477 break;
478 case LQPRS_WAITEND:
479 if (t_iseq(ptr, '.'))
481 state = LQPRS_WAITLEVEL;
482 curqlevel = NEXTLEV(curqlevel);
484 else
485 UNCHAR;
486 break;
487 default:
488 elog(ERROR, "internal error in lquery parser");
491 ptr += charlen;
492 if (state == LQPRS_WAITDELIM)
493 lptr->wlen++;
494 pos++;
497 if (state == LQPRS_WAITDELIM)
499 if (!finish_nodeitem(lptr, ptr, true, pos, escontext))
500 return NULL;
502 else if (state == LQPRS_WAITOPEN)
503 curqlevel->high = LTREE_MAX_LEVELS;
504 else if (state != LQPRS_WAITEND)
505 ereturn(escontext, NULL,
506 (errcode(ERRCODE_SYNTAX_ERROR),
507 errmsg("lquery syntax error"),
508 errdetail("Unexpected end of input.")));
510 curqlevel = tmpql;
511 totallen = LQUERY_HDRSIZE;
512 while ((char *) curqlevel - (char *) tmpql < num * ITEMSIZE)
514 totallen += LQL_HDRSIZE;
515 if (curqlevel->numvar)
517 lptr = GETVAR(curqlevel);
518 while (lptr - GETVAR(curqlevel) < curqlevel->numvar)
520 totallen += MAXALIGN(LVAR_HDRSIZE + lptr->len);
521 lptr++;
524 curqlevel = NEXTLEV(curqlevel);
527 result = (lquery *) palloc0(totallen);
528 SET_VARSIZE(result, totallen);
529 result->numlevel = num;
530 result->firstgood = 0;
531 result->flag = 0;
532 if (hasnot)
533 result->flag |= LQUERY_HASNOT;
534 cur = LQUERY_FIRST(result);
535 curqlevel = tmpql;
536 while ((char *) curqlevel - (char *) tmpql < num * ITEMSIZE)
538 memcpy(cur, curqlevel, LQL_HDRSIZE);
539 cur->totallen = LQL_HDRSIZE;
540 if (curqlevel->numvar)
542 lrptr = LQL_FIRST(cur);
543 lptr = GETVAR(curqlevel);
544 while (lptr - GETVAR(curqlevel) < curqlevel->numvar)
546 cur->totallen += MAXALIGN(LVAR_HDRSIZE + lptr->len);
547 lrptr->len = lptr->len;
548 lrptr->flag = lptr->flag;
549 lrptr->val = ltree_crc32_sz(lptr->start, lptr->len);
550 memcpy(lrptr->name, lptr->start, lptr->len);
551 lptr++;
552 lrptr = LVAR_NEXT(lrptr);
554 pfree(GETVAR(curqlevel));
555 if (cur->numvar > 1 || cur->flag != 0)
557 /* Not a simple match */
558 wasbad = true;
560 else if (wasbad == false)
562 /* count leading simple matches */
563 (result->firstgood)++;
566 else
568 /* '*', so this isn't a simple match */
569 wasbad = true;
571 curqlevel = NEXTLEV(curqlevel);
572 cur = LQL_NEXT(cur);
575 pfree(tmpql);
576 return result;
578 #undef UNCHAR
582 * Close out parsing an ltree or lquery nodeitem:
583 * compute the correct length, and complain if it's not OK
585 static bool
586 finish_nodeitem(nodeitem *lptr, const char *ptr, bool is_lquery, int pos,
587 struct Node *escontext)
589 if (is_lquery)
592 * Back up over any flag characters, and discount them from length and
593 * position.
595 while (ptr > lptr->start && strchr("@*%", ptr[-1]) != NULL)
597 ptr--;
598 lptr->wlen--;
599 pos--;
603 /* Now compute the byte length, which we weren't tracking before. */
604 lptr->len = ptr - lptr->start;
606 /* Complain if it's empty or too long */
607 if (lptr->len == 0)
608 ereturn(escontext, false,
609 (errcode(ERRCODE_SYNTAX_ERROR),
610 is_lquery ?
611 errmsg("lquery syntax error at character %d", pos) :
612 errmsg("ltree syntax error at character %d", pos),
613 errdetail("Empty labels are not allowed.")));
614 if (lptr->wlen > LTREE_LABEL_MAX_CHARS)
615 ereturn(escontext, false,
616 (errcode(ERRCODE_NAME_TOO_LONG),
617 errmsg("label string is too long"),
618 errdetail("Label length is %d, must be at most %d, at character %d.",
619 lptr->wlen, LTREE_LABEL_MAX_CHARS, pos)));
620 return true;
624 * expects an lquery
625 * returns a null terminated string
627 static char *
628 deparse_lquery(const lquery *in)
630 char *buf,
631 *ptr;
632 int i,
634 totallen = 1;
635 lquery_level *curqlevel;
636 lquery_variant *curtlevel;
638 curqlevel = LQUERY_FIRST(in);
639 for (i = 0; i < in->numlevel; i++)
641 totallen++;
642 if (curqlevel->numvar)
644 totallen += 1 + (curqlevel->numvar * 4) + curqlevel->totallen;
645 if (curqlevel->flag & LQL_COUNT)
646 totallen += 2 * 11 + 3;
648 else
649 totallen += 2 * 11 + 4;
650 curqlevel = LQL_NEXT(curqlevel);
653 ptr = buf = (char *) palloc(totallen);
654 curqlevel = LQUERY_FIRST(in);
655 for (i = 0; i < in->numlevel; i++)
657 if (i != 0)
659 *ptr = '.';
660 ptr++;
662 if (curqlevel->numvar)
664 if (curqlevel->flag & LQL_NOT)
666 *ptr = '!';
667 ptr++;
669 curtlevel = LQL_FIRST(curqlevel);
670 for (j = 0; j < curqlevel->numvar; j++)
672 if (j != 0)
674 *ptr = '|';
675 ptr++;
677 memcpy(ptr, curtlevel->name, curtlevel->len);
678 ptr += curtlevel->len;
679 if ((curtlevel->flag & LVAR_SUBLEXEME))
681 *ptr = '%';
682 ptr++;
684 if ((curtlevel->flag & LVAR_INCASE))
686 *ptr = '@';
687 ptr++;
689 if ((curtlevel->flag & LVAR_ANYEND))
691 *ptr = '*';
692 ptr++;
694 curtlevel = LVAR_NEXT(curtlevel);
697 else
699 *ptr = '*';
700 ptr++;
703 if ((curqlevel->flag & LQL_COUNT) || curqlevel->numvar == 0)
705 if (curqlevel->low == curqlevel->high)
707 sprintf(ptr, "{%d}", curqlevel->low);
709 else if (curqlevel->low == 0)
711 if (curqlevel->high == LTREE_MAX_LEVELS)
713 if (curqlevel->numvar == 0)
715 /* This is default for '*', so print nothing */
716 *ptr = '\0';
718 else
719 sprintf(ptr, "{,}");
721 else
722 sprintf(ptr, "{,%d}", curqlevel->high);
724 else if (curqlevel->high == LTREE_MAX_LEVELS)
726 sprintf(ptr, "{%d,}", curqlevel->low);
728 else
729 sprintf(ptr, "{%d,%d}", curqlevel->low, curqlevel->high);
730 ptr = strchr(ptr, '\0');
733 curqlevel = LQL_NEXT(curqlevel);
736 *ptr = '\0';
737 return buf;
741 * Basic lquery I/O functions
743 PG_FUNCTION_INFO_V1(lquery_in);
744 Datum
745 lquery_in(PG_FUNCTION_ARGS)
747 char *buf = (char *) PG_GETARG_POINTER(0);
748 lquery *res;
750 if ((res = parse_lquery(buf, fcinfo->context)) == NULL)
751 PG_RETURN_NULL();
753 PG_RETURN_POINTER(res);
756 PG_FUNCTION_INFO_V1(lquery_out);
757 Datum
758 lquery_out(PG_FUNCTION_ARGS)
760 lquery *in = PG_GETARG_LQUERY_P(0);
762 PG_RETURN_POINTER(deparse_lquery(in));
766 * lquery type send function
768 * The type is sent as text in binary mode, so this is almost the same
769 * as the output function, but it's prefixed with a version number so we
770 * can change the binary format sent in future if necessary. For now,
771 * only version 1 is supported.
773 PG_FUNCTION_INFO_V1(lquery_send);
774 Datum
775 lquery_send(PG_FUNCTION_ARGS)
777 lquery *in = PG_GETARG_LQUERY_P(0);
778 StringInfoData buf;
779 int version = 1;
780 char *res = deparse_lquery(in);
782 pq_begintypsend(&buf);
783 pq_sendint8(&buf, version);
784 pq_sendtext(&buf, res, strlen(res));
785 pfree(res);
787 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
791 * lquery type recv function
793 * The type is sent as text in binary mode, so this is almost the same
794 * as the input function, but it's prefixed with a version number so we
795 * can change the binary format sent in future if necessary. For now,
796 * only version 1 is supported.
798 PG_FUNCTION_INFO_V1(lquery_recv);
799 Datum
800 lquery_recv(PG_FUNCTION_ARGS)
802 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
803 int version = pq_getmsgint(buf, 1);
804 char *str;
805 int nbytes;
806 lquery *res;
808 if (version != 1)
809 elog(ERROR, "unsupported lquery version number %d", version);
811 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
812 res = parse_lquery(str, NULL);
813 pfree(str);
815 PG_RETURN_POINTER(res);