More sensible character_octet_length
[PostgreSQL.git] / contrib / ltree / ltxtquery_io.c
blob5d54a8dec2ceb7cf63ff216b5730605aae40a3c5
1 /*
2 * txtquery io
3 * Teodor Sigaev <teodor@stack.net>
4 * $PostgreSQL$
5 */
6 #include "postgres.h"
8 #include <ctype.h>
10 #include "crc32.h"
11 #include "ltree.h"
13 PG_FUNCTION_INFO_V1(ltxtq_in);
14 Datum ltxtq_in(PG_FUNCTION_ARGS);
16 PG_FUNCTION_INFO_V1(ltxtq_out);
17 Datum ltxtq_out(PG_FUNCTION_ARGS);
/*
 * States of the tokenizer in gettoken_query(): what kind of input is
 * expected at the current position.
 */
enum
{
	WAITOPERAND = 1,			/* an operand, '!' or '(' must come next */
	INOPERAND = 2,				/* in the middle of an operand */
	WAITOPERATOR = 3			/* '&', '|', ')' or end of input must come next */
};
26 * node of query tree, also used
27 * for storing polish notation in parser
29 typedef struct NODE
31 int4 type;
32 int4 val;
33 int2 distance;
34 int2 length;
35 uint16 flag;
36 struct NODE *next;
37 } NODE;
39 typedef struct
41 char *buf;
42 int4 state;
43 int4 count;
44 /* reverse polish notation in list (for temporary usage) */
45 NODE *str;
46 /* number in str */
47 int4 num;
49 /* user-friendly operand */
50 int4 lenop;
51 int4 sumlen;
52 char *op;
53 char *curop;
54 } QPRS_STATE;
57 * get token from query string
59 static int4
60 gettoken_query(QPRS_STATE *state, int4 *val, int4 *lenval, char **strval, uint16 *flag)
62 int charlen;
64 for (;;)
66 charlen = pg_mblen(state->buf);
68 switch (state->state)
70 case WAITOPERAND:
71 if (charlen == 1 && t_iseq(state->buf, '!'))
73 (state->buf)++;
74 *val = (int4) '!';
75 return OPR;
77 else if (charlen == 1 && t_iseq(state->buf, '('))
79 state->count++;
80 (state->buf)++;
81 return OPEN;
83 else if (ISALNUM(state->buf))
85 state->state = INOPERAND;
86 *strval = state->buf;
87 *lenval = charlen;
88 *flag = 0;
90 else if (!t_isspace(state->buf))
91 ereport(ERROR,
92 (errcode(ERRCODE_SYNTAX_ERROR),
93 errmsg("operand syntax error")));
94 break;
95 case INOPERAND:
96 if (ISALNUM(state->buf))
98 if (*flag)
99 ereport(ERROR,
100 (errcode(ERRCODE_SYNTAX_ERROR),
101 errmsg("modificators syntax error")));
102 *lenval += charlen;
104 else if (charlen == 1 && t_iseq(state->buf, '%'))
105 *flag |= LVAR_SUBLEXEME;
106 else if (charlen == 1 && t_iseq(state->buf, '@'))
107 *flag |= LVAR_INCASE;
108 else if (charlen == 1 && t_iseq(state->buf, '*'))
109 *flag |= LVAR_ANYEND;
110 else
112 state->state = WAITOPERATOR;
113 return VAL;
115 break;
116 case WAITOPERATOR:
117 if (charlen == 1 && (t_iseq(state->buf, '&') || t_iseq(state->buf, '|')))
119 state->state = WAITOPERAND;
120 *val = (int4) *(state->buf);
121 (state->buf)++;
122 return OPR;
124 else if (charlen == 1 && t_iseq(state->buf, ')'))
126 (state->buf)++;
127 state->count--;
128 return (state->count < 0) ? ERR : CLOSE;
130 else if (*(state->buf) == '\0')
131 return (state->count) ? ERR : END;
132 else if (charlen == 1 && !t_iseq(state->buf, ' '))
133 return ERR;
134 break;
135 default:
136 return ERR;
137 break;
140 state->buf += charlen;
142 return END;
146 * push new one in polish notation reverse view
148 static void
149 pushquery(QPRS_STATE *state, int4 type, int4 val, int4 distance, int4 lenval, uint16 flag)
151 NODE *tmp = (NODE *) palloc(sizeof(NODE));
153 tmp->type = type;
154 tmp->val = val;
155 tmp->flag = flag;
156 if (distance > 0xffff)
157 ereport(ERROR,
158 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
159 errmsg("value is too big")));
160 if (lenval > 0xff)
161 ereport(ERROR,
162 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
163 errmsg("operand is too long")));
164 tmp->distance = distance;
165 tmp->length = lenval;
166 tmp->next = state->str;
167 state->str = tmp;
168 state->num++;
172 * This function is used for query_txt parsing
174 static void
175 pushval_asis(QPRS_STATE *state, int type, char *strval, int lenval, uint16 flag)
177 if (lenval > 0xffff)
178 ereport(ERROR,
179 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
180 errmsg("word is too long")));
182 pushquery(state, type, ltree_crc32_sz(strval, lenval),
183 state->curop - state->op, lenval, flag);
185 while (state->curop - state->op + lenval + 1 >= state->lenop)
187 int4 tmp = state->curop - state->op;
189 state->lenop *= 2;
190 state->op = (char *) repalloc((void *) state->op, state->lenop);
191 state->curop = state->op + tmp;
193 memcpy((void *) state->curop, (void *) strval, lenval);
194 state->curop += lenval;
195 *(state->curop) = '\0';
196 state->curop++;
197 state->sumlen += lenval + 1;
198 return;
201 #define STACKDEPTH 32
203 * make polish notaion of query
205 static int4
206 makepol(QPRS_STATE *state)
208 int4 val = 0,
209 type;
210 int4 lenval = 0;
211 char *strval = NULL;
212 int4 stack[STACKDEPTH];
213 int4 lenstack = 0;
214 uint16 flag = 0;
216 while ((type = gettoken_query(state, &val, &lenval, &strval, &flag)) != END)
218 switch (type)
220 case VAL:
221 pushval_asis(state, VAL, strval, lenval, flag);
222 while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
223 stack[lenstack - 1] == (int4) '!'))
225 lenstack--;
226 pushquery(state, OPR, stack[lenstack], 0, 0, 0);
228 break;
229 case OPR:
230 if (lenstack && val == (int4) '|')
231 pushquery(state, OPR, val, 0, 0, 0);
232 else
234 if (lenstack == STACKDEPTH)
235 /* internal error */
236 elog(ERROR, "stack too short");
237 stack[lenstack] = val;
238 lenstack++;
240 break;
241 case OPEN:
242 if (makepol(state) == ERR)
243 return ERR;
244 if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
245 stack[lenstack - 1] == (int4) '!'))
247 lenstack--;
248 pushquery(state, OPR, stack[lenstack], 0, 0, 0);
250 break;
251 case CLOSE:
252 while (lenstack)
254 lenstack--;
255 pushquery(state, OPR, stack[lenstack], 0, 0, 0);
257 return END;
258 break;
259 case ERR:
260 default:
261 ereport(ERROR,
262 (errcode(ERRCODE_SYNTAX_ERROR),
263 errmsg("syntax error")));
265 return ERR;
269 while (lenstack)
271 lenstack--;
272 pushquery(state, OPR, stack[lenstack], 0, 0, 0);
274 return END;
277 static void
278 findoprnd(ITEM *ptr, int4 *pos)
280 if (ptr[*pos].type == VAL || ptr[*pos].type == VALTRUE)
282 ptr[*pos].left = 0;
283 (*pos)++;
285 else if (ptr[*pos].val == (int4) '!')
287 ptr[*pos].left = 1;
288 (*pos)++;
289 findoprnd(ptr, pos);
291 else
293 ITEM *curitem = &ptr[*pos];
294 int4 tmp = *pos;
296 (*pos)++;
297 findoprnd(ptr, pos);
298 curitem->left = *pos - tmp;
299 findoprnd(ptr, pos);
305 * input
307 static ltxtquery *
308 queryin(char *buf)
310 QPRS_STATE state;
311 int4 i;
312 ltxtquery *query;
313 int4 commonlen;
314 ITEM *ptr;
315 NODE *tmp;
316 int4 pos = 0;
318 #ifdef BS_DEBUG
319 char pbuf[16384],
320 *cur;
321 #endif
323 /* init state */
324 state.buf = buf;
325 state.state = WAITOPERAND;
326 state.count = 0;
327 state.num = 0;
328 state.str = NULL;
330 /* init list of operand */
331 state.sumlen = 0;
332 state.lenop = 64;
333 state.curop = state.op = (char *) palloc(state.lenop);
334 *(state.curop) = '\0';
336 /* parse query & make polish notation (postfix, but in reverse order) */
337 makepol(&state);
338 if (!state.num)
339 ereport(ERROR,
340 (errcode(ERRCODE_SYNTAX_ERROR),
341 errmsg("syntax error"),
342 errdetail("Empty query.")));
344 /* make finish struct */
345 commonlen = COMPUTESIZE(state.num, state.sumlen);
346 query = (ltxtquery *) palloc(commonlen);
347 SET_VARSIZE(query, commonlen);
348 query->size = state.num;
349 ptr = GETQUERY(query);
351 /* set item in polish notation */
352 for (i = 0; i < state.num; i++)
354 ptr[i].type = state.str->type;
355 ptr[i].val = state.str->val;
356 ptr[i].distance = state.str->distance;
357 ptr[i].length = state.str->length;
358 ptr[i].flag = state.str->flag;
359 tmp = state.str->next;
360 pfree(state.str);
361 state.str = tmp;
364 /* set user friendly-operand view */
365 memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
366 pfree(state.op);
368 /* set left operand's position for every operator */
369 pos = 0;
370 findoprnd(ptr, &pos);
372 return query;
376 * in without morphology
378 Datum
379 ltxtq_in(PG_FUNCTION_ARGS)
381 PG_RETURN_POINTER(queryin((char *) PG_GETARG_POINTER(0)));
385 * out function
387 typedef struct
389 ITEM *curpol;
390 char *buf;
391 char *cur;
392 char *op;
393 int4 buflen;
394 } INFIX;
/*
 * Make sure the output buffer of "inf" (an INFIX pointer) has room for
 * "addsize" more bytes plus a terminating NUL.  The buffer is doubled as
 * often as needed, and "cur" is re-pointed into the moved allocation.
 *
 * Both arguments are evaluated more than once, so they must be free of
 * side effects.  The body is wrapped in do { } while (0) so that the macro
 * acts as a single statement, including inside an unbraced if/else.
 */
#define RESIZEBUF(inf, addsize) \
do { \
	while (((inf)->cur - (inf)->buf) + (addsize) + 1 >= (inf)->buflen) \
	{ \
		int4		len = (inf)->cur - (inf)->buf; \
		(inf)->buflen *= 2; \
		(inf)->buf = (char *) repalloc((void *) (inf)->buf, (inf)->buflen); \
		(inf)->cur = (inf)->buf + len; \
	} \
} while (0)
406 * recursive walk on tree and print it in
407 * infix (human-readable) view
409 static void
410 infix(INFIX *in, bool first)
412 if (in->curpol->type == VAL)
414 char *op = in->op + in->curpol->distance;
416 RESIZEBUF(in, in->curpol->length * 2 + 5);
417 while (*op)
419 *(in->cur) = *op;
420 op++;
421 in->cur++;
423 if (in->curpol->flag & LVAR_SUBLEXEME)
425 *(in->cur) = '%';
426 in->cur++;
428 if (in->curpol->flag & LVAR_INCASE)
430 *(in->cur) = '@';
431 in->cur++;
433 if (in->curpol->flag & LVAR_ANYEND)
435 *(in->cur) = '*';
436 in->cur++;
438 *(in->cur) = '\0';
439 in->curpol++;
441 else if (in->curpol->val == (int4) '!')
443 bool isopr = false;
445 RESIZEBUF(in, 1);
446 *(in->cur) = '!';
447 in->cur++;
448 *(in->cur) = '\0';
449 in->curpol++;
450 if (in->curpol->type == OPR)
452 isopr = true;
453 RESIZEBUF(in, 2);
454 sprintf(in->cur, "( ");
455 in->cur = strchr(in->cur, '\0');
457 infix(in, isopr);
458 if (isopr)
460 RESIZEBUF(in, 2);
461 sprintf(in->cur, " )");
462 in->cur = strchr(in->cur, '\0');
465 else
467 int4 op = in->curpol->val;
468 INFIX nrm;
470 in->curpol++;
471 if (op == (int4) '|' && !first)
473 RESIZEBUF(in, 2);
474 sprintf(in->cur, "( ");
475 in->cur = strchr(in->cur, '\0');
478 nrm.curpol = in->curpol;
479 nrm.op = in->op;
480 nrm.buflen = 16;
481 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
483 /* get right operand */
484 infix(&nrm, false);
486 /* get & print left operand */
487 in->curpol = nrm.curpol;
488 infix(in, false);
490 /* print operator & right operand */
491 RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
492 sprintf(in->cur, " %c %s", op, nrm.buf);
493 in->cur = strchr(in->cur, '\0');
494 pfree(nrm.buf);
496 if (op == (int4) '|' && !first)
498 RESIZEBUF(in, 2);
499 sprintf(in->cur, " )");
500 in->cur = strchr(in->cur, '\0');
505 Datum
506 ltxtq_out(PG_FUNCTION_ARGS)
508 ltxtquery *query = PG_GETARG_LTXTQUERY(0);
509 INFIX nrm;
511 if (query->size == 0)
512 ereport(ERROR,
513 (errcode(ERRCODE_SYNTAX_ERROR),
514 errmsg("syntax error"),
515 errdetail("Empty query.")));
517 nrm.curpol = GETQUERY(query);
518 nrm.buflen = 32;
519 nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
520 *(nrm.cur) = '\0';
521 nrm.op = GETOPERAND(query);
522 infix(&nrm, true);
524 PG_FREE_IF_COPY(query, 0);
525 PG_RETURN_POINTER(nrm.buf);