psql: Add missing punctuation in help output
[pgsql.git] / contrib / xml2 / xpath.c
blobb999b1f706656bea0cef73e6e069986756065c5b
/*
 * contrib/xml2/xpath.c
 *
 * Parser interface for DOM-based parser (libxml) rather than
 * stream-based SAX-type parser
 */
7 #include "postgres.h"
9 #include "access/htup_details.h"
10 #include "executor/spi.h"
11 #include "fmgr.h"
12 #include "funcapi.h"
13 #include "lib/stringinfo.h"
14 #include "miscadmin.h"
15 #include "utils/builtins.h"
16 #include "utils/xml.h"
18 /* libxml includes */
20 #include <libxml/xpath.h>
21 #include <libxml/tree.h>
22 #include <libxml/xmlmemory.h>
23 #include <libxml/xmlerror.h>
24 #include <libxml/parserInternals.h>
26 PG_MODULE_MAGIC;
28 /* exported for use by xslt_proc.c */
30 PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness);
32 /* workspace for pgxml_xpath() */
34 typedef struct
36 xmlDocPtr doctree;
37 xmlXPathContextPtr ctxt;
38 xmlXPathObjectPtr res;
39 } xpath_workspace;
41 /* local declarations */
43 static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
44 xmlChar *toptagname, xmlChar *septagname,
45 xmlChar *plainsep);
47 static text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag,
48 xmlChar *septag, xmlChar *plainsep);
50 static xmlChar *pgxml_texttoxmlchar(text *textstring);
52 static xmlXPathObjectPtr pgxml_xpath(text *document, xmlChar *xpath,
53 xpath_workspace *workspace);
55 static void cleanup_workspace(xpath_workspace *workspace);
59 * Initialize for xml parsing.
61 * As with the underlying pg_xml_init function, calls to this MUST be followed
62 * by a PG_TRY block that guarantees that pg_xml_done is called.
64 PgXmlErrorContext *
65 pgxml_parser_init(PgXmlStrictness strictness)
67 PgXmlErrorContext *xmlerrcxt;
69 /* Set up error handling (we share the core's error handler) */
70 xmlerrcxt = pg_xml_init(strictness);
72 /* Note: we're assuming an elog cannot be thrown by the following calls */
74 /* Initialize libxml */
75 xmlInitParser();
77 return xmlerrcxt;
81 /* Encodes special characters (<, >, &, " and \r) as XML entities */
83 PG_FUNCTION_INFO_V1(xml_encode_special_chars);
85 Datum
86 xml_encode_special_chars(PG_FUNCTION_ARGS)
88 text *tin = PG_GETARG_TEXT_PP(0);
89 text *tout;
90 xmlChar *ts,
91 *tt;
93 ts = pgxml_texttoxmlchar(tin);
95 tt = xmlEncodeSpecialChars(NULL, ts);
97 pfree(ts);
99 tout = cstring_to_text((char *) tt);
101 xmlFree(tt);
103 PG_RETURN_TEXT_P(tout);
107 * Function translates a nodeset into a text representation
109 * iterates over each node in the set and calls xmlNodeDump to write it to
110 * an xmlBuffer -from which an xmlChar * string is returned.
112 * each representation is surrounded by <tagname> ... </tagname>
114 * plainsep is an ordinary (not tag) separator - if used, then nodes are
115 * cast to string as output method
117 static xmlChar *
118 pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
119 xmlChar *toptagname,
120 xmlChar *septagname,
121 xmlChar *plainsep)
123 xmlBufferPtr buf;
124 xmlChar *result;
125 int i;
127 buf = xmlBufferCreate();
129 if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
131 xmlBufferWriteChar(buf, "<");
132 xmlBufferWriteCHAR(buf, toptagname);
133 xmlBufferWriteChar(buf, ">");
135 if (nodeset != NULL)
137 for (i = 0; i < nodeset->nodeNr; i++)
139 if (plainsep != NULL)
141 xmlBufferWriteCHAR(buf,
142 xmlXPathCastNodeToString(nodeset->nodeTab[i]));
144 /* If this isn't the last entry, write the plain sep. */
145 if (i < (nodeset->nodeNr) - 1)
146 xmlBufferWriteChar(buf, (char *) plainsep);
148 else
150 if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
152 xmlBufferWriteChar(buf, "<");
153 xmlBufferWriteCHAR(buf, septagname);
154 xmlBufferWriteChar(buf, ">");
156 xmlNodeDump(buf,
157 nodeset->nodeTab[i]->doc,
158 nodeset->nodeTab[i],
159 1, 0);
161 if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
163 xmlBufferWriteChar(buf, "</");
164 xmlBufferWriteCHAR(buf, septagname);
165 xmlBufferWriteChar(buf, ">");
171 if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
173 xmlBufferWriteChar(buf, "</");
174 xmlBufferWriteCHAR(buf, toptagname);
175 xmlBufferWriteChar(buf, ">");
177 result = xmlStrdup(buf->content);
178 xmlBufferFree(buf);
179 return result;
183 /* Translate a PostgreSQL "varlena" -i.e. a variable length parameter
184 * into the libxml2 representation
186 static xmlChar *
187 pgxml_texttoxmlchar(text *textstring)
189 return (xmlChar *) text_to_cstring(textstring);
192 /* Publicly visible XPath functions */
195 * This is a "raw" xpath function. Check that it returns child elements
196 * properly
198 PG_FUNCTION_INFO_V1(xpath_nodeset);
200 Datum
201 xpath_nodeset(PG_FUNCTION_ARGS)
203 text *document = PG_GETARG_TEXT_PP(0);
204 text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
205 xmlChar *toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2));
206 xmlChar *septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(3));
207 xmlChar *xpath;
208 text *xpres;
209 xmlXPathObjectPtr res;
210 xpath_workspace workspace;
212 xpath = pgxml_texttoxmlchar(xpathsupp);
214 res = pgxml_xpath(document, xpath, &workspace);
216 xpres = pgxml_result_to_text(res, toptag, septag, NULL);
218 cleanup_workspace(&workspace);
220 pfree(xpath);
222 if (xpres == NULL)
223 PG_RETURN_NULL();
224 PG_RETURN_TEXT_P(xpres);
228 * The following function is almost identical, but returns the elements in
229 * a list.
231 PG_FUNCTION_INFO_V1(xpath_list);
233 Datum
234 xpath_list(PG_FUNCTION_ARGS)
236 text *document = PG_GETARG_TEXT_PP(0);
237 text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
238 xmlChar *plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2));
239 xmlChar *xpath;
240 text *xpres;
241 xmlXPathObjectPtr res;
242 xpath_workspace workspace;
244 xpath = pgxml_texttoxmlchar(xpathsupp);
246 res = pgxml_xpath(document, xpath, &workspace);
248 xpres = pgxml_result_to_text(res, NULL, NULL, plainsep);
250 cleanup_workspace(&workspace);
252 pfree(xpath);
254 if (xpres == NULL)
255 PG_RETURN_NULL();
256 PG_RETURN_TEXT_P(xpres);
260 PG_FUNCTION_INFO_V1(xpath_string);
262 Datum
263 xpath_string(PG_FUNCTION_ARGS)
265 text *document = PG_GETARG_TEXT_PP(0);
266 text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
267 xmlChar *xpath;
268 int32 pathsize;
269 text *xpres;
270 xmlXPathObjectPtr res;
271 xpath_workspace workspace;
273 pathsize = VARSIZE_ANY_EXHDR(xpathsupp);
276 * We encapsulate the supplied path with "string()" = 8 chars + 1 for NUL
277 * at end
279 /* We could try casting to string using the libxml function? */
281 xpath = (xmlChar *) palloc(pathsize + 9);
282 memcpy((char *) xpath, "string(", 7);
283 memcpy((char *) (xpath + 7), VARDATA_ANY(xpathsupp), pathsize);
284 xpath[pathsize + 7] = ')';
285 xpath[pathsize + 8] = '\0';
287 res = pgxml_xpath(document, xpath, &workspace);
289 xpres = pgxml_result_to_text(res, NULL, NULL, NULL);
291 cleanup_workspace(&workspace);
293 pfree(xpath);
295 if (xpres == NULL)
296 PG_RETURN_NULL();
297 PG_RETURN_TEXT_P(xpres);
301 PG_FUNCTION_INFO_V1(xpath_number);
303 Datum
304 xpath_number(PG_FUNCTION_ARGS)
306 text *document = PG_GETARG_TEXT_PP(0);
307 text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
308 xmlChar *xpath;
309 float4 fRes;
310 xmlXPathObjectPtr res;
311 xpath_workspace workspace;
313 xpath = pgxml_texttoxmlchar(xpathsupp);
315 res = pgxml_xpath(document, xpath, &workspace);
317 pfree(xpath);
319 if (res == NULL)
320 PG_RETURN_NULL();
322 fRes = xmlXPathCastToNumber(res);
324 cleanup_workspace(&workspace);
326 if (xmlXPathIsNaN(fRes))
327 PG_RETURN_NULL();
329 PG_RETURN_FLOAT4(fRes);
333 PG_FUNCTION_INFO_V1(xpath_bool);
335 Datum
336 xpath_bool(PG_FUNCTION_ARGS)
338 text *document = PG_GETARG_TEXT_PP(0);
339 text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
340 xmlChar *xpath;
341 int bRes;
342 xmlXPathObjectPtr res;
343 xpath_workspace workspace;
345 xpath = pgxml_texttoxmlchar(xpathsupp);
347 res = pgxml_xpath(document, xpath, &workspace);
349 pfree(xpath);
351 if (res == NULL)
352 PG_RETURN_BOOL(false);
354 bRes = xmlXPathCastToBoolean(res);
356 cleanup_workspace(&workspace);
358 PG_RETURN_BOOL(bRes);
363 /* Core function to evaluate XPath query */
365 static xmlXPathObjectPtr
366 pgxml_xpath(text *document, xmlChar *xpath, xpath_workspace *workspace)
368 int32 docsize = VARSIZE_ANY_EXHDR(document);
369 PgXmlErrorContext *xmlerrcxt;
370 xmlXPathCompExprPtr comppath;
372 workspace->doctree = NULL;
373 workspace->ctxt = NULL;
374 workspace->res = NULL;
376 xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
378 PG_TRY();
380 workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document),
381 docsize, NULL, NULL,
382 XML_PARSE_NOENT);
383 if (workspace->doctree != NULL)
385 workspace->ctxt = xmlXPathNewContext(workspace->doctree);
386 workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree);
388 /* compile the path */
389 comppath = xmlXPathCompile(xpath);
390 if (comppath == NULL)
391 xml_ereport(xmlerrcxt, ERROR, ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
392 "XPath Syntax Error");
394 /* Now evaluate the path expression. */
395 workspace->res = xmlXPathCompiledEval(comppath, workspace->ctxt);
397 xmlXPathFreeCompExpr(comppath);
400 PG_CATCH();
402 cleanup_workspace(workspace);
404 pg_xml_done(xmlerrcxt, true);
406 PG_RE_THROW();
408 PG_END_TRY();
410 if (workspace->res == NULL)
411 cleanup_workspace(workspace);
413 pg_xml_done(xmlerrcxt, false);
415 return workspace->res;
418 /* Clean up after processing the result of pgxml_xpath() */
419 static void
420 cleanup_workspace(xpath_workspace *workspace)
422 if (workspace->res)
423 xmlXPathFreeObject(workspace->res);
424 workspace->res = NULL;
425 if (workspace->ctxt)
426 xmlXPathFreeContext(workspace->ctxt);
427 workspace->ctxt = NULL;
428 if (workspace->doctree)
429 xmlFreeDoc(workspace->doctree);
430 workspace->doctree = NULL;
433 static text *
434 pgxml_result_to_text(xmlXPathObjectPtr res,
435 xmlChar *toptag,
436 xmlChar *septag,
437 xmlChar *plainsep)
439 xmlChar *xpresstr;
440 text *xpres;
442 if (res == NULL)
443 return NULL;
445 switch (res->type)
447 case XPATH_NODESET:
448 xpresstr = pgxmlNodeSetToText(res->nodesetval,
449 toptag,
450 septag, plainsep);
451 break;
453 case XPATH_STRING:
454 xpresstr = xmlStrdup(res->stringval);
455 break;
457 default:
458 elog(NOTICE, "unsupported XQuery result: %d", res->type);
459 xpresstr = xmlStrdup((const xmlChar *) "<unsupported/>");
462 /* Now convert this result back to text */
463 xpres = cstring_to_text((char *) xpresstr);
465 /* Free various storage */
466 xmlFree(xpresstr);
468 return xpres;
472 * xpath_table is a table function. It needs some tidying (as do the
473 * other functions here!
475 PG_FUNCTION_INFO_V1(xpath_table);
477 Datum
478 xpath_table(PG_FUNCTION_ARGS)
480 /* Function parameters */
481 char *pkeyfield = text_to_cstring(PG_GETARG_TEXT_PP(0));
482 char *xmlfield = text_to_cstring(PG_GETARG_TEXT_PP(1));
483 char *relname = text_to_cstring(PG_GETARG_TEXT_PP(2));
484 char *xpathset = text_to_cstring(PG_GETARG_TEXT_PP(3));
485 char *condition = text_to_cstring(PG_GETARG_TEXT_PP(4));
487 /* SPI (input tuple) support */
488 SPITupleTable *tuptable;
489 HeapTuple spi_tuple;
490 TupleDesc spi_tupdesc;
493 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
494 AttInMetadata *attinmeta;
496 char **values;
497 xmlChar **xpaths;
498 char *pos;
499 const char *pathsep = "|";
501 int numpaths;
502 int ret;
503 uint64 proc;
504 int j;
505 int rownr; /* For issuing multiple rows from one original
506 * document */
507 bool had_values; /* To determine end of nodeset results */
508 StringInfoData query_buf;
509 PgXmlErrorContext *xmlerrcxt;
510 volatile xmlDocPtr doctree = NULL;
512 InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
514 /* must have at least one output column (for the pkey) */
515 if (rsinfo->setDesc->natts < 1)
516 ereport(ERROR,
517 (errcode(ERRCODE_SYNTAX_ERROR),
518 errmsg("xpath_table must have at least one output column")));
521 * At the moment we assume that the returned attributes make sense for the
522 * XPath specified (i.e. we trust the caller). It's not fatal if they get
523 * it wrong - the input function for the column type will raise an error
524 * if the path result can't be converted into the correct binary
525 * representation.
528 attinmeta = TupleDescGetAttInMetadata(rsinfo->setDesc);
530 values = (char **) palloc(rsinfo->setDesc->natts * sizeof(char *));
531 xpaths = (xmlChar **) palloc(rsinfo->setDesc->natts * sizeof(xmlChar *));
534 * Split XPaths. xpathset is a writable CString.
536 * Note that we stop splitting once we've done all needed for tupdesc
538 numpaths = 0;
539 pos = xpathset;
540 while (numpaths < (rsinfo->setDesc->natts - 1))
542 xpaths[numpaths++] = (xmlChar *) pos;
543 pos = strstr(pos, pathsep);
544 if (pos != NULL)
546 *pos = '\0';
547 pos++;
549 else
550 break;
553 /* Now build query */
554 initStringInfo(&query_buf);
556 /* Build initial sql statement */
557 appendStringInfo(&query_buf, "SELECT %s, %s FROM %s WHERE %s",
558 pkeyfield,
559 xmlfield,
560 relname,
561 condition);
563 if ((ret = SPI_connect()) < 0)
564 elog(ERROR, "xpath_table: SPI_connect returned %d", ret);
566 if ((ret = SPI_exec(query_buf.data, 0)) != SPI_OK_SELECT)
567 elog(ERROR, "xpath_table: SPI execution failed for query %s",
568 query_buf.data);
570 proc = SPI_processed;
571 tuptable = SPI_tuptable;
572 spi_tupdesc = tuptable->tupdesc;
575 * Check that SPI returned correct result. If you put a comma into one of
576 * the function parameters, this will catch it when the SPI query returns
577 * e.g. 3 columns.
579 if (spi_tupdesc->natts != 2)
581 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
582 errmsg("expression returning multiple columns is not valid in parameter list"),
583 errdetail("Expected two columns in SPI result, got %d.", spi_tupdesc->natts)));
587 * Setup the parser. This should happen after we are done evaluating the
588 * query, in case it calls functions that set up libxml differently.
590 xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
592 PG_TRY();
594 /* For each row i.e. document returned from SPI */
595 uint64 i;
597 for (i = 0; i < proc; i++)
599 char *pkey;
600 char *xmldoc;
601 xmlXPathContextPtr ctxt;
602 xmlXPathObjectPtr res;
603 xmlChar *resstr;
604 xmlXPathCompExprPtr comppath;
605 HeapTuple ret_tuple;
607 /* Extract the row data as C Strings */
608 spi_tuple = tuptable->vals[i];
609 pkey = SPI_getvalue(spi_tuple, spi_tupdesc, 1);
610 xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc, 2);
613 * Clear the values array, so that not-well-formed documents
614 * return NULL in all columns. Note that this also means that
615 * spare columns will be NULL.
617 for (j = 0; j < rsinfo->setDesc->natts; j++)
618 values[j] = NULL;
620 /* Insert primary key */
621 values[0] = pkey;
623 /* Parse the document */
624 if (xmldoc)
625 doctree = xmlReadMemory(xmldoc, strlen(xmldoc),
626 NULL, NULL,
627 XML_PARSE_NOENT);
628 else /* treat NULL as not well-formed */
629 doctree = NULL;
631 if (doctree == NULL)
633 /* not well-formed, so output all-NULL tuple */
634 ret_tuple = BuildTupleFromCStrings(attinmeta, values);
635 tuplestore_puttuple(rsinfo->setResult, ret_tuple);
636 heap_freetuple(ret_tuple);
638 else
640 /* New loop here - we have to deal with nodeset results */
641 rownr = 0;
645 /* Now evaluate the set of xpaths. */
646 had_values = false;
647 for (j = 0; j < numpaths; j++)
649 ctxt = xmlXPathNewContext(doctree);
650 ctxt->node = xmlDocGetRootElement(doctree);
652 /* compile the path */
653 comppath = xmlXPathCompile(xpaths[j]);
654 if (comppath == NULL)
655 xml_ereport(xmlerrcxt, ERROR,
656 ERRCODE_EXTERNAL_ROUTINE_EXCEPTION,
657 "XPath Syntax Error");
659 /* Now evaluate the path expression. */
660 res = xmlXPathCompiledEval(comppath, ctxt);
661 xmlXPathFreeCompExpr(comppath);
663 if (res != NULL)
665 switch (res->type)
667 case XPATH_NODESET:
668 /* We see if this nodeset has enough nodes */
669 if (res->nodesetval != NULL &&
670 rownr < res->nodesetval->nodeNr)
672 resstr = xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
673 had_values = true;
675 else
676 resstr = NULL;
678 break;
680 case XPATH_STRING:
681 resstr = xmlStrdup(res->stringval);
682 break;
684 default:
685 elog(NOTICE, "unsupported XQuery result: %d", res->type);
686 resstr = xmlStrdup((const xmlChar *) "<unsupported/>");
690 * Insert this into the appropriate column in the
691 * result tuple.
693 values[j + 1] = (char *) resstr;
695 xmlXPathFreeContext(ctxt);
698 /* Now add the tuple to the output, if there is one. */
699 if (had_values)
701 ret_tuple = BuildTupleFromCStrings(attinmeta, values);
702 tuplestore_puttuple(rsinfo->setResult, ret_tuple);
703 heap_freetuple(ret_tuple);
706 rownr++;
707 } while (had_values);
710 if (doctree != NULL)
711 xmlFreeDoc(doctree);
712 doctree = NULL;
714 if (pkey)
715 pfree(pkey);
716 if (xmldoc)
717 pfree(xmldoc);
720 PG_CATCH();
722 if (doctree != NULL)
723 xmlFreeDoc(doctree);
725 pg_xml_done(xmlerrcxt, true);
727 PG_RE_THROW();
729 PG_END_TRY();
731 if (doctree != NULL)
732 xmlFreeDoc(doctree);
734 pg_xml_done(xmlerrcxt, false);
736 SPI_finish();
739 * SFRM_Materialize mode expects us to return a NULL Datum. The actual
740 * tuples are in our tuplestore and passed back through rsinfo->setResult.
741 * rsinfo->setDesc is set to the tuple description that we actually used
742 * to build our tuples with, so the caller can verify we did what it was
743 * expecting.
745 return (Datum) 0;