Line data Source code
1 : /*
2 : * contrib/xml2/xpath.c
3 : *
4 : * Parser interface for DOM-based parser (libxml) rather than
5 : * stream-based SAX-type parser
6 : */
7 : #include "postgres.h"
8 :
9 : #include "access/htup_details.h"
10 : #include "executor/spi.h"
11 : #include "fmgr.h"
12 : #include "funcapi.h"
13 : #include "lib/stringinfo.h"
14 : #include "utils/builtins.h"
15 : #include "utils/tuplestore.h"
16 : #include "utils/xml.h"
17 :
18 : /* libxml includes */
19 :
20 : #include <libxml/xpath.h>
21 : #include <libxml/tree.h>
22 : #include <libxml/xmlmemory.h>
23 : #include <libxml/xmlerror.h>
24 : #include <libxml/parserInternals.h>
25 :
26 1 : PG_MODULE_MAGIC_EXT(
27 : .name = "xml2",
28 : .version = PG_VERSION
29 : );
30 :
31 : /* exported for use by xslt_proc.c */
32 :
33 : PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness);
34 :
35 : /*
 * Workspace for pgxml_xpath(): the libxml objects created while evaluating
 * one XPath expression against one document.  pgxml_xpath() fills the fields
 * in and cleanup_workspace() releases whichever of them are non-NULL.
 */
36 :
37 : typedef struct
38 : {
39 : xmlDocPtr doctree; /* parsed document; NULL if xmlReadMemory() failed */
40 : xmlXPathContextPtr ctxt; /* XPath context created on doctree */
41 : xmlXPathObjectPtr res; /* result of evaluating the compiled XPath */
42 : } xpath_workspace;
43 :
44 : /* local declarations */
45 :
46 : static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
47 : xmlChar *toptagname, xmlChar *septagname,
48 : xmlChar *plainsep);
49 :
50 : static text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag,
51 : xmlChar *septag, xmlChar *plainsep);
52 :
53 : static xmlChar *pgxml_texttoxmlchar(text *textstring);
54 :
55 : static xpath_workspace *pgxml_xpath(text *document, xmlChar *xpath,
56 : PgXmlErrorContext *xmlerrcxt);
57 :
58 : static void cleanup_workspace(xpath_workspace *workspace);
59 :
60 :
61 : /*
62 : * Initialize for xml parsing.
63 : *
64 : * As with the underlying pg_xml_init function, calls to this MUST be followed
65 : * by a PG_TRY block that guarantees that pg_xml_done is called.
66 : */
67 : PgXmlErrorContext *
68 12 : pgxml_parser_init(PgXmlStrictness strictness)
69 : {
70 : PgXmlErrorContext *xmlerrcxt;
71 :
72 : /* Set up error handling (we share the core's error handler) */
73 12 : xmlerrcxt = pg_xml_init(strictness);
74 :
75 : /* Note: we're assuming an elog cannot be thrown by the following calls */
76 :
77 : /* Initialize libxml */
78 12 : xmlInitParser();
79 :
80 12 : return xmlerrcxt;
81 : }
82 :
83 :
84 : /* Encodes special characters (<, >, &, " and \r) as XML entities */
85 :
86 1 : PG_FUNCTION_INFO_V1(xml_encode_special_chars);
87 :
88 : Datum
89 0 : xml_encode_special_chars(PG_FUNCTION_ARGS)
90 : {
91 0 : text *tin = PG_GETARG_TEXT_PP(0);
92 0 : text *volatile tout = NULL;
93 0 : xmlChar *volatile tt = NULL;
94 : PgXmlErrorContext *xmlerrcxt;
95 :
96 0 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
97 :
98 0 : PG_TRY();
99 : {
100 : xmlChar *ts;
101 :
102 0 : ts = pgxml_texttoxmlchar(tin);
103 :
104 0 : tt = xmlEncodeSpecialChars(NULL, ts);
105 0 : if (tt == NULL || pg_xml_error_occurred(xmlerrcxt))
106 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
107 : "could not allocate xmlChar");
108 0 : pfree(ts);
109 :
110 0 : tout = cstring_to_text((char *) tt);
111 : }
112 0 : PG_CATCH();
113 : {
114 0 : if (tt != NULL)
115 0 : xmlFree(tt);
116 :
117 0 : pg_xml_done(xmlerrcxt, true);
118 :
119 0 : PG_RE_THROW();
120 : }
121 0 : PG_END_TRY();
122 :
123 0 : if (tt != NULL)
124 0 : xmlFree(tt);
125 :
126 0 : pg_xml_done(xmlerrcxt, false);
127 :
128 0 : PG_RETURN_TEXT_P(tout);
129 : }
130 :
131 : /*
132 : * Function translates a nodeset into a text representation
133 : *
134 : * iterates over each node in the set and calls xmlNodeDump to write it to
135 : * an xmlBuffer -from which an xmlChar * string is returned.
136 : *
137 : * each representation is surrounded by <tagname> ... </tagname>
138 : *
139 : * plainsep is an ordinary (not tag) separator - if used, then nodes are
140 : * cast to string as output method
141 : */
142 : static xmlChar *
143 6 : pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
144 : xmlChar *toptagname,
145 : xmlChar *septagname,
146 : xmlChar *plainsep)
147 : {
148 6 : volatile xmlBufferPtr buf = NULL;
149 6 : xmlChar *volatile result = NULL;
150 6 : xmlChar *volatile str = NULL;
151 : PgXmlErrorContext *xmlerrcxt;
152 :
153 : /* spin up some error handling */
154 6 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
155 :
156 6 : PG_TRY();
157 : {
158 6 : buf = xmlBufferCreate();
159 :
160 6 : if (buf == NULL || pg_xml_error_occurred(xmlerrcxt))
161 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
162 : "could not allocate xmlBuffer");
163 :
164 6 : if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
165 : {
166 1 : xmlBufferWriteChar(buf, "<");
167 1 : xmlBufferWriteCHAR(buf, toptagname);
168 1 : xmlBufferWriteChar(buf, ">");
169 : }
170 6 : if (nodeset != NULL)
171 : {
172 17 : for (int i = 0; i < nodeset->nodeNr; i++)
173 : {
174 11 : if (plainsep != NULL)
175 : {
176 4 : str = xmlXPathCastNodeToString(nodeset->nodeTab[i]);
177 4 : if (str == NULL || pg_xml_error_occurred(xmlerrcxt))
178 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
179 : "could not allocate node text");
180 :
181 4 : xmlBufferWriteCHAR(buf, str);
182 4 : xmlFree(str);
183 4 : str = NULL;
184 :
185 : /* If this isn't the last entry, write the plain sep. */
186 4 : if (i < (nodeset->nodeNr) - 1)
187 2 : xmlBufferWriteChar(buf, (char *) plainsep);
188 : }
189 : else
190 : {
191 7 : xmlNodePtr node = nodeset->nodeTab[i];
192 :
193 7 : if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
194 : {
195 4 : xmlBufferWriteChar(buf, "<");
196 4 : xmlBufferWriteCHAR(buf, septagname);
197 4 : xmlBufferWriteChar(buf, ">");
198 : }
199 :
200 : /*
201 : * XML_NAMESPACE_DECL nodes are xmlNs structs, that cannot
202 : * be processed by xmlNodeDump().
203 : */
204 7 : if (node->type == XML_NAMESPACE_DECL)
205 : {
206 1 : str = xmlXPathCastNodeToString(node);
207 1 : if (str == NULL || pg_xml_error_occurred(xmlerrcxt))
208 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
209 : "could not allocate node text");
210 1 : xmlBufferWriteCHAR(buf, str);
211 1 : xmlFree(str);
212 1 : str = NULL;
213 : }
214 : else
215 6 : xmlNodeDump(buf, node->doc, node, 1, 0);
216 :
217 7 : if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
218 : {
219 4 : xmlBufferWriteChar(buf, "</");
220 4 : xmlBufferWriteCHAR(buf, septagname);
221 4 : xmlBufferWriteChar(buf, ">");
222 : }
223 : }
224 : }
225 : }
226 :
227 6 : if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
228 : {
229 1 : xmlBufferWriteChar(buf, "</");
230 1 : xmlBufferWriteCHAR(buf, toptagname);
231 1 : xmlBufferWriteChar(buf, ">");
232 : }
233 :
234 6 : result = xmlStrdup(xmlBufferContent(buf));
235 6 : if (result == NULL || pg_xml_error_occurred(xmlerrcxt))
236 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
237 : "could not allocate result");
238 : }
239 0 : PG_CATCH();
240 : {
241 0 : if (result)
242 0 : xmlFree(result);
243 0 : if (str)
244 0 : xmlFree(str);
245 0 : if (buf)
246 0 : xmlBufferFree(buf);
247 :
248 0 : pg_xml_done(xmlerrcxt, true);
249 :
250 0 : PG_RE_THROW();
251 : }
252 6 : PG_END_TRY();
253 :
254 6 : xmlBufferFree(buf);
255 6 : pg_xml_done(xmlerrcxt, false);
256 :
257 6 : return result;
258 : }
259 :
260 :
261 : /*
262 : * Translate a PostgreSQL "varlena" -i.e. a variable length parameter
263 : * into the libxml2 representation
264 : */
265 : static xmlChar *
266 16 : pgxml_texttoxmlchar(text *textstring)
267 : {
268 16 : return (xmlChar *) text_to_cstring(textstring);
269 : }
270 :
271 : /* Publicly visible XPath functions */
272 :
273 : /*
274 : * This is a "raw" xpath function. Check that it returns child elements
275 : * properly
276 : */
277 2 : PG_FUNCTION_INFO_V1(xpath_nodeset);
278 :
279 : Datum
280 4 : xpath_nodeset(PG_FUNCTION_ARGS)
281 : {
282 4 : text *document = PG_GETARG_TEXT_PP(0);
283 4 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
284 4 : xmlChar *toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2));
285 4 : xmlChar *septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(3));
286 : xmlChar *xpath;
287 4 : text *volatile xpres = NULL;
288 4 : xpath_workspace *volatile workspace = NULL;
289 : PgXmlErrorContext *xmlerrcxt;
290 :
291 4 : xpath = pgxml_texttoxmlchar(xpathsupp);
292 4 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
293 :
294 4 : PG_TRY();
295 : {
296 4 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
297 4 : xpres = pgxml_result_to_text(workspace->res, toptag, septag, NULL);
298 : }
299 0 : PG_CATCH();
300 : {
301 0 : if (workspace)
302 0 : cleanup_workspace(workspace);
303 :
304 0 : pg_xml_done(xmlerrcxt, true);
305 0 : PG_RE_THROW();
306 : }
307 4 : PG_END_TRY();
308 :
309 4 : cleanup_workspace(workspace);
310 4 : pg_xml_done(xmlerrcxt, false);
311 :
312 4 : pfree(xpath);
313 :
314 4 : if (xpres == NULL)
315 0 : PG_RETURN_NULL();
316 4 : PG_RETURN_TEXT_P(xpres);
317 : }
318 :
319 : /*
320 : * The following function is almost identical, but returns the elements in
321 : * a list.
322 : */
323 2 : PG_FUNCTION_INFO_V1(xpath_list);
324 :
325 : Datum
326 2 : xpath_list(PG_FUNCTION_ARGS)
327 : {
328 2 : text *document = PG_GETARG_TEXT_PP(0);
329 2 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
330 2 : xmlChar *plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2));
331 : xmlChar *xpath;
332 2 : text *volatile xpres = NULL;
333 2 : xpath_workspace *volatile workspace = NULL;
334 : PgXmlErrorContext *xmlerrcxt;
335 :
336 2 : xpath = pgxml_texttoxmlchar(xpathsupp);
337 2 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
338 :
339 2 : PG_TRY();
340 : {
341 2 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
342 2 : xpres = pgxml_result_to_text(workspace->res, NULL, NULL, plainsep);
343 : }
344 0 : PG_CATCH();
345 : {
346 0 : if (workspace)
347 0 : cleanup_workspace(workspace);
348 :
349 0 : pg_xml_done(xmlerrcxt, true);
350 0 : PG_RE_THROW();
351 : }
352 2 : PG_END_TRY();
353 :
354 2 : cleanup_workspace(workspace);
355 2 : pg_xml_done(xmlerrcxt, false);
356 :
357 2 : pfree(xpath);
358 :
359 2 : if (xpres == NULL)
360 0 : PG_RETURN_NULL();
361 2 : PG_RETURN_TEXT_P(xpres);
362 : }
363 :
364 :
365 2 : PG_FUNCTION_INFO_V1(xpath_string);
366 :
367 : Datum
368 1 : xpath_string(PG_FUNCTION_ARGS)
369 : {
370 1 : text *document = PG_GETARG_TEXT_PP(0);
371 1 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
372 : xmlChar *xpath;
373 : int32 pathsize;
374 1 : text *volatile xpres = NULL;
375 1 : xpath_workspace *volatile workspace = NULL;
376 : PgXmlErrorContext *xmlerrcxt;
377 :
378 1 : pathsize = VARSIZE_ANY_EXHDR(xpathsupp);
379 :
380 : /*
381 : * We encapsulate the supplied path with "string()" = 8 chars + 1 for NUL
382 : * at end
383 : */
384 : /* We could try casting to string using the libxml function? */
385 :
386 1 : xpath = (xmlChar *) palloc(pathsize + 9);
387 1 : memcpy(xpath, "string(", 7);
388 1 : memcpy(xpath + 7, VARDATA_ANY(xpathsupp), pathsize);
389 1 : xpath[pathsize + 7] = ')';
390 1 : xpath[pathsize + 8] = '\0';
391 :
392 1 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
393 :
394 1 : PG_TRY();
395 : {
396 1 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
397 1 : xpres = pgxml_result_to_text(workspace->res, NULL, NULL, NULL);
398 : }
399 0 : PG_CATCH();
400 : {
401 0 : if (workspace)
402 0 : cleanup_workspace(workspace);
403 :
404 0 : pg_xml_done(xmlerrcxt, true);
405 0 : PG_RE_THROW();
406 : }
407 1 : PG_END_TRY();
408 :
409 1 : cleanup_workspace(workspace);
410 1 : pg_xml_done(xmlerrcxt, false);
411 :
412 1 : pfree(xpath);
413 :
414 1 : if (xpres == NULL)
415 1 : PG_RETURN_NULL();
416 0 : PG_RETURN_TEXT_P(xpres);
417 : }
418 :
419 :
420 1 : PG_FUNCTION_INFO_V1(xpath_number);
421 :
422 : Datum
423 0 : xpath_number(PG_FUNCTION_ARGS)
424 : {
425 0 : text *document = PG_GETARG_TEXT_PP(0);
426 0 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
427 : xmlChar *xpath;
428 0 : volatile float4 fRes = 0.0;
429 0 : volatile bool isNull = false;
430 0 : xpath_workspace *volatile workspace = NULL;
431 : PgXmlErrorContext *xmlerrcxt;
432 :
433 0 : xpath = pgxml_texttoxmlchar(xpathsupp);
434 0 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
435 :
436 0 : PG_TRY();
437 : {
438 0 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
439 0 : pfree(xpath);
440 :
441 0 : if (workspace->res == NULL)
442 0 : isNull = true;
443 : else
444 0 : fRes = xmlXPathCastToNumber(workspace->res);
445 : }
446 0 : PG_CATCH();
447 : {
448 0 : if (workspace)
449 0 : cleanup_workspace(workspace);
450 :
451 0 : pg_xml_done(xmlerrcxt, true);
452 0 : PG_RE_THROW();
453 : }
454 0 : PG_END_TRY();
455 :
456 0 : cleanup_workspace(workspace);
457 0 : pg_xml_done(xmlerrcxt, false);
458 :
459 0 : if (isNull || xmlXPathIsNaN(fRes))
460 0 : PG_RETURN_NULL();
461 :
462 0 : PG_RETURN_FLOAT4(fRes);
463 : }
464 :
465 :
466 1 : PG_FUNCTION_INFO_V1(xpath_bool);
467 :
468 : Datum
469 0 : xpath_bool(PG_FUNCTION_ARGS)
470 : {
471 0 : text *document = PG_GETARG_TEXT_PP(0);
472 0 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
473 : xmlChar *xpath;
474 0 : volatile int bRes = 0;
475 0 : xpath_workspace *volatile workspace = NULL;
476 : PgXmlErrorContext *xmlerrcxt;
477 :
478 0 : xpath = pgxml_texttoxmlchar(xpathsupp);
479 0 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
480 :
481 0 : PG_TRY();
482 : {
483 0 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
484 0 : pfree(xpath);
485 :
486 0 : if (workspace->res == NULL)
487 0 : bRes = 0;
488 : else
489 0 : bRes = xmlXPathCastToBoolean(workspace->res);
490 : }
491 0 : PG_CATCH();
492 : {
493 0 : if (workspace)
494 0 : cleanup_workspace(workspace);
495 :
496 0 : pg_xml_done(xmlerrcxt, true);
497 0 : PG_RE_THROW();
498 : }
499 0 : PG_END_TRY();
500 :
501 0 : cleanup_workspace(workspace);
502 0 : pg_xml_done(xmlerrcxt, false);
503 :
504 0 : PG_RETURN_BOOL(bRes);
505 : }
506 :
507 :
508 :
509 : /* Core function to evaluate XPath query */
510 :
511 : static xpath_workspace *
512 7 : pgxml_xpath(text *document, xmlChar *xpath, PgXmlErrorContext *xmlerrcxt)
513 : {
514 7 : int32 docsize = VARSIZE_ANY_EXHDR(document);
515 7 : xmlXPathCompExprPtr volatile comppath = NULL;
516 7 : xpath_workspace *workspace = palloc0_object(xpath_workspace);
517 :
518 7 : workspace->doctree = NULL;
519 7 : workspace->ctxt = NULL;
520 7 : workspace->res = NULL;
521 :
522 7 : PG_TRY();
523 : {
524 7 : workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document),
525 : docsize, NULL, NULL,
526 : XML_PARSE_NOENT);
527 7 : if (workspace->doctree != NULL)
528 : {
529 6 : workspace->ctxt = xmlXPathNewContext(workspace->doctree);
530 6 : if (workspace->ctxt == NULL)
531 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
532 : "could not allocate XPath context");
533 :
534 6 : workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree);
535 :
536 : /* compile the path */
537 6 : comppath = xmlXPathCtxtCompile(workspace->ctxt, xpath);
538 6 : if (comppath == NULL || pg_xml_error_occurred(xmlerrcxt))
539 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
540 : "XPath Syntax Error");
541 :
542 : /* Now evaluate the path expression. */
543 6 : workspace->res = xmlXPathCompiledEval(comppath, workspace->ctxt);
544 :
545 6 : xmlXPathFreeCompExpr(comppath);
546 6 : comppath = NULL;
547 : }
548 : }
549 0 : PG_CATCH();
550 : {
551 0 : if (comppath != NULL)
552 0 : xmlXPathFreeCompExpr(comppath);
553 0 : cleanup_workspace(workspace);
554 :
555 0 : PG_RE_THROW();
556 : }
557 7 : PG_END_TRY();
558 :
559 7 : return workspace;
560 : }
561 :
562 : /* Clean up after processing the result of pgxml_xpath() */
563 : static void
564 7 : cleanup_workspace(xpath_workspace *workspace)
565 : {
566 7 : if (workspace->res)
567 6 : xmlXPathFreeObject(workspace->res);
568 7 : workspace->res = NULL;
569 7 : if (workspace->ctxt)
570 6 : xmlXPathFreeContext(workspace->ctxt);
571 7 : workspace->ctxt = NULL;
572 7 : if (workspace->doctree)
573 6 : xmlFreeDoc(workspace->doctree);
574 7 : workspace->doctree = NULL;
575 7 : }
576 :
577 : static text *
578 7 : pgxml_result_to_text(xmlXPathObjectPtr res,
579 : xmlChar *toptag,
580 : xmlChar *septag,
581 : xmlChar *plainsep)
582 : {
583 7 : xmlChar *volatile xpresstr = NULL;
584 7 : text *volatile xpres = NULL;
585 : PgXmlErrorContext *xmlerrcxt;
586 :
587 7 : if (res == NULL)
588 1 : return NULL;
589 :
590 : /* spin some error handling */
591 6 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
592 :
593 6 : PG_TRY();
594 : {
595 6 : switch (res->type)
596 : {
597 6 : case XPATH_NODESET:
598 6 : xpresstr = pgxmlNodeSetToText(res->nodesetval,
599 : toptag,
600 : septag, plainsep);
601 6 : break;
602 :
603 0 : case XPATH_STRING:
604 0 : xpresstr = xmlStrdup(res->stringval);
605 0 : if (xpresstr == NULL || pg_xml_error_occurred(xmlerrcxt))
606 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
607 : "could not allocate result");
608 0 : break;
609 :
610 0 : default:
611 0 : elog(NOTICE, "unsupported XQuery result: %d", res->type);
612 0 : xpresstr = xmlStrdup((const xmlChar *) "<unsupported/>");
613 0 : if (xpresstr == NULL || pg_xml_error_occurred(xmlerrcxt))
614 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
615 : "could not allocate result");
616 : }
617 :
618 : /* Now convert this result back to text */
619 6 : xpres = cstring_to_text((char *) xpresstr);
620 : }
621 0 : PG_CATCH();
622 : {
623 0 : if (xpresstr != NULL)
624 0 : xmlFree(xpresstr);
625 :
626 0 : pg_xml_done(xmlerrcxt, true);
627 :
628 0 : PG_RE_THROW();
629 : }
630 6 : PG_END_TRY();
631 :
632 : /* Free various storage */
633 6 : xmlFree(xpresstr);
634 :
635 6 : pg_xml_done(xmlerrcxt, false);
636 :
637 6 : return xpres;
638 : }
639 :
640 : /*
641 : * xpath_table is a table function. It needs some tidying (as do the
642 : * other functions here!
643 : */
644 2 : PG_FUNCTION_INFO_V1(xpath_table);
645 :
646 : Datum
647 5 : xpath_table(PG_FUNCTION_ARGS)
648 : {
649 : /* Function parameters */
650 5 : char *pkeyfield = text_to_cstring(PG_GETARG_TEXT_PP(0));
651 5 : char *xmlfield = text_to_cstring(PG_GETARG_TEXT_PP(1));
652 5 : char *relname = text_to_cstring(PG_GETARG_TEXT_PP(2));
653 5 : char *xpathset = text_to_cstring(PG_GETARG_TEXT_PP(3));
654 5 : char *condition = text_to_cstring(PG_GETARG_TEXT_PP(4));
655 :
656 : /* SPI (input tuple) support */
657 : SPITupleTable *tuptable;
658 : HeapTuple spi_tuple;
659 : TupleDesc spi_tupdesc;
660 :
661 :
662 5 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
663 : AttInMetadata *attinmeta;
664 :
665 : char **values;
666 : xmlChar **xpaths;
667 : char *pos;
668 5 : const char *pathsep = "|";
669 :
670 : int numpaths;
671 : int ret;
672 : uint64 proc;
673 : int j;
674 : int rownr; /* For issuing multiple rows from one original
675 : * document */
676 : bool had_values; /* To determine end of nodeset results */
677 : StringInfoData query_buf;
678 : PgXmlErrorContext *xmlerrcxt;
679 5 : volatile xmlDocPtr doctree = NULL;
680 5 : xmlXPathContextPtr volatile ctxt = NULL;
681 5 : xmlXPathObjectPtr volatile res = NULL;
682 5 : xmlXPathCompExprPtr volatile comppath = NULL;
683 5 : xmlChar *volatile resstr = NULL;
684 :
685 5 : InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
686 :
687 : /* must have at least one output column (for the pkey) */
688 5 : if (rsinfo->setDesc->natts < 1)
689 0 : ereport(ERROR,
690 : (errcode(ERRCODE_SYNTAX_ERROR),
691 : errmsg("xpath_table must have at least one output column")));
692 :
693 : /*
694 : * At the moment we assume that the returned attributes make sense for the
695 : * XPath specified (i.e. we trust the caller). It's not fatal if they get
696 : * it wrong - the input function for the column type will raise an error
697 : * if the path result can't be converted into the correct binary
698 : * representation.
699 : */
700 :
701 5 : attinmeta = TupleDescGetAttInMetadata(rsinfo->setDesc);
702 :
703 5 : values = (char **) palloc0(rsinfo->setDesc->natts * sizeof(char *));
704 5 : xpaths = (xmlChar **) palloc(rsinfo->setDesc->natts * sizeof(xmlChar *));
705 :
706 : /*
707 : * Split XPaths. xpathset is a writable CString.
708 : *
709 : * Note that we stop splitting once we've done all needed for tupdesc
710 : */
711 5 : numpaths = 0;
712 5 : pos = xpathset;
713 7 : while (numpaths < (rsinfo->setDesc->natts - 1))
714 : {
715 5 : xpaths[numpaths++] = (xmlChar *) pos;
716 5 : pos = strstr(pos, pathsep);
717 5 : if (pos != NULL)
718 : {
719 2 : *pos = '\0';
720 2 : pos++;
721 : }
722 : else
723 3 : break;
724 : }
725 :
726 : /* Now build query */
727 5 : initStringInfo(&query_buf);
728 :
729 : /* Build initial sql statement */
730 5 : appendStringInfo(&query_buf, "SELECT %s, %s FROM %s WHERE %s",
731 : pkeyfield,
732 : xmlfield,
733 : relname,
734 : condition);
735 :
736 5 : SPI_connect();
737 :
738 5 : if ((ret = SPI_exec(query_buf.data, 0)) != SPI_OK_SELECT)
739 0 : elog(ERROR, "xpath_table: SPI execution failed for query %s",
740 : query_buf.data);
741 :
742 5 : proc = SPI_processed;
743 5 : tuptable = SPI_tuptable;
744 5 : spi_tupdesc = tuptable->tupdesc;
745 :
746 : /*
747 : * Check that SPI returned correct result. If you put a comma into one of
748 : * the function parameters, this will catch it when the SPI query returns
749 : * e.g. 3 columns.
750 : */
751 5 : if (spi_tupdesc->natts != 2)
752 : {
753 0 : ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
754 : errmsg("expression returning multiple columns is not valid in parameter list"),
755 : errdetail("Expected two columns in SPI result, got %d.", spi_tupdesc->natts)));
756 : }
757 :
758 : /*
759 : * Setup the parser. This should happen after we are done evaluating the
760 : * query, in case it calls functions that set up libxml differently.
761 : */
762 5 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
763 :
764 5 : PG_TRY();
765 : {
766 : /* For each row i.e. document returned from SPI */
767 : uint64 i;
768 :
769 10 : for (i = 0; i < proc; i++)
770 : {
771 : char *pkey;
772 : char *xmldoc;
773 : HeapTuple ret_tuple;
774 :
775 : /* Extract the row data as C Strings */
776 5 : spi_tuple = tuptable->vals[i];
777 5 : pkey = SPI_getvalue(spi_tuple, spi_tupdesc, 1);
778 5 : xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc, 2);
779 :
780 : /*
781 : * Clear the values array, so that not-well-formed documents
782 : * return NULL in all columns. Note that this also means that
783 : * spare columns will be NULL.
784 : */
785 15 : for (j = 0; j < rsinfo->setDesc->natts; j++)
786 10 : values[j] = NULL;
787 :
788 : /* Insert primary key */
789 5 : values[0] = pkey;
790 :
791 : /* Parse the document */
792 5 : if (xmldoc)
793 5 : doctree = xmlReadMemory(xmldoc, strlen(xmldoc),
794 : NULL, NULL,
795 : XML_PARSE_NOENT);
796 : else /* treat NULL as not well-formed */
797 0 : doctree = NULL;
798 :
799 5 : if (doctree == NULL)
800 : {
801 : /* not well-formed, so output all-NULL tuple */
802 0 : ret_tuple = BuildTupleFromCStrings(attinmeta, values);
803 0 : tuplestore_puttuple(rsinfo->setResult, ret_tuple);
804 0 : heap_freetuple(ret_tuple);
805 : }
806 : else
807 : {
808 : /* New loop here - we have to deal with nodeset results */
809 5 : rownr = 0;
810 :
811 : do
812 : {
813 : /* Now evaluate the set of xpaths. */
814 8 : had_values = false;
815 18 : for (j = 0; j < numpaths; j++)
816 : {
817 10 : ctxt = NULL;
818 10 : res = NULL;
819 10 : comppath = NULL;
820 10 : resstr = NULL;
821 :
822 10 : ctxt = xmlXPathNewContext(doctree);
823 10 : if (ctxt == NULL || pg_xml_error_occurred(xmlerrcxt))
824 0 : xml_ereport(xmlerrcxt,
825 : ERROR, ERRCODE_OUT_OF_MEMORY,
826 : "could not allocate XPath context");
827 :
828 10 : ctxt->node = xmlDocGetRootElement(doctree);
829 :
830 : /* compile the path */
831 10 : comppath = xmlXPathCtxtCompile(ctxt, xpaths[j]);
832 10 : if (comppath == NULL || pg_xml_error_occurred(xmlerrcxt))
833 0 : xml_ereport(xmlerrcxt, ERROR,
834 : ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
835 : "XPath Syntax Error");
836 :
837 : /* Now evaluate the path expression. */
838 10 : res = xmlXPathCompiledEval(comppath, ctxt);
839 10 : xmlXPathFreeCompExpr(comppath);
840 10 : comppath = NULL;
841 :
842 10 : if (res != NULL)
843 : {
844 10 : switch (res->type)
845 : {
846 10 : case XPATH_NODESET:
847 : /* We see if this nodeset has enough nodes */
848 10 : if (res->nodesetval != NULL &&
849 10 : rownr < res->nodesetval->nodeNr)
850 : {
851 4 : resstr = xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
852 4 : if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt))
853 0 : xml_ereport(xmlerrcxt,
854 : ERROR, ERRCODE_OUT_OF_MEMORY,
855 : "could not allocate result");
856 4 : had_values = true;
857 : }
858 : else
859 6 : resstr = NULL;
860 :
861 10 : break;
862 :
863 0 : case XPATH_STRING:
864 0 : resstr = xmlStrdup(res->stringval);
865 0 : if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt))
866 0 : xml_ereport(xmlerrcxt,
867 : ERROR, ERRCODE_OUT_OF_MEMORY,
868 : "could not allocate result");
869 0 : break;
870 :
871 0 : default:
872 0 : elog(NOTICE, "unsupported XQuery result: %d", res->type);
873 0 : resstr = xmlStrdup((const xmlChar *) "<unsupported/>");
874 0 : if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt))
875 0 : xml_ereport(xmlerrcxt,
876 : ERROR, ERRCODE_OUT_OF_MEMORY,
877 : "could not allocate result");
878 : }
879 :
880 : /*
881 : * Insert this into the appropriate column in the
882 : * result tuple.
883 : */
884 10 : values[j + 1] = (char *) resstr;
885 10 : resstr = NULL;
886 : }
887 :
888 10 : if (res != NULL)
889 : {
890 10 : xmlXPathFreeObject(res);
891 10 : res = NULL;
892 : }
893 10 : xmlXPathFreeContext(ctxt);
894 10 : ctxt = NULL;
895 : }
896 :
897 : /* Now add the tuple to the output, if there is one. */
898 8 : if (had_values)
899 : {
900 3 : ret_tuple = BuildTupleFromCStrings(attinmeta, values);
901 3 : tuplestore_puttuple(rsinfo->setResult, ret_tuple);
902 3 : heap_freetuple(ret_tuple);
903 : }
904 :
905 : /* BuildTupleFromCStrings() has copied the values. */
906 18 : for (j = 1; j < rsinfo->setDesc->natts; j++)
907 : {
908 10 : if (values[j] != NULL)
909 : {
910 4 : xmlFree((xmlChar *) values[j]);
911 4 : values[j] = NULL;
912 : }
913 : }
914 :
915 8 : rownr++;
916 8 : } while (had_values);
917 : }
918 :
919 5 : if (doctree != NULL)
920 5 : xmlFreeDoc(doctree);
921 5 : doctree = NULL;
922 :
923 5 : if (pkey)
924 5 : pfree(pkey);
925 5 : if (xmldoc)
926 5 : pfree(xmldoc);
927 : }
928 : }
929 0 : PG_CATCH();
930 : {
931 0 : if (resstr != NULL)
932 0 : xmlFree(resstr);
933 0 : for (j = 1; j < rsinfo->setDesc->natts; j++)
934 : {
935 0 : if (values[j] != NULL)
936 0 : xmlFree((xmlChar *) values[j]);
937 : }
938 0 : if (res != NULL)
939 0 : xmlXPathFreeObject(res);
940 0 : if (comppath != NULL)
941 0 : xmlXPathFreeCompExpr(comppath);
942 0 : if (ctxt != NULL)
943 0 : xmlXPathFreeContext(ctxt);
944 0 : if (doctree != NULL)
945 0 : xmlFreeDoc(doctree);
946 :
947 0 : pg_xml_done(xmlerrcxt, true);
948 :
949 0 : PG_RE_THROW();
950 : }
951 5 : PG_END_TRY();
952 :
953 5 : if (doctree != NULL)
954 0 : xmlFreeDoc(doctree);
955 :
956 5 : pg_xml_done(xmlerrcxt, false);
957 :
958 5 : SPI_finish();
959 :
960 : /*
961 : * SFRM_Materialize mode expects us to return a NULL Datum. The actual
962 : * tuples are in our tuplestore and passed back through rsinfo->setResult.
963 : * rsinfo->setDesc is set to the tuple description that we actually used
964 : * to build our tuples with, so the caller can verify we did what it was
965 : * expecting.
966 : */
967 5 : return (Datum) 0;
968 : }
|