Line data Source code
1 : /*
2 : * contrib/xml2/xpath.c
3 : *
4 : * Parser interface for DOM-based parser (libxml) rather than
5 : * stream-based SAX-type parser
6 : */
7 : #include "postgres.h"
8 :
9 : #include "access/htup_details.h"
10 : #include "executor/spi.h"
11 : #include "fmgr.h"
12 : #include "funcapi.h"
13 : #include "lib/stringinfo.h"
14 : #include "utils/builtins.h"
15 : #include "utils/tuplestore.h"
16 : #include "utils/xml.h"
17 :
18 : /* libxml includes */
19 :
20 : #include <libxml/xpath.h>
21 : #include <libxml/tree.h>
22 : #include <libxml/xmlmemory.h>
23 : #include <libxml/xmlerror.h>
24 : #include <libxml/parserInternals.h>
25 :
/* Module magic block, carrying this module's name and version. */
PG_MODULE_MAGIC_EXT(
					.name = "xml2",
					.version = PG_VERSION
);
30 :
/*
 * Exported for use by xslt_proc.c.
 *
 * Sets up error handling and initializes libxml; returns the error context.
 */

PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness);
34 :
/*
 * Workspace for pgxml_xpath(): the libxml objects produced while evaluating
 * one XPath expression against one document.  All three are released by
 * cleanup_workspace().
 */

typedef struct
{
	xmlDocPtr	doctree;		/* parsed document; NULL if parsing failed */
	xmlXPathContextPtr ctxt;	/* XPath context built on doctree, or NULL */
	xmlXPathObjectPtr res;		/* result of the evaluation, or NULL */
} xpath_workspace;
43 :
/* local declarations */

/* Render a node set as text, with optional wrapping tags or a separator */
static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
								   xmlChar *toptagname, xmlChar *septagname,
								   xmlChar *plainsep);

/* Convert an XPath result object to a text datum (NULL if res is NULL) */
static text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag,
								  xmlChar *septag, xmlChar *plainsep);

/* Convert a text value to a NUL-terminated xmlChar string */
static xmlChar *pgxml_texttoxmlchar(text *textstring);

/* Parse a document and evaluate an XPath expression against it */
static xpath_workspace *pgxml_xpath(text *document, xmlChar *xpath,
									PgXmlErrorContext *xmlerrcxt);

/* Free the libxml objects held in a workspace */
static void cleanup_workspace(xpath_workspace *workspace);
59 :
60 :
61 : /*
62 : * Initialize for xml parsing.
63 : *
64 : * As with the underlying pg_xml_init function, calls to this MUST be followed
65 : * by a PG_TRY block that guarantees that pg_xml_done is called.
66 : */
67 : PgXmlErrorContext *
68 11 : pgxml_parser_init(PgXmlStrictness strictness)
69 : {
70 : PgXmlErrorContext *xmlerrcxt;
71 :
72 : /* Set up error handling (we share the core's error handler) */
73 11 : xmlerrcxt = pg_xml_init(strictness);
74 :
75 : /* Note: we're assuming an elog cannot be thrown by the following calls */
76 :
77 : /* Initialize libxml */
78 11 : xmlInitParser();
79 :
80 11 : return xmlerrcxt;
81 : }
82 :
83 :
84 : /* Encodes special characters (<, >, &, " and \r) as XML entities */
85 :
86 1 : PG_FUNCTION_INFO_V1(xml_encode_special_chars);
87 :
88 : Datum
89 0 : xml_encode_special_chars(PG_FUNCTION_ARGS)
90 : {
91 0 : text *tin = PG_GETARG_TEXT_PP(0);
92 0 : text *volatile tout = NULL;
93 0 : xmlChar *volatile tt = NULL;
94 : PgXmlErrorContext *xmlerrcxt;
95 :
96 0 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
97 :
98 0 : PG_TRY();
99 : {
100 : xmlChar *ts;
101 :
102 0 : ts = pgxml_texttoxmlchar(tin);
103 :
104 0 : tt = xmlEncodeSpecialChars(NULL, ts);
105 0 : if (tt == NULL || pg_xml_error_occurred(xmlerrcxt))
106 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
107 : "could not allocate xmlChar");
108 0 : pfree(ts);
109 :
110 0 : tout = cstring_to_text((char *) tt);
111 : }
112 0 : PG_CATCH();
113 : {
114 0 : if (tt != NULL)
115 0 : xmlFree(tt);
116 :
117 0 : pg_xml_done(xmlerrcxt, true);
118 :
119 0 : PG_RE_THROW();
120 : }
121 0 : PG_END_TRY();
122 :
123 0 : if (tt != NULL)
124 0 : xmlFree(tt);
125 :
126 0 : pg_xml_done(xmlerrcxt, false);
127 :
128 0 : PG_RETURN_TEXT_P(tout);
129 : }
130 :
131 : /*
132 : * Function translates a nodeset into a text representation
133 : *
134 : * iterates over each node in the set and calls xmlNodeDump to write it to
135 : * an xmlBuffer -from which an xmlChar * string is returned.
136 : *
137 : * each representation is surrounded by <tagname> ... </tagname>
138 : *
139 : * plainsep is an ordinary (not tag) separator - if used, then nodes are
140 : * cast to string as output method
141 : */
142 : static xmlChar *
143 5 : pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
144 : xmlChar *toptagname,
145 : xmlChar *septagname,
146 : xmlChar *plainsep)
147 : {
148 5 : volatile xmlBufferPtr buf = NULL;
149 5 : xmlChar *volatile result = NULL;
150 : PgXmlErrorContext *xmlerrcxt;
151 :
152 : /* spin up some error handling */
153 5 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
154 :
155 5 : PG_TRY();
156 : {
157 5 : buf = xmlBufferCreate();
158 :
159 5 : if (buf == NULL || pg_xml_error_occurred(xmlerrcxt))
160 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
161 : "could not allocate xmlBuffer");
162 :
163 5 : if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
164 : {
165 1 : xmlBufferWriteChar(buf, "<");
166 1 : xmlBufferWriteCHAR(buf, toptagname);
167 1 : xmlBufferWriteChar(buf, ">");
168 : }
169 5 : if (nodeset != NULL)
170 : {
171 15 : for (int i = 0; i < nodeset->nodeNr; i++)
172 : {
173 10 : if (plainsep != NULL)
174 : {
175 4 : xmlBufferWriteCHAR(buf,
176 4 : xmlXPathCastNodeToString(nodeset->nodeTab[i]));
177 :
178 : /* If this isn't the last entry, write the plain sep. */
179 4 : if (i < (nodeset->nodeNr) - 1)
180 2 : xmlBufferWriteChar(buf, (char *) plainsep);
181 : }
182 : else
183 : {
184 6 : if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
185 : {
186 4 : xmlBufferWriteChar(buf, "<");
187 4 : xmlBufferWriteCHAR(buf, septagname);
188 4 : xmlBufferWriteChar(buf, ">");
189 : }
190 6 : xmlNodeDump(buf,
191 6 : nodeset->nodeTab[i]->doc,
192 6 : nodeset->nodeTab[i],
193 : 1, 0);
194 :
195 6 : if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
196 : {
197 4 : xmlBufferWriteChar(buf, "</");
198 4 : xmlBufferWriteCHAR(buf, septagname);
199 4 : xmlBufferWriteChar(buf, ">");
200 : }
201 : }
202 : }
203 : }
204 :
205 5 : if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
206 : {
207 1 : xmlBufferWriteChar(buf, "</");
208 1 : xmlBufferWriteCHAR(buf, toptagname);
209 1 : xmlBufferWriteChar(buf, ">");
210 : }
211 :
212 5 : result = xmlStrdup(xmlBufferContent(buf));
213 5 : if (result == NULL || pg_xml_error_occurred(xmlerrcxt))
214 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
215 : "could not allocate result");
216 : }
217 0 : PG_CATCH();
218 : {
219 0 : if (buf)
220 0 : xmlBufferFree(buf);
221 :
222 0 : pg_xml_done(xmlerrcxt, true);
223 :
224 0 : PG_RE_THROW();
225 : }
226 5 : PG_END_TRY();
227 :
228 5 : xmlBufferFree(buf);
229 5 : pg_xml_done(xmlerrcxt, false);
230 :
231 5 : return result;
232 : }
233 :
234 :
235 : /* Translate a PostgreSQL "varlena" -i.e. a variable length parameter
236 : * into the libxml2 representation
237 : */
238 : static xmlChar *
239 13 : pgxml_texttoxmlchar(text *textstring)
240 : {
241 13 : return (xmlChar *) text_to_cstring(textstring);
242 : }
243 :
244 : /* Publicly visible XPath functions */
245 :
246 : /*
247 : * This is a "raw" xpath function. Check that it returns child elements
248 : * properly
249 : */
250 2 : PG_FUNCTION_INFO_V1(xpath_nodeset);
251 :
252 : Datum
253 3 : xpath_nodeset(PG_FUNCTION_ARGS)
254 : {
255 3 : text *document = PG_GETARG_TEXT_PP(0);
256 3 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
257 3 : xmlChar *toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2));
258 3 : xmlChar *septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(3));
259 : xmlChar *xpath;
260 3 : text *volatile xpres = NULL;
261 3 : xpath_workspace *volatile workspace = NULL;
262 : PgXmlErrorContext *xmlerrcxt;
263 :
264 3 : xpath = pgxml_texttoxmlchar(xpathsupp);
265 3 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
266 :
267 3 : PG_TRY();
268 : {
269 3 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
270 3 : xpres = pgxml_result_to_text(workspace->res, toptag, septag, NULL);
271 : }
272 0 : PG_CATCH();
273 : {
274 0 : if (workspace)
275 0 : cleanup_workspace(workspace);
276 :
277 0 : pg_xml_done(xmlerrcxt, true);
278 0 : PG_RE_THROW();
279 : }
280 3 : PG_END_TRY();
281 :
282 3 : cleanup_workspace(workspace);
283 3 : pg_xml_done(xmlerrcxt, false);
284 :
285 3 : pfree(xpath);
286 :
287 3 : if (xpres == NULL)
288 0 : PG_RETURN_NULL();
289 3 : PG_RETURN_TEXT_P(xpres);
290 : }
291 :
292 : /*
293 : * The following function is almost identical, but returns the elements in
294 : * a list.
295 : */
296 2 : PG_FUNCTION_INFO_V1(xpath_list);
297 :
298 : Datum
299 2 : xpath_list(PG_FUNCTION_ARGS)
300 : {
301 2 : text *document = PG_GETARG_TEXT_PP(0);
302 2 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
303 2 : xmlChar *plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2));
304 : xmlChar *xpath;
305 2 : text *volatile xpres = NULL;
306 2 : xpath_workspace *volatile workspace = NULL;
307 : PgXmlErrorContext *xmlerrcxt;
308 :
309 2 : xpath = pgxml_texttoxmlchar(xpathsupp);
310 2 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
311 :
312 2 : PG_TRY();
313 : {
314 2 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
315 2 : xpres = pgxml_result_to_text(workspace->res, NULL, NULL, plainsep);
316 : }
317 0 : PG_CATCH();
318 : {
319 0 : if (workspace)
320 0 : cleanup_workspace(workspace);
321 :
322 0 : pg_xml_done(xmlerrcxt, true);
323 0 : PG_RE_THROW();
324 : }
325 2 : PG_END_TRY();
326 :
327 2 : cleanup_workspace(workspace);
328 2 : pg_xml_done(xmlerrcxt, false);
329 :
330 2 : pfree(xpath);
331 :
332 2 : if (xpres == NULL)
333 0 : PG_RETURN_NULL();
334 2 : PG_RETURN_TEXT_P(xpres);
335 : }
336 :
337 :
338 2 : PG_FUNCTION_INFO_V1(xpath_string);
339 :
340 : Datum
341 1 : xpath_string(PG_FUNCTION_ARGS)
342 : {
343 1 : text *document = PG_GETARG_TEXT_PP(0);
344 1 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
345 : xmlChar *xpath;
346 : int32 pathsize;
347 1 : text *volatile xpres = NULL;
348 1 : xpath_workspace *volatile workspace = NULL;
349 : PgXmlErrorContext *xmlerrcxt;
350 :
351 1 : pathsize = VARSIZE_ANY_EXHDR(xpathsupp);
352 :
353 : /*
354 : * We encapsulate the supplied path with "string()" = 8 chars + 1 for NUL
355 : * at end
356 : */
357 : /* We could try casting to string using the libxml function? */
358 :
359 1 : xpath = (xmlChar *) palloc(pathsize + 9);
360 1 : memcpy(xpath, "string(", 7);
361 1 : memcpy(xpath + 7, VARDATA_ANY(xpathsupp), pathsize);
362 1 : xpath[pathsize + 7] = ')';
363 1 : xpath[pathsize + 8] = '\0';
364 :
365 1 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
366 :
367 1 : PG_TRY();
368 : {
369 1 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
370 1 : xpres = pgxml_result_to_text(workspace->res, NULL, NULL, NULL);
371 : }
372 0 : PG_CATCH();
373 : {
374 0 : if (workspace)
375 0 : cleanup_workspace(workspace);
376 :
377 0 : pg_xml_done(xmlerrcxt, true);
378 0 : PG_RE_THROW();
379 : }
380 1 : PG_END_TRY();
381 :
382 1 : cleanup_workspace(workspace);
383 1 : pg_xml_done(xmlerrcxt, false);
384 :
385 1 : pfree(xpath);
386 :
387 1 : if (xpres == NULL)
388 1 : PG_RETURN_NULL();
389 0 : PG_RETURN_TEXT_P(xpres);
390 : }
391 :
392 :
393 1 : PG_FUNCTION_INFO_V1(xpath_number);
394 :
395 : Datum
396 0 : xpath_number(PG_FUNCTION_ARGS)
397 : {
398 0 : text *document = PG_GETARG_TEXT_PP(0);
399 0 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
400 : xmlChar *xpath;
401 0 : volatile float4 fRes = 0.0;
402 0 : volatile bool isNull = false;
403 0 : xpath_workspace *volatile workspace = NULL;
404 : PgXmlErrorContext *xmlerrcxt;
405 :
406 0 : xpath = pgxml_texttoxmlchar(xpathsupp);
407 0 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
408 :
409 0 : PG_TRY();
410 : {
411 0 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
412 0 : pfree(xpath);
413 :
414 0 : if (workspace->res == NULL)
415 0 : isNull = true;
416 : else
417 0 : fRes = xmlXPathCastToNumber(workspace->res);
418 : }
419 0 : PG_CATCH();
420 : {
421 0 : if (workspace)
422 0 : cleanup_workspace(workspace);
423 :
424 0 : pg_xml_done(xmlerrcxt, true);
425 0 : PG_RE_THROW();
426 : }
427 0 : PG_END_TRY();
428 :
429 0 : cleanup_workspace(workspace);
430 0 : pg_xml_done(xmlerrcxt, false);
431 :
432 0 : if (isNull || xmlXPathIsNaN(fRes))
433 0 : PG_RETURN_NULL();
434 :
435 0 : PG_RETURN_FLOAT4(fRes);
436 : }
437 :
438 :
439 1 : PG_FUNCTION_INFO_V1(xpath_bool);
440 :
441 : Datum
442 0 : xpath_bool(PG_FUNCTION_ARGS)
443 : {
444 0 : text *document = PG_GETARG_TEXT_PP(0);
445 0 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
446 : xmlChar *xpath;
447 0 : volatile int bRes = 0;
448 0 : xpath_workspace *volatile workspace = NULL;
449 : PgXmlErrorContext *xmlerrcxt;
450 :
451 0 : xpath = pgxml_texttoxmlchar(xpathsupp);
452 0 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
453 :
454 0 : PG_TRY();
455 : {
456 0 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
457 0 : pfree(xpath);
458 :
459 0 : if (workspace->res == NULL)
460 0 : bRes = 0;
461 : else
462 0 : bRes = xmlXPathCastToBoolean(workspace->res);
463 : }
464 0 : PG_CATCH();
465 : {
466 0 : if (workspace)
467 0 : cleanup_workspace(workspace);
468 :
469 0 : pg_xml_done(xmlerrcxt, true);
470 0 : PG_RE_THROW();
471 : }
472 0 : PG_END_TRY();
473 :
474 0 : cleanup_workspace(workspace);
475 0 : pg_xml_done(xmlerrcxt, false);
476 :
477 0 : PG_RETURN_BOOL(bRes);
478 : }
479 :
480 :
481 :
482 : /* Core function to evaluate XPath query */
483 :
484 : static xpath_workspace *
485 6 : pgxml_xpath(text *document, xmlChar *xpath, PgXmlErrorContext *xmlerrcxt)
486 : {
487 6 : int32 docsize = VARSIZE_ANY_EXHDR(document);
488 : xmlXPathCompExprPtr comppath;
489 6 : xpath_workspace *workspace = palloc0_object(xpath_workspace);
490 :
491 6 : workspace->doctree = NULL;
492 6 : workspace->ctxt = NULL;
493 6 : workspace->res = NULL;
494 :
495 6 : workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document),
496 : docsize, NULL, NULL,
497 : XML_PARSE_NOENT);
498 6 : if (workspace->doctree != NULL)
499 : {
500 5 : workspace->ctxt = xmlXPathNewContext(workspace->doctree);
501 5 : workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree);
502 :
503 : /* compile the path */
504 5 : comppath = xmlXPathCtxtCompile(workspace->ctxt, xpath);
505 5 : if (comppath == NULL || pg_xml_error_occurred(xmlerrcxt))
506 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
507 : "XPath Syntax Error");
508 :
509 : /* Now evaluate the path expression. */
510 5 : workspace->res = xmlXPathCompiledEval(comppath, workspace->ctxt);
511 :
512 5 : xmlXPathFreeCompExpr(comppath);
513 : }
514 :
515 6 : return workspace;
516 : }
517 :
518 : /* Clean up after processing the result of pgxml_xpath() */
519 : static void
520 6 : cleanup_workspace(xpath_workspace *workspace)
521 : {
522 6 : if (workspace->res)
523 5 : xmlXPathFreeObject(workspace->res);
524 6 : workspace->res = NULL;
525 6 : if (workspace->ctxt)
526 5 : xmlXPathFreeContext(workspace->ctxt);
527 6 : workspace->ctxt = NULL;
528 6 : if (workspace->doctree)
529 5 : xmlFreeDoc(workspace->doctree);
530 6 : workspace->doctree = NULL;
531 6 : }
532 :
533 : static text *
534 6 : pgxml_result_to_text(xmlXPathObjectPtr res,
535 : xmlChar *toptag,
536 : xmlChar *septag,
537 : xmlChar *plainsep)
538 : {
539 6 : xmlChar *volatile xpresstr = NULL;
540 6 : text *volatile xpres = NULL;
541 : PgXmlErrorContext *xmlerrcxt;
542 :
543 6 : if (res == NULL)
544 1 : return NULL;
545 :
546 : /* spin some error handling */
547 5 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
548 :
549 5 : PG_TRY();
550 : {
551 5 : switch (res->type)
552 : {
553 5 : case XPATH_NODESET:
554 5 : xpresstr = pgxmlNodeSetToText(res->nodesetval,
555 : toptag,
556 : septag, plainsep);
557 5 : break;
558 :
559 0 : case XPATH_STRING:
560 0 : xpresstr = xmlStrdup(res->stringval);
561 0 : if (xpresstr == NULL || pg_xml_error_occurred(xmlerrcxt))
562 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
563 : "could not allocate result");
564 0 : break;
565 :
566 0 : default:
567 0 : elog(NOTICE, "unsupported XQuery result: %d", res->type);
568 0 : xpresstr = xmlStrdup((const xmlChar *) "<unsupported/>");
569 0 : if (xpresstr == NULL || pg_xml_error_occurred(xmlerrcxt))
570 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
571 : "could not allocate result");
572 : }
573 :
574 : /* Now convert this result back to text */
575 5 : xpres = cstring_to_text((char *) xpresstr);
576 : }
577 0 : PG_CATCH();
578 : {
579 0 : if (xpresstr != NULL)
580 0 : xmlFree(xpresstr);
581 :
582 0 : pg_xml_done(xmlerrcxt, true);
583 :
584 0 : PG_RE_THROW();
585 : }
586 5 : PG_END_TRY();
587 :
588 : /* Free various storage */
589 5 : xmlFree(xpresstr);
590 :
591 5 : pg_xml_done(xmlerrcxt, false);
592 :
593 5 : return xpres;
594 : }
595 :
596 : /*
597 : * xpath_table is a table function. It needs some tidying (as do the
598 : * other functions here!
599 : */
600 2 : PG_FUNCTION_INFO_V1(xpath_table);
601 :
602 : Datum
603 5 : xpath_table(PG_FUNCTION_ARGS)
604 : {
605 : /* Function parameters */
606 5 : char *pkeyfield = text_to_cstring(PG_GETARG_TEXT_PP(0));
607 5 : char *xmlfield = text_to_cstring(PG_GETARG_TEXT_PP(1));
608 5 : char *relname = text_to_cstring(PG_GETARG_TEXT_PP(2));
609 5 : char *xpathset = text_to_cstring(PG_GETARG_TEXT_PP(3));
610 5 : char *condition = text_to_cstring(PG_GETARG_TEXT_PP(4));
611 :
612 : /* SPI (input tuple) support */
613 : SPITupleTable *tuptable;
614 : HeapTuple spi_tuple;
615 : TupleDesc spi_tupdesc;
616 :
617 :
618 5 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
619 : AttInMetadata *attinmeta;
620 :
621 : char **values;
622 : xmlChar **xpaths;
623 : char *pos;
624 5 : const char *pathsep = "|";
625 :
626 : int numpaths;
627 : int ret;
628 : uint64 proc;
629 : int j;
630 : int rownr; /* For issuing multiple rows from one original
631 : * document */
632 : bool had_values; /* To determine end of nodeset results */
633 : StringInfoData query_buf;
634 : PgXmlErrorContext *xmlerrcxt;
635 5 : volatile xmlDocPtr doctree = NULL;
636 :
637 5 : InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
638 :
639 : /* must have at least one output column (for the pkey) */
640 5 : if (rsinfo->setDesc->natts < 1)
641 0 : ereport(ERROR,
642 : (errcode(ERRCODE_SYNTAX_ERROR),
643 : errmsg("xpath_table must have at least one output column")));
644 :
645 : /*
646 : * At the moment we assume that the returned attributes make sense for the
647 : * XPath specified (i.e. we trust the caller). It's not fatal if they get
648 : * it wrong - the input function for the column type will raise an error
649 : * if the path result can't be converted into the correct binary
650 : * representation.
651 : */
652 :
653 5 : attinmeta = TupleDescGetAttInMetadata(rsinfo->setDesc);
654 :
655 5 : values = (char **) palloc(rsinfo->setDesc->natts * sizeof(char *));
656 5 : xpaths = (xmlChar **) palloc(rsinfo->setDesc->natts * sizeof(xmlChar *));
657 :
658 : /*
659 : * Split XPaths. xpathset is a writable CString.
660 : *
661 : * Note that we stop splitting once we've done all needed for tupdesc
662 : */
663 5 : numpaths = 0;
664 5 : pos = xpathset;
665 7 : while (numpaths < (rsinfo->setDesc->natts - 1))
666 : {
667 5 : xpaths[numpaths++] = (xmlChar *) pos;
668 5 : pos = strstr(pos, pathsep);
669 5 : if (pos != NULL)
670 : {
671 2 : *pos = '\0';
672 2 : pos++;
673 : }
674 : else
675 3 : break;
676 : }
677 :
678 : /* Now build query */
679 5 : initStringInfo(&query_buf);
680 :
681 : /* Build initial sql statement */
682 5 : appendStringInfo(&query_buf, "SELECT %s, %s FROM %s WHERE %s",
683 : pkeyfield,
684 : xmlfield,
685 : relname,
686 : condition);
687 :
688 5 : SPI_connect();
689 :
690 5 : if ((ret = SPI_exec(query_buf.data, 0)) != SPI_OK_SELECT)
691 0 : elog(ERROR, "xpath_table: SPI execution failed for query %s",
692 : query_buf.data);
693 :
694 5 : proc = SPI_processed;
695 5 : tuptable = SPI_tuptable;
696 5 : spi_tupdesc = tuptable->tupdesc;
697 :
698 : /*
699 : * Check that SPI returned correct result. If you put a comma into one of
700 : * the function parameters, this will catch it when the SPI query returns
701 : * e.g. 3 columns.
702 : */
703 5 : if (spi_tupdesc->natts != 2)
704 : {
705 0 : ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
706 : errmsg("expression returning multiple columns is not valid in parameter list"),
707 : errdetail("Expected two columns in SPI result, got %d.", spi_tupdesc->natts)));
708 : }
709 :
710 : /*
711 : * Setup the parser. This should happen after we are done evaluating the
712 : * query, in case it calls functions that set up libxml differently.
713 : */
714 5 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
715 :
716 5 : PG_TRY();
717 : {
718 : /* For each row i.e. document returned from SPI */
719 : uint64 i;
720 :
721 10 : for (i = 0; i < proc; i++)
722 : {
723 : char *pkey;
724 : char *xmldoc;
725 : xmlXPathContextPtr ctxt;
726 : xmlXPathObjectPtr res;
727 : xmlChar *resstr;
728 : xmlXPathCompExprPtr comppath;
729 : HeapTuple ret_tuple;
730 :
731 : /* Extract the row data as C Strings */
732 5 : spi_tuple = tuptable->vals[i];
733 5 : pkey = SPI_getvalue(spi_tuple, spi_tupdesc, 1);
734 5 : xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc, 2);
735 :
736 : /*
737 : * Clear the values array, so that not-well-formed documents
738 : * return NULL in all columns. Note that this also means that
739 : * spare columns will be NULL.
740 : */
741 15 : for (j = 0; j < rsinfo->setDesc->natts; j++)
742 10 : values[j] = NULL;
743 :
744 : /* Insert primary key */
745 5 : values[0] = pkey;
746 :
747 : /* Parse the document */
748 5 : if (xmldoc)
749 5 : doctree = xmlReadMemory(xmldoc, strlen(xmldoc),
750 : NULL, NULL,
751 : XML_PARSE_NOENT);
752 : else /* treat NULL as not well-formed */
753 0 : doctree = NULL;
754 :
755 5 : if (doctree == NULL)
756 : {
757 : /* not well-formed, so output all-NULL tuple */
758 0 : ret_tuple = BuildTupleFromCStrings(attinmeta, values);
759 0 : tuplestore_puttuple(rsinfo->setResult, ret_tuple);
760 0 : heap_freetuple(ret_tuple);
761 : }
762 : else
763 : {
764 : /* New loop here - we have to deal with nodeset results */
765 5 : rownr = 0;
766 :
767 : do
768 : {
769 : /* Now evaluate the set of xpaths. */
770 8 : had_values = false;
771 18 : for (j = 0; j < numpaths; j++)
772 : {
773 10 : ctxt = xmlXPathNewContext(doctree);
774 10 : if (ctxt == NULL || pg_xml_error_occurred(xmlerrcxt))
775 0 : xml_ereport(xmlerrcxt,
776 : ERROR, ERRCODE_OUT_OF_MEMORY,
777 : "could not allocate XPath context");
778 :
779 10 : ctxt->node = xmlDocGetRootElement(doctree);
780 :
781 : /* compile the path */
782 10 : comppath = xmlXPathCtxtCompile(ctxt, xpaths[j]);
783 10 : if (comppath == NULL || pg_xml_error_occurred(xmlerrcxt))
784 0 : xml_ereport(xmlerrcxt, ERROR,
785 : ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
786 : "XPath Syntax Error");
787 :
788 : /* Now evaluate the path expression. */
789 10 : res = xmlXPathCompiledEval(comppath, ctxt);
790 10 : xmlXPathFreeCompExpr(comppath);
791 :
792 10 : if (res != NULL)
793 : {
794 10 : switch (res->type)
795 : {
796 10 : case XPATH_NODESET:
797 : /* We see if this nodeset has enough nodes */
798 10 : if (res->nodesetval != NULL &&
799 10 : rownr < res->nodesetval->nodeNr)
800 : {
801 4 : resstr = xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
802 4 : if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt))
803 0 : xml_ereport(xmlerrcxt,
804 : ERROR, ERRCODE_OUT_OF_MEMORY,
805 : "could not allocate result");
806 4 : had_values = true;
807 : }
808 : else
809 6 : resstr = NULL;
810 :
811 10 : break;
812 :
813 0 : case XPATH_STRING:
814 0 : resstr = xmlStrdup(res->stringval);
815 0 : if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt))
816 0 : xml_ereport(xmlerrcxt,
817 : ERROR, ERRCODE_OUT_OF_MEMORY,
818 : "could not allocate result");
819 0 : break;
820 :
821 0 : default:
822 0 : elog(NOTICE, "unsupported XQuery result: %d", res->type);
823 0 : resstr = xmlStrdup((const xmlChar *) "<unsupported/>");
824 0 : if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt))
825 0 : xml_ereport(xmlerrcxt,
826 : ERROR, ERRCODE_OUT_OF_MEMORY,
827 : "could not allocate result");
828 : }
829 :
830 : /*
831 : * Insert this into the appropriate column in the
832 : * result tuple.
833 : */
834 10 : values[j + 1] = (char *) resstr;
835 : }
836 10 : xmlXPathFreeContext(ctxt);
837 : }
838 :
839 : /* Now add the tuple to the output, if there is one. */
840 8 : if (had_values)
841 : {
842 3 : ret_tuple = BuildTupleFromCStrings(attinmeta, values);
843 3 : tuplestore_puttuple(rsinfo->setResult, ret_tuple);
844 3 : heap_freetuple(ret_tuple);
845 : }
846 :
847 8 : rownr++;
848 8 : } while (had_values);
849 : }
850 :
851 5 : if (doctree != NULL)
852 5 : xmlFreeDoc(doctree);
853 5 : doctree = NULL;
854 :
855 5 : if (pkey)
856 5 : pfree(pkey);
857 5 : if (xmldoc)
858 5 : pfree(xmldoc);
859 : }
860 : }
861 0 : PG_CATCH();
862 : {
863 0 : if (doctree != NULL)
864 0 : xmlFreeDoc(doctree);
865 :
866 0 : pg_xml_done(xmlerrcxt, true);
867 :
868 0 : PG_RE_THROW();
869 : }
870 5 : PG_END_TRY();
871 :
872 5 : if (doctree != NULL)
873 0 : xmlFreeDoc(doctree);
874 :
875 5 : pg_xml_done(xmlerrcxt, false);
876 :
877 5 : SPI_finish();
878 :
879 : /*
880 : * SFRM_Materialize mode expects us to return a NULL Datum. The actual
881 : * tuples are in our tuplestore and passed back through rsinfo->setResult.
882 : * rsinfo->setDesc is set to the tuple description that we actually used
883 : * to build our tuples with, so the caller can verify we did what it was
884 : * expecting.
885 : */
886 5 : return (Datum) 0;
887 : }
|