Line data Source code
1 : /*
2 : * contrib/xml2/xpath.c
3 : *
4 : * Parser interface for DOM-based parser (libxml) rather than
5 : * stream-based SAX-type parser
6 : */
7 : #include "postgres.h"
8 :
9 : #include "access/htup_details.h"
10 : #include "executor/spi.h"
11 : #include "fmgr.h"
12 : #include "funcapi.h"
13 : #include "lib/stringinfo.h"
14 : #include "utils/builtins.h"
15 : #include "utils/tuplestore.h"
16 : #include "utils/xml.h"
17 :
18 : /* libxml includes */
19 :
20 : #include <libxml/xpath.h>
21 : #include <libxml/tree.h>
22 : #include <libxml/xmlmemory.h>
23 : #include <libxml/xmlerror.h>
24 : #include <libxml/parserInternals.h>
25 :
26 1 : PG_MODULE_MAGIC_EXT(
27 : .name = "xml2",
28 : .version = PG_VERSION
29 : );
30 :
31 : /* exported for use by xslt_proc.c */
32 :
33 : PgXmlErrorContext *pgxml_parser_init(PgXmlStrictness strictness);
34 :
35 : /* workspace for pgxml_xpath() */
36 :
37 : typedef struct
38 : {
39 : xmlDocPtr doctree;
40 : xmlXPathContextPtr ctxt;
41 : xmlXPathObjectPtr res;
42 : } xpath_workspace;
43 :
44 : /* local declarations */
45 :
46 : static xmlChar *pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
47 : xmlChar *toptagname, xmlChar *septagname,
48 : xmlChar *plainsep);
49 :
50 : static text *pgxml_result_to_text(xmlXPathObjectPtr res, xmlChar *toptag,
51 : xmlChar *septag, xmlChar *plainsep);
52 :
53 : static xmlChar *pgxml_texttoxmlchar(text *textstring);
54 :
55 : static xpath_workspace *pgxml_xpath(text *document, xmlChar *xpath,
56 : PgXmlErrorContext *xmlerrcxt);
57 :
58 : static void cleanup_workspace(xpath_workspace *workspace);
59 :
60 :
61 : /*
62 : * Initialize for xml parsing.
63 : *
64 : * As with the underlying pg_xml_init function, calls to this MUST be followed
65 : * by a PG_TRY block that guarantees that pg_xml_done is called.
66 : */
67 : PgXmlErrorContext *
68 11 : pgxml_parser_init(PgXmlStrictness strictness)
69 : {
70 : PgXmlErrorContext *xmlerrcxt;
71 :
72 : /* Set up error handling (we share the core's error handler) */
73 11 : xmlerrcxt = pg_xml_init(strictness);
74 :
75 : /* Note: we're assuming an elog cannot be thrown by the following calls */
76 :
77 : /* Initialize libxml */
78 11 : xmlInitParser();
79 :
80 11 : return xmlerrcxt;
81 : }
82 :
83 :
84 : /* Encodes special characters (<, >, &, " and \r) as XML entities */
85 :
86 1 : PG_FUNCTION_INFO_V1(xml_encode_special_chars);
87 :
88 : Datum
89 0 : xml_encode_special_chars(PG_FUNCTION_ARGS)
90 : {
91 0 : text *tin = PG_GETARG_TEXT_PP(0);
92 0 : text *volatile tout = NULL;
93 0 : xmlChar *volatile tt = NULL;
94 : PgXmlErrorContext *xmlerrcxt;
95 :
96 0 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
97 :
98 0 : PG_TRY();
99 : {
100 : xmlChar *ts;
101 :
102 0 : ts = pgxml_texttoxmlchar(tin);
103 :
104 0 : tt = xmlEncodeSpecialChars(NULL, ts);
105 0 : if (tt == NULL || pg_xml_error_occurred(xmlerrcxt))
106 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
107 : "could not allocate xmlChar");
108 0 : pfree(ts);
109 :
110 0 : tout = cstring_to_text((char *) tt);
111 : }
112 0 : PG_CATCH();
113 : {
114 0 : if (tt != NULL)
115 0 : xmlFree(tt);
116 :
117 0 : pg_xml_done(xmlerrcxt, true);
118 :
119 0 : PG_RE_THROW();
120 : }
121 0 : PG_END_TRY();
122 :
123 0 : if (tt != NULL)
124 0 : xmlFree(tt);
125 :
126 0 : pg_xml_done(xmlerrcxt, false);
127 :
128 0 : PG_RETURN_TEXT_P(tout);
129 : }
130 :
131 : /*
132 : * Function translates a nodeset into a text representation
133 : *
134 : * iterates over each node in the set and calls xmlNodeDump to write it to
135 : * an xmlBuffer -from which an xmlChar * string is returned.
136 : *
137 : * each representation is surrounded by <tagname> ... </tagname>
138 : *
139 : * plainsep is an ordinary (not tag) separator - if used, then nodes are
140 : * cast to string as output method
141 : */
142 : static xmlChar *
143 5 : pgxmlNodeSetToText(xmlNodeSetPtr nodeset,
144 : xmlChar *toptagname,
145 : xmlChar *septagname,
146 : xmlChar *plainsep)
147 : {
148 5 : volatile xmlBufferPtr buf = NULL;
149 5 : xmlChar *volatile result = NULL;
150 : PgXmlErrorContext *xmlerrcxt;
151 :
152 : /* spin up some error handling */
153 5 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
154 :
155 5 : PG_TRY();
156 : {
157 5 : buf = xmlBufferCreate();
158 :
159 5 : if (buf == NULL || pg_xml_error_occurred(xmlerrcxt))
160 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
161 : "could not allocate xmlBuffer");
162 :
163 5 : if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
164 : {
165 1 : xmlBufferWriteChar(buf, "<");
166 1 : xmlBufferWriteCHAR(buf, toptagname);
167 1 : xmlBufferWriteChar(buf, ">");
168 : }
169 5 : if (nodeset != NULL)
170 : {
171 15 : for (int i = 0; i < nodeset->nodeNr; i++)
172 : {
173 10 : if (plainsep != NULL)
174 : {
175 4 : xmlBufferWriteCHAR(buf,
176 4 : xmlXPathCastNodeToString(nodeset->nodeTab[i]));
177 :
178 : /* If this isn't the last entry, write the plain sep. */
179 4 : if (i < (nodeset->nodeNr) - 1)
180 2 : xmlBufferWriteChar(buf, (char *) plainsep);
181 : }
182 : else
183 : {
184 6 : if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
185 : {
186 4 : xmlBufferWriteChar(buf, "<");
187 4 : xmlBufferWriteCHAR(buf, septagname);
188 4 : xmlBufferWriteChar(buf, ">");
189 : }
190 6 : xmlNodeDump(buf,
191 6 : nodeset->nodeTab[i]->doc,
192 6 : nodeset->nodeTab[i],
193 : 1, 0);
194 :
195 6 : if ((septagname != NULL) && (xmlStrlen(septagname) > 0))
196 : {
197 4 : xmlBufferWriteChar(buf, "</");
198 4 : xmlBufferWriteCHAR(buf, septagname);
199 4 : xmlBufferWriteChar(buf, ">");
200 : }
201 : }
202 : }
203 : }
204 :
205 5 : if ((toptagname != NULL) && (xmlStrlen(toptagname) > 0))
206 : {
207 1 : xmlBufferWriteChar(buf, "</");
208 1 : xmlBufferWriteCHAR(buf, toptagname);
209 1 : xmlBufferWriteChar(buf, ">");
210 : }
211 :
212 5 : result = xmlStrdup(xmlBufferContent(buf));
213 5 : if (result == NULL || pg_xml_error_occurred(xmlerrcxt))
214 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
215 : "could not allocate result");
216 : }
217 0 : PG_CATCH();
218 : {
219 0 : if (buf)
220 0 : xmlBufferFree(buf);
221 :
222 0 : pg_xml_done(xmlerrcxt, true);
223 :
224 0 : PG_RE_THROW();
225 : }
226 5 : PG_END_TRY();
227 :
228 5 : xmlBufferFree(buf);
229 5 : pg_xml_done(xmlerrcxt, false);
230 :
231 5 : return result;
232 : }
233 :
234 :
235 : /*
236 : * Translate a PostgreSQL "varlena" -i.e. a variable length parameter
237 : * into the libxml2 representation
238 : */
239 : static xmlChar *
240 13 : pgxml_texttoxmlchar(text *textstring)
241 : {
242 13 : return (xmlChar *) text_to_cstring(textstring);
243 : }
244 :
245 : /* Publicly visible XPath functions */
246 :
247 : /*
248 : * This is a "raw" xpath function. Check that it returns child elements
249 : * properly
250 : */
251 2 : PG_FUNCTION_INFO_V1(xpath_nodeset);
252 :
253 : Datum
254 3 : xpath_nodeset(PG_FUNCTION_ARGS)
255 : {
256 3 : text *document = PG_GETARG_TEXT_PP(0);
257 3 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
258 3 : xmlChar *toptag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2));
259 3 : xmlChar *septag = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(3));
260 : xmlChar *xpath;
261 3 : text *volatile xpres = NULL;
262 3 : xpath_workspace *volatile workspace = NULL;
263 : PgXmlErrorContext *xmlerrcxt;
264 :
265 3 : xpath = pgxml_texttoxmlchar(xpathsupp);
266 3 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
267 :
268 3 : PG_TRY();
269 : {
270 3 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
271 3 : xpres = pgxml_result_to_text(workspace->res, toptag, septag, NULL);
272 : }
273 0 : PG_CATCH();
274 : {
275 0 : if (workspace)
276 0 : cleanup_workspace(workspace);
277 :
278 0 : pg_xml_done(xmlerrcxt, true);
279 0 : PG_RE_THROW();
280 : }
281 3 : PG_END_TRY();
282 :
283 3 : cleanup_workspace(workspace);
284 3 : pg_xml_done(xmlerrcxt, false);
285 :
286 3 : pfree(xpath);
287 :
288 3 : if (xpres == NULL)
289 0 : PG_RETURN_NULL();
290 3 : PG_RETURN_TEXT_P(xpres);
291 : }
292 :
293 : /*
294 : * The following function is almost identical, but returns the elements in
295 : * a list.
296 : */
297 2 : PG_FUNCTION_INFO_V1(xpath_list);
298 :
299 : Datum
300 2 : xpath_list(PG_FUNCTION_ARGS)
301 : {
302 2 : text *document = PG_GETARG_TEXT_PP(0);
303 2 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
304 2 : xmlChar *plainsep = pgxml_texttoxmlchar(PG_GETARG_TEXT_PP(2));
305 : xmlChar *xpath;
306 2 : text *volatile xpres = NULL;
307 2 : xpath_workspace *volatile workspace = NULL;
308 : PgXmlErrorContext *xmlerrcxt;
309 :
310 2 : xpath = pgxml_texttoxmlchar(xpathsupp);
311 2 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
312 :
313 2 : PG_TRY();
314 : {
315 2 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
316 2 : xpres = pgxml_result_to_text(workspace->res, NULL, NULL, plainsep);
317 : }
318 0 : PG_CATCH();
319 : {
320 0 : if (workspace)
321 0 : cleanup_workspace(workspace);
322 :
323 0 : pg_xml_done(xmlerrcxt, true);
324 0 : PG_RE_THROW();
325 : }
326 2 : PG_END_TRY();
327 :
328 2 : cleanup_workspace(workspace);
329 2 : pg_xml_done(xmlerrcxt, false);
330 :
331 2 : pfree(xpath);
332 :
333 2 : if (xpres == NULL)
334 0 : PG_RETURN_NULL();
335 2 : PG_RETURN_TEXT_P(xpres);
336 : }
337 :
338 :
339 2 : PG_FUNCTION_INFO_V1(xpath_string);
340 :
341 : Datum
342 1 : xpath_string(PG_FUNCTION_ARGS)
343 : {
344 1 : text *document = PG_GETARG_TEXT_PP(0);
345 1 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
346 : xmlChar *xpath;
347 : int32 pathsize;
348 1 : text *volatile xpres = NULL;
349 1 : xpath_workspace *volatile workspace = NULL;
350 : PgXmlErrorContext *xmlerrcxt;
351 :
352 1 : pathsize = VARSIZE_ANY_EXHDR(xpathsupp);
353 :
354 : /*
355 : * We encapsulate the supplied path with "string()" = 8 chars + 1 for NUL
356 : * at end
357 : */
358 : /* We could try casting to string using the libxml function? */
359 :
360 1 : xpath = (xmlChar *) palloc(pathsize + 9);
361 1 : memcpy(xpath, "string(", 7);
362 1 : memcpy(xpath + 7, VARDATA_ANY(xpathsupp), pathsize);
363 1 : xpath[pathsize + 7] = ')';
364 1 : xpath[pathsize + 8] = '\0';
365 :
366 1 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
367 :
368 1 : PG_TRY();
369 : {
370 1 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
371 1 : xpres = pgxml_result_to_text(workspace->res, NULL, NULL, NULL);
372 : }
373 0 : PG_CATCH();
374 : {
375 0 : if (workspace)
376 0 : cleanup_workspace(workspace);
377 :
378 0 : pg_xml_done(xmlerrcxt, true);
379 0 : PG_RE_THROW();
380 : }
381 1 : PG_END_TRY();
382 :
383 1 : cleanup_workspace(workspace);
384 1 : pg_xml_done(xmlerrcxt, false);
385 :
386 1 : pfree(xpath);
387 :
388 1 : if (xpres == NULL)
389 1 : PG_RETURN_NULL();
390 0 : PG_RETURN_TEXT_P(xpres);
391 : }
392 :
393 :
394 1 : PG_FUNCTION_INFO_V1(xpath_number);
395 :
396 : Datum
397 0 : xpath_number(PG_FUNCTION_ARGS)
398 : {
399 0 : text *document = PG_GETARG_TEXT_PP(0);
400 0 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
401 : xmlChar *xpath;
402 0 : volatile float4 fRes = 0.0;
403 0 : volatile bool isNull = false;
404 0 : xpath_workspace *volatile workspace = NULL;
405 : PgXmlErrorContext *xmlerrcxt;
406 :
407 0 : xpath = pgxml_texttoxmlchar(xpathsupp);
408 0 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
409 :
410 0 : PG_TRY();
411 : {
412 0 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
413 0 : pfree(xpath);
414 :
415 0 : if (workspace->res == NULL)
416 0 : isNull = true;
417 : else
418 0 : fRes = xmlXPathCastToNumber(workspace->res);
419 : }
420 0 : PG_CATCH();
421 : {
422 0 : if (workspace)
423 0 : cleanup_workspace(workspace);
424 :
425 0 : pg_xml_done(xmlerrcxt, true);
426 0 : PG_RE_THROW();
427 : }
428 0 : PG_END_TRY();
429 :
430 0 : cleanup_workspace(workspace);
431 0 : pg_xml_done(xmlerrcxt, false);
432 :
433 0 : if (isNull || xmlXPathIsNaN(fRes))
434 0 : PG_RETURN_NULL();
435 :
436 0 : PG_RETURN_FLOAT4(fRes);
437 : }
438 :
439 :
440 1 : PG_FUNCTION_INFO_V1(xpath_bool);
441 :
442 : Datum
443 0 : xpath_bool(PG_FUNCTION_ARGS)
444 : {
445 0 : text *document = PG_GETARG_TEXT_PP(0);
446 0 : text *xpathsupp = PG_GETARG_TEXT_PP(1); /* XPath expression */
447 : xmlChar *xpath;
448 0 : volatile int bRes = 0;
449 0 : xpath_workspace *volatile workspace = NULL;
450 : PgXmlErrorContext *xmlerrcxt;
451 :
452 0 : xpath = pgxml_texttoxmlchar(xpathsupp);
453 0 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
454 :
455 0 : PG_TRY();
456 : {
457 0 : workspace = pgxml_xpath(document, xpath, xmlerrcxt);
458 0 : pfree(xpath);
459 :
460 0 : if (workspace->res == NULL)
461 0 : bRes = 0;
462 : else
463 0 : bRes = xmlXPathCastToBoolean(workspace->res);
464 : }
465 0 : PG_CATCH();
466 : {
467 0 : if (workspace)
468 0 : cleanup_workspace(workspace);
469 :
470 0 : pg_xml_done(xmlerrcxt, true);
471 0 : PG_RE_THROW();
472 : }
473 0 : PG_END_TRY();
474 :
475 0 : cleanup_workspace(workspace);
476 0 : pg_xml_done(xmlerrcxt, false);
477 :
478 0 : PG_RETURN_BOOL(bRes);
479 : }
480 :
481 :
482 :
483 : /* Core function to evaluate XPath query */
484 :
485 : static xpath_workspace *
486 6 : pgxml_xpath(text *document, xmlChar *xpath, PgXmlErrorContext *xmlerrcxt)
487 : {
488 6 : int32 docsize = VARSIZE_ANY_EXHDR(document);
489 : xmlXPathCompExprPtr comppath;
490 6 : xpath_workspace *workspace = palloc0_object(xpath_workspace);
491 :
492 6 : workspace->doctree = NULL;
493 6 : workspace->ctxt = NULL;
494 6 : workspace->res = NULL;
495 :
496 6 : workspace->doctree = xmlReadMemory((char *) VARDATA_ANY(document),
497 : docsize, NULL, NULL,
498 : XML_PARSE_NOENT);
499 6 : if (workspace->doctree != NULL)
500 : {
501 5 : workspace->ctxt = xmlXPathNewContext(workspace->doctree);
502 5 : workspace->ctxt->node = xmlDocGetRootElement(workspace->doctree);
503 :
504 : /* compile the path */
505 5 : comppath = xmlXPathCtxtCompile(workspace->ctxt, xpath);
506 5 : if (comppath == NULL || pg_xml_error_occurred(xmlerrcxt))
507 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
508 : "XPath Syntax Error");
509 :
510 : /* Now evaluate the path expression. */
511 5 : workspace->res = xmlXPathCompiledEval(comppath, workspace->ctxt);
512 :
513 5 : xmlXPathFreeCompExpr(comppath);
514 : }
515 :
516 6 : return workspace;
517 : }
518 :
519 : /* Clean up after processing the result of pgxml_xpath() */
520 : static void
521 6 : cleanup_workspace(xpath_workspace *workspace)
522 : {
523 6 : if (workspace->res)
524 5 : xmlXPathFreeObject(workspace->res);
525 6 : workspace->res = NULL;
526 6 : if (workspace->ctxt)
527 5 : xmlXPathFreeContext(workspace->ctxt);
528 6 : workspace->ctxt = NULL;
529 6 : if (workspace->doctree)
530 5 : xmlFreeDoc(workspace->doctree);
531 6 : workspace->doctree = NULL;
532 6 : }
533 :
534 : static text *
535 6 : pgxml_result_to_text(xmlXPathObjectPtr res,
536 : xmlChar *toptag,
537 : xmlChar *septag,
538 : xmlChar *plainsep)
539 : {
540 6 : xmlChar *volatile xpresstr = NULL;
541 6 : text *volatile xpres = NULL;
542 : PgXmlErrorContext *xmlerrcxt;
543 :
544 6 : if (res == NULL)
545 1 : return NULL;
546 :
547 : /* spin some error handling */
548 5 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
549 :
550 5 : PG_TRY();
551 : {
552 5 : switch (res->type)
553 : {
554 5 : case XPATH_NODESET:
555 5 : xpresstr = pgxmlNodeSetToText(res->nodesetval,
556 : toptag,
557 : septag, plainsep);
558 5 : break;
559 :
560 0 : case XPATH_STRING:
561 0 : xpresstr = xmlStrdup(res->stringval);
562 0 : if (xpresstr == NULL || pg_xml_error_occurred(xmlerrcxt))
563 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
564 : "could not allocate result");
565 0 : break;
566 :
567 0 : default:
568 0 : elog(NOTICE, "unsupported XQuery result: %d", res->type);
569 0 : xpresstr = xmlStrdup((const xmlChar *) "<unsupported/>");
570 0 : if (xpresstr == NULL || pg_xml_error_occurred(xmlerrcxt))
571 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
572 : "could not allocate result");
573 : }
574 :
575 : /* Now convert this result back to text */
576 5 : xpres = cstring_to_text((char *) xpresstr);
577 : }
578 0 : PG_CATCH();
579 : {
580 0 : if (xpresstr != NULL)
581 0 : xmlFree(xpresstr);
582 :
583 0 : pg_xml_done(xmlerrcxt, true);
584 :
585 0 : PG_RE_THROW();
586 : }
587 5 : PG_END_TRY();
588 :
589 : /* Free various storage */
590 5 : xmlFree(xpresstr);
591 :
592 5 : pg_xml_done(xmlerrcxt, false);
593 :
594 5 : return xpres;
595 : }
596 :
597 : /*
598 : * xpath_table is a table function. It needs some tidying (as do the
599 : * other functions here!
600 : */
601 2 : PG_FUNCTION_INFO_V1(xpath_table);
602 :
603 : Datum
604 5 : xpath_table(PG_FUNCTION_ARGS)
605 : {
606 : /* Function parameters */
607 5 : char *pkeyfield = text_to_cstring(PG_GETARG_TEXT_PP(0));
608 5 : char *xmlfield = text_to_cstring(PG_GETARG_TEXT_PP(1));
609 5 : char *relname = text_to_cstring(PG_GETARG_TEXT_PP(2));
610 5 : char *xpathset = text_to_cstring(PG_GETARG_TEXT_PP(3));
611 5 : char *condition = text_to_cstring(PG_GETARG_TEXT_PP(4));
612 :
613 : /* SPI (input tuple) support */
614 : SPITupleTable *tuptable;
615 : HeapTuple spi_tuple;
616 : TupleDesc spi_tupdesc;
617 :
618 :
619 5 : ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
620 : AttInMetadata *attinmeta;
621 :
622 : char **values;
623 : xmlChar **xpaths;
624 : char *pos;
625 5 : const char *pathsep = "|";
626 :
627 : int numpaths;
628 : int ret;
629 : uint64 proc;
630 : int j;
631 : int rownr; /* For issuing multiple rows from one original
632 : * document */
633 : bool had_values; /* To determine end of nodeset results */
634 : StringInfoData query_buf;
635 : PgXmlErrorContext *xmlerrcxt;
636 5 : volatile xmlDocPtr doctree = NULL;
637 :
638 5 : InitMaterializedSRF(fcinfo, MAT_SRF_USE_EXPECTED_DESC);
639 :
640 : /* must have at least one output column (for the pkey) */
641 5 : if (rsinfo->setDesc->natts < 1)
642 0 : ereport(ERROR,
643 : (errcode(ERRCODE_SYNTAX_ERROR),
644 : errmsg("xpath_table must have at least one output column")));
645 :
646 : /*
647 : * At the moment we assume that the returned attributes make sense for the
648 : * XPath specified (i.e. we trust the caller). It's not fatal if they get
649 : * it wrong - the input function for the column type will raise an error
650 : * if the path result can't be converted into the correct binary
651 : * representation.
652 : */
653 :
654 5 : attinmeta = TupleDescGetAttInMetadata(rsinfo->setDesc);
655 :
656 5 : values = (char **) palloc(rsinfo->setDesc->natts * sizeof(char *));
657 5 : xpaths = (xmlChar **) palloc(rsinfo->setDesc->natts * sizeof(xmlChar *));
658 :
659 : /*
660 : * Split XPaths. xpathset is a writable CString.
661 : *
662 : * Note that we stop splitting once we've done all needed for tupdesc
663 : */
664 5 : numpaths = 0;
665 5 : pos = xpathset;
666 7 : while (numpaths < (rsinfo->setDesc->natts - 1))
667 : {
668 5 : xpaths[numpaths++] = (xmlChar *) pos;
669 5 : pos = strstr(pos, pathsep);
670 5 : if (pos != NULL)
671 : {
672 2 : *pos = '\0';
673 2 : pos++;
674 : }
675 : else
676 3 : break;
677 : }
678 :
679 : /* Now build query */
680 5 : initStringInfo(&query_buf);
681 :
682 : /* Build initial sql statement */
683 5 : appendStringInfo(&query_buf, "SELECT %s, %s FROM %s WHERE %s",
684 : pkeyfield,
685 : xmlfield,
686 : relname,
687 : condition);
688 :
689 5 : SPI_connect();
690 :
691 5 : if ((ret = SPI_exec(query_buf.data, 0)) != SPI_OK_SELECT)
692 0 : elog(ERROR, "xpath_table: SPI execution failed for query %s",
693 : query_buf.data);
694 :
695 5 : proc = SPI_processed;
696 5 : tuptable = SPI_tuptable;
697 5 : spi_tupdesc = tuptable->tupdesc;
698 :
699 : /*
700 : * Check that SPI returned correct result. If you put a comma into one of
701 : * the function parameters, this will catch it when the SPI query returns
702 : * e.g. 3 columns.
703 : */
704 5 : if (spi_tupdesc->natts != 2)
705 : {
706 0 : ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
707 : errmsg("expression returning multiple columns is not valid in parameter list"),
708 : errdetail("Expected two columns in SPI result, got %d.", spi_tupdesc->natts)));
709 : }
710 :
711 : /*
712 : * Setup the parser. This should happen after we are done evaluating the
713 : * query, in case it calls functions that set up libxml differently.
714 : */
715 5 : xmlerrcxt = pgxml_parser_init(PG_XML_STRICTNESS_LEGACY);
716 :
717 5 : PG_TRY();
718 : {
719 : /* For each row i.e. document returned from SPI */
720 : uint64 i;
721 :
722 10 : for (i = 0; i < proc; i++)
723 : {
724 : char *pkey;
725 : char *xmldoc;
726 : xmlXPathContextPtr ctxt;
727 : xmlXPathObjectPtr res;
728 : xmlChar *resstr;
729 : xmlXPathCompExprPtr comppath;
730 : HeapTuple ret_tuple;
731 :
732 : /* Extract the row data as C Strings */
733 5 : spi_tuple = tuptable->vals[i];
734 5 : pkey = SPI_getvalue(spi_tuple, spi_tupdesc, 1);
735 5 : xmldoc = SPI_getvalue(spi_tuple, spi_tupdesc, 2);
736 :
737 : /*
738 : * Clear the values array, so that not-well-formed documents
739 : * return NULL in all columns. Note that this also means that
740 : * spare columns will be NULL.
741 : */
742 15 : for (j = 0; j < rsinfo->setDesc->natts; j++)
743 10 : values[j] = NULL;
744 :
745 : /* Insert primary key */
746 5 : values[0] = pkey;
747 :
748 : /* Parse the document */
749 5 : if (xmldoc)
750 5 : doctree = xmlReadMemory(xmldoc, strlen(xmldoc),
751 : NULL, NULL,
752 : XML_PARSE_NOENT);
753 : else /* treat NULL as not well-formed */
754 0 : doctree = NULL;
755 :
756 5 : if (doctree == NULL)
757 : {
758 : /* not well-formed, so output all-NULL tuple */
759 0 : ret_tuple = BuildTupleFromCStrings(attinmeta, values);
760 0 : tuplestore_puttuple(rsinfo->setResult, ret_tuple);
761 0 : heap_freetuple(ret_tuple);
762 : }
763 : else
764 : {
765 : /* New loop here - we have to deal with nodeset results */
766 5 : rownr = 0;
767 :
768 : do
769 : {
770 : /* Now evaluate the set of xpaths. */
771 8 : had_values = false;
772 18 : for (j = 0; j < numpaths; j++)
773 : {
774 10 : ctxt = xmlXPathNewContext(doctree);
775 10 : if (ctxt == NULL || pg_xml_error_occurred(xmlerrcxt))
776 0 : xml_ereport(xmlerrcxt,
777 : ERROR, ERRCODE_OUT_OF_MEMORY,
778 : "could not allocate XPath context");
779 :
780 10 : ctxt->node = xmlDocGetRootElement(doctree);
781 :
782 : /* compile the path */
783 10 : comppath = xmlXPathCtxtCompile(ctxt, xpaths[j]);
784 10 : if (comppath == NULL || pg_xml_error_occurred(xmlerrcxt))
785 0 : xml_ereport(xmlerrcxt, ERROR,
786 : ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
787 : "XPath Syntax Error");
788 :
789 : /* Now evaluate the path expression. */
790 10 : res = xmlXPathCompiledEval(comppath, ctxt);
791 10 : xmlXPathFreeCompExpr(comppath);
792 :
793 10 : if (res != NULL)
794 : {
795 10 : switch (res->type)
796 : {
797 10 : case XPATH_NODESET:
798 : /* We see if this nodeset has enough nodes */
799 10 : if (res->nodesetval != NULL &&
800 10 : rownr < res->nodesetval->nodeNr)
801 : {
802 4 : resstr = xmlXPathCastNodeToString(res->nodesetval->nodeTab[rownr]);
803 4 : if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt))
804 0 : xml_ereport(xmlerrcxt,
805 : ERROR, ERRCODE_OUT_OF_MEMORY,
806 : "could not allocate result");
807 4 : had_values = true;
808 : }
809 : else
810 6 : resstr = NULL;
811 :
812 10 : break;
813 :
814 0 : case XPATH_STRING:
815 0 : resstr = xmlStrdup(res->stringval);
816 0 : if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt))
817 0 : xml_ereport(xmlerrcxt,
818 : ERROR, ERRCODE_OUT_OF_MEMORY,
819 : "could not allocate result");
820 0 : break;
821 :
822 0 : default:
823 0 : elog(NOTICE, "unsupported XQuery result: %d", res->type);
824 0 : resstr = xmlStrdup((const xmlChar *) "<unsupported/>");
825 0 : if (resstr == NULL || pg_xml_error_occurred(xmlerrcxt))
826 0 : xml_ereport(xmlerrcxt,
827 : ERROR, ERRCODE_OUT_OF_MEMORY,
828 : "could not allocate result");
829 : }
830 :
831 : /*
832 : * Insert this into the appropriate column in the
833 : * result tuple.
834 : */
835 10 : values[j + 1] = (char *) resstr;
836 : }
837 10 : xmlXPathFreeContext(ctxt);
838 : }
839 :
840 : /* Now add the tuple to the output, if there is one. */
841 8 : if (had_values)
842 : {
843 3 : ret_tuple = BuildTupleFromCStrings(attinmeta, values);
844 3 : tuplestore_puttuple(rsinfo->setResult, ret_tuple);
845 3 : heap_freetuple(ret_tuple);
846 : }
847 :
848 8 : rownr++;
849 8 : } while (had_values);
850 : }
851 :
852 5 : if (doctree != NULL)
853 5 : xmlFreeDoc(doctree);
854 5 : doctree = NULL;
855 :
856 5 : if (pkey)
857 5 : pfree(pkey);
858 5 : if (xmldoc)
859 5 : pfree(xmldoc);
860 : }
861 : }
862 0 : PG_CATCH();
863 : {
864 0 : if (doctree != NULL)
865 0 : xmlFreeDoc(doctree);
866 :
867 0 : pg_xml_done(xmlerrcxt, true);
868 :
869 0 : PG_RE_THROW();
870 : }
871 5 : PG_END_TRY();
872 :
873 5 : if (doctree != NULL)
874 0 : xmlFreeDoc(doctree);
875 :
876 5 : pg_xml_done(xmlerrcxt, false);
877 :
878 5 : SPI_finish();
879 :
880 : /*
881 : * SFRM_Materialize mode expects us to return a NULL Datum. The actual
882 : * tuples are in our tuplestore and passed back through rsinfo->setResult.
883 : * rsinfo->setDesc is set to the tuple description that we actually used
884 : * to build our tuples with, so the caller can verify we did what it was
885 : * expecting.
886 : */
887 5 : return (Datum) 0;
888 : }
|