Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * xml.c
4 : * XML data type support.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * src/backend/utils/adt/xml.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : /*
16 : * Generally, XML type support is only available when libxml use was
17 : * configured during the build. But even if that is not done, the
18 : * type and all the functions are available, but most of them will
19 : * fail. For one thing, this avoids having to manage variant catalog
20 : * installations. But it also has nice effects such as that you can
21 : * dump a database containing XML type data even if the server is not
22 : * linked with libxml. Thus, make sure xml_out() works even if nothing
23 : * else does.
24 : */
25 :
26 : /*
27 : * Notes on memory management:
28 : *
29 : * Sometimes libxml allocates global structures in the hope that it can reuse
30 : * them later on. This makes it impractical to change the xmlMemSetup
31 : * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32 : * allocated with malloc() or vice versa. Since libxml might be used by
33 : * loadable modules, eg libperl, our only safe choices are to change the
34 : * functions at postmaster/backend launch or not at all. Since we'd rather
35 : * not activate libxml in sessions that might never use it, the latter choice
36 : * is the preferred one. However, for debugging purposes it can be awfully
37 : * handy to constrain libxml's allocations to be done in a specific palloc
38 : * context, where they're easy to track. Therefore there is code here that
39 : * can be enabled in debug builds to redirect libxml's allocations into a
40 : * special context LibxmlContext. It's not recommended to turn this on in
41 : * a production build because of the possibility of bad interactions with
42 : * external modules.
43 : */
44 : /* #define USE_LIBXMLCONTEXT */
45 :
46 : #include "postgres.h"
47 :
48 : #ifdef USE_LIBXML
49 : #include <libxml/chvalid.h>
50 : #include <libxml/entities.h>
51 : #include <libxml/parser.h>
52 : #include <libxml/parserInternals.h>
53 : #include <libxml/tree.h>
54 : #include <libxml/uri.h>
55 : #include <libxml/xmlerror.h>
56 : #include <libxml/xmlsave.h>
57 : #include <libxml/xmlversion.h>
58 : #include <libxml/xmlwriter.h>
59 : #include <libxml/xpath.h>
60 : #include <libxml/xpathInternals.h>
61 :
62 : /*
63 : * We used to check for xmlStructuredErrorContext via a configure test; but
64 : * that doesn't work on Windows, so instead use this grottier method of
65 : * testing the library version number.
66 : */
67 : #if LIBXML_VERSION >= 20704
68 : #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
69 : #endif
70 :
71 : /*
72 : * libxml2 2.12 decided to insert "const" into the error handler API.
73 : */
74 : #if LIBXML_VERSION >= 21200
75 : #define PgXmlErrorPtr const xmlError *
76 : #else
77 : #define PgXmlErrorPtr xmlErrorPtr
78 : #endif
79 :
80 : #endif /* USE_LIBXML */
81 :
82 : #include "access/htup_details.h"
83 : #include "access/table.h"
84 : #include "catalog/namespace.h"
85 : #include "catalog/pg_class.h"
86 : #include "catalog/pg_type.h"
87 : #include "commands/dbcommands.h"
88 : #include "executor/spi.h"
89 : #include "executor/tablefunc.h"
90 : #include "fmgr.h"
91 : #include "lib/stringinfo.h"
92 : #include "libpq/pqformat.h"
93 : #include "mb/pg_wchar.h"
94 : #include "miscadmin.h"
95 : #include "nodes/execnodes.h"
96 : #include "nodes/miscnodes.h"
97 : #include "nodes/nodeFuncs.h"
98 : #include "utils/array.h"
99 : #include "utils/builtins.h"
100 : #include "utils/date.h"
101 : #include "utils/datetime.h"
102 : #include "utils/lsyscache.h"
103 : #include "utils/rel.h"
104 : #include "utils/syscache.h"
105 : #include "utils/xml.h"
106 :
107 :
108 : /* GUC variables */
109 : int xmlbinary = XMLBINARY_BASE64;
110 : int xmloption = XMLOPTION_CONTENT;
111 :
112 : #ifdef USE_LIBXML
113 :
114 : /* random number to identify PgXmlErrorContext */
115 : #define ERRCXT_MAGIC 68275028
116 :
117 : struct PgXmlErrorContext
118 : {
119 : int magic;
120 : /* strictness argument passed to pg_xml_init */
121 : PgXmlStrictness strictness;
122 : /* current error status and accumulated message, if any */
123 : bool err_occurred;
124 : StringInfoData err_buf;
125 : /* previous libxml error handling state (saved by pg_xml_init) */
126 : xmlStructuredErrorFunc saved_errfunc;
127 : void *saved_errcxt;
128 : /* previous libxml entity handler (saved by pg_xml_init) */
129 : xmlExternalEntityLoader saved_entityfunc;
130 : };
131 :
132 : static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
133 : xmlParserCtxtPtr ctxt);
134 : static void xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
135 : int sqlcode, const char *msg);
136 : static void xml_errorHandler(void *data, PgXmlErrorPtr error);
137 : static int errdetail_for_xml_code(int code);
138 : static void chopStringInfoNewlines(StringInfo str);
139 : static void appendStringInfoLineSeparator(StringInfo str);
140 :
141 : #ifdef USE_LIBXMLCONTEXT
142 :
143 : static MemoryContext LibxmlContext = NULL;
144 :
145 : static void xml_memory_init(void);
146 : static void *xml_palloc(size_t size);
147 : static void *xml_repalloc(void *ptr, size_t size);
148 : static void xml_pfree(void *ptr);
149 : static char *xml_pstrdup(const char *string);
150 : #endif /* USE_LIBXMLCONTEXT */
151 :
152 : static xmlChar *xml_text2xmlChar(text *in);
153 : static int parse_xml_decl(const xmlChar *str, size_t *lenp,
154 : xmlChar **version, xmlChar **encoding, int *standalone);
155 : static bool print_xml_decl(StringInfo buf, const xmlChar *version,
156 : pg_enc encoding, int standalone);
157 : static bool xml_doctype_in_content(const xmlChar *str);
158 : static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
159 : bool preserve_whitespace, int encoding,
160 : XmlOptionType *parsed_xmloptiontype,
161 : xmlNodePtr *parsed_nodes,
162 : Node *escontext);
163 : static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
164 : static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
165 : ArrayBuildState *astate,
166 : PgXmlErrorContext *xmlerrcxt);
167 : static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
168 : #endif /* USE_LIBXML */
169 :
170 : static void xmldata_root_element_start(StringInfo result, const char *eltname,
171 : const char *xmlschema, const char *targetns,
172 : bool top_level);
173 : static void xmldata_root_element_end(StringInfo result, const char *eltname);
174 : static StringInfo query_to_xml_internal(const char *query, char *tablename,
175 : const char *xmlschema, bool nulls, bool tableforest,
176 : const char *targetns, bool top_level);
177 : static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
178 : bool nulls, bool tableforest, const char *targetns);
179 : static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
180 : List *relid_list, bool nulls,
181 : bool tableforest, const char *targetns);
182 : static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
183 : bool nulls, bool tableforest,
184 : const char *targetns);
185 : static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
186 : static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
187 : static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
188 : static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
189 : char *tablename, bool nulls, bool tableforest,
190 : const char *targetns, bool top_level);
191 :
192 : /* XMLTABLE support */
193 : #ifdef USE_LIBXML
194 : /* random number to identify XmlTableContext */
195 : #define XMLTABLE_CONTEXT_MAGIC 46922182
196 : typedef struct XmlTableBuilderData
197 : {
198 : int magic;
199 : int natts;
200 : long int row_count;
201 : PgXmlErrorContext *xmlerrcxt;
202 : xmlParserCtxtPtr ctxt;
203 : xmlDocPtr doc;
204 : xmlXPathContextPtr xpathcxt;
205 : xmlXPathCompExprPtr xpathcomp;
206 : xmlXPathObjectPtr xpathobj;
207 : xmlXPathCompExprPtr *xpathscomp;
208 : } XmlTableBuilderData;
209 : #endif
210 :
211 : static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
212 : static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
213 : static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
214 : const char *uri);
215 : static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
216 : static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
217 : const char *path, int colnum);
218 : static bool XmlTableFetchRow(struct TableFuncScanState *state);
219 : static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
220 : Oid typid, int32 typmod, bool *isnull);
221 : static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
222 :
223 : const TableFuncRoutine XmlTableRoutine =
224 : {
225 : .InitOpaque = XmlTableInitOpaque,
226 : .SetDocument = XmlTableSetDocument,
227 : .SetNamespace = XmlTableSetNamespace,
228 : .SetRowFilter = XmlTableSetRowFilter,
229 : .SetColumnFilter = XmlTableSetColumnFilter,
230 : .FetchRow = XmlTableFetchRow,
231 : .GetValue = XmlTableGetValue,
232 : .DestroyOpaque = XmlTableDestroyOpaque
233 : };
234 :
/*
 * Raise the common "feature not supported" error; used by the entry points
 * below on the code path compiled when USE_LIBXML is not defined.
 */
#define NO_XML_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("unsupported XML feature"), \
			 errdetail("This functionality requires the server to be built with libxml support.")))
240 :
241 :
242 : /* from SQL/XML:2008 section 4.9 */
243 : #define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
244 : #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
245 : #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
246 :
247 :
248 : #ifdef USE_LIBXML
249 :
250 : static int
251 0 : xmlChar_to_encoding(const xmlChar *encoding_name)
252 : {
253 0 : int encoding = pg_char_to_encoding((const char *) encoding_name);
254 :
255 0 : if (encoding < 0)
256 0 : ereport(ERROR,
257 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
258 : errmsg("invalid encoding name \"%s\"",
259 : (const char *) encoding_name)));
260 0 : return encoding;
261 : }
262 : #endif
263 :
264 :
265 : /*
266 : * xml_in uses a plain C string to VARDATA conversion, so for the time being
267 : * we use the conversion function for the text datatype.
268 : *
269 : * This is only acceptable so long as xmltype and text use the same
270 : * representation.
271 : */
272 : Datum
273 840 : xml_in(PG_FUNCTION_ARGS)
274 : {
275 : #ifdef USE_LIBXML
276 840 : char *s = PG_GETARG_CSTRING(0);
277 : xmltype *vardata;
278 : xmlDocPtr doc;
279 :
280 : /* Build the result object. */
281 840 : vardata = (xmltype *) cstring_to_text(s);
282 :
283 : /*
284 : * Parse the data to check if it is well-formed XML data.
285 : *
286 : * Note: we don't need to worry about whether a soft error is detected.
287 : */
288 840 : doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding(),
289 840 : NULL, NULL, fcinfo->context);
290 792 : if (doc != NULL)
291 780 : xmlFreeDoc(doc);
292 :
293 792 : PG_RETURN_XML_P(vardata);
294 : #else
295 : NO_XML_SUPPORT();
296 : return 0;
297 : #endif
298 : }
299 :
300 :
301 : #define PG_XML_DEFAULT_VERSION "1.0"
302 :
303 :
304 : /*
305 : * xml_out_internal uses a plain VARDATA to C string conversion, so for the
306 : * time being we use the conversion function for the text datatype.
307 : *
308 : * This is only acceptable so long as xmltype and text use the same
309 : * representation.
310 : */
311 : static char *
312 23164 : xml_out_internal(xmltype *x, pg_enc target_encoding)
313 : {
314 23164 : char *str = text_to_cstring((text *) x);
315 :
316 : #ifdef USE_LIBXML
317 23164 : size_t len = strlen(str);
318 : xmlChar *version;
319 : int standalone;
320 : int res_code;
321 :
322 23164 : if ((res_code = parse_xml_decl((xmlChar *) str,
323 : &len, &version, NULL, &standalone)) == 0)
324 : {
325 : StringInfoData buf;
326 :
327 23164 : initStringInfo(&buf);
328 :
329 23164 : if (!print_xml_decl(&buf, version, target_encoding, standalone))
330 : {
331 : /*
332 : * If we are not going to produce an XML declaration, eat a single
333 : * newline in the original string to prevent empty first lines in
334 : * the output.
335 : */
336 23116 : if (*(str + len) == '\n')
337 6 : len += 1;
338 : }
339 23164 : appendStringInfoString(&buf, str + len);
340 :
341 23164 : pfree(str);
342 :
343 23164 : return buf.data;
344 : }
345 :
346 0 : ereport(WARNING,
347 : errcode(ERRCODE_DATA_CORRUPTED),
348 : errmsg_internal("could not parse XML declaration in stored value"),
349 : errdetail_for_xml_code(res_code));
350 : #endif
351 0 : return str;
352 : }
353 :
354 :
355 : Datum
356 22900 : xml_out(PG_FUNCTION_ARGS)
357 : {
358 22900 : xmltype *x = PG_GETARG_XML_P(0);
359 :
360 : /*
361 : * xml_out removes the encoding property in all cases. This is because we
362 : * cannot control from here whether the datum will be converted to a
363 : * different client encoding, so we'd do more harm than good by including
364 : * it.
365 : */
366 22900 : PG_RETURN_CSTRING(xml_out_internal(x, 0));
367 : }
368 :
369 :
370 : Datum
371 0 : xml_recv(PG_FUNCTION_ARGS)
372 : {
373 : #ifdef USE_LIBXML
374 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
375 : xmltype *result;
376 : char *str;
377 : char *newstr;
378 : int nbytes;
379 : xmlDocPtr doc;
380 0 : xmlChar *encodingStr = NULL;
381 : int encoding;
382 :
383 : /*
384 : * Read the data in raw format. We don't know yet what the encoding is, as
385 : * that information is embedded in the xml declaration; so we have to
386 : * parse that before converting to server encoding.
387 : */
388 0 : nbytes = buf->len - buf->cursor;
389 0 : str = (char *) pq_getmsgbytes(buf, nbytes);
390 :
391 : /*
392 : * We need a null-terminated string to pass to parse_xml_decl(). Rather
393 : * than make a separate copy, make the temporary result one byte bigger
394 : * than it needs to be.
395 : */
396 0 : result = palloc(nbytes + 1 + VARHDRSZ);
397 0 : SET_VARSIZE(result, nbytes + VARHDRSZ);
398 0 : memcpy(VARDATA(result), str, nbytes);
399 0 : str = VARDATA(result);
400 0 : str[nbytes] = '\0';
401 :
402 0 : parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
403 :
404 : /*
405 : * If encoding wasn't explicitly specified in the XML header, treat it as
406 : * UTF-8, as that's the default in XML. This is different from xml_in(),
407 : * where the input has to go through the normal client to server encoding
408 : * conversion.
409 : */
410 0 : encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
411 :
412 : /*
413 : * Parse the data to check if it is well-formed XML data. Assume that
414 : * xml_parse will throw ERROR if not.
415 : */
416 0 : doc = xml_parse(result, xmloption, true, encoding, NULL, NULL, NULL);
417 0 : xmlFreeDoc(doc);
418 :
419 : /* Now that we know what we're dealing with, convert to server encoding */
420 0 : newstr = pg_any_to_server(str, nbytes, encoding);
421 :
422 0 : if (newstr != str)
423 : {
424 0 : pfree(result);
425 0 : result = (xmltype *) cstring_to_text(newstr);
426 0 : pfree(newstr);
427 : }
428 :
429 0 : PG_RETURN_XML_P(result);
430 : #else
431 : NO_XML_SUPPORT();
432 : return 0;
433 : #endif
434 : }
435 :
436 :
437 : Datum
438 0 : xml_send(PG_FUNCTION_ARGS)
439 : {
440 0 : xmltype *x = PG_GETARG_XML_P(0);
441 : char *outval;
442 : StringInfoData buf;
443 :
444 : /*
445 : * xml_out_internal doesn't convert the encoding, it just prints the right
446 : * declaration. pq_sendtext will do the conversion.
447 : */
448 0 : outval = xml_out_internal(x, pg_get_client_encoding());
449 :
450 0 : pq_begintypsend(&buf);
451 0 : pq_sendtext(&buf, outval, strlen(outval));
452 0 : pfree(outval);
453 0 : PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
454 : }
455 :
456 :
457 : #ifdef USE_LIBXML
458 : static void
459 132 : appendStringInfoText(StringInfo str, const text *t)
460 : {
461 132 : appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
462 132 : }
463 : #endif
464 :
465 :
466 : static xmltype *
467 22144 : stringinfo_to_xmltype(StringInfo buf)
468 : {
469 22144 : return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
470 : }
471 :
472 :
473 : static xmltype *
474 78 : cstring_to_xmltype(const char *string)
475 : {
476 78 : return (xmltype *) cstring_to_text(string);
477 : }
478 :
479 :
480 : #ifdef USE_LIBXML
481 : static xmltype *
482 22242 : xmlBuffer_to_xmltype(xmlBufferPtr buf)
483 : {
484 22242 : return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
485 : xmlBufferLength(buf));
486 : }
487 : #endif
488 :
489 :
490 : Datum
491 42 : xmlcomment(PG_FUNCTION_ARGS)
492 : {
493 : #ifdef USE_LIBXML
494 42 : text *arg = PG_GETARG_TEXT_PP(0);
495 42 : char *argdata = VARDATA_ANY(arg);
496 42 : int len = VARSIZE_ANY_EXHDR(arg);
497 : StringInfoData buf;
498 : int i;
499 :
500 : /* check for "--" in string or "-" at the end */
501 180 : for (i = 1; i < len; i++)
502 : {
503 144 : if (argdata[i] == '-' && argdata[i - 1] == '-')
504 6 : ereport(ERROR,
505 : (errcode(ERRCODE_INVALID_XML_COMMENT),
506 : errmsg("invalid XML comment")));
507 : }
508 36 : if (len > 0 && argdata[len - 1] == '-')
509 6 : ereport(ERROR,
510 : (errcode(ERRCODE_INVALID_XML_COMMENT),
511 : errmsg("invalid XML comment")));
512 :
513 30 : initStringInfo(&buf);
514 30 : appendStringInfoString(&buf, "<!--");
515 30 : appendStringInfoText(&buf, arg);
516 30 : appendStringInfoString(&buf, "-->");
517 :
518 30 : PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
519 : #else
520 : NO_XML_SUPPORT();
521 : return 0;
522 : #endif
523 : }
524 :
525 :
526 : Datum
527 30 : xmltext(PG_FUNCTION_ARGS)
528 : {
529 : #ifdef USE_LIBXML
530 30 : text *arg = PG_GETARG_TEXT_PP(0);
531 : text *result;
532 30 : xmlChar *xmlbuf = NULL;
533 :
534 30 : xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg));
535 :
536 : Assert(xmlbuf);
537 :
538 30 : result = cstring_to_text_with_len((const char *) xmlbuf, xmlStrlen(xmlbuf));
539 30 : xmlFree(xmlbuf);
540 30 : PG_RETURN_XML_P(result);
541 : #else
542 : NO_XML_SUPPORT();
543 : return 0;
544 : #endif /* not USE_LIBXML */
545 : }
546 :
547 :
548 : /*
549 : * TODO: xmlconcat needs to merge the notations and unparsed entities
550 : * of the argument values. Not very important in practice, though.
551 : */
552 : xmltype *
553 21894 : xmlconcat(List *args)
554 : {
555 : #ifdef USE_LIBXML
556 21894 : int global_standalone = 1;
557 21894 : xmlChar *global_version = NULL;
558 21894 : bool global_version_no_value = false;
559 : StringInfoData buf;
560 : ListCell *v;
561 :
562 21894 : initStringInfo(&buf);
563 65688 : foreach(v, args)
564 : {
565 43794 : xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
566 : size_t len;
567 : xmlChar *version;
568 : int standalone;
569 : char *str;
570 :
571 43794 : len = VARSIZE(x) - VARHDRSZ;
572 43794 : str = text_to_cstring((text *) x);
573 :
574 43794 : parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
575 :
576 43794 : if (standalone == 0 && global_standalone == 1)
577 0 : global_standalone = 0;
578 43794 : if (standalone < 0)
579 43782 : global_standalone = -1;
580 :
581 43794 : if (!version)
582 43776 : global_version_no_value = true;
583 18 : else if (!global_version)
584 12 : global_version = version;
585 6 : else if (xmlStrcmp(version, global_version) != 0)
586 0 : global_version_no_value = true;
587 :
588 43794 : appendStringInfoString(&buf, str + len);
589 43794 : pfree(str);
590 : }
591 :
592 21894 : if (!global_version_no_value || global_standalone >= 0)
593 : {
594 : StringInfoData buf2;
595 :
596 6 : initStringInfo(&buf2);
597 :
598 6 : print_xml_decl(&buf2,
599 6 : (!global_version_no_value) ? global_version : NULL,
600 : 0,
601 : global_standalone);
602 :
603 6 : appendBinaryStringInfo(&buf2, buf.data, buf.len);
604 6 : buf = buf2;
605 : }
606 :
607 21894 : return stringinfo_to_xmltype(&buf);
608 : #else
609 : NO_XML_SUPPORT();
610 : return NULL;
611 : #endif
612 : }
613 :
614 :
615 : /*
616 : * XMLAGG support
617 : */
618 : Datum
619 21870 : xmlconcat2(PG_FUNCTION_ARGS)
620 : {
621 21870 : if (PG_ARGISNULL(0))
622 : {
623 18 : if (PG_ARGISNULL(1))
624 0 : PG_RETURN_NULL();
625 : else
626 18 : PG_RETURN_XML_P(PG_GETARG_XML_P(1));
627 : }
628 21852 : else if (PG_ARGISNULL(1))
629 0 : PG_RETURN_XML_P(PG_GETARG_XML_P(0));
630 : else
631 21852 : PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
632 : PG_GETARG_XML_P(1))));
633 : }
634 :
635 :
636 : Datum
637 6 : texttoxml(PG_FUNCTION_ARGS)
638 : {
639 6 : text *data = PG_GETARG_TEXT_PP(0);
640 :
641 6 : PG_RETURN_XML_P(xmlparse(data, xmloption, true));
642 : }
643 :
644 :
645 : Datum
646 0 : xmltotext(PG_FUNCTION_ARGS)
647 : {
648 0 : xmltype *data = PG_GETARG_XML_P(0);
649 :
650 : /* It's actually binary compatible. */
651 0 : PG_RETURN_TEXT_P((text *) data);
652 : }
653 :
654 :
655 : text *
656 180 : xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
657 : {
658 : #ifdef USE_LIBXML
659 : text *volatile result;
660 : xmlDocPtr doc;
661 : XmlOptionType parsed_xmloptiontype;
662 : xmlNodePtr content_nodes;
663 180 : volatile xmlBufferPtr buf = NULL;
664 180 : volatile xmlSaveCtxtPtr ctxt = NULL;
665 180 : ErrorSaveContext escontext = {T_ErrorSaveContext};
666 : PgXmlErrorContext *xmlerrcxt;
667 : #endif
668 :
669 180 : if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
670 : {
671 : /*
672 : * We don't actually need to do anything, so just return the
673 : * binary-compatible input. For backwards-compatibility reasons,
674 : * allow such cases to succeed even without USE_LIBXML.
675 : */
676 36 : return (text *) data;
677 : }
678 :
679 : #ifdef USE_LIBXML
680 :
681 : /*
682 : * Parse the input according to the xmloption.
683 : *
684 : * preserve_whitespace is set to false in case we are indenting, otherwise
685 : * libxml2 will fail to indent elements that have whitespace between them.
686 : */
687 144 : doc = xml_parse(data, xmloption_arg, !indent, GetDatabaseEncoding(),
688 : &parsed_xmloptiontype, &content_nodes,
689 144 : (Node *) &escontext);
690 144 : if (doc == NULL || escontext.error_occurred)
691 : {
692 30 : if (doc)
693 0 : xmlFreeDoc(doc);
694 : /* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
695 30 : ereport(ERROR,
696 : (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
697 : errmsg("not an XML document")));
698 : }
699 :
700 : /* If we weren't asked to indent, we're done. */
701 114 : if (!indent)
702 : {
703 18 : xmlFreeDoc(doc);
704 18 : return (text *) data;
705 : }
706 :
707 : /* Otherwise, we gotta spin up some error handling. */
708 96 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
709 :
710 96 : PG_TRY();
711 : {
712 96 : size_t decl_len = 0;
713 :
714 : /* The serialized data will go into this buffer. */
715 96 : buf = xmlBufferCreate();
716 :
717 96 : if (buf == NULL || xmlerrcxt->err_occurred)
718 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
719 : "could not allocate xmlBuffer");
720 :
721 : /* Detect whether there's an XML declaration */
722 96 : parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
723 :
724 : /*
725 : * Emit declaration only if the input had one. Note: some versions of
726 : * xmlSaveToBuffer leak memory if a non-null encoding argument is
727 : * passed, so don't do that. We don't want any encoding conversion
728 : * anyway.
729 : */
730 96 : if (decl_len == 0)
731 84 : ctxt = xmlSaveToBuffer(buf, NULL,
732 : XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
733 : else
734 12 : ctxt = xmlSaveToBuffer(buf, NULL,
735 : XML_SAVE_FORMAT);
736 :
737 96 : if (ctxt == NULL || xmlerrcxt->err_occurred)
738 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
739 : "could not allocate xmlSaveCtxt");
740 :
741 96 : if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
742 : {
743 : /* If it's a document, saving is easy. */
744 42 : if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
745 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
746 : "could not save document to xmlBuffer");
747 : }
748 54 : else if (content_nodes != NULL)
749 : {
750 : /*
751 : * Deal with the case where we have non-singly-rooted XML.
752 : * libxml's dump functions don't work well for that without help.
753 : * We build a fake root node that serves as a container for the
754 : * content nodes, and then iterate over the nodes.
755 : */
756 : xmlNodePtr root;
757 : xmlNodePtr newline;
758 :
759 48 : root = xmlNewNode(NULL, (const xmlChar *) "content-root");
760 48 : if (root == NULL || xmlerrcxt->err_occurred)
761 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
762 : "could not allocate xml node");
763 :
764 : /* This attaches root to doc, so we need not free it separately. */
765 48 : xmlDocSetRootElement(doc, root);
766 48 : xmlAddChildList(root, content_nodes);
767 :
768 : /*
769 : * We use this node to insert newlines in the dump. Note: in at
770 : * least some libxml versions, xmlNewDocText would not attach the
771 : * node to the document even if we passed it. Therefore, manage
772 : * freeing of this node manually, and pass NULL here to make sure
773 : * there's not a dangling link.
774 : */
775 48 : newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
776 48 : if (newline == NULL || xmlerrcxt->err_occurred)
777 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
778 : "could not allocate xml node");
779 :
780 126 : for (xmlNodePtr node = root->children; node; node = node->next)
781 : {
782 : /* insert newlines between nodes */
783 78 : if (node->type != XML_TEXT_NODE && node->prev != NULL)
784 : {
785 24 : if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
786 : {
787 0 : xmlFreeNode(newline);
788 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
789 : "could not save newline to xmlBuffer");
790 : }
791 : }
792 :
793 78 : if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
794 : {
795 0 : xmlFreeNode(newline);
796 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
797 : "could not save content to xmlBuffer");
798 : }
799 : }
800 :
801 48 : xmlFreeNode(newline);
802 : }
803 :
804 96 : if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
805 : {
806 0 : ctxt = NULL; /* don't try to close it again */
807 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
808 : "could not close xmlSaveCtxtPtr");
809 : }
810 :
811 : /*
812 : * xmlDocContentDumpOutput may add a trailing newline, so remove that.
813 : */
814 96 : if (xmloption_arg == XMLOPTION_DOCUMENT)
815 : {
816 36 : const char *str = (const char *) xmlBufferContent(buf);
817 36 : int len = xmlBufferLength(buf);
818 :
819 72 : while (len > 0 && (str[len - 1] == '\n' ||
820 36 : str[len - 1] == '\r'))
821 36 : len--;
822 :
823 36 : result = cstring_to_text_with_len(str, len);
824 : }
825 : else
826 60 : result = (text *) xmlBuffer_to_xmltype(buf);
827 : }
828 0 : PG_CATCH();
829 : {
830 0 : if (ctxt)
831 0 : xmlSaveClose(ctxt);
832 0 : if (buf)
833 0 : xmlBufferFree(buf);
834 0 : if (doc)
835 0 : xmlFreeDoc(doc);
836 :
837 0 : pg_xml_done(xmlerrcxt, true);
838 :
839 0 : PG_RE_THROW();
840 : }
841 96 : PG_END_TRY();
842 :
843 96 : xmlBufferFree(buf);
844 96 : xmlFreeDoc(doc);
845 :
846 96 : pg_xml_done(xmlerrcxt, false);
847 :
848 96 : return result;
849 : #else
850 : NO_XML_SUPPORT();
851 : return NULL;
852 : #endif
853 : }
854 :
855 :
856 : xmltype *
857 22026 : xmlelement(XmlExpr *xexpr,
858 : Datum *named_argvalue, bool *named_argnull,
859 : Datum *argvalue, bool *argnull)
860 : {
861 : #ifdef USE_LIBXML
862 : xmltype *result;
863 : List *named_arg_strings;
864 : List *arg_strings;
865 : int i;
866 : ListCell *arg;
867 : ListCell *narg;
868 : PgXmlErrorContext *xmlerrcxt;
869 22026 : volatile xmlBufferPtr buf = NULL;
870 22026 : volatile xmlTextWriterPtr writer = NULL;
871 :
872 : /*
873 : * All arguments are already evaluated, and their values are passed in the
874 : * named_argvalue/named_argnull or argvalue/argnull arrays. This avoids
875 : * issues if one of the arguments involves a call to some other function
876 : * or subsystem that wants to use libxml on its own terms. We examine the
877 : * original XmlExpr to identify the numbers and types of the arguments.
878 : */
879 22026 : named_arg_strings = NIL;
880 22026 : i = 0;
881 22074 : foreach(arg, xexpr->named_args)
882 : {
883 54 : Expr *e = (Expr *) lfirst(arg);
884 : char *str;
885 :
886 54 : if (named_argnull[i])
887 0 : str = NULL;
888 : else
889 54 : str = map_sql_value_to_xml_value(named_argvalue[i],
890 : exprType((Node *) e),
891 : false);
892 48 : named_arg_strings = lappend(named_arg_strings, str);
893 48 : i++;
894 : }
895 :
896 22020 : arg_strings = NIL;
897 22020 : i = 0;
898 44016 : foreach(arg, xexpr->args)
899 : {
900 21996 : Expr *e = (Expr *) lfirst(arg);
901 : char *str;
902 :
903 : /* here we can just forget NULL elements immediately */
904 21996 : if (!argnull[i])
905 : {
906 21996 : str = map_sql_value_to_xml_value(argvalue[i],
907 : exprType((Node *) e),
908 : true);
909 21996 : arg_strings = lappend(arg_strings, str);
910 : }
911 21996 : i++;
912 : }
913 :
914 22020 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
915 :
916 22020 : PG_TRY();
917 : {
918 22020 : buf = xmlBufferCreate();
919 22020 : if (buf == NULL || xmlerrcxt->err_occurred)
920 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
921 : "could not allocate xmlBuffer");
922 22020 : writer = xmlNewTextWriterMemory(buf, 0);
923 22020 : if (writer == NULL || xmlerrcxt->err_occurred)
924 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
925 : "could not allocate xmlTextWriter");
926 :
927 22020 : xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
928 :
929 22068 : forboth(arg, named_arg_strings, narg, xexpr->arg_names)
930 : {
931 48 : char *str = (char *) lfirst(arg);
932 48 : char *argname = strVal(lfirst(narg));
933 :
934 48 : if (str)
935 48 : xmlTextWriterWriteAttribute(writer,
936 : (xmlChar *) argname,
937 : (xmlChar *) str);
938 : }
939 :
940 44016 : foreach(arg, arg_strings)
941 : {
942 21996 : char *str = (char *) lfirst(arg);
943 :
944 21996 : xmlTextWriterWriteRaw(writer, (xmlChar *) str);
945 : }
946 :
947 22020 : xmlTextWriterEndElement(writer);
948 :
949 : /* we MUST do this now to flush data out to the buffer ... */
950 22020 : xmlFreeTextWriter(writer);
951 22020 : writer = NULL;
952 :
953 22020 : result = xmlBuffer_to_xmltype(buf);
954 : }
955 0 : PG_CATCH();
956 : {
957 0 : if (writer)
958 0 : xmlFreeTextWriter(writer);
959 0 : if (buf)
960 0 : xmlBufferFree(buf);
961 :
962 0 : pg_xml_done(xmlerrcxt, true);
963 :
964 0 : PG_RE_THROW();
965 : }
966 22020 : PG_END_TRY();
967 :
968 22020 : xmlBufferFree(buf);
969 :
970 22020 : pg_xml_done(xmlerrcxt, false);
971 :
972 22020 : return result;
973 : #else
974 : NO_XML_SUPPORT();
975 : return NULL;
976 : #endif
977 : }
978 :
979 :
980 : xmltype *
981 138 : xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
982 : {
983 : #ifdef USE_LIBXML
984 : xmlDocPtr doc;
985 :
986 138 : doc = xml_parse(data, xmloption_arg, preserve_whitespace,
987 : GetDatabaseEncoding(), NULL, NULL, NULL);
988 90 : xmlFreeDoc(doc);
989 :
990 90 : return (xmltype *) data;
991 : #else
992 : NO_XML_SUPPORT();
993 : return NULL;
994 : #endif
995 : }
996 :
997 :
998 : xmltype *
999 72 : xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
1000 : {
1001 : #ifdef USE_LIBXML
1002 : xmltype *result;
1003 : StringInfoData buf;
1004 :
1005 72 : if (pg_strcasecmp(target, "xml") == 0)
1006 12 : ereport(ERROR,
1007 : (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
1008 : errmsg("invalid XML processing instruction"),
1009 : errdetail("XML processing instruction target name cannot be \"%s\".", target)));
1010 :
1011 : /*
1012 : * Following the SQL standard, the null check comes after the syntax check
1013 : * above.
1014 : */
1015 60 : *result_is_null = arg_is_null;
1016 60 : if (*result_is_null)
1017 12 : return NULL;
1018 :
1019 48 : initStringInfo(&buf);
1020 :
1021 48 : appendStringInfo(&buf, "<?%s", target);
1022 :
1023 48 : if (arg != NULL)
1024 : {
1025 : char *string;
1026 :
1027 24 : string = text_to_cstring(arg);
1028 24 : if (strstr(string, "?>") != NULL)
1029 6 : ereport(ERROR,
1030 : (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
1031 : errmsg("invalid XML processing instruction"),
1032 : errdetail("XML processing instruction cannot contain \"?>\".")));
1033 :
1034 18 : appendStringInfoChar(&buf, ' ');
1035 18 : appendStringInfoString(&buf, string + strspn(string, " "));
1036 18 : pfree(string);
1037 : }
1038 42 : appendStringInfoString(&buf, "?>");
1039 :
1040 42 : result = stringinfo_to_xmltype(&buf);
1041 42 : pfree(buf.data);
1042 42 : return result;
1043 : #else
1044 : NO_XML_SUPPORT();
1045 : return NULL;
1046 : #endif
1047 : }
1048 :
1049 :
1050 : xmltype *
1051 60 : xmlroot(xmltype *data, text *version, int standalone)
1052 : {
1053 : #ifdef USE_LIBXML
1054 : char *str;
1055 : size_t len;
1056 : xmlChar *orig_version;
1057 : int orig_standalone;
1058 : StringInfoData buf;
1059 :
1060 60 : len = VARSIZE(data) - VARHDRSZ;
1061 60 : str = text_to_cstring((text *) data);
1062 :
1063 60 : parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
1064 :
1065 60 : if (version)
1066 24 : orig_version = xml_text2xmlChar(version);
1067 : else
1068 36 : orig_version = NULL;
1069 :
1070 60 : switch (standalone)
1071 : {
1072 18 : case XML_STANDALONE_YES:
1073 18 : orig_standalone = 1;
1074 18 : break;
1075 12 : case XML_STANDALONE_NO:
1076 12 : orig_standalone = 0;
1077 12 : break;
1078 12 : case XML_STANDALONE_NO_VALUE:
1079 12 : orig_standalone = -1;
1080 12 : break;
1081 18 : case XML_STANDALONE_OMITTED:
1082 : /* leave original value */
1083 18 : break;
1084 : }
1085 :
1086 60 : initStringInfo(&buf);
1087 60 : print_xml_decl(&buf, orig_version, 0, orig_standalone);
1088 60 : appendStringInfoString(&buf, str + len);
1089 :
1090 60 : return stringinfo_to_xmltype(&buf);
1091 : #else
1092 : NO_XML_SUPPORT();
1093 : return NULL;
1094 : #endif
1095 : }
1096 :
1097 :
1098 : /*
1099 : * Validate document (given as string) against DTD (given as external link)
1100 : *
1101 : * This has been removed because it is a security hole: unprivileged users
1102 : * should not be able to use Postgres to fetch arbitrary external files,
1103 : * which unfortunately is exactly what libxml is willing to do with the DTD
1104 : * parameter.
1105 : */
1106 : Datum
1107 0 : xmlvalidate(PG_FUNCTION_ARGS)
1108 : {
1109 0 : ereport(ERROR,
1110 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1111 : errmsg("xmlvalidate is not implemented")));
1112 : return 0;
1113 : }
1114 :
1115 :
1116 : bool
1117 24 : xml_is_document(xmltype *arg)
1118 : {
1119 : #ifdef USE_LIBXML
1120 : xmlDocPtr doc;
1121 24 : ErrorSaveContext escontext = {T_ErrorSaveContext};
1122 :
1123 : /*
1124 : * We'll report "true" if no soft error is reported by xml_parse().
1125 : */
1126 24 : doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
1127 : GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
1128 24 : if (doc)
1129 12 : xmlFreeDoc(doc);
1130 :
1131 24 : return !escontext.error_occurred;
1132 : #else /* not USE_LIBXML */
1133 : NO_XML_SUPPORT();
1134 : return false;
1135 : #endif /* not USE_LIBXML */
1136 : }
1137 :
1138 :
1139 : #ifdef USE_LIBXML
1140 :
1141 : /*
1142 : * pg_xml_init_library --- set up for use of libxml
1143 : *
1144 : * This should be called by each function that is about to use libxml
1145 : * facilities but doesn't require error handling. It initializes libxml
1146 : * and verifies compatibility with the loaded libxml version. These are
1147 : * once-per-session activities.
1148 : *
1149 : * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
1150 : * check)
1151 : */
1152 : void
1153 92882 : pg_xml_init_library(void)
1154 : {
1155 : static bool first_time = true;
1156 :
1157 92882 : if (first_time)
1158 : {
1159 : /* Stuff we need do only once per session */
1160 :
1161 : /*
1162 : * Currently, we have no pure UTF-8 support for internals -- check if
1163 : * we can work.
1164 : */
1165 : if (sizeof(char) != sizeof(xmlChar))
1166 : ereport(ERROR,
1167 : (errmsg("could not initialize XML library"),
1168 : errdetail("libxml2 has incompatible char type: sizeof(char)=%zu, sizeof(xmlChar)=%zu.",
1169 : sizeof(char), sizeof(xmlChar))));
1170 :
1171 : #ifdef USE_LIBXMLCONTEXT
1172 : /* Set up libxml's memory allocation our way */
1173 : xml_memory_init();
1174 : #endif
1175 :
1176 : /* Check library compatibility */
1177 26 : LIBXML_TEST_VERSION;
1178 :
1179 26 : first_time = false;
1180 : }
1181 92882 : }
1182 :
1183 : /*
1184 : * pg_xml_init --- set up for use of libxml and register an error handler
1185 : *
1186 : * This should be called by each function that is about to use libxml
1187 : * facilities and requires error handling. It initializes libxml with
1188 : * pg_xml_init_library() and establishes our libxml error handler.
1189 : *
1190 : * strictness determines which errors are reported and which are ignored.
1191 : *
1192 : * Calls to this function MUST be followed by a PG_TRY block that guarantees
1193 : * that pg_xml_done() is called during either normal or error exit.
1194 : *
1195 : * This is exported for use by contrib/xml2, as well as other code that might
1196 : * wish to share use of this module's libxml error handler.
1197 : */
1198 : PgXmlErrorContext *
1199 24250 : pg_xml_init(PgXmlStrictness strictness)
1200 : {
1201 : PgXmlErrorContext *errcxt;
1202 : void *new_errcxt;
1203 :
1204 : /* Do one-time setup if needed */
1205 24250 : pg_xml_init_library();
1206 :
1207 : /* Create error handling context structure */
1208 24250 : errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
1209 24250 : errcxt->magic = ERRCXT_MAGIC;
1210 24250 : errcxt->strictness = strictness;
1211 24250 : errcxt->err_occurred = false;
1212 24250 : initStringInfo(&errcxt->err_buf);
1213 :
1214 : /*
1215 : * Save original error handler and install ours. libxml originally didn't
1216 : * distinguish between the contexts for generic and for structured error
1217 : * handlers. If we're using an old libxml version, we must thus save the
1218 : * generic error context, even though we're using a structured error
1219 : * handler.
1220 : */
1221 24250 : errcxt->saved_errfunc = xmlStructuredError;
1222 :
1223 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1224 24250 : errcxt->saved_errcxt = xmlStructuredErrorContext;
1225 : #else
1226 : errcxt->saved_errcxt = xmlGenericErrorContext;
1227 : #endif
1228 :
1229 24250 : xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
1230 :
1231 : /*
1232 : * Verify that xmlSetStructuredErrorFunc set the context variable we
1233 : * expected it to. If not, the error context pointer we just saved is not
1234 : * the correct thing to restore, and since that leaves us without a way to
1235 : * restore the context in pg_xml_done, we must fail.
1236 : *
1237 : * The only known situation in which this test fails is if we compile with
1238 : * headers from a libxml2 that doesn't track the structured error context
1239 : * separately (< 2.7.4), but at runtime use a version that does, or vice
1240 : * versa. The libxml2 authors did not treat that change as constituting
1241 : * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1242 : * fails to protect us from this.
1243 : */
1244 :
1245 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1246 24250 : new_errcxt = xmlStructuredErrorContext;
1247 : #else
1248 : new_errcxt = xmlGenericErrorContext;
1249 : #endif
1250 :
1251 24250 : if (new_errcxt != (void *) errcxt)
1252 0 : ereport(ERROR,
1253 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1254 : errmsg("could not set up XML error handler"),
1255 : errhint("This probably indicates that the version of libxml2"
1256 : " being used is not compatible with the libxml2"
1257 : " header files that PostgreSQL was built with.")));
1258 :
1259 : /*
1260 : * Also, install an entity loader to prevent unwanted fetches of external
1261 : * files and URLs.
1262 : */
1263 24250 : errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1264 24250 : xmlSetExternalEntityLoader(xmlPgEntityLoader);
1265 :
1266 24250 : return errcxt;
1267 : }
1268 :
1269 :
1270 : /*
1271 : * pg_xml_done --- restore previous libxml error handling
1272 : *
1273 : * Resets libxml's global error-handling state to what it was before
1274 : * pg_xml_init() was called.
1275 : *
1276 : * This routine verifies that all pending errors have been dealt with
1277 : * (in assert-enabled builds, anyway).
1278 : */
1279 : void
1280 24250 : pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1281 : {
1282 : void *cur_errcxt;
1283 :
1284 : /* An assert seems like enough protection here */
1285 : Assert(errcxt->magic == ERRCXT_MAGIC);
1286 :
1287 : /*
1288 : * In a normal exit, there should be no un-handled libxml errors. But we
1289 : * shouldn't try to enforce this during error recovery, since the longjmp
1290 : * could have been thrown before xml_ereport had a chance to run.
1291 : */
1292 : Assert(!errcxt->err_occurred || isError);
1293 :
1294 : /*
1295 : * Check that libxml's global state is correct, warn if not. This is a
1296 : * real test and not an Assert because it has a higher probability of
1297 : * happening.
1298 : */
1299 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1300 24250 : cur_errcxt = xmlStructuredErrorContext;
1301 : #else
1302 : cur_errcxt = xmlGenericErrorContext;
1303 : #endif
1304 :
1305 24250 : if (cur_errcxt != (void *) errcxt)
1306 0 : elog(WARNING, "libxml error handling state is out of sync with xml.c");
1307 :
1308 : /* Restore the saved handlers */
1309 24250 : xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1310 24250 : xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1311 :
1312 : /*
1313 : * Mark the struct as invalid, just in case somebody somehow manages to
1314 : * call xml_errorHandler or xml_ereport with it.
1315 : */
1316 24250 : errcxt->magic = 0;
1317 :
1318 : /* Release memory */
1319 24250 : pfree(errcxt->err_buf.data);
1320 24250 : pfree(errcxt);
1321 24250 : }
1322 :
1323 :
1324 : /*
1325 : * pg_xml_error_occurred() --- test the error flag
1326 : */
1327 : bool
1328 0 : pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1329 : {
1330 0 : return errcxt->err_occurred;
1331 : }
1332 :
1333 :
1334 : /*
1335 : * SQL/XML allows storing "XML documents" or "XML content". "XML
1336 : * documents" are specified by the XML specification and are parsed
1337 : * easily by libxml. "XML content" is specified by SQL/XML as the
1338 : * production "XMLDecl? content". But libxml can only parse the
1339 : * "content" part, so we have to parse the XML declaration ourselves
1340 : * to complete this.
1341 : */
1342 :
/*
 * CHECK_XML_SPACE: make the *enclosing function* return
 * XML_ERR_SPACE_REQUIRED unless *p is an XML blank character.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/* SKIP_XML_SPACE: advance p past any run of XML blank characters. */
#define SKIP_XML_SPACE(p) \
	while (xmlIsBlank_ch(*(p))) (p)++

/*
 * PG_XMLISNAMECHAR: true if c is
 * Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender
 *
 * Beware of multiple evaluations of argument!  The argument is
 * parenthesized in the direct comparisons so that an expression argument
 * (e.g. a conditional expression) is compared as a whole rather than being
 * split up by operator precedence.
 */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
	 || xmlIsDigit_ch(c) \
	 || (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
	 || xmlIsCombiningQ(c) \
	 || xmlIsExtender_ch(c))
1360 :
1361 : /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1362 : static xmlChar *
1363 192 : xml_pnstrdup(const xmlChar *str, size_t len)
1364 : {
1365 : xmlChar *result;
1366 :
1367 192 : result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1368 192 : memcpy(result, str, len * sizeof(xmlChar));
1369 192 : result[len] = 0;
1370 192 : return result;
1371 : }
1372 :
1373 : /* Ditto, except input is char* */
1374 : static xmlChar *
1375 2424 : pg_xmlCharStrndup(const char *str, size_t len)
1376 : {
1377 : xmlChar *result;
1378 :
1379 2424 : result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1380 2424 : memcpy(result, str, len);
1381 2424 : result[len] = '\0';
1382 :
1383 2424 : return result;
1384 : }
1385 :
1386 : /*
1387 : * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
1388 : *
1389 : * The input xmlChar is freed regardless of success of the copy.
1390 : */
1391 : static char *
1392 110760 : xml_pstrdup_and_free(xmlChar *str)
1393 : {
1394 : char *result;
1395 :
1396 110760 : if (str)
1397 : {
1398 110760 : PG_TRY();
1399 : {
1400 110760 : result = pstrdup((char *) str);
1401 : }
1402 0 : PG_FINALLY();
1403 : {
1404 110760 : xmlFree(str);
1405 : }
1406 110760 : PG_END_TRY();
1407 : }
1408 : else
1409 0 : result = NULL;
1410 :
1411 110760 : return result;
1412 : }
1413 :
1414 : /*
1415 : * str is the null-terminated input string. Remaining arguments are
1416 : * output arguments; each can be NULL if value is not wanted.
1417 : * version and encoding are returned as locally-palloc'd strings.
1418 : * Result is 0 if OK, an error code if not.
1419 : */
1420 : static int
1421 68632 : parse_xml_decl(const xmlChar *str, size_t *lenp,
1422 : xmlChar **version, xmlChar **encoding, int *standalone)
1423 : {
1424 : const xmlChar *p;
1425 : const xmlChar *save_p;
1426 : size_t len;
1427 : int utf8char;
1428 : int utf8len;
1429 :
1430 : /*
1431 : * Only initialize libxml. We don't need error handling here, but we do
1432 : * need to make sure libxml is initialized before calling any of its
1433 : * functions. Note that this is safe (and a no-op) if caller has already
1434 : * done pg_xml_init().
1435 : */
1436 68632 : pg_xml_init_library();
1437 :
1438 : /* Initialize output arguments to "not present" */
1439 68632 : if (version)
1440 67984 : *version = NULL;
1441 68632 : if (encoding)
1442 0 : *encoding = NULL;
1443 68632 : if (standalone)
1444 67984 : *standalone = -1;
1445 :
1446 68632 : p = str;
1447 :
1448 68632 : if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1449 68410 : goto finished;
1450 :
1451 : /*
1452 : * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
1453 : * rather than an XMLDecl, so we have done what we came to do and found no
1454 : * XMLDecl.
1455 : *
1456 : * We need an input length value for xmlGetUTF8Char, but there's no need
1457 : * to count the whole document size, so use strnlen not strlen.
1458 : */
1459 222 : utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
1460 222 : utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1461 222 : if (PG_XMLISNAMECHAR(utf8char))
1462 12 : goto finished;
1463 :
1464 210 : p += 5;
1465 :
1466 : /* version */
1467 210 : CHECK_XML_SPACE(p);
1468 420 : SKIP_XML_SPACE(p);
1469 210 : if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1470 0 : return XML_ERR_VERSION_MISSING;
1471 210 : p += 7;
1472 210 : SKIP_XML_SPACE(p);
1473 210 : if (*p != '=')
1474 0 : return XML_ERR_VERSION_MISSING;
1475 210 : p += 1;
1476 210 : SKIP_XML_SPACE(p);
1477 :
1478 210 : if (*p == '\'' || *p == '"')
1479 210 : {
1480 : const xmlChar *q;
1481 :
1482 210 : q = xmlStrchr(p + 1, *p);
1483 210 : if (!q)
1484 0 : return XML_ERR_VERSION_MISSING;
1485 :
1486 210 : if (version)
1487 192 : *version = xml_pnstrdup(p + 1, q - p - 1);
1488 210 : p = q + 1;
1489 : }
1490 : else
1491 0 : return XML_ERR_VERSION_MISSING;
1492 :
1493 : /* encoding */
1494 210 : save_p = p;
1495 372 : SKIP_XML_SPACE(p);
1496 210 : if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1497 : {
1498 54 : CHECK_XML_SPACE(save_p);
1499 54 : p += 8;
1500 54 : SKIP_XML_SPACE(p);
1501 54 : if (*p != '=')
1502 0 : return XML_ERR_MISSING_ENCODING;
1503 54 : p += 1;
1504 54 : SKIP_XML_SPACE(p);
1505 :
1506 54 : if (*p == '\'' || *p == '"')
1507 54 : {
1508 : const xmlChar *q;
1509 :
1510 54 : q = xmlStrchr(p + 1, *p);
1511 54 : if (!q)
1512 0 : return XML_ERR_MISSING_ENCODING;
1513 :
1514 54 : if (encoding)
1515 0 : *encoding = xml_pnstrdup(p + 1, q - p - 1);
1516 54 : p = q + 1;
1517 : }
1518 : else
1519 0 : return XML_ERR_MISSING_ENCODING;
1520 : }
1521 : else
1522 : {
1523 156 : p = save_p;
1524 : }
1525 :
1526 : /* standalone */
1527 210 : save_p = p;
1528 318 : SKIP_XML_SPACE(p);
1529 210 : if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1530 : {
1531 108 : CHECK_XML_SPACE(save_p);
1532 108 : p += 10;
1533 108 : SKIP_XML_SPACE(p);
1534 108 : if (*p != '=')
1535 0 : return XML_ERR_STANDALONE_VALUE;
1536 108 : p += 1;
1537 108 : SKIP_XML_SPACE(p);
1538 216 : if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1539 108 : xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1540 : {
1541 60 : if (standalone)
1542 60 : *standalone = 1;
1543 60 : p += 5;
1544 : }
1545 96 : else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1546 48 : xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1547 : {
1548 36 : if (standalone)
1549 36 : *standalone = 0;
1550 36 : p += 4;
1551 : }
1552 : else
1553 12 : return XML_ERR_STANDALONE_VALUE;
1554 : }
1555 : else
1556 : {
1557 102 : p = save_p;
1558 : }
1559 :
1560 198 : SKIP_XML_SPACE(p);
1561 198 : if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1562 0 : return XML_ERR_XMLDECL_NOT_FINISHED;
1563 198 : p += 2;
1564 :
1565 68620 : finished:
1566 68620 : len = p - str;
1567 :
1568 75352 : for (p = str; p < str + len; p++)
1569 6732 : if (*p > 127)
1570 0 : return XML_ERR_INVALID_CHAR;
1571 :
1572 68620 : if (lenp)
1573 68620 : *lenp = len;
1574 :
1575 68620 : return XML_ERR_OK;
1576 : }
1577 :
1578 :
1579 : /*
1580 : * Write an XML declaration. On output, we adjust the XML declaration
1581 : * as follows. (These rules are the moral equivalent of the clause
1582 : * "Serialization of an XML value" in the SQL standard.)
1583 : *
1584 : * We try to avoid generating an XML declaration if possible. This is
1585 : * so that you don't get trivial things like xml '<foo/>' resulting in
1586 : * '<?xml version="1.0"?><foo/>', which would surely be annoying. We
1587 : * must provide a declaration if the standalone property is specified
1588 : * or if we include an encoding declaration. If we have a
1589 : * declaration, we must specify a version (XML requires this).
1590 : * Otherwise we only make a declaration if the version is not "1.0",
1591 : * which is the default version specified in SQL:2003.
1592 : */
1593 : static bool
1594 23230 : print_xml_decl(StringInfo buf, const xmlChar *version,
1595 : pg_enc encoding, int standalone)
1596 : {
1597 23230 : if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1598 23194 : || (encoding && encoding != PG_UTF8)
1599 23194 : || standalone != -1)
1600 : {
1601 96 : appendStringInfoString(buf, "<?xml");
1602 :
1603 96 : if (version)
1604 72 : appendStringInfo(buf, " version=\"%s\"", version);
1605 : else
1606 24 : appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1607 :
1608 96 : if (encoding && encoding != PG_UTF8)
1609 : {
1610 : /*
1611 : * XXX might be useful to convert this to IANA names (ISO-8859-1
1612 : * instead of LATIN1 etc.); needs field experience
1613 : */
1614 0 : appendStringInfo(buf, " encoding=\"%s\"",
1615 : pg_encoding_to_char(encoding));
1616 : }
1617 :
1618 96 : if (standalone == 1)
1619 48 : appendStringInfoString(buf, " standalone=\"yes\"");
1620 48 : else if (standalone == 0)
1621 24 : appendStringInfoString(buf, " standalone=\"no\"");
1622 96 : appendStringInfoString(buf, "?>");
1623 :
1624 96 : return true;
1625 : }
1626 : else
1627 23134 : return false;
1628 : }
1629 :
1630 : /*
1631 : * Test whether an input that is to be parsed as CONTENT contains a DTD.
1632 : *
1633 : * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1634 : * satisfied by a document with a DTD, which is a bit of a wart, as it means
1635 : * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and
1636 : * later fix that, by redefining content with reference to the "more
1637 : * permissive" Document Node of the XQuery/XPath Data Model, such that any
1638 : * DOCUMENT value is indeed also a CONTENT value. That definition is more
1639 : * useful, as CONTENT becomes usable for parsing input of unknown form (think
1640 : * pg_restore).
1641 : *
1642 : * As used below in xml_parse when parsing for CONTENT, libxml does not give
1643 : * us the 2006+ behavior, but only the 2003; it will choke if the input has
1644 : * a DTD. But we can provide the 2006+ definition of CONTENT easily enough,
1645 : * by detecting this case first and simply doing the parse as DOCUMENT.
1646 : *
1647 : * A DTD can be found arbitrarily far in, but that would be a contrived case;
1648 : * it will ordinarily start within a few dozen characters. The only things
1649 : * that can precede it are an XMLDecl (here, the caller will have called
1650 : * parse_xml_decl already), whitespace, comments, and processing instructions.
1651 : * This function need only return true if it sees a valid sequence of such
1652 : * things leading to <!DOCTYPE. It can simply return false in any other
1653 : * cases, including malformed input; that will mean the input gets parsed as
1654 : * CONTENT as originally planned, with libxml reporting any errors.
1655 : *
1656 : * This is only to be called from xml_parse, when pg_xml_init has already
1657 : * been called. The input is already in UTF8 encoding.
1658 : */
1659 : static bool
1660 954 : xml_doctype_in_content(const xmlChar *str)
1661 : {
1662 954 : const xmlChar *p = str;
1663 :
1664 : for (;;)
1665 36 : {
1666 : const xmlChar *e;
1667 :
1668 1080 : SKIP_XML_SPACE(p);
1669 990 : if (*p != '<')
1670 206 : return false;
1671 784 : p++;
1672 :
1673 784 : if (*p == '!')
1674 : {
1675 72 : p++;
1676 :
1677 : /* if we see <!DOCTYPE, we can return true */
1678 72 : if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1679 42 : return true;
1680 :
1681 : /* otherwise, if it's not a comment, fail */
1682 30 : if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1683 0 : return false;
1684 : /* find end of comment: find -- and a > must follow */
1685 30 : p = xmlStrstr(p + 2, (xmlChar *) "--");
1686 30 : if (!p || p[2] != '>')
1687 0 : return false;
1688 : /* advance over comment, and keep scanning */
1689 30 : p += 3;
1690 30 : continue;
1691 : }
1692 :
1693 : /* otherwise, if it's not a PI <?target something?>, fail */
1694 712 : if (*p != '?')
1695 706 : return false;
1696 6 : p++;
1697 :
1698 : /* find end of PI (the string ?> is forbidden within a PI) */
1699 6 : e = xmlStrstr(p, (xmlChar *) "?>");
1700 6 : if (!e)
1701 0 : return false;
1702 :
1703 : /* advance over PI, keep scanning */
1704 6 : p = e + 2;
1705 : }
1706 : }
1707 :
1708 :
1709 : /*
1710 : * Convert a text object to XML internal representation
1711 : *
1712 : * data is the source data (must not be toasted!), encoding is its encoding,
1713 : * and xmloption_arg and preserve_whitespace are options for the
1714 : * transformation.
1715 : *
1716 : * If parsed_xmloptiontype isn't NULL, *parsed_xmloptiontype is set to the
1717 : * XmlOptionType actually used to parse the input (typically the same as
1718 : * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
1719 : *
1720 : * If parsed_nodes isn't NULL and we parse in CONTENT mode, the list
1721 : * of parsed nodes from the xmlParseInNodeContext call will be returned
1722 : * to *parsed_nodes. (It is caller's responsibility to free that.)
1723 : *
1724 : * Errors normally result in ereport(ERROR), but if escontext is an
1725 : * ErrorSaveContext, then "safe" errors are reported there instead, and the
1726 : * caller must check SOFT_ERROR_OCCURRED() to see whether that happened.
1727 : *
1728 : * Note: it is caller's responsibility to xmlFreeDoc() the result,
1729 : * else a permanent memory leak will ensue! But note the result could
1730 : * be NULL after a soft error.
1731 : *
1732 : * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1733 : * yet do not use SAX - see xmlreader.c)
1734 : */
1735 : static xmlDocPtr
1736 1260 : xml_parse(text *data, XmlOptionType xmloption_arg,
1737 : bool preserve_whitespace, int encoding,
1738 : XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
1739 : Node *escontext)
1740 : {
1741 : int32 len;
1742 : xmlChar *string;
1743 : xmlChar *utf8string;
1744 : PgXmlErrorContext *xmlerrcxt;
1745 1260 : volatile xmlParserCtxtPtr ctxt = NULL;
1746 1260 : volatile xmlDocPtr doc = NULL;
1747 :
1748 : /*
1749 : * This step looks annoyingly redundant, but we must do it to have a
1750 : * null-terminated string in case encoding conversion isn't required.
1751 : */
1752 1260 : len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
1753 1260 : string = xml_text2xmlChar(data);
1754 :
1755 : /*
1756 : * If the data isn't UTF8, we must translate before giving it to libxml.
1757 : *
1758 : * XXX ideally, we'd catch any encoding conversion failure and return a
1759 : * soft error. However, failure to convert to UTF8 should be pretty darn
1760 : * rare, so for now this is left undone.
1761 : */
1762 1260 : utf8string = pg_do_encoding_conversion(string,
1763 : len,
1764 : encoding,
1765 : PG_UTF8);
1766 :
1767 : /* Start up libxml and its parser */
1768 1260 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1769 :
1770 : /* Use a TRY block to ensure we clean up correctly */
1771 1260 : PG_TRY();
1772 : {
1773 1260 : bool parse_as_document = false;
1774 : int options;
1775 : int res_code;
1776 1260 : size_t count = 0;
1777 1260 : xmlChar *version = NULL;
1778 1260 : int standalone = 0;
1779 :
1780 : /* Any errors here are reported as hard ereport's */
1781 1260 : xmlInitParser();
1782 :
1783 : /* Decide whether to parse as document or content */
1784 1260 : if (xmloption_arg == XMLOPTION_DOCUMENT)
1785 294 : parse_as_document = true;
1786 : else
1787 : {
1788 : /* Parse and skip over the XML declaration, if any */
1789 966 : res_code = parse_xml_decl(utf8string,
1790 : &count, &version, NULL, &standalone);
1791 966 : if (res_code != 0)
1792 : {
1793 12 : errsave(escontext,
1794 : errcode(ERRCODE_INVALID_XML_CONTENT),
1795 : errmsg_internal("invalid XML content: invalid XML declaration"),
1796 : errdetail_for_xml_code(res_code));
1797 12 : goto fail;
1798 : }
1799 :
1800 : /* Is there a DOCTYPE element? */
1801 954 : if (xml_doctype_in_content(utf8string + count))
1802 42 : parse_as_document = true;
1803 : }
1804 :
1805 : /*
1806 : * Select parse options.
1807 : *
1808 : * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1809 : * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
1810 : * internal DTD are applied'. As for external DTDs, we try to support
1811 : * them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
1812 : * happen because xmlPgEntityLoader prevents it.
1813 : */
1814 1248 : options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1815 1248 : | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1816 :
1817 : /* initialize output parameters */
1818 1248 : if (parsed_xmloptiontype != NULL)
1819 144 : *parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
1820 : XMLOPTION_CONTENT;
1821 1248 : if (parsed_nodes != NULL)
1822 144 : *parsed_nodes = NULL;
1823 :
1824 1248 : if (parse_as_document)
1825 : {
1826 336 : ctxt = xmlNewParserCtxt();
1827 336 : if (ctxt == NULL || xmlerrcxt->err_occurred)
1828 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1829 : "could not allocate parser context");
1830 :
1831 336 : doc = xmlCtxtReadDoc(ctxt, utf8string,
1832 : NULL, /* no URL */
1833 : "UTF-8",
1834 : options);
1835 :
1836 336 : if (doc == NULL || xmlerrcxt->err_occurred)
1837 : {
1838 : /* Use original option to decide which error code to report */
1839 144 : if (xmloption_arg == XMLOPTION_DOCUMENT)
1840 138 : xml_errsave(escontext, xmlerrcxt,
1841 : ERRCODE_INVALID_XML_DOCUMENT,
1842 : "invalid XML document");
1843 : else
1844 6 : xml_errsave(escontext, xmlerrcxt,
1845 : ERRCODE_INVALID_XML_CONTENT,
1846 : "invalid XML content");
1847 96 : goto fail;
1848 : }
1849 : }
1850 : else
1851 : {
1852 : xmlNodePtr root;
1853 :
1854 : /* set up document with empty root node to be the context node */
1855 912 : doc = xmlNewDoc(version);
1856 912 : if (doc == NULL || xmlerrcxt->err_occurred)
1857 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1858 : "could not allocate XML document");
1859 :
1860 : Assert(doc->encoding == NULL);
1861 912 : doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1862 912 : if (doc->encoding == NULL || xmlerrcxt->err_occurred)
1863 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1864 : "could not allocate XML document");
1865 912 : doc->standalone = standalone;
1866 :
1867 912 : root = xmlNewNode(NULL, (const xmlChar *) "content-root");
1868 912 : if (root == NULL || xmlerrcxt->err_occurred)
1869 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1870 : "could not allocate xml node");
1871 : /* This attaches root to doc, so we need not free it separately. */
1872 912 : xmlDocSetRootElement(doc, root);
1873 :
1874 : /* allow empty content */
1875 912 : if (*(utf8string + count))
1876 : {
1877 888 : xmlNodePtr node_list = NULL;
1878 : xmlParserErrors res;
1879 :
1880 1776 : res = xmlParseInNodeContext(root,
1881 : (char *) utf8string + count,
1882 888 : strlen((char *) utf8string + count),
1883 : options,
1884 : &node_list);
1885 :
1886 888 : if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
1887 : {
1888 60 : xmlFreeNodeList(node_list);
1889 60 : xml_errsave(escontext, xmlerrcxt,
1890 : ERRCODE_INVALID_XML_CONTENT,
1891 : "invalid XML content");
1892 12 : goto fail;
1893 : }
1894 :
1895 828 : if (parsed_nodes != NULL)
1896 48 : *parsed_nodes = node_list;
1897 : else
1898 780 : xmlFreeNodeList(node_list);
1899 : }
1900 : }
1901 :
1902 1164 : fail:
1903 : ;
1904 : }
1905 96 : PG_CATCH();
1906 : {
1907 96 : if (doc != NULL)
1908 48 : xmlFreeDoc(doc);
1909 96 : if (ctxt != NULL)
1910 48 : xmlFreeParserCtxt(ctxt);
1911 :
1912 96 : pg_xml_done(xmlerrcxt, true);
1913 :
1914 96 : PG_RE_THROW();
1915 : }
1916 1164 : PG_END_TRY();
1917 :
1918 1164 : if (ctxt != NULL)
1919 288 : xmlFreeParserCtxt(ctxt);
1920 :
1921 1164 : pg_xml_done(xmlerrcxt, false);
1922 :
1923 1164 : return doc;
1924 : }
1925 :
1926 :
1927 : /*
1928 : * xmlChar<->text conversions
1929 : */
1930 : static xmlChar *
1931 1410 : xml_text2xmlChar(text *in)
1932 : {
1933 1410 : return (xmlChar *) text_to_cstring(in);
1934 : }
1935 :
1936 :
1937 : #ifdef USE_LIBXMLCONTEXT
1938 :
1939 : /*
1940 : * Manage the special context used for all libxml allocations (but only
1941 : * in special debug builds; see notes at top of file)
1942 : */
1943 : static void
1944 : xml_memory_init(void)
1945 : {
1946 : /* Create memory context if not there already */
1947 : if (LibxmlContext == NULL)
1948 : LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1949 : "Libxml context",
1950 : ALLOCSET_DEFAULT_SIZES);
1951 :
1952 : /* Re-establish the callbacks even if already set */
1953 : xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1954 : }
1955 :
1956 : /*
1957 : * Wrappers for memory management functions
1958 : */
1959 : static void *
1960 : xml_palloc(size_t size)
1961 : {
1962 : return MemoryContextAlloc(LibxmlContext, size);
1963 : }
1964 :
1965 :
1966 : static void *
1967 : xml_repalloc(void *ptr, size_t size)
1968 : {
1969 : return repalloc(ptr, size);
1970 : }
1971 :
1972 :
/* libxml free callback: release a chunk, tolerating NULL. */
static void
xml_pfree(void *ptr)
{
	/* At least some parts of libxml assume xmlFree(NULL) is allowed */
	if (ptr == NULL)
		return;

	pfree(ptr);
}
1980 :
1981 :
1982 : static char *
1983 : xml_pstrdup(const char *string)
1984 : {
1985 : return MemoryContextStrdup(LibxmlContext, string);
1986 : }
1987 : #endif /* USE_LIBXMLCONTEXT */
1988 :
1989 :
1990 : /*
1991 : * xmlPgEntityLoader --- entity loader callback function
1992 : *
1993 : * Silently prevent any external entity URL from being loaded. We don't want
1994 : * to throw an error, so instead make the entity appear to expand to an empty
1995 : * string.
1996 : *
1997 : * We would prefer to allow loading entities that exist in the system's
1998 : * global XML catalog; but the available libxml2 APIs make that a complex
1999 : * and fragile task. For now, just shut down all external access.
2000 : */
2001 : static xmlParserInputPtr
2002 30 : xmlPgEntityLoader(const char *URL, const char *ID,
2003 : xmlParserCtxtPtr ctxt)
2004 : {
2005 30 : return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
2006 : }
2007 :
2008 :
2009 : /*
2010 : * xml_ereport --- report an XML-related error
2011 : *
2012 : * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
2013 : * standard. This function adds libxml's native error message, if any, as
2014 : * detail.
2015 : *
2016 : * This is exported for modules that want to share the core libxml error
2017 : * handler. Note that pg_xml_init() *must* have been called previously.
2018 : */
2019 : void
2020 12 : xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
2021 : {
2022 : char *detail;
2023 :
2024 : /* Defend against someone passing us a bogus context struct */
2025 12 : if (errcxt->magic != ERRCXT_MAGIC)
2026 0 : elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
2027 :
2028 : /* Flag that the current libxml error has been reported */
2029 12 : errcxt->err_occurred = false;
2030 :
2031 : /* Include detail only if we have some text from libxml */
2032 12 : if (errcxt->err_buf.len > 0)
2033 12 : detail = errcxt->err_buf.data;
2034 : else
2035 0 : detail = NULL;
2036 :
2037 12 : ereport(level,
2038 : (errcode(sqlcode),
2039 : errmsg_internal("%s", msg),
2040 : detail ? errdetail_internal("%s", detail) : 0));
2041 0 : }
2042 :
2043 :
2044 : /*
2045 : * xml_errsave --- save an XML-related error
2046 : *
2047 : * If escontext is an ErrorSaveContext, error details are saved into it,
2048 : * and control returns normally.
2049 : *
2050 : * Otherwise, the error is thrown, so that this is equivalent to
2051 : * xml_ereport() with level == ERROR.
2052 : *
2053 : * This should be used only for errors that we're sure we do not need
2054 : * a transaction abort to clean up after.
2055 : */
2056 : static void
2057 204 : xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
2058 : int sqlcode, const char *msg)
2059 : {
2060 : char *detail;
2061 :
2062 : /* Defend against someone passing us a bogus context struct */
2063 204 : if (errcxt->magic != ERRCXT_MAGIC)
2064 0 : elog(ERROR, "xml_errsave called with invalid PgXmlErrorContext");
2065 :
2066 : /* Flag that the current libxml error has been reported */
2067 204 : errcxt->err_occurred = false;
2068 :
2069 : /* Include detail only if we have some text from libxml */
2070 204 : if (errcxt->err_buf.len > 0)
2071 204 : detail = errcxt->err_buf.data;
2072 : else
2073 0 : detail = NULL;
2074 :
2075 204 : errsave(escontext,
2076 : (errcode(sqlcode),
2077 : errmsg_internal("%s", msg),
2078 : detail ? errdetail_internal("%s", detail) : 0));
2079 108 : }
2080 :
2081 :
2082 : /*
2083 : * Error handler for libxml errors and warnings
2084 : */
2085 : static void
2086 398 : xml_errorHandler(void *data, PgXmlErrorPtr error)
2087 : {
2088 398 : PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
2089 398 : xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
2090 398 : xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
2091 398 : xmlNodePtr node = error->node;
2092 398 : const xmlChar *name = (node != NULL &&
2093 398 : node->type == XML_ELEMENT_NODE) ? node->name : NULL;
2094 398 : int domain = error->domain;
2095 398 : int level = error->level;
2096 : StringInfo errorBuf;
2097 :
2098 : /*
2099 : * Defend against someone passing us a bogus context struct.
2100 : *
2101 : * We force a backend exit if this check fails because longjmp'ing out of
2102 : * libxml would likely render it unsafe to use further.
2103 : */
2104 398 : if (xmlerrcxt->magic != ERRCXT_MAGIC)
2105 0 : elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
2106 :
2107 : /*----------
2108 : * Older libxml versions report some errors differently.
2109 : * First, some errors were previously reported as coming from the parser
2110 : * domain but are now reported as coming from the namespace domain.
2111 : * Second, some warnings were upgraded to errors.
2112 : * We attempt to compensate for that here.
2113 : *----------
2114 : */
2115 398 : switch (error->code)
2116 : {
2117 30 : case XML_WAR_NS_URI:
2118 30 : level = XML_ERR_ERROR;
2119 30 : domain = XML_FROM_NAMESPACE;
2120 30 : break;
2121 :
2122 54 : case XML_ERR_NS_DECL_ERROR:
2123 : case XML_WAR_NS_URI_RELATIVE:
2124 : case XML_WAR_NS_COLUMN:
2125 : case XML_NS_ERR_XML_NAMESPACE:
2126 : case XML_NS_ERR_UNDEFINED_NAMESPACE:
2127 : case XML_NS_ERR_QNAME:
2128 : case XML_NS_ERR_ATTRIBUTE_REDEFINED:
2129 : case XML_NS_ERR_EMPTY:
2130 54 : domain = XML_FROM_NAMESPACE;
2131 54 : break;
2132 : }
2133 :
2134 : /* Decide whether to act on the error or not */
2135 398 : switch (domain)
2136 : {
2137 314 : case XML_FROM_PARSER:
2138 :
2139 : /*
2140 : * XML_ERR_NOT_WELL_BALANCED is typically reported after some
2141 : * other, more on-point error. Furthermore, libxml2 2.13 reports
2142 : * it under a completely different set of rules than prior
2143 : * versions. To avoid cross-version behavioral differences,
2144 : * suppress it so long as we already logged some error.
2145 : */
2146 314 : if (error->code == XML_ERR_NOT_WELL_BALANCED &&
2147 30 : xmlerrcxt->err_occurred)
2148 30 : return;
2149 : /* fall through */
2150 :
2151 : case XML_FROM_NONE:
2152 : case XML_FROM_MEMORY:
2153 : case XML_FROM_IO:
2154 :
2155 : /*
2156 : * Suppress warnings about undeclared entities. We need to do
2157 : * this to avoid problems due to not loading DTD definitions.
2158 : */
2159 284 : if (error->code == XML_WAR_UNDECLARED_ENTITY)
2160 6 : return;
2161 :
2162 : /* Otherwise, accept error regardless of the parsing purpose */
2163 278 : break;
2164 :
2165 84 : default:
2166 : /* Ignore error if only doing well-formedness check */
2167 84 : if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
2168 66 : return;
2169 18 : break;
2170 : }
2171 :
2172 : /* Prepare error message in errorBuf */
2173 296 : errorBuf = makeStringInfo();
2174 :
2175 296 : if (error->line > 0)
2176 296 : appendStringInfo(errorBuf, "line %d: ", error->line);
2177 296 : if (name != NULL)
2178 0 : appendStringInfo(errorBuf, "element %s: ", name);
2179 296 : if (error->message != NULL)
2180 296 : appendStringInfoString(errorBuf, error->message);
2181 : else
2182 0 : appendStringInfoString(errorBuf, "(no message provided)");
2183 :
2184 : /*
2185 : * Append context information to errorBuf.
2186 : *
2187 : * xmlParserPrintFileContext() uses libxml's "generic" error handler to
2188 : * write the context. Since we don't want to duplicate libxml
2189 : * functionality here, we set up a generic error handler temporarily.
2190 : *
2191 : * We use appendStringInfo() directly as libxml's generic error handler.
2192 : * This should work because it has essentially the same signature as
2193 : * libxml expects, namely (void *ptr, const char *msg, ...).
2194 : */
2195 296 : if (input != NULL)
2196 : {
2197 296 : xmlGenericErrorFunc errFuncSaved = xmlGenericError;
2198 296 : void *errCtxSaved = xmlGenericErrorContext;
2199 :
2200 296 : xmlSetGenericErrorFunc((void *) errorBuf,
2201 : (xmlGenericErrorFunc) appendStringInfo);
2202 :
2203 : /* Add context information to errorBuf */
2204 296 : appendStringInfoLineSeparator(errorBuf);
2205 :
2206 296 : xmlParserPrintFileContext(input);
2207 :
2208 : /* Restore generic error func */
2209 296 : xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
2210 : }
2211 :
2212 : /* Get rid of any trailing newlines in errorBuf */
2213 296 : chopStringInfoNewlines(errorBuf);
2214 :
2215 : /*
2216 : * Legacy error handling mode. err_occurred is never set, we just add the
2217 : * message to err_buf. This mode exists because the xml2 contrib module
2218 : * uses our error-handling infrastructure, but we don't want to change its
2219 : * behaviour since it's deprecated anyway. This is also why we don't
2220 : * distinguish between notices, warnings and errors here --- the old-style
2221 : * generic error handler wouldn't have done that either.
2222 : */
2223 296 : if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
2224 : {
2225 2 : appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
2226 2 : appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
2227 : errorBuf->len);
2228 :
2229 2 : destroyStringInfo(errorBuf);
2230 2 : return;
2231 : }
2232 :
2233 : /*
2234 : * We don't want to ereport() here because that'd probably leave libxml in
2235 : * an inconsistent state. Instead, we remember the error and ereport()
2236 : * from xml_ereport().
2237 : *
2238 : * Warnings and notices can be reported immediately since they won't cause
2239 : * a longjmp() out of libxml.
2240 : */
2241 294 : if (level >= XML_ERR_ERROR)
2242 : {
2243 288 : appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
2244 288 : appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
2245 : errorBuf->len);
2246 :
2247 288 : xmlerrcxt->err_occurred = true;
2248 : }
2249 6 : else if (level >= XML_ERR_WARNING)
2250 : {
2251 6 : ereport(WARNING,
2252 : (errmsg_internal("%s", errorBuf->data)));
2253 : }
2254 : else
2255 : {
2256 0 : ereport(NOTICE,
2257 : (errmsg_internal("%s", errorBuf->data)));
2258 : }
2259 :
2260 294 : destroyStringInfo(errorBuf);
2261 : }
2262 :
2263 :
2264 : /*
2265 : * Convert libxml error codes into textual errdetail messages.
2266 : *
2267 : * This should be called within an ereport or errsave invocation,
2268 : * just as errdetail would be.
2269 : *
2270 : * At the moment, we only need to cover those codes that we
2271 : * may raise in this file.
2272 : */
2273 : static int
2274 6 : errdetail_for_xml_code(int code)
2275 : {
2276 : const char *det;
2277 :
2278 6 : switch (code)
2279 : {
2280 0 : case XML_ERR_INVALID_CHAR:
2281 0 : det = gettext_noop("Invalid character value.");
2282 0 : break;
2283 0 : case XML_ERR_SPACE_REQUIRED:
2284 0 : det = gettext_noop("Space required.");
2285 0 : break;
2286 6 : case XML_ERR_STANDALONE_VALUE:
2287 6 : det = gettext_noop("standalone accepts only 'yes' or 'no'.");
2288 6 : break;
2289 0 : case XML_ERR_VERSION_MISSING:
2290 0 : det = gettext_noop("Malformed declaration: missing version.");
2291 0 : break;
2292 0 : case XML_ERR_MISSING_ENCODING:
2293 0 : det = gettext_noop("Missing encoding in text declaration.");
2294 0 : break;
2295 0 : case XML_ERR_XMLDECL_NOT_FINISHED:
2296 0 : det = gettext_noop("Parsing XML declaration: '?>' expected.");
2297 0 : break;
2298 0 : default:
2299 0 : det = gettext_noop("Unrecognized libxml error code: %d.");
2300 0 : break;
2301 : }
2302 :
2303 6 : return errdetail(det, code);
2304 : }
2305 :
2306 :
2307 : /*
2308 : * Remove all trailing newlines from a StringInfo string
2309 : */
2310 : static void
2311 882 : chopStringInfoNewlines(StringInfo str)
2312 : {
2313 1474 : while (str->len > 0 && str->data[str->len - 1] == '\n')
2314 592 : str->data[--str->len] = '\0';
2315 882 : }
2316 :
2317 :
2318 : /*
2319 : * Append a newline after removing any existing trailing newlines
2320 : */
2321 : static void
2322 586 : appendStringInfoLineSeparator(StringInfo str)
2323 : {
2324 586 : chopStringInfoNewlines(str);
2325 586 : if (str->len > 0)
2326 368 : appendStringInfoChar(str, '\n');
2327 586 : }
2328 :
2329 :
2330 : /*
2331 : * Convert one char in the current server encoding to a Unicode codepoint.
2332 : */
2333 : static pg_wchar
2334 18280 : sqlchar_to_unicode(const char *s)
2335 : {
2336 : char *utf8string;
2337 : pg_wchar ret[2]; /* need space for trailing zero */
2338 :
2339 : /* note we're not assuming s is null-terminated */
2340 18280 : utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
2341 :
2342 18280 : pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
2343 : pg_encoding_mblen(PG_UTF8, utf8string));
2344 :
2345 18280 : if (utf8string != s)
2346 0 : pfree(utf8string);
2347 :
2348 18280 : return ret[0];
2349 : }
2350 :
2351 :
2352 : static bool
2353 3638 : is_valid_xml_namefirst(pg_wchar c)
2354 : {
2355 : /* (Letter | '_' | ':') */
2356 3644 : return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2357 7282 : || c == '_' || c == ':');
2358 : }
2359 :
2360 :
2361 : static bool
2362 14642 : is_valid_xml_namechar(pg_wchar c)
2363 : {
2364 : /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
2365 15532 : return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2366 890 : || xmlIsDigitQ(c)
2367 254 : || c == '.' || c == '-' || c == '_' || c == ':'
2368 12 : || xmlIsCombiningQ(c)
2369 31064 : || xmlIsExtenderQ(c));
2370 : }
2371 : #endif /* USE_LIBXML */
2372 :
2373 :
/*
 * map_sql_identifier_to_xml_name --- SQL identifier to XML name mapping;
 * see SQL/XML:2008 section 9.1.
 *
 * Characters that cannot be kept as-is are emitted as _xHHHH_ escapes:
 *   - ':' at the start of the identifier, or anywhere if fully_escaped;
 *   - '_' when followed by 'x', so genuine escapes stay unambiguous;
 *   - the first character of a leading "xml" (any case) if fully_escaped;
 *   - '.' if escape_period;
 *   - any character that is not valid at its position in an XML name.
 *
 * Returns a freshly built string.  Without libxml support this raises the
 * NO_XML_SUPPORT() error instead.
 */
char *
map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
                               bool escape_period)
{
#ifdef USE_LIBXML
    StringInfoData out;
    const char *cur;

    /*
     * SQL/XML never asks for period escaping without full escaping, so
     * such a call is probably a mistake.
     */
    Assert(fully_escaped || !escape_period);

    initStringInfo(&out);

    for (cur = ident; *cur; cur += pg_mblen(cur))
    {
        bool        at_start = (cur == ident);
        pg_wchar    codepoint;
        bool        legal;

        if (*cur == ':' && (at_start || fully_escaped))
        {
            appendStringInfoString(&out, "_x003A_");
            continue;
        }

        if (*cur == '_' && cur[1] == 'x')
        {
            appendStringInfoString(&out, "_x005F_");
            continue;
        }

        if (fully_escaped && at_start &&
            pg_strncasecmp(cur, "xml", 3) == 0)
        {
            /* Escape only the leading letter, preserving its case */
            if (*cur == 'x')
                appendStringInfoString(&out, "_x0078_");
            else
                appendStringInfoString(&out, "_x0058_");
            continue;
        }

        if (escape_period && *cur == '.')
        {
            appendStringInfoString(&out, "_x002E_");
            continue;
        }

        /* General case: keep the character if XML allows it here */
        codepoint = sqlchar_to_unicode(cur);
        legal = at_start
            ? is_valid_xml_namefirst(codepoint)
            : is_valid_xml_namechar(codepoint);

        if (legal)
            appendBinaryStringInfo(&out, cur, pg_mblen(cur));
        else
            appendStringInfo(&out, "_x%04X_", (unsigned int) codepoint);
    }

    return out.data;
#else                           /* not USE_LIBXML */
    NO_XML_SUPPORT();
    return NULL;
#endif                          /* not USE_LIBXML */
}
2428 :
2429 :
2430 : /*
2431 : * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2432 : */
2433 : char *
2434 128 : map_xml_name_to_sql_identifier(const char *name)
2435 : {
2436 : StringInfoData buf;
2437 : const char *p;
2438 :
2439 128 : initStringInfo(&buf);
2440 :
2441 704 : for (p = name; *p; p += pg_mblen(p))
2442 : {
2443 576 : if (*p == '_' && *(p + 1) == 'x'
2444 16 : && isxdigit((unsigned char) *(p + 2))
2445 16 : && isxdigit((unsigned char) *(p + 3))
2446 16 : && isxdigit((unsigned char) *(p + 4))
2447 16 : && isxdigit((unsigned char) *(p + 5))
2448 16 : && *(p + 6) == '_')
2449 16 : {
2450 : char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
2451 : unsigned int u;
2452 :
2453 16 : sscanf(p + 2, "%X", &u);
2454 16 : pg_unicode_to_server(u, (unsigned char *) cbuf);
2455 16 : appendStringInfoString(&buf, cbuf);
2456 16 : p += 6;
2457 : }
2458 : else
2459 560 : appendBinaryStringInfo(&buf, p, pg_mblen(p));
2460 : }
2461 :
2462 128 : return buf.data;
2463 : }
2464 :
2465 : /*
2466 : * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2467 : *
2468 : * When xml_escape_strings is true, then certain characters in string
2469 : * values are replaced by entity references (< etc.), as specified
2470 : * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is
2471 : * wanted. The false case is mainly useful when the resulting value
2472 : * is used with xmlTextWriterWriteAttribute() to write out an
2473 : * attribute, because that function does the escaping itself.
2474 : */
2475 : char *
2476 132426 : map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2477 : {
2478 132426 : if (type_is_array_domain(type))
2479 : {
2480 : ArrayType *array;
2481 : Oid elmtype;
2482 : int16 elmlen;
2483 : bool elmbyval;
2484 : char elmalign;
2485 : int num_elems;
2486 : Datum *elem_values;
2487 : bool *elem_nulls;
2488 : StringInfoData buf;
2489 : int i;
2490 :
2491 6 : array = DatumGetArrayTypeP(value);
2492 6 : elmtype = ARR_ELEMTYPE(array);
2493 6 : get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2494 :
2495 6 : deconstruct_array(array, elmtype,
2496 : elmlen, elmbyval, elmalign,
2497 : &elem_values, &elem_nulls,
2498 : &num_elems);
2499 :
2500 6 : initStringInfo(&buf);
2501 :
2502 24 : for (i = 0; i < num_elems; i++)
2503 : {
2504 18 : if (elem_nulls[i])
2505 0 : continue;
2506 18 : appendStringInfoString(&buf, "<element>");
2507 18 : appendStringInfoString(&buf,
2508 18 : map_sql_value_to_xml_value(elem_values[i],
2509 : elmtype, true));
2510 18 : appendStringInfoString(&buf, "</element>");
2511 : }
2512 :
2513 6 : pfree(elem_values);
2514 6 : pfree(elem_nulls);
2515 :
2516 6 : return buf.data;
2517 : }
2518 : else
2519 : {
2520 : Oid typeOut;
2521 : bool isvarlena;
2522 : char *str;
2523 :
2524 : /*
2525 : * Flatten domains; the special-case treatments below should apply to,
2526 : * eg, domains over boolean not just boolean.
2527 : */
2528 132420 : type = getBaseType(type);
2529 :
2530 : /*
2531 : * Special XSD formatting for some data types
2532 : */
2533 132420 : switch (type)
2534 : {
2535 66 : case BOOLOID:
2536 66 : if (DatumGetBool(value))
2537 60 : return "true";
2538 : else
2539 6 : return "false";
2540 :
2541 48 : case DATEOID:
2542 : {
2543 : DateADT date;
2544 : struct pg_tm tm;
2545 : char buf[MAXDATELEN + 1];
2546 :
2547 48 : date = DatumGetDateADT(value);
2548 : /* XSD doesn't support infinite values */
2549 48 : if (DATE_NOT_FINITE(date))
2550 0 : ereport(ERROR,
2551 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2552 : errmsg("date out of range"),
2553 : errdetail("XML does not support infinite date values.")));
2554 48 : j2date(date + POSTGRES_EPOCH_JDATE,
2555 : &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2556 48 : EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2557 :
2558 48 : return pstrdup(buf);
2559 : }
2560 :
2561 36 : case TIMESTAMPOID:
2562 : {
2563 : Timestamp timestamp;
2564 : struct pg_tm tm;
2565 : fsec_t fsec;
2566 : char buf[MAXDATELEN + 1];
2567 :
2568 36 : timestamp = DatumGetTimestamp(value);
2569 :
2570 : /* XSD doesn't support infinite values */
2571 36 : if (TIMESTAMP_NOT_FINITE(timestamp))
2572 6 : ereport(ERROR,
2573 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2574 : errmsg("timestamp out of range"),
2575 : errdetail("XML does not support infinite timestamp values.")));
2576 30 : else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2577 30 : EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2578 : else
2579 0 : ereport(ERROR,
2580 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2581 : errmsg("timestamp out of range")));
2582 :
2583 30 : return pstrdup(buf);
2584 : }
2585 :
2586 24 : case TIMESTAMPTZOID:
2587 : {
2588 : TimestampTz timestamp;
2589 : struct pg_tm tm;
2590 : int tz;
2591 : fsec_t fsec;
2592 24 : const char *tzn = NULL;
2593 : char buf[MAXDATELEN + 1];
2594 :
2595 24 : timestamp = DatumGetTimestamp(value);
2596 :
2597 : /* XSD doesn't support infinite values */
2598 24 : if (TIMESTAMP_NOT_FINITE(timestamp))
2599 0 : ereport(ERROR,
2600 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2601 : errmsg("timestamp out of range"),
2602 : errdetail("XML does not support infinite timestamp values.")));
2603 24 : else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2604 24 : EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2605 : else
2606 0 : ereport(ERROR,
2607 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2608 : errmsg("timestamp out of range")));
2609 :
2610 24 : return pstrdup(buf);
2611 : }
2612 :
2613 : #ifdef USE_LIBXML
2614 36 : case BYTEAOID:
2615 : {
2616 36 : bytea *bstr = DatumGetByteaPP(value);
2617 : PgXmlErrorContext *xmlerrcxt;
2618 36 : volatile xmlBufferPtr buf = NULL;
2619 36 : volatile xmlTextWriterPtr writer = NULL;
2620 : char *result;
2621 :
2622 36 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2623 :
2624 36 : PG_TRY();
2625 : {
2626 36 : buf = xmlBufferCreate();
2627 36 : if (buf == NULL || xmlerrcxt->err_occurred)
2628 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2629 : "could not allocate xmlBuffer");
2630 36 : writer = xmlNewTextWriterMemory(buf, 0);
2631 36 : if (writer == NULL || xmlerrcxt->err_occurred)
2632 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2633 : "could not allocate xmlTextWriter");
2634 :
2635 36 : if (xmlbinary == XMLBINARY_BASE64)
2636 30 : xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2637 30 : 0, VARSIZE_ANY_EXHDR(bstr));
2638 : else
2639 6 : xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2640 6 : 0, VARSIZE_ANY_EXHDR(bstr));
2641 :
2642 : /* we MUST do this now to flush data out to the buffer */
2643 36 : xmlFreeTextWriter(writer);
2644 36 : writer = NULL;
2645 :
2646 36 : result = pstrdup((const char *) xmlBufferContent(buf));
2647 : }
2648 0 : PG_CATCH();
2649 : {
2650 0 : if (writer)
2651 0 : xmlFreeTextWriter(writer);
2652 0 : if (buf)
2653 0 : xmlBufferFree(buf);
2654 :
2655 0 : pg_xml_done(xmlerrcxt, true);
2656 :
2657 0 : PG_RE_THROW();
2658 : }
2659 36 : PG_END_TRY();
2660 :
2661 36 : xmlBufferFree(buf);
2662 :
2663 36 : pg_xml_done(xmlerrcxt, false);
2664 :
2665 36 : return result;
2666 : }
2667 : #endif /* USE_LIBXML */
2668 :
2669 : }
2670 :
2671 : /*
2672 : * otherwise, just use the type's native text representation
2673 : */
2674 132210 : getTypeOutputInfo(type, &typeOut, &isvarlena);
2675 132210 : str = OidOutputFunctionCall(typeOut, value);
2676 :
2677 : /* ... exactly as-is for XML, and when escaping is not wanted */
2678 132210 : if (type == XMLOID || !xml_escape_strings)
2679 21936 : return str;
2680 :
2681 : /* otherwise, translate special characters as needed */
2682 110274 : return escape_xml(str);
2683 : }
2684 : }
2685 :
2686 :
2687 : /*
2688 : * Escape characters in text that have special meanings in XML.
2689 : *
2690 : * Returns a palloc'd string.
2691 : *
2692 : * NB: this is intentionally not dependent on libxml.
2693 : */
2694 : char *
2695 110526 : escape_xml(const char *str)
2696 : {
2697 : StringInfoData buf;
2698 : const char *p;
2699 :
2700 110526 : initStringInfo(&buf);
2701 693300 : for (p = str; *p; p++)
2702 : {
2703 582774 : switch (*p)
2704 : {
2705 0 : case '&':
2706 0 : appendStringInfoString(&buf, "&");
2707 0 : break;
2708 36 : case '<':
2709 36 : appendStringInfoString(&buf, "<");
2710 36 : break;
2711 24 : case '>':
2712 24 : appendStringInfoString(&buf, ">");
2713 24 : break;
2714 0 : case '\r':
2715 0 : appendStringInfoString(&buf, "
");
2716 0 : break;
2717 582714 : default:
2718 582714 : appendStringInfoCharMacro(&buf, *p);
2719 582714 : break;
2720 : }
2721 : }
2722 110526 : return buf.data;
2723 : }
2724 :
2725 :
/*
 * _SPI_strdup --- copy string "s", terminator included, into memory
 * obtained from SPI_palloc()
 */
static char *
_SPI_strdup(const char *s)
{
    size_t      nbytes = strlen(s) + 1;     /* include the terminator */
    char       *copy;

    copy = SPI_palloc(nbytes);

    return memcpy(copy, s, nbytes);
}
2735 :
2736 :
2737 : /*
2738 : * SQL to XML mapping functions
2739 : *
2740 : * What follows below was at one point intentionally organized so that
2741 : * you can read along in the SQL/XML standard. The functions are
2742 : * mostly split up the way the clauses lay out in the standards
2743 : * document, and the identifiers are also aligned with the standard
2744 : * text. Unfortunately, SQL/XML:2006 reordered the clauses
2745 : * differently than SQL/XML:2003, so the order below doesn't make much
2746 : * sense anymore.
2747 : *
2748 : * There are many things going on there:
2749 : *
2750 : * There are two kinds of mappings: Mapping SQL data (table contents)
2751 : * to XML documents, and mapping SQL structure (the "schema") to XML
2752 : * Schema. And there are functions that do both at the same time.
2753 : *
2754 : * Then you can map a database, a schema, or a table, each in both
2755 : * ways. This breaks down recursively: Mapping a database invokes
2756 : * mapping schemas, which invokes mapping tables, which invokes
2757 : * mapping rows, which invokes mapping columns, although you can't
2758 : * call the last two from the outside. Because of this, there are a
2759 : * number of xyz_internal() functions which are to be called both from
2760 : * the function manager wrapper and from some upper layer in a
2761 : * recursive call.
2762 : *
2763 : * See the documentation about what the common function arguments
2764 : * nulls, tableforest, and targetns mean.
2765 : *
2766 : * Some style guidelines for XML output: Use double quotes for quoting
2767 : * XML attributes. Indent XML elements by two spaces, but remember
2768 : * that a lot of code is called recursively at different levels, so
2769 : * it's better not to indent rather than create output that indents
2770 : * and outdents weirdly. Add newlines to make the output look nice.
2771 : */
2772 :
2773 :
2774 : /*
2775 : * Visibility of objects for XML mappings; see SQL/XML:2008 section
2776 : * 4.10.8.
2777 : */
2778 :
2779 : /*
2780 : * Given a query, which must return type oid as first column, produce
2781 : * a list of Oids with the query results.
2782 : */
2783 : static List *
2784 36 : query_to_oid_list(const char *query)
2785 : {
2786 : uint64 i;
2787 36 : List *list = NIL;
2788 : int spi_result;
2789 :
2790 36 : spi_result = SPI_execute(query, true, 0);
2791 36 : if (spi_result != SPI_OK_SELECT)
2792 0 : elog(ERROR, "SPI_execute returned %s for %s",
2793 : SPI_result_code_string(spi_result), query);
2794 :
2795 108 : for (i = 0; i < SPI_processed; i++)
2796 : {
2797 : Datum oid;
2798 : bool isnull;
2799 :
2800 72 : oid = SPI_getbinval(SPI_tuptable->vals[i],
2801 72 : SPI_tuptable->tupdesc,
2802 : 1,
2803 : &isnull);
2804 72 : if (!isnull)
2805 72 : list = lappend_oid(list, DatumGetObjectId(oid));
2806 : }
2807 :
2808 36 : return list;
2809 : }
2810 :
2811 :
2812 : static List *
2813 36 : schema_get_xml_visible_tables(Oid nspid)
2814 : {
2815 : StringInfoData query;
2816 :
2817 36 : initStringInfo(&query);
2818 36 : appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2819 : " WHERE relnamespace = %u AND relkind IN ("
2820 : CppAsString2(RELKIND_RELATION) ","
2821 : CppAsString2(RELKIND_MATVIEW) ","
2822 : CppAsString2(RELKIND_VIEW) ")"
2823 : " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2824 : " ORDER BY relname;", nspid);
2825 :
2826 36 : return query_to_oid_list(query.data);
2827 : }
2828 :
2829 :
2830 : /*
2831 : * Including the system schemas is probably not useful for a database
2832 : * mapping.
2833 : */
2834 : #define XML_VISIBLE_SCHEMAS_EXCLUDE "(nspname ~ '^pg_' OR nspname = 'information_schema')"
2835 :
2836 : #define XML_VISIBLE_SCHEMAS "SELECT oid FROM pg_catalog.pg_namespace WHERE pg_catalog.has_schema_privilege (oid, 'USAGE') AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2837 :
2838 :
2839 : static List *
2840 0 : database_get_xml_visible_schemas(void)
2841 : {
2842 0 : return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2843 : }
2844 :
2845 :
2846 : static List *
2847 0 : database_get_xml_visible_tables(void)
2848 : {
2849 : /* At the moment there is no order required here. */
2850 0 : return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2851 : " WHERE relkind IN ("
2852 : CppAsString2(RELKIND_RELATION) ","
2853 : CppAsString2(RELKIND_MATVIEW) ","
2854 : CppAsString2(RELKIND_VIEW) ")"
2855 : " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2856 : " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2857 : }
2858 :
2859 :
2860 : /*
2861 : * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2862 : * section 9.11.
2863 : */
2864 :
2865 : static StringInfo
2866 96 : table_to_xml_internal(Oid relid,
2867 : const char *xmlschema, bool nulls, bool tableforest,
2868 : const char *targetns, bool top_level)
2869 : {
2870 : StringInfoData query;
2871 :
2872 96 : initStringInfo(&query);
2873 96 : appendStringInfo(&query, "SELECT * FROM %s",
2874 : DatumGetCString(DirectFunctionCall1(regclassout,
2875 : ObjectIdGetDatum(relid))));
2876 96 : return query_to_xml_internal(query.data, get_rel_name(relid),
2877 : xmlschema, nulls, tableforest,
2878 : targetns, top_level);
2879 : }
2880 :
2881 :
2882 : Datum
2883 36 : table_to_xml(PG_FUNCTION_ARGS)
2884 : {
2885 36 : Oid relid = PG_GETARG_OID(0);
2886 36 : bool nulls = PG_GETARG_BOOL(1);
2887 36 : bool tableforest = PG_GETARG_BOOL(2);
2888 36 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2889 :
2890 36 : PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2891 : nulls, tableforest,
2892 : targetns, true)));
2893 : }
2894 :
2895 :
2896 : Datum
2897 10 : query_to_xml(PG_FUNCTION_ARGS)
2898 : {
2899 10 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2900 10 : bool nulls = PG_GETARG_BOOL(1);
2901 10 : bool tableforest = PG_GETARG_BOOL(2);
2902 10 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2903 :
2904 10 : PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2905 : NULL, nulls, tableforest,
2906 : targetns, true)));
2907 : }
2908 :
2909 :
2910 : Datum
2911 12 : cursor_to_xml(PG_FUNCTION_ARGS)
2912 : {
2913 12 : char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2914 12 : int32 count = PG_GETARG_INT32(1);
2915 12 : bool nulls = PG_GETARG_BOOL(2);
2916 12 : bool tableforest = PG_GETARG_BOOL(3);
2917 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2918 :
2919 : StringInfoData result;
2920 : Portal portal;
2921 : uint64 i;
2922 :
2923 12 : initStringInfo(&result);
2924 :
2925 12 : if (!tableforest)
2926 : {
2927 6 : xmldata_root_element_start(&result, "table", NULL, targetns, true);
2928 6 : appendStringInfoChar(&result, '\n');
2929 : }
2930 :
2931 12 : SPI_connect();
2932 12 : portal = SPI_cursor_find(name);
2933 12 : if (portal == NULL)
2934 0 : ereport(ERROR,
2935 : (errcode(ERRCODE_UNDEFINED_CURSOR),
2936 : errmsg("cursor \"%s\" does not exist", name)));
2937 :
2938 12 : SPI_cursor_fetch(portal, true, count);
2939 48 : for (i = 0; i < SPI_processed; i++)
2940 36 : SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2941 : tableforest, targetns, true);
2942 :
2943 12 : SPI_finish();
2944 :
2945 12 : if (!tableforest)
2946 6 : xmldata_root_element_end(&result, "table");
2947 :
2948 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2949 : }
2950 :
2951 :
2952 : /*
2953 : * Write the start tag of the root element of a data mapping.
2954 : *
2955 : * top_level means that this is the very top level of the eventual
2956 : * output. For example, when the user calls table_to_xml, then a call
2957 : * with a table name to this function is the top level. When the user
2958 : * calls database_to_xml, then a call with a schema name to this
2959 : * function is not the top level. If top_level is false, then the XML
2960 : * namespace declarations are omitted, because they supposedly already
2961 : * appeared earlier in the output. Repeating them is not wrong, but
2962 : * it looks ugly.
2963 : */
2964 : static void
2965 238 : xmldata_root_element_start(StringInfo result, const char *eltname,
2966 : const char *xmlschema, const char *targetns,
2967 : bool top_level)
2968 : {
2969 : /* This isn't really wrong but currently makes no sense. */
2970 : Assert(top_level || !xmlschema);
2971 :
2972 238 : appendStringInfo(result, "<%s", eltname);
2973 238 : if (top_level)
2974 : {
2975 178 : appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2976 178 : if (strlen(targetns) > 0)
2977 30 : appendStringInfo(result, " xmlns=\"%s\"", targetns);
2978 : }
2979 238 : if (xmlschema)
2980 : {
2981 : /* FIXME: better targets */
2982 18 : if (strlen(targetns) > 0)
2983 6 : appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2984 : else
2985 12 : appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2986 : }
2987 238 : appendStringInfoString(result, ">\n");
2988 238 : }
2989 :
2990 :
2991 : static void
2992 238 : xmldata_root_element_end(StringInfo result, const char *eltname)
2993 : {
2994 238 : appendStringInfo(result, "</%s>\n", eltname);
2995 238 : }
2996 :
2997 :
2998 : static StringInfo
2999 112 : query_to_xml_internal(const char *query, char *tablename,
3000 : const char *xmlschema, bool nulls, bool tableforest,
3001 : const char *targetns, bool top_level)
3002 : {
3003 : StringInfo result;
3004 : char *xmltn;
3005 : uint64 i;
3006 :
3007 112 : if (tablename)
3008 96 : xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3009 : else
3010 16 : xmltn = "table";
3011 :
3012 112 : result = makeStringInfo();
3013 :
3014 112 : SPI_connect();
3015 112 : if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
3016 0 : ereport(ERROR,
3017 : (errcode(ERRCODE_DATA_EXCEPTION),
3018 : errmsg("invalid query")));
3019 :
3020 112 : if (!tableforest)
3021 : {
3022 52 : xmldata_root_element_start(result, xmltn, xmlschema,
3023 : targetns, top_level);
3024 52 : appendStringInfoChar(result, '\n');
3025 : }
3026 :
3027 112 : if (xmlschema)
3028 30 : appendStringInfo(result, "%s\n\n", xmlschema);
3029 :
3030 388 : for (i = 0; i < SPI_processed; i++)
3031 276 : SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
3032 : tableforest, targetns, top_level);
3033 :
3034 112 : if (!tableforest)
3035 52 : xmldata_root_element_end(result, xmltn);
3036 :
3037 112 : SPI_finish();
3038 :
3039 112 : return result;
3040 : }
3041 :
3042 :
3043 : Datum
3044 30 : table_to_xmlschema(PG_FUNCTION_ARGS)
3045 : {
3046 30 : Oid relid = PG_GETARG_OID(0);
3047 30 : bool nulls = PG_GETARG_BOOL(1);
3048 30 : bool tableforest = PG_GETARG_BOOL(2);
3049 30 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3050 : const char *result;
3051 : Relation rel;
3052 :
3053 30 : rel = table_open(relid, AccessShareLock);
3054 30 : result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3055 : tableforest, targetns);
3056 30 : table_close(rel, NoLock);
3057 :
3058 30 : PG_RETURN_XML_P(cstring_to_xmltype(result));
3059 : }
3060 :
3061 :
3062 : Datum
3063 6 : query_to_xmlschema(PG_FUNCTION_ARGS)
3064 : {
3065 6 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
3066 6 : bool nulls = PG_GETARG_BOOL(1);
3067 6 : bool tableforest = PG_GETARG_BOOL(2);
3068 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3069 : const char *result;
3070 : SPIPlanPtr plan;
3071 : Portal portal;
3072 :
3073 6 : SPI_connect();
3074 :
3075 6 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
3076 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3077 :
3078 6 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
3079 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3080 :
3081 6 : result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3082 : InvalidOid, nulls,
3083 : tableforest, targetns));
3084 6 : SPI_cursor_close(portal);
3085 6 : SPI_finish();
3086 :
3087 6 : PG_RETURN_XML_P(cstring_to_xmltype(result));
3088 : }
3089 :
3090 :
3091 : Datum
3092 12 : cursor_to_xmlschema(PG_FUNCTION_ARGS)
3093 : {
3094 12 : char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
3095 12 : bool nulls = PG_GETARG_BOOL(1);
3096 12 : bool tableforest = PG_GETARG_BOOL(2);
3097 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3098 : const char *xmlschema;
3099 : Portal portal;
3100 :
3101 12 : SPI_connect();
3102 12 : portal = SPI_cursor_find(name);
3103 12 : if (portal == NULL)
3104 0 : ereport(ERROR,
3105 : (errcode(ERRCODE_UNDEFINED_CURSOR),
3106 : errmsg("cursor \"%s\" does not exist", name)));
3107 12 : if (portal->tupDesc == NULL)
3108 0 : ereport(ERROR,
3109 : (errcode(ERRCODE_INVALID_CURSOR_STATE),
3110 : errmsg("portal \"%s\" does not return tuples", name)));
3111 :
3112 12 : xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3113 : InvalidOid, nulls,
3114 : tableforest, targetns));
3115 12 : SPI_finish();
3116 :
3117 12 : PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
3118 : }
3119 :
3120 :
3121 : Datum
3122 24 : table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3123 : {
3124 24 : Oid relid = PG_GETARG_OID(0);
3125 24 : bool nulls = PG_GETARG_BOOL(1);
3126 24 : bool tableforest = PG_GETARG_BOOL(2);
3127 24 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3128 : Relation rel;
3129 : const char *xmlschema;
3130 :
3131 24 : rel = table_open(relid, AccessShareLock);
3132 24 : xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3133 : tableforest, targetns);
3134 24 : table_close(rel, NoLock);
3135 :
3136 24 : PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
3137 : xmlschema, nulls, tableforest,
3138 : targetns, true)));
3139 : }
3140 :
3141 :
3142 : Datum
3143 6 : query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3144 : {
3145 6 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
3146 6 : bool nulls = PG_GETARG_BOOL(1);
3147 6 : bool tableforest = PG_GETARG_BOOL(2);
3148 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3149 :
3150 : const char *xmlschema;
3151 : SPIPlanPtr plan;
3152 : Portal portal;
3153 :
3154 6 : SPI_connect();
3155 :
3156 6 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
3157 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3158 :
3159 6 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
3160 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3161 :
3162 6 : xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3163 : InvalidOid, nulls, tableforest, targetns));
3164 6 : SPI_cursor_close(portal);
3165 6 : SPI_finish();
3166 :
3167 6 : PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
3168 : xmlschema, nulls, tableforest,
3169 : targetns, true)));
3170 : }
3171 :
3172 :
3173 : /*
3174 : * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
3175 : * sections 9.13, 9.14.
3176 : */
3177 :
3178 : static StringInfo
3179 18 : schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
3180 : bool tableforest, const char *targetns, bool top_level)
3181 : {
3182 : StringInfo result;
3183 : char *xmlsn;
3184 : List *relid_list;
3185 : ListCell *cell;
3186 :
3187 18 : xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
3188 : true, false);
3189 18 : result = makeStringInfo();
3190 :
3191 18 : xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
3192 18 : appendStringInfoChar(result, '\n');
3193 :
3194 18 : if (xmlschema)
3195 6 : appendStringInfo(result, "%s\n\n", xmlschema);
3196 :
3197 18 : SPI_connect();
3198 :
3199 18 : relid_list = schema_get_xml_visible_tables(nspid);
3200 :
3201 54 : foreach(cell, relid_list)
3202 : {
3203 36 : Oid relid = lfirst_oid(cell);
3204 : StringInfo subres;
3205 :
3206 36 : subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
3207 : targetns, false);
3208 :
3209 36 : appendBinaryStringInfo(result, subres->data, subres->len);
3210 36 : appendStringInfoChar(result, '\n');
3211 : }
3212 :
3213 18 : SPI_finish();
3214 :
3215 18 : xmldata_root_element_end(result, xmlsn);
3216 :
3217 18 : return result;
3218 : }
3219 :
3220 :
3221 : Datum
3222 12 : schema_to_xml(PG_FUNCTION_ARGS)
3223 : {
3224 12 : Name name = PG_GETARG_NAME(0);
3225 12 : bool nulls = PG_GETARG_BOOL(1);
3226 12 : bool tableforest = PG_GETARG_BOOL(2);
3227 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3228 :
3229 : char *schemaname;
3230 : Oid nspid;
3231 :
3232 12 : schemaname = NameStr(*name);
3233 12 : nspid = LookupExplicitNamespace(schemaname, false);
3234 :
3235 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
3236 : nulls, tableforest, targetns, true)));
3237 : }
3238 :
3239 :
3240 : /*
3241 : * Write the start element of the root element of an XML Schema mapping.
3242 : */
3243 : static void
3244 96 : xsd_schema_element_start(StringInfo result, const char *targetns)
3245 : {
3246 96 : appendStringInfoString(result,
3247 : "<xsd:schema\n"
3248 : " xmlns:xsd=\"" NAMESPACE_XSD "\"");
3249 96 : if (strlen(targetns) > 0)
3250 18 : appendStringInfo(result,
3251 : "\n"
3252 : " targetNamespace=\"%s\"\n"
3253 : " elementFormDefault=\"qualified\"",
3254 : targetns);
3255 96 : appendStringInfoString(result,
3256 : ">\n\n");
3257 96 : }
3258 :
3259 :
3260 : static void
3261 96 : xsd_schema_element_end(StringInfo result)
3262 : {
3263 96 : appendStringInfoString(result, "</xsd:schema>");
3264 96 : }
3265 :
3266 :
3267 : static StringInfo
3268 18 : schema_to_xmlschema_internal(const char *schemaname, bool nulls,
3269 : bool tableforest, const char *targetns)
3270 : {
3271 : Oid nspid;
3272 : List *relid_list;
3273 : List *tupdesc_list;
3274 : ListCell *cell;
3275 : StringInfo result;
3276 :
3277 18 : result = makeStringInfo();
3278 :
3279 18 : nspid = LookupExplicitNamespace(schemaname, false);
3280 :
3281 18 : xsd_schema_element_start(result, targetns);
3282 :
3283 18 : SPI_connect();
3284 :
3285 18 : relid_list = schema_get_xml_visible_tables(nspid);
3286 :
3287 18 : tupdesc_list = NIL;
3288 54 : foreach(cell, relid_list)
3289 : {
3290 : Relation rel;
3291 :
3292 36 : rel = table_open(lfirst_oid(cell), AccessShareLock);
3293 36 : tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3294 36 : table_close(rel, NoLock);
3295 : }
3296 :
3297 18 : appendStringInfoString(result,
3298 : map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3299 :
3300 18 : appendStringInfoString(result,
3301 : map_sql_schema_to_xmlschema_types(nspid, relid_list,
3302 : nulls, tableforest, targetns));
3303 :
3304 18 : xsd_schema_element_end(result);
3305 :
3306 18 : SPI_finish();
3307 :
3308 18 : return result;
3309 : }
3310 :
3311 :
3312 : Datum
3313 12 : schema_to_xmlschema(PG_FUNCTION_ARGS)
3314 : {
3315 12 : Name name = PG_GETARG_NAME(0);
3316 12 : bool nulls = PG_GETARG_BOOL(1);
3317 12 : bool tableforest = PG_GETARG_BOOL(2);
3318 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3319 :
3320 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
3321 : nulls, tableforest, targetns)));
3322 : }
3323 :
3324 :
3325 : Datum
3326 6 : schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3327 : {
3328 6 : Name name = PG_GETARG_NAME(0);
3329 6 : bool nulls = PG_GETARG_BOOL(1);
3330 6 : bool tableforest = PG_GETARG_BOOL(2);
3331 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3332 : char *schemaname;
3333 : Oid nspid;
3334 : StringInfo xmlschema;
3335 :
3336 6 : schemaname = NameStr(*name);
3337 6 : nspid = LookupExplicitNamespace(schemaname, false);
3338 :
3339 6 : xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
3340 : tableforest, targetns);
3341 :
3342 6 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
3343 : xmlschema->data, nulls,
3344 : tableforest, targetns, true)));
3345 : }
3346 :
3347 :
3348 : /*
3349 : * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
3350 : * sections 9.16, 9.17.
3351 : */
3352 :
3353 : static StringInfo
3354 0 : database_to_xml_internal(const char *xmlschema, bool nulls,
3355 : bool tableforest, const char *targetns)
3356 : {
3357 : StringInfo result;
3358 : List *nspid_list;
3359 : ListCell *cell;
3360 : char *xmlcn;
3361 :
3362 0 : xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
3363 : true, false);
3364 0 : result = makeStringInfo();
3365 :
3366 0 : xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
3367 0 : appendStringInfoChar(result, '\n');
3368 :
3369 0 : if (xmlschema)
3370 0 : appendStringInfo(result, "%s\n\n", xmlschema);
3371 :
3372 0 : SPI_connect();
3373 :
3374 0 : nspid_list = database_get_xml_visible_schemas();
3375 :
3376 0 : foreach(cell, nspid_list)
3377 : {
3378 0 : Oid nspid = lfirst_oid(cell);
3379 : StringInfo subres;
3380 :
3381 0 : subres = schema_to_xml_internal(nspid, NULL, nulls,
3382 : tableforest, targetns, false);
3383 :
3384 0 : appendBinaryStringInfo(result, subres->data, subres->len);
3385 0 : appendStringInfoChar(result, '\n');
3386 : }
3387 :
3388 0 : SPI_finish();
3389 :
3390 0 : xmldata_root_element_end(result, xmlcn);
3391 :
3392 0 : return result;
3393 : }
3394 :
3395 :
3396 : Datum
3397 0 : database_to_xml(PG_FUNCTION_ARGS)
3398 : {
3399 0 : bool nulls = PG_GETARG_BOOL(0);
3400 0 : bool tableforest = PG_GETARG_BOOL(1);
3401 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3402 :
3403 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3404 : tableforest, targetns)));
3405 : }
3406 :
3407 :
3408 : static StringInfo
3409 0 : database_to_xmlschema_internal(bool nulls, bool tableforest,
3410 : const char *targetns)
3411 : {
3412 : List *relid_list;
3413 : List *nspid_list;
3414 : List *tupdesc_list;
3415 : ListCell *cell;
3416 : StringInfo result;
3417 :
3418 0 : result = makeStringInfo();
3419 :
3420 0 : xsd_schema_element_start(result, targetns);
3421 :
3422 0 : SPI_connect();
3423 :
3424 0 : relid_list = database_get_xml_visible_tables();
3425 0 : nspid_list = database_get_xml_visible_schemas();
3426 :
3427 0 : tupdesc_list = NIL;
3428 0 : foreach(cell, relid_list)
3429 : {
3430 : Relation rel;
3431 :
3432 0 : rel = table_open(lfirst_oid(cell), AccessShareLock);
3433 0 : tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3434 0 : table_close(rel, NoLock);
3435 : }
3436 :
3437 0 : appendStringInfoString(result,
3438 : map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3439 :
3440 0 : appendStringInfoString(result,
3441 : map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
3442 :
3443 0 : xsd_schema_element_end(result);
3444 :
3445 0 : SPI_finish();
3446 :
3447 0 : return result;
3448 : }
3449 :
3450 :
3451 : Datum
3452 0 : database_to_xmlschema(PG_FUNCTION_ARGS)
3453 : {
3454 0 : bool nulls = PG_GETARG_BOOL(0);
3455 0 : bool tableforest = PG_GETARG_BOOL(1);
3456 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3457 :
3458 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3459 : tableforest, targetns)));
3460 : }
3461 :
3462 :
3463 : Datum
3464 0 : database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3465 : {
3466 0 : bool nulls = PG_GETARG_BOOL(0);
3467 0 : bool tableforest = PG_GETARG_BOOL(1);
3468 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3469 : StringInfo xmlschema;
3470 :
3471 0 : xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3472 :
3473 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3474 : nulls, tableforest, targetns)));
3475 : }
3476 :
3477 :
3478 : /*
3479 : * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3480 : * 9.2.
3481 : */
3482 : static char *
3483 384 : map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
3484 : {
3485 : StringInfoData result;
3486 :
3487 384 : initStringInfo(&result);
3488 :
3489 384 : if (a)
3490 384 : appendStringInfoString(&result,
3491 384 : map_sql_identifier_to_xml_name(a, true, true));
3492 384 : if (b)
3493 384 : appendStringInfo(&result, ".%s",
3494 : map_sql_identifier_to_xml_name(b, true, true));
3495 384 : if (c)
3496 384 : appendStringInfo(&result, ".%s",
3497 : map_sql_identifier_to_xml_name(c, true, true));
3498 384 : if (d)
3499 366 : appendStringInfo(&result, ".%s",
3500 : map_sql_identifier_to_xml_name(d, true, true));
3501 :
3502 384 : return result.data;
3503 : }
3504 :
3505 :
3506 : /*
3507 : * Map an SQL table to an XML Schema document; see SQL/XML:2008
3508 : * section 9.11.
3509 : *
3510 : * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3511 : * 9.9.
3512 : */
3513 : static const char *
3514 78 : map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3515 : bool tableforest, const char *targetns)
3516 : {
3517 : int i;
3518 : char *xmltn;
3519 : char *tabletypename;
3520 : char *rowtypename;
3521 : StringInfoData result;
3522 :
3523 78 : initStringInfo(&result);
3524 :
3525 78 : if (OidIsValid(relid))
3526 : {
3527 : HeapTuple tuple;
3528 : Form_pg_class reltuple;
3529 :
3530 54 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3531 54 : if (!HeapTupleIsValid(tuple))
3532 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
3533 54 : reltuple = (Form_pg_class) GETSTRUCT(tuple);
3534 :
3535 54 : xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3536 : true, false);
3537 :
3538 54 : tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3539 54 : get_database_name(MyDatabaseId),
3540 54 : get_namespace_name(reltuple->relnamespace),
3541 54 : NameStr(reltuple->relname));
3542 :
3543 54 : rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3544 54 : get_database_name(MyDatabaseId),
3545 54 : get_namespace_name(reltuple->relnamespace),
3546 54 : NameStr(reltuple->relname));
3547 :
3548 54 : ReleaseSysCache(tuple);
3549 : }
3550 : else
3551 : {
3552 24 : if (tableforest)
3553 12 : xmltn = "row";
3554 : else
3555 12 : xmltn = "table";
3556 :
3557 24 : tabletypename = "TableType";
3558 24 : rowtypename = "RowType";
3559 : }
3560 :
3561 78 : xsd_schema_element_start(&result, targetns);
3562 :
3563 78 : appendStringInfoString(&result,
3564 78 : map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3565 :
3566 78 : appendStringInfo(&result,
3567 : "<xsd:complexType name=\"%s\">\n"
3568 : " <xsd:sequence>\n",
3569 : rowtypename);
3570 :
3571 324 : for (i = 0; i < tupdesc->natts; i++)
3572 : {
3573 246 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3574 :
3575 246 : if (att->attisdropped)
3576 6 : continue;
3577 480 : appendStringInfo(&result,
3578 : " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3579 240 : map_sql_identifier_to_xml_name(NameStr(att->attname),
3580 : true, false),
3581 : map_sql_type_to_xml_name(att->atttypid, -1),
3582 : nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3583 : }
3584 :
3585 78 : appendStringInfoString(&result,
3586 : " </xsd:sequence>\n"
3587 : "</xsd:complexType>\n\n");
3588 :
3589 78 : if (!tableforest)
3590 : {
3591 42 : appendStringInfo(&result,
3592 : "<xsd:complexType name=\"%s\">\n"
3593 : " <xsd:sequence>\n"
3594 : " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3595 : " </xsd:sequence>\n"
3596 : "</xsd:complexType>\n\n",
3597 : tabletypename, rowtypename);
3598 :
3599 42 : appendStringInfo(&result,
3600 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3601 : xmltn, tabletypename);
3602 : }
3603 : else
3604 36 : appendStringInfo(&result,
3605 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3606 : xmltn, rowtypename);
3607 :
3608 78 : xsd_schema_element_end(&result);
3609 :
3610 78 : return result.data;
3611 : }
3612 :
3613 :
3614 : /*
3615 : * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3616 : * section 9.12.
3617 : */
3618 : static const char *
3619 18 : map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3620 : bool tableforest, const char *targetns)
3621 : {
3622 : char *dbname;
3623 : char *nspname;
3624 : char *xmlsn;
3625 : char *schematypename;
3626 : StringInfoData result;
3627 : ListCell *cell;
3628 :
3629 18 : dbname = get_database_name(MyDatabaseId);
3630 18 : nspname = get_namespace_name(nspid);
3631 :
3632 18 : initStringInfo(&result);
3633 :
3634 18 : xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3635 :
3636 18 : schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3637 : dbname,
3638 : nspname,
3639 : NULL);
3640 :
3641 18 : appendStringInfo(&result,
3642 : "<xsd:complexType name=\"%s\">\n", schematypename);
3643 18 : if (!tableforest)
3644 6 : appendStringInfoString(&result,
3645 : " <xsd:all>\n");
3646 : else
3647 12 : appendStringInfoString(&result,
3648 : " <xsd:sequence>\n");
3649 :
3650 54 : foreach(cell, relid_list)
3651 : {
3652 36 : Oid relid = lfirst_oid(cell);
3653 36 : char *relname = get_rel_name(relid);
3654 36 : char *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3655 36 : char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3656 : dbname,
3657 : nspname,
3658 : relname);
3659 :
3660 36 : if (!tableforest)
3661 12 : appendStringInfo(&result,
3662 : " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3663 : xmltn, tabletypename);
3664 : else
3665 24 : appendStringInfo(&result,
3666 : " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3667 : xmltn, tabletypename);
3668 : }
3669 :
3670 18 : if (!tableforest)
3671 6 : appendStringInfoString(&result,
3672 : " </xsd:all>\n");
3673 : else
3674 12 : appendStringInfoString(&result,
3675 : " </xsd:sequence>\n");
3676 18 : appendStringInfoString(&result,
3677 : "</xsd:complexType>\n\n");
3678 :
3679 18 : appendStringInfo(&result,
3680 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3681 : xmlsn, schematypename);
3682 :
3683 18 : return result.data;
3684 : }
3685 :
3686 :
3687 : /*
3688 : * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3689 : * section 9.15.
3690 : */
3691 : static const char *
3692 0 : map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3693 : bool tableforest, const char *targetns)
3694 : {
3695 : char *dbname;
3696 : char *xmlcn;
3697 : char *catalogtypename;
3698 : StringInfoData result;
3699 : ListCell *cell;
3700 :
3701 0 : dbname = get_database_name(MyDatabaseId);
3702 :
3703 0 : initStringInfo(&result);
3704 :
3705 0 : xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3706 :
3707 0 : catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3708 : dbname,
3709 : NULL,
3710 : NULL);
3711 :
3712 0 : appendStringInfo(&result,
3713 : "<xsd:complexType name=\"%s\">\n", catalogtypename);
3714 0 : appendStringInfoString(&result,
3715 : " <xsd:all>\n");
3716 :
3717 0 : foreach(cell, nspid_list)
3718 : {
3719 0 : Oid nspid = lfirst_oid(cell);
3720 0 : char *nspname = get_namespace_name(nspid);
3721 0 : char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3722 0 : char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3723 : dbname,
3724 : nspname,
3725 : NULL);
3726 :
3727 0 : appendStringInfo(&result,
3728 : " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3729 : xmlsn, schematypename);
3730 : }
3731 :
3732 0 : appendStringInfoString(&result,
3733 : " </xsd:all>\n");
3734 0 : appendStringInfoString(&result,
3735 : "</xsd:complexType>\n\n");
3736 :
3737 0 : appendStringInfo(&result,
3738 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3739 : xmlcn, catalogtypename);
3740 :
3741 0 : return result.data;
3742 : }
3743 :
3744 :
3745 : /*
3746 : * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3747 : */
3748 : static const char *
3749 810 : map_sql_type_to_xml_name(Oid typeoid, int typmod)
3750 : {
3751 : StringInfoData result;
3752 :
3753 810 : initStringInfo(&result);
3754 :
3755 810 : switch (typeoid)
3756 : {
3757 30 : case BPCHAROID:
3758 30 : if (typmod == -1)
3759 30 : appendStringInfoString(&result, "CHAR");
3760 : else
3761 0 : appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3762 30 : break;
3763 54 : case VARCHAROID:
3764 54 : if (typmod == -1)
3765 54 : appendStringInfoString(&result, "VARCHAR");
3766 : else
3767 0 : appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3768 54 : break;
3769 30 : case NUMERICOID:
3770 30 : if (typmod == -1)
3771 30 : appendStringInfoString(&result, "NUMERIC");
3772 : else
3773 0 : appendStringInfo(&result, "NUMERIC_%d_%d",
3774 0 : ((typmod - VARHDRSZ) >> 16) & 0xffff,
3775 0 : (typmod - VARHDRSZ) & 0xffff);
3776 30 : break;
3777 174 : case INT4OID:
3778 174 : appendStringInfoString(&result, "INTEGER");
3779 174 : break;
3780 30 : case INT2OID:
3781 30 : appendStringInfoString(&result, "SMALLINT");
3782 30 : break;
3783 30 : case INT8OID:
3784 30 : appendStringInfoString(&result, "BIGINT");
3785 30 : break;
3786 30 : case FLOAT4OID:
3787 30 : appendStringInfoString(&result, "REAL");
3788 30 : break;
3789 0 : case FLOAT8OID:
3790 0 : appendStringInfoString(&result, "DOUBLE");
3791 0 : break;
3792 30 : case BOOLOID:
3793 30 : appendStringInfoString(&result, "BOOLEAN");
3794 30 : break;
3795 30 : case TIMEOID:
3796 30 : if (typmod == -1)
3797 30 : appendStringInfoString(&result, "TIME");
3798 : else
3799 0 : appendStringInfo(&result, "TIME_%d", typmod);
3800 30 : break;
3801 30 : case TIMETZOID:
3802 30 : if (typmod == -1)
3803 30 : appendStringInfoString(&result, "TIME_WTZ");
3804 : else
3805 0 : appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3806 30 : break;
3807 30 : case TIMESTAMPOID:
3808 30 : if (typmod == -1)
3809 30 : appendStringInfoString(&result, "TIMESTAMP");
3810 : else
3811 0 : appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3812 30 : break;
3813 30 : case TIMESTAMPTZOID:
3814 30 : if (typmod == -1)
3815 30 : appendStringInfoString(&result, "TIMESTAMP_WTZ");
3816 : else
3817 0 : appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3818 30 : break;
3819 30 : case DATEOID:
3820 30 : appendStringInfoString(&result, "DATE");
3821 30 : break;
3822 30 : case XMLOID:
3823 30 : appendStringInfoString(&result, "XML");
3824 30 : break;
3825 222 : default:
3826 : {
3827 : HeapTuple tuple;
3828 : Form_pg_type typtuple;
3829 :
3830 222 : tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3831 222 : if (!HeapTupleIsValid(tuple))
3832 0 : elog(ERROR, "cache lookup failed for type %u", typeoid);
3833 222 : typtuple = (Form_pg_type) GETSTRUCT(tuple);
3834 :
3835 222 : appendStringInfoString(&result,
3836 222 : map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3837 222 : get_database_name(MyDatabaseId),
3838 222 : get_namespace_name(typtuple->typnamespace),
3839 222 : NameStr(typtuple->typname)));
3840 :
3841 222 : ReleaseSysCache(tuple);
3842 : }
3843 : }
3844 :
3845 810 : return result.data;
3846 : }
3847 :
3848 :
3849 : /*
3850 : * Map a collection of SQL data types to XML Schema data types; see
3851 : * SQL/XML:2008 section 9.7.
3852 : */
3853 : static const char *
3854 96 : map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3855 : {
3856 96 : List *uniquetypes = NIL;
3857 : int i;
3858 : StringInfoData result;
3859 : ListCell *cell0;
3860 :
3861 : /* extract all column types used in the set of TupleDescs */
3862 210 : foreach(cell0, tupdesc_list)
3863 : {
3864 114 : TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
3865 :
3866 702 : for (i = 0; i < tupdesc->natts; i++)
3867 : {
3868 588 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3869 :
3870 588 : if (att->attisdropped)
3871 24 : continue;
3872 564 : uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3873 : }
3874 : }
3875 :
3876 : /* add base types of domains */
3877 642 : foreach(cell0, uniquetypes)
3878 : {
3879 546 : Oid typid = lfirst_oid(cell0);
3880 546 : Oid basetypid = getBaseType(typid);
3881 :
3882 546 : if (basetypid != typid)
3883 24 : uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3884 : }
3885 :
3886 : /* Convert to textual form */
3887 96 : initStringInfo(&result);
3888 :
3889 642 : foreach(cell0, uniquetypes)
3890 : {
3891 546 : appendStringInfo(&result, "%s\n",
3892 : map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3893 : -1));
3894 : }
3895 :
3896 96 : return result.data;
3897 : }
3898 :
3899 :
3900 : /*
3901 : * Map an SQL data type to a named XML Schema data type; see
3902 : * SQL/XML:2008 sections 9.5 and 9.6.
3903 : *
3904 : * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3905 : * a name attribute, which this function does. The name-less version
3906 : * 9.5 doesn't appear to be required anywhere.)
3907 : */
3908 : static const char *
3909 546 : map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3910 : {
3911 : StringInfoData result;
3912 546 : const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3913 :
3914 546 : initStringInfo(&result);
3915 :
3916 546 : if (typeoid == XMLOID)
3917 : {
3918 24 : appendStringInfoString(&result,
3919 : "<xsd:complexType mixed=\"true\">\n"
3920 : " <xsd:sequence>\n"
3921 : " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3922 : " </xsd:sequence>\n"
3923 : "</xsd:complexType>\n");
3924 : }
3925 : else
3926 : {
3927 522 : appendStringInfo(&result,
3928 : "<xsd:simpleType name=\"%s\">\n", typename);
3929 :
3930 522 : switch (typeoid)
3931 : {
3932 138 : case BPCHAROID:
3933 : case VARCHAROID:
3934 : case TEXTOID:
3935 138 : appendStringInfoString(&result,
3936 : " <xsd:restriction base=\"xsd:string\">\n");
3937 138 : if (typmod != -1)
3938 0 : appendStringInfo(&result,
3939 : " <xsd:maxLength value=\"%d\"/>\n",
3940 : typmod - VARHDRSZ);
3941 138 : appendStringInfoString(&result, " </xsd:restriction>\n");
3942 138 : break;
3943 :
3944 24 : case BYTEAOID:
3945 24 : appendStringInfo(&result,
3946 : " <xsd:restriction base=\"xsd:%s\">\n"
3947 : " </xsd:restriction>\n",
3948 24 : xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3949 24 : break;
3950 :
3951 24 : case NUMERICOID:
3952 24 : if (typmod != -1)
3953 0 : appendStringInfo(&result,
3954 : " <xsd:restriction base=\"xsd:decimal\">\n"
3955 : " <xsd:totalDigits value=\"%d\"/>\n"
3956 : " <xsd:fractionDigits value=\"%d\"/>\n"
3957 : " </xsd:restriction>\n",
3958 0 : ((typmod - VARHDRSZ) >> 16) & 0xffff,
3959 0 : (typmod - VARHDRSZ) & 0xffff);
3960 24 : break;
3961 :
3962 24 : case INT2OID:
3963 24 : appendStringInfo(&result,
3964 : " <xsd:restriction base=\"xsd:short\">\n"
3965 : " <xsd:maxInclusive value=\"%d\"/>\n"
3966 : " <xsd:minInclusive value=\"%d\"/>\n"
3967 : " </xsd:restriction>\n",
3968 : SHRT_MAX, SHRT_MIN);
3969 24 : break;
3970 :
3971 96 : case INT4OID:
3972 96 : appendStringInfo(&result,
3973 : " <xsd:restriction base=\"xsd:int\">\n"
3974 : " <xsd:maxInclusive value=\"%d\"/>\n"
3975 : " <xsd:minInclusive value=\"%d\"/>\n"
3976 : " </xsd:restriction>\n",
3977 : INT_MAX, INT_MIN);
3978 96 : break;
3979 :
3980 24 : case INT8OID:
3981 24 : appendStringInfo(&result,
3982 : " <xsd:restriction base=\"xsd:long\">\n"
3983 : " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3984 : " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3985 : " </xsd:restriction>\n",
3986 : PG_INT64_MAX,
3987 : PG_INT64_MIN);
3988 24 : break;
3989 :
3990 24 : case FLOAT4OID:
3991 24 : appendStringInfoString(&result,
3992 : " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
3993 24 : break;
3994 :
3995 0 : case FLOAT8OID:
3996 0 : appendStringInfoString(&result,
3997 : " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
3998 0 : break;
3999 :
4000 24 : case BOOLOID:
4001 24 : appendStringInfoString(&result,
4002 : " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
4003 24 : break;
4004 :
4005 48 : case TIMEOID:
4006 : case TIMETZOID:
4007 : {
4008 48 : const char *tz = (typeoid == TIMETZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
4009 :
4010 48 : if (typmod == -1)
4011 48 : appendStringInfo(&result,
4012 : " <xsd:restriction base=\"xsd:time\">\n"
4013 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
4014 : " </xsd:restriction>\n", tz);
4015 0 : else if (typmod == 0)
4016 0 : appendStringInfo(&result,
4017 : " <xsd:restriction base=\"xsd:time\">\n"
4018 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
4019 : " </xsd:restriction>\n", tz);
4020 : else
4021 0 : appendStringInfo(&result,
4022 : " <xsd:restriction base=\"xsd:time\">\n"
4023 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
4024 : " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
4025 48 : break;
4026 : }
4027 :
4028 48 : case TIMESTAMPOID:
4029 : case TIMESTAMPTZOID:
4030 : {
4031 48 : const char *tz = (typeoid == TIMESTAMPTZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
4032 :
4033 48 : if (typmod == -1)
4034 48 : appendStringInfo(&result,
4035 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4036 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
4037 : " </xsd:restriction>\n", tz);
4038 0 : else if (typmod == 0)
4039 0 : appendStringInfo(&result,
4040 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4041 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
4042 : " </xsd:restriction>\n", tz);
4043 : else
4044 0 : appendStringInfo(&result,
4045 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4046 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
4047 : " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
4048 48 : break;
4049 : }
4050 :
4051 24 : case DATEOID:
4052 24 : appendStringInfoString(&result,
4053 : " <xsd:restriction base=\"xsd:date\">\n"
4054 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
4055 : " </xsd:restriction>\n");
4056 24 : break;
4057 :
4058 24 : default:
4059 24 : if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
4060 : {
4061 : Oid base_typeoid;
4062 24 : int32 base_typmod = -1;
4063 :
4064 24 : base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
4065 :
4066 24 : appendStringInfo(&result,
4067 : " <xsd:restriction base=\"%s\"/>\n",
4068 : map_sql_type_to_xml_name(base_typeoid, base_typmod));
4069 : }
4070 24 : break;
4071 : }
4072 522 : appendStringInfoString(&result, "</xsd:simpleType>\n");
4073 : }
4074 :
4075 546 : return result.data;
4076 : }
4077 :
4078 :
4079 : /*
4080 : * Map an SQL row to an XML element, taking the row from the active
4081 : * SPI cursor. See also SQL/XML:2008 section 9.10.
4082 : */
4083 : static void
4084 312 : SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
4085 : bool nulls, bool tableforest,
4086 : const char *targetns, bool top_level)
4087 : {
4088 : int i;
4089 : char *xmltn;
4090 :
4091 312 : if (tablename)
4092 228 : xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
4093 : else
4094 : {
4095 84 : if (tableforest)
4096 36 : xmltn = "row";
4097 : else
4098 48 : xmltn = "table";
4099 : }
4100 :
4101 312 : if (tableforest)
4102 162 : xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
4103 : else
4104 150 : appendStringInfoString(result, "<row>\n");
4105 :
4106 1272 : for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
4107 : {
4108 : char *colname;
4109 : Datum colval;
4110 : bool isnull;
4111 :
4112 960 : colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
4113 : true, false);
4114 960 : colval = SPI_getbinval(SPI_tuptable->vals[rownum],
4115 960 : SPI_tuptable->tupdesc,
4116 : i,
4117 : &isnull);
4118 960 : if (isnull)
4119 : {
4120 114 : if (nulls)
4121 60 : appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname);
4122 : }
4123 : else
4124 846 : appendStringInfo(result, " <%s>%s</%s>\n",
4125 : colname,
4126 : map_sql_value_to_xml_value(colval,
4127 846 : SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
4128 : colname);
4129 : }
4130 :
4131 312 : if (tableforest)
4132 : {
4133 162 : xmldata_root_element_end(result, xmltn);
4134 162 : appendStringInfoChar(result, '\n');
4135 : }
4136 : else
4137 150 : appendStringInfoString(result, "</row>\n\n");
4138 312 : }
4139 :
4140 :
4141 : /*
4142 : * XPath related functions
4143 : */
4144 :
4145 : #ifdef USE_LIBXML
4146 :
4147 : /*
4148 : * Convert XML node to text.
4149 : *
4150 : * For attribute and text nodes, return the escaped text. For anything else,
4151 : * dump the whole subtree.
4152 : */
4153 : static text *
4154 192 : xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
4155 : {
4156 192 : xmltype *result = NULL;
4157 :
4158 192 : if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
4159 162 : {
4160 162 : void (*volatile nodefree) (xmlNodePtr) = NULL;
4161 162 : volatile xmlBufferPtr buf = NULL;
4162 162 : volatile xmlNodePtr cur_copy = NULL;
4163 :
4164 162 : PG_TRY();
4165 : {
4166 : int bytes;
4167 :
4168 162 : buf = xmlBufferCreate();
4169 162 : if (buf == NULL || xmlerrcxt->err_occurred)
4170 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4171 : "could not allocate xmlBuffer");
4172 :
4173 : /*
4174 : * Produce a dump of the node that we can serialize. xmlNodeDump
4175 : * does that, but the result of that function won't contain
4176 : * namespace definitions from ancestor nodes, so we first do a
4177 : * xmlCopyNode() which duplicates the node along with its required
4178 : * namespace definitions.
4179 : *
4180 : * Some old libxml2 versions such as 2.7.6 produce partially
4181 : * broken XML_DOCUMENT_NODE nodes (unset content field) when
4182 : * copying them. xmlNodeDump of such a node works fine, but
4183 : * xmlFreeNode crashes; set us up to call xmlFreeDoc instead.
4184 : */
4185 162 : cur_copy = xmlCopyNode(cur, 1);
4186 162 : if (cur_copy == NULL || xmlerrcxt->err_occurred)
4187 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4188 : "could not copy node");
4189 324 : nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ?
4190 162 : (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode;
4191 :
4192 162 : bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0);
4193 162 : if (bytes == -1 || xmlerrcxt->err_occurred)
4194 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4195 : "could not dump node");
4196 :
4197 162 : result = xmlBuffer_to_xmltype(buf);
4198 : }
4199 0 : PG_FINALLY();
4200 : {
4201 162 : if (nodefree)
4202 162 : nodefree(cur_copy);
4203 162 : if (buf)
4204 162 : xmlBufferFree(buf);
4205 : }
4206 162 : PG_END_TRY();
4207 : }
4208 : else
4209 : {
4210 : xmlChar *str;
4211 :
4212 30 : str = xmlXPathCastNodeToString(cur);
4213 30 : PG_TRY();
4214 : {
4215 : /* Here we rely on XML having the same representation as TEXT */
4216 30 : char *escaped = escape_xml((char *) str);
4217 :
4218 30 : result = (xmltype *) cstring_to_text(escaped);
4219 30 : pfree(escaped);
4220 : }
4221 0 : PG_FINALLY();
4222 : {
4223 30 : xmlFree(str);
4224 : }
4225 30 : PG_END_TRY();
4226 : }
4227 :
4228 192 : return result;
4229 : }
4230 :
4231 : /*
4232 : * Convert an XML XPath object (the result of evaluating an XPath expression)
4233 : * to an array of xml values, which are appended to astate. The function
4234 : * result value is the number of elements in the array.
4235 : *
4236 : * If "astate" is NULL then we don't generate the array value, but we still
4237 : * return the number of elements it would have had.
4238 : *
4239 : * Nodesets are converted to an array containing the nodes' textual
4240 : * representations. Primitive values (float, double, string) are converted
4241 : * to a single-element array containing the value's string representation.
4242 : */
4243 : static int
4244 540 : xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
4245 : ArrayBuildState *astate,
4246 : PgXmlErrorContext *xmlerrcxt)
4247 : {
4248 540 : int result = 0;
4249 : Datum datum;
4250 : Oid datumtype;
4251 : char *result_str;
4252 :
4253 540 : switch (xpathobj->type)
4254 : {
4255 498 : case XPATH_NODESET:
4256 498 : if (xpathobj->nodesetval != NULL)
4257 : {
4258 354 : result = xpathobj->nodesetval->nodeNr;
4259 354 : if (astate != NULL)
4260 : {
4261 : int i;
4262 :
4263 168 : for (i = 0; i < result; i++)
4264 : {
4265 90 : datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4266 : xmlerrcxt));
4267 90 : (void) accumArrayResult(astate, datum, false,
4268 : XMLOID, CurrentMemoryContext);
4269 : }
4270 : }
4271 : }
4272 498 : return result;
4273 :
4274 12 : case XPATH_BOOLEAN:
4275 12 : if (astate == NULL)
4276 0 : return 1;
4277 12 : datum = BoolGetDatum(xpathobj->boolval);
4278 12 : datumtype = BOOLOID;
4279 12 : break;
4280 :
4281 18 : case XPATH_NUMBER:
4282 18 : if (astate == NULL)
4283 12 : return 1;
4284 6 : datum = Float8GetDatum(xpathobj->floatval);
4285 6 : datumtype = FLOAT8OID;
4286 6 : break;
4287 :
4288 12 : case XPATH_STRING:
4289 12 : if (astate == NULL)
4290 0 : return 1;
4291 12 : datum = CStringGetDatum((char *) xpathobj->stringval);
4292 12 : datumtype = CSTRINGOID;
4293 12 : break;
4294 :
4295 0 : default:
4296 0 : elog(ERROR, "xpath expression result type %d is unsupported",
4297 : xpathobj->type);
4298 : return 0; /* keep compiler quiet */
4299 : }
4300 :
4301 : /* Common code for scalar-value cases */
4302 30 : result_str = map_sql_value_to_xml_value(datum, datumtype, true);
4303 30 : datum = PointerGetDatum(cstring_to_xmltype(result_str));
4304 30 : (void) accumArrayResult(astate, datum, false,
4305 : XMLOID, CurrentMemoryContext);
4306 30 : return 1;
4307 : }
4308 :
4309 :
4310 : /*
4311 : * Common code for xpath() and xmlexists()
4312 : *
4313 : * Evaluate XPath expression and return number of nodes in res_nitems
4314 : * and array of XML values in astate. Either of those pointers can be
4315 : * NULL if the corresponding result isn't wanted.
4316 : *
4317 : * It is up to the user to ensure that the XML passed is in fact
4318 : * an XML document - XPath doesn't work easily on fragments without
4319 : * a context node being known.
4320 : */
4321 : static void
4322 558 : xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
4323 : int *res_nitems, ArrayBuildState *astate)
4324 : {
4325 : PgXmlErrorContext *xmlerrcxt;
4326 558 : volatile xmlParserCtxtPtr ctxt = NULL;
4327 558 : volatile xmlDocPtr doc = NULL;
4328 558 : volatile xmlXPathContextPtr xpathctx = NULL;
4329 558 : volatile xmlXPathCompExprPtr xpathcomp = NULL;
4330 558 : volatile xmlXPathObjectPtr xpathobj = NULL;
4331 : char *datastr;
4332 : int32 len;
4333 : int32 xpath_len;
4334 : xmlChar *string;
4335 : xmlChar *xpath_expr;
4336 558 : size_t xmldecl_len = 0;
4337 : int i;
4338 : int ndim;
4339 : Datum *ns_names_uris;
4340 : bool *ns_names_uris_nulls;
4341 : int ns_count;
4342 :
4343 : /*
4344 : * Namespace mappings are passed as text[]. If an empty array is passed
4345 : * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
4346 : * Else, a 2-dimensional array with length of the second axis being equal
4347 : * to 2 should be passed, i.e., every subarray contains 2 elements, the
4348 : * first element defining the name, the second one the URI. Example:
4349 : * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
4350 : * 'http://example2.com']].
4351 : */
4352 558 : ndim = namespaces ? ARR_NDIM(namespaces) : 0;
4353 558 : if (ndim != 0)
4354 : {
4355 : int *dims;
4356 :
4357 126 : dims = ARR_DIMS(namespaces);
4358 :
4359 126 : if (ndim != 2 || dims[1] != 2)
4360 0 : ereport(ERROR,
4361 : (errcode(ERRCODE_DATA_EXCEPTION),
4362 : errmsg("invalid array for XML namespace mapping"),
4363 : errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
4364 :
4365 : Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
4366 :
4367 126 : deconstruct_array_builtin(namespaces, TEXTOID,
4368 : &ns_names_uris, &ns_names_uris_nulls,
4369 : &ns_count);
4370 :
4371 : Assert((ns_count % 2) == 0); /* checked above */
4372 126 : ns_count /= 2; /* count pairs only */
4373 : }
4374 : else
4375 : {
4376 432 : ns_names_uris = NULL;
4377 432 : ns_names_uris_nulls = NULL;
4378 432 : ns_count = 0;
4379 : }
4380 :
4381 558 : datastr = VARDATA(data);
4382 558 : len = VARSIZE(data) - VARHDRSZ;
4383 558 : xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
4384 558 : if (xpath_len == 0)
4385 6 : ereport(ERROR,
4386 : (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4387 : errmsg("empty XPath expression")));
4388 :
4389 552 : string = pg_xmlCharStrndup(datastr, len);
4390 552 : xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
4391 :
4392 : /*
4393 : * In a UTF8 database, skip any xml declaration, which might assert
4394 : * another encoding. Ignore parse_xml_decl() failure, letting
4395 : * xmlCtxtReadMemory() report parse errors. Documentation disclaims
4396 : * xpath() support for non-ASCII data in non-UTF8 databases, so leave
4397 : * those scenarios bug-compatible with historical behavior.
4398 : */
4399 552 : if (GetDatabaseEncoding() == PG_UTF8)
4400 552 : parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
4401 :
4402 552 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4403 :
4404 552 : PG_TRY();
4405 : {
4406 552 : xmlInitParser();
4407 :
4408 : /*
4409 : * redundant XML parsing (two parsings for the same value during one
4410 : * command execution are possible)
4411 : */
4412 552 : ctxt = xmlNewParserCtxt();
4413 552 : if (ctxt == NULL || xmlerrcxt->err_occurred)
4414 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4415 : "could not allocate parser context");
4416 1104 : doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4417 552 : len - xmldecl_len, NULL, NULL, 0);
4418 552 : if (doc == NULL || xmlerrcxt->err_occurred)
4419 12 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4420 : "could not parse XML document");
4421 540 : xpathctx = xmlXPathNewContext(doc);
4422 540 : if (xpathctx == NULL || xmlerrcxt->err_occurred)
4423 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4424 : "could not allocate XPath context");
4425 540 : xpathctx->node = (xmlNodePtr) doc;
4426 :
4427 : /* register namespaces, if any */
4428 540 : if (ns_count > 0)
4429 : {
4430 252 : for (i = 0; i < ns_count; i++)
4431 : {
4432 : char *ns_name;
4433 : char *ns_uri;
4434 :
4435 126 : if (ns_names_uris_nulls[i * 2] ||
4436 126 : ns_names_uris_nulls[i * 2 + 1])
4437 0 : ereport(ERROR,
4438 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4439 : errmsg("neither namespace name nor URI may be null")));
4440 126 : ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
4441 126 : ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
4442 126 : if (xmlXPathRegisterNs(xpathctx,
4443 : (xmlChar *) ns_name,
4444 : (xmlChar *) ns_uri) != 0)
4445 0 : ereport(ERROR, /* is this an internal error??? */
4446 : (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
4447 : ns_name, ns_uri)));
4448 : }
4449 : }
4450 :
4451 : /*
4452 : * Note: here and elsewhere, be careful to use xmlXPathCtxtCompile not
4453 : * xmlXPathCompile. In libxml2 2.13.3 and older, the latter function
4454 : * fails to defend itself against recursion-to-stack-overflow. See
4455 : * https://gitlab.gnome.org/GNOME/libxml2/-/issues/799
4456 : */
4457 540 : xpathcomp = xmlXPathCtxtCompile(xpathctx, xpath_expr);
4458 540 : if (xpathcomp == NULL || xmlerrcxt->err_occurred)
4459 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4460 : "invalid XPath expression");
4461 :
4462 : /*
4463 : * Version 2.6.27 introduces a function named
4464 : * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
4465 : * but we can derive the existence by whether any nodes are returned,
4466 : * thereby preventing a library version upgrade and keeping the code
4467 : * the same.
4468 : */
4469 540 : xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
4470 540 : if (xpathobj == NULL || xmlerrcxt->err_occurred)
4471 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4472 : "could not create XPath object");
4473 :
4474 : /*
4475 : * Extract the results as requested.
4476 : */
4477 540 : if (res_nitems != NULL)
4478 432 : *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4479 : else
4480 108 : (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4481 : }
4482 12 : PG_CATCH();
4483 : {
4484 12 : if (xpathobj)
4485 0 : xmlXPathFreeObject(xpathobj);
4486 12 : if (xpathcomp)
4487 0 : xmlXPathFreeCompExpr(xpathcomp);
4488 12 : if (xpathctx)
4489 0 : xmlXPathFreeContext(xpathctx);
4490 12 : if (doc)
4491 12 : xmlFreeDoc(doc);
4492 12 : if (ctxt)
4493 12 : xmlFreeParserCtxt(ctxt);
4494 :
4495 12 : pg_xml_done(xmlerrcxt, true);
4496 :
4497 12 : PG_RE_THROW();
4498 : }
4499 540 : PG_END_TRY();
4500 :
4501 540 : xmlXPathFreeObject(xpathobj);
4502 540 : xmlXPathFreeCompExpr(xpathcomp);
4503 540 : xmlXPathFreeContext(xpathctx);
4504 540 : xmlFreeDoc(doc);
4505 540 : xmlFreeParserCtxt(ctxt);
4506 :
4507 540 : pg_xml_done(xmlerrcxt, false);
4508 540 : }
4509 : #endif /* USE_LIBXML */
4510 :
4511 : /*
4512 : * Evaluate XPath expression and return array of XML values.
4513 : *
4514 : * As we have no support of XQuery sequences yet, this function seems
4515 : * to be the most useful one (array of XML functions plays a role of
4516 : * some kind of substitution for XQuery sequences).
4517 : */
4518 : Datum
4519 126 : xpath(PG_FUNCTION_ARGS)
4520 : {
4521 : #ifdef USE_LIBXML
4522 126 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4523 126 : xmltype *data = PG_GETARG_XML_P(1);
4524 126 : ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4525 : ArrayBuildState *astate;
4526 :
4527 126 : astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4528 126 : xpath_internal(xpath_expr_text, data, namespaces,
4529 : NULL, astate);
4530 108 : PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
4531 : #else
4532 : NO_XML_SUPPORT();
4533 : return 0;
4534 : #endif
4535 : }
4536 :
4537 : /*
4538 : * Determines if the node specified by the supplied XPath exists
4539 : * in a given XML document, returning a boolean.
4540 : */
4541 : Datum
4542 198 : xmlexists(PG_FUNCTION_ARGS)
4543 : {
4544 : #ifdef USE_LIBXML
4545 198 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4546 198 : xmltype *data = PG_GETARG_XML_P(1);
4547 : int res_nitems;
4548 :
4549 198 : xpath_internal(xpath_expr_text, data, NULL,
4550 : &res_nitems, NULL);
4551 :
4552 198 : PG_RETURN_BOOL(res_nitems > 0);
4553 : #else
4554 : NO_XML_SUPPORT();
4555 : return 0;
4556 : #endif
4557 : }
4558 :
4559 : /*
4560 : * Determines if the node specified by the supplied XPath exists
4561 : * in a given XML document, returning a boolean. Differs from
4562 : * xmlexists as it supports namespaces and is not defined in SQL/XML.
4563 : */
4564 : Datum
4565 234 : xpath_exists(PG_FUNCTION_ARGS)
4566 : {
4567 : #ifdef USE_LIBXML
4568 234 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4569 234 : xmltype *data = PG_GETARG_XML_P(1);
4570 234 : ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4571 : int res_nitems;
4572 :
4573 234 : xpath_internal(xpath_expr_text, data, namespaces,
4574 : &res_nitems, NULL);
4575 :
4576 234 : PG_RETURN_BOOL(res_nitems > 0);
4577 : #else
4578 : NO_XML_SUPPORT();
4579 : return 0;
4580 : #endif
4581 : }
4582 :
4583 : /*
4584 : * Functions for checking well-formed-ness
4585 : */
4586 :
4587 : #ifdef USE_LIBXML
4588 : static bool
4589 114 : wellformed_xml(text *data, XmlOptionType xmloption_arg)
4590 : {
4591 : xmlDocPtr doc;
4592 114 : ErrorSaveContext escontext = {T_ErrorSaveContext};
4593 :
4594 : /*
4595 : * We'll report "true" if no soft error is reported by xml_parse().
4596 : */
4597 114 : doc = xml_parse(data, xmloption_arg, true,
4598 : GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
4599 114 : if (doc)
4600 60 : xmlFreeDoc(doc);
4601 :
4602 114 : return !escontext.error_occurred;
4603 : }
4604 : #endif
4605 :
4606 : Datum
4607 90 : xml_is_well_formed(PG_FUNCTION_ARGS)
4608 : {
4609 : #ifdef USE_LIBXML
4610 90 : text *data = PG_GETARG_TEXT_PP(0);
4611 :
4612 90 : PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4613 : #else
4614 : NO_XML_SUPPORT();
4615 : return 0;
4616 : #endif /* not USE_LIBXML */
4617 : }
4618 :
4619 : Datum
4620 12 : xml_is_well_formed_document(PG_FUNCTION_ARGS)
4621 : {
4622 : #ifdef USE_LIBXML
4623 12 : text *data = PG_GETARG_TEXT_PP(0);
4624 :
4625 12 : PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4626 : #else
4627 : NO_XML_SUPPORT();
4628 : return 0;
4629 : #endif /* not USE_LIBXML */
4630 : }
4631 :
4632 : Datum
4633 12 : xml_is_well_formed_content(PG_FUNCTION_ARGS)
4634 : {
4635 : #ifdef USE_LIBXML
4636 12 : text *data = PG_GETARG_TEXT_PP(0);
4637 :
4638 12 : PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4639 : #else
4640 : NO_XML_SUPPORT();
4641 : return 0;
4642 : #endif /* not USE_LIBXML */
4643 : }
4644 :
4645 : /*
4646 : * support functions for XMLTABLE
4647 : *
4648 : */
4649 : #ifdef USE_LIBXML
4650 :
4651 : /*
4652 : * Returns private data from executor state. Ensure validity by check with
4653 : * MAGIC number.
4654 : */
4655 : static inline XmlTableBuilderData *
4656 157302 : GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4657 : {
4658 : XmlTableBuilderData *result;
4659 :
4660 157302 : if (!IsA(state, TableFuncScanState))
4661 0 : elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4662 157302 : result = (XmlTableBuilderData *) state->opaque;
4663 157302 : if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4664 0 : elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4665 :
4666 157302 : return result;
4667 : }
4668 : #endif
4669 :
4670 : /*
4671 : * XmlTableInitOpaque
4672 : * Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4673 : * the XML parser.
4674 : *
4675 : * Note: Because we call pg_xml_init() here and pg_xml_done() in
4676 : * XmlTableDestroyOpaque, it is critical for robustness that no other
4677 : * executor nodes run until this node is processed to completion. Caller
4678 : * must execute this to completion (probably filling a tuplestore to exhaust
4679 : * this node in a single pass) instead of using row-per-call mode.
4680 : */
4681 : static void
4682 264 : XmlTableInitOpaque(TableFuncScanState *state, int natts)
4683 : {
4684 : #ifdef USE_LIBXML
4685 264 : volatile xmlParserCtxtPtr ctxt = NULL;
4686 : XmlTableBuilderData *xtCxt;
4687 : PgXmlErrorContext *xmlerrcxt;
4688 :
4689 264 : xtCxt = palloc0(sizeof(XmlTableBuilderData));
4690 264 : xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4691 264 : xtCxt->natts = natts;
4692 264 : xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4693 :
4694 264 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4695 :
4696 264 : PG_TRY();
4697 : {
4698 264 : xmlInitParser();
4699 :
4700 264 : ctxt = xmlNewParserCtxt();
4701 264 : if (ctxt == NULL || xmlerrcxt->err_occurred)
4702 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4703 : "could not allocate parser context");
4704 : }
4705 0 : PG_CATCH();
4706 : {
4707 0 : if (ctxt != NULL)
4708 0 : xmlFreeParserCtxt(ctxt);
4709 :
4710 0 : pg_xml_done(xmlerrcxt, true);
4711 :
4712 0 : PG_RE_THROW();
4713 : }
4714 264 : PG_END_TRY();
4715 :
4716 264 : xtCxt->xmlerrcxt = xmlerrcxt;
4717 264 : xtCxt->ctxt = ctxt;
4718 :
4719 264 : state->opaque = xtCxt;
4720 : #else
4721 : NO_XML_SUPPORT();
4722 : #endif /* not USE_LIBXML */
4723 264 : }
4724 :
4725 : /*
4726 : * XmlTableSetDocument
4727 : * Install the input document
4728 : */
4729 : static void
4730 264 : XmlTableSetDocument(TableFuncScanState *state, Datum value)
4731 : {
4732 : #ifdef USE_LIBXML
4733 : XmlTableBuilderData *xtCxt;
4734 264 : xmltype *xmlval = DatumGetXmlP(value);
4735 : char *str;
4736 : xmlChar *xstr;
4737 : int length;
4738 264 : volatile xmlDocPtr doc = NULL;
4739 264 : volatile xmlXPathContextPtr xpathcxt = NULL;
4740 :
4741 264 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4742 :
4743 : /*
4744 : * Use out function for casting to string (remove encoding property). See
4745 : * comment in xml_out.
4746 : */
4747 264 : str = xml_out_internal(xmlval, 0);
4748 :
4749 264 : length = strlen(str);
4750 264 : xstr = pg_xmlCharStrndup(str, length);
4751 :
4752 264 : PG_TRY();
4753 : {
4754 264 : doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4755 264 : if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4756 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4757 : "could not parse XML document");
4758 264 : xpathcxt = xmlXPathNewContext(doc);
4759 264 : if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4760 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4761 : "could not allocate XPath context");
4762 264 : xpathcxt->node = (xmlNodePtr) doc;
4763 : }
4764 0 : PG_CATCH();
4765 : {
4766 0 : if (xpathcxt != NULL)
4767 0 : xmlXPathFreeContext(xpathcxt);
4768 0 : if (doc != NULL)
4769 0 : xmlFreeDoc(doc);
4770 :
4771 0 : PG_RE_THROW();
4772 : }
4773 264 : PG_END_TRY();
4774 :
4775 264 : xtCxt->doc = doc;
4776 264 : xtCxt->xpathcxt = xpathcxt;
4777 : #else
4778 : NO_XML_SUPPORT();
4779 : #endif /* not USE_LIBXML */
4780 264 : }
4781 :
4782 : /*
4783 : * XmlTableSetNamespace
4784 : * Add a namespace declaration
4785 : */
4786 : static void
4787 18 : XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
4788 : {
4789 : #ifdef USE_LIBXML
4790 : XmlTableBuilderData *xtCxt;
4791 :
4792 18 : if (name == NULL)
4793 6 : ereport(ERROR,
4794 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4795 : errmsg("DEFAULT namespace is not supported")));
4796 12 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4797 :
4798 12 : if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4799 12 : pg_xmlCharStrndup(name, strlen(name)),
4800 12 : pg_xmlCharStrndup(uri, strlen(uri))))
4801 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4802 : "could not set XML namespace");
4803 : #else
4804 : NO_XML_SUPPORT();
4805 : #endif /* not USE_LIBXML */
4806 12 : }
4807 :
4808 : /*
4809 : * XmlTableSetRowFilter
4810 : * Install the row-filter Xpath expression.
4811 : */
4812 : static void
4813 258 : XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4814 : {
4815 : #ifdef USE_LIBXML
4816 : XmlTableBuilderData *xtCxt;
4817 : xmlChar *xstr;
4818 :
4819 258 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4820 :
4821 258 : if (*path == '\0')
4822 0 : ereport(ERROR,
4823 : (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4824 : errmsg("row path filter must not be empty string")));
4825 :
4826 258 : xstr = pg_xmlCharStrndup(path, strlen(path));
4827 :
4828 : /* We require XmlTableSetDocument to have been done already */
4829 : Assert(xtCxt->xpathcxt != NULL);
4830 :
4831 258 : xtCxt->xpathcomp = xmlXPathCtxtCompile(xtCxt->xpathcxt, xstr);
4832 258 : if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4833 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4834 : "invalid XPath expression");
4835 : #else
4836 : NO_XML_SUPPORT();
4837 : #endif /* not USE_LIBXML */
4838 258 : }
4839 :
4840 : /*
4841 : * XmlTableSetColumnFilter
4842 : * Install the column-filter Xpath expression, for the given column.
4843 : */
4844 : static void
4845 774 : XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
4846 : {
4847 : #ifdef USE_LIBXML
4848 : XmlTableBuilderData *xtCxt;
4849 : xmlChar *xstr;
4850 :
4851 : Assert(PointerIsValid(path));
4852 :
4853 774 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4854 :
4855 774 : if (*path == '\0')
4856 0 : ereport(ERROR,
4857 : (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4858 : errmsg("column path filter must not be empty string")));
4859 :
4860 774 : xstr = pg_xmlCharStrndup(path, strlen(path));
4861 :
4862 : /* We require XmlTableSetDocument to have been done already */
4863 : Assert(xtCxt->xpathcxt != NULL);
4864 :
4865 774 : xtCxt->xpathscomp[colnum] = xmlXPathCtxtCompile(xtCxt->xpathcxt, xstr);
4866 774 : if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4867 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4868 : "invalid XPath expression");
4869 : #else
4870 : NO_XML_SUPPORT();
4871 : #endif /* not USE_LIBXML */
4872 774 : }
4873 :
4874 : /*
4875 : * XmlTableFetchRow
4876 : * Prepare the next "current" tuple for upcoming GetValue calls.
4877 : * Returns false if the row-filter expression returned no more rows.
4878 : */
4879 : static bool
4880 22608 : XmlTableFetchRow(TableFuncScanState *state)
4881 : {
4882 : #ifdef USE_LIBXML
4883 : XmlTableBuilderData *xtCxt;
4884 :
4885 22608 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4886 :
4887 : /* Propagate our own error context to libxml2 */
4888 22608 : xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4889 :
4890 22608 : if (xtCxt->xpathobj == NULL)
4891 : {
4892 258 : xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4893 258 : if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4894 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4895 : "could not create XPath object");
4896 :
4897 258 : xtCxt->row_count = 0;
4898 : }
4899 :
4900 22608 : if (xtCxt->xpathobj->type == XPATH_NODESET)
4901 : {
4902 22608 : if (xtCxt->xpathobj->nodesetval != NULL)
4903 : {
4904 22608 : if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4905 22362 : return true;
4906 : }
4907 : }
4908 :
4909 246 : return false;
4910 : #else
4911 : NO_XML_SUPPORT();
4912 : return false;
4913 : #endif /* not USE_LIBXML */
4914 : }
4915 :
4916 : /*
4917 : * XmlTableGetValue
4918 : * Return the value for column number 'colnum' for the current row. If
4919 : * column -1 is requested, return representation of the whole row.
4920 : *
4921 : * This leaks memory, so be sure to reset often the context in which it's
4922 : * called.
4923 : */
4924 : static Datum
4925 133122 : XmlTableGetValue(TableFuncScanState *state, int colnum,
4926 : Oid typid, int32 typmod, bool *isnull)
4927 : {
4928 : #ifdef USE_LIBXML
4929 : XmlTableBuilderData *xtCxt;
4930 133122 : Datum result = (Datum) 0;
4931 : xmlNodePtr cur;
4932 133122 : char *cstr = NULL;
4933 133122 : volatile xmlXPathObjectPtr xpathobj = NULL;
4934 :
4935 133122 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4936 :
4937 : Assert(xtCxt->xpathobj &&
4938 : xtCxt->xpathobj->type == XPATH_NODESET &&
4939 : xtCxt->xpathobj->nodesetval != NULL);
4940 :
4941 : /* Propagate our own error context to libxml2 */
4942 133122 : xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4943 :
4944 133122 : *isnull = false;
4945 :
4946 133122 : cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
4947 :
4948 : Assert(xtCxt->xpathscomp[colnum] != NULL);
4949 :
4950 133122 : PG_TRY();
4951 : {
4952 : /* Set current node as entry point for XPath evaluation */
4953 133122 : xtCxt->xpathcxt->node = cur;
4954 :
4955 : /* Evaluate column path */
4956 133122 : xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
4957 133122 : if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4958 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4959 : "could not create XPath object");
4960 :
4961 : /*
4962 : * There are four possible cases, depending on the number of nodes
4963 : * returned by the XPath expression and the type of the target column:
4964 : * a) XPath returns no nodes. b) The target type is XML (return all
4965 : * as XML). For non-XML return types: c) One node (return content).
4966 : * d) Multiple nodes (error).
4967 : */
4968 133122 : if (xpathobj->type == XPATH_NODESET)
4969 : {
4970 133092 : int count = 0;
4971 :
4972 133092 : if (xpathobj->nodesetval != NULL)
4973 132882 : count = xpathobj->nodesetval->nodeNr;
4974 :
4975 133092 : if (xpathobj->nodesetval == NULL || count == 0)
4976 : {
4977 22266 : *isnull = true;
4978 : }
4979 : else
4980 : {
4981 110826 : if (typid == XMLOID)
4982 : {
4983 : text *textstr;
4984 : StringInfoData str;
4985 :
4986 : /* Concatenate serialized values */
4987 72 : initStringInfo(&str);
4988 174 : for (int i = 0; i < count; i++)
4989 : {
4990 : textstr =
4991 102 : xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4992 : xtCxt->xmlerrcxt);
4993 :
4994 102 : appendStringInfoText(&str, textstr);
4995 : }
4996 72 : cstr = str.data;
4997 : }
4998 : else
4999 : {
5000 : xmlChar *str;
5001 :
5002 110754 : if (count > 1)
5003 6 : ereport(ERROR,
5004 : (errcode(ERRCODE_CARDINALITY_VIOLATION),
5005 : errmsg("more than one value returned by column XPath expression")));
5006 :
5007 110748 : str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
5008 110748 : cstr = str ? xml_pstrdup_and_free(str) : "";
5009 : }
5010 : }
5011 : }
5012 30 : else if (xpathobj->type == XPATH_STRING)
5013 : {
5014 : /* Content should be escaped when target will be XML */
5015 18 : if (typid == XMLOID)
5016 6 : cstr = escape_xml((char *) xpathobj->stringval);
5017 : else
5018 12 : cstr = (char *) xpathobj->stringval;
5019 : }
5020 12 : else if (xpathobj->type == XPATH_BOOLEAN)
5021 : {
5022 : char typcategory;
5023 : bool typispreferred;
5024 : xmlChar *str;
5025 :
5026 : /* Allow implicit casting from boolean to numbers */
5027 6 : get_type_category_preferred(typid, &typcategory, &typispreferred);
5028 :
5029 6 : if (typcategory != TYPCATEGORY_NUMERIC)
5030 6 : str = xmlXPathCastBooleanToString(xpathobj->boolval);
5031 : else
5032 0 : str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
5033 :
5034 6 : cstr = xml_pstrdup_and_free(str);
5035 : }
5036 6 : else if (xpathobj->type == XPATH_NUMBER)
5037 : {
5038 : xmlChar *str;
5039 :
5040 6 : str = xmlXPathCastNumberToString(xpathobj->floatval);
5041 6 : cstr = xml_pstrdup_and_free(str);
5042 : }
5043 : else
5044 0 : elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
5045 :
5046 : /*
5047 : * By here, either cstr contains the result value, or the isnull flag
5048 : * has been set.
5049 : */
5050 : Assert(cstr || *isnull);
5051 :
5052 133116 : if (!*isnull)
5053 110850 : result = InputFunctionCall(&state->in_functions[colnum],
5054 : cstr,
5055 110850 : state->typioparams[colnum],
5056 : typmod);
5057 : }
5058 6 : PG_FINALLY();
5059 : {
5060 133122 : if (xpathobj != NULL)
5061 133122 : xmlXPathFreeObject(xpathobj);
5062 : }
5063 133122 : PG_END_TRY();
5064 :
5065 133116 : return result;
5066 : #else
5067 : NO_XML_SUPPORT();
5068 : return 0;
5069 : #endif /* not USE_LIBXML */
5070 : }
5071 :
5072 : /*
5073 : * XmlTableDestroyOpaque
5074 : * Release all libxml2 resources
5075 : */
5076 : static void
5077 264 : XmlTableDestroyOpaque(TableFuncScanState *state)
5078 : {
5079 : #ifdef USE_LIBXML
5080 : XmlTableBuilderData *xtCxt;
5081 :
5082 264 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
5083 :
5084 : /* Propagate our own error context to libxml2 */
5085 264 : xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
5086 :
5087 264 : if (xtCxt->xpathscomp != NULL)
5088 : {
5089 : int i;
5090 :
5091 1116 : for (i = 0; i < xtCxt->natts; i++)
5092 852 : if (xtCxt->xpathscomp[i] != NULL)
5093 774 : xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
5094 : }
5095 :
5096 264 : if (xtCxt->xpathobj != NULL)
5097 258 : xmlXPathFreeObject(xtCxt->xpathobj);
5098 264 : if (xtCxt->xpathcomp != NULL)
5099 258 : xmlXPathFreeCompExpr(xtCxt->xpathcomp);
5100 264 : if (xtCxt->xpathcxt != NULL)
5101 264 : xmlXPathFreeContext(xtCxt->xpathcxt);
5102 264 : if (xtCxt->doc != NULL)
5103 264 : xmlFreeDoc(xtCxt->doc);
5104 264 : if (xtCxt->ctxt != NULL)
5105 264 : xmlFreeParserCtxt(xtCxt->ctxt);
5106 :
5107 264 : pg_xml_done(xtCxt->xmlerrcxt, true);
5108 :
5109 : /* not valid anymore */
5110 264 : xtCxt->magic = 0;
5111 264 : state->opaque = NULL;
5112 :
5113 : #else
5114 : NO_XML_SUPPORT();
5115 : #endif /* not USE_LIBXML */
5116 264 : }
|