Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * xml.c
4 : * XML data type support.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * src/backend/utils/adt/xml.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : /*
16 : * Generally, XML type support is only available when libxml use was
17 : * configured during the build. But even if that is not done, the
18 : * type and all the functions are available, but most of them will
19 : * fail. For one thing, this avoids having to manage variant catalog
20 : * installations. But it also has nice effects such as that you can
21 : * dump a database containing XML type data even if the server is not
22 : * linked with libxml. Thus, make sure xml_out() works even if nothing
23 : * else does.
24 : */
25 :
26 : /*
27 : * Notes on memory management:
28 : *
29 : * Sometimes libxml allocates global structures in the hope that it can reuse
30 : * them later on. This makes it impractical to change the xmlMemSetup
31 : * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32 : * allocated with malloc() or vice versa. Since libxml might be used by
33 : * loadable modules, eg libperl, our only safe choices are to change the
34 : * functions at postmaster/backend launch or not at all. Since we'd rather
35 : * not activate libxml in sessions that might never use it, the latter choice
36 : * is the preferred one. However, for debugging purposes it can be awfully
37 : * handy to constrain libxml's allocations to be done in a specific palloc
38 : * context, where they're easy to track. Therefore there is code here that
39 : * can be enabled in debug builds to redirect libxml's allocations into a
40 : * special context LibxmlContext. It's not recommended to turn this on in
41 : * a production build because of the possibility of bad interactions with
42 : * external modules.
43 : */
44 : /* #define USE_LIBXMLCONTEXT */
45 :
46 : #include "postgres.h"
47 :
48 : #ifdef USE_LIBXML
49 : #include <libxml/chvalid.h>
50 : #include <libxml/entities.h>
51 : #include <libxml/parser.h>
52 : #include <libxml/parserInternals.h>
53 : #include <libxml/tree.h>
54 : #include <libxml/uri.h>
55 : #include <libxml/xmlerror.h>
56 : #include <libxml/xmlsave.h>
57 : #include <libxml/xmlversion.h>
58 : #include <libxml/xmlwriter.h>
59 : #include <libxml/xpath.h>
60 : #include <libxml/xpathInternals.h>
61 :
62 : /*
63 : * We used to check for xmlStructuredErrorContext via a configure test; but
64 : * that doesn't work on Windows, so instead use this grottier method of
65 : * testing the library version number.
66 : */
67 : #if LIBXML_VERSION >= 20704
68 : #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
69 : #endif
70 :
71 : /*
72 : * libxml2 2.12 decided to insert "const" into the error handler API.
73 : */
74 : #if LIBXML_VERSION >= 21200
75 : #define PgXmlErrorPtr const xmlError *
76 : #else
77 : #define PgXmlErrorPtr xmlErrorPtr
78 : #endif
79 :
80 : #endif /* USE_LIBXML */
81 :
82 : #include "access/htup_details.h"
83 : #include "access/table.h"
84 : #include "catalog/namespace.h"
85 : #include "catalog/pg_class.h"
86 : #include "catalog/pg_type.h"
87 : #include "commands/dbcommands.h"
88 : #include "executor/spi.h"
89 : #include "executor/tablefunc.h"
90 : #include "fmgr.h"
91 : #include "lib/stringinfo.h"
92 : #include "libpq/pqformat.h"
93 : #include "mb/pg_wchar.h"
94 : #include "miscadmin.h"
95 : #include "nodes/execnodes.h"
96 : #include "nodes/miscnodes.h"
97 : #include "nodes/nodeFuncs.h"
98 : #include "utils/array.h"
99 : #include "utils/builtins.h"
100 : #include "utils/date.h"
101 : #include "utils/datetime.h"
102 : #include "utils/lsyscache.h"
103 : #include "utils/rel.h"
104 : #include "utils/syscache.h"
105 : #include "utils/xml.h"
106 :
107 :
/*
 * GUC variables
 *
 * xmloption is what xml_in(), xml_recv() and texttoxml() in this file pass
 * as the XmlOptionType argument when parsing input.
 */
int xmlbinary = XMLBINARY_BASE64;	/* initial value: XMLBINARY_BASE64 */
int xmloption = XMLOPTION_CONTENT;	/* initial value: XMLOPTION_CONTENT */
111 :
112 : #ifdef USE_LIBXML
113 :
114 : /* random number to identify PgXmlErrorContext */
115 : #define ERRCXT_MAGIC 68275028
116 :
/*
 * Error-handling state for one use of libxml: the strictness that was
 * requested, the error status and message text collected so far, and the
 * libxml handlers that were in place before pg_xml_init() ran.
 */
struct PgXmlErrorContext
{
	/* identification tag for this struct; see ERRCXT_MAGIC */
	int magic;
	/* strictness argument passed to pg_xml_init */
	PgXmlStrictness strictness;
	/* current error status and accumulated message, if any */
	bool err_occurred;
	StringInfoData err_buf;
	/* previous libxml error handling state (saved by pg_xml_init) */
	xmlStructuredErrorFunc saved_errfunc;
	void *saved_errcxt;
	/* previous libxml entity handler (saved by pg_xml_init) */
	xmlExternalEntityLoader saved_entityfunc;
};
131 :
132 : static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
133 : xmlParserCtxtPtr ctxt);
134 : static void xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
135 : int sqlcode, const char *msg);
136 : static void xml_errorHandler(void *data, PgXmlErrorPtr error);
137 : static int errdetail_for_xml_code(int code);
138 : static void chopStringInfoNewlines(StringInfo str);
139 : static void appendStringInfoLineSeparator(StringInfo str);
140 :
141 : #ifdef USE_LIBXMLCONTEXT
142 :
143 : static MemoryContext LibxmlContext = NULL;
144 :
145 : static void xml_memory_init(void);
146 : static void *xml_palloc(size_t size);
147 : static void *xml_repalloc(void *ptr, size_t size);
148 : static void xml_pfree(void *ptr);
149 : static char *xml_pstrdup(const char *string);
150 : #endif /* USE_LIBXMLCONTEXT */
151 :
152 : static xmlChar *xml_text2xmlChar(text *in);
153 : static int parse_xml_decl(const xmlChar *str, size_t *lenp,
154 : xmlChar **version, xmlChar **encoding, int *standalone);
155 : static bool print_xml_decl(StringInfo buf, const xmlChar *version,
156 : pg_enc encoding, int standalone);
157 : static bool xml_doctype_in_content(const xmlChar *str);
158 : static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
159 : bool preserve_whitespace, int encoding,
160 : XmlOptionType *parsed_xmloptiontype,
161 : xmlNodePtr *parsed_nodes,
162 : Node *escontext);
163 : static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
164 : static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
165 : ArrayBuildState *astate,
166 : PgXmlErrorContext *xmlerrcxt);
167 : static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
168 : #endif /* USE_LIBXML */
169 :
170 : static void xmldata_root_element_start(StringInfo result, const char *eltname,
171 : const char *xmlschema, const char *targetns,
172 : bool top_level);
173 : static void xmldata_root_element_end(StringInfo result, const char *eltname);
174 : static StringInfo query_to_xml_internal(const char *query, char *tablename,
175 : const char *xmlschema, bool nulls, bool tableforest,
176 : const char *targetns, bool top_level);
177 : static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
178 : bool nulls, bool tableforest, const char *targetns);
179 : static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
180 : List *relid_list, bool nulls,
181 : bool tableforest, const char *targetns);
182 : static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
183 : bool nulls, bool tableforest,
184 : const char *targetns);
185 : static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
186 : static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
187 : static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
188 : static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
189 : char *tablename, bool nulls, bool tableforest,
190 : const char *targetns, bool top_level);
191 :
192 : /* XMLTABLE support */
193 : #ifdef USE_LIBXML
194 : /* random number to identify XmlTableContext */
195 : #define XMLTABLE_CONTEXT_MAGIC 46922182
/*
 * Working state for the XMLTABLE support routines (XmlTableInitOpaque and
 * its siblings).
 *
 * NOTE(review): the per-field remarks below are inferred from field names
 * and types only; the code that fills these fields in is not part of this
 * excerpt, so confirm against the XmlTable* implementations.
 */
typedef struct XmlTableBuilderData
{
	int magic;					/* identification tag; see
								 * XMLTABLE_CONTEXT_MAGIC */
	int natts;					/* presumably the natts value given to
								 * XmlTableInitOpaque -- confirm */
	long int row_count;			/* presumably a running row counter --
								 * confirm */
	PgXmlErrorContext *xmlerrcxt;	/* libxml error-handling context */
	xmlParserCtxtPtr ctxt;		/* libxml parser context */
	xmlDocPtr doc;				/* libxml document */
	xmlXPathContextPtr xpathcxt;	/* libxml XPath evaluation context */
	xmlXPathCompExprPtr xpathcomp;	/* a single compiled XPath expression */
	xmlXPathObjectPtr xpathobj; /* an XPath result object */
	xmlXPathCompExprPtr *xpathscomp;	/* array of compiled XPath
										 * expressions; presumably one per
										 * column -- confirm */
} XmlTableBuilderData;
209 : #endif
210 :
211 : static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
212 : static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
213 : static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
214 : const char *uri);
215 : static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
216 : static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
217 : const char *path, int colnum);
218 : static bool XmlTableFetchRow(struct TableFuncScanState *state);
219 : static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
220 : Oid typid, int32 typmod, bool *isnull);
221 : static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
222 :
/*
 * Callback table exposing the XMLTABLE implementation in this file.  Each
 * member of TableFuncRoutine is bound to the static XmlTable* function
 * declared above whose name matches the member.
 */
const TableFuncRoutine XmlTableRoutine =
{
	.InitOpaque = XmlTableInitOpaque,
	.SetDocument = XmlTableSetDocument,
	.SetNamespace = XmlTableSetNamespace,
	.SetRowFilter = XmlTableSetRowFilter,
	.SetColumnFilter = XmlTableSetColumnFilter,
	.FetchRow = XmlTableFetchRow,
	.GetValue = XmlTableGetValue,
	.DestroyOpaque = XmlTableDestroyOpaque
};
234 :
/*
 * Report that the requested XML functionality is unavailable.  Used in the
 * non-USE_LIBXML branch of the functions in this file.
 */
#define NO_XML_SUPPORT() \
	ereport(ERROR, \
			errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			errmsg("unsupported XML feature"), \
			errdetail("This functionality requires the server to be built with libxml support."))
240 :
241 :
242 : /* from SQL/XML:2008 section 4.9 */
243 : #define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
244 : #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
245 : #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
246 :
247 :
248 : #ifdef USE_LIBXML
249 :
250 : static int
251 0 : xmlChar_to_encoding(const xmlChar *encoding_name)
252 : {
253 0 : int encoding = pg_char_to_encoding((const char *) encoding_name);
254 :
255 0 : if (encoding < 0)
256 0 : ereport(ERROR,
257 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
258 : errmsg("invalid encoding name \"%s\"",
259 : (const char *) encoding_name)));
260 0 : return encoding;
261 : }
262 : #endif
263 :
264 :
265 : /*
266 : * xml_in uses a plain C string to VARDATA conversion, so for the time being
267 : * we use the conversion function for the text datatype.
268 : *
269 : * This is only acceptable so long as xmltype and text use the same
270 : * representation.
271 : */
272 : Datum
273 898 : xml_in(PG_FUNCTION_ARGS)
274 : {
275 : #ifdef USE_LIBXML
276 898 : char *s = PG_GETARG_CSTRING(0);
277 : xmltype *vardata;
278 : xmlDocPtr doc;
279 :
280 : /* Build the result object. */
281 898 : vardata = (xmltype *) cstring_to_text(s);
282 :
283 : /*
284 : * Parse the data to check if it is well-formed XML data.
285 : *
286 : * Note: we don't need to worry about whether a soft error is detected.
287 : */
288 898 : doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding(),
289 898 : NULL, NULL, fcinfo->context);
290 850 : if (doc != NULL)
291 838 : xmlFreeDoc(doc);
292 :
293 850 : PG_RETURN_XML_P(vardata);
294 : #else
295 : NO_XML_SUPPORT();
296 : return 0;
297 : #endif
298 : }
299 :
300 :
301 : #define PG_XML_DEFAULT_VERSION "1.0"
302 :
303 :
304 : /*
305 : * xml_out_internal uses a plain VARDATA to C string conversion, so for the
306 : * time being we use the conversion function for the text datatype.
307 : *
308 : * This is only acceptable so long as xmltype and text use the same
309 : * representation.
310 : */
311 : static char *
312 23714 : xml_out_internal(xmltype *x, pg_enc target_encoding)
313 : {
314 23714 : char *str = text_to_cstring((text *) x);
315 :
316 : #ifdef USE_LIBXML
317 23714 : size_t len = strlen(str);
318 : xmlChar *version;
319 : int standalone;
320 : int res_code;
321 :
322 23714 : if ((res_code = parse_xml_decl((xmlChar *) str,
323 : &len, &version, NULL, &standalone)) == 0)
324 : {
325 : StringInfoData buf;
326 :
327 23714 : initStringInfo(&buf);
328 :
329 23714 : if (!print_xml_decl(&buf, version, target_encoding, standalone))
330 : {
331 : /*
332 : * If we are not going to produce an XML declaration, eat a single
333 : * newline in the original string to prevent empty first lines in
334 : * the output.
335 : */
336 23666 : if (*(str + len) == '\n')
337 6 : len += 1;
338 : }
339 23714 : appendStringInfoString(&buf, str + len);
340 :
341 23714 : pfree(str);
342 :
343 23714 : return buf.data;
344 : }
345 :
346 0 : ereport(WARNING,
347 : errcode(ERRCODE_DATA_CORRUPTED),
348 : errmsg_internal("could not parse XML declaration in stored value"),
349 : errdetail_for_xml_code(res_code));
350 : #endif
351 0 : return str;
352 : }
353 :
354 :
355 : Datum
356 23450 : xml_out(PG_FUNCTION_ARGS)
357 : {
358 23450 : xmltype *x = PG_GETARG_XML_P(0);
359 :
360 : /*
361 : * xml_out removes the encoding property in all cases. This is because we
362 : * cannot control from here whether the datum will be converted to a
363 : * different client encoding, so we'd do more harm than good by including
364 : * it.
365 : */
366 23450 : PG_RETURN_CSTRING(xml_out_internal(x, 0));
367 : }
368 :
369 :
370 : Datum
371 0 : xml_recv(PG_FUNCTION_ARGS)
372 : {
373 : #ifdef USE_LIBXML
374 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
375 : xmltype *result;
376 : char *str;
377 : char *newstr;
378 : int nbytes;
379 : xmlDocPtr doc;
380 0 : xmlChar *encodingStr = NULL;
381 : int encoding;
382 :
383 : /*
384 : * Read the data in raw format. We don't know yet what the encoding is, as
385 : * that information is embedded in the xml declaration; so we have to
386 : * parse that before converting to server encoding.
387 : */
388 0 : nbytes = buf->len - buf->cursor;
389 0 : str = (char *) pq_getmsgbytes(buf, nbytes);
390 :
391 : /*
392 : * We need a null-terminated string to pass to parse_xml_decl(). Rather
393 : * than make a separate copy, make the temporary result one byte bigger
394 : * than it needs to be.
395 : */
396 0 : result = palloc(nbytes + 1 + VARHDRSZ);
397 0 : SET_VARSIZE(result, nbytes + VARHDRSZ);
398 0 : memcpy(VARDATA(result), str, nbytes);
399 0 : str = VARDATA(result);
400 0 : str[nbytes] = '\0';
401 :
402 0 : parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
403 :
404 : /*
405 : * If encoding wasn't explicitly specified in the XML header, treat it as
406 : * UTF-8, as that's the default in XML. This is different from xml_in(),
407 : * where the input has to go through the normal client to server encoding
408 : * conversion.
409 : */
410 0 : encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
411 :
412 : /*
413 : * Parse the data to check if it is well-formed XML data. Assume that
414 : * xml_parse will throw ERROR if not.
415 : */
416 0 : doc = xml_parse(result, xmloption, true, encoding, NULL, NULL, NULL);
417 0 : xmlFreeDoc(doc);
418 :
419 : /* Now that we know what we're dealing with, convert to server encoding */
420 0 : newstr = pg_any_to_server(str, nbytes, encoding);
421 :
422 0 : if (newstr != str)
423 : {
424 0 : pfree(result);
425 0 : result = (xmltype *) cstring_to_text(newstr);
426 0 : pfree(newstr);
427 : }
428 :
429 0 : PG_RETURN_XML_P(result);
430 : #else
431 : NO_XML_SUPPORT();
432 : return 0;
433 : #endif
434 : }
435 :
436 :
437 : Datum
438 0 : xml_send(PG_FUNCTION_ARGS)
439 : {
440 0 : xmltype *x = PG_GETARG_XML_P(0);
441 : char *outval;
442 : StringInfoData buf;
443 :
444 : /*
445 : * xml_out_internal doesn't convert the encoding, it just prints the right
446 : * declaration. pq_sendtext will do the conversion.
447 : */
448 0 : outval = xml_out_internal(x, pg_get_client_encoding());
449 :
450 0 : pq_begintypsend(&buf);
451 0 : pq_sendtext(&buf, outval, strlen(outval));
452 0 : pfree(outval);
453 0 : PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
454 : }
455 :
456 :
457 : #ifdef USE_LIBXML
458 : static void
459 132 : appendStringInfoText(StringInfo str, const text *t)
460 : {
461 132 : appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
462 132 : }
463 : #endif
464 :
465 :
466 : static xmltype *
467 22530 : stringinfo_to_xmltype(StringInfo buf)
468 : {
469 22530 : return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
470 : }
471 :
472 :
473 : static xmltype *
474 78 : cstring_to_xmltype(const char *string)
475 : {
476 78 : return (xmltype *) cstring_to_text(string);
477 : }
478 :
479 :
480 : #ifdef USE_LIBXML
481 : static xmltype *
482 22628 : xmlBuffer_to_xmltype(xmlBufferPtr buf)
483 : {
484 22628 : return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
485 : xmlBufferLength(buf));
486 : }
487 : #endif
488 :
489 :
490 : Datum
491 42 : xmlcomment(PG_FUNCTION_ARGS)
492 : {
493 : #ifdef USE_LIBXML
494 42 : text *arg = PG_GETARG_TEXT_PP(0);
495 42 : char *argdata = VARDATA_ANY(arg);
496 42 : int len = VARSIZE_ANY_EXHDR(arg);
497 : StringInfoData buf;
498 : int i;
499 :
500 : /* check for "--" in string or "-" at the end */
501 180 : for (i = 1; i < len; i++)
502 : {
503 144 : if (argdata[i] == '-' && argdata[i - 1] == '-')
504 6 : ereport(ERROR,
505 : (errcode(ERRCODE_INVALID_XML_COMMENT),
506 : errmsg("invalid XML comment")));
507 : }
508 36 : if (len > 0 && argdata[len - 1] == '-')
509 6 : ereport(ERROR,
510 : (errcode(ERRCODE_INVALID_XML_COMMENT),
511 : errmsg("invalid XML comment")));
512 :
513 30 : initStringInfo(&buf);
514 30 : appendStringInfoString(&buf, "<!--");
515 30 : appendStringInfoText(&buf, arg);
516 30 : appendStringInfoString(&buf, "-->");
517 :
518 30 : PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
519 : #else
520 : NO_XML_SUPPORT();
521 : return 0;
522 : #endif
523 : }
524 :
525 :
526 : Datum
527 30 : xmltext(PG_FUNCTION_ARGS)
528 : {
529 : #ifdef USE_LIBXML
530 30 : text *arg = PG_GETARG_TEXT_PP(0);
531 : text *result;
532 30 : xmlChar *xmlbuf = NULL;
533 :
534 30 : xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg));
535 :
536 : Assert(xmlbuf);
537 :
538 30 : result = cstring_to_text_with_len((const char *) xmlbuf, xmlStrlen(xmlbuf));
539 30 : xmlFree(xmlbuf);
540 30 : PG_RETURN_XML_P(result);
541 : #else
542 : NO_XML_SUPPORT();
543 : return 0;
544 : #endif /* not USE_LIBXML */
545 : }
546 :
547 :
548 : /*
549 : * TODO: xmlconcat needs to merge the notations and unparsed entities
550 : * of the argument values. Not very important in practice, though.
551 : */
552 : xmltype *
553 22280 : xmlconcat(List *args)
554 : {
555 : #ifdef USE_LIBXML
556 22280 : int global_standalone = 1;
557 22280 : xmlChar *global_version = NULL;
558 22280 : bool global_version_no_value = false;
559 : StringInfoData buf;
560 : ListCell *v;
561 :
562 22280 : initStringInfo(&buf);
563 66846 : foreach(v, args)
564 : {
565 44566 : xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
566 : size_t len;
567 : xmlChar *version;
568 : int standalone;
569 : char *str;
570 :
571 44566 : len = VARSIZE(x) - VARHDRSZ;
572 44566 : str = text_to_cstring((text *) x);
573 :
574 44566 : parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
575 :
576 44566 : if (standalone == 0 && global_standalone == 1)
577 0 : global_standalone = 0;
578 44566 : if (standalone < 0)
579 44554 : global_standalone = -1;
580 :
581 44566 : if (!version)
582 44548 : global_version_no_value = true;
583 18 : else if (!global_version)
584 12 : global_version = version;
585 6 : else if (xmlStrcmp(version, global_version) != 0)
586 0 : global_version_no_value = true;
587 :
588 44566 : appendStringInfoString(&buf, str + len);
589 44566 : pfree(str);
590 : }
591 :
592 22280 : if (!global_version_no_value || global_standalone >= 0)
593 : {
594 : StringInfoData buf2;
595 :
596 6 : initStringInfo(&buf2);
597 :
598 6 : print_xml_decl(&buf2,
599 6 : (!global_version_no_value) ? global_version : NULL,
600 : 0,
601 : global_standalone);
602 :
603 6 : appendBinaryStringInfo(&buf2, buf.data, buf.len);
604 6 : buf = buf2;
605 : }
606 :
607 22280 : return stringinfo_to_xmltype(&buf);
608 : #else
609 : NO_XML_SUPPORT();
610 : return NULL;
611 : #endif
612 : }
613 :
614 :
615 : /*
616 : * XMLAGG support
617 : */
618 : Datum
619 22256 : xmlconcat2(PG_FUNCTION_ARGS)
620 : {
621 22256 : if (PG_ARGISNULL(0))
622 : {
623 18 : if (PG_ARGISNULL(1))
624 0 : PG_RETURN_NULL();
625 : else
626 18 : PG_RETURN_XML_P(PG_GETARG_XML_P(1));
627 : }
628 22238 : else if (PG_ARGISNULL(1))
629 0 : PG_RETURN_XML_P(PG_GETARG_XML_P(0));
630 : else
631 22238 : PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
632 : PG_GETARG_XML_P(1))));
633 : }
634 :
635 :
636 : Datum
637 6 : texttoxml(PG_FUNCTION_ARGS)
638 : {
639 6 : text *data = PG_GETARG_TEXT_PP(0);
640 :
641 6 : PG_RETURN_XML_P(xmlparse(data, xmloption, true));
642 : }
643 :
644 :
645 : Datum
646 0 : xmltotext(PG_FUNCTION_ARGS)
647 : {
648 0 : xmltype *data = PG_GETARG_XML_P(0);
649 :
650 : /* It's actually binary compatible. */
651 0 : PG_RETURN_TEXT_P((text *) data);
652 : }
653 :
654 :
655 : text *
656 180 : xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
657 : {
658 : #ifdef USE_LIBXML
659 : text *volatile result;
660 : xmlDocPtr doc;
661 : XmlOptionType parsed_xmloptiontype;
662 : xmlNodePtr content_nodes;
663 180 : volatile xmlBufferPtr buf = NULL;
664 180 : volatile xmlSaveCtxtPtr ctxt = NULL;
665 180 : ErrorSaveContext escontext = {T_ErrorSaveContext};
666 : PgXmlErrorContext *xmlerrcxt;
667 : #endif
668 :
669 180 : if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
670 : {
671 : /*
672 : * We don't actually need to do anything, so just return the
673 : * binary-compatible input. For backwards-compatibility reasons,
674 : * allow such cases to succeed even without USE_LIBXML.
675 : */
676 36 : return (text *) data;
677 : }
678 :
679 : #ifdef USE_LIBXML
680 :
681 : /*
682 : * Parse the input according to the xmloption.
683 : *
684 : * preserve_whitespace is set to false in case we are indenting, otherwise
685 : * libxml2 will fail to indent elements that have whitespace between them.
686 : */
687 144 : doc = xml_parse(data, xmloption_arg, !indent, GetDatabaseEncoding(),
688 : &parsed_xmloptiontype, &content_nodes,
689 144 : (Node *) &escontext);
690 144 : if (doc == NULL || escontext.error_occurred)
691 : {
692 30 : if (doc)
693 0 : xmlFreeDoc(doc);
694 : /* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
695 30 : ereport(ERROR,
696 : (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
697 : errmsg("not an XML document")));
698 : }
699 :
700 : /* If we weren't asked to indent, we're done. */
701 114 : if (!indent)
702 : {
703 18 : xmlFreeDoc(doc);
704 18 : return (text *) data;
705 : }
706 :
707 : /* Otherwise, we gotta spin up some error handling. */
708 96 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
709 :
710 96 : PG_TRY();
711 : {
712 96 : size_t decl_len = 0;
713 :
714 : /* The serialized data will go into this buffer. */
715 96 : buf = xmlBufferCreate();
716 :
717 96 : if (buf == NULL || xmlerrcxt->err_occurred)
718 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
719 : "could not allocate xmlBuffer");
720 :
721 : /* Detect whether there's an XML declaration */
722 96 : parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
723 :
724 : /*
725 : * Emit declaration only if the input had one. Note: some versions of
726 : * xmlSaveToBuffer leak memory if a non-null encoding argument is
727 : * passed, so don't do that. We don't want any encoding conversion
728 : * anyway.
729 : */
730 96 : if (decl_len == 0)
731 84 : ctxt = xmlSaveToBuffer(buf, NULL,
732 : XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
733 : else
734 12 : ctxt = xmlSaveToBuffer(buf, NULL,
735 : XML_SAVE_FORMAT);
736 :
737 96 : if (ctxt == NULL || xmlerrcxt->err_occurred)
738 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
739 : "could not allocate xmlSaveCtxt");
740 :
741 96 : if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
742 : {
743 : /* If it's a document, saving is easy. */
744 42 : if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
745 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
746 : "could not save document to xmlBuffer");
747 : }
748 54 : else if (content_nodes != NULL)
749 : {
750 : /*
751 : * Deal with the case where we have non-singly-rooted XML.
752 : * libxml's dump functions don't work well for that without help.
753 : * We build a fake root node that serves as a container for the
754 : * content nodes, and then iterate over the nodes.
755 : */
756 : xmlNodePtr root;
757 : xmlNodePtr oldroot;
758 : xmlNodePtr newline;
759 :
760 48 : root = xmlNewNode(NULL, (const xmlChar *) "content-root");
761 48 : if (root == NULL || xmlerrcxt->err_occurred)
762 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
763 : "could not allocate xml node");
764 :
765 : /*
766 : * This attaches root to doc, so we need not free it separately...
767 : * but instead, we have to free the old root if there was one.
768 : */
769 48 : oldroot = xmlDocSetRootElement(doc, root);
770 48 : if (oldroot != NULL)
771 48 : xmlFreeNode(oldroot);
772 :
773 48 : xmlAddChildList(root, content_nodes);
774 :
775 : /*
776 : * We use this node to insert newlines in the dump. Note: in at
777 : * least some libxml versions, xmlNewDocText would not attach the
778 : * node to the document even if we passed it. Therefore, manage
779 : * freeing of this node manually, and pass NULL here to make sure
780 : * there's not a dangling link.
781 : */
782 48 : newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
783 48 : if (newline == NULL || xmlerrcxt->err_occurred)
784 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
785 : "could not allocate xml node");
786 :
787 126 : for (xmlNodePtr node = root->children; node; node = node->next)
788 : {
789 : /* insert newlines between nodes */
790 78 : if (node->type != XML_TEXT_NODE && node->prev != NULL)
791 : {
792 24 : if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
793 : {
794 0 : xmlFreeNode(newline);
795 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
796 : "could not save newline to xmlBuffer");
797 : }
798 : }
799 :
800 78 : if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
801 : {
802 0 : xmlFreeNode(newline);
803 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
804 : "could not save content to xmlBuffer");
805 : }
806 : }
807 :
808 48 : xmlFreeNode(newline);
809 : }
810 :
811 96 : if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
812 : {
813 0 : ctxt = NULL; /* don't try to close it again */
814 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
815 : "could not close xmlSaveCtxtPtr");
816 : }
817 :
818 : /*
819 : * xmlDocContentDumpOutput may add a trailing newline, so remove that.
820 : */
821 96 : if (xmloption_arg == XMLOPTION_DOCUMENT)
822 : {
823 36 : const char *str = (const char *) xmlBufferContent(buf);
824 36 : int len = xmlBufferLength(buf);
825 :
826 72 : while (len > 0 && (str[len - 1] == '\n' ||
827 36 : str[len - 1] == '\r'))
828 36 : len--;
829 :
830 36 : result = cstring_to_text_with_len(str, len);
831 : }
832 : else
833 60 : result = (text *) xmlBuffer_to_xmltype(buf);
834 : }
835 0 : PG_CATCH();
836 : {
837 0 : if (ctxt)
838 0 : xmlSaveClose(ctxt);
839 0 : if (buf)
840 0 : xmlBufferFree(buf);
841 0 : if (doc)
842 0 : xmlFreeDoc(doc);
843 :
844 0 : pg_xml_done(xmlerrcxt, true);
845 :
846 0 : PG_RE_THROW();
847 : }
848 96 : PG_END_TRY();
849 :
850 96 : xmlBufferFree(buf);
851 96 : xmlFreeDoc(doc);
852 :
853 96 : pg_xml_done(xmlerrcxt, false);
854 :
855 96 : return result;
856 : #else
857 : NO_XML_SUPPORT();
858 : return NULL;
859 : #endif
860 : }
861 :
862 :
863 : xmltype *
864 22412 : xmlelement(XmlExpr *xexpr,
865 : Datum *named_argvalue, bool *named_argnull,
866 : Datum *argvalue, bool *argnull)
867 : {
868 : #ifdef USE_LIBXML
869 : xmltype *result;
870 : List *named_arg_strings;
871 : List *arg_strings;
872 : int i;
873 : ListCell *arg;
874 : ListCell *narg;
875 : PgXmlErrorContext *xmlerrcxt;
876 22412 : volatile xmlBufferPtr buf = NULL;
877 22412 : volatile xmlTextWriterPtr writer = NULL;
878 :
879 : /*
880 : * All arguments are already evaluated, and their values are passed in the
881 : * named_argvalue/named_argnull or argvalue/argnull arrays. This avoids
882 : * issues if one of the arguments involves a call to some other function
883 : * or subsystem that wants to use libxml on its own terms. We examine the
884 : * original XmlExpr to identify the numbers and types of the arguments.
885 : */
886 22412 : named_arg_strings = NIL;
887 22412 : i = 0;
888 22460 : foreach(arg, xexpr->named_args)
889 : {
890 54 : Expr *e = (Expr *) lfirst(arg);
891 : char *str;
892 :
893 54 : if (named_argnull[i])
894 0 : str = NULL;
895 : else
896 54 : str = map_sql_value_to_xml_value(named_argvalue[i],
897 : exprType((Node *) e),
898 : false);
899 48 : named_arg_strings = lappend(named_arg_strings, str);
900 48 : i++;
901 : }
902 :
903 22406 : arg_strings = NIL;
904 22406 : i = 0;
905 44788 : foreach(arg, xexpr->args)
906 : {
907 22382 : Expr *e = (Expr *) lfirst(arg);
908 : char *str;
909 :
910 : /* here we can just forget NULL elements immediately */
911 22382 : if (!argnull[i])
912 : {
913 22382 : str = map_sql_value_to_xml_value(argvalue[i],
914 : exprType((Node *) e),
915 : true);
916 22382 : arg_strings = lappend(arg_strings, str);
917 : }
918 22382 : i++;
919 : }
920 :
921 22406 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
922 :
923 22406 : PG_TRY();
924 : {
925 22406 : buf = xmlBufferCreate();
926 22406 : if (buf == NULL || xmlerrcxt->err_occurred)
927 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
928 : "could not allocate xmlBuffer");
929 22406 : writer = xmlNewTextWriterMemory(buf, 0);
930 22406 : if (writer == NULL || xmlerrcxt->err_occurred)
931 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
932 : "could not allocate xmlTextWriter");
933 :
934 22406 : xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
935 :
936 22454 : forboth(arg, named_arg_strings, narg, xexpr->arg_names)
937 : {
938 48 : char *str = (char *) lfirst(arg);
939 48 : char *argname = strVal(lfirst(narg));
940 :
941 48 : if (str)
942 48 : xmlTextWriterWriteAttribute(writer,
943 : (xmlChar *) argname,
944 : (xmlChar *) str);
945 : }
946 :
947 44788 : foreach(arg, arg_strings)
948 : {
949 22382 : char *str = (char *) lfirst(arg);
950 :
951 22382 : xmlTextWriterWriteRaw(writer, (xmlChar *) str);
952 : }
953 :
954 22406 : xmlTextWriterEndElement(writer);
955 :
956 : /* we MUST do this now to flush data out to the buffer ... */
957 22406 : xmlFreeTextWriter(writer);
958 22406 : writer = NULL;
959 :
960 22406 : result = xmlBuffer_to_xmltype(buf);
961 : }
962 0 : PG_CATCH();
963 : {
964 0 : if (writer)
965 0 : xmlFreeTextWriter(writer);
966 0 : if (buf)
967 0 : xmlBufferFree(buf);
968 :
969 0 : pg_xml_done(xmlerrcxt, true);
970 :
971 0 : PG_RE_THROW();
972 : }
973 22406 : PG_END_TRY();
974 :
975 22406 : xmlBufferFree(buf);
976 :
977 22406 : pg_xml_done(xmlerrcxt, false);
978 :
979 22406 : return result;
980 : #else
981 : NO_XML_SUPPORT();
982 : return NULL;
983 : #endif
984 : }
985 :
986 :
987 : xmltype *
988 138 : xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
989 : {
990 : #ifdef USE_LIBXML
991 : xmlDocPtr doc;
992 :
993 138 : doc = xml_parse(data, xmloption_arg, preserve_whitespace,
994 : GetDatabaseEncoding(), NULL, NULL, NULL);
995 90 : xmlFreeDoc(doc);
996 :
997 90 : return (xmltype *) data;
998 : #else
999 : NO_XML_SUPPORT();
1000 : return NULL;
1001 : #endif
1002 : }
1003 :
1004 :
1005 : xmltype *
1006 72 : xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
1007 : {
1008 : #ifdef USE_LIBXML
1009 : xmltype *result;
1010 : StringInfoData buf;
1011 :
1012 72 : if (pg_strcasecmp(target, "xml") == 0)
1013 12 : ereport(ERROR,
1014 : (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
1015 : errmsg("invalid XML processing instruction"),
1016 : errdetail("XML processing instruction target name cannot be \"%s\".", target)));
1017 :
1018 : /*
1019 : * Following the SQL standard, the null check comes after the syntax check
1020 : * above.
1021 : */
1022 60 : *result_is_null = arg_is_null;
1023 60 : if (*result_is_null)
1024 12 : return NULL;
1025 :
1026 48 : initStringInfo(&buf);
1027 :
1028 48 : appendStringInfo(&buf, "<?%s", target);
1029 :
1030 48 : if (arg != NULL)
1031 : {
1032 : char *string;
1033 :
1034 24 : string = text_to_cstring(arg);
1035 24 : if (strstr(string, "?>") != NULL)
1036 6 : ereport(ERROR,
1037 : (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
1038 : errmsg("invalid XML processing instruction"),
1039 : errdetail("XML processing instruction cannot contain \"?>\".")));
1040 :
1041 18 : appendStringInfoChar(&buf, ' ');
1042 18 : appendStringInfoString(&buf, string + strspn(string, " "));
1043 18 : pfree(string);
1044 : }
1045 42 : appendStringInfoString(&buf, "?>");
1046 :
1047 42 : result = stringinfo_to_xmltype(&buf);
1048 42 : pfree(buf.data);
1049 42 : return result;
1050 : #else
1051 : NO_XML_SUPPORT();
1052 : return NULL;
1053 : #endif
1054 : }
1055 :
1056 :
1057 : xmltype *
1058 60 : xmlroot(xmltype *data, text *version, int standalone)
1059 : {
1060 : #ifdef USE_LIBXML
1061 : char *str;
1062 : size_t len;
1063 : xmlChar *orig_version;
1064 : int orig_standalone;
1065 : StringInfoData buf;
1066 :
1067 60 : len = VARSIZE(data) - VARHDRSZ;
1068 60 : str = text_to_cstring((text *) data);
1069 :
1070 60 : parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
1071 :
1072 60 : if (version)
1073 24 : orig_version = xml_text2xmlChar(version);
1074 : else
1075 36 : orig_version = NULL;
1076 :
1077 60 : switch (standalone)
1078 : {
1079 18 : case XML_STANDALONE_YES:
1080 18 : orig_standalone = 1;
1081 18 : break;
1082 12 : case XML_STANDALONE_NO:
1083 12 : orig_standalone = 0;
1084 12 : break;
1085 12 : case XML_STANDALONE_NO_VALUE:
1086 12 : orig_standalone = -1;
1087 12 : break;
1088 18 : case XML_STANDALONE_OMITTED:
1089 : /* leave original value */
1090 18 : break;
1091 : }
1092 :
1093 60 : initStringInfo(&buf);
1094 60 : print_xml_decl(&buf, orig_version, 0, orig_standalone);
1095 60 : appendStringInfoString(&buf, str + len);
1096 :
1097 60 : return stringinfo_to_xmltype(&buf);
1098 : #else
1099 : NO_XML_SUPPORT();
1100 : return NULL;
1101 : #endif
1102 : }
1103 :
1104 :
1105 : /*
1106 : * Validate document (given as string) against DTD (given as external link)
1107 : *
1108 : * This has been removed because it is a security hole: unprivileged users
1109 : * should not be able to use Postgres to fetch arbitrary external files,
1110 : * which unfortunately is exactly what libxml is willing to do with the DTD
1111 : * parameter.
1112 : */
1113 : Datum
1114 0 : xmlvalidate(PG_FUNCTION_ARGS)
1115 : {
1116 0 : ereport(ERROR,
1117 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1118 : errmsg("xmlvalidate is not implemented")));
1119 : return 0;
1120 : }
1121 :
1122 :
1123 : bool
1124 24 : xml_is_document(xmltype *arg)
1125 : {
1126 : #ifdef USE_LIBXML
1127 : xmlDocPtr doc;
1128 24 : ErrorSaveContext escontext = {T_ErrorSaveContext};
1129 :
1130 : /*
1131 : * We'll report "true" if no soft error is reported by xml_parse().
1132 : */
1133 24 : doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
1134 : GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
1135 24 : if (doc)
1136 12 : xmlFreeDoc(doc);
1137 :
1138 24 : return !escontext.error_occurred;
1139 : #else /* not USE_LIBXML */
1140 : NO_XML_SUPPORT();
1141 : return false;
1142 : #endif /* not USE_LIBXML */
1143 : }
1144 :
1145 :
1146 : #ifdef USE_LIBXML
1147 :
1148 : /*
1149 : * pg_xml_init_library --- set up for use of libxml
1150 : *
1151 : * This should be called by each function that is about to use libxml
1152 : * facilities but doesn't require error handling. It initializes libxml
1153 : * and verifies compatibility with the loaded libxml version. These are
1154 : * once-per-session activities.
1155 : *
1156 : * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
1157 : * check)
1158 : */
1159 : void
1160 94706 : pg_xml_init_library(void)
1161 : {
1162 : static bool first_time = true;
1163 :
1164 94706 : if (first_time)
1165 : {
1166 : /* Stuff we need do only once per session */
1167 :
1168 : /*
1169 : * Currently, we have no pure UTF-8 support for internals -- check if
1170 : * we can work.
1171 : */
1172 : if (sizeof(char) != sizeof(xmlChar))
1173 : ereport(ERROR,
1174 : (errmsg("could not initialize XML library"),
1175 : errdetail("libxml2 has incompatible char type: sizeof(char)=%zu, sizeof(xmlChar)=%zu.",
1176 : sizeof(char), sizeof(xmlChar))));
1177 :
1178 : #ifdef USE_LIBXMLCONTEXT
1179 : /* Set up libxml's memory allocation our way */
1180 : xml_memory_init();
1181 : #endif
1182 :
1183 : /* Check library compatibility */
1184 42 : LIBXML_TEST_VERSION;
1185 :
1186 42 : first_time = false;
1187 : }
1188 94706 : }
1189 :
1190 : /*
1191 : * pg_xml_init --- set up for use of libxml and register an error handler
1192 : *
1193 : * This should be called by each function that is about to use libxml
1194 : * facilities and requires error handling. It initializes libxml with
1195 : * pg_xml_init_library() and establishes our libxml error handler.
1196 : *
1197 : * strictness determines which errors are reported and which are ignored.
1198 : *
1199 : * Calls to this function MUST be followed by a PG_TRY block that guarantees
1200 : * that pg_xml_done() is called during either normal or error exit.
1201 : *
1202 : * This is exported for use by contrib/xml2, as well as other code that might
1203 : * wish to share use of this module's libxml error handler.
1204 : */
1205 : PgXmlErrorContext *
1206 24694 : pg_xml_init(PgXmlStrictness strictness)
1207 : {
1208 : PgXmlErrorContext *errcxt;
1209 : void *new_errcxt;
1210 :
1211 : /* Do one-time setup if needed */
1212 24694 : pg_xml_init_library();
1213 :
1214 : /* Create error handling context structure */
1215 24694 : errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
1216 24694 : errcxt->magic = ERRCXT_MAGIC;
1217 24694 : errcxt->strictness = strictness;
1218 24694 : errcxt->err_occurred = false;
1219 24694 : initStringInfo(&errcxt->err_buf);
1220 :
1221 : /*
1222 : * Save original error handler and install ours. libxml originally didn't
1223 : * distinguish between the contexts for generic and for structured error
1224 : * handlers. If we're using an old libxml version, we must thus save the
1225 : * generic error context, even though we're using a structured error
1226 : * handler.
1227 : */
1228 24694 : errcxt->saved_errfunc = xmlStructuredError;
1229 :
1230 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1231 24694 : errcxt->saved_errcxt = xmlStructuredErrorContext;
1232 : #else
1233 : errcxt->saved_errcxt = xmlGenericErrorContext;
1234 : #endif
1235 :
1236 24694 : xmlSetStructuredErrorFunc(errcxt, xml_errorHandler);
1237 :
1238 : /*
1239 : * Verify that xmlSetStructuredErrorFunc set the context variable we
1240 : * expected it to. If not, the error context pointer we just saved is not
1241 : * the correct thing to restore, and since that leaves us without a way to
1242 : * restore the context in pg_xml_done, we must fail.
1243 : *
1244 : * The only known situation in which this test fails is if we compile with
1245 : * headers from a libxml2 that doesn't track the structured error context
1246 : * separately (< 2.7.4), but at runtime use a version that does, or vice
1247 : * versa. The libxml2 authors did not treat that change as constituting
1248 : * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1249 : * fails to protect us from this.
1250 : */
1251 :
1252 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1253 24694 : new_errcxt = xmlStructuredErrorContext;
1254 : #else
1255 : new_errcxt = xmlGenericErrorContext;
1256 : #endif
1257 :
1258 24694 : if (new_errcxt != errcxt)
1259 0 : ereport(ERROR,
1260 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1261 : errmsg("could not set up XML error handler"),
1262 : errhint("This probably indicates that the version of libxml2"
1263 : " being used is not compatible with the libxml2"
1264 : " header files that PostgreSQL was built with.")));
1265 :
1266 : /*
1267 : * Also, install an entity loader to prevent unwanted fetches of external
1268 : * files and URLs.
1269 : */
1270 24694 : errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1271 24694 : xmlSetExternalEntityLoader(xmlPgEntityLoader);
1272 :
1273 24694 : return errcxt;
1274 : }
1275 :
1276 :
1277 : /*
1278 : * pg_xml_done --- restore previous libxml error handling
1279 : *
1280 : * Resets libxml's global error-handling state to what it was before
1281 : * pg_xml_init() was called.
1282 : *
1283 : * This routine verifies that all pending errors have been dealt with
1284 : * (in assert-enabled builds, anyway).
1285 : */
1286 : void
1287 24694 : pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1288 : {
1289 : void *cur_errcxt;
1290 :
1291 : /* An assert seems like enough protection here */
1292 : Assert(errcxt->magic == ERRCXT_MAGIC);
1293 :
1294 : /*
1295 : * In a normal exit, there should be no un-handled libxml errors. But we
1296 : * shouldn't try to enforce this during error recovery, since the longjmp
1297 : * could have been thrown before xml_ereport had a chance to run.
1298 : */
1299 : Assert(!errcxt->err_occurred || isError);
1300 :
1301 : /*
1302 : * Check that libxml's global state is correct, warn if not. This is a
1303 : * real test and not an Assert because it has a higher probability of
1304 : * happening.
1305 : */
1306 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1307 24694 : cur_errcxt = xmlStructuredErrorContext;
1308 : #else
1309 : cur_errcxt = xmlGenericErrorContext;
1310 : #endif
1311 :
1312 24694 : if (cur_errcxt != errcxt)
1313 0 : elog(WARNING, "libxml error handling state is out of sync with xml.c");
1314 :
1315 : /* Restore the saved handlers */
1316 24694 : xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1317 24694 : xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1318 :
1319 : /*
1320 : * Mark the struct as invalid, just in case somebody somehow manages to
1321 : * call xml_errorHandler or xml_ereport with it.
1322 : */
1323 24694 : errcxt->magic = 0;
1324 :
1325 : /* Release memory */
1326 24694 : pfree(errcxt->err_buf.data);
1327 24694 : pfree(errcxt);
1328 24694 : }
1329 :
1330 :
1331 : /*
1332 : * pg_xml_error_occurred() --- test the error flag
1333 : */
1334 : bool
1335 0 : pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1336 : {
1337 0 : return errcxt->err_occurred;
1338 : }
1339 :
1340 :
1341 : /*
1342 : * SQL/XML allows storing "XML documents" or "XML content". "XML
1343 : * documents" are specified by the XML specification and are parsed
1344 : * easily by libxml. "XML content" is specified by SQL/XML as the
1345 : * production "XMLDecl? content". But libxml can only parse the
1346 : * "content" part, so we have to parse the XML declaration ourselves
1347 : * to complete this.
1348 : */
1349 :
/*
 * Make the enclosing function return XML_ERR_SPACE_REQUIRED unless the
 * character at p is XML whitespace.  Only usable in functions that return
 * an XML error code.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/* Advance p over any run of XML whitespace; p must be a modifiable lvalue */
#define SKIP_XML_SPACE(p) \
	while (xmlIsBlank_ch(*(p))) (p)++

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/*
 * Beware of multiple evaluations of argument!  The argument is fully
 * parenthesized in every use, so an expression argument such as
 * "a ? b : c" is compared as a whole.
 */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
	 || xmlIsDigit_ch(c) \
	 || (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
	 || xmlIsCombiningQ(c) \
	 || xmlIsExtender_ch(c))
1367 :
1368 : /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1369 : static xmlChar *
1370 192 : xml_pnstrdup(const xmlChar *str, size_t len)
1371 : {
1372 : xmlChar *result;
1373 :
1374 192 : result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1375 192 : memcpy(result, str, len * sizeof(xmlChar));
1376 192 : result[len] = 0;
1377 192 : return result;
1378 : }
1379 :
1380 : /* Ditto, except input is char* */
1381 : static xmlChar *
1382 2424 : pg_xmlCharStrndup(const char *str, size_t len)
1383 : {
1384 : xmlChar *result;
1385 :
1386 2424 : result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1387 2424 : memcpy(result, str, len);
1388 2424 : result[len] = '\0';
1389 :
1390 2424 : return result;
1391 : }
1392 :
1393 : /*
1394 : * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
1395 : *
1396 : * The input xmlChar is freed regardless of success of the copy.
1397 : */
1398 : static char *
1399 112656 : xml_pstrdup_and_free(xmlChar *str)
1400 : {
1401 : char *result;
1402 :
1403 112656 : if (str)
1404 : {
1405 112656 : PG_TRY();
1406 : {
1407 112656 : result = pstrdup((char *) str);
1408 : }
1409 0 : PG_FINALLY();
1410 : {
1411 112656 : xmlFree(str);
1412 : }
1413 112656 : PG_END_TRY();
1414 : }
1415 : else
1416 0 : result = NULL;
1417 :
1418 112656 : return result;
1419 : }
1420 :
1421 : /*
1422 : * str is the null-terminated input string. Remaining arguments are
1423 : * output arguments; each can be NULL if value is not wanted.
1424 : * version and encoding are returned as locally-palloc'd strings.
1425 : * Result is 0 if OK, an error code if not.
1426 : */
1427 : static int
1428 70012 : parse_xml_decl(const xmlChar *str, size_t *lenp,
1429 : xmlChar **version, xmlChar **encoding, int *standalone)
1430 : {
1431 : const xmlChar *p;
1432 : const xmlChar *save_p;
1433 : size_t len;
1434 : int utf8char;
1435 : int utf8len;
1436 :
1437 : /*
1438 : * Only initialize libxml. We don't need error handling here, but we do
1439 : * need to make sure libxml is initialized before calling any of its
1440 : * functions. Note that this is safe (and a no-op) if caller has already
1441 : * done pg_xml_init().
1442 : */
1443 70012 : pg_xml_init_library();
1444 :
1445 : /* Initialize output arguments to "not present" */
1446 70012 : if (version)
1447 69364 : *version = NULL;
1448 70012 : if (encoding)
1449 0 : *encoding = NULL;
1450 70012 : if (standalone)
1451 69364 : *standalone = -1;
1452 :
1453 70012 : p = str;
1454 :
1455 70012 : if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1456 69790 : goto finished;
1457 :
1458 : /*
1459 : * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
1460 : * rather than an XMLDecl, so we have done what we came to do and found no
1461 : * XMLDecl.
1462 : *
1463 : * We need an input length value for xmlGetUTF8Char, but there's no need
1464 : * to count the whole document size, so use strnlen not strlen.
1465 : */
1466 222 : utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
1467 222 : utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1468 222 : if (PG_XMLISNAMECHAR(utf8char))
1469 12 : goto finished;
1470 :
1471 210 : p += 5;
1472 :
1473 : /* version */
1474 210 : CHECK_XML_SPACE(p);
1475 420 : SKIP_XML_SPACE(p);
1476 210 : if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1477 0 : return XML_ERR_VERSION_MISSING;
1478 210 : p += 7;
1479 210 : SKIP_XML_SPACE(p);
1480 210 : if (*p != '=')
1481 0 : return XML_ERR_VERSION_MISSING;
1482 210 : p += 1;
1483 210 : SKIP_XML_SPACE(p);
1484 :
1485 210 : if (*p == '\'' || *p == '"')
1486 210 : {
1487 : const xmlChar *q;
1488 :
1489 210 : q = xmlStrchr(p + 1, *p);
1490 210 : if (!q)
1491 0 : return XML_ERR_VERSION_MISSING;
1492 :
1493 210 : if (version)
1494 192 : *version = xml_pnstrdup(p + 1, q - p - 1);
1495 210 : p = q + 1;
1496 : }
1497 : else
1498 0 : return XML_ERR_VERSION_MISSING;
1499 :
1500 : /* encoding */
1501 210 : save_p = p;
1502 372 : SKIP_XML_SPACE(p);
1503 210 : if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1504 : {
1505 54 : CHECK_XML_SPACE(save_p);
1506 54 : p += 8;
1507 54 : SKIP_XML_SPACE(p);
1508 54 : if (*p != '=')
1509 0 : return XML_ERR_MISSING_ENCODING;
1510 54 : p += 1;
1511 54 : SKIP_XML_SPACE(p);
1512 :
1513 54 : if (*p == '\'' || *p == '"')
1514 54 : {
1515 : const xmlChar *q;
1516 :
1517 54 : q = xmlStrchr(p + 1, *p);
1518 54 : if (!q)
1519 0 : return XML_ERR_MISSING_ENCODING;
1520 :
1521 54 : if (encoding)
1522 0 : *encoding = xml_pnstrdup(p + 1, q - p - 1);
1523 54 : p = q + 1;
1524 : }
1525 : else
1526 0 : return XML_ERR_MISSING_ENCODING;
1527 : }
1528 : else
1529 : {
1530 156 : p = save_p;
1531 : }
1532 :
1533 : /* standalone */
1534 210 : save_p = p;
1535 318 : SKIP_XML_SPACE(p);
1536 210 : if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1537 : {
1538 108 : CHECK_XML_SPACE(save_p);
1539 108 : p += 10;
1540 108 : SKIP_XML_SPACE(p);
1541 108 : if (*p != '=')
1542 0 : return XML_ERR_STANDALONE_VALUE;
1543 108 : p += 1;
1544 108 : SKIP_XML_SPACE(p);
1545 216 : if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1546 108 : xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1547 : {
1548 60 : if (standalone)
1549 60 : *standalone = 1;
1550 60 : p += 5;
1551 : }
1552 96 : else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1553 48 : xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1554 : {
1555 36 : if (standalone)
1556 36 : *standalone = 0;
1557 36 : p += 4;
1558 : }
1559 : else
1560 12 : return XML_ERR_STANDALONE_VALUE;
1561 : }
1562 : else
1563 : {
1564 102 : p = save_p;
1565 : }
1566 :
1567 198 : SKIP_XML_SPACE(p);
1568 198 : if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1569 0 : return XML_ERR_XMLDECL_NOT_FINISHED;
1570 198 : p += 2;
1571 :
1572 70000 : finished:
1573 70000 : len = p - str;
1574 :
1575 76732 : for (p = str; p < str + len; p++)
1576 6732 : if (*p > 127)
1577 0 : return XML_ERR_INVALID_CHAR;
1578 :
1579 70000 : if (lenp)
1580 70000 : *lenp = len;
1581 :
1582 70000 : return XML_ERR_OK;
1583 : }
1584 :
1585 :
1586 : /*
1587 : * Write an XML declaration. On output, we adjust the XML declaration
1588 : * as follows. (These rules are the moral equivalent of the clause
1589 : * "Serialization of an XML value" in the SQL standard.)
1590 : *
1591 : * We try to avoid generating an XML declaration if possible. This is
1592 : * so that you don't get trivial things like xml '<foo/>' resulting in
1593 : * '<?xml version="1.0"?><foo/>', which would surely be annoying. We
1594 : * must provide a declaration if the standalone property is specified
1595 : * or if we include an encoding declaration. If we have a
1596 : * declaration, we must specify a version (XML requires this).
1597 : * Otherwise we only make a declaration if the version is not "1.0",
1598 : * which is the default version specified in SQL:2003.
1599 : */
1600 : static bool
1601 23780 : print_xml_decl(StringInfo buf, const xmlChar *version,
1602 : pg_enc encoding, int standalone)
1603 : {
1604 23780 : if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1605 23744 : || (encoding && encoding != PG_UTF8)
1606 23744 : || standalone != -1)
1607 : {
1608 96 : appendStringInfoString(buf, "<?xml");
1609 :
1610 96 : if (version)
1611 72 : appendStringInfo(buf, " version=\"%s\"", version);
1612 : else
1613 24 : appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1614 :
1615 96 : if (encoding && encoding != PG_UTF8)
1616 : {
1617 : /*
1618 : * XXX might be useful to convert this to IANA names (ISO-8859-1
1619 : * instead of LATIN1 etc.); needs field experience
1620 : */
1621 0 : appendStringInfo(buf, " encoding=\"%s\"",
1622 : pg_encoding_to_char(encoding));
1623 : }
1624 :
1625 96 : if (standalone == 1)
1626 48 : appendStringInfoString(buf, " standalone=\"yes\"");
1627 48 : else if (standalone == 0)
1628 24 : appendStringInfoString(buf, " standalone=\"no\"");
1629 96 : appendStringInfoString(buf, "?>");
1630 :
1631 96 : return true;
1632 : }
1633 : else
1634 23684 : return false;
1635 : }
1636 :
1637 : /*
1638 : * Test whether an input that is to be parsed as CONTENT contains a DTD.
1639 : *
1640 : * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1641 : * satisfied by a document with a DTD, which is a bit of a wart, as it means
1642 : * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and
1643 : * later fix that, by redefining content with reference to the "more
1644 : * permissive" Document Node of the XQuery/XPath Data Model, such that any
1645 : * DOCUMENT value is indeed also a CONTENT value. That definition is more
1646 : * useful, as CONTENT becomes usable for parsing input of unknown form (think
1647 : * pg_restore).
1648 : *
1649 : * As used below in xml_parse when parsing for CONTENT, libxml does not give
1650 : * us the 2006+ behavior, but only the 2003; it will choke if the input has
1651 : * a DTD. But we can provide the 2006+ definition of CONTENT easily enough,
1652 : * by detecting this case first and simply doing the parse as DOCUMENT.
1653 : *
1654 : * A DTD can be found arbitrarily far in, but that would be a contrived case;
1655 : * it will ordinarily start within a few dozen characters. The only things
1656 : * that can precede it are an XMLDecl (here, the caller will have called
1657 : * parse_xml_decl already), whitespace, comments, and processing instructions.
1658 : * This function need only return true if it sees a valid sequence of such
1659 : * things leading to <!DOCTYPE. It can simply return false in any other
1660 : * cases, including malformed input; that will mean the input gets parsed as
1661 : * CONTENT as originally planned, with libxml reporting any errors.
1662 : *
1663 : * This is only to be called from xml_parse, when pg_xml_init has already
1664 : * been called. The input is already in UTF8 encoding.
1665 : */
1666 : static bool
1667 1012 : xml_doctype_in_content(const xmlChar *str)
1668 : {
1669 1012 : const xmlChar *p = str;
1670 :
1671 : for (;;)
1672 36 : {
1673 : const xmlChar *e;
1674 :
1675 1138 : SKIP_XML_SPACE(p);
1676 1048 : if (*p != '<')
1677 214 : return false;
1678 834 : p++;
1679 :
1680 834 : if (*p == '!')
1681 : {
1682 72 : p++;
1683 :
1684 : /* if we see <!DOCTYPE, we can return true */
1685 72 : if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1686 42 : return true;
1687 :
1688 : /* otherwise, if it's not a comment, fail */
1689 30 : if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1690 0 : return false;
1691 : /* find end of comment: find -- and a > must follow */
1692 30 : p = xmlStrstr(p + 2, (xmlChar *) "--");
1693 30 : if (!p || p[2] != '>')
1694 0 : return false;
1695 : /* advance over comment, and keep scanning */
1696 30 : p += 3;
1697 30 : continue;
1698 : }
1699 :
1700 : /* otherwise, if it's not a PI <?target something?>, fail */
1701 762 : if (*p != '?')
1702 756 : return false;
1703 6 : p++;
1704 :
1705 : /* find end of PI (the string ?> is forbidden within a PI) */
1706 6 : e = xmlStrstr(p, (xmlChar *) "?>");
1707 6 : if (!e)
1708 0 : return false;
1709 :
1710 : /* advance over PI, keep scanning */
1711 6 : p = e + 2;
1712 : }
1713 : }
1714 :
1715 :
1716 : /*
1717 : * Convert a text object to XML internal representation
1718 : *
1719 : * data is the source data (must not be toasted!), encoding is its encoding,
1720 : * and xmloption_arg and preserve_whitespace are options for the
1721 : * transformation.
1722 : *
1723 : * If parsed_xmloptiontype isn't NULL, *parsed_xmloptiontype is set to the
1724 : * XmlOptionType actually used to parse the input (typically the same as
1725 : * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
1726 : *
1727 : * If parsed_nodes isn't NULL and we parse in CONTENT mode, the list
1728 : * of parsed nodes from the xmlParseInNodeContext call will be returned
1729 : * to *parsed_nodes. (It is caller's responsibility to free that.)
1730 : *
1731 : * Errors normally result in ereport(ERROR), but if escontext is an
1732 : * ErrorSaveContext, then "safe" errors are reported there instead, and the
1733 : * caller must check SOFT_ERROR_OCCURRED() to see whether that happened.
1734 : *
1735 : * Note: it is caller's responsibility to xmlFreeDoc() the result,
1736 : * else a permanent memory leak will ensue! But note the result could
1737 : * be NULL after a soft error.
1738 : *
1739 : * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1740 : * yet do not use SAX - see xmlreader.c)
1741 : */
1742 : static xmlDocPtr
1743 1318 : xml_parse(text *data, XmlOptionType xmloption_arg,
1744 : bool preserve_whitespace, int encoding,
1745 : XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
1746 : Node *escontext)
1747 : {
1748 : int32 len;
1749 : xmlChar *string;
1750 : xmlChar *utf8string;
1751 : PgXmlErrorContext *xmlerrcxt;
1752 1318 : volatile xmlParserCtxtPtr ctxt = NULL;
1753 1318 : volatile xmlDocPtr doc = NULL;
1754 :
1755 : /*
1756 : * This step looks annoyingly redundant, but we must do it to have a
1757 : * null-terminated string in case encoding conversion isn't required.
1758 : */
1759 1318 : len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
1760 1318 : string = xml_text2xmlChar(data);
1761 :
1762 : /*
1763 : * If the data isn't UTF8, we must translate before giving it to libxml.
1764 : *
1765 : * XXX ideally, we'd catch any encoding conversion failure and return a
1766 : * soft error. However, failure to convert to UTF8 should be pretty darn
1767 : * rare, so for now this is left undone.
1768 : */
1769 1318 : utf8string = pg_do_encoding_conversion(string,
1770 : len,
1771 : encoding,
1772 : PG_UTF8);
1773 :
1774 : /* Start up libxml and its parser */
1775 1318 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1776 :
1777 : /* Use a TRY block to ensure we clean up correctly */
1778 1318 : PG_TRY();
1779 : {
1780 1318 : bool parse_as_document = false;
1781 : int options;
1782 : int res_code;
1783 1318 : size_t count = 0;
1784 1318 : xmlChar *version = NULL;
1785 1318 : int standalone = 0;
1786 :
1787 : /* Any errors here are reported as hard ereport's */
1788 1318 : xmlInitParser();
1789 :
1790 : /* Decide whether to parse as document or content */
1791 1318 : if (xmloption_arg == XMLOPTION_DOCUMENT)
1792 294 : parse_as_document = true;
1793 : else
1794 : {
1795 : /* Parse and skip over the XML declaration, if any */
1796 1024 : res_code = parse_xml_decl(utf8string,
1797 : &count, &version, NULL, &standalone);
1798 1024 : if (res_code != 0)
1799 : {
1800 12 : errsave(escontext,
1801 : errcode(ERRCODE_INVALID_XML_CONTENT),
1802 : errmsg_internal("invalid XML content: invalid XML declaration"),
1803 : errdetail_for_xml_code(res_code));
1804 12 : goto fail;
1805 : }
1806 :
1807 : /* Is there a DOCTYPE element? */
1808 1012 : if (xml_doctype_in_content(utf8string + count))
1809 42 : parse_as_document = true;
1810 : }
1811 :
1812 : /*
1813 : * Select parse options.
1814 : *
1815 : * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1816 : * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
1817 : * internal DTD are applied'. As for external DTDs, we try to support
1818 : * them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
1819 : * happen because xmlPgEntityLoader prevents it.
1820 : */
1821 1306 : options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1822 1306 : | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1823 :
1824 : /* initialize output parameters */
1825 1306 : if (parsed_xmloptiontype != NULL)
1826 144 : *parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
1827 : XMLOPTION_CONTENT;
1828 1306 : if (parsed_nodes != NULL)
1829 144 : *parsed_nodes = NULL;
1830 :
1831 1306 : if (parse_as_document)
1832 : {
1833 336 : ctxt = xmlNewParserCtxt();
1834 336 : if (ctxt == NULL || xmlerrcxt->err_occurred)
1835 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1836 : "could not allocate parser context");
1837 :
1838 336 : doc = xmlCtxtReadDoc(ctxt, utf8string,
1839 : NULL, /* no URL */
1840 : "UTF-8",
1841 : options);
1842 :
1843 336 : if (doc == NULL || xmlerrcxt->err_occurred)
1844 : {
1845 : /* Use original option to decide which error code to report */
1846 144 : if (xmloption_arg == XMLOPTION_DOCUMENT)
1847 138 : xml_errsave(escontext, xmlerrcxt,
1848 : ERRCODE_INVALID_XML_DOCUMENT,
1849 : "invalid XML document");
1850 : else
1851 6 : xml_errsave(escontext, xmlerrcxt,
1852 : ERRCODE_INVALID_XML_CONTENT,
1853 : "invalid XML content");
1854 96 : goto fail;
1855 : }
1856 : }
1857 : else
1858 : {
1859 : xmlNodePtr root;
1860 : xmlNodePtr oldroot PG_USED_FOR_ASSERTS_ONLY;
1861 :
1862 : /* set up document with empty root node to be the context node */
1863 970 : doc = xmlNewDoc(version);
1864 970 : if (doc == NULL || xmlerrcxt->err_occurred)
1865 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1866 : "could not allocate XML document");
1867 :
1868 : Assert(doc->encoding == NULL);
1869 970 : doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1870 970 : if (doc->encoding == NULL || xmlerrcxt->err_occurred)
1871 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1872 : "could not allocate XML document");
1873 970 : doc->standalone = standalone;
1874 :
1875 970 : root = xmlNewNode(NULL, (const xmlChar *) "content-root");
1876 970 : if (root == NULL || xmlerrcxt->err_occurred)
1877 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1878 : "could not allocate xml node");
1879 :
1880 : /*
1881 : * This attaches root to doc, so we need not free it separately;
1882 : * and there can't yet be any old root to free.
1883 : */
1884 970 : oldroot = xmlDocSetRootElement(doc, root);
1885 : Assert(oldroot == NULL);
1886 :
1887 : /* allow empty content */
1888 970 : if (*(utf8string + count))
1889 : {
1890 946 : xmlNodePtr node_list = NULL;
1891 : xmlParserErrors res;
1892 :
1893 1892 : res = xmlParseInNodeContext(root,
1894 : (char *) utf8string + count,
1895 946 : strlen((char *) utf8string + count),
1896 : options,
1897 : &node_list);
1898 :
1899 946 : if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
1900 : {
1901 60 : xmlFreeNodeList(node_list);
1902 60 : xml_errsave(escontext, xmlerrcxt,
1903 : ERRCODE_INVALID_XML_CONTENT,
1904 : "invalid XML content");
1905 12 : goto fail;
1906 : }
1907 :
1908 886 : if (parsed_nodes != NULL)
1909 48 : *parsed_nodes = node_list;
1910 : else
1911 838 : xmlFreeNodeList(node_list);
1912 : }
1913 : }
1914 :
1915 1222 : fail:
1916 : ;
1917 : }
1918 96 : PG_CATCH();
1919 : {
1920 96 : if (doc != NULL)
1921 48 : xmlFreeDoc(doc);
1922 96 : if (ctxt != NULL)
1923 48 : xmlFreeParserCtxt(ctxt);
1924 :
1925 96 : pg_xml_done(xmlerrcxt, true);
1926 :
1927 96 : PG_RE_THROW();
1928 : }
1929 1222 : PG_END_TRY();
1930 :
1931 1222 : if (ctxt != NULL)
1932 288 : xmlFreeParserCtxt(ctxt);
1933 :
1934 1222 : pg_xml_done(xmlerrcxt, false);
1935 :
1936 1222 : return doc;
1937 : }
1938 :
1939 :
1940 : /*
1941 : * xmlChar<->text conversions
1942 : */
1943 : static xmlChar *
1944 1468 : xml_text2xmlChar(text *in)
1945 : {
1946 1468 : return (xmlChar *) text_to_cstring(in);
1947 : }
1948 :
1949 :
1950 : #ifdef USE_LIBXMLCONTEXT
1951 :
1952 : /*
1953 : * Manage the special context used for all libxml allocations (but only
1954 : * in special debug builds; see notes at top of file)
1955 : */
1956 : static void
1957 : xml_memory_init(void)
1958 : {
1959 : /* Create memory context if not there already */
1960 : if (LibxmlContext == NULL)
1961 : LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1962 : "Libxml context",
1963 : ALLOCSET_DEFAULT_SIZES);
1964 :
1965 : /* Re-establish the callbacks even if already set */
1966 : xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1967 : }
1968 :
1969 : /*
1970 : * Wrappers for memory management functions
1971 : */
1972 : static void *
1973 : xml_palloc(size_t size)
1974 : {
1975 : return MemoryContextAlloc(LibxmlContext, size);
1976 : }
1977 :
1978 :
/* realloc replacement for libxml: resize the chunk with repalloc */
static void *
xml_repalloc(void *ptr, size_t size)
{
	void	   *resized = repalloc(ptr, size);

	return resized;
}
1984 :
1985 :
/* free replacement for libxml */
static void
xml_pfree(void *ptr)
{
	/* At least some parts of libxml assume xmlFree(NULL) is allowed */
	if (ptr == NULL)
		return;

	pfree(ptr);
}
1993 :
1994 :
1995 : static char *
1996 : xml_pstrdup(const char *string)
1997 : {
1998 : return MemoryContextStrdup(LibxmlContext, string);
1999 : }
2000 : #endif /* USE_LIBXMLCONTEXT */
2001 :
2002 :
2003 : /*
2004 : * xmlPgEntityLoader --- entity loader callback function
2005 : *
2006 : * Silently prevent any external entity URL from being loaded. We don't want
2007 : * to throw an error, so instead make the entity appear to expand to an empty
2008 : * string.
2009 : *
2010 : * We would prefer to allow loading entities that exist in the system's
2011 : * global XML catalog; but the available libxml2 APIs make that a complex
2012 : * and fragile task. For now, just shut down all external access.
2013 : */
2014 : static xmlParserInputPtr
2015 18 : xmlPgEntityLoader(const char *URL, const char *ID,
2016 : xmlParserCtxtPtr ctxt)
2017 : {
2018 18 : return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
2019 : }
2020 :
2021 :
2022 : /*
2023 : * xml_ereport --- report an XML-related error
2024 : *
2025 : * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
2026 : * standard. This function adds libxml's native error message, if any, as
2027 : * detail.
2028 : *
2029 : * This is exported for modules that want to share the core libxml error
2030 : * handler. Note that pg_xml_init() *must* have been called previously.
2031 : */
2032 : void
2033 12 : xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
2034 : {
2035 : char *detail;
2036 :
2037 : /* Defend against someone passing us a bogus context struct */
2038 12 : if (errcxt->magic != ERRCXT_MAGIC)
2039 0 : elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
2040 :
2041 : /* Flag that the current libxml error has been reported */
2042 12 : errcxt->err_occurred = false;
2043 :
2044 : /* Include detail only if we have some text from libxml */
2045 12 : if (errcxt->err_buf.len > 0)
2046 12 : detail = errcxt->err_buf.data;
2047 : else
2048 0 : detail = NULL;
2049 :
2050 12 : ereport(level,
2051 : (errcode(sqlcode),
2052 : errmsg_internal("%s", msg),
2053 : detail ? errdetail_internal("%s", detail) : 0));
2054 0 : }
2055 :
2056 :
2057 : /*
2058 : * xml_errsave --- save an XML-related error
2059 : *
2060 : * If escontext is an ErrorSaveContext, error details are saved into it,
2061 : * and control returns normally.
2062 : *
2063 : * Otherwise, the error is thrown, so that this is equivalent to
2064 : * xml_ereport() with level == ERROR.
2065 : *
2066 : * This should be used only for errors that we're sure we do not need
2067 : * a transaction abort to clean up after.
2068 : */
2069 : static void
2070 204 : xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
2071 : int sqlcode, const char *msg)
2072 : {
2073 : char *detail;
2074 :
2075 : /* Defend against someone passing us a bogus context struct */
2076 204 : if (errcxt->magic != ERRCXT_MAGIC)
2077 0 : elog(ERROR, "xml_errsave called with invalid PgXmlErrorContext");
2078 :
2079 : /* Flag that the current libxml error has been reported */
2080 204 : errcxt->err_occurred = false;
2081 :
2082 : /* Include detail only if we have some text from libxml */
2083 204 : if (errcxt->err_buf.len > 0)
2084 204 : detail = errcxt->err_buf.data;
2085 : else
2086 0 : detail = NULL;
2087 :
2088 204 : errsave(escontext,
2089 : (errcode(sqlcode),
2090 : errmsg_internal("%s", msg),
2091 : detail ? errdetail_internal("%s", detail) : 0));
2092 108 : }
2093 :
2094 :
2095 : /*
2096 : * Error handler for libxml errors and warnings
2097 : */
2098 : static void
2099 398 : xml_errorHandler(void *data, PgXmlErrorPtr error)
2100 : {
2101 398 : PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
2102 398 : xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
2103 398 : xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
2104 398 : xmlNodePtr node = error->node;
2105 398 : const xmlChar *name = (node != NULL &&
2106 398 : node->type == XML_ELEMENT_NODE) ? node->name : NULL;
2107 398 : int domain = error->domain;
2108 398 : int level = error->level;
2109 : StringInfo errorBuf;
2110 :
2111 : /*
2112 : * Defend against someone passing us a bogus context struct.
2113 : *
2114 : * We force a backend exit if this check fails because longjmp'ing out of
2115 : * libxml would likely render it unsafe to use further.
2116 : */
2117 398 : if (xmlerrcxt->magic != ERRCXT_MAGIC)
2118 0 : elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
2119 :
2120 : /*----------
2121 : * Older libxml versions report some errors differently.
2122 : * First, some errors were previously reported as coming from the parser
2123 : * domain but are now reported as coming from the namespace domain.
2124 : * Second, some warnings were upgraded to errors.
2125 : * We attempt to compensate for that here.
2126 : *----------
2127 : */
2128 398 : switch (error->code)
2129 : {
2130 30 : case XML_WAR_NS_URI:
2131 30 : level = XML_ERR_ERROR;
2132 30 : domain = XML_FROM_NAMESPACE;
2133 30 : break;
2134 :
2135 54 : case XML_ERR_NS_DECL_ERROR:
2136 : case XML_WAR_NS_URI_RELATIVE:
2137 : case XML_WAR_NS_COLUMN:
2138 : case XML_NS_ERR_XML_NAMESPACE:
2139 : case XML_NS_ERR_UNDEFINED_NAMESPACE:
2140 : case XML_NS_ERR_QNAME:
2141 : case XML_NS_ERR_ATTRIBUTE_REDEFINED:
2142 : case XML_NS_ERR_EMPTY:
2143 54 : domain = XML_FROM_NAMESPACE;
2144 54 : break;
2145 : }
2146 :
2147 : /* Decide whether to act on the error or not */
2148 398 : switch (domain)
2149 : {
2150 314 : case XML_FROM_PARSER:
2151 :
2152 : /*
2153 : * XML_ERR_NOT_WELL_BALANCED is typically reported after some
2154 : * other, more on-point error. Furthermore, libxml2 2.13 reports
2155 : * it under a completely different set of rules than prior
2156 : * versions. To avoid cross-version behavioral differences,
2157 : * suppress it so long as we already logged some error.
2158 : */
2159 314 : if (error->code == XML_ERR_NOT_WELL_BALANCED &&
2160 30 : xmlerrcxt->err_occurred)
2161 30 : return;
2162 : /* fall through */
2163 :
2164 : case XML_FROM_NONE:
2165 : case XML_FROM_MEMORY:
2166 : case XML_FROM_IO:
2167 :
2168 : /*
2169 : * Suppress warnings about undeclared entities. We need to do
2170 : * this to avoid problems due to not loading DTD definitions.
2171 : */
2172 284 : if (error->code == XML_WAR_UNDECLARED_ENTITY)
2173 6 : return;
2174 :
2175 : /* Otherwise, accept error regardless of the parsing purpose */
2176 278 : break;
2177 :
2178 84 : default:
2179 : /* Ignore error if only doing well-formedness check */
2180 84 : if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
2181 66 : return;
2182 18 : break;
2183 : }
2184 :
2185 : /* Prepare error message in errorBuf */
2186 296 : errorBuf = makeStringInfo();
2187 :
2188 296 : if (error->line > 0)
2189 296 : appendStringInfo(errorBuf, "line %d: ", error->line);
2190 296 : if (name != NULL)
2191 0 : appendStringInfo(errorBuf, "element %s: ", name);
2192 296 : if (error->message != NULL)
2193 296 : appendStringInfoString(errorBuf, error->message);
2194 : else
2195 0 : appendStringInfoString(errorBuf, "(no message provided)");
2196 :
2197 : /*
2198 : * Append context information to errorBuf.
2199 : *
2200 : * xmlParserPrintFileContext() uses libxml's "generic" error handler to
2201 : * write the context. Since we don't want to duplicate libxml
2202 : * functionality here, we set up a generic error handler temporarily.
2203 : *
2204 : * We use appendStringInfo() directly as libxml's generic error handler.
2205 : * This should work because it has essentially the same signature as
2206 : * libxml expects, namely (void *ptr, const char *msg, ...).
2207 : */
2208 296 : if (input != NULL)
2209 : {
2210 296 : xmlGenericErrorFunc errFuncSaved = xmlGenericError;
2211 296 : void *errCtxSaved = xmlGenericErrorContext;
2212 :
2213 296 : xmlSetGenericErrorFunc(errorBuf,
2214 : (xmlGenericErrorFunc) appendStringInfo);
2215 :
2216 : /* Add context information to errorBuf */
2217 296 : appendStringInfoLineSeparator(errorBuf);
2218 :
2219 296 : xmlParserPrintFileContext(input);
2220 :
2221 : /* Restore generic error func */
2222 296 : xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
2223 : }
2224 :
2225 : /* Get rid of any trailing newlines in errorBuf */
2226 296 : chopStringInfoNewlines(errorBuf);
2227 :
2228 : /*
2229 : * Legacy error handling mode. err_occurred is never set, we just add the
2230 : * message to err_buf. This mode exists because the xml2 contrib module
2231 : * uses our error-handling infrastructure, but we don't want to change its
2232 : * behaviour since it's deprecated anyway. This is also why we don't
2233 : * distinguish between notices, warnings and errors here --- the old-style
2234 : * generic error handler wouldn't have done that either.
2235 : */
2236 296 : if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
2237 : {
2238 2 : appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
2239 2 : appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
2240 : errorBuf->len);
2241 :
2242 2 : destroyStringInfo(errorBuf);
2243 2 : return;
2244 : }
2245 :
2246 : /*
2247 : * We don't want to ereport() here because that'd probably leave libxml in
2248 : * an inconsistent state. Instead, we remember the error and ereport()
2249 : * from xml_ereport().
2250 : *
2251 : * Warnings and notices can be reported immediately since they won't cause
2252 : * a longjmp() out of libxml.
2253 : */
2254 294 : if (level >= XML_ERR_ERROR)
2255 : {
2256 288 : appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
2257 288 : appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
2258 : errorBuf->len);
2259 :
2260 288 : xmlerrcxt->err_occurred = true;
2261 : }
2262 6 : else if (level >= XML_ERR_WARNING)
2263 : {
2264 6 : ereport(WARNING,
2265 : (errmsg_internal("%s", errorBuf->data)));
2266 : }
2267 : else
2268 : {
2269 0 : ereport(NOTICE,
2270 : (errmsg_internal("%s", errorBuf->data)));
2271 : }
2272 :
2273 294 : destroyStringInfo(errorBuf);
2274 : }
2275 :
2276 :
2277 : /*
2278 : * Convert libxml error codes into textual errdetail messages.
2279 : *
2280 : * This should be called within an ereport or errsave invocation,
2281 : * just as errdetail would be.
2282 : *
2283 : * At the moment, we only need to cover those codes that we
2284 : * may raise in this file.
2285 : */
2286 : static int
2287 6 : errdetail_for_xml_code(int code)
2288 : {
2289 : const char *det;
2290 :
2291 6 : switch (code)
2292 : {
2293 0 : case XML_ERR_INVALID_CHAR:
2294 0 : det = gettext_noop("Invalid character value.");
2295 0 : break;
2296 0 : case XML_ERR_SPACE_REQUIRED:
2297 0 : det = gettext_noop("Space required.");
2298 0 : break;
2299 6 : case XML_ERR_STANDALONE_VALUE:
2300 6 : det = gettext_noop("standalone accepts only 'yes' or 'no'.");
2301 6 : break;
2302 0 : case XML_ERR_VERSION_MISSING:
2303 0 : det = gettext_noop("Malformed declaration: missing version.");
2304 0 : break;
2305 0 : case XML_ERR_MISSING_ENCODING:
2306 0 : det = gettext_noop("Missing encoding in text declaration.");
2307 0 : break;
2308 0 : case XML_ERR_XMLDECL_NOT_FINISHED:
2309 0 : det = gettext_noop("Parsing XML declaration: '?>' expected.");
2310 0 : break;
2311 0 : default:
2312 0 : det = gettext_noop("Unrecognized libxml error code: %d.");
2313 0 : break;
2314 : }
2315 :
2316 6 : return errdetail(det, code);
2317 : }
2318 :
2319 :
2320 : /*
2321 : * Remove all trailing newlines from a StringInfo string
2322 : */
2323 : static void
2324 882 : chopStringInfoNewlines(StringInfo str)
2325 : {
2326 1474 : while (str->len > 0 && str->data[str->len - 1] == '\n')
2327 592 : str->data[--str->len] = '\0';
2328 882 : }
2329 :
2330 :
2331 : /*
2332 : * Append a newline after removing any existing trailing newlines
2333 : */
2334 : static void
2335 586 : appendStringInfoLineSeparator(StringInfo str)
2336 : {
2337 586 : chopStringInfoNewlines(str);
2338 586 : if (str->len > 0)
2339 368 : appendStringInfoChar(str, '\n');
2340 586 : }
2341 :
2342 :
2343 : /*
2344 : * Convert one char in the current server encoding to a Unicode codepoint.
2345 : */
2346 : static pg_wchar
2347 18350 : sqlchar_to_unicode(const char *s)
2348 : {
2349 : char *utf8string;
2350 : pg_wchar ret[2]; /* need space for trailing zero */
2351 :
2352 : /* note we're not assuming s is null-terminated */
2353 18350 : utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
2354 :
2355 18350 : pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
2356 : pg_encoding_mblen(PG_UTF8, utf8string));
2357 :
2358 18350 : if (utf8string != s)
2359 0 : pfree(utf8string);
2360 :
2361 18350 : return ret[0];
2362 : }
2363 :
2364 :
2365 : static bool
2366 3652 : is_valid_xml_namefirst(pg_wchar c)
2367 : {
2368 : /* (Letter | '_' | ':') */
2369 3658 : return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2370 7310 : || c == '_' || c == ':');
2371 : }
2372 :
2373 :
2374 : static bool
2375 14698 : is_valid_xml_namechar(pg_wchar c)
2376 : {
2377 : /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
2378 15590 : return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2379 892 : || xmlIsDigitQ(c)
2380 256 : || c == '.' || c == '-' || c == '_' || c == ':'
2381 12 : || xmlIsCombiningQ(c)
2382 31180 : || xmlIsExtenderQ(c));
2383 : }
2384 : #endif /* USE_LIBXML */
2385 :
2386 :
/*
 * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
 *
 * Characters that may not appear at their position in an XML name are
 * written as _xHHHH_ escapes.  Additional escaping:
 *	- ':' is escaped at the start of the identifier, or everywhere when
 *	  fully_escaped is set;
 *	- the sequence "_x" has its underscore escaped;
 *	- with fully_escaped, the first letter of a leading "xml" (in any
 *	  letter case) is escaped;
 *	- with escape_period, '.' is escaped.
 *
 * Returns the contents of a freshly initialized StringInfo.  Without
 * libxml support, this reports an error via NO_XML_SUPPORT().
 */
char *
map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
							   bool escape_period)
{
#ifdef USE_LIBXML
	StringInfoData out;
	const char *cur = ident;

	/*
	 * SQL/XML doesn't make use of this case anywhere, so it's probably a
	 * mistake.
	 */
	Assert(fully_escaped || !escape_period);

	initStringInfo(&out);

	while (*cur != '\0')
	{
		bool		at_start = (cur == ident);
		int			chlen = pg_mblen(cur);

		if (*cur == ':' && (at_start || fully_escaped))
		{
			appendStringInfoString(&out, "_x003A_");
		}
		else if (*cur == '_' && cur[1] == 'x')
		{
			appendStringInfoString(&out, "_x005F_");
		}
		else if (fully_escaped && at_start &&
				 pg_strncasecmp(cur, "xml", 3) == 0)
		{
			/* escape just the leading letter, preserving its case */
			appendStringInfoString(&out,
								   (*cur == 'x') ? "_x0078_" : "_x0058_");
		}
		else if (escape_period && *cur == '.')
		{
			appendStringInfoString(&out, "_x002E_");
		}
		else
		{
			pg_wchar	u = sqlchar_to_unicode(cur);
			bool		valid;

			if (at_start)
				valid = is_valid_xml_namefirst(u);
			else
				valid = is_valid_xml_namechar(u);

			if (valid)
				appendBinaryStringInfo(&out, cur, chlen);
			else
				appendStringInfo(&out, "_x%04X_", (unsigned int) u);
		}

		cur += chlen;
	}

	return out.data;
#else							/* not USE_LIBXML */
	NO_XML_SUPPORT();
	return NULL;
#endif							/* not USE_LIBXML */
}
2441 :
2442 :
2443 : /*
2444 : * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2445 : */
2446 : char *
2447 176 : map_xml_name_to_sql_identifier(const char *name)
2448 : {
2449 : StringInfoData buf;
2450 : const char *p;
2451 :
2452 176 : initStringInfo(&buf);
2453 :
2454 968 : for (p = name; *p; p += pg_mblen(p))
2455 : {
2456 792 : if (*p == '_' && *(p + 1) == 'x'
2457 22 : && isxdigit((unsigned char) *(p + 2))
2458 22 : && isxdigit((unsigned char) *(p + 3))
2459 22 : && isxdigit((unsigned char) *(p + 4))
2460 22 : && isxdigit((unsigned char) *(p + 5))
2461 22 : && *(p + 6) == '_')
2462 22 : {
2463 : char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
2464 : unsigned int u;
2465 :
2466 22 : sscanf(p + 2, "%X", &u);
2467 22 : pg_unicode_to_server(u, (unsigned char *) cbuf);
2468 22 : appendStringInfoString(&buf, cbuf);
2469 22 : p += 6;
2470 : }
2471 : else
2472 770 : appendBinaryStringInfo(&buf, p, pg_mblen(p));
2473 : }
2474 :
2475 176 : return buf.data;
2476 : }
2477 :
2478 : /*
2479 : * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2480 : *
2481 : * When xml_escape_strings is true, then certain characters in string
2482 : * values are replaced by entity references (< etc.), as specified
2483 : * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is
2484 : * wanted. The false case is mainly useful when the resulting value
2485 : * is used with xmlTextWriterWriteAttribute() to write out an
2486 : * attribute, because that function does the escaping itself.
2487 : */
2488 : char *
2489 134708 : map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2490 : {
2491 134708 : if (type_is_array_domain(type))
2492 : {
2493 : ArrayType *array;
2494 : Oid elmtype;
2495 : int16 elmlen;
2496 : bool elmbyval;
2497 : char elmalign;
2498 : int num_elems;
2499 : Datum *elem_values;
2500 : bool *elem_nulls;
2501 : StringInfoData buf;
2502 : int i;
2503 :
2504 6 : array = DatumGetArrayTypeP(value);
2505 6 : elmtype = ARR_ELEMTYPE(array);
2506 6 : get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2507 :
2508 6 : deconstruct_array(array, elmtype,
2509 : elmlen, elmbyval, elmalign,
2510 : &elem_values, &elem_nulls,
2511 : &num_elems);
2512 :
2513 6 : initStringInfo(&buf);
2514 :
2515 24 : for (i = 0; i < num_elems; i++)
2516 : {
2517 18 : if (elem_nulls[i])
2518 0 : continue;
2519 18 : appendStringInfoString(&buf, "<element>");
2520 18 : appendStringInfoString(&buf,
2521 18 : map_sql_value_to_xml_value(elem_values[i],
2522 : elmtype, true));
2523 18 : appendStringInfoString(&buf, "</element>");
2524 : }
2525 :
2526 6 : pfree(elem_values);
2527 6 : pfree(elem_nulls);
2528 :
2529 6 : return buf.data;
2530 : }
2531 : else
2532 : {
2533 : Oid typeOut;
2534 : bool isvarlena;
2535 : char *str;
2536 :
2537 : /*
2538 : * Flatten domains; the special-case treatments below should apply to,
2539 : * eg, domains over boolean not just boolean.
2540 : */
2541 134702 : type = getBaseType(type);
2542 :
2543 : /*
2544 : * Special XSD formatting for some data types
2545 : */
2546 134702 : switch (type)
2547 : {
2548 66 : case BOOLOID:
2549 66 : if (DatumGetBool(value))
2550 60 : return "true";
2551 : else
2552 6 : return "false";
2553 :
2554 48 : case DATEOID:
2555 : {
2556 : DateADT date;
2557 : struct pg_tm tm;
2558 : char buf[MAXDATELEN + 1];
2559 :
2560 48 : date = DatumGetDateADT(value);
2561 : /* XSD doesn't support infinite values */
2562 48 : if (DATE_NOT_FINITE(date))
2563 0 : ereport(ERROR,
2564 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2565 : errmsg("date out of range"),
2566 : errdetail("XML does not support infinite date values.")));
2567 48 : j2date(date + POSTGRES_EPOCH_JDATE,
2568 : &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2569 48 : EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2570 :
2571 48 : return pstrdup(buf);
2572 : }
2573 :
2574 36 : case TIMESTAMPOID:
2575 : {
2576 : Timestamp timestamp;
2577 : struct pg_tm tm;
2578 : fsec_t fsec;
2579 : char buf[MAXDATELEN + 1];
2580 :
2581 36 : timestamp = DatumGetTimestamp(value);
2582 :
2583 : /* XSD doesn't support infinite values */
2584 36 : if (TIMESTAMP_NOT_FINITE(timestamp))
2585 6 : ereport(ERROR,
2586 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2587 : errmsg("timestamp out of range"),
2588 : errdetail("XML does not support infinite timestamp values.")));
2589 30 : else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2590 30 : EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2591 : else
2592 0 : ereport(ERROR,
2593 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2594 : errmsg("timestamp out of range")));
2595 :
2596 30 : return pstrdup(buf);
2597 : }
2598 :
2599 24 : case TIMESTAMPTZOID:
2600 : {
2601 : TimestampTz timestamp;
2602 : struct pg_tm tm;
2603 : int tz;
2604 : fsec_t fsec;
2605 24 : const char *tzn = NULL;
2606 : char buf[MAXDATELEN + 1];
2607 :
2608 24 : timestamp = DatumGetTimestamp(value);
2609 :
2610 : /* XSD doesn't support infinite values */
2611 24 : if (TIMESTAMP_NOT_FINITE(timestamp))
2612 0 : ereport(ERROR,
2613 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2614 : errmsg("timestamp out of range"),
2615 : errdetail("XML does not support infinite timestamp values.")));
2616 24 : else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2617 24 : EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2618 : else
2619 0 : ereport(ERROR,
2620 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2621 : errmsg("timestamp out of range")));
2622 :
2623 24 : return pstrdup(buf);
2624 : }
2625 :
2626 : #ifdef USE_LIBXML
2627 36 : case BYTEAOID:
2628 : {
2629 36 : bytea *bstr = DatumGetByteaPP(value);
2630 : PgXmlErrorContext *xmlerrcxt;
2631 36 : volatile xmlBufferPtr buf = NULL;
2632 36 : volatile xmlTextWriterPtr writer = NULL;
2633 : char *result;
2634 :
2635 36 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2636 :
2637 36 : PG_TRY();
2638 : {
2639 36 : buf = xmlBufferCreate();
2640 36 : if (buf == NULL || xmlerrcxt->err_occurred)
2641 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2642 : "could not allocate xmlBuffer");
2643 36 : writer = xmlNewTextWriterMemory(buf, 0);
2644 36 : if (writer == NULL || xmlerrcxt->err_occurred)
2645 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2646 : "could not allocate xmlTextWriter");
2647 :
2648 36 : if (xmlbinary == XMLBINARY_BASE64)
2649 30 : xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2650 30 : 0, VARSIZE_ANY_EXHDR(bstr));
2651 : else
2652 6 : xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2653 6 : 0, VARSIZE_ANY_EXHDR(bstr));
2654 :
2655 : /* we MUST do this now to flush data out to the buffer */
2656 36 : xmlFreeTextWriter(writer);
2657 36 : writer = NULL;
2658 :
2659 36 : result = pstrdup((const char *) xmlBufferContent(buf));
2660 : }
2661 0 : PG_CATCH();
2662 : {
2663 0 : if (writer)
2664 0 : xmlFreeTextWriter(writer);
2665 0 : if (buf)
2666 0 : xmlBufferFree(buf);
2667 :
2668 0 : pg_xml_done(xmlerrcxt, true);
2669 :
2670 0 : PG_RE_THROW();
2671 : }
2672 36 : PG_END_TRY();
2673 :
2674 36 : xmlBufferFree(buf);
2675 :
2676 36 : pg_xml_done(xmlerrcxt, false);
2677 :
2678 36 : return result;
2679 : }
2680 : #endif /* USE_LIBXML */
2681 :
2682 : }
2683 :
2684 : /*
2685 : * otherwise, just use the type's native text representation
2686 : */
2687 134492 : getTypeOutputInfo(type, &typeOut, &isvarlena);
2688 134492 : str = OidOutputFunctionCall(typeOut, value);
2689 :
2690 : /* ... exactly as-is for XML, and when escaping is not wanted */
2691 134492 : if (type == XMLOID || !xml_escape_strings)
2692 22322 : return str;
2693 :
2694 : /* otherwise, translate special characters as needed */
2695 112170 : return escape_xml(str);
2696 : }
2697 : }
2698 :
2699 :
2700 : /*
2701 : * Escape characters in text that have special meanings in XML.
2702 : *
2703 : * Returns a palloc'd string.
2704 : *
2705 : * NB: this is intentionally not dependent on libxml.
2706 : */
2707 : char *
2708 112642 : escape_xml(const char *str)
2709 : {
2710 : StringInfoData buf;
2711 : const char *p;
2712 :
2713 112642 : initStringInfo(&buf);
2714 707540 : for (p = str; *p; p++)
2715 : {
2716 594898 : switch (*p)
2717 : {
2718 0 : case '&':
2719 0 : appendStringInfoString(&buf, "&");
2720 0 : break;
2721 36 : case '<':
2722 36 : appendStringInfoString(&buf, "<");
2723 36 : break;
2724 24 : case '>':
2725 24 : appendStringInfoString(&buf, ">");
2726 24 : break;
2727 0 : case '\r':
2728 0 : appendStringInfoString(&buf, "
");
2729 0 : break;
2730 594838 : default:
2731 594838 : appendStringInfoCharMacro(&buf, *p);
2732 594838 : break;
2733 : }
2734 : }
2735 112642 : return buf.data;
2736 : }
2737 :
2738 :
/*
 * Duplicate a C string, terminator included, into memory obtained from
 * SPI_palloc().
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes = strlen(s) + 1;
	char	   *copy;

	copy = SPI_palloc(nbytes);

	return memcpy(copy, s, nbytes);
}
2748 :
2749 :
2750 : /*
2751 : * SQL to XML mapping functions
2752 : *
2753 : * What follows below was at one point intentionally organized so that
2754 : * you can read along in the SQL/XML standard. The functions are
2755 : * mostly split up the way the clauses lay out in the standards
2756 : * document, and the identifiers are also aligned with the standard
2757 : * text. Unfortunately, SQL/XML:2006 reordered the clauses
2758 : * differently than SQL/XML:2003, so the order below doesn't make much
2759 : * sense anymore.
2760 : *
2761 : * There are many things going on there:
2762 : *
2763 : * There are two kinds of mappings: Mapping SQL data (table contents)
2764 : * to XML documents, and mapping SQL structure (the "schema") to XML
2765 : * Schema. And there are functions that do both at the same time.
2766 : *
2767 : * Then you can map a database, a schema, or a table, each in both
2768 : * ways. This breaks down recursively: Mapping a database invokes
2769 : * mapping schemas, which invokes mapping tables, which invokes
2770 : * mapping rows, which invokes mapping columns, although you can't
2771 : * call the last two from the outside. Because of this, there are a
2772 : * number of xyz_internal() functions which are to be called both from
2773 : * the function manager wrapper and from some upper layer in a
2774 : * recursive call.
2775 : *
2776 : * See the documentation about what the common function arguments
2777 : * nulls, tableforest, and targetns mean.
2778 : *
2779 : * Some style guidelines for XML output: Use double quotes for quoting
2780 : * XML attributes. Indent XML elements by two spaces, but remember
2781 : * that a lot of code is called recursively at different levels, so
2782 : * it's better not to indent rather than create output that indents
2783 : * and outdents weirdly. Add newlines to make the output look nice.
2784 : */
2785 :
2786 :
2787 : /*
2788 : * Visibility of objects for XML mappings; see SQL/XML:2008 section
2789 : * 4.10.8.
2790 : */
2791 :
2792 : /*
2793 : * Given a query, which must return type oid as first column, produce
2794 : * a list of Oids with the query results.
2795 : */
2796 : static List *
2797 36 : query_to_oid_list(const char *query)
2798 : {
2799 : uint64 i;
2800 36 : List *list = NIL;
2801 : int spi_result;
2802 :
2803 36 : spi_result = SPI_execute(query, true, 0);
2804 36 : if (spi_result != SPI_OK_SELECT)
2805 0 : elog(ERROR, "SPI_execute returned %s for %s",
2806 : SPI_result_code_string(spi_result), query);
2807 :
2808 108 : for (i = 0; i < SPI_processed; i++)
2809 : {
2810 : Datum oid;
2811 : bool isnull;
2812 :
2813 72 : oid = SPI_getbinval(SPI_tuptable->vals[i],
2814 72 : SPI_tuptable->tupdesc,
2815 : 1,
2816 : &isnull);
2817 72 : if (!isnull)
2818 72 : list = lappend_oid(list, DatumGetObjectId(oid));
2819 : }
2820 :
2821 36 : return list;
2822 : }
2823 :
2824 :
2825 : static List *
2826 36 : schema_get_xml_visible_tables(Oid nspid)
2827 : {
2828 : StringInfoData query;
2829 :
2830 36 : initStringInfo(&query);
2831 36 : appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2832 : " WHERE relnamespace = %u AND relkind IN ("
2833 : CppAsString2(RELKIND_RELATION) ","
2834 : CppAsString2(RELKIND_MATVIEW) ","
2835 : CppAsString2(RELKIND_VIEW) ")"
2836 : " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2837 : " ORDER BY relname;", nspid);
2838 :
2839 36 : return query_to_oid_list(query.data);
2840 : }
2841 :
2842 :
/*
 * Including the system schemas is probably not useful for a database
 * mapping, so XML_VISIBLE_SCHEMAS_EXCLUDE is a predicate matching them
 * (names starting with "pg_", plus information_schema).
 */
#define XML_VISIBLE_SCHEMAS_EXCLUDE \
	"(nspname ~ '^pg_'" \
	" OR nspname = 'information_schema')"

/*
 * Query selecting the OIDs of all schemas that pass the USAGE privilege
 * check and are not matched by the exclusion predicate above.
 */
#define XML_VISIBLE_SCHEMAS \
	"SELECT oid FROM pg_catalog.pg_namespace" \
	" WHERE pg_catalog.has_schema_privilege (oid, 'USAGE')" \
	" AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2850 :
2851 :
2852 : static List *
2853 0 : database_get_xml_visible_schemas(void)
2854 : {
2855 0 : return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2856 : }
2857 :
2858 :
2859 : static List *
2860 0 : database_get_xml_visible_tables(void)
2861 : {
2862 : /* At the moment there is no order required here. */
2863 0 : return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2864 : " WHERE relkind IN ("
2865 : CppAsString2(RELKIND_RELATION) ","
2866 : CppAsString2(RELKIND_MATVIEW) ","
2867 : CppAsString2(RELKIND_VIEW) ")"
2868 : " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2869 : " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2870 : }
2871 :
2872 :
2873 : /*
2874 : * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2875 : * section 9.11.
2876 : */
2877 :
2878 : static StringInfo
2879 96 : table_to_xml_internal(Oid relid,
2880 : const char *xmlschema, bool nulls, bool tableforest,
2881 : const char *targetns, bool top_level)
2882 : {
2883 : StringInfoData query;
2884 :
2885 96 : initStringInfo(&query);
2886 96 : appendStringInfo(&query, "SELECT * FROM %s",
2887 : DatumGetCString(DirectFunctionCall1(regclassout,
2888 : ObjectIdGetDatum(relid))));
2889 96 : return query_to_xml_internal(query.data, get_rel_name(relid),
2890 : xmlschema, nulls, tableforest,
2891 : targetns, top_level);
2892 : }
2893 :
2894 :
2895 : Datum
2896 36 : table_to_xml(PG_FUNCTION_ARGS)
2897 : {
2898 36 : Oid relid = PG_GETARG_OID(0);
2899 36 : bool nulls = PG_GETARG_BOOL(1);
2900 36 : bool tableforest = PG_GETARG_BOOL(2);
2901 36 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2902 :
2903 36 : PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2904 : nulls, tableforest,
2905 : targetns, true)));
2906 : }
2907 :
2908 :
2909 : Datum
2910 10 : query_to_xml(PG_FUNCTION_ARGS)
2911 : {
2912 10 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2913 10 : bool nulls = PG_GETARG_BOOL(1);
2914 10 : bool tableforest = PG_GETARG_BOOL(2);
2915 10 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2916 :
2917 10 : PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2918 : NULL, nulls, tableforest,
2919 : targetns, true)));
2920 : }
2921 :
2922 :
2923 : Datum
2924 12 : cursor_to_xml(PG_FUNCTION_ARGS)
2925 : {
2926 12 : char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2927 12 : int32 count = PG_GETARG_INT32(1);
2928 12 : bool nulls = PG_GETARG_BOOL(2);
2929 12 : bool tableforest = PG_GETARG_BOOL(3);
2930 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2931 :
2932 : StringInfoData result;
2933 : Portal portal;
2934 : uint64 i;
2935 :
2936 12 : initStringInfo(&result);
2937 :
2938 12 : if (!tableforest)
2939 : {
2940 6 : xmldata_root_element_start(&result, "table", NULL, targetns, true);
2941 6 : appendStringInfoChar(&result, '\n');
2942 : }
2943 :
2944 12 : SPI_connect();
2945 12 : portal = SPI_cursor_find(name);
2946 12 : if (portal == NULL)
2947 0 : ereport(ERROR,
2948 : (errcode(ERRCODE_UNDEFINED_CURSOR),
2949 : errmsg("cursor \"%s\" does not exist", name)));
2950 :
2951 12 : SPI_cursor_fetch(portal, true, count);
2952 48 : for (i = 0; i < SPI_processed; i++)
2953 36 : SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2954 : tableforest, targetns, true);
2955 :
2956 12 : SPI_finish();
2957 :
2958 12 : if (!tableforest)
2959 6 : xmldata_root_element_end(&result, "table");
2960 :
2961 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2962 : }
2963 :
2964 :
2965 : /*
2966 : * Write the start tag of the root element of a data mapping.
2967 : *
2968 : * top_level means that this is the very top level of the eventual
2969 : * output. For example, when the user calls table_to_xml, then a call
2970 : * with a table name to this function is the top level. When the user
2971 : * calls database_to_xml, then a call with a schema name to this
2972 : * function is not the top level. If top_level is false, then the XML
2973 : * namespace declarations are omitted, because they supposedly already
2974 : * appeared earlier in the output. Repeating them is not wrong, but
2975 : * it looks ugly.
2976 : */
2977 : static void
2978 238 : xmldata_root_element_start(StringInfo result, const char *eltname,
2979 : const char *xmlschema, const char *targetns,
2980 : bool top_level)
2981 : {
2982 : /* This isn't really wrong but currently makes no sense. */
2983 : Assert(top_level || !xmlschema);
2984 :
2985 238 : appendStringInfo(result, "<%s", eltname);
2986 238 : if (top_level)
2987 : {
2988 178 : appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2989 178 : if (strlen(targetns) > 0)
2990 30 : appendStringInfo(result, " xmlns=\"%s\"", targetns);
2991 : }
2992 238 : if (xmlschema)
2993 : {
2994 : /* FIXME: better targets */
2995 18 : if (strlen(targetns) > 0)
2996 6 : appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2997 : else
2998 12 : appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2999 : }
3000 238 : appendStringInfoString(result, ">\n");
3001 238 : }
3002 :
3003 :
3004 : static void
3005 238 : xmldata_root_element_end(StringInfo result, const char *eltname)
3006 : {
3007 238 : appendStringInfo(result, "</%s>\n", eltname);
3008 238 : }
3009 :
3010 :
3011 : static StringInfo
3012 112 : query_to_xml_internal(const char *query, char *tablename,
3013 : const char *xmlschema, bool nulls, bool tableforest,
3014 : const char *targetns, bool top_level)
3015 : {
3016 : StringInfo result;
3017 : char *xmltn;
3018 : uint64 i;
3019 :
3020 112 : if (tablename)
3021 96 : xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3022 : else
3023 16 : xmltn = "table";
3024 :
3025 112 : result = makeStringInfo();
3026 :
3027 112 : SPI_connect();
3028 112 : if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
3029 0 : ereport(ERROR,
3030 : (errcode(ERRCODE_DATA_EXCEPTION),
3031 : errmsg("invalid query")));
3032 :
3033 112 : if (!tableforest)
3034 : {
3035 52 : xmldata_root_element_start(result, xmltn, xmlschema,
3036 : targetns, top_level);
3037 52 : appendStringInfoChar(result, '\n');
3038 : }
3039 :
3040 112 : if (xmlschema)
3041 30 : appendStringInfo(result, "%s\n\n", xmlschema);
3042 :
3043 388 : for (i = 0; i < SPI_processed; i++)
3044 276 : SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
3045 : tableforest, targetns, top_level);
3046 :
3047 112 : if (!tableforest)
3048 52 : xmldata_root_element_end(result, xmltn);
3049 :
3050 112 : SPI_finish();
3051 :
3052 112 : return result;
3053 : }
3054 :
3055 :
3056 : Datum
3057 30 : table_to_xmlschema(PG_FUNCTION_ARGS)
3058 : {
3059 30 : Oid relid = PG_GETARG_OID(0);
3060 30 : bool nulls = PG_GETARG_BOOL(1);
3061 30 : bool tableforest = PG_GETARG_BOOL(2);
3062 30 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3063 : const char *result;
3064 : Relation rel;
3065 :
3066 30 : rel = table_open(relid, AccessShareLock);
3067 30 : result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3068 : tableforest, targetns);
3069 30 : table_close(rel, NoLock);
3070 :
3071 30 : PG_RETURN_XML_P(cstring_to_xmltype(result));
3072 : }
3073 :
3074 :
3075 : Datum
3076 6 : query_to_xmlschema(PG_FUNCTION_ARGS)
3077 : {
3078 6 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
3079 6 : bool nulls = PG_GETARG_BOOL(1);
3080 6 : bool tableforest = PG_GETARG_BOOL(2);
3081 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3082 : const char *result;
3083 : SPIPlanPtr plan;
3084 : Portal portal;
3085 :
3086 6 : SPI_connect();
3087 :
3088 6 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
3089 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3090 :
3091 6 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
3092 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3093 :
3094 6 : result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3095 : InvalidOid, nulls,
3096 : tableforest, targetns));
3097 6 : SPI_cursor_close(portal);
3098 6 : SPI_finish();
3099 :
3100 6 : PG_RETURN_XML_P(cstring_to_xmltype(result));
3101 : }
3102 :
3103 :
3104 : Datum
3105 12 : cursor_to_xmlschema(PG_FUNCTION_ARGS)
3106 : {
3107 12 : char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
3108 12 : bool nulls = PG_GETARG_BOOL(1);
3109 12 : bool tableforest = PG_GETARG_BOOL(2);
3110 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3111 : const char *xmlschema;
3112 : Portal portal;
3113 :
3114 12 : SPI_connect();
3115 12 : portal = SPI_cursor_find(name);
3116 12 : if (portal == NULL)
3117 0 : ereport(ERROR,
3118 : (errcode(ERRCODE_UNDEFINED_CURSOR),
3119 : errmsg("cursor \"%s\" does not exist", name)));
3120 12 : if (portal->tupDesc == NULL)
3121 0 : ereport(ERROR,
3122 : (errcode(ERRCODE_INVALID_CURSOR_STATE),
3123 : errmsg("portal \"%s\" does not return tuples", name)));
3124 :
3125 12 : xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3126 : InvalidOid, nulls,
3127 : tableforest, targetns));
3128 12 : SPI_finish();
3129 :
3130 12 : PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
3131 : }
3132 :
3133 :
3134 : Datum
3135 24 : table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3136 : {
3137 24 : Oid relid = PG_GETARG_OID(0);
3138 24 : bool nulls = PG_GETARG_BOOL(1);
3139 24 : bool tableforest = PG_GETARG_BOOL(2);
3140 24 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3141 : Relation rel;
3142 : const char *xmlschema;
3143 :
3144 24 : rel = table_open(relid, AccessShareLock);
3145 24 : xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3146 : tableforest, targetns);
3147 24 : table_close(rel, NoLock);
3148 :
3149 24 : PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
3150 : xmlschema, nulls, tableforest,
3151 : targetns, true)));
3152 : }
3153 :
3154 :
3155 : Datum
3156 6 : query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3157 : {
3158 6 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
3159 6 : bool nulls = PG_GETARG_BOOL(1);
3160 6 : bool tableforest = PG_GETARG_BOOL(2);
3161 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3162 :
3163 : const char *xmlschema;
3164 : SPIPlanPtr plan;
3165 : Portal portal;
3166 :
3167 6 : SPI_connect();
3168 :
3169 6 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
3170 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3171 :
3172 6 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
3173 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3174 :
3175 6 : xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3176 : InvalidOid, nulls, tableforest, targetns));
3177 6 : SPI_cursor_close(portal);
3178 6 : SPI_finish();
3179 :
3180 6 : PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
3181 : xmlschema, nulls, tableforest,
3182 : targetns, true)));
3183 : }
3184 :
3185 :
3186 : /*
3187 : * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
3188 : * sections 9.13, 9.14.
3189 : */
3190 :
3191 : static StringInfo
3192 18 : schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
3193 : bool tableforest, const char *targetns, bool top_level)
3194 : {
3195 : StringInfo result;
3196 : char *xmlsn;
3197 : List *relid_list;
3198 : ListCell *cell;
3199 :
3200 18 : xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
3201 : true, false);
3202 18 : result = makeStringInfo();
3203 :
3204 18 : xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
3205 18 : appendStringInfoChar(result, '\n');
3206 :
3207 18 : if (xmlschema)
3208 6 : appendStringInfo(result, "%s\n\n", xmlschema);
3209 :
3210 18 : SPI_connect();
3211 :
3212 18 : relid_list = schema_get_xml_visible_tables(nspid);
3213 :
3214 54 : foreach(cell, relid_list)
3215 : {
3216 36 : Oid relid = lfirst_oid(cell);
3217 : StringInfo subres;
3218 :
3219 36 : subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
3220 : targetns, false);
3221 :
3222 36 : appendBinaryStringInfo(result, subres->data, subres->len);
3223 36 : appendStringInfoChar(result, '\n');
3224 : }
3225 :
3226 18 : SPI_finish();
3227 :
3228 18 : xmldata_root_element_end(result, xmlsn);
3229 :
3230 18 : return result;
3231 : }
3232 :
3233 :
3234 : Datum
3235 12 : schema_to_xml(PG_FUNCTION_ARGS)
3236 : {
3237 12 : Name name = PG_GETARG_NAME(0);
3238 12 : bool nulls = PG_GETARG_BOOL(1);
3239 12 : bool tableforest = PG_GETARG_BOOL(2);
3240 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3241 :
3242 : char *schemaname;
3243 : Oid nspid;
3244 :
3245 12 : schemaname = NameStr(*name);
3246 12 : nspid = LookupExplicitNamespace(schemaname, false);
3247 :
3248 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
3249 : nulls, tableforest, targetns, true)));
3250 : }
3251 :
3252 :
3253 : /*
3254 : * Write the start element of the root element of an XML Schema mapping.
3255 : */
3256 : static void
3257 96 : xsd_schema_element_start(StringInfo result, const char *targetns)
3258 : {
3259 96 : appendStringInfoString(result,
3260 : "<xsd:schema\n"
3261 : " xmlns:xsd=\"" NAMESPACE_XSD "\"");
3262 96 : if (strlen(targetns) > 0)
3263 18 : appendStringInfo(result,
3264 : "\n"
3265 : " targetNamespace=\"%s\"\n"
3266 : " elementFormDefault=\"qualified\"",
3267 : targetns);
3268 96 : appendStringInfoString(result,
3269 : ">\n\n");
3270 96 : }
3271 :
3272 :
3273 : static void
3274 96 : xsd_schema_element_end(StringInfo result)
3275 : {
3276 96 : appendStringInfoString(result, "</xsd:schema>");
3277 96 : }
3278 :
3279 :
3280 : static StringInfo
3281 18 : schema_to_xmlschema_internal(const char *schemaname, bool nulls,
3282 : bool tableforest, const char *targetns)
3283 : {
3284 : Oid nspid;
3285 : List *relid_list;
3286 : List *tupdesc_list;
3287 : ListCell *cell;
3288 : StringInfo result;
3289 :
3290 18 : result = makeStringInfo();
3291 :
3292 18 : nspid = LookupExplicitNamespace(schemaname, false);
3293 :
3294 18 : xsd_schema_element_start(result, targetns);
3295 :
3296 18 : SPI_connect();
3297 :
3298 18 : relid_list = schema_get_xml_visible_tables(nspid);
3299 :
3300 18 : tupdesc_list = NIL;
3301 54 : foreach(cell, relid_list)
3302 : {
3303 : Relation rel;
3304 :
3305 36 : rel = table_open(lfirst_oid(cell), AccessShareLock);
3306 36 : tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3307 36 : table_close(rel, NoLock);
3308 : }
3309 :
3310 18 : appendStringInfoString(result,
3311 : map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3312 :
3313 18 : appendStringInfoString(result,
3314 : map_sql_schema_to_xmlschema_types(nspid, relid_list,
3315 : nulls, tableforest, targetns));
3316 :
3317 18 : xsd_schema_element_end(result);
3318 :
3319 18 : SPI_finish();
3320 :
3321 18 : return result;
3322 : }
3323 :
3324 :
3325 : Datum
3326 12 : schema_to_xmlschema(PG_FUNCTION_ARGS)
3327 : {
3328 12 : Name name = PG_GETARG_NAME(0);
3329 12 : bool nulls = PG_GETARG_BOOL(1);
3330 12 : bool tableforest = PG_GETARG_BOOL(2);
3331 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3332 :
3333 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
3334 : nulls, tableforest, targetns)));
3335 : }
3336 :
3337 :
3338 : Datum
3339 6 : schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3340 : {
3341 6 : Name name = PG_GETARG_NAME(0);
3342 6 : bool nulls = PG_GETARG_BOOL(1);
3343 6 : bool tableforest = PG_GETARG_BOOL(2);
3344 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3345 : char *schemaname;
3346 : Oid nspid;
3347 : StringInfo xmlschema;
3348 :
3349 6 : schemaname = NameStr(*name);
3350 6 : nspid = LookupExplicitNamespace(schemaname, false);
3351 :
3352 6 : xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
3353 : tableforest, targetns);
3354 :
3355 6 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
3356 : xmlschema->data, nulls,
3357 : tableforest, targetns, true)));
3358 : }
3359 :
3360 :
3361 : /*
3362 : * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
3363 : * sections 9.16, 9.17.
3364 : */
3365 :
3366 : static StringInfo
3367 0 : database_to_xml_internal(const char *xmlschema, bool nulls,
3368 : bool tableforest, const char *targetns)
3369 : {
3370 : StringInfo result;
3371 : List *nspid_list;
3372 : ListCell *cell;
3373 : char *xmlcn;
3374 :
3375 0 : xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
3376 : true, false);
3377 0 : result = makeStringInfo();
3378 :
3379 0 : xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
3380 0 : appendStringInfoChar(result, '\n');
3381 :
3382 0 : if (xmlschema)
3383 0 : appendStringInfo(result, "%s\n\n", xmlschema);
3384 :
3385 0 : SPI_connect();
3386 :
3387 0 : nspid_list = database_get_xml_visible_schemas();
3388 :
3389 0 : foreach(cell, nspid_list)
3390 : {
3391 0 : Oid nspid = lfirst_oid(cell);
3392 : StringInfo subres;
3393 :
3394 0 : subres = schema_to_xml_internal(nspid, NULL, nulls,
3395 : tableforest, targetns, false);
3396 :
3397 0 : appendBinaryStringInfo(result, subres->data, subres->len);
3398 0 : appendStringInfoChar(result, '\n');
3399 : }
3400 :
3401 0 : SPI_finish();
3402 :
3403 0 : xmldata_root_element_end(result, xmlcn);
3404 :
3405 0 : return result;
3406 : }
3407 :
3408 :
3409 : Datum
3410 0 : database_to_xml(PG_FUNCTION_ARGS)
3411 : {
3412 0 : bool nulls = PG_GETARG_BOOL(0);
3413 0 : bool tableforest = PG_GETARG_BOOL(1);
3414 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3415 :
3416 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3417 : tableforest, targetns)));
3418 : }
3419 :
3420 :
3421 : static StringInfo
3422 0 : database_to_xmlschema_internal(bool nulls, bool tableforest,
3423 : const char *targetns)
3424 : {
3425 : List *relid_list;
3426 : List *nspid_list;
3427 : List *tupdesc_list;
3428 : ListCell *cell;
3429 : StringInfo result;
3430 :
3431 0 : result = makeStringInfo();
3432 :
3433 0 : xsd_schema_element_start(result, targetns);
3434 :
3435 0 : SPI_connect();
3436 :
3437 0 : relid_list = database_get_xml_visible_tables();
3438 0 : nspid_list = database_get_xml_visible_schemas();
3439 :
3440 0 : tupdesc_list = NIL;
3441 0 : foreach(cell, relid_list)
3442 : {
3443 : Relation rel;
3444 :
3445 0 : rel = table_open(lfirst_oid(cell), AccessShareLock);
3446 0 : tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3447 0 : table_close(rel, NoLock);
3448 : }
3449 :
3450 0 : appendStringInfoString(result,
3451 : map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3452 :
3453 0 : appendStringInfoString(result,
3454 : map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
3455 :
3456 0 : xsd_schema_element_end(result);
3457 :
3458 0 : SPI_finish();
3459 :
3460 0 : return result;
3461 : }
3462 :
3463 :
3464 : Datum
3465 0 : database_to_xmlschema(PG_FUNCTION_ARGS)
3466 : {
3467 0 : bool nulls = PG_GETARG_BOOL(0);
3468 0 : bool tableforest = PG_GETARG_BOOL(1);
3469 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3470 :
3471 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3472 : tableforest, targetns)));
3473 : }
3474 :
3475 :
3476 : Datum
3477 0 : database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3478 : {
3479 0 : bool nulls = PG_GETARG_BOOL(0);
3480 0 : bool tableforest = PG_GETARG_BOOL(1);
3481 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3482 : StringInfo xmlschema;
3483 :
3484 0 : xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3485 :
3486 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3487 : nulls, tableforest, targetns)));
3488 : }
3489 :
3490 :
3491 : /*
3492 : * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3493 : * 9.2.
3494 : */
3495 : static char *
3496 384 : map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
3497 : {
3498 : StringInfoData result;
3499 :
3500 384 : initStringInfo(&result);
3501 :
3502 384 : if (a)
3503 384 : appendStringInfoString(&result,
3504 384 : map_sql_identifier_to_xml_name(a, true, true));
3505 384 : if (b)
3506 384 : appendStringInfo(&result, ".%s",
3507 : map_sql_identifier_to_xml_name(b, true, true));
3508 384 : if (c)
3509 384 : appendStringInfo(&result, ".%s",
3510 : map_sql_identifier_to_xml_name(c, true, true));
3511 384 : if (d)
3512 366 : appendStringInfo(&result, ".%s",
3513 : map_sql_identifier_to_xml_name(d, true, true));
3514 :
3515 384 : return result.data;
3516 : }
3517 :
3518 :
3519 : /*
3520 : * Map an SQL table to an XML Schema document; see SQL/XML:2008
3521 : * section 9.11.
3522 : *
3523 : * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3524 : * 9.9.
3525 : */
3526 : static const char *
3527 78 : map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3528 : bool tableforest, const char *targetns)
3529 : {
3530 : int i;
3531 : char *xmltn;
3532 : char *tabletypename;
3533 : char *rowtypename;
3534 : StringInfoData result;
3535 :
3536 78 : initStringInfo(&result);
3537 :
3538 78 : if (OidIsValid(relid))
3539 : {
3540 : HeapTuple tuple;
3541 : Form_pg_class reltuple;
3542 :
3543 54 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3544 54 : if (!HeapTupleIsValid(tuple))
3545 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
3546 54 : reltuple = (Form_pg_class) GETSTRUCT(tuple);
3547 :
3548 54 : xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3549 : true, false);
3550 :
3551 54 : tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3552 54 : get_database_name(MyDatabaseId),
3553 54 : get_namespace_name(reltuple->relnamespace),
3554 54 : NameStr(reltuple->relname));
3555 :
3556 54 : rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3557 54 : get_database_name(MyDatabaseId),
3558 54 : get_namespace_name(reltuple->relnamespace),
3559 54 : NameStr(reltuple->relname));
3560 :
3561 54 : ReleaseSysCache(tuple);
3562 : }
3563 : else
3564 : {
3565 24 : if (tableforest)
3566 12 : xmltn = "row";
3567 : else
3568 12 : xmltn = "table";
3569 :
3570 24 : tabletypename = "TableType";
3571 24 : rowtypename = "RowType";
3572 : }
3573 :
3574 78 : xsd_schema_element_start(&result, targetns);
3575 :
3576 78 : appendStringInfoString(&result,
3577 78 : map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3578 :
3579 78 : appendStringInfo(&result,
3580 : "<xsd:complexType name=\"%s\">\n"
3581 : " <xsd:sequence>\n",
3582 : rowtypename);
3583 :
3584 324 : for (i = 0; i < tupdesc->natts; i++)
3585 : {
3586 246 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3587 :
3588 246 : if (att->attisdropped)
3589 6 : continue;
3590 480 : appendStringInfo(&result,
3591 : " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3592 240 : map_sql_identifier_to_xml_name(NameStr(att->attname),
3593 : true, false),
3594 : map_sql_type_to_xml_name(att->atttypid, -1),
3595 : nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3596 : }
3597 :
3598 78 : appendStringInfoString(&result,
3599 : " </xsd:sequence>\n"
3600 : "</xsd:complexType>\n\n");
3601 :
3602 78 : if (!tableforest)
3603 : {
3604 42 : appendStringInfo(&result,
3605 : "<xsd:complexType name=\"%s\">\n"
3606 : " <xsd:sequence>\n"
3607 : " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3608 : " </xsd:sequence>\n"
3609 : "</xsd:complexType>\n\n",
3610 : tabletypename, rowtypename);
3611 :
3612 42 : appendStringInfo(&result,
3613 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3614 : xmltn, tabletypename);
3615 : }
3616 : else
3617 36 : appendStringInfo(&result,
3618 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3619 : xmltn, rowtypename);
3620 :
3621 78 : xsd_schema_element_end(&result);
3622 :
3623 78 : return result.data;
3624 : }
3625 :
3626 :
3627 : /*
3628 : * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3629 : * section 9.12.
3630 : */
3631 : static const char *
3632 18 : map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3633 : bool tableforest, const char *targetns)
3634 : {
3635 : char *dbname;
3636 : char *nspname;
3637 : char *xmlsn;
3638 : char *schematypename;
3639 : StringInfoData result;
3640 : ListCell *cell;
3641 :
3642 18 : dbname = get_database_name(MyDatabaseId);
3643 18 : nspname = get_namespace_name(nspid);
3644 :
3645 18 : initStringInfo(&result);
3646 :
3647 18 : xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3648 :
3649 18 : schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3650 : dbname,
3651 : nspname,
3652 : NULL);
3653 :
3654 18 : appendStringInfo(&result,
3655 : "<xsd:complexType name=\"%s\">\n", schematypename);
3656 18 : if (!tableforest)
3657 6 : appendStringInfoString(&result,
3658 : " <xsd:all>\n");
3659 : else
3660 12 : appendStringInfoString(&result,
3661 : " <xsd:sequence>\n");
3662 :
3663 54 : foreach(cell, relid_list)
3664 : {
3665 36 : Oid relid = lfirst_oid(cell);
3666 36 : char *relname = get_rel_name(relid);
3667 36 : char *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3668 36 : char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3669 : dbname,
3670 : nspname,
3671 : relname);
3672 :
3673 36 : if (!tableforest)
3674 12 : appendStringInfo(&result,
3675 : " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3676 : xmltn, tabletypename);
3677 : else
3678 24 : appendStringInfo(&result,
3679 : " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3680 : xmltn, tabletypename);
3681 : }
3682 :
3683 18 : if (!tableforest)
3684 6 : appendStringInfoString(&result,
3685 : " </xsd:all>\n");
3686 : else
3687 12 : appendStringInfoString(&result,
3688 : " </xsd:sequence>\n");
3689 18 : appendStringInfoString(&result,
3690 : "</xsd:complexType>\n\n");
3691 :
3692 18 : appendStringInfo(&result,
3693 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3694 : xmlsn, schematypename);
3695 :
3696 18 : return result.data;
3697 : }
3698 :
3699 :
3700 : /*
3701 : * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3702 : * section 9.15.
3703 : */
3704 : static const char *
3705 0 : map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3706 : bool tableforest, const char *targetns)
3707 : {
3708 : char *dbname;
3709 : char *xmlcn;
3710 : char *catalogtypename;
3711 : StringInfoData result;
3712 : ListCell *cell;
3713 :
3714 0 : dbname = get_database_name(MyDatabaseId);
3715 :
3716 0 : initStringInfo(&result);
3717 :
3718 0 : xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3719 :
3720 0 : catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3721 : dbname,
3722 : NULL,
3723 : NULL);
3724 :
3725 0 : appendStringInfo(&result,
3726 : "<xsd:complexType name=\"%s\">\n", catalogtypename);
3727 0 : appendStringInfoString(&result,
3728 : " <xsd:all>\n");
3729 :
3730 0 : foreach(cell, nspid_list)
3731 : {
3732 0 : Oid nspid = lfirst_oid(cell);
3733 0 : char *nspname = get_namespace_name(nspid);
3734 0 : char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3735 0 : char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3736 : dbname,
3737 : nspname,
3738 : NULL);
3739 :
3740 0 : appendStringInfo(&result,
3741 : " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3742 : xmlsn, schematypename);
3743 : }
3744 :
3745 0 : appendStringInfoString(&result,
3746 : " </xsd:all>\n");
3747 0 : appendStringInfoString(&result,
3748 : "</xsd:complexType>\n\n");
3749 :
3750 0 : appendStringInfo(&result,
3751 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3752 : xmlcn, catalogtypename);
3753 :
3754 0 : return result.data;
3755 : }
3756 :
3757 :
3758 : /*
3759 : * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3760 : */
3761 : static const char *
3762 810 : map_sql_type_to_xml_name(Oid typeoid, int typmod)
3763 : {
3764 : StringInfoData result;
3765 :
3766 810 : initStringInfo(&result);
3767 :
3768 810 : switch (typeoid)
3769 : {
3770 30 : case BPCHAROID:
3771 30 : if (typmod == -1)
3772 30 : appendStringInfoString(&result, "CHAR");
3773 : else
3774 0 : appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3775 30 : break;
3776 54 : case VARCHAROID:
3777 54 : if (typmod == -1)
3778 54 : appendStringInfoString(&result, "VARCHAR");
3779 : else
3780 0 : appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3781 54 : break;
3782 30 : case NUMERICOID:
3783 30 : if (typmod == -1)
3784 30 : appendStringInfoString(&result, "NUMERIC");
3785 : else
3786 0 : appendStringInfo(&result, "NUMERIC_%d_%d",
3787 0 : ((typmod - VARHDRSZ) >> 16) & 0xffff,
3788 0 : (typmod - VARHDRSZ) & 0xffff);
3789 30 : break;
3790 174 : case INT4OID:
3791 174 : appendStringInfoString(&result, "INTEGER");
3792 174 : break;
3793 30 : case INT2OID:
3794 30 : appendStringInfoString(&result, "SMALLINT");
3795 30 : break;
3796 30 : case INT8OID:
3797 30 : appendStringInfoString(&result, "BIGINT");
3798 30 : break;
3799 30 : case FLOAT4OID:
3800 30 : appendStringInfoString(&result, "REAL");
3801 30 : break;
3802 0 : case FLOAT8OID:
3803 0 : appendStringInfoString(&result, "DOUBLE");
3804 0 : break;
3805 30 : case BOOLOID:
3806 30 : appendStringInfoString(&result, "BOOLEAN");
3807 30 : break;
3808 30 : case TIMEOID:
3809 30 : if (typmod == -1)
3810 30 : appendStringInfoString(&result, "TIME");
3811 : else
3812 0 : appendStringInfo(&result, "TIME_%d", typmod);
3813 30 : break;
3814 30 : case TIMETZOID:
3815 30 : if (typmod == -1)
3816 30 : appendStringInfoString(&result, "TIME_WTZ");
3817 : else
3818 0 : appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3819 30 : break;
3820 30 : case TIMESTAMPOID:
3821 30 : if (typmod == -1)
3822 30 : appendStringInfoString(&result, "TIMESTAMP");
3823 : else
3824 0 : appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3825 30 : break;
3826 30 : case TIMESTAMPTZOID:
3827 30 : if (typmod == -1)
3828 30 : appendStringInfoString(&result, "TIMESTAMP_WTZ");
3829 : else
3830 0 : appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3831 30 : break;
3832 30 : case DATEOID:
3833 30 : appendStringInfoString(&result, "DATE");
3834 30 : break;
3835 30 : case XMLOID:
3836 30 : appendStringInfoString(&result, "XML");
3837 30 : break;
3838 222 : default:
3839 : {
3840 : HeapTuple tuple;
3841 : Form_pg_type typtuple;
3842 :
3843 222 : tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3844 222 : if (!HeapTupleIsValid(tuple))
3845 0 : elog(ERROR, "cache lookup failed for type %u", typeoid);
3846 222 : typtuple = (Form_pg_type) GETSTRUCT(tuple);
3847 :
3848 222 : appendStringInfoString(&result,
3849 222 : map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3850 222 : get_database_name(MyDatabaseId),
3851 222 : get_namespace_name(typtuple->typnamespace),
3852 222 : NameStr(typtuple->typname)));
3853 :
3854 222 : ReleaseSysCache(tuple);
3855 : }
3856 : }
3857 :
3858 810 : return result.data;
3859 : }
3860 :
3861 :
3862 : /*
3863 : * Map a collection of SQL data types to XML Schema data types; see
3864 : * SQL/XML:2008 section 9.7.
3865 : */
3866 : static const char *
3867 96 : map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3868 : {
3869 96 : List *uniquetypes = NIL;
3870 : int i;
3871 : StringInfoData result;
3872 : ListCell *cell0;
3873 :
3874 : /* extract all column types used in the set of TupleDescs */
3875 210 : foreach(cell0, tupdesc_list)
3876 : {
3877 114 : TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
3878 :
3879 702 : for (i = 0; i < tupdesc->natts; i++)
3880 : {
3881 588 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3882 :
3883 588 : if (att->attisdropped)
3884 24 : continue;
3885 564 : uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3886 : }
3887 : }
3888 :
3889 : /* add base types of domains */
3890 642 : foreach(cell0, uniquetypes)
3891 : {
3892 546 : Oid typid = lfirst_oid(cell0);
3893 546 : Oid basetypid = getBaseType(typid);
3894 :
3895 546 : if (basetypid != typid)
3896 24 : uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3897 : }
3898 :
3899 : /* Convert to textual form */
3900 96 : initStringInfo(&result);
3901 :
3902 642 : foreach(cell0, uniquetypes)
3903 : {
3904 546 : appendStringInfo(&result, "%s\n",
3905 : map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3906 : -1));
3907 : }
3908 :
3909 96 : return result.data;
3910 : }
3911 :
3912 :
3913 : /*
3914 : * Map an SQL data type to a named XML Schema data type; see
3915 : * SQL/XML:2008 sections 9.5 and 9.6.
3916 : *
3917 : * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3918 : * a name attribute, which this function does. The name-less version
3919 : * 9.5 doesn't appear to be required anywhere.)
3920 : */
3921 : static const char *
3922 546 : map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3923 : {
3924 : StringInfoData result;
3925 546 : const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3926 :
3927 546 : initStringInfo(&result);
3928 :
3929 546 : if (typeoid == XMLOID)
3930 : {
3931 24 : appendStringInfoString(&result,
3932 : "<xsd:complexType mixed=\"true\">\n"
3933 : " <xsd:sequence>\n"
3934 : " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3935 : " </xsd:sequence>\n"
3936 : "</xsd:complexType>\n");
3937 : }
3938 : else
3939 : {
3940 522 : appendStringInfo(&result,
3941 : "<xsd:simpleType name=\"%s\">\n", typename);
3942 :
3943 522 : switch (typeoid)
3944 : {
3945 138 : case BPCHAROID:
3946 : case VARCHAROID:
3947 : case TEXTOID:
3948 138 : appendStringInfoString(&result,
3949 : " <xsd:restriction base=\"xsd:string\">\n");
3950 138 : if (typmod != -1)
3951 0 : appendStringInfo(&result,
3952 : " <xsd:maxLength value=\"%d\"/>\n",
3953 : typmod - VARHDRSZ);
3954 138 : appendStringInfoString(&result, " </xsd:restriction>\n");
3955 138 : break;
3956 :
3957 24 : case BYTEAOID:
3958 24 : appendStringInfo(&result,
3959 : " <xsd:restriction base=\"xsd:%s\">\n"
3960 : " </xsd:restriction>\n",
3961 24 : xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3962 24 : break;
3963 :
3964 24 : case NUMERICOID:
3965 24 : if (typmod != -1)
3966 0 : appendStringInfo(&result,
3967 : " <xsd:restriction base=\"xsd:decimal\">\n"
3968 : " <xsd:totalDigits value=\"%d\"/>\n"
3969 : " <xsd:fractionDigits value=\"%d\"/>\n"
3970 : " </xsd:restriction>\n",
3971 0 : ((typmod - VARHDRSZ) >> 16) & 0xffff,
3972 0 : (typmod - VARHDRSZ) & 0xffff);
3973 24 : break;
3974 :
3975 24 : case INT2OID:
3976 24 : appendStringInfo(&result,
3977 : " <xsd:restriction base=\"xsd:short\">\n"
3978 : " <xsd:maxInclusive value=\"%d\"/>\n"
3979 : " <xsd:minInclusive value=\"%d\"/>\n"
3980 : " </xsd:restriction>\n",
3981 : SHRT_MAX, SHRT_MIN);
3982 24 : break;
3983 :
3984 96 : case INT4OID:
3985 96 : appendStringInfo(&result,
3986 : " <xsd:restriction base=\"xsd:int\">\n"
3987 : " <xsd:maxInclusive value=\"%d\"/>\n"
3988 : " <xsd:minInclusive value=\"%d\"/>\n"
3989 : " </xsd:restriction>\n",
3990 : INT_MAX, INT_MIN);
3991 96 : break;
3992 :
3993 24 : case INT8OID:
3994 24 : appendStringInfo(&result,
3995 : " <xsd:restriction base=\"xsd:long\">\n"
3996 : " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3997 : " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3998 : " </xsd:restriction>\n",
3999 : PG_INT64_MAX,
4000 : PG_INT64_MIN);
4001 24 : break;
4002 :
4003 24 : case FLOAT4OID:
4004 24 : appendStringInfoString(&result,
4005 : " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
4006 24 : break;
4007 :
4008 0 : case FLOAT8OID:
4009 0 : appendStringInfoString(&result,
4010 : " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
4011 0 : break;
4012 :
4013 24 : case BOOLOID:
4014 24 : appendStringInfoString(&result,
4015 : " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
4016 24 : break;
4017 :
4018 48 : case TIMEOID:
4019 : case TIMETZOID:
4020 : {
4021 48 : const char *tz = (typeoid == TIMETZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
4022 :
4023 48 : if (typmod == -1)
4024 48 : appendStringInfo(&result,
4025 : " <xsd:restriction base=\"xsd:time\">\n"
4026 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
4027 : " </xsd:restriction>\n", tz);
4028 0 : else if (typmod == 0)
4029 0 : appendStringInfo(&result,
4030 : " <xsd:restriction base=\"xsd:time\">\n"
4031 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
4032 : " </xsd:restriction>\n", tz);
4033 : else
4034 0 : appendStringInfo(&result,
4035 : " <xsd:restriction base=\"xsd:time\">\n"
4036 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
4037 : " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
4038 48 : break;
4039 : }
4040 :
4041 48 : case TIMESTAMPOID:
4042 : case TIMESTAMPTZOID:
4043 : {
4044 48 : const char *tz = (typeoid == TIMESTAMPTZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
4045 :
4046 48 : if (typmod == -1)
4047 48 : appendStringInfo(&result,
4048 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4049 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
4050 : " </xsd:restriction>\n", tz);
4051 0 : else if (typmod == 0)
4052 0 : appendStringInfo(&result,
4053 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4054 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
4055 : " </xsd:restriction>\n", tz);
4056 : else
4057 0 : appendStringInfo(&result,
4058 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4059 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
4060 : " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
4061 48 : break;
4062 : }
4063 :
4064 24 : case DATEOID:
4065 24 : appendStringInfoString(&result,
4066 : " <xsd:restriction base=\"xsd:date\">\n"
4067 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
4068 : " </xsd:restriction>\n");
4069 24 : break;
4070 :
4071 24 : default:
4072 24 : if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
4073 : {
4074 : Oid base_typeoid;
4075 24 : int32 base_typmod = -1;
4076 :
4077 24 : base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
4078 :
4079 24 : appendStringInfo(&result,
4080 : " <xsd:restriction base=\"%s\"/>\n",
4081 : map_sql_type_to_xml_name(base_typeoid, base_typmod));
4082 : }
4083 24 : break;
4084 : }
4085 522 : appendStringInfoString(&result, "</xsd:simpleType>\n");
4086 : }
4087 :
4088 546 : return result.data;
4089 : }
4090 :
4091 :
4092 : /*
4093 : * Map an SQL row to an XML element, taking the row from the active
4094 : * SPI cursor. See also SQL/XML:2008 section 9.10.
4095 : */
4096 : static void
4097 312 : SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
4098 : bool nulls, bool tableforest,
4099 : const char *targetns, bool top_level)
4100 : {
4101 : int i;
4102 : char *xmltn;
4103 :
4104 312 : if (tablename)
4105 228 : xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
4106 : else
4107 : {
4108 84 : if (tableforest)
4109 36 : xmltn = "row";
4110 : else
4111 48 : xmltn = "table";
4112 : }
4113 :
4114 312 : if (tableforest)
4115 162 : xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
4116 : else
4117 150 : appendStringInfoString(result, "<row>\n");
4118 :
4119 1272 : for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
4120 : {
4121 : char *colname;
4122 : Datum colval;
4123 : bool isnull;
4124 :
4125 960 : colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
4126 : true, false);
4127 960 : colval = SPI_getbinval(SPI_tuptable->vals[rownum],
4128 960 : SPI_tuptable->tupdesc,
4129 : i,
4130 : &isnull);
4131 960 : if (isnull)
4132 : {
4133 114 : if (nulls)
4134 60 : appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname);
4135 : }
4136 : else
4137 846 : appendStringInfo(result, " <%s>%s</%s>\n",
4138 : colname,
4139 : map_sql_value_to_xml_value(colval,
4140 846 : SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
4141 : colname);
4142 : }
4143 :
4144 312 : if (tableforest)
4145 : {
4146 162 : xmldata_root_element_end(result, xmltn);
4147 162 : appendStringInfoChar(result, '\n');
4148 : }
4149 : else
4150 150 : appendStringInfoString(result, "</row>\n\n");
4151 312 : }
4152 :
4153 :
4154 : /*
4155 : * XPath related functions
4156 : */
4157 :
4158 : #ifdef USE_LIBXML
4159 :
4160 : /*
4161 : * Convert XML node to text.
4162 : *
4163 : * For attribute and text nodes, return the escaped text. For anything else,
4164 : * dump the whole subtree.
4165 : */
4166 : static text *
4167 192 : xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
4168 : {
4169 192 : xmltype *result = NULL;
4170 :
4171 192 : if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
4172 162 : {
4173 162 : void (*volatile nodefree) (xmlNodePtr) = NULL;
4174 162 : volatile xmlBufferPtr buf = NULL;
4175 162 : volatile xmlNodePtr cur_copy = NULL;
4176 :
4177 162 : PG_TRY();
4178 : {
4179 : int bytes;
4180 :
4181 162 : buf = xmlBufferCreate();
4182 162 : if (buf == NULL || xmlerrcxt->err_occurred)
4183 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4184 : "could not allocate xmlBuffer");
4185 :
4186 : /*
4187 : * Produce a dump of the node that we can serialize. xmlNodeDump
4188 : * does that, but the result of that function won't contain
4189 : * namespace definitions from ancestor nodes, so we first do a
4190 : * xmlCopyNode() which duplicates the node along with its required
4191 : * namespace definitions.
4192 : *
4193 : * Some old libxml2 versions such as 2.7.6 produce partially
4194 : * broken XML_DOCUMENT_NODE nodes (unset content field) when
4195 : * copying them. xmlNodeDump of such a node works fine, but
4196 : * xmlFreeNode crashes; set us up to call xmlFreeDoc instead.
4197 : */
4198 162 : cur_copy = xmlCopyNode(cur, 1);
4199 162 : if (cur_copy == NULL || xmlerrcxt->err_occurred)
4200 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4201 : "could not copy node");
4202 324 : nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ?
4203 162 : (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode;
4204 :
4205 162 : bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0);
4206 162 : if (bytes == -1 || xmlerrcxt->err_occurred)
4207 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4208 : "could not dump node");
4209 :
4210 162 : result = xmlBuffer_to_xmltype(buf);
4211 : }
4212 0 : PG_FINALLY();
4213 : {
4214 162 : if (nodefree)
4215 162 : nodefree(cur_copy);
4216 162 : if (buf)
4217 162 : xmlBufferFree(buf);
4218 : }
4219 162 : PG_END_TRY();
4220 : }
4221 : else
4222 : {
4223 : xmlChar *str;
4224 :
4225 30 : str = xmlXPathCastNodeToString(cur);
4226 30 : PG_TRY();
4227 : {
4228 : /* Here we rely on XML having the same representation as TEXT */
4229 30 : char *escaped = escape_xml((char *) str);
4230 :
4231 30 : result = (xmltype *) cstring_to_text(escaped);
4232 30 : pfree(escaped);
4233 : }
4234 0 : PG_FINALLY();
4235 : {
4236 30 : xmlFree(str);
4237 : }
4238 30 : PG_END_TRY();
4239 : }
4240 :
4241 192 : return result;
4242 : }
4243 :
4244 : /*
4245 : * Convert an XML XPath object (the result of evaluating an XPath expression)
4246 : * to an array of xml values, which are appended to astate. The function
4247 : * result value is the number of elements in the array.
4248 : *
4249 : * If "astate" is NULL then we don't generate the array value, but we still
4250 : * return the number of elements it would have had.
4251 : *
4252 : * Nodesets are converted to an array containing the nodes' textual
4253 : * representations. Primitive values (float, double, string) are converted
4254 : * to a single-element array containing the value's string representation.
4255 : */
4256 : static int
4257 540 : xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
4258 : ArrayBuildState *astate,
4259 : PgXmlErrorContext *xmlerrcxt)
4260 : {
4261 540 : int result = 0;
4262 : Datum datum;
4263 : Oid datumtype;
4264 : char *result_str;
4265 :
4266 540 : switch (xpathobj->type)
4267 : {
4268 498 : case XPATH_NODESET:
4269 498 : if (xpathobj->nodesetval != NULL)
4270 : {
4271 354 : result = xpathobj->nodesetval->nodeNr;
4272 354 : if (astate != NULL)
4273 : {
4274 : int i;
4275 :
4276 168 : for (i = 0; i < result; i++)
4277 : {
4278 90 : datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4279 : xmlerrcxt));
4280 90 : (void) accumArrayResult(astate, datum, false,
4281 : XMLOID, CurrentMemoryContext);
4282 : }
4283 : }
4284 : }
4285 498 : return result;
4286 :
4287 12 : case XPATH_BOOLEAN:
4288 12 : if (astate == NULL)
4289 0 : return 1;
4290 12 : datum = BoolGetDatum(xpathobj->boolval);
4291 12 : datumtype = BOOLOID;
4292 12 : break;
4293 :
4294 18 : case XPATH_NUMBER:
4295 18 : if (astate == NULL)
4296 12 : return 1;
4297 6 : datum = Float8GetDatum(xpathobj->floatval);
4298 6 : datumtype = FLOAT8OID;
4299 6 : break;
4300 :
4301 12 : case XPATH_STRING:
4302 12 : if (astate == NULL)
4303 0 : return 1;
4304 12 : datum = CStringGetDatum((char *) xpathobj->stringval);
4305 12 : datumtype = CSTRINGOID;
4306 12 : break;
4307 :
4308 0 : default:
4309 0 : elog(ERROR, "xpath expression result type %d is unsupported",
4310 : xpathobj->type);
4311 : return 0; /* keep compiler quiet */
4312 : }
4313 :
4314 : /* Common code for scalar-value cases */
4315 30 : result_str = map_sql_value_to_xml_value(datum, datumtype, true);
4316 30 : datum = PointerGetDatum(cstring_to_xmltype(result_str));
4317 30 : (void) accumArrayResult(astate, datum, false,
4318 : XMLOID, CurrentMemoryContext);
4319 30 : return 1;
4320 : }
4321 :
4322 :
4323 : /*
4324 : * Common code for xpath() and xmlexists()
4325 : *
4326 : * Evaluate XPath expression and return number of nodes in res_nitems
4327 : * and array of XML values in astate. Either of those pointers can be
4328 : * NULL if the corresponding result isn't wanted.
4329 : *
4330 : * It is up to the user to ensure that the XML passed is in fact
4331 : * an XML document - XPath doesn't work easily on fragments without
4332 : * a context node being known.
4333 : */
4334 : static void
4335 558 : xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
4336 : int *res_nitems, ArrayBuildState *astate)
4337 : {
4338 : PgXmlErrorContext *xmlerrcxt;
4339 558 : volatile xmlParserCtxtPtr ctxt = NULL;
4340 558 : volatile xmlDocPtr doc = NULL;
4341 558 : volatile xmlXPathContextPtr xpathctx = NULL;
4342 558 : volatile xmlXPathCompExprPtr xpathcomp = NULL;
4343 558 : volatile xmlXPathObjectPtr xpathobj = NULL;
4344 : char *datastr;
4345 : int32 len;
4346 : int32 xpath_len;
4347 : xmlChar *string;
4348 : xmlChar *xpath_expr;
4349 558 : size_t xmldecl_len = 0;
4350 : int i;
4351 : int ndim;
4352 : Datum *ns_names_uris;
4353 : bool *ns_names_uris_nulls;
4354 : int ns_count;
4355 :
4356 : /*
4357 : * Namespace mappings are passed as text[]. If an empty array is passed
4358 : * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
4359 : * Else, a 2-dimensional array with length of the second axis being equal
4360 : * to 2 should be passed, i.e., every subarray contains 2 elements, the
4361 : * first element defining the name, the second one the URI. Example:
4362 : * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
4363 : * 'http://example2.com']].
4364 : */
4365 558 : ndim = namespaces ? ARR_NDIM(namespaces) : 0;
4366 558 : if (ndim != 0)
4367 : {
4368 : int *dims;
4369 :
4370 126 : dims = ARR_DIMS(namespaces);
4371 :
4372 126 : if (ndim != 2 || dims[1] != 2)
4373 0 : ereport(ERROR,
4374 : (errcode(ERRCODE_DATA_EXCEPTION),
4375 : errmsg("invalid array for XML namespace mapping"),
4376 : errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
4377 :
4378 : Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
4379 :
4380 126 : deconstruct_array_builtin(namespaces, TEXTOID,
4381 : &ns_names_uris, &ns_names_uris_nulls,
4382 : &ns_count);
4383 :
4384 : Assert((ns_count % 2) == 0); /* checked above */
4385 126 : ns_count /= 2; /* count pairs only */
4386 : }
4387 : else
4388 : {
4389 432 : ns_names_uris = NULL;
4390 432 : ns_names_uris_nulls = NULL;
4391 432 : ns_count = 0;
4392 : }
4393 :
4394 558 : datastr = VARDATA(data);
4395 558 : len = VARSIZE(data) - VARHDRSZ;
4396 558 : xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
4397 558 : if (xpath_len == 0)
4398 6 : ereport(ERROR,
4399 : (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4400 : errmsg("empty XPath expression")));
4401 :
4402 552 : string = pg_xmlCharStrndup(datastr, len);
4403 552 : xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
4404 :
4405 : /*
4406 : * In a UTF8 database, skip any xml declaration, which might assert
4407 : * another encoding. Ignore parse_xml_decl() failure, letting
4408 : * xmlCtxtReadMemory() report parse errors. Documentation disclaims
4409 : * xpath() support for non-ASCII data in non-UTF8 databases, so leave
4410 : * those scenarios bug-compatible with historical behavior.
4411 : */
4412 552 : if (GetDatabaseEncoding() == PG_UTF8)
4413 552 : parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
4414 :
4415 552 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4416 :
4417 552 : PG_TRY();
4418 : {
4419 552 : xmlInitParser();
4420 :
4421 : /*
4422 : * redundant XML parsing (two parsings for the same value during one
4423 : * command execution are possible)
4424 : */
4425 552 : ctxt = xmlNewParserCtxt();
4426 552 : if (ctxt == NULL || xmlerrcxt->err_occurred)
4427 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4428 : "could not allocate parser context");
4429 1104 : doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4430 552 : len - xmldecl_len, NULL, NULL, 0);
4431 552 : if (doc == NULL || xmlerrcxt->err_occurred)
4432 12 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4433 : "could not parse XML document");
4434 540 : xpathctx = xmlXPathNewContext(doc);
4435 540 : if (xpathctx == NULL || xmlerrcxt->err_occurred)
4436 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4437 : "could not allocate XPath context");
4438 540 : xpathctx->node = (xmlNodePtr) doc;
4439 :
4440 : /* register namespaces, if any */
4441 540 : if (ns_count > 0)
4442 : {
4443 252 : for (i = 0; i < ns_count; i++)
4444 : {
4445 : char *ns_name;
4446 : char *ns_uri;
4447 :
4448 126 : if (ns_names_uris_nulls[i * 2] ||
4449 126 : ns_names_uris_nulls[i * 2 + 1])
4450 0 : ereport(ERROR,
4451 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4452 : errmsg("neither namespace name nor URI may be null")));
4453 126 : ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
4454 126 : ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
4455 126 : if (xmlXPathRegisterNs(xpathctx,
4456 : (xmlChar *) ns_name,
4457 : (xmlChar *) ns_uri) != 0)
4458 0 : ereport(ERROR, /* is this an internal error??? */
4459 : (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
4460 : ns_name, ns_uri)));
4461 : }
4462 : }
4463 :
4464 : /*
4465 : * Note: here and elsewhere, be careful to use xmlXPathCtxtCompile not
4466 : * xmlXPathCompile. In libxml2 2.13.3 and older, the latter function
4467 : * fails to defend itself against recursion-to-stack-overflow. See
4468 : * https://gitlab.gnome.org/GNOME/libxml2/-/issues/799
4469 : */
4470 540 : xpathcomp = xmlXPathCtxtCompile(xpathctx, xpath_expr);
4471 540 : if (xpathcomp == NULL || xmlerrcxt->err_occurred)
4472 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4473 : "invalid XPath expression");
4474 :
4475 : /*
4476 : * Version 2.6.27 introduces a function named
4477 : * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
4478 : * but we can derive the existence by whether any nodes are returned,
4479 : * thereby preventing a library version upgrade and keeping the code
4480 : * the same.
4481 : */
4482 540 : xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
4483 540 : if (xpathobj == NULL || xmlerrcxt->err_occurred)
4484 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4485 : "could not create XPath object");
4486 :
4487 : /*
4488 : * Extract the results as requested.
4489 : */
4490 540 : if (res_nitems != NULL)
4491 432 : *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4492 : else
4493 108 : (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4494 : }
4495 12 : PG_CATCH();
4496 : {
4497 12 : if (xpathobj)
4498 0 : xmlXPathFreeObject(xpathobj);
4499 12 : if (xpathcomp)
4500 0 : xmlXPathFreeCompExpr(xpathcomp);
4501 12 : if (xpathctx)
4502 0 : xmlXPathFreeContext(xpathctx);
4503 12 : if (doc)
4504 12 : xmlFreeDoc(doc);
4505 12 : if (ctxt)
4506 12 : xmlFreeParserCtxt(ctxt);
4507 :
4508 12 : pg_xml_done(xmlerrcxt, true);
4509 :
4510 12 : PG_RE_THROW();
4511 : }
4512 540 : PG_END_TRY();
4513 :
4514 540 : xmlXPathFreeObject(xpathobj);
4515 540 : xmlXPathFreeCompExpr(xpathcomp);
4516 540 : xmlXPathFreeContext(xpathctx);
4517 540 : xmlFreeDoc(doc);
4518 540 : xmlFreeParserCtxt(ctxt);
4519 :
4520 540 : pg_xml_done(xmlerrcxt, false);
4521 540 : }
4522 : #endif /* USE_LIBXML */
4523 :
4524 : /*
4525 : * Evaluate XPath expression and return array of XML values.
4526 : *
4527 : * As we have no support of XQuery sequences yet, this function seems
4528 : * to be the most useful one (array of XML functions plays a role of
4529 : * some kind of substitution for XQuery sequences).
4530 : */
4531 : Datum
4532 126 : xpath(PG_FUNCTION_ARGS)
4533 : {
4534 : #ifdef USE_LIBXML
4535 126 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4536 126 : xmltype *data = PG_GETARG_XML_P(1);
4537 126 : ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4538 : ArrayBuildState *astate;
4539 :
4540 126 : astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4541 126 : xpath_internal(xpath_expr_text, data, namespaces,
4542 : NULL, astate);
4543 108 : PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
4544 : #else
4545 : NO_XML_SUPPORT();
4546 : return 0;
4547 : #endif
4548 : }
4549 :
4550 : /*
4551 : * Determines if the node specified by the supplied XPath exists
4552 : * in a given XML document, returning a boolean.
4553 : */
4554 : Datum
4555 198 : xmlexists(PG_FUNCTION_ARGS)
4556 : {
4557 : #ifdef USE_LIBXML
4558 198 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4559 198 : xmltype *data = PG_GETARG_XML_P(1);
4560 : int res_nitems;
4561 :
4562 198 : xpath_internal(xpath_expr_text, data, NULL,
4563 : &res_nitems, NULL);
4564 :
4565 198 : PG_RETURN_BOOL(res_nitems > 0);
4566 : #else
4567 : NO_XML_SUPPORT();
4568 : return 0;
4569 : #endif
4570 : }
4571 :
4572 : /*
4573 : * Determines if the node specified by the supplied XPath exists
4574 : * in a given XML document, returning a boolean. Differs from
4575 : * xmlexists as it supports namespaces and is not defined in SQL/XML.
4576 : */
4577 : Datum
4578 234 : xpath_exists(PG_FUNCTION_ARGS)
4579 : {
4580 : #ifdef USE_LIBXML
4581 234 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4582 234 : xmltype *data = PG_GETARG_XML_P(1);
4583 234 : ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4584 : int res_nitems;
4585 :
4586 234 : xpath_internal(xpath_expr_text, data, namespaces,
4587 : &res_nitems, NULL);
4588 :
4589 234 : PG_RETURN_BOOL(res_nitems > 0);
4590 : #else
4591 : NO_XML_SUPPORT();
4592 : return 0;
4593 : #endif
4594 : }
4595 :
4596 : /*
4597 : * Functions for checking well-formed-ness
4598 : */
4599 :
4600 : #ifdef USE_LIBXML
4601 : static bool
4602 114 : wellformed_xml(text *data, XmlOptionType xmloption_arg)
4603 : {
4604 : xmlDocPtr doc;
4605 114 : ErrorSaveContext escontext = {T_ErrorSaveContext};
4606 :
4607 : /*
4608 : * We'll report "true" if no soft error is reported by xml_parse().
4609 : */
4610 114 : doc = xml_parse(data, xmloption_arg, true,
4611 : GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
4612 114 : if (doc)
4613 60 : xmlFreeDoc(doc);
4614 :
4615 114 : return !escontext.error_occurred;
4616 : }
4617 : #endif
4618 :
4619 : Datum
4620 90 : xml_is_well_formed(PG_FUNCTION_ARGS)
4621 : {
4622 : #ifdef USE_LIBXML
4623 90 : text *data = PG_GETARG_TEXT_PP(0);
4624 :
4625 90 : PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4626 : #else
4627 : NO_XML_SUPPORT();
4628 : return 0;
4629 : #endif /* not USE_LIBXML */
4630 : }
4631 :
4632 : Datum
4633 12 : xml_is_well_formed_document(PG_FUNCTION_ARGS)
4634 : {
4635 : #ifdef USE_LIBXML
4636 12 : text *data = PG_GETARG_TEXT_PP(0);
4637 :
4638 12 : PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4639 : #else
4640 : NO_XML_SUPPORT();
4641 : return 0;
4642 : #endif /* not USE_LIBXML */
4643 : }
4644 :
4645 : Datum
4646 12 : xml_is_well_formed_content(PG_FUNCTION_ARGS)
4647 : {
4648 : #ifdef USE_LIBXML
4649 12 : text *data = PG_GETARG_TEXT_PP(0);
4650 :
4651 12 : PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4652 : #else
4653 : NO_XML_SUPPORT();
4654 : return 0;
4655 : #endif /* not USE_LIBXML */
4656 : }
4657 :
4658 : /*
4659 : * support functions for XMLTABLE
4660 : *
4661 : */
4662 : #ifdef USE_LIBXML
4663 :
4664 : /*
4665 : * Returns private data from executor state. Ensure validity by check with
4666 : * MAGIC number.
4667 : */
4668 : static inline XmlTableBuilderData *
4669 160004 : GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4670 : {
4671 : XmlTableBuilderData *result;
4672 :
4673 160004 : if (!IsA(state, TableFuncScanState))
4674 0 : elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4675 160004 : result = (XmlTableBuilderData *) state->opaque;
4676 160004 : if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4677 0 : elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4678 :
4679 160004 : return result;
4680 : }
4681 : #endif
4682 :
4683 : /*
4684 : * XmlTableInitOpaque
4685 : * Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4686 : * the XML parser.
4687 : *
4688 : * Note: Because we call pg_xml_init() here and pg_xml_done() in
4689 : * XmlTableDestroyOpaque, it is critical for robustness that no other
4690 : * executor nodes run until this node is processed to completion. Caller
4691 : * must execute this to completion (probably filling a tuplestore to exhaust
4692 : * this node in a single pass) instead of using row-per-call mode.
4693 : */
4694 : static void
4695 264 : XmlTableInitOpaque(TableFuncScanState *state, int natts)
4696 : {
4697 : #ifdef USE_LIBXML
4698 264 : volatile xmlParserCtxtPtr ctxt = NULL;
4699 : XmlTableBuilderData *xtCxt;
4700 : PgXmlErrorContext *xmlerrcxt;
4701 :
4702 264 : xtCxt = palloc0(sizeof(XmlTableBuilderData));
4703 264 : xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4704 264 : xtCxt->natts = natts;
4705 264 : xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4706 :
4707 264 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4708 :
4709 264 : PG_TRY();
4710 : {
4711 264 : xmlInitParser();
4712 :
4713 264 : ctxt = xmlNewParserCtxt();
4714 264 : if (ctxt == NULL || xmlerrcxt->err_occurred)
4715 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4716 : "could not allocate parser context");
4717 : }
4718 0 : PG_CATCH();
4719 : {
4720 0 : if (ctxt != NULL)
4721 0 : xmlFreeParserCtxt(ctxt);
4722 :
4723 0 : pg_xml_done(xmlerrcxt, true);
4724 :
4725 0 : PG_RE_THROW();
4726 : }
4727 264 : PG_END_TRY();
4728 :
4729 264 : xtCxt->xmlerrcxt = xmlerrcxt;
4730 264 : xtCxt->ctxt = ctxt;
4731 :
4732 264 : state->opaque = xtCxt;
4733 : #else
4734 : NO_XML_SUPPORT();
4735 : #endif /* not USE_LIBXML */
4736 264 : }
4737 :
4738 : /*
4739 : * XmlTableSetDocument
4740 : * Install the input document
4741 : */
4742 : static void
4743 264 : XmlTableSetDocument(TableFuncScanState *state, Datum value)
4744 : {
4745 : #ifdef USE_LIBXML
4746 : XmlTableBuilderData *xtCxt;
4747 264 : xmltype *xmlval = DatumGetXmlP(value);
4748 : char *str;
4749 : xmlChar *xstr;
4750 : int length;
4751 264 : volatile xmlDocPtr doc = NULL;
4752 264 : volatile xmlXPathContextPtr xpathcxt = NULL;
4753 :
4754 264 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4755 :
4756 : /*
4757 : * Use out function for casting to string (remove encoding property). See
4758 : * comment in xml_out.
4759 : */
4760 264 : str = xml_out_internal(xmlval, 0);
4761 :
4762 264 : length = strlen(str);
4763 264 : xstr = pg_xmlCharStrndup(str, length);
4764 :
4765 264 : PG_TRY();
4766 : {
4767 264 : doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4768 264 : if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4769 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4770 : "could not parse XML document");
4771 264 : xpathcxt = xmlXPathNewContext(doc);
4772 264 : if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4773 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4774 : "could not allocate XPath context");
4775 264 : xpathcxt->node = (xmlNodePtr) doc;
4776 : }
4777 0 : PG_CATCH();
4778 : {
4779 0 : if (xpathcxt != NULL)
4780 0 : xmlXPathFreeContext(xpathcxt);
4781 0 : if (doc != NULL)
4782 0 : xmlFreeDoc(doc);
4783 :
4784 0 : PG_RE_THROW();
4785 : }
4786 264 : PG_END_TRY();
4787 :
4788 264 : xtCxt->doc = doc;
4789 264 : xtCxt->xpathcxt = xpathcxt;
4790 : #else
4791 : NO_XML_SUPPORT();
4792 : #endif /* not USE_LIBXML */
4793 264 : }
4794 :
4795 : /*
4796 : * XmlTableSetNamespace
4797 : * Add a namespace declaration
4798 : */
4799 : static void
4800 18 : XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
4801 : {
4802 : #ifdef USE_LIBXML
4803 : XmlTableBuilderData *xtCxt;
4804 :
4805 18 : if (name == NULL)
4806 6 : ereport(ERROR,
4807 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4808 : errmsg("DEFAULT namespace is not supported")));
4809 12 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4810 :
4811 12 : if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4812 12 : pg_xmlCharStrndup(name, strlen(name)),
4813 12 : pg_xmlCharStrndup(uri, strlen(uri))))
4814 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4815 : "could not set XML namespace");
4816 : #else
4817 : NO_XML_SUPPORT();
4818 : #endif /* not USE_LIBXML */
4819 12 : }
4820 :
4821 : /*
4822 : * XmlTableSetRowFilter
4823 : * Install the row-filter Xpath expression.
4824 : */
4825 : static void
4826 258 : XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4827 : {
4828 : #ifdef USE_LIBXML
4829 : XmlTableBuilderData *xtCxt;
4830 : xmlChar *xstr;
4831 :
4832 258 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4833 :
4834 258 : if (*path == '\0')
4835 0 : ereport(ERROR,
4836 : (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4837 : errmsg("row path filter must not be empty string")));
4838 :
4839 258 : xstr = pg_xmlCharStrndup(path, strlen(path));
4840 :
4841 : /* We require XmlTableSetDocument to have been done already */
4842 : Assert(xtCxt->xpathcxt != NULL);
4843 :
4844 258 : xtCxt->xpathcomp = xmlXPathCtxtCompile(xtCxt->xpathcxt, xstr);
4845 258 : if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4846 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4847 : "invalid XPath expression");
4848 : #else
4849 : NO_XML_SUPPORT();
4850 : #endif /* not USE_LIBXML */
4851 258 : }
4852 :
4853 : /*
4854 : * XmlTableSetColumnFilter
4855 : * Install the column-filter Xpath expression, for the given column.
4856 : */
4857 : static void
4858 774 : XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
4859 : {
4860 : #ifdef USE_LIBXML
4861 : XmlTableBuilderData *xtCxt;
4862 : xmlChar *xstr;
4863 :
4864 : Assert(PointerIsValid(path));
4865 :
4866 774 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4867 :
4868 774 : if (*path == '\0')
4869 0 : ereport(ERROR,
4870 : (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4871 : errmsg("column path filter must not be empty string")));
4872 :
4873 774 : xstr = pg_xmlCharStrndup(path, strlen(path));
4874 :
4875 : /* We require XmlTableSetDocument to have been done already */
4876 : Assert(xtCxt->xpathcxt != NULL);
4877 :
4878 774 : xtCxt->xpathscomp[colnum] = xmlXPathCtxtCompile(xtCxt->xpathcxt, xstr);
4879 774 : if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4880 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4881 : "invalid XPath expression");
4882 : #else
4883 : NO_XML_SUPPORT();
4884 : #endif /* not USE_LIBXML */
4885 774 : }
4886 :
4887 : /*
4888 : * XmlTableFetchRow
4889 : * Prepare the next "current" tuple for upcoming GetValue calls.
4890 : * Returns false if the row-filter expression returned no more rows.
4891 : */
4892 : static bool
4893 22994 : XmlTableFetchRow(TableFuncScanState *state)
4894 : {
4895 : #ifdef USE_LIBXML
4896 : XmlTableBuilderData *xtCxt;
4897 :
4898 22994 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4899 :
4900 : /* Propagate our own error context to libxml2 */
4901 22994 : xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
4902 :
4903 22994 : if (xtCxt->xpathobj == NULL)
4904 : {
4905 258 : xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4906 258 : if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4907 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4908 : "could not create XPath object");
4909 :
4910 258 : xtCxt->row_count = 0;
4911 : }
4912 :
4913 22994 : if (xtCxt->xpathobj->type == XPATH_NODESET)
4914 : {
4915 22994 : if (xtCxt->xpathobj->nodesetval != NULL)
4916 : {
4917 22994 : if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4918 22748 : return true;
4919 : }
4920 : }
4921 :
4922 246 : return false;
4923 : #else
4924 : NO_XML_SUPPORT();
4925 : return false;
4926 : #endif /* not USE_LIBXML */
4927 : }
4928 :
4929 : /*
4930 : * XmlTableGetValue
4931 : * Return the value for column number 'colnum' for the current row. If
4932 : * column -1 is requested, return representation of the whole row.
4933 : *
4934 : * This leaks memory, so be sure to reset often the context in which it's
4935 : * called.
4936 : */
4937 : static Datum
4938 135438 : XmlTableGetValue(TableFuncScanState *state, int colnum,
4939 : Oid typid, int32 typmod, bool *isnull)
4940 : {
4941 : #ifdef USE_LIBXML
4942 135438 : Datum result = (Datum) 0;
4943 : XmlTableBuilderData *xtCxt;
4944 135438 : volatile xmlXPathObjectPtr xpathobj = NULL;
4945 :
4946 135438 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4947 :
4948 : Assert(xtCxt->xpathobj &&
4949 : xtCxt->xpathobj->type == XPATH_NODESET &&
4950 : xtCxt->xpathobj->nodesetval != NULL);
4951 :
4952 : /* Propagate our own error context to libxml2 */
4953 135438 : xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
4954 :
4955 135438 : *isnull = false;
4956 :
4957 : Assert(xtCxt->xpathscomp[colnum] != NULL);
4958 :
4959 135438 : PG_TRY();
4960 : {
4961 : xmlNodePtr cur;
4962 135438 : char *cstr = NULL;
4963 :
4964 : /* Set current node as entry point for XPath evaluation */
4965 135438 : cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
4966 135438 : xtCxt->xpathcxt->node = cur;
4967 :
4968 : /* Evaluate column path */
4969 135438 : xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
4970 135438 : if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4971 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4972 : "could not create XPath object");
4973 :
4974 : /*
4975 : * There are four possible cases, depending on the number of nodes
4976 : * returned by the XPath expression and the type of the target column:
4977 : * a) XPath returns no nodes. b) The target type is XML (return all
4978 : * as XML). For non-XML return types: c) One node (return content).
4979 : * d) Multiple nodes (error).
4980 : */
4981 135438 : if (xpathobj->type == XPATH_NODESET)
4982 : {
4983 135408 : int count = 0;
4984 :
4985 135408 : if (xpathobj->nodesetval != NULL)
4986 135198 : count = xpathobj->nodesetval->nodeNr;
4987 :
4988 135408 : if (xpathobj->nodesetval == NULL || count == 0)
4989 : {
4990 22686 : *isnull = true;
4991 : }
4992 : else
4993 : {
4994 112722 : if (typid == XMLOID)
4995 : {
4996 : text *textstr;
4997 : StringInfoData str;
4998 :
4999 : /* Concatenate serialized values */
5000 72 : initStringInfo(&str);
5001 174 : for (int i = 0; i < count; i++)
5002 : {
5003 : textstr =
5004 102 : xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
5005 : xtCxt->xmlerrcxt);
5006 :
5007 102 : appendStringInfoText(&str, textstr);
5008 : }
5009 72 : cstr = str.data;
5010 : }
5011 : else
5012 : {
5013 : xmlChar *str;
5014 :
5015 112650 : if (count > 1)
5016 6 : ereport(ERROR,
5017 : (errcode(ERRCODE_CARDINALITY_VIOLATION),
5018 : errmsg("more than one value returned by column XPath expression")));
5019 :
5020 112644 : str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
5021 112644 : cstr = str ? xml_pstrdup_and_free(str) : "";
5022 : }
5023 : }
5024 : }
5025 30 : else if (xpathobj->type == XPATH_STRING)
5026 : {
5027 : /* Content should be escaped when target will be XML */
5028 18 : if (typid == XMLOID)
5029 6 : cstr = escape_xml((char *) xpathobj->stringval);
5030 : else
5031 12 : cstr = (char *) xpathobj->stringval;
5032 : }
5033 12 : else if (xpathobj->type == XPATH_BOOLEAN)
5034 : {
5035 : char typcategory;
5036 : bool typispreferred;
5037 : xmlChar *str;
5038 :
5039 : /* Allow implicit casting from boolean to numbers */
5040 6 : get_type_category_preferred(typid, &typcategory, &typispreferred);
5041 :
5042 6 : if (typcategory != TYPCATEGORY_NUMERIC)
5043 6 : str = xmlXPathCastBooleanToString(xpathobj->boolval);
5044 : else
5045 0 : str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
5046 :
5047 6 : cstr = xml_pstrdup_and_free(str);
5048 : }
5049 6 : else if (xpathobj->type == XPATH_NUMBER)
5050 : {
5051 : xmlChar *str;
5052 :
5053 6 : str = xmlXPathCastNumberToString(xpathobj->floatval);
5054 6 : cstr = xml_pstrdup_and_free(str);
5055 : }
5056 : else
5057 0 : elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
5058 :
5059 : /*
5060 : * By here, either cstr contains the result value, or the isnull flag
5061 : * has been set.
5062 : */
5063 : Assert(cstr || *isnull);
5064 :
5065 135432 : if (!*isnull)
5066 112746 : result = InputFunctionCall(&state->in_functions[colnum],
5067 : cstr,
5068 112746 : state->typioparams[colnum],
5069 : typmod);
5070 : }
5071 6 : PG_FINALLY();
5072 : {
5073 135438 : if (xpathobj != NULL)
5074 135438 : xmlXPathFreeObject(xpathobj);
5075 : }
5076 135438 : PG_END_TRY();
5077 :
5078 135432 : return result;
5079 : #else
5080 : NO_XML_SUPPORT();
5081 : return 0;
5082 : #endif /* not USE_LIBXML */
5083 : }
5084 :
5085 : /*
5086 : * XmlTableDestroyOpaque
5087 : * Release all libxml2 resources
5088 : */
5089 : static void
5090 264 : XmlTableDestroyOpaque(TableFuncScanState *state)
5091 : {
5092 : #ifdef USE_LIBXML
5093 : XmlTableBuilderData *xtCxt;
5094 :
5095 264 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
5096 :
5097 : /* Propagate our own error context to libxml2 */
5098 264 : xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
5099 :
5100 264 : if (xtCxt->xpathscomp != NULL)
5101 : {
5102 : int i;
5103 :
5104 1116 : for (i = 0; i < xtCxt->natts; i++)
5105 852 : if (xtCxt->xpathscomp[i] != NULL)
5106 774 : xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
5107 : }
5108 :
5109 264 : if (xtCxt->xpathobj != NULL)
5110 258 : xmlXPathFreeObject(xtCxt->xpathobj);
5111 264 : if (xtCxt->xpathcomp != NULL)
5112 258 : xmlXPathFreeCompExpr(xtCxt->xpathcomp);
5113 264 : if (xtCxt->xpathcxt != NULL)
5114 264 : xmlXPathFreeContext(xtCxt->xpathcxt);
5115 264 : if (xtCxt->doc != NULL)
5116 264 : xmlFreeDoc(xtCxt->doc);
5117 264 : if (xtCxt->ctxt != NULL)
5118 264 : xmlFreeParserCtxt(xtCxt->ctxt);
5119 :
5120 264 : pg_xml_done(xtCxt->xmlerrcxt, true);
5121 :
5122 : /* not valid anymore */
5123 264 : xtCxt->magic = 0;
5124 264 : state->opaque = NULL;
5125 :
5126 : #else
5127 : NO_XML_SUPPORT();
5128 : #endif /* not USE_LIBXML */
5129 264 : }
|