Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * xml.c
4 : * XML data type support.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * src/backend/utils/adt/xml.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : /*
16 : * Generally, XML type support is only available when libxml use was
17 : * configured during the build. But even if that is not done, the
18 : * type and all the functions are available, but most of them will
19 : * fail. For one thing, this avoids having to manage variant catalog
20 : * installations. But it also has nice effects such as that you can
21 : * dump a database containing XML type data even if the server is not
22 : * linked with libxml. Thus, make sure xml_out() works even if nothing
23 : * else does.
24 : */
25 :
26 : /*
27 : * Notes on memory management:
28 : *
29 : * Sometimes libxml allocates global structures in the hope that it can reuse
30 : * them later on. This makes it impractical to change the xmlMemSetup
31 : * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32 : * allocated with malloc() or vice versa. Since libxml might be used by
33 : * loadable modules, eg libperl, our only safe choices are to change the
34 : * functions at postmaster/backend launch or not at all. Since we'd rather
35 : * not activate libxml in sessions that might never use it, the latter choice
36 : * is the preferred one. However, for debugging purposes it can be awfully
37 : * handy to constrain libxml's allocations to be done in a specific palloc
38 : * context, where they're easy to track. Therefore there is code here that
39 : * can be enabled in debug builds to redirect libxml's allocations into a
40 : * special context LibxmlContext. It's not recommended to turn this on in
41 : * a production build because of the possibility of bad interactions with
42 : * external modules.
43 : */
44 : /* #define USE_LIBXMLCONTEXT */
45 :
46 : #include "postgres.h"
47 :
48 : #ifdef USE_LIBXML
49 : #include <libxml/chvalid.h>
50 : #include <libxml/entities.h>
51 : #include <libxml/parser.h>
52 : #include <libxml/parserInternals.h>
53 : #include <libxml/tree.h>
54 : #include <libxml/uri.h>
55 : #include <libxml/xmlerror.h>
56 : #include <libxml/xmlsave.h>
57 : #include <libxml/xmlversion.h>
58 : #include <libxml/xmlwriter.h>
59 : #include <libxml/xpath.h>
60 : #include <libxml/xpathInternals.h>
61 :
62 : /*
63 : * We used to check for xmlStructuredErrorContext via a configure test; but
64 : * that doesn't work on Windows, so instead use this grottier method of
65 : * testing the library version number.
66 : */
67 : #if LIBXML_VERSION >= 20704
68 : #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
69 : #endif
70 :
71 : /*
72 : * libxml2 2.12 decided to insert "const" into the error handler API.
73 : */
74 : #if LIBXML_VERSION >= 21200
75 : #define PgXmlErrorPtr const xmlError *
76 : #else
77 : #define PgXmlErrorPtr xmlErrorPtr
78 : #endif
79 :
80 : #endif /* USE_LIBXML */
81 :
82 : #include "access/htup_details.h"
83 : #include "access/table.h"
84 : #include "catalog/namespace.h"
85 : #include "catalog/pg_class.h"
86 : #include "catalog/pg_type.h"
87 : #include "executor/spi.h"
88 : #include "executor/tablefunc.h"
89 : #include "fmgr.h"
90 : #include "lib/stringinfo.h"
91 : #include "libpq/pqformat.h"
92 : #include "mb/pg_wchar.h"
93 : #include "miscadmin.h"
94 : #include "nodes/execnodes.h"
95 : #include "nodes/miscnodes.h"
96 : #include "nodes/nodeFuncs.h"
97 : #include "utils/array.h"
98 : #include "utils/builtins.h"
99 : #include "utils/date.h"
100 : #include "utils/datetime.h"
101 : #include "utils/lsyscache.h"
102 : #include "utils/rel.h"
103 : #include "utils/syscache.h"
104 : #include "utils/xml.h"
105 :
106 :
107 : /* GUC variables */
108 : int xmlbinary = XMLBINARY_BASE64;
109 : int xmloption = XMLOPTION_CONTENT;
110 :
111 : #ifdef USE_LIBXML
112 :
113 : /* random number to identify PgXmlErrorContext */
114 : #define ERRCXT_MAGIC 68275028
115 :
116 : struct PgXmlErrorContext
117 : {
118 : int magic;
119 : /* strictness argument passed to pg_xml_init */
120 : PgXmlStrictness strictness;
121 : /* current error status and accumulated message, if any */
122 : bool err_occurred;
123 : StringInfoData err_buf;
124 : /* previous libxml error handling state (saved by pg_xml_init) */
125 : xmlStructuredErrorFunc saved_errfunc;
126 : void *saved_errcxt;
127 : /* previous libxml entity handler (saved by pg_xml_init) */
128 : xmlExternalEntityLoader saved_entityfunc;
129 : };
130 :
131 : static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
132 : xmlParserCtxtPtr ctxt);
133 : static void xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
134 : int sqlcode, const char *msg);
135 : static void xml_errorHandler(void *data, PgXmlErrorPtr error);
136 : static int errdetail_for_xml_code(int code);
137 : static void chopStringInfoNewlines(StringInfo str);
138 : static void appendStringInfoLineSeparator(StringInfo str);
139 :
140 : #ifdef USE_LIBXMLCONTEXT
141 :
142 : static MemoryContext LibxmlContext = NULL;
143 :
144 : static void xml_memory_init(void);
145 : static void *xml_palloc(size_t size);
146 : static void *xml_repalloc(void *ptr, size_t size);
147 : static void xml_pfree(void *ptr);
148 : static char *xml_pstrdup(const char *string);
149 : #endif /* USE_LIBXMLCONTEXT */
150 :
151 : static xmlChar *xml_text2xmlChar(text *in);
152 : static int parse_xml_decl(const xmlChar *str, size_t *lenp,
153 : xmlChar **version, xmlChar **encoding, int *standalone);
154 : static bool print_xml_decl(StringInfo buf, const xmlChar *version,
155 : pg_enc encoding, int standalone);
156 : static bool xml_doctype_in_content(const xmlChar *str);
157 : static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
158 : bool preserve_whitespace, int encoding,
159 : XmlOptionType *parsed_xmloptiontype,
160 : xmlNodePtr *parsed_nodes,
161 : Node *escontext);
162 : static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
163 : static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
164 : ArrayBuildState *astate,
165 : PgXmlErrorContext *xmlerrcxt);
166 : static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
167 : #endif /* USE_LIBXML */
168 :
169 : static void xmldata_root_element_start(StringInfo result, const char *eltname,
170 : const char *xmlschema, const char *targetns,
171 : bool top_level);
172 : static void xmldata_root_element_end(StringInfo result, const char *eltname);
173 : static StringInfo query_to_xml_internal(const char *query, char *tablename,
174 : const char *xmlschema, bool nulls, bool tableforest,
175 : const char *targetns, bool top_level);
176 : static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
177 : bool nulls, bool tableforest, const char *targetns);
178 : static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
179 : List *relid_list, bool nulls,
180 : bool tableforest, const char *targetns);
181 : static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
182 : bool nulls, bool tableforest,
183 : const char *targetns);
184 : static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
185 : static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
186 : static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
187 : static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
188 : char *tablename, bool nulls, bool tableforest,
189 : const char *targetns, bool top_level);
190 :
191 : /* XMLTABLE support */
192 : #ifdef USE_LIBXML
193 : /* random number to identify XmlTableContext */
194 : #define XMLTABLE_CONTEXT_MAGIC 46922182
195 : typedef struct XmlTableBuilderData
196 : {
197 : int magic;
198 : int natts;
199 : long int row_count;
200 : PgXmlErrorContext *xmlerrcxt;
201 : xmlParserCtxtPtr ctxt;
202 : xmlDocPtr doc;
203 : xmlXPathContextPtr xpathcxt;
204 : xmlXPathCompExprPtr xpathcomp;
205 : xmlXPathObjectPtr xpathobj;
206 : xmlXPathCompExprPtr *xpathscomp;
207 : } XmlTableBuilderData;
208 : #endif
209 :
210 : static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
211 : static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
212 : static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
213 : const char *uri);
214 : static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
215 : static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
216 : const char *path, int colnum);
217 : static bool XmlTableFetchRow(struct TableFuncScanState *state);
218 : static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
219 : Oid typid, int32 typmod, bool *isnull);
220 : static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
221 :
222 : const TableFuncRoutine XmlTableRoutine =
223 : {
224 : .InitOpaque = XmlTableInitOpaque,
225 : .SetDocument = XmlTableSetDocument,
226 : .SetNamespace = XmlTableSetNamespace,
227 : .SetRowFilter = XmlTableSetRowFilter,
228 : .SetColumnFilter = XmlTableSetColumnFilter,
229 : .FetchRow = XmlTableFetchRow,
230 : .GetValue = XmlTableGetValue,
231 : .DestroyOpaque = XmlTableDestroyOpaque
232 : };
233 :
/*
 * Report that the requested operation needs libxml.  Used in the
 * !USE_LIBXML branches of the functions in this file.
 */
#define NO_XML_SUPPORT() \
	ereport(ERROR, \
			errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			errmsg("unsupported XML feature"), \
			errdetail("This functionality requires the server to be built with libxml support."))
239 :
240 :
241 : /* from SQL/XML:2008 section 4.9 */
242 : #define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
243 : #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
244 : #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
245 :
246 :
247 : #ifdef USE_LIBXML
248 :
249 : static int
250 0 : xmlChar_to_encoding(const xmlChar *encoding_name)
251 : {
252 0 : int encoding = pg_char_to_encoding((const char *) encoding_name);
253 :
254 0 : if (encoding < 0)
255 0 : ereport(ERROR,
256 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
257 : errmsg("invalid encoding name \"%s\"",
258 : (const char *) encoding_name)));
259 0 : return encoding;
260 : }
261 : #endif
262 :
263 :
264 : /*
265 : * xml_in uses a plain C string to VARDATA conversion, so for the time being
266 : * we use the conversion function for the text datatype.
267 : *
268 : * This is only acceptable so long as xmltype and text use the same
269 : * representation.
270 : */
271 : Datum
272 858 : xml_in(PG_FUNCTION_ARGS)
273 : {
274 : #ifdef USE_LIBXML
275 858 : char *s = PG_GETARG_CSTRING(0);
276 : xmltype *vardata;
277 : xmlDocPtr doc;
278 :
279 : /* Build the result object. */
280 858 : vardata = (xmltype *) cstring_to_text(s);
281 :
282 : /*
283 : * Parse the data to check if it is well-formed XML data.
284 : *
285 : * Note: we don't need to worry about whether a soft error is detected.
286 : */
287 858 : doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding(),
288 858 : NULL, NULL, fcinfo->context);
289 810 : if (doc != NULL)
290 798 : xmlFreeDoc(doc);
291 :
292 810 : PG_RETURN_XML_P(vardata);
293 : #else
294 : NO_XML_SUPPORT();
295 : return 0;
296 : #endif
297 : }
298 :
299 :
300 : #define PG_XML_DEFAULT_VERSION "1.0"
301 :
302 :
303 : /*
304 : * xml_out_internal uses a plain VARDATA to C string conversion, so for the
305 : * time being we use the conversion function for the text datatype.
306 : *
307 : * This is only acceptable so long as xmltype and text use the same
308 : * representation.
309 : */
310 : static char *
311 23618 : xml_out_internal(xmltype *x, pg_enc target_encoding)
312 : {
313 23618 : char *str = text_to_cstring((text *) x);
314 :
315 : #ifdef USE_LIBXML
316 23618 : size_t len = strlen(str);
317 : xmlChar *version;
318 : int standalone;
319 : int res_code;
320 :
321 23618 : if ((res_code = parse_xml_decl((xmlChar *) str,
322 : &len, &version, NULL, &standalone)) == 0)
323 : {
324 : StringInfoData buf;
325 :
326 23618 : initStringInfo(&buf);
327 :
328 23618 : if (!print_xml_decl(&buf, version, target_encoding, standalone))
329 : {
330 : /*
331 : * If we are not going to produce an XML declaration, eat a single
332 : * newline in the original string to prevent empty first lines in
333 : * the output.
334 : */
335 23570 : if (*(str + len) == '\n')
336 6 : len += 1;
337 : }
338 23618 : appendStringInfoString(&buf, str + len);
339 :
340 23618 : pfree(str);
341 :
342 23618 : return buf.data;
343 : }
344 :
345 0 : ereport(WARNING,
346 : errcode(ERRCODE_DATA_CORRUPTED),
347 : errmsg_internal("could not parse XML declaration in stored value"),
348 : errdetail_for_xml_code(res_code));
349 : #endif
350 0 : return str;
351 : }
352 :
353 :
354 : Datum
355 23354 : xml_out(PG_FUNCTION_ARGS)
356 : {
357 23354 : xmltype *x = PG_GETARG_XML_P(0);
358 :
359 : /*
360 : * xml_out removes the encoding property in all cases. This is because we
361 : * cannot control from here whether the datum will be converted to a
362 : * different client encoding, so we'd do more harm than good by including
363 : * it.
364 : */
365 23354 : PG_RETURN_CSTRING(xml_out_internal(x, 0));
366 : }
367 :
368 :
369 : Datum
370 0 : xml_recv(PG_FUNCTION_ARGS)
371 : {
372 : #ifdef USE_LIBXML
373 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
374 : xmltype *result;
375 : char *str;
376 : char *newstr;
377 : int nbytes;
378 : xmlDocPtr doc;
379 0 : xmlChar *encodingStr = NULL;
380 : int encoding;
381 :
382 : /*
383 : * Read the data in raw format. We don't know yet what the encoding is, as
384 : * that information is embedded in the xml declaration; so we have to
385 : * parse that before converting to server encoding.
386 : */
387 0 : nbytes = buf->len - buf->cursor;
388 0 : str = (char *) pq_getmsgbytes(buf, nbytes);
389 :
390 : /*
391 : * We need a null-terminated string to pass to parse_xml_decl(). Rather
392 : * than make a separate copy, make the temporary result one byte bigger
393 : * than it needs to be.
394 : */
395 0 : result = palloc(nbytes + 1 + VARHDRSZ);
396 0 : SET_VARSIZE(result, nbytes + VARHDRSZ);
397 0 : memcpy(VARDATA(result), str, nbytes);
398 0 : str = VARDATA(result);
399 0 : str[nbytes] = '\0';
400 :
401 0 : parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
402 :
403 : /*
404 : * If encoding wasn't explicitly specified in the XML header, treat it as
405 : * UTF-8, as that's the default in XML. This is different from xml_in(),
406 : * where the input has to go through the normal client to server encoding
407 : * conversion.
408 : */
409 0 : encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
410 :
411 : /*
412 : * Parse the data to check if it is well-formed XML data. Assume that
413 : * xml_parse will throw ERROR if not.
414 : */
415 0 : doc = xml_parse(result, xmloption, true, encoding, NULL, NULL, NULL);
416 0 : xmlFreeDoc(doc);
417 :
418 : /* Now that we know what we're dealing with, convert to server encoding */
419 0 : newstr = pg_any_to_server(str, nbytes, encoding);
420 :
421 0 : if (newstr != str)
422 : {
423 0 : pfree(result);
424 0 : result = (xmltype *) cstring_to_text(newstr);
425 0 : pfree(newstr);
426 : }
427 :
428 0 : PG_RETURN_XML_P(result);
429 : #else
430 : NO_XML_SUPPORT();
431 : return 0;
432 : #endif
433 : }
434 :
435 :
436 : Datum
437 0 : xml_send(PG_FUNCTION_ARGS)
438 : {
439 0 : xmltype *x = PG_GETARG_XML_P(0);
440 : char *outval;
441 : StringInfoData buf;
442 :
443 : /*
444 : * xml_out_internal doesn't convert the encoding, it just prints the right
445 : * declaration. pq_sendtext will do the conversion.
446 : */
447 0 : outval = xml_out_internal(x, pg_get_client_encoding());
448 :
449 0 : pq_begintypsend(&buf);
450 0 : pq_sendtext(&buf, outval, strlen(outval));
451 0 : pfree(outval);
452 0 : PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
453 : }
454 :
455 :
456 : #ifdef USE_LIBXML
457 : static void
458 132 : appendStringInfoText(StringInfo str, const text *t)
459 : {
460 132 : appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
461 132 : }
462 : #endif
463 :
464 :
465 : static xmltype *
466 22560 : stringinfo_to_xmltype(StringInfo buf)
467 : {
468 22560 : return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
469 : }
470 :
471 :
472 : static xmltype *
473 78 : cstring_to_xmltype(const char *string)
474 : {
475 78 : return (xmltype *) cstring_to_text(string);
476 : }
477 :
478 :
479 : #ifdef USE_LIBXML
480 : static xmltype *
481 22658 : xmlBuffer_to_xmltype(xmlBufferPtr buf)
482 : {
483 22658 : return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
484 : xmlBufferLength(buf));
485 : }
486 : #endif
487 :
488 :
489 : Datum
490 42 : xmlcomment(PG_FUNCTION_ARGS)
491 : {
492 : #ifdef USE_LIBXML
493 42 : text *arg = PG_GETARG_TEXT_PP(0);
494 42 : char *argdata = VARDATA_ANY(arg);
495 42 : int len = VARSIZE_ANY_EXHDR(arg);
496 : StringInfoData buf;
497 : int i;
498 :
499 : /* check for "--" in string or "-" at the end */
500 180 : for (i = 1; i < len; i++)
501 : {
502 144 : if (argdata[i] == '-' && argdata[i - 1] == '-')
503 6 : ereport(ERROR,
504 : (errcode(ERRCODE_INVALID_XML_COMMENT),
505 : errmsg("invalid XML comment")));
506 : }
507 36 : if (len > 0 && argdata[len - 1] == '-')
508 6 : ereport(ERROR,
509 : (errcode(ERRCODE_INVALID_XML_COMMENT),
510 : errmsg("invalid XML comment")));
511 :
512 30 : initStringInfo(&buf);
513 30 : appendStringInfoString(&buf, "<!--");
514 30 : appendStringInfoText(&buf, arg);
515 30 : appendStringInfoString(&buf, "-->");
516 :
517 30 : PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
518 : #else
519 : NO_XML_SUPPORT();
520 : return 0;
521 : #endif
522 : }
523 :
524 :
525 : Datum
526 30 : xmltext(PG_FUNCTION_ARGS)
527 : {
528 : #ifdef USE_LIBXML
529 30 : text *arg = PG_GETARG_TEXT_PP(0);
530 : text *result;
531 30 : volatile xmlChar *xmlbuf = NULL;
532 : PgXmlErrorContext *xmlerrcxt;
533 :
534 : /* First we gotta spin up some error handling. */
535 30 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
536 :
537 30 : PG_TRY();
538 : {
539 30 : xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg));
540 :
541 30 : if (xmlbuf == NULL || xmlerrcxt->err_occurred)
542 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
543 : "could not allocate xmlChar");
544 :
545 30 : result = cstring_to_text_with_len((const char *) xmlbuf,
546 : xmlStrlen((const xmlChar *) xmlbuf));
547 : }
548 0 : PG_CATCH();
549 : {
550 0 : if (xmlbuf)
551 0 : xmlFree((xmlChar *) xmlbuf);
552 :
553 0 : pg_xml_done(xmlerrcxt, true);
554 0 : PG_RE_THROW();
555 : }
556 30 : PG_END_TRY();
557 :
558 30 : xmlFree((xmlChar *) xmlbuf);
559 30 : pg_xml_done(xmlerrcxt, false);
560 :
561 30 : PG_RETURN_XML_P(result);
562 : #else
563 : NO_XML_SUPPORT();
564 : return 0;
565 : #endif /* not USE_LIBXML */
566 : }
567 :
568 :
569 : /*
570 : * TODO: xmlconcat needs to merge the notations and unparsed entities
571 : * of the argument values. Not very important in practice, though.
572 : */
573 : xmltype *
574 22310 : xmlconcat(List *args)
575 : {
576 : #ifdef USE_LIBXML
577 22310 : int global_standalone = 1;
578 22310 : xmlChar *global_version = NULL;
579 22310 : bool global_version_no_value = false;
580 : StringInfoData buf;
581 : ListCell *v;
582 :
583 22310 : initStringInfo(&buf);
584 66936 : foreach(v, args)
585 : {
586 44626 : xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
587 : size_t len;
588 : xmlChar *version;
589 : int standalone;
590 : char *str;
591 :
592 44626 : len = VARSIZE(x) - VARHDRSZ;
593 44626 : str = text_to_cstring((text *) x);
594 :
595 44626 : parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
596 :
597 44626 : if (standalone == 0 && global_standalone == 1)
598 0 : global_standalone = 0;
599 44626 : if (standalone < 0)
600 44614 : global_standalone = -1;
601 :
602 44626 : if (!version)
603 44608 : global_version_no_value = true;
604 18 : else if (!global_version)
605 12 : global_version = version;
606 6 : else if (xmlStrcmp(version, global_version) != 0)
607 0 : global_version_no_value = true;
608 :
609 44626 : appendStringInfoString(&buf, str + len);
610 44626 : pfree(str);
611 : }
612 :
613 22310 : if (!global_version_no_value || global_standalone >= 0)
614 : {
615 : StringInfoData buf2;
616 :
617 6 : initStringInfo(&buf2);
618 :
619 6 : print_xml_decl(&buf2,
620 6 : (!global_version_no_value) ? global_version : NULL,
621 : 0,
622 : global_standalone);
623 :
624 6 : appendBinaryStringInfo(&buf2, buf.data, buf.len);
625 6 : buf = buf2;
626 : }
627 :
628 22310 : return stringinfo_to_xmltype(&buf);
629 : #else
630 : NO_XML_SUPPORT();
631 : return NULL;
632 : #endif
633 : }
634 :
635 :
636 : /*
637 : * XMLAGG support
638 : */
639 : Datum
640 22286 : xmlconcat2(PG_FUNCTION_ARGS)
641 : {
642 22286 : if (PG_ARGISNULL(0))
643 : {
644 18 : if (PG_ARGISNULL(1))
645 0 : PG_RETURN_NULL();
646 : else
647 18 : PG_RETURN_XML_P(PG_GETARG_XML_P(1));
648 : }
649 22268 : else if (PG_ARGISNULL(1))
650 0 : PG_RETURN_XML_P(PG_GETARG_XML_P(0));
651 : else
652 22268 : PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
653 : PG_GETARG_XML_P(1))));
654 : }
655 :
656 :
657 : Datum
658 6 : texttoxml(PG_FUNCTION_ARGS)
659 : {
660 6 : text *data = PG_GETARG_TEXT_PP(0);
661 :
662 6 : PG_RETURN_XML_P(xmlparse(data, xmloption, true));
663 : }
664 :
665 :
666 : Datum
667 0 : xmltotext(PG_FUNCTION_ARGS)
668 : {
669 0 : xmltype *data = PG_GETARG_XML_P(0);
670 :
671 : /* It's actually binary compatible. */
672 0 : PG_RETURN_TEXT_P((text *) data);
673 : }
674 :
675 :
676 : text *
677 180 : xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
678 : {
679 : #ifdef USE_LIBXML
680 : text *volatile result;
681 : xmlDocPtr doc;
682 : XmlOptionType parsed_xmloptiontype;
683 : xmlNodePtr content_nodes;
684 180 : volatile xmlBufferPtr buf = NULL;
685 180 : volatile xmlSaveCtxtPtr ctxt = NULL;
686 180 : ErrorSaveContext escontext = {T_ErrorSaveContext};
687 180 : PgXmlErrorContext *volatile xmlerrcxt = NULL;
688 : #endif
689 :
690 180 : if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
691 : {
692 : /*
693 : * We don't actually need to do anything, so just return the
694 : * binary-compatible input. For backwards-compatibility reasons,
695 : * allow such cases to succeed even without USE_LIBXML.
696 : */
697 36 : return (text *) data;
698 : }
699 :
700 : #ifdef USE_LIBXML
701 :
702 : /*
703 : * Parse the input according to the xmloption.
704 : *
705 : * preserve_whitespace is set to false in case we are indenting, otherwise
706 : * libxml2 will fail to indent elements that have whitespace between them.
707 : */
708 144 : doc = xml_parse(data, xmloption_arg, !indent, GetDatabaseEncoding(),
709 : &parsed_xmloptiontype, &content_nodes,
710 144 : (Node *) &escontext);
711 144 : if (doc == NULL || escontext.error_occurred)
712 : {
713 30 : if (doc)
714 0 : xmlFreeDoc(doc);
715 : /* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
716 30 : ereport(ERROR,
717 : (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
718 : errmsg("not an XML document")));
719 : }
720 :
721 : /* If we weren't asked to indent, we're done. */
722 114 : if (!indent)
723 : {
724 18 : xmlFreeDoc(doc);
725 18 : return (text *) data;
726 : }
727 :
728 : /*
729 : * Otherwise, we gotta spin up some error handling. Unlike most other
730 : * routines in this module, we already have a libxml "doc" structure to
731 : * free, so we need to call pg_xml_init() inside the PG_TRY and be
732 : * prepared for it to fail (typically due to palloc OOM).
733 : */
734 96 : PG_TRY();
735 : {
736 96 : size_t decl_len = 0;
737 :
738 96 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
739 :
740 : /* The serialized data will go into this buffer. */
741 96 : buf = xmlBufferCreate();
742 :
743 96 : if (buf == NULL || xmlerrcxt->err_occurred)
744 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
745 : "could not allocate xmlBuffer");
746 :
747 : /* Detect whether there's an XML declaration */
748 96 : parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
749 :
750 : /*
751 : * Emit declaration only if the input had one. Note: some versions of
752 : * xmlSaveToBuffer leak memory if a non-null encoding argument is
753 : * passed, so don't do that. We don't want any encoding conversion
754 : * anyway.
755 : */
756 96 : if (decl_len == 0)
757 84 : ctxt = xmlSaveToBuffer(buf, NULL,
758 : XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
759 : else
760 12 : ctxt = xmlSaveToBuffer(buf, NULL,
761 : XML_SAVE_FORMAT);
762 :
763 96 : if (ctxt == NULL || xmlerrcxt->err_occurred)
764 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
765 : "could not allocate xmlSaveCtxt");
766 :
767 96 : if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
768 : {
769 : /* If it's a document, saving is easy. */
770 42 : if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
771 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
772 : "could not save document to xmlBuffer");
773 : }
774 54 : else if (content_nodes != NULL)
775 : {
776 : /*
777 : * Deal with the case where we have non-singly-rooted XML.
778 : * libxml's dump functions don't work well for that without help.
779 : * We build a fake root node that serves as a container for the
780 : * content nodes, and then iterate over the nodes.
781 : */
782 : xmlNodePtr root;
783 : xmlNodePtr oldroot;
784 : xmlNodePtr newline;
785 :
786 48 : root = xmlNewNode(NULL, (const xmlChar *) "content-root");
787 48 : if (root == NULL || xmlerrcxt->err_occurred)
788 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
789 : "could not allocate xml node");
790 :
791 : /*
792 : * This attaches root to doc, so we need not free it separately...
793 : * but instead, we have to free the old root if there was one.
794 : */
795 48 : oldroot = xmlDocSetRootElement(doc, root);
796 48 : if (oldroot != NULL)
797 0 : xmlFreeNode(oldroot);
798 :
799 48 : if (xmlAddChildList(root, content_nodes) == NULL ||
800 48 : xmlerrcxt->err_occurred)
801 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
802 : "could not append xml node list");
803 :
804 : /*
805 : * We use this node to insert newlines in the dump. Note: in at
806 : * least some libxml versions, xmlNewDocText would not attach the
807 : * node to the document even if we passed it. Therefore, manage
808 : * freeing of this node manually, and pass NULL here to make sure
809 : * there's not a dangling link.
810 : */
811 48 : newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
812 48 : if (newline == NULL || xmlerrcxt->err_occurred)
813 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
814 : "could not allocate xml node");
815 :
816 126 : for (xmlNodePtr node = root->children; node; node = node->next)
817 : {
818 : /* insert newlines between nodes */
819 78 : if (node->type != XML_TEXT_NODE && node->prev != NULL)
820 : {
821 24 : if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
822 : {
823 0 : xmlFreeNode(newline);
824 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
825 : "could not save newline to xmlBuffer");
826 : }
827 : }
828 :
829 78 : if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
830 : {
831 0 : xmlFreeNode(newline);
832 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
833 : "could not save content to xmlBuffer");
834 : }
835 : }
836 :
837 48 : xmlFreeNode(newline);
838 : }
839 :
840 96 : if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
841 : {
842 0 : ctxt = NULL; /* don't try to close it again */
843 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
844 : "could not close xmlSaveCtxtPtr");
845 : }
846 :
847 : /*
848 : * xmlDocContentDumpOutput may add a trailing newline, so remove that.
849 : */
850 96 : if (xmloption_arg == XMLOPTION_DOCUMENT)
851 : {
852 36 : const char *str = (const char *) xmlBufferContent(buf);
853 36 : int len = xmlBufferLength(buf);
854 :
855 72 : while (len > 0 && (str[len - 1] == '\n' ||
856 36 : str[len - 1] == '\r'))
857 36 : len--;
858 :
859 36 : result = cstring_to_text_with_len(str, len);
860 : }
861 : else
862 60 : result = (text *) xmlBuffer_to_xmltype(buf);
863 : }
864 0 : PG_CATCH();
865 : {
866 0 : if (ctxt)
867 0 : xmlSaveClose(ctxt);
868 0 : if (buf)
869 0 : xmlBufferFree(buf);
870 0 : xmlFreeDoc(doc);
871 :
872 0 : if (xmlerrcxt)
873 0 : pg_xml_done(xmlerrcxt, true);
874 :
875 0 : PG_RE_THROW();
876 : }
877 96 : PG_END_TRY();
878 :
879 96 : xmlBufferFree(buf);
880 96 : xmlFreeDoc(doc);
881 :
882 96 : pg_xml_done(xmlerrcxt, false);
883 :
884 96 : return result;
885 : #else
886 : NO_XML_SUPPORT();
887 : return NULL;
888 : #endif
889 : }
890 :
891 :
892 : xmltype *
893 22442 : xmlelement(XmlExpr *xexpr,
894 : Datum *named_argvalue, bool *named_argnull,
895 : Datum *argvalue, bool *argnull)
896 : {
897 : #ifdef USE_LIBXML
898 : xmltype *result;
899 : List *named_arg_strings;
900 : List *arg_strings;
901 : int i;
902 : ListCell *arg;
903 : ListCell *narg;
904 : PgXmlErrorContext *xmlerrcxt;
905 22442 : volatile xmlBufferPtr buf = NULL;
906 22442 : volatile xmlTextWriterPtr writer = NULL;
907 :
908 : /*
909 : * All arguments are already evaluated, and their values are passed in the
910 : * named_argvalue/named_argnull or argvalue/argnull arrays. This avoids
911 : * issues if one of the arguments involves a call to some other function
912 : * or subsystem that wants to use libxml on its own terms. We examine the
913 : * original XmlExpr to identify the numbers and types of the arguments.
914 : */
915 22442 : named_arg_strings = NIL;
916 22442 : i = 0;
917 22490 : foreach(arg, xexpr->named_args)
918 : {
919 54 : Expr *e = (Expr *) lfirst(arg);
920 : char *str;
921 :
922 54 : if (named_argnull[i])
923 0 : str = NULL;
924 : else
925 54 : str = map_sql_value_to_xml_value(named_argvalue[i],
926 : exprType((Node *) e),
927 : false);
928 48 : named_arg_strings = lappend(named_arg_strings, str);
929 48 : i++;
930 : }
931 :
932 22436 : arg_strings = NIL;
933 22436 : i = 0;
934 44848 : foreach(arg, xexpr->args)
935 : {
936 22412 : Expr *e = (Expr *) lfirst(arg);
937 : char *str;
938 :
939 : /* here we can just forget NULL elements immediately */
940 22412 : if (!argnull[i])
941 : {
942 22412 : str = map_sql_value_to_xml_value(argvalue[i],
943 : exprType((Node *) e),
944 : true);
945 22412 : arg_strings = lappend(arg_strings, str);
946 : }
947 22412 : i++;
948 : }
949 :
950 22436 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
951 :
952 22436 : PG_TRY();
953 : {
954 22436 : buf = xmlBufferCreate();
955 22436 : if (buf == NULL || xmlerrcxt->err_occurred)
956 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
957 : "could not allocate xmlBuffer");
958 22436 : writer = xmlNewTextWriterMemory(buf, 0);
959 22436 : if (writer == NULL || xmlerrcxt->err_occurred)
960 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
961 : "could not allocate xmlTextWriter");
962 :
963 22436 : if (xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name) < 0 ||
964 22436 : xmlerrcxt->err_occurred)
965 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
966 : "could not start xml element");
967 :
968 22484 : forboth(arg, named_arg_strings, narg, xexpr->arg_names)
969 : {
970 48 : char *str = (char *) lfirst(arg);
971 48 : char *argname = strVal(lfirst(narg));
972 :
973 48 : if (str)
974 : {
975 48 : if (xmlTextWriterWriteAttribute(writer,
976 : (xmlChar *) argname,
977 48 : (xmlChar *) str) < 0 ||
978 48 : xmlerrcxt->err_occurred)
979 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
980 : "could not write xml attribute");
981 : }
982 : }
983 :
984 44848 : foreach(arg, arg_strings)
985 : {
986 22412 : char *str = (char *) lfirst(arg);
987 :
988 22412 : if (xmlTextWriterWriteRaw(writer, (xmlChar *) str) < 0 ||
989 22412 : xmlerrcxt->err_occurred)
990 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
991 : "could not write raw xml text");
992 : }
993 :
994 22436 : if (xmlTextWriterEndElement(writer) < 0 ||
995 22436 : xmlerrcxt->err_occurred)
996 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
997 : "could not end xml element");
998 :
999 : /* we MUST do this now to flush data out to the buffer ... */
1000 22436 : xmlFreeTextWriter(writer);
1001 22436 : writer = NULL;
1002 :
1003 22436 : result = xmlBuffer_to_xmltype(buf);
1004 : }
1005 0 : PG_CATCH();
1006 : {
1007 0 : if (writer)
1008 0 : xmlFreeTextWriter(writer);
1009 0 : if (buf)
1010 0 : xmlBufferFree(buf);
1011 :
1012 0 : pg_xml_done(xmlerrcxt, true);
1013 :
1014 0 : PG_RE_THROW();
1015 : }
1016 22436 : PG_END_TRY();
1017 :
1018 22436 : xmlBufferFree(buf);
1019 :
1020 22436 : pg_xml_done(xmlerrcxt, false);
1021 :
1022 22436 : return result;
1023 : #else
1024 : NO_XML_SUPPORT();
1025 : return NULL;
1026 : #endif
1027 : }
1028 :
1029 :
1030 : xmltype *
1031 138 : xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
1032 : {
1033 : #ifdef USE_LIBXML
1034 : xmlDocPtr doc;
1035 :
1036 138 : doc = xml_parse(data, xmloption_arg, preserve_whitespace,
1037 : GetDatabaseEncoding(), NULL, NULL, NULL);
1038 90 : xmlFreeDoc(doc);
1039 :
1040 90 : return (xmltype *) data;
1041 : #else
1042 : NO_XML_SUPPORT();
1043 : return NULL;
1044 : #endif
1045 : }
1046 :
1047 :
1048 : xmltype *
1049 72 : xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
1050 : {
1051 : #ifdef USE_LIBXML
1052 : xmltype *result;
1053 : StringInfoData buf;
1054 :
1055 72 : if (pg_strcasecmp(target, "xml") == 0)
1056 12 : ereport(ERROR,
1057 : (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
1058 : errmsg("invalid XML processing instruction"),
1059 : errdetail("XML processing instruction target name cannot be \"%s\".", target)));
1060 :
1061 : /*
1062 : * Following the SQL standard, the null check comes after the syntax check
1063 : * above.
1064 : */
1065 60 : *result_is_null = arg_is_null;
1066 60 : if (*result_is_null)
1067 12 : return NULL;
1068 :
1069 48 : initStringInfo(&buf);
1070 :
1071 48 : appendStringInfo(&buf, "<?%s", target);
1072 :
1073 48 : if (arg != NULL)
1074 : {
1075 : char *string;
1076 :
1077 24 : string = text_to_cstring(arg);
1078 24 : if (strstr(string, "?>") != NULL)
1079 6 : ereport(ERROR,
1080 : (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
1081 : errmsg("invalid XML processing instruction"),
1082 : errdetail("XML processing instruction cannot contain \"?>\".")));
1083 :
1084 18 : appendStringInfoChar(&buf, ' ');
1085 18 : appendStringInfoString(&buf, string + strspn(string, " "));
1086 18 : pfree(string);
1087 : }
1088 42 : appendStringInfoString(&buf, "?>");
1089 :
1090 42 : result = stringinfo_to_xmltype(&buf);
1091 42 : pfree(buf.data);
1092 42 : return result;
1093 : #else
1094 : NO_XML_SUPPORT();
1095 : return NULL;
1096 : #endif
1097 : }
1098 :
1099 :
1100 : xmltype *
1101 60 : xmlroot(xmltype *data, text *version, int standalone)
1102 : {
1103 : #ifdef USE_LIBXML
1104 : char *str;
1105 : size_t len;
1106 : xmlChar *orig_version;
1107 : int orig_standalone;
1108 : StringInfoData buf;
1109 :
1110 60 : len = VARSIZE(data) - VARHDRSZ;
1111 60 : str = text_to_cstring((text *) data);
1112 :
1113 60 : parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
1114 :
1115 60 : if (version)
1116 24 : orig_version = xml_text2xmlChar(version);
1117 : else
1118 36 : orig_version = NULL;
1119 :
1120 60 : switch (standalone)
1121 : {
1122 18 : case XML_STANDALONE_YES:
1123 18 : orig_standalone = 1;
1124 18 : break;
1125 12 : case XML_STANDALONE_NO:
1126 12 : orig_standalone = 0;
1127 12 : break;
1128 12 : case XML_STANDALONE_NO_VALUE:
1129 12 : orig_standalone = -1;
1130 12 : break;
1131 18 : case XML_STANDALONE_OMITTED:
1132 : /* leave original value */
1133 18 : break;
1134 : }
1135 :
1136 60 : initStringInfo(&buf);
1137 60 : print_xml_decl(&buf, orig_version, 0, orig_standalone);
1138 60 : appendStringInfoString(&buf, str + len);
1139 :
1140 60 : return stringinfo_to_xmltype(&buf);
1141 : #else
1142 : NO_XML_SUPPORT();
1143 : return NULL;
1144 : #endif
1145 : }
1146 :
1147 :
1148 : /*
1149 : * Validate document (given as string) against DTD (given as external link)
1150 : *
1151 : * This has been removed because it is a security hole: unprivileged users
1152 : * should not be able to use Postgres to fetch arbitrary external files,
1153 : * which unfortunately is exactly what libxml is willing to do with the DTD
1154 : * parameter.
1155 : */
1156 : Datum
1157 0 : xmlvalidate(PG_FUNCTION_ARGS)
1158 : {
1159 0 : ereport(ERROR,
1160 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1161 : errmsg("xmlvalidate is not implemented")));
1162 : return 0;
1163 : }
1164 :
1165 :
1166 : bool
1167 24 : xml_is_document(xmltype *arg)
1168 : {
1169 : #ifdef USE_LIBXML
1170 : xmlDocPtr doc;
1171 24 : ErrorSaveContext escontext = {T_ErrorSaveContext};
1172 :
1173 : /*
1174 : * We'll report "true" if no soft error is reported by xml_parse().
1175 : */
1176 24 : doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
1177 : GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
1178 24 : if (doc)
1179 12 : xmlFreeDoc(doc);
1180 :
1181 24 : return !escontext.error_occurred;
1182 : #else /* not USE_LIBXML */
1183 : NO_XML_SUPPORT();
1184 : return false;
1185 : #endif /* not USE_LIBXML */
1186 : }
1187 :
1188 :
1189 : #ifdef USE_LIBXML
1190 :
1191 : /*
1192 : * pg_xml_init_library --- set up for use of libxml
1193 : *
1194 : * This should be called by each function that is about to use libxml
1195 : * facilities but doesn't require error handling. It initializes libxml
1196 : * and verifies compatibility with the loaded libxml version. These are
1197 : * once-per-session activities.
1198 : *
1199 : * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
1200 : * check)
1201 : */
1202 : void
1203 94670 : pg_xml_init_library(void)
1204 : {
1205 : static bool first_time = true;
1206 :
1207 94670 : if (first_time)
1208 : {
1209 : /* Stuff we need do only once per session */
1210 :
1211 : /*
1212 : * Currently, we have no pure UTF-8 support for internals -- check if
1213 : * we can work.
1214 : */
1215 : if (sizeof(char) != sizeof(xmlChar))
1216 : ereport(ERROR,
1217 : (errmsg("could not initialize XML library"),
1218 : errdetail("libxml2 has incompatible char type: sizeof(char)=%zu, sizeof(xmlChar)=%zu.",
1219 : sizeof(char), sizeof(xmlChar))));
1220 :
1221 : #ifdef USE_LIBXMLCONTEXT
1222 : /* Set up libxml's memory allocation our way */
1223 : xml_memory_init();
1224 : #endif
1225 :
1226 : /* Check library compatibility */
1227 26 : LIBXML_TEST_VERSION;
1228 :
1229 26 : first_time = false;
1230 : }
1231 94670 : }
1232 :
1233 : /*
1234 : * pg_xml_init --- set up for use of libxml and register an error handler
1235 : *
1236 : * This should be called by each function that is about to use libxml
1237 : * facilities and requires error handling. It initializes libxml with
1238 : * pg_xml_init_library() and establishes our libxml error handler.
1239 : *
1240 : * strictness determines which errors are reported and which are ignored.
1241 : *
1242 : * Calls to this function MUST be followed by a PG_TRY block that guarantees
1243 : * that pg_xml_done() is called during either normal or error exit.
1244 : *
1245 : * This is exported for use by contrib/xml2, as well as other code that might
1246 : * wish to share use of this module's libxml error handler.
1247 : */
1248 : PgXmlErrorContext *
1249 24734 : pg_xml_init(PgXmlStrictness strictness)
1250 : {
1251 : PgXmlErrorContext *errcxt;
1252 : void *new_errcxt;
1253 :
1254 : /* Do one-time setup if needed */
1255 24734 : pg_xml_init_library();
1256 :
1257 : /* Create error handling context structure */
1258 24734 : errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
1259 24734 : errcxt->magic = ERRCXT_MAGIC;
1260 24734 : errcxt->strictness = strictness;
1261 24734 : errcxt->err_occurred = false;
1262 24734 : initStringInfo(&errcxt->err_buf);
1263 :
1264 : /*
1265 : * Save original error handler and install ours. libxml originally didn't
1266 : * distinguish between the contexts for generic and for structured error
1267 : * handlers. If we're using an old libxml version, we must thus save the
1268 : * generic error context, even though we're using a structured error
1269 : * handler.
1270 : */
1271 24734 : errcxt->saved_errfunc = xmlStructuredError;
1272 :
1273 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1274 24734 : errcxt->saved_errcxt = xmlStructuredErrorContext;
1275 : #else
1276 : errcxt->saved_errcxt = xmlGenericErrorContext;
1277 : #endif
1278 :
1279 24734 : xmlSetStructuredErrorFunc(errcxt, xml_errorHandler);
1280 :
1281 : /*
1282 : * Verify that xmlSetStructuredErrorFunc set the context variable we
1283 : * expected it to. If not, the error context pointer we just saved is not
1284 : * the correct thing to restore, and since that leaves us without a way to
1285 : * restore the context in pg_xml_done, we must fail.
1286 : *
1287 : * The only known situation in which this test fails is if we compile with
1288 : * headers from a libxml2 that doesn't track the structured error context
1289 : * separately (< 2.7.4), but at runtime use a version that does, or vice
1290 : * versa. The libxml2 authors did not treat that change as constituting
1291 : * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1292 : * fails to protect us from this.
1293 : */
1294 :
1295 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1296 24734 : new_errcxt = xmlStructuredErrorContext;
1297 : #else
1298 : new_errcxt = xmlGenericErrorContext;
1299 : #endif
1300 :
1301 24734 : if (new_errcxt != errcxt)
1302 0 : ereport(ERROR,
1303 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1304 : errmsg("could not set up XML error handler"),
1305 : errhint("This probably indicates that the version of libxml2"
1306 : " being used is not compatible with the libxml2"
1307 : " header files that PostgreSQL was built with.")));
1308 :
1309 : /*
1310 : * Also, install an entity loader to prevent unwanted fetches of external
1311 : * files and URLs.
1312 : */
1313 24734 : errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1314 24734 : xmlSetExternalEntityLoader(xmlPgEntityLoader);
1315 :
1316 24734 : return errcxt;
1317 : }
1318 :
1319 :
1320 : /*
1321 : * pg_xml_done --- restore previous libxml error handling
1322 : *
1323 : * Resets libxml's global error-handling state to what it was before
1324 : * pg_xml_init() was called.
1325 : *
1326 : * This routine verifies that all pending errors have been dealt with
1327 : * (in assert-enabled builds, anyway).
1328 : */
1329 : void
1330 24734 : pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1331 : {
1332 : void *cur_errcxt;
1333 :
1334 : /* An assert seems like enough protection here */
1335 : Assert(errcxt->magic == ERRCXT_MAGIC);
1336 :
1337 : /*
1338 : * In a normal exit, there should be no un-handled libxml errors. But we
1339 : * shouldn't try to enforce this during error recovery, since the longjmp
1340 : * could have been thrown before xml_ereport had a chance to run.
1341 : */
1342 : Assert(!errcxt->err_occurred || isError);
1343 :
1344 : /*
1345 : * Check that libxml's global state is correct, warn if not. This is a
1346 : * real test and not an Assert because it has a higher probability of
1347 : * happening.
1348 : */
1349 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1350 24734 : cur_errcxt = xmlStructuredErrorContext;
1351 : #else
1352 : cur_errcxt = xmlGenericErrorContext;
1353 : #endif
1354 :
1355 24734 : if (cur_errcxt != errcxt)
1356 0 : elog(WARNING, "libxml error handling state is out of sync with xml.c");
1357 :
1358 : /* Restore the saved handlers */
1359 24734 : xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1360 24734 : xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1361 :
1362 : /*
1363 : * Mark the struct as invalid, just in case somebody somehow manages to
1364 : * call xml_errorHandler or xml_ereport with it.
1365 : */
1366 24734 : errcxt->magic = 0;
1367 :
1368 : /* Release memory */
1369 24734 : pfree(errcxt->err_buf.data);
1370 24734 : pfree(errcxt);
1371 24734 : }
1372 :
1373 :
1374 : /*
1375 : * pg_xml_error_occurred() --- test the error flag
1376 : */
1377 : bool
1378 78 : pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1379 : {
1380 78 : return errcxt->err_occurred;
1381 : }
1382 :
1383 :
1384 : /*
1385 : * SQL/XML allows storing "XML documents" or "XML content". "XML
1386 : * documents" are specified by the XML specification and are parsed
1387 : * easily by libxml. "XML content" is specified by SQL/XML as the
1388 : * production "XMLDecl? content". But libxml can only parse the
1389 : * "content" part, so we have to parse the XML declaration ourselves
1390 : * to complete this.
1391 : */
1392 :
/*
 * Make the enclosing function return XML_ERR_SPACE_REQUIRED unless *p is
 * an XML whitespace character.  Note the hidden "return"!
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/* Advance p past any run of XML whitespace (p must be an lvalue) */
#define SKIP_XML_SPACE(p) \
	while (xmlIsBlank_ch(*(p))) (p)++

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/* Beware of multiple evaluations of argument! */
/* The argument is parenthesized everywhere so expression arguments group correctly. */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
			|| xmlIsDigit_ch(c) \
			|| (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
			|| xmlIsCombiningQ(c) \
			|| xmlIsExtender_ch(c))
1410 :
1411 : /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1412 : static xmlChar *
1413 192 : xml_pnstrdup(const xmlChar *str, size_t len)
1414 : {
1415 : xmlChar *result;
1416 :
1417 192 : result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1418 192 : memcpy(result, str, len * sizeof(xmlChar));
1419 192 : result[len] = 0;
1420 192 : return result;
1421 : }
1422 :
1423 : /* Ditto, except input is char* */
1424 : static xmlChar *
1425 2424 : pg_xmlCharStrndup(const char *str, size_t len)
1426 : {
1427 : xmlChar *result;
1428 :
1429 2424 : result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1430 2424 : memcpy(result, str, len);
1431 2424 : result[len] = '\0';
1432 :
1433 2424 : return result;
1434 : }
1435 :
1436 : /*
1437 : * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
1438 : *
1439 : * The input xmlChar is freed regardless of success of the copy.
1440 : */
1441 : static char *
1442 112812 : xml_pstrdup_and_free(xmlChar *str)
1443 : {
1444 : char *result;
1445 :
1446 112812 : if (str)
1447 : {
1448 112812 : PG_TRY();
1449 : {
1450 112812 : result = pstrdup((char *) str);
1451 : }
1452 0 : PG_FINALLY();
1453 : {
1454 112812 : xmlFree(str);
1455 : }
1456 112812 : PG_END_TRY();
1457 : }
1458 : else
1459 0 : result = NULL;
1460 :
1461 112812 : return result;
1462 : }
1463 :
1464 : /*
1465 : * str is the null-terminated input string. Remaining arguments are
1466 : * output arguments; each can be NULL if value is not wanted.
1467 : * version and encoding are returned as locally-palloc'd strings.
1468 : * Result is 0 if OK, an error code if not.
1469 : */
1470 : static int
1471 69936 : parse_xml_decl(const xmlChar *str, size_t *lenp,
1472 : xmlChar **version, xmlChar **encoding, int *standalone)
1473 : {
1474 : const xmlChar *p;
1475 : const xmlChar *save_p;
1476 : size_t len;
1477 : int utf8char;
1478 : int utf8len;
1479 :
1480 : /*
1481 : * Only initialize libxml. We don't need error handling here, but we do
1482 : * need to make sure libxml is initialized before calling any of its
1483 : * functions. Note that this is safe (and a no-op) if caller has already
1484 : * done pg_xml_init().
1485 : */
1486 69936 : pg_xml_init_library();
1487 :
1488 : /* Initialize output arguments to "not present" */
1489 69936 : if (version)
1490 69288 : *version = NULL;
1491 69936 : if (encoding)
1492 0 : *encoding = NULL;
1493 69936 : if (standalone)
1494 69288 : *standalone = -1;
1495 :
1496 69936 : p = str;
1497 :
1498 69936 : if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1499 69714 : goto finished;
1500 :
1501 : /*
1502 : * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
1503 : * rather than an XMLDecl, so we have done what we came to do and found no
1504 : * XMLDecl.
1505 : *
1506 : * We need an input length value for xmlGetUTF8Char, but there's no need
1507 : * to count the whole document size, so use strnlen not strlen.
1508 : */
1509 222 : utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
1510 222 : utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1511 222 : if (PG_XMLISNAMECHAR(utf8char))
1512 12 : goto finished;
1513 :
1514 210 : p += 5;
1515 :
1516 : /* version */
1517 210 : CHECK_XML_SPACE(p);
1518 420 : SKIP_XML_SPACE(p);
1519 210 : if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1520 0 : return XML_ERR_VERSION_MISSING;
1521 210 : p += 7;
1522 210 : SKIP_XML_SPACE(p);
1523 210 : if (*p != '=')
1524 0 : return XML_ERR_VERSION_MISSING;
1525 210 : p += 1;
1526 210 : SKIP_XML_SPACE(p);
1527 :
1528 210 : if (*p == '\'' || *p == '"')
1529 210 : {
1530 : const xmlChar *q;
1531 :
1532 210 : q = xmlStrchr(p + 1, *p);
1533 210 : if (!q)
1534 0 : return XML_ERR_VERSION_MISSING;
1535 :
1536 210 : if (version)
1537 192 : *version = xml_pnstrdup(p + 1, q - p - 1);
1538 210 : p = q + 1;
1539 : }
1540 : else
1541 0 : return XML_ERR_VERSION_MISSING;
1542 :
1543 : /* encoding */
1544 210 : save_p = p;
1545 372 : SKIP_XML_SPACE(p);
1546 210 : if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1547 : {
1548 54 : CHECK_XML_SPACE(save_p);
1549 54 : p += 8;
1550 54 : SKIP_XML_SPACE(p);
1551 54 : if (*p != '=')
1552 0 : return XML_ERR_MISSING_ENCODING;
1553 54 : p += 1;
1554 54 : SKIP_XML_SPACE(p);
1555 :
1556 54 : if (*p == '\'' || *p == '"')
1557 54 : {
1558 : const xmlChar *q;
1559 :
1560 54 : q = xmlStrchr(p + 1, *p);
1561 54 : if (!q)
1562 0 : return XML_ERR_MISSING_ENCODING;
1563 :
1564 54 : if (encoding)
1565 0 : *encoding = xml_pnstrdup(p + 1, q - p - 1);
1566 54 : p = q + 1;
1567 : }
1568 : else
1569 0 : return XML_ERR_MISSING_ENCODING;
1570 : }
1571 : else
1572 : {
1573 156 : p = save_p;
1574 : }
1575 :
1576 : /* standalone */
1577 210 : save_p = p;
1578 318 : SKIP_XML_SPACE(p);
1579 210 : if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1580 : {
1581 108 : CHECK_XML_SPACE(save_p);
1582 108 : p += 10;
1583 108 : SKIP_XML_SPACE(p);
1584 108 : if (*p != '=')
1585 0 : return XML_ERR_STANDALONE_VALUE;
1586 108 : p += 1;
1587 108 : SKIP_XML_SPACE(p);
1588 216 : if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1589 108 : xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1590 : {
1591 60 : if (standalone)
1592 60 : *standalone = 1;
1593 60 : p += 5;
1594 : }
1595 96 : else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1596 48 : xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1597 : {
1598 36 : if (standalone)
1599 36 : *standalone = 0;
1600 36 : p += 4;
1601 : }
1602 : else
1603 12 : return XML_ERR_STANDALONE_VALUE;
1604 : }
1605 : else
1606 : {
1607 102 : p = save_p;
1608 : }
1609 :
1610 198 : SKIP_XML_SPACE(p);
1611 198 : if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1612 0 : return XML_ERR_XMLDECL_NOT_FINISHED;
1613 198 : p += 2;
1614 :
1615 69924 : finished:
1616 69924 : len = p - str;
1617 :
1618 76656 : for (p = str; p < str + len; p++)
1619 6732 : if (*p > 127)
1620 0 : return XML_ERR_INVALID_CHAR;
1621 :
1622 69924 : if (lenp)
1623 69924 : *lenp = len;
1624 :
1625 69924 : return XML_ERR_OK;
1626 : }
1627 :
1628 :
1629 : /*
1630 : * Write an XML declaration. On output, we adjust the XML declaration
1631 : * as follows. (These rules are the moral equivalent of the clause
1632 : * "Serialization of an XML value" in the SQL standard.)
1633 : *
1634 : * We try to avoid generating an XML declaration if possible. This is
1635 : * so that you don't get trivial things like xml '<foo/>' resulting in
1636 : * '<?xml version="1.0"?><foo/>', which would surely be annoying. We
1637 : * must provide a declaration if the standalone property is specified
1638 : * or if we include an encoding declaration. If we have a
1639 : * declaration, we must specify a version (XML requires this).
1640 : * Otherwise we only make a declaration if the version is not "1.0",
1641 : * which is the default version specified in SQL:2003.
1642 : */
1643 : static bool
1644 23684 : print_xml_decl(StringInfo buf, const xmlChar *version,
1645 : pg_enc encoding, int standalone)
1646 : {
1647 23684 : if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1648 23648 : || (encoding && encoding != PG_UTF8)
1649 23648 : || standalone != -1)
1650 : {
1651 96 : appendStringInfoString(buf, "<?xml");
1652 :
1653 96 : if (version)
1654 72 : appendStringInfo(buf, " version=\"%s\"", version);
1655 : else
1656 24 : appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1657 :
1658 96 : if (encoding && encoding != PG_UTF8)
1659 : {
1660 : /*
1661 : * XXX might be useful to convert this to IANA names (ISO-8859-1
1662 : * instead of LATIN1 etc.); needs field experience
1663 : */
1664 0 : appendStringInfo(buf, " encoding=\"%s\"",
1665 : pg_encoding_to_char(encoding));
1666 : }
1667 :
1668 96 : if (standalone == 1)
1669 48 : appendStringInfoString(buf, " standalone=\"yes\"");
1670 48 : else if (standalone == 0)
1671 24 : appendStringInfoString(buf, " standalone=\"no\"");
1672 96 : appendStringInfoString(buf, "?>");
1673 :
1674 96 : return true;
1675 : }
1676 : else
1677 23588 : return false;
1678 : }
1679 :
1680 : /*
1681 : * Test whether an input that is to be parsed as CONTENT contains a DTD.
1682 : *
1683 : * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1684 : * satisfied by a document with a DTD, which is a bit of a wart, as it means
1685 : * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and
1686 : * later fix that, by redefining content with reference to the "more
1687 : * permissive" Document Node of the XQuery/XPath Data Model, such that any
1688 : * DOCUMENT value is indeed also a CONTENT value. That definition is more
1689 : * useful, as CONTENT becomes usable for parsing input of unknown form (think
1690 : * pg_restore).
1691 : *
1692 : * As used below in xml_parse when parsing for CONTENT, libxml does not give
1693 : * us the 2006+ behavior, but only the 2003; it will choke if the input has
1694 : * a DTD. But we can provide the 2006+ definition of CONTENT easily enough,
1695 : * by detecting this case first and simply doing the parse as DOCUMENT.
1696 : *
1697 : * A DTD can be found arbitrarily far in, but that would be a contrived case;
1698 : * it will ordinarily start within a few dozen characters. The only things
1699 : * that can precede it are an XMLDecl (here, the caller will have called
1700 : * parse_xml_decl already), whitespace, comments, and processing instructions.
1701 : * This function need only return true if it sees a valid sequence of such
1702 : * things leading to <!DOCTYPE. It can simply return false in any other
1703 : * cases, including malformed input; that will mean the input gets parsed as
1704 : * CONTENT as originally planned, with libxml reporting any errors.
1705 : *
1706 : * This is only to be called from xml_parse, when pg_xml_init has already
1707 : * been called. The input is already in UTF8 encoding.
1708 : */
1709 : static bool
1710 972 : xml_doctype_in_content(const xmlChar *str)
1711 : {
1712 972 : const xmlChar *p = str;
1713 :
1714 : for (;;)
1715 36 : {
1716 : const xmlChar *e;
1717 :
1718 1098 : SKIP_XML_SPACE(p);
1719 1008 : if (*p != '<')
1720 206 : return false;
1721 802 : p++;
1722 :
1723 802 : if (*p == '!')
1724 : {
1725 72 : p++;
1726 :
1727 : /* if we see <!DOCTYPE, we can return true */
1728 72 : if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1729 42 : return true;
1730 :
1731 : /* otherwise, if it's not a comment, fail */
1732 30 : if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1733 0 : return false;
1734 : /* find end of comment: find -- and a > must follow */
1735 30 : p = xmlStrstr(p + 2, (xmlChar *) "--");
1736 30 : if (!p || p[2] != '>')
1737 0 : return false;
1738 : /* advance over comment, and keep scanning */
1739 30 : p += 3;
1740 30 : continue;
1741 : }
1742 :
1743 : /* otherwise, if it's not a PI <?target something?>, fail */
1744 730 : if (*p != '?')
1745 724 : return false;
1746 6 : p++;
1747 :
1748 : /* find end of PI (the string ?> is forbidden within a PI) */
1749 6 : e = xmlStrstr(p, (xmlChar *) "?>");
1750 6 : if (!e)
1751 0 : return false;
1752 :
1753 : /* advance over PI, keep scanning */
1754 6 : p = e + 2;
1755 : }
1756 : }
1757 :
1758 :
1759 : /*
1760 : * Convert a text object to XML internal representation
1761 : *
1762 : * data is the source data (must not be toasted!), encoding is its encoding,
1763 : * and xmloption_arg and preserve_whitespace are options for the
1764 : * transformation.
1765 : *
1766 : * If parsed_xmloptiontype isn't NULL, *parsed_xmloptiontype is set to the
1767 : * XmlOptionType actually used to parse the input (typically the same as
1768 : * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
1769 : *
1770 : * If parsed_nodes isn't NULL and we parse in CONTENT mode, the list
1771 : * of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
1772 : * to *parsed_nodes. (It is caller's responsibility to free that.)
1773 : *
1774 : * Errors normally result in ereport(ERROR), but if escontext is an
1775 : * ErrorSaveContext, then "safe" errors are reported there instead, and the
1776 : * caller must check SOFT_ERROR_OCCURRED() to see whether that happened.
1777 : *
1778 : * Note: it is caller's responsibility to xmlFreeDoc() the result,
1779 : * else a permanent memory leak will ensue! But note the result could
1780 : * be NULL after a soft error.
1781 : *
1782 : * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1783 : * yet do not use SAX - see xmlreader.c)
1784 : */
1785 : static xmlDocPtr
1786 1278 : xml_parse(text *data, XmlOptionType xmloption_arg,
1787 : bool preserve_whitespace, int encoding,
1788 : XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
1789 : Node *escontext)
1790 : {
1791 : int32 len;
1792 : xmlChar *string;
1793 : xmlChar *utf8string;
1794 : PgXmlErrorContext *xmlerrcxt;
1795 1278 : volatile xmlParserCtxtPtr ctxt = NULL;
1796 1278 : volatile xmlDocPtr doc = NULL;
1797 1278 : volatile int save_keep_blanks = -1;
1798 :
1799 : /*
1800 : * This step looks annoyingly redundant, but we must do it to have a
1801 : * null-terminated string in case encoding conversion isn't required.
1802 : */
1803 1278 : len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
1804 1278 : string = xml_text2xmlChar(data);
1805 :
1806 : /*
1807 : * If the data isn't UTF8, we must translate before giving it to libxml.
1808 : *
1809 : * XXX ideally, we'd catch any encoding conversion failure and return a
1810 : * soft error. However, failure to convert to UTF8 should be pretty darn
1811 : * rare, so for now this is left undone.
1812 : */
1813 1278 : utf8string = pg_do_encoding_conversion(string,
1814 : len,
1815 : encoding,
1816 : PG_UTF8);
1817 :
1818 : /* Start up libxml and its parser */
1819 1278 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1820 :
1821 : /* Use a TRY block to ensure we clean up correctly */
1822 1278 : PG_TRY();
1823 : {
1824 1278 : bool parse_as_document = false;
1825 : int res_code;
1826 1278 : size_t count = 0;
1827 1278 : xmlChar *version = NULL;
1828 1278 : int standalone = 0;
1829 :
1830 : /* Any errors here are reported as hard ereport's */
1831 1278 : xmlInitParser();
1832 :
1833 : /* Decide whether to parse as document or content */
1834 1278 : if (xmloption_arg == XMLOPTION_DOCUMENT)
1835 294 : parse_as_document = true;
1836 : else
1837 : {
1838 : /* Parse and skip over the XML declaration, if any */
1839 984 : res_code = parse_xml_decl(utf8string,
1840 : &count, &version, NULL, &standalone);
1841 984 : if (res_code != 0)
1842 : {
1843 12 : errsave(escontext,
1844 : errcode(ERRCODE_INVALID_XML_CONTENT),
1845 : errmsg_internal("invalid XML content: invalid XML declaration"),
1846 : errdetail_for_xml_code(res_code));
1847 12 : goto fail;
1848 : }
1849 :
1850 : /* Is there a DOCTYPE element? */
1851 972 : if (xml_doctype_in_content(utf8string + count))
1852 42 : parse_as_document = true;
1853 : }
1854 :
1855 : /* initialize output parameters */
1856 1266 : if (parsed_xmloptiontype != NULL)
1857 144 : *parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
1858 : XMLOPTION_CONTENT;
1859 1266 : if (parsed_nodes != NULL)
1860 144 : *parsed_nodes = NULL;
1861 :
1862 1266 : if (parse_as_document)
1863 : {
1864 : int options;
1865 :
1866 : /* set up parser context used by xmlCtxtReadDoc */
1867 336 : ctxt = xmlNewParserCtxt();
1868 336 : if (ctxt == NULL || xmlerrcxt->err_occurred)
1869 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1870 : "could not allocate parser context");
1871 :
1872 : /*
1873 : * Select parse options.
1874 : *
1875 : * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1876 : * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined
1877 : * by internal DTD are applied'. As for external DTDs, we try to
1878 : * support them too (see SQL/XML:2008 GR 10.16.7.e), but that
1879 : * doesn't really happen because xmlPgEntityLoader prevents it.
1880 : */
1881 336 : options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1882 336 : | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1883 :
1884 336 : doc = xmlCtxtReadDoc(ctxt, utf8string,
1885 : NULL, /* no URL */
1886 : "UTF-8",
1887 : options);
1888 :
1889 336 : if (doc == NULL || xmlerrcxt->err_occurred)
1890 : {
1891 : /* Use original option to decide which error code to report */
1892 144 : if (xmloption_arg == XMLOPTION_DOCUMENT)
1893 138 : xml_errsave(escontext, xmlerrcxt,
1894 : ERRCODE_INVALID_XML_DOCUMENT,
1895 : "invalid XML document");
1896 : else
1897 6 : xml_errsave(escontext, xmlerrcxt,
1898 : ERRCODE_INVALID_XML_CONTENT,
1899 : "invalid XML content");
1900 96 : goto fail;
1901 : }
1902 : }
1903 : else
1904 : {
1905 : /* set up document that xmlParseBalancedChunkMemory will add to */
1906 930 : doc = xmlNewDoc(version);
1907 930 : if (doc == NULL || xmlerrcxt->err_occurred)
1908 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1909 : "could not allocate XML document");
1910 :
1911 : Assert(doc->encoding == NULL);
1912 930 : doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1913 930 : if (doc->encoding == NULL || xmlerrcxt->err_occurred)
1914 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1915 : "could not allocate XML document");
1916 930 : doc->standalone = standalone;
1917 :
1918 : /* set parse options --- have to do this the ugly way */
1919 930 : save_keep_blanks = xmlKeepBlanksDefault(preserve_whitespace ? 1 : 0);
1920 :
1921 : /* allow empty content */
1922 930 : if (*(utf8string + count))
1923 : {
1924 1812 : res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1925 906 : utf8string + count,
1926 : parsed_nodes);
1927 906 : if (res_code != 0 || xmlerrcxt->err_occurred)
1928 : {
1929 60 : xml_errsave(escontext, xmlerrcxt,
1930 : ERRCODE_INVALID_XML_CONTENT,
1931 : "invalid XML content");
1932 12 : goto fail;
1933 : }
1934 : }
1935 : }
1936 :
1937 1182 : fail:
1938 : ;
1939 : }
1940 96 : PG_CATCH();
1941 : {
1942 96 : if (save_keep_blanks != -1)
1943 48 : xmlKeepBlanksDefault(save_keep_blanks);
1944 96 : if (doc != NULL)
1945 48 : xmlFreeDoc(doc);
1946 96 : if (ctxt != NULL)
1947 48 : xmlFreeParserCtxt(ctxt);
1948 :
1949 96 : pg_xml_done(xmlerrcxt, true);
1950 :
1951 96 : PG_RE_THROW();
1952 : }
1953 1182 : PG_END_TRY();
1954 :
1955 1182 : if (save_keep_blanks != -1)
1956 882 : xmlKeepBlanksDefault(save_keep_blanks);
1957 :
1958 1182 : if (ctxt != NULL)
1959 288 : xmlFreeParserCtxt(ctxt);
1960 :
1961 1182 : pg_xml_done(xmlerrcxt, false);
1962 :
1963 1182 : return doc;
1964 : }
1965 :
1966 :
1967 : /*
1968 : * xmlChar<->text conversions
1969 : */
1970 : static xmlChar *
1971 1428 : xml_text2xmlChar(text *in)
1972 : {
1973 1428 : return (xmlChar *) text_to_cstring(in);
1974 : }
1975 :
1976 :
1977 : #ifdef USE_LIBXMLCONTEXT
1978 :
1979 : /*
1980 : * Manage the special context used for all libxml allocations (but only
1981 : * in special debug builds; see notes at top of file)
1982 : */
1983 : static void
1984 : xml_memory_init(void)
1985 : {
1986 : /* Create memory context if not there already */
1987 : if (LibxmlContext == NULL)
1988 : LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1989 : "Libxml context",
1990 : ALLOCSET_DEFAULT_SIZES);
1991 :
1992 : /* Re-establish the callbacks even if already set */
1993 : xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1994 : }
1995 :
1996 : /*
1997 : * Wrappers for memory management functions
1998 : */
1999 : static void *
2000 : xml_palloc(size_t size)
2001 : {
2002 : return MemoryContextAlloc(LibxmlContext, size);
2003 : }
2004 :
2005 :
/*
 * xml_repalloc --- reallocation callback registered through xmlMemSetup();
 * resizes "ptr" to "size" bytes using repalloc().
 */
static void *
xml_repalloc(void *ptr, size_t size)
{
	void	   *resized = repalloc(ptr, size);

	return resized;
}
2011 :
2012 :
/*
 * xml_pfree --- deallocation callback registered through xmlMemSetup()
 */
static void
xml_pfree(void *ptr)
{
	/* libxml sometimes calls xmlFree(NULL); quietly ignore that */
	if (ptr == NULL)
		return;

	pfree(ptr);
}
2020 :
2021 :
2022 : static char *
2023 : xml_pstrdup(const char *string)
2024 : {
2025 : return MemoryContextStrdup(LibxmlContext, string);
2026 : }
2027 : #endif /* USE_LIBXMLCONTEXT */
2028 :
2029 :
2030 : /*
2031 : * xmlPgEntityLoader --- entity loader callback function
2032 : *
2033 : * Silently prevent any external entity URL from being loaded. We don't want
2034 : * to throw an error, so instead make the entity appear to expand to an empty
2035 : * string.
2036 : *
2037 : * We would prefer to allow loading entities that exist in the system's
2038 : * global XML catalog; but the available libxml2 APIs make that a complex
2039 : * and fragile task. For now, just shut down all external access.
2040 : */
2041 : static xmlParserInputPtr
2042 18 : xmlPgEntityLoader(const char *URL, const char *ID,
2043 : xmlParserCtxtPtr ctxt)
2044 : {
2045 18 : return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
2046 : }
2047 :
2048 :
2049 : /*
2050 : * xml_ereport --- report an XML-related error
2051 : *
2052 : * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
2053 : * standard. This function adds libxml's native error message, if any, as
2054 : * detail.
2055 : *
2056 : * This is exported for modules that want to share the core libxml error
2057 : * handler. Note that pg_xml_init() *must* have been called previously.
2058 : */
2059 : void
2060 12 : xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
2061 : {
2062 : char *detail;
2063 :
2064 : /* Defend against someone passing us a bogus context struct */
2065 12 : if (errcxt->magic != ERRCXT_MAGIC)
2066 0 : elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
2067 :
2068 : /* Flag that the current libxml error has been reported */
2069 12 : errcxt->err_occurred = false;
2070 :
2071 : /* Include detail only if we have some text from libxml */
2072 12 : if (errcxt->err_buf.len > 0)
2073 12 : detail = errcxt->err_buf.data;
2074 : else
2075 0 : detail = NULL;
2076 :
2077 12 : ereport(level,
2078 : (errcode(sqlcode),
2079 : errmsg_internal("%s", msg),
2080 : detail ? errdetail_internal("%s", detail) : 0));
2081 0 : }
2082 :
2083 :
2084 : /*
2085 : * xml_errsave --- save an XML-related error
2086 : *
2087 : * If escontext is an ErrorSaveContext, error details are saved into it,
2088 : * and control returns normally.
2089 : *
2090 : * Otherwise, the error is thrown, so that this is equivalent to
2091 : * xml_ereport() with level == ERROR.
2092 : *
2093 : * This should be used only for errors that we're sure we do not need
2094 : * a transaction abort to clean up after.
2095 : */
2096 : static void
2097 204 : xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
2098 : int sqlcode, const char *msg)
2099 : {
2100 : char *detail;
2101 :
2102 : /* Defend against someone passing us a bogus context struct */
2103 204 : if (errcxt->magic != ERRCXT_MAGIC)
2104 0 : elog(ERROR, "xml_errsave called with invalid PgXmlErrorContext");
2105 :
2106 : /* Flag that the current libxml error has been reported */
2107 204 : errcxt->err_occurred = false;
2108 :
2109 : /* Include detail only if we have some text from libxml */
2110 204 : if (errcxt->err_buf.len > 0)
2111 204 : detail = errcxt->err_buf.data;
2112 : else
2113 0 : detail = NULL;
2114 :
2115 204 : errsave(escontext,
2116 : (errcode(sqlcode),
2117 : errmsg_internal("%s", msg),
2118 : detail ? errdetail_internal("%s", detail) : 0));
2119 108 : }
2120 :
2121 :
2122 : /*
2123 : * Error handler for libxml errors and warnings
2124 : */
2125 : static void
2126 398 : xml_errorHandler(void *data, PgXmlErrorPtr error)
2127 : {
2128 398 : PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
2129 398 : xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
2130 398 : xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
2131 398 : xmlNodePtr node = error->node;
2132 398 : const xmlChar *name = (node != NULL &&
2133 398 : node->type == XML_ELEMENT_NODE) ? node->name : NULL;
2134 398 : int domain = error->domain;
2135 398 : int level = error->level;
2136 : StringInfo errorBuf;
2137 :
2138 : /*
2139 : * Defend against someone passing us a bogus context struct.
2140 : *
2141 : * We force a backend exit if this check fails because longjmp'ing out of
2142 : * libxml would likely render it unsafe to use further.
2143 : */
2144 398 : if (xmlerrcxt->magic != ERRCXT_MAGIC)
2145 0 : elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
2146 :
2147 : /*----------
2148 : * Older libxml versions report some errors differently.
2149 : * First, some errors were previously reported as coming from the parser
2150 : * domain but are now reported as coming from the namespace domain.
2151 : * Second, some warnings were upgraded to errors.
2152 : * We attempt to compensate for that here.
2153 : *----------
2154 : */
2155 398 : switch (error->code)
2156 : {
2157 30 : case XML_WAR_NS_URI:
2158 30 : level = XML_ERR_ERROR;
2159 30 : domain = XML_FROM_NAMESPACE;
2160 30 : break;
2161 :
2162 54 : case XML_ERR_NS_DECL_ERROR:
2163 : case XML_WAR_NS_URI_RELATIVE:
2164 : case XML_WAR_NS_COLUMN:
2165 : case XML_NS_ERR_XML_NAMESPACE:
2166 : case XML_NS_ERR_UNDEFINED_NAMESPACE:
2167 : case XML_NS_ERR_QNAME:
2168 : case XML_NS_ERR_ATTRIBUTE_REDEFINED:
2169 : case XML_NS_ERR_EMPTY:
2170 54 : domain = XML_FROM_NAMESPACE;
2171 54 : break;
2172 : }
2173 :
2174 : /* Decide whether to act on the error or not */
2175 398 : switch (domain)
2176 : {
2177 314 : case XML_FROM_PARSER:
2178 :
2179 : /*
2180 : * XML_ERR_NOT_WELL_BALANCED is typically reported after some
2181 : * other, more on-point error. Furthermore, libxml2 2.13 reports
2182 : * it under a completely different set of rules than prior
2183 : * versions. To avoid cross-version behavioral differences,
2184 : * suppress it so long as we already logged some error.
2185 : */
2186 314 : if (error->code == XML_ERR_NOT_WELL_BALANCED &&
2187 30 : xmlerrcxt->err_occurred)
2188 30 : return;
2189 : /* fall through */
2190 :
2191 : case XML_FROM_NONE:
2192 : case XML_FROM_MEMORY:
2193 : case XML_FROM_IO:
2194 :
2195 : /*
2196 : * Suppress warnings about undeclared entities. We need to do
2197 : * this to avoid problems due to not loading DTD definitions.
2198 : */
2199 284 : if (error->code == XML_WAR_UNDECLARED_ENTITY)
2200 6 : return;
2201 :
2202 : /* Otherwise, accept error regardless of the parsing purpose */
2203 278 : break;
2204 :
2205 84 : default:
2206 : /* Ignore error if only doing well-formedness check */
2207 84 : if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
2208 66 : return;
2209 18 : break;
2210 : }
2211 :
2212 : /* Prepare error message in errorBuf */
2213 296 : errorBuf = makeStringInfo();
2214 :
2215 296 : if (error->line > 0)
2216 296 : appendStringInfo(errorBuf, "line %d: ", error->line);
2217 296 : if (name != NULL)
2218 0 : appendStringInfo(errorBuf, "element %s: ", name);
2219 296 : if (error->message != NULL)
2220 296 : appendStringInfoString(errorBuf, error->message);
2221 : else
2222 0 : appendStringInfoString(errorBuf, "(no message provided)");
2223 :
2224 : /*
2225 : * Append context information to errorBuf.
2226 : *
2227 : * xmlParserPrintFileContext() uses libxml's "generic" error handler to
2228 : * write the context. Since we don't want to duplicate libxml
2229 : * functionality here, we set up a generic error handler temporarily.
2230 : *
2231 : * We use appendStringInfo() directly as libxml's generic error handler.
2232 : * This should work because it has essentially the same signature as
2233 : * libxml expects, namely (void *ptr, const char *msg, ...).
2234 : */
2235 296 : if (input != NULL)
2236 : {
2237 296 : xmlGenericErrorFunc errFuncSaved = xmlGenericError;
2238 296 : void *errCtxSaved = xmlGenericErrorContext;
2239 :
2240 296 : xmlSetGenericErrorFunc(errorBuf,
2241 : (xmlGenericErrorFunc) appendStringInfo);
2242 :
2243 : /* Add context information to errorBuf */
2244 296 : appendStringInfoLineSeparator(errorBuf);
2245 :
2246 296 : xmlParserPrintFileContext(input);
2247 :
2248 : /* Restore generic error func */
2249 296 : xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
2250 : }
2251 :
2252 : /* Get rid of any trailing newlines in errorBuf */
2253 296 : chopStringInfoNewlines(errorBuf);
2254 :
2255 : /*
2256 : * Legacy error handling mode. err_occurred is never set, we just add the
2257 : * message to err_buf. This mode exists because the xml2 contrib module
2258 : * uses our error-handling infrastructure, but we don't want to change its
2259 : * behaviour since it's deprecated anyway. This is also why we don't
2260 : * distinguish between notices, warnings and errors here --- the old-style
2261 : * generic error handler wouldn't have done that either.
2262 : */
2263 296 : if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
2264 : {
2265 2 : appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
2266 2 : appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
2267 : errorBuf->len);
2268 :
2269 2 : destroyStringInfo(errorBuf);
2270 2 : return;
2271 : }
2272 :
2273 : /*
2274 : * We don't want to ereport() here because that'd probably leave libxml in
2275 : * an inconsistent state. Instead, we remember the error and ereport()
2276 : * from xml_ereport().
2277 : *
2278 : * Warnings and notices can be reported immediately since they won't cause
2279 : * a longjmp() out of libxml.
2280 : */
2281 294 : if (level >= XML_ERR_ERROR)
2282 : {
2283 288 : appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
2284 288 : appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
2285 : errorBuf->len);
2286 :
2287 288 : xmlerrcxt->err_occurred = true;
2288 : }
2289 6 : else if (level >= XML_ERR_WARNING)
2290 : {
2291 6 : ereport(WARNING,
2292 : (errmsg_internal("%s", errorBuf->data)));
2293 : }
2294 : else
2295 : {
2296 0 : ereport(NOTICE,
2297 : (errmsg_internal("%s", errorBuf->data)));
2298 : }
2299 :
2300 294 : destroyStringInfo(errorBuf);
2301 : }
2302 :
2303 :
2304 : /*
2305 : * Convert libxml error codes into textual errdetail messages.
2306 : *
2307 : * This should be called within an ereport or errsave invocation,
2308 : * just as errdetail would be.
2309 : *
2310 : * At the moment, we only need to cover those codes that we
2311 : * may raise in this file.
2312 : */
2313 : static int
2314 6 : errdetail_for_xml_code(int code)
2315 : {
2316 : const char *det;
2317 :
2318 6 : switch (code)
2319 : {
2320 0 : case XML_ERR_INVALID_CHAR:
2321 0 : det = gettext_noop("Invalid character value.");
2322 0 : break;
2323 0 : case XML_ERR_SPACE_REQUIRED:
2324 0 : det = gettext_noop("Space required.");
2325 0 : break;
2326 6 : case XML_ERR_STANDALONE_VALUE:
2327 6 : det = gettext_noop("standalone accepts only 'yes' or 'no'.");
2328 6 : break;
2329 0 : case XML_ERR_VERSION_MISSING:
2330 0 : det = gettext_noop("Malformed declaration: missing version.");
2331 0 : break;
2332 0 : case XML_ERR_MISSING_ENCODING:
2333 0 : det = gettext_noop("Missing encoding in text declaration.");
2334 0 : break;
2335 0 : case XML_ERR_XMLDECL_NOT_FINISHED:
2336 0 : det = gettext_noop("Parsing XML declaration: '?>' expected.");
2337 0 : break;
2338 0 : default:
2339 0 : det = gettext_noop("Unrecognized libxml error code: %d.");
2340 0 : break;
2341 : }
2342 :
2343 6 : return errdetail(det, code);
2344 : }
2345 :
2346 :
2347 : /*
2348 : * Remove all trailing newlines from a StringInfo string
2349 : */
2350 : static void
2351 882 : chopStringInfoNewlines(StringInfo str)
2352 : {
2353 1474 : while (str->len > 0 && str->data[str->len - 1] == '\n')
2354 592 : str->data[--str->len] = '\0';
2355 882 : }
2356 :
2357 :
2358 : /*
2359 : * Append a newline after removing any existing trailing newlines
2360 : */
2361 : static void
2362 586 : appendStringInfoLineSeparator(StringInfo str)
2363 : {
2364 586 : chopStringInfoNewlines(str);
2365 586 : if (str->len > 0)
2366 368 : appendStringInfoChar(str, '\n');
2367 586 : }
2368 :
2369 :
2370 : /*
2371 : * Convert one char in the current server encoding to a Unicode codepoint.
2372 : */
2373 : static pg_wchar
2374 18280 : sqlchar_to_unicode(const char *s)
2375 : {
2376 : char *utf8string;
2377 : pg_wchar ret[2]; /* need space for trailing zero */
2378 :
2379 : /* note we're not assuming s is null-terminated */
2380 18280 : utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
2381 :
2382 18280 : pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
2383 : pg_encoding_mblen(PG_UTF8, utf8string));
2384 :
2385 18280 : if (utf8string != s)
2386 0 : pfree(utf8string);
2387 :
2388 18280 : return ret[0];
2389 : }
2390 :
2391 :
2392 : static bool
2393 3638 : is_valid_xml_namefirst(pg_wchar c)
2394 : {
2395 : /* (Letter | '_' | ':') */
2396 3644 : return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2397 7282 : || c == '_' || c == ':');
2398 : }
2399 :
2400 :
2401 : static bool
2402 14642 : is_valid_xml_namechar(pg_wchar c)
2403 : {
2404 : /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
2405 15532 : return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2406 890 : || xmlIsDigitQ(c)
2407 254 : || c == '.' || c == '-' || c == '_' || c == ':'
2408 12 : || xmlIsCombiningQ(c)
2409 31064 : || xmlIsExtenderQ(c));
2410 : }
2411 : #endif /* USE_LIBXML */
2412 :
2413 :
2414 : /*
2415 : * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
2416 : */
2417 : char *
2418 3652 : map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
2419 : bool escape_period)
2420 : {
2421 : #ifdef USE_LIBXML
2422 : StringInfoData buf;
2423 : const char *p;
2424 :
2425 : /*
2426 : * SQL/XML doesn't make use of this case anywhere, so it's probably a
2427 : * mistake.
2428 : */
2429 : Assert(fully_escaped || !escape_period);
2430 :
2431 3652 : initStringInfo(&buf);
2432 :
2433 21952 : for (p = ident; *p; p += pg_mblen(p))
2434 : {
2435 18300 : if (*p == ':' && (p == ident || fully_escaped))
2436 14 : appendStringInfoString(&buf, "_x003A_");
2437 18286 : else if (*p == '_' && *(p + 1) == 'x')
2438 6 : appendStringInfoString(&buf, "_x005F_");
2439 21568 : else if (fully_escaped && p == ident &&
2440 3288 : pg_strncasecmp(p, "xml", 3) == 0)
2441 : {
2442 0 : if (*p == 'x')
2443 0 : appendStringInfoString(&buf, "_x0078_");
2444 : else
2445 0 : appendStringInfoString(&buf, "_x0058_");
2446 : }
2447 18280 : else if (escape_period && *p == '.')
2448 0 : appendStringInfoString(&buf, "_x002E_");
2449 : else
2450 : {
2451 18280 : pg_wchar u = sqlchar_to_unicode(p);
2452 :
2453 36560 : if ((p == ident)
2454 3638 : ? !is_valid_xml_namefirst(u)
2455 14642 : : !is_valid_xml_namechar(u))
2456 18 : appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
2457 : else
2458 18262 : appendBinaryStringInfo(&buf, p, pg_mblen(p));
2459 : }
2460 : }
2461 :
2462 3652 : return buf.data;
2463 : #else /* not USE_LIBXML */
2464 : NO_XML_SUPPORT();
2465 : return NULL;
2466 : #endif /* not USE_LIBXML */
2467 : }
2468 :
2469 :
2470 : /*
2471 : * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2472 : */
2473 : char *
2474 128 : map_xml_name_to_sql_identifier(const char *name)
2475 : {
2476 : StringInfoData buf;
2477 : const char *p;
2478 :
2479 128 : initStringInfo(&buf);
2480 :
2481 704 : for (p = name; *p; p += pg_mblen(p))
2482 : {
2483 576 : if (*p == '_' && *(p + 1) == 'x'
2484 16 : && isxdigit((unsigned char) *(p + 2))
2485 16 : && isxdigit((unsigned char) *(p + 3))
2486 16 : && isxdigit((unsigned char) *(p + 4))
2487 16 : && isxdigit((unsigned char) *(p + 5))
2488 16 : && *(p + 6) == '_')
2489 16 : {
2490 : char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
2491 : unsigned int u;
2492 :
2493 16 : sscanf(p + 2, "%X", &u);
2494 16 : pg_unicode_to_server(u, (unsigned char *) cbuf);
2495 16 : appendStringInfoString(&buf, cbuf);
2496 16 : p += 6;
2497 : }
2498 : else
2499 560 : appendBinaryStringInfo(&buf, p, pg_mblen(p));
2500 : }
2501 :
2502 128 : return buf.data;
2503 : }
2504 :
2505 : /*
2506 : * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2507 : *
2508 : * When xml_escape_strings is true, then certain characters in string
2509 : * values are replaced by entity references (< etc.), as specified
2510 : * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is
2511 : * wanted. The false case is mainly useful when the resulting value
2512 : * is used with xmlTextWriterWriteAttribute() to write out an
2513 : * attribute, because that function does the escaping itself.
2514 : */
2515 : char *
2516 134894 : map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2517 : {
2518 134894 : if (type_is_array_domain(type))
2519 : {
2520 : ArrayType *array;
2521 : Oid elmtype;
2522 : int16 elmlen;
2523 : bool elmbyval;
2524 : char elmalign;
2525 : int num_elems;
2526 : Datum *elem_values;
2527 : bool *elem_nulls;
2528 : StringInfoData buf;
2529 : int i;
2530 :
2531 6 : array = DatumGetArrayTypeP(value);
2532 6 : elmtype = ARR_ELEMTYPE(array);
2533 6 : get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2534 :
2535 6 : deconstruct_array(array, elmtype,
2536 : elmlen, elmbyval, elmalign,
2537 : &elem_values, &elem_nulls,
2538 : &num_elems);
2539 :
2540 6 : initStringInfo(&buf);
2541 :
2542 24 : for (i = 0; i < num_elems; i++)
2543 : {
2544 18 : if (elem_nulls[i])
2545 0 : continue;
2546 18 : appendStringInfoString(&buf, "<element>");
2547 18 : appendStringInfoString(&buf,
2548 18 : map_sql_value_to_xml_value(elem_values[i],
2549 : elmtype, true));
2550 18 : appendStringInfoString(&buf, "</element>");
2551 : }
2552 :
2553 6 : pfree(elem_values);
2554 6 : pfree(elem_nulls);
2555 :
2556 6 : return buf.data;
2557 : }
2558 : else
2559 : {
2560 : Oid typeOut;
2561 : bool isvarlena;
2562 : char *str;
2563 :
2564 : /*
2565 : * Flatten domains; the special-case treatments below should apply to,
2566 : * eg, domains over boolean not just boolean.
2567 : */
2568 134888 : type = getBaseType(type);
2569 :
2570 : /*
2571 : * Special XSD formatting for some data types
2572 : */
2573 134888 : switch (type)
2574 : {
2575 66 : case BOOLOID:
2576 66 : if (DatumGetBool(value))
2577 60 : return "true";
2578 : else
2579 6 : return "false";
2580 :
2581 48 : case DATEOID:
2582 : {
2583 : DateADT date;
2584 : struct pg_tm tm;
2585 : char buf[MAXDATELEN + 1];
2586 :
2587 48 : date = DatumGetDateADT(value);
2588 : /* XSD doesn't support infinite values */
2589 48 : if (DATE_NOT_FINITE(date))
2590 0 : ereport(ERROR,
2591 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2592 : errmsg("date out of range"),
2593 : errdetail("XML does not support infinite date values.")));
2594 48 : j2date(date + POSTGRES_EPOCH_JDATE,
2595 : &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2596 48 : EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2597 :
2598 48 : return pstrdup(buf);
2599 : }
2600 :
2601 36 : case TIMESTAMPOID:
2602 : {
2603 : Timestamp timestamp;
2604 : struct pg_tm tm;
2605 : fsec_t fsec;
2606 : char buf[MAXDATELEN + 1];
2607 :
2608 36 : timestamp = DatumGetTimestamp(value);
2609 :
2610 : /* XSD doesn't support infinite values */
2611 36 : if (TIMESTAMP_NOT_FINITE(timestamp))
2612 6 : ereport(ERROR,
2613 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2614 : errmsg("timestamp out of range"),
2615 : errdetail("XML does not support infinite timestamp values.")));
2616 30 : else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2617 30 : EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2618 : else
2619 0 : ereport(ERROR,
2620 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2621 : errmsg("timestamp out of range")));
2622 :
2623 30 : return pstrdup(buf);
2624 : }
2625 :
2626 24 : case TIMESTAMPTZOID:
2627 : {
2628 : TimestampTz timestamp;
2629 : struct pg_tm tm;
2630 : int tz;
2631 : fsec_t fsec;
2632 24 : const char *tzn = NULL;
2633 : char buf[MAXDATELEN + 1];
2634 :
2635 24 : timestamp = DatumGetTimestamp(value);
2636 :
2637 : /* XSD doesn't support infinite values */
2638 24 : if (TIMESTAMP_NOT_FINITE(timestamp))
2639 0 : ereport(ERROR,
2640 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2641 : errmsg("timestamp out of range"),
2642 : errdetail("XML does not support infinite timestamp values.")));
2643 24 : else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2644 24 : EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2645 : else
2646 0 : ereport(ERROR,
2647 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2648 : errmsg("timestamp out of range")));
2649 :
2650 24 : return pstrdup(buf);
2651 : }
2652 :
2653 : #ifdef USE_LIBXML
2654 36 : case BYTEAOID:
2655 : {
2656 36 : bytea *bstr = DatumGetByteaPP(value);
2657 : PgXmlErrorContext *xmlerrcxt;
2658 36 : volatile xmlBufferPtr buf = NULL;
2659 36 : volatile xmlTextWriterPtr writer = NULL;
2660 : char *result;
2661 :
2662 36 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2663 :
2664 36 : PG_TRY();
2665 : {
2666 36 : buf = xmlBufferCreate();
2667 36 : if (buf == NULL || xmlerrcxt->err_occurred)
2668 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2669 : "could not allocate xmlBuffer");
2670 36 : writer = xmlNewTextWriterMemory(buf, 0);
2671 36 : if (writer == NULL || xmlerrcxt->err_occurred)
2672 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2673 : "could not allocate xmlTextWriter");
2674 :
2675 36 : if (xmlbinary == XMLBINARY_BASE64)
2676 30 : xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2677 30 : 0, VARSIZE_ANY_EXHDR(bstr));
2678 : else
2679 6 : xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2680 6 : 0, VARSIZE_ANY_EXHDR(bstr));
2681 :
2682 : /* we MUST do this now to flush data out to the buffer */
2683 36 : xmlFreeTextWriter(writer);
2684 36 : writer = NULL;
2685 :
2686 36 : result = pstrdup((const char *) xmlBufferContent(buf));
2687 : }
2688 0 : PG_CATCH();
2689 : {
2690 0 : if (writer)
2691 0 : xmlFreeTextWriter(writer);
2692 0 : if (buf)
2693 0 : xmlBufferFree(buf);
2694 :
2695 0 : pg_xml_done(xmlerrcxt, true);
2696 :
2697 0 : PG_RE_THROW();
2698 : }
2699 36 : PG_END_TRY();
2700 :
2701 36 : xmlBufferFree(buf);
2702 :
2703 36 : pg_xml_done(xmlerrcxt, false);
2704 :
2705 36 : return result;
2706 : }
2707 : #endif /* USE_LIBXML */
2708 :
2709 : }
2710 :
2711 : /*
2712 : * otherwise, just use the type's native text representation
2713 : */
2714 134678 : getTypeOutputInfo(type, &typeOut, &isvarlena);
2715 134678 : str = OidOutputFunctionCall(typeOut, value);
2716 :
2717 : /* ... exactly as-is for XML, and when escaping is not wanted */
2718 134678 : if (type == XMLOID || !xml_escape_strings)
2719 22352 : return str;
2720 :
2721 : /* otherwise, translate special characters as needed */
2722 112326 : return escape_xml(str);
2723 : }
2724 : }
2725 :
2726 :
2727 : /*
2728 : * Escape characters in text that have special meanings in XML.
2729 : *
2730 : * Returns a palloc'd string.
2731 : *
2732 : * NB: this is intentionally not dependent on libxml.
2733 : */
2734 : char *
2735 112798 : escape_xml(const char *str)
2736 : {
2737 : StringInfoData buf;
2738 : const char *p;
2739 :
2740 112798 : initStringInfo(&buf);
2741 708860 : for (p = str; *p; p++)
2742 : {
2743 596062 : switch (*p)
2744 : {
2745 0 : case '&':
2746 0 : appendStringInfoString(&buf, "&");
2747 0 : break;
2748 36 : case '<':
2749 36 : appendStringInfoString(&buf, "<");
2750 36 : break;
2751 24 : case '>':
2752 24 : appendStringInfoString(&buf, ">");
2753 24 : break;
2754 0 : case '\r':
2755 0 : appendStringInfoString(&buf, "
");
2756 0 : break;
2757 596002 : default:
2758 596002 : appendStringInfoCharMacro(&buf, *p);
2759 596002 : break;
2760 : }
2761 : }
2762 112798 : return buf.data;
2763 : }
2764 :
2765 :
/*
 * Duplicate a NUL-terminated string into memory obtained from SPI_palloc().
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes = strlen(s) + 1; /* count the terminator too */
	char	   *copy;

	copy = SPI_palloc(nbytes);

	return memcpy(copy, s, nbytes);
}
2775 :
2776 :
2777 : /*
2778 : * SQL to XML mapping functions
2779 : *
2780 : * What follows below was at one point intentionally organized so that
2781 : * you can read along in the SQL/XML standard. The functions are
2782 : * mostly split up the way the clauses lay out in the standards
2783 : * document, and the identifiers are also aligned with the standard
2784 : * text. Unfortunately, SQL/XML:2006 reordered the clauses
2785 : * differently than SQL/XML:2003, so the order below doesn't make much
2786 : * sense anymore.
2787 : *
2788 : * There are many things going on there:
2789 : *
2790 : * There are two kinds of mappings: Mapping SQL data (table contents)
2791 : * to XML documents, and mapping SQL structure (the "schema") to XML
2792 : * Schema. And there are functions that do both at the same time.
2793 : *
2794 : * Then you can map a database, a schema, or a table, each in both
2795 : * ways. This breaks down recursively: Mapping a database invokes
2796 : * mapping schemas, which invokes mapping tables, which invokes
2797 : * mapping rows, which invokes mapping columns, although you can't
2798 : * call the last two from the outside. Because of this, there are a
2799 : * number of xyz_internal() functions which are to be called both from
2800 : * the function manager wrapper and from some upper layer in a
2801 : * recursive call.
2802 : *
2803 : * See the documentation about what the common function arguments
2804 : * nulls, tableforest, and targetns mean.
2805 : *
2806 : * Some style guidelines for XML output: Use double quotes for quoting
2807 : * XML attributes. Indent XML elements by two spaces, but remember
2808 : * that a lot of code is called recursively at different levels, so
2809 : * it's better not to indent rather than create output that indents
2810 : * and outdents weirdly. Add newlines to make the output look nice.
2811 : */
2812 :
2813 :
2814 : /*
2815 : * Visibility of objects for XML mappings; see SQL/XML:2008 section
2816 : * 4.10.8.
2817 : */
2818 :
2819 : /*
2820 : * Given a query, which must return type oid as first column, produce
2821 : * a list of Oids with the query results.
2822 : */
2823 : static List *
2824 36 : query_to_oid_list(const char *query)
2825 : {
2826 : uint64 i;
2827 36 : List *list = NIL;
2828 : int spi_result;
2829 :
2830 36 : spi_result = SPI_execute(query, true, 0);
2831 36 : if (spi_result != SPI_OK_SELECT)
2832 0 : elog(ERROR, "SPI_execute returned %s for %s",
2833 : SPI_result_code_string(spi_result), query);
2834 :
2835 108 : for (i = 0; i < SPI_processed; i++)
2836 : {
2837 : Datum oid;
2838 : bool isnull;
2839 :
2840 72 : oid = SPI_getbinval(SPI_tuptable->vals[i],
2841 72 : SPI_tuptable->tupdesc,
2842 : 1,
2843 : &isnull);
2844 72 : if (!isnull)
2845 72 : list = lappend_oid(list, DatumGetObjectId(oid));
2846 : }
2847 :
2848 36 : return list;
2849 : }
2850 :
2851 :
2852 : static List *
2853 36 : schema_get_xml_visible_tables(Oid nspid)
2854 : {
2855 : StringInfoData query;
2856 :
2857 36 : initStringInfo(&query);
2858 36 : appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2859 : " WHERE relnamespace = %u AND relkind IN ("
2860 : CppAsString2(RELKIND_RELATION) ","
2861 : CppAsString2(RELKIND_MATVIEW) ","
2862 : CppAsString2(RELKIND_VIEW) ")"
2863 : " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2864 : " ORDER BY relname;", nspid);
2865 :
2866 36 : return query_to_oid_list(query.data);
2867 : }
2868 :
2869 :
/*
 * Including the system schemas is probably not useful for a database
 * mapping.
 *
 * XML_VISIBLE_SCHEMAS_EXCLUDE is a SQL boolean expression that is true for
 * schemas whose name starts with "pg_" and for information_schema.
 */
#define XML_VISIBLE_SCHEMAS_EXCLUDE "(nspname ~ '^pg_' OR nspname = 'information_schema')"

/*
 * XML_VISIBLE_SCHEMAS is a query returning the OIDs of the schemas on which
 * has_schema_privilege() reports USAGE, minus those matched by
 * XML_VISIBLE_SCHEMAS_EXCLUDE.  It has no ORDER BY and no terminating
 * semicolon, so users of the macro can append to it or embed it as a
 * subquery.
 */
#define XML_VISIBLE_SCHEMAS "SELECT oid FROM pg_catalog.pg_namespace WHERE pg_catalog.has_schema_privilege (oid, 'USAGE') AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2877 :
2878 :
2879 : static List *
2880 0 : database_get_xml_visible_schemas(void)
2881 : {
2882 0 : return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2883 : }
2884 :
2885 :
2886 : static List *
2887 0 : database_get_xml_visible_tables(void)
2888 : {
2889 : /* At the moment there is no order required here. */
2890 0 : return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2891 : " WHERE relkind IN ("
2892 : CppAsString2(RELKIND_RELATION) ","
2893 : CppAsString2(RELKIND_MATVIEW) ","
2894 : CppAsString2(RELKIND_VIEW) ")"
2895 : " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2896 : " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2897 : }
2898 :
2899 :
2900 : /*
2901 : * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2902 : * section 9.11.
2903 : */
2904 :
2905 : static StringInfo
2906 96 : table_to_xml_internal(Oid relid,
2907 : const char *xmlschema, bool nulls, bool tableforest,
2908 : const char *targetns, bool top_level)
2909 : {
2910 : StringInfoData query;
2911 :
2912 96 : initStringInfo(&query);
2913 96 : appendStringInfo(&query, "SELECT * FROM %s",
2914 : DatumGetCString(DirectFunctionCall1(regclassout,
2915 : ObjectIdGetDatum(relid))));
2916 96 : return query_to_xml_internal(query.data, get_rel_name(relid),
2917 : xmlschema, nulls, tableforest,
2918 : targetns, top_level);
2919 : }
2920 :
2921 :
2922 : Datum
2923 36 : table_to_xml(PG_FUNCTION_ARGS)
2924 : {
2925 36 : Oid relid = PG_GETARG_OID(0);
2926 36 : bool nulls = PG_GETARG_BOOL(1);
2927 36 : bool tableforest = PG_GETARG_BOOL(2);
2928 36 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2929 :
2930 36 : PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2931 : nulls, tableforest,
2932 : targetns, true)));
2933 : }
2934 :
2935 :
2936 : Datum
2937 10 : query_to_xml(PG_FUNCTION_ARGS)
2938 : {
2939 10 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2940 10 : bool nulls = PG_GETARG_BOOL(1);
2941 10 : bool tableforest = PG_GETARG_BOOL(2);
2942 10 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2943 :
2944 10 : PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2945 : NULL, nulls, tableforest,
2946 : targetns, true)));
2947 : }
2948 :
2949 :
2950 : Datum
2951 12 : cursor_to_xml(PG_FUNCTION_ARGS)
2952 : {
2953 12 : char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2954 12 : int32 count = PG_GETARG_INT32(1);
2955 12 : bool nulls = PG_GETARG_BOOL(2);
2956 12 : bool tableforest = PG_GETARG_BOOL(3);
2957 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2958 :
2959 : StringInfoData result;
2960 : Portal portal;
2961 : uint64 i;
2962 :
2963 12 : initStringInfo(&result);
2964 :
2965 12 : if (!tableforest)
2966 : {
2967 6 : xmldata_root_element_start(&result, "table", NULL, targetns, true);
2968 6 : appendStringInfoChar(&result, '\n');
2969 : }
2970 :
2971 12 : SPI_connect();
2972 12 : portal = SPI_cursor_find(name);
2973 12 : if (portal == NULL)
2974 0 : ereport(ERROR,
2975 : (errcode(ERRCODE_UNDEFINED_CURSOR),
2976 : errmsg("cursor \"%s\" does not exist", name)));
2977 :
2978 12 : SPI_cursor_fetch(portal, true, count);
2979 48 : for (i = 0; i < SPI_processed; i++)
2980 36 : SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2981 : tableforest, targetns, true);
2982 :
2983 12 : SPI_finish();
2984 :
2985 12 : if (!tableforest)
2986 6 : xmldata_root_element_end(&result, "table");
2987 :
2988 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2989 : }
2990 :
2991 :
2992 : /*
2993 : * Write the start tag of the root element of a data mapping.
2994 : *
2995 : * top_level means that this is the very top level of the eventual
2996 : * output. For example, when the user calls table_to_xml, then a call
2997 : * with a table name to this function is the top level. When the user
2998 : * calls database_to_xml, then a call with a schema name to this
2999 : * function is not the top level. If top_level is false, then the XML
3000 : * namespace declarations are omitted, because they supposedly already
3001 : * appeared earlier in the output. Repeating them is not wrong, but
3002 : * it looks ugly.
3003 : */
3004 : static void
3005 238 : xmldata_root_element_start(StringInfo result, const char *eltname,
3006 : const char *xmlschema, const char *targetns,
3007 : bool top_level)
3008 : {
3009 : /* This isn't really wrong but currently makes no sense. */
3010 : Assert(top_level || !xmlschema);
3011 :
3012 238 : appendStringInfo(result, "<%s", eltname);
3013 238 : if (top_level)
3014 : {
3015 178 : appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
3016 178 : if (strlen(targetns) > 0)
3017 30 : appendStringInfo(result, " xmlns=\"%s\"", targetns);
3018 : }
3019 238 : if (xmlschema)
3020 : {
3021 : /* FIXME: better targets */
3022 18 : if (strlen(targetns) > 0)
3023 6 : appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
3024 : else
3025 12 : appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
3026 : }
3027 238 : appendStringInfoString(result, ">\n");
3028 238 : }
3029 :
3030 :
3031 : static void
3032 238 : xmldata_root_element_end(StringInfo result, const char *eltname)
3033 : {
3034 238 : appendStringInfo(result, "</%s>\n", eltname);
3035 238 : }
3036 :
3037 :
3038 : static StringInfo
3039 112 : query_to_xml_internal(const char *query, char *tablename,
3040 : const char *xmlschema, bool nulls, bool tableforest,
3041 : const char *targetns, bool top_level)
3042 : {
3043 : StringInfo result;
3044 : char *xmltn;
3045 : uint64 i;
3046 :
3047 112 : if (tablename)
3048 96 : xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3049 : else
3050 16 : xmltn = "table";
3051 :
3052 112 : result = makeStringInfo();
3053 :
3054 112 : SPI_connect();
3055 112 : if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
3056 0 : ereport(ERROR,
3057 : (errcode(ERRCODE_DATA_EXCEPTION),
3058 : errmsg("invalid query")));
3059 :
3060 112 : if (!tableforest)
3061 : {
3062 52 : xmldata_root_element_start(result, xmltn, xmlschema,
3063 : targetns, top_level);
3064 52 : appendStringInfoChar(result, '\n');
3065 : }
3066 :
3067 112 : if (xmlschema)
3068 30 : appendStringInfo(result, "%s\n\n", xmlschema);
3069 :
3070 388 : for (i = 0; i < SPI_processed; i++)
3071 276 : SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
3072 : tableforest, targetns, top_level);
3073 :
3074 112 : if (!tableforest)
3075 52 : xmldata_root_element_end(result, xmltn);
3076 :
3077 112 : SPI_finish();
3078 :
3079 112 : return result;
3080 : }
3081 :
3082 :
3083 : Datum
3084 30 : table_to_xmlschema(PG_FUNCTION_ARGS)
3085 : {
3086 30 : Oid relid = PG_GETARG_OID(0);
3087 30 : bool nulls = PG_GETARG_BOOL(1);
3088 30 : bool tableforest = PG_GETARG_BOOL(2);
3089 30 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3090 : const char *result;
3091 : Relation rel;
3092 :
3093 30 : rel = table_open(relid, AccessShareLock);
3094 30 : result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3095 : tableforest, targetns);
3096 30 : table_close(rel, NoLock);
3097 :
3098 30 : PG_RETURN_XML_P(cstring_to_xmltype(result));
3099 : }
3100 :
3101 :
3102 : Datum
3103 6 : query_to_xmlschema(PG_FUNCTION_ARGS)
3104 : {
3105 6 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
3106 6 : bool nulls = PG_GETARG_BOOL(1);
3107 6 : bool tableforest = PG_GETARG_BOOL(2);
3108 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3109 : const char *result;
3110 : SPIPlanPtr plan;
3111 : Portal portal;
3112 :
3113 6 : SPI_connect();
3114 :
3115 6 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
3116 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3117 :
3118 6 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
3119 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3120 :
3121 6 : result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3122 : InvalidOid, nulls,
3123 : tableforest, targetns));
3124 6 : SPI_cursor_close(portal);
3125 6 : SPI_finish();
3126 :
3127 6 : PG_RETURN_XML_P(cstring_to_xmltype(result));
3128 : }
3129 :
3130 :
3131 : Datum
3132 12 : cursor_to_xmlschema(PG_FUNCTION_ARGS)
3133 : {
3134 12 : char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
3135 12 : bool nulls = PG_GETARG_BOOL(1);
3136 12 : bool tableforest = PG_GETARG_BOOL(2);
3137 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3138 : const char *xmlschema;
3139 : Portal portal;
3140 :
3141 12 : SPI_connect();
3142 12 : portal = SPI_cursor_find(name);
3143 12 : if (portal == NULL)
3144 0 : ereport(ERROR,
3145 : (errcode(ERRCODE_UNDEFINED_CURSOR),
3146 : errmsg("cursor \"%s\" does not exist", name)));
3147 12 : if (portal->tupDesc == NULL)
3148 0 : ereport(ERROR,
3149 : (errcode(ERRCODE_INVALID_CURSOR_STATE),
3150 : errmsg("portal \"%s\" does not return tuples", name)));
3151 :
3152 12 : xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3153 : InvalidOid, nulls,
3154 : tableforest, targetns));
3155 12 : SPI_finish();
3156 :
3157 12 : PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
3158 : }
3159 :
3160 :
3161 : Datum
3162 24 : table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3163 : {
3164 24 : Oid relid = PG_GETARG_OID(0);
3165 24 : bool nulls = PG_GETARG_BOOL(1);
3166 24 : bool tableforest = PG_GETARG_BOOL(2);
3167 24 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3168 : Relation rel;
3169 : const char *xmlschema;
3170 :
3171 24 : rel = table_open(relid, AccessShareLock);
3172 24 : xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3173 : tableforest, targetns);
3174 24 : table_close(rel, NoLock);
3175 :
3176 24 : PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
3177 : xmlschema, nulls, tableforest,
3178 : targetns, true)));
3179 : }
3180 :
3181 :
3182 : Datum
3183 6 : query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3184 : {
3185 6 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
3186 6 : bool nulls = PG_GETARG_BOOL(1);
3187 6 : bool tableforest = PG_GETARG_BOOL(2);
3188 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3189 :
3190 : const char *xmlschema;
3191 : SPIPlanPtr plan;
3192 : Portal portal;
3193 :
3194 6 : SPI_connect();
3195 :
3196 6 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
3197 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3198 :
3199 6 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
3200 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3201 :
3202 6 : xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3203 : InvalidOid, nulls, tableforest, targetns));
3204 6 : SPI_cursor_close(portal);
3205 6 : SPI_finish();
3206 :
3207 6 : PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
3208 : xmlschema, nulls, tableforest,
3209 : targetns, true)));
3210 : }
3211 :
3212 :
3213 : /*
3214 : * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
3215 : * sections 9.13, 9.14.
3216 : */
3217 :
3218 : static StringInfo
3219 18 : schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
3220 : bool tableforest, const char *targetns, bool top_level)
3221 : {
3222 : StringInfo result;
3223 : char *xmlsn;
3224 : List *relid_list;
3225 : ListCell *cell;
3226 :
3227 18 : xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
3228 : true, false);
3229 18 : result = makeStringInfo();
3230 :
3231 18 : xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
3232 18 : appendStringInfoChar(result, '\n');
3233 :
3234 18 : if (xmlschema)
3235 6 : appendStringInfo(result, "%s\n\n", xmlschema);
3236 :
3237 18 : SPI_connect();
3238 :
3239 18 : relid_list = schema_get_xml_visible_tables(nspid);
3240 :
3241 54 : foreach(cell, relid_list)
3242 : {
3243 36 : Oid relid = lfirst_oid(cell);
3244 : StringInfo subres;
3245 :
3246 36 : subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
3247 : targetns, false);
3248 :
3249 36 : appendBinaryStringInfo(result, subres->data, subres->len);
3250 36 : appendStringInfoChar(result, '\n');
3251 : }
3252 :
3253 18 : SPI_finish();
3254 :
3255 18 : xmldata_root_element_end(result, xmlsn);
3256 :
3257 18 : return result;
3258 : }
3259 :
3260 :
3261 : Datum
3262 12 : schema_to_xml(PG_FUNCTION_ARGS)
3263 : {
3264 12 : Name name = PG_GETARG_NAME(0);
3265 12 : bool nulls = PG_GETARG_BOOL(1);
3266 12 : bool tableforest = PG_GETARG_BOOL(2);
3267 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3268 :
3269 : char *schemaname;
3270 : Oid nspid;
3271 :
3272 12 : schemaname = NameStr(*name);
3273 12 : nspid = LookupExplicitNamespace(schemaname, false);
3274 :
3275 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
3276 : nulls, tableforest, targetns, true)));
3277 : }
3278 :
3279 :
3280 : /*
3281 : * Write the start element of the root element of an XML Schema mapping.
3282 : */
3283 : static void
3284 96 : xsd_schema_element_start(StringInfo result, const char *targetns)
3285 : {
3286 96 : appendStringInfoString(result,
3287 : "<xsd:schema\n"
3288 : " xmlns:xsd=\"" NAMESPACE_XSD "\"");
3289 96 : if (strlen(targetns) > 0)
3290 18 : appendStringInfo(result,
3291 : "\n"
3292 : " targetNamespace=\"%s\"\n"
3293 : " elementFormDefault=\"qualified\"",
3294 : targetns);
3295 96 : appendStringInfoString(result,
3296 : ">\n\n");
3297 96 : }
3298 :
3299 :
3300 : static void
3301 96 : xsd_schema_element_end(StringInfo result)
3302 : {
3303 96 : appendStringInfoString(result, "</xsd:schema>");
3304 96 : }
3305 :
3306 :
3307 : static StringInfo
3308 18 : schema_to_xmlschema_internal(const char *schemaname, bool nulls,
3309 : bool tableforest, const char *targetns)
3310 : {
3311 : Oid nspid;
3312 : List *relid_list;
3313 : List *tupdesc_list;
3314 : ListCell *cell;
3315 : StringInfo result;
3316 :
3317 18 : result = makeStringInfo();
3318 :
3319 18 : nspid = LookupExplicitNamespace(schemaname, false);
3320 :
3321 18 : xsd_schema_element_start(result, targetns);
3322 :
3323 18 : SPI_connect();
3324 :
3325 18 : relid_list = schema_get_xml_visible_tables(nspid);
3326 :
3327 18 : tupdesc_list = NIL;
3328 54 : foreach(cell, relid_list)
3329 : {
3330 : Relation rel;
3331 :
3332 36 : rel = table_open(lfirst_oid(cell), AccessShareLock);
3333 36 : tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3334 36 : table_close(rel, NoLock);
3335 : }
3336 :
3337 18 : appendStringInfoString(result,
3338 : map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3339 :
3340 18 : appendStringInfoString(result,
3341 : map_sql_schema_to_xmlschema_types(nspid, relid_list,
3342 : nulls, tableforest, targetns));
3343 :
3344 18 : xsd_schema_element_end(result);
3345 :
3346 18 : SPI_finish();
3347 :
3348 18 : return result;
3349 : }
3350 :
3351 :
3352 : Datum
3353 12 : schema_to_xmlschema(PG_FUNCTION_ARGS)
3354 : {
3355 12 : Name name = PG_GETARG_NAME(0);
3356 12 : bool nulls = PG_GETARG_BOOL(1);
3357 12 : bool tableforest = PG_GETARG_BOOL(2);
3358 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3359 :
3360 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
3361 : nulls, tableforest, targetns)));
3362 : }
3363 :
3364 :
3365 : Datum
3366 6 : schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3367 : {
3368 6 : Name name = PG_GETARG_NAME(0);
3369 6 : bool nulls = PG_GETARG_BOOL(1);
3370 6 : bool tableforest = PG_GETARG_BOOL(2);
3371 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3372 : char *schemaname;
3373 : Oid nspid;
3374 : StringInfo xmlschema;
3375 :
3376 6 : schemaname = NameStr(*name);
3377 6 : nspid = LookupExplicitNamespace(schemaname, false);
3378 :
3379 6 : xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
3380 : tableforest, targetns);
3381 :
3382 6 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
3383 : xmlschema->data, nulls,
3384 : tableforest, targetns, true)));
3385 : }
3386 :
3387 :
3388 : /*
3389 : * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
3390 : * sections 9.16, 9.17.
3391 : */
3392 :
3393 : static StringInfo
3394 0 : database_to_xml_internal(const char *xmlschema, bool nulls,
3395 : bool tableforest, const char *targetns)
3396 : {
3397 : StringInfo result;
3398 : List *nspid_list;
3399 : ListCell *cell;
3400 : char *xmlcn;
3401 :
3402 0 : xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
3403 : true, false);
3404 0 : result = makeStringInfo();
3405 :
3406 0 : xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
3407 0 : appendStringInfoChar(result, '\n');
3408 :
3409 0 : if (xmlschema)
3410 0 : appendStringInfo(result, "%s\n\n", xmlschema);
3411 :
3412 0 : SPI_connect();
3413 :
3414 0 : nspid_list = database_get_xml_visible_schemas();
3415 :
3416 0 : foreach(cell, nspid_list)
3417 : {
3418 0 : Oid nspid = lfirst_oid(cell);
3419 : StringInfo subres;
3420 :
3421 0 : subres = schema_to_xml_internal(nspid, NULL, nulls,
3422 : tableforest, targetns, false);
3423 :
3424 0 : appendBinaryStringInfo(result, subres->data, subres->len);
3425 0 : appendStringInfoChar(result, '\n');
3426 : }
3427 :
3428 0 : SPI_finish();
3429 :
3430 0 : xmldata_root_element_end(result, xmlcn);
3431 :
3432 0 : return result;
3433 : }
3434 :
3435 :
3436 : Datum
3437 0 : database_to_xml(PG_FUNCTION_ARGS)
3438 : {
3439 0 : bool nulls = PG_GETARG_BOOL(0);
3440 0 : bool tableforest = PG_GETARG_BOOL(1);
3441 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3442 :
3443 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3444 : tableforest, targetns)));
3445 : }
3446 :
3447 :
3448 : static StringInfo
3449 0 : database_to_xmlschema_internal(bool nulls, bool tableforest,
3450 : const char *targetns)
3451 : {
3452 : List *relid_list;
3453 : List *nspid_list;
3454 : List *tupdesc_list;
3455 : ListCell *cell;
3456 : StringInfo result;
3457 :
3458 0 : result = makeStringInfo();
3459 :
3460 0 : xsd_schema_element_start(result, targetns);
3461 :
3462 0 : SPI_connect();
3463 :
3464 0 : relid_list = database_get_xml_visible_tables();
3465 0 : nspid_list = database_get_xml_visible_schemas();
3466 :
3467 0 : tupdesc_list = NIL;
3468 0 : foreach(cell, relid_list)
3469 : {
3470 : Relation rel;
3471 :
3472 0 : rel = table_open(lfirst_oid(cell), AccessShareLock);
3473 0 : tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3474 0 : table_close(rel, NoLock);
3475 : }
3476 :
3477 0 : appendStringInfoString(result,
3478 : map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3479 :
3480 0 : appendStringInfoString(result,
3481 : map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
3482 :
3483 0 : xsd_schema_element_end(result);
3484 :
3485 0 : SPI_finish();
3486 :
3487 0 : return result;
3488 : }
3489 :
3490 :
3491 : Datum
3492 0 : database_to_xmlschema(PG_FUNCTION_ARGS)
3493 : {
3494 0 : bool nulls = PG_GETARG_BOOL(0);
3495 0 : bool tableforest = PG_GETARG_BOOL(1);
3496 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3497 :
3498 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3499 : tableforest, targetns)));
3500 : }
3501 :
3502 :
3503 : Datum
3504 0 : database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3505 : {
3506 0 : bool nulls = PG_GETARG_BOOL(0);
3507 0 : bool tableforest = PG_GETARG_BOOL(1);
3508 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3509 : StringInfo xmlschema;
3510 :
3511 0 : xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3512 :
3513 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3514 : nulls, tableforest, targetns)));
3515 : }
3516 :
3517 :
3518 : /*
3519 : * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3520 : * 9.2.
3521 : */
3522 : static char *
3523 384 : map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
3524 : {
3525 : StringInfoData result;
3526 :
3527 384 : initStringInfo(&result);
3528 :
3529 384 : if (a)
3530 384 : appendStringInfoString(&result,
3531 384 : map_sql_identifier_to_xml_name(a, true, true));
3532 384 : if (b)
3533 384 : appendStringInfo(&result, ".%s",
3534 : map_sql_identifier_to_xml_name(b, true, true));
3535 384 : if (c)
3536 384 : appendStringInfo(&result, ".%s",
3537 : map_sql_identifier_to_xml_name(c, true, true));
3538 384 : if (d)
3539 366 : appendStringInfo(&result, ".%s",
3540 : map_sql_identifier_to_xml_name(d, true, true));
3541 :
3542 384 : return result.data;
3543 : }
3544 :
3545 :
3546 : /*
3547 : * Map an SQL table to an XML Schema document; see SQL/XML:2008
3548 : * section 9.11.
3549 : *
3550 : * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3551 : * 9.9.
3552 : */
3553 : static const char *
3554 78 : map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3555 : bool tableforest, const char *targetns)
3556 : {
3557 : int i;
3558 : char *xmltn;
3559 : char *tabletypename;
3560 : char *rowtypename;
3561 : StringInfoData result;
3562 :
3563 78 : initStringInfo(&result);
3564 :
3565 78 : if (OidIsValid(relid))
3566 : {
3567 : HeapTuple tuple;
3568 : Form_pg_class reltuple;
3569 :
3570 54 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3571 54 : if (!HeapTupleIsValid(tuple))
3572 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
3573 54 : reltuple = (Form_pg_class) GETSTRUCT(tuple);
3574 :
3575 54 : xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3576 : true, false);
3577 :
3578 54 : tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3579 54 : get_database_name(MyDatabaseId),
3580 54 : get_namespace_name(reltuple->relnamespace),
3581 54 : NameStr(reltuple->relname));
3582 :
3583 54 : rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3584 54 : get_database_name(MyDatabaseId),
3585 54 : get_namespace_name(reltuple->relnamespace),
3586 54 : NameStr(reltuple->relname));
3587 :
3588 54 : ReleaseSysCache(tuple);
3589 : }
3590 : else
3591 : {
3592 24 : if (tableforest)
3593 12 : xmltn = "row";
3594 : else
3595 12 : xmltn = "table";
3596 :
3597 24 : tabletypename = "TableType";
3598 24 : rowtypename = "RowType";
3599 : }
3600 :
3601 78 : xsd_schema_element_start(&result, targetns);
3602 :
3603 78 : appendStringInfoString(&result,
3604 78 : map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3605 :
3606 78 : appendStringInfo(&result,
3607 : "<xsd:complexType name=\"%s\">\n"
3608 : " <xsd:sequence>\n",
3609 : rowtypename);
3610 :
3611 324 : for (i = 0; i < tupdesc->natts; i++)
3612 : {
3613 246 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3614 :
3615 246 : if (att->attisdropped)
3616 6 : continue;
3617 480 : appendStringInfo(&result,
3618 : " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3619 240 : map_sql_identifier_to_xml_name(NameStr(att->attname),
3620 : true, false),
3621 : map_sql_type_to_xml_name(att->atttypid, -1),
3622 : nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3623 : }
3624 :
3625 78 : appendStringInfoString(&result,
3626 : " </xsd:sequence>\n"
3627 : "</xsd:complexType>\n\n");
3628 :
3629 78 : if (!tableforest)
3630 : {
3631 42 : appendStringInfo(&result,
3632 : "<xsd:complexType name=\"%s\">\n"
3633 : " <xsd:sequence>\n"
3634 : " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3635 : " </xsd:sequence>\n"
3636 : "</xsd:complexType>\n\n",
3637 : tabletypename, rowtypename);
3638 :
3639 42 : appendStringInfo(&result,
3640 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3641 : xmltn, tabletypename);
3642 : }
3643 : else
3644 36 : appendStringInfo(&result,
3645 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3646 : xmltn, rowtypename);
3647 :
3648 78 : xsd_schema_element_end(&result);
3649 :
3650 78 : return result.data;
3651 : }
3652 :
3653 :
3654 : /*
3655 : * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3656 : * section 9.12.
3657 : */
3658 : static const char *
3659 18 : map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3660 : bool tableforest, const char *targetns)
3661 : {
3662 : char *dbname;
3663 : char *nspname;
3664 : char *xmlsn;
3665 : char *schematypename;
3666 : StringInfoData result;
3667 : ListCell *cell;
3668 :
3669 18 : dbname = get_database_name(MyDatabaseId);
3670 18 : nspname = get_namespace_name(nspid);
3671 :
3672 18 : initStringInfo(&result);
3673 :
3674 18 : xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3675 :
3676 18 : schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3677 : dbname,
3678 : nspname,
3679 : NULL);
3680 :
3681 18 : appendStringInfo(&result,
3682 : "<xsd:complexType name=\"%s\">\n", schematypename);
3683 18 : if (!tableforest)
3684 6 : appendStringInfoString(&result,
3685 : " <xsd:all>\n");
3686 : else
3687 12 : appendStringInfoString(&result,
3688 : " <xsd:sequence>\n");
3689 :
3690 54 : foreach(cell, relid_list)
3691 : {
3692 36 : Oid relid = lfirst_oid(cell);
3693 36 : char *relname = get_rel_name(relid);
3694 36 : char *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3695 36 : char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3696 : dbname,
3697 : nspname,
3698 : relname);
3699 :
3700 36 : if (!tableforest)
3701 12 : appendStringInfo(&result,
3702 : " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3703 : xmltn, tabletypename);
3704 : else
3705 24 : appendStringInfo(&result,
3706 : " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3707 : xmltn, tabletypename);
3708 : }
3709 :
3710 18 : if (!tableforest)
3711 6 : appendStringInfoString(&result,
3712 : " </xsd:all>\n");
3713 : else
3714 12 : appendStringInfoString(&result,
3715 : " </xsd:sequence>\n");
3716 18 : appendStringInfoString(&result,
3717 : "</xsd:complexType>\n\n");
3718 :
3719 18 : appendStringInfo(&result,
3720 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3721 : xmlsn, schematypename);
3722 :
3723 18 : return result.data;
3724 : }
3725 :
3726 :
3727 : /*
3728 : * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3729 : * section 9.15.
3730 : */
3731 : static const char *
3732 0 : map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3733 : bool tableforest, const char *targetns)
3734 : {
3735 : char *dbname;
3736 : char *xmlcn;
3737 : char *catalogtypename;
3738 : StringInfoData result;
3739 : ListCell *cell;
3740 :
3741 0 : dbname = get_database_name(MyDatabaseId);
3742 :
3743 0 : initStringInfo(&result);
3744 :
3745 0 : xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3746 :
3747 0 : catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3748 : dbname,
3749 : NULL,
3750 : NULL);
3751 :
3752 0 : appendStringInfo(&result,
3753 : "<xsd:complexType name=\"%s\">\n", catalogtypename);
3754 0 : appendStringInfoString(&result,
3755 : " <xsd:all>\n");
3756 :
3757 0 : foreach(cell, nspid_list)
3758 : {
3759 0 : Oid nspid = lfirst_oid(cell);
3760 0 : char *nspname = get_namespace_name(nspid);
3761 0 : char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3762 0 : char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3763 : dbname,
3764 : nspname,
3765 : NULL);
3766 :
3767 0 : appendStringInfo(&result,
3768 : " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3769 : xmlsn, schematypename);
3770 : }
3771 :
3772 0 : appendStringInfoString(&result,
3773 : " </xsd:all>\n");
3774 0 : appendStringInfoString(&result,
3775 : "</xsd:complexType>\n\n");
3776 :
3777 0 : appendStringInfo(&result,
3778 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3779 : xmlcn, catalogtypename);
3780 :
3781 0 : return result.data;
3782 : }
3783 :
3784 :
3785 : /*
3786 : * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3787 : */
3788 : static const char *
3789 810 : map_sql_type_to_xml_name(Oid typeoid, int typmod)
3790 : {
3791 : StringInfoData result;
3792 :
3793 810 : initStringInfo(&result);
3794 :
3795 810 : switch (typeoid)
3796 : {
3797 30 : case BPCHAROID:
3798 30 : if (typmod == -1)
3799 30 : appendStringInfoString(&result, "CHAR");
3800 : else
3801 0 : appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3802 30 : break;
3803 54 : case VARCHAROID:
3804 54 : if (typmod == -1)
3805 54 : appendStringInfoString(&result, "VARCHAR");
3806 : else
3807 0 : appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3808 54 : break;
3809 30 : case NUMERICOID:
3810 30 : if (typmod == -1)
3811 30 : appendStringInfoString(&result, "NUMERIC");
3812 : else
3813 0 : appendStringInfo(&result, "NUMERIC_%d_%d",
3814 0 : ((typmod - VARHDRSZ) >> 16) & 0xffff,
3815 0 : (typmod - VARHDRSZ) & 0xffff);
3816 30 : break;
3817 174 : case INT4OID:
3818 174 : appendStringInfoString(&result, "INTEGER");
3819 174 : break;
3820 30 : case INT2OID:
3821 30 : appendStringInfoString(&result, "SMALLINT");
3822 30 : break;
3823 30 : case INT8OID:
3824 30 : appendStringInfoString(&result, "BIGINT");
3825 30 : break;
3826 30 : case FLOAT4OID:
3827 30 : appendStringInfoString(&result, "REAL");
3828 30 : break;
3829 0 : case FLOAT8OID:
3830 0 : appendStringInfoString(&result, "DOUBLE");
3831 0 : break;
3832 30 : case BOOLOID:
3833 30 : appendStringInfoString(&result, "BOOLEAN");
3834 30 : break;
3835 30 : case TIMEOID:
3836 30 : if (typmod == -1)
3837 30 : appendStringInfoString(&result, "TIME");
3838 : else
3839 0 : appendStringInfo(&result, "TIME_%d", typmod);
3840 30 : break;
3841 30 : case TIMETZOID:
3842 30 : if (typmod == -1)
3843 30 : appendStringInfoString(&result, "TIME_WTZ");
3844 : else
3845 0 : appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3846 30 : break;
3847 30 : case TIMESTAMPOID:
3848 30 : if (typmod == -1)
3849 30 : appendStringInfoString(&result, "TIMESTAMP");
3850 : else
3851 0 : appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3852 30 : break;
3853 30 : case TIMESTAMPTZOID:
3854 30 : if (typmod == -1)
3855 30 : appendStringInfoString(&result, "TIMESTAMP_WTZ");
3856 : else
3857 0 : appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3858 30 : break;
3859 30 : case DATEOID:
3860 30 : appendStringInfoString(&result, "DATE");
3861 30 : break;
3862 30 : case XMLOID:
3863 30 : appendStringInfoString(&result, "XML");
3864 30 : break;
3865 222 : default:
3866 : {
3867 : HeapTuple tuple;
3868 : Form_pg_type typtuple;
3869 :
3870 222 : tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3871 222 : if (!HeapTupleIsValid(tuple))
3872 0 : elog(ERROR, "cache lookup failed for type %u", typeoid);
3873 222 : typtuple = (Form_pg_type) GETSTRUCT(tuple);
3874 :
3875 222 : appendStringInfoString(&result,
3876 222 : map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3877 222 : get_database_name(MyDatabaseId),
3878 222 : get_namespace_name(typtuple->typnamespace),
3879 222 : NameStr(typtuple->typname)));
3880 :
3881 222 : ReleaseSysCache(tuple);
3882 : }
3883 : }
3884 :
3885 810 : return result.data;
3886 : }
3887 :
3888 :
3889 : /*
3890 : * Map a collection of SQL data types to XML Schema data types; see
3891 : * SQL/XML:2008 section 9.7.
3892 : */
3893 : static const char *
3894 96 : map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3895 : {
3896 96 : List *uniquetypes = NIL;
3897 : int i;
3898 : StringInfoData result;
3899 : ListCell *cell0;
3900 :
3901 : /* extract all column types used in the set of TupleDescs */
3902 210 : foreach(cell0, tupdesc_list)
3903 : {
3904 114 : TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
3905 :
3906 702 : for (i = 0; i < tupdesc->natts; i++)
3907 : {
3908 588 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3909 :
3910 588 : if (att->attisdropped)
3911 24 : continue;
3912 564 : uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3913 : }
3914 : }
3915 :
3916 : /* add base types of domains */
3917 642 : foreach(cell0, uniquetypes)
3918 : {
3919 546 : Oid typid = lfirst_oid(cell0);
3920 546 : Oid basetypid = getBaseType(typid);
3921 :
3922 546 : if (basetypid != typid)
3923 24 : uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3924 : }
3925 :
3926 : /* Convert to textual form */
3927 96 : initStringInfo(&result);
3928 :
3929 642 : foreach(cell0, uniquetypes)
3930 : {
3931 546 : appendStringInfo(&result, "%s\n",
3932 : map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3933 : -1));
3934 : }
3935 :
3936 96 : return result.data;
3937 : }
3938 :
3939 :
3940 : /*
3941 : * Map an SQL data type to a named XML Schema data type; see
3942 : * SQL/XML:2008 sections 9.5 and 9.6.
3943 : *
3944 : * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3945 : * a name attribute, which this function does. The name-less version
3946 : * 9.5 doesn't appear to be required anywhere.)
3947 : */
3948 : static const char *
3949 546 : map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3950 : {
3951 : StringInfoData result;
3952 546 : const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3953 :
3954 546 : initStringInfo(&result);
3955 :
3956 546 : if (typeoid == XMLOID)
3957 : {
3958 24 : appendStringInfoString(&result,
3959 : "<xsd:complexType mixed=\"true\">\n"
3960 : " <xsd:sequence>\n"
3961 : " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3962 : " </xsd:sequence>\n"
3963 : "</xsd:complexType>\n");
3964 : }
3965 : else
3966 : {
3967 522 : appendStringInfo(&result,
3968 : "<xsd:simpleType name=\"%s\">\n", typename);
3969 :
3970 522 : switch (typeoid)
3971 : {
3972 138 : case BPCHAROID:
3973 : case VARCHAROID:
3974 : case TEXTOID:
3975 138 : appendStringInfoString(&result,
3976 : " <xsd:restriction base=\"xsd:string\">\n");
3977 138 : if (typmod != -1)
3978 0 : appendStringInfo(&result,
3979 : " <xsd:maxLength value=\"%d\"/>\n",
3980 : typmod - VARHDRSZ);
3981 138 : appendStringInfoString(&result, " </xsd:restriction>\n");
3982 138 : break;
3983 :
3984 24 : case BYTEAOID:
3985 24 : appendStringInfo(&result,
3986 : " <xsd:restriction base=\"xsd:%s\">\n"
3987 : " </xsd:restriction>\n",
3988 24 : xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3989 24 : break;
3990 :
3991 24 : case NUMERICOID:
3992 24 : if (typmod != -1)
3993 0 : appendStringInfo(&result,
3994 : " <xsd:restriction base=\"xsd:decimal\">\n"
3995 : " <xsd:totalDigits value=\"%d\"/>\n"
3996 : " <xsd:fractionDigits value=\"%d\"/>\n"
3997 : " </xsd:restriction>\n",
3998 0 : ((typmod - VARHDRSZ) >> 16) & 0xffff,
3999 0 : (typmod - VARHDRSZ) & 0xffff);
4000 24 : break;
4001 :
4002 24 : case INT2OID:
4003 24 : appendStringInfo(&result,
4004 : " <xsd:restriction base=\"xsd:short\">\n"
4005 : " <xsd:maxInclusive value=\"%d\"/>\n"
4006 : " <xsd:minInclusive value=\"%d\"/>\n"
4007 : " </xsd:restriction>\n",
4008 : SHRT_MAX, SHRT_MIN);
4009 24 : break;
4010 :
4011 96 : case INT4OID:
4012 96 : appendStringInfo(&result,
4013 : " <xsd:restriction base=\"xsd:int\">\n"
4014 : " <xsd:maxInclusive value=\"%d\"/>\n"
4015 : " <xsd:minInclusive value=\"%d\"/>\n"
4016 : " </xsd:restriction>\n",
4017 : INT_MAX, INT_MIN);
4018 96 : break;
4019 :
4020 24 : case INT8OID:
4021 24 : appendStringInfo(&result,
4022 : " <xsd:restriction base=\"xsd:long\">\n"
4023 : " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
4024 : " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
4025 : " </xsd:restriction>\n",
4026 : PG_INT64_MAX,
4027 : PG_INT64_MIN);
4028 24 : break;
4029 :
4030 24 : case FLOAT4OID:
4031 24 : appendStringInfoString(&result,
4032 : " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
4033 24 : break;
4034 :
4035 0 : case FLOAT8OID:
4036 0 : appendStringInfoString(&result,
4037 : " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
4038 0 : break;
4039 :
4040 24 : case BOOLOID:
4041 24 : appendStringInfoString(&result,
4042 : " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
4043 24 : break;
4044 :
4045 48 : case TIMEOID:
4046 : case TIMETZOID:
4047 : {
4048 48 : const char *tz = (typeoid == TIMETZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
4049 :
4050 48 : if (typmod == -1)
4051 48 : appendStringInfo(&result,
4052 : " <xsd:restriction base=\"xsd:time\">\n"
4053 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
4054 : " </xsd:restriction>\n", tz);
4055 0 : else if (typmod == 0)
4056 0 : appendStringInfo(&result,
4057 : " <xsd:restriction base=\"xsd:time\">\n"
4058 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
4059 : " </xsd:restriction>\n", tz);
4060 : else
4061 0 : appendStringInfo(&result,
4062 : " <xsd:restriction base=\"xsd:time\">\n"
4063 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
4064 : " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
4065 48 : break;
4066 : }
4067 :
4068 48 : case TIMESTAMPOID:
4069 : case TIMESTAMPTZOID:
4070 : {
4071 48 : const char *tz = (typeoid == TIMESTAMPTZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
4072 :
4073 48 : if (typmod == -1)
4074 48 : appendStringInfo(&result,
4075 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4076 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
4077 : " </xsd:restriction>\n", tz);
4078 0 : else if (typmod == 0)
4079 0 : appendStringInfo(&result,
4080 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4081 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
4082 : " </xsd:restriction>\n", tz);
4083 : else
4084 0 : appendStringInfo(&result,
4085 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4086 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
4087 : " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
4088 48 : break;
4089 : }
4090 :
4091 24 : case DATEOID:
4092 24 : appendStringInfoString(&result,
4093 : " <xsd:restriction base=\"xsd:date\">\n"
4094 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
4095 : " </xsd:restriction>\n");
4096 24 : break;
4097 :
4098 24 : default:
4099 24 : if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
4100 : {
4101 : Oid base_typeoid;
4102 24 : int32 base_typmod = -1;
4103 :
4104 24 : base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
4105 :
4106 24 : appendStringInfo(&result,
4107 : " <xsd:restriction base=\"%s\"/>\n",
4108 : map_sql_type_to_xml_name(base_typeoid, base_typmod));
4109 : }
4110 24 : break;
4111 : }
4112 522 : appendStringInfoString(&result, "</xsd:simpleType>\n");
4113 : }
4114 :
4115 546 : return result.data;
4116 : }
4117 :
4118 :
4119 : /*
4120 : * Map an SQL row to an XML element, taking the row from the active
4121 : * SPI cursor. See also SQL/XML:2008 section 9.10.
4122 : */
4123 : static void
4124 312 : SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
4125 : bool nulls, bool tableforest,
4126 : const char *targetns, bool top_level)
4127 : {
4128 : int i;
4129 : char *xmltn;
4130 :
4131 312 : if (tablename)
4132 228 : xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
4133 : else
4134 : {
4135 84 : if (tableforest)
4136 36 : xmltn = "row";
4137 : else
4138 48 : xmltn = "table";
4139 : }
4140 :
4141 312 : if (tableforest)
4142 162 : xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
4143 : else
4144 150 : appendStringInfoString(result, "<row>\n");
4145 :
4146 1272 : for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
4147 : {
4148 : char *colname;
4149 : Datum colval;
4150 : bool isnull;
4151 :
4152 960 : colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
4153 : true, false);
4154 960 : colval = SPI_getbinval(SPI_tuptable->vals[rownum],
4155 960 : SPI_tuptable->tupdesc,
4156 : i,
4157 : &isnull);
4158 960 : if (isnull)
4159 : {
4160 114 : if (nulls)
4161 60 : appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname);
4162 : }
4163 : else
4164 846 : appendStringInfo(result, " <%s>%s</%s>\n",
4165 : colname,
4166 : map_sql_value_to_xml_value(colval,
4167 846 : SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
4168 : colname);
4169 : }
4170 :
4171 312 : if (tableforest)
4172 : {
4173 162 : xmldata_root_element_end(result, xmltn);
4174 162 : appendStringInfoChar(result, '\n');
4175 : }
4176 : else
4177 150 : appendStringInfoString(result, "</row>\n\n");
4178 312 : }
4179 :
4180 :
4181 : /*
4182 : * XPath related functions
4183 : */
4184 :
4185 : #ifdef USE_LIBXML
4186 :
4187 : /*
4188 : * Convert XML node to text.
4189 : *
4190 : * For attribute and text nodes, return the escaped text. For anything else,
4191 : * dump the whole subtree.
4192 : */
4193 : static text *
4194 192 : xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
4195 : {
4196 192 : xmltype *result = NULL;
4197 :
4198 192 : if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
4199 162 : {
4200 162 : void (*volatile nodefree) (xmlNodePtr) = NULL;
4201 162 : volatile xmlBufferPtr buf = NULL;
4202 162 : volatile xmlNodePtr cur_copy = NULL;
4203 :
4204 162 : PG_TRY();
4205 : {
4206 : int bytes;
4207 :
4208 162 : buf = xmlBufferCreate();
4209 162 : if (buf == NULL || xmlerrcxt->err_occurred)
4210 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4211 : "could not allocate xmlBuffer");
4212 :
4213 : /*
4214 : * Produce a dump of the node that we can serialize. xmlNodeDump
4215 : * does that, but the result of that function won't contain
4216 : * namespace definitions from ancestor nodes, so we first do a
4217 : * xmlCopyNode() which duplicates the node along with its required
4218 : * namespace definitions.
4219 : *
4220 : * Some old libxml2 versions such as 2.7.6 produce partially
4221 : * broken XML_DOCUMENT_NODE nodes (unset content field) when
4222 : * copying them. xmlNodeDump of such a node works fine, but
4223 : * xmlFreeNode crashes; set us up to call xmlFreeDoc instead.
4224 : */
4225 162 : cur_copy = xmlCopyNode(cur, 1);
4226 162 : if (cur_copy == NULL || xmlerrcxt->err_occurred)
4227 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4228 : "could not copy node");
4229 324 : nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ?
4230 162 : (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode;
4231 :
4232 162 : bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0);
4233 162 : if (bytes == -1 || xmlerrcxt->err_occurred)
4234 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4235 : "could not dump node");
4236 :
4237 162 : result = xmlBuffer_to_xmltype(buf);
4238 : }
4239 0 : PG_FINALLY();
4240 : {
4241 162 : if (nodefree)
4242 162 : nodefree(cur_copy);
4243 162 : if (buf)
4244 162 : xmlBufferFree(buf);
4245 : }
4246 162 : PG_END_TRY();
4247 : }
4248 : else
4249 : {
4250 30 : volatile xmlChar *str = NULL;
4251 :
4252 30 : PG_TRY();
4253 : {
4254 : char *escaped;
4255 :
4256 30 : str = xmlXPathCastNodeToString(cur);
4257 30 : if (str == NULL || xmlerrcxt->err_occurred)
4258 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4259 : "could not allocate xmlChar");
4260 :
4261 : /* Here we rely on XML having the same representation as TEXT */
4262 30 : escaped = escape_xml((char *) str);
4263 :
4264 30 : result = (xmltype *) cstring_to_text(escaped);
4265 30 : pfree(escaped);
4266 : }
4267 0 : PG_FINALLY();
4268 : {
4269 30 : if (str)
4270 30 : xmlFree((xmlChar *) str);
4271 : }
4272 30 : PG_END_TRY();
4273 : }
4274 :
4275 192 : return result;
4276 : }
4277 :
4278 : /*
4279 : * Convert an XML XPath object (the result of evaluating an XPath expression)
4280 : * to an array of xml values, which are appended to astate. The function
4281 : * result value is the number of elements in the array.
4282 : *
4283 : * If "astate" is NULL then we don't generate the array value, but we still
4284 : * return the number of elements it would have had.
4285 : *
4286 : * Nodesets are converted to an array containing the nodes' textual
4287 : * representations. Primitive values (float, double, string) are converted
4288 : * to a single-element array containing the value's string representation.
4289 : */
4290 : static int
4291 540 : xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
4292 : ArrayBuildState *astate,
4293 : PgXmlErrorContext *xmlerrcxt)
4294 : {
4295 540 : int result = 0;
4296 : Datum datum;
4297 : Oid datumtype;
4298 : char *result_str;
4299 :
4300 540 : switch (xpathobj->type)
4301 : {
4302 498 : case XPATH_NODESET:
4303 498 : if (xpathobj->nodesetval != NULL)
4304 : {
4305 354 : result = xpathobj->nodesetval->nodeNr;
4306 354 : if (astate != NULL)
4307 : {
4308 : int i;
4309 :
4310 168 : for (i = 0; i < result; i++)
4311 : {
4312 90 : datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4313 : xmlerrcxt));
4314 90 : (void) accumArrayResult(astate, datum, false,
4315 : XMLOID, CurrentMemoryContext);
4316 : }
4317 : }
4318 : }
4319 498 : return result;
4320 :
4321 12 : case XPATH_BOOLEAN:
4322 12 : if (astate == NULL)
4323 0 : return 1;
4324 12 : datum = BoolGetDatum(xpathobj->boolval);
4325 12 : datumtype = BOOLOID;
4326 12 : break;
4327 :
4328 18 : case XPATH_NUMBER:
4329 18 : if (astate == NULL)
4330 12 : return 1;
4331 6 : datum = Float8GetDatum(xpathobj->floatval);
4332 6 : datumtype = FLOAT8OID;
4333 6 : break;
4334 :
4335 12 : case XPATH_STRING:
4336 12 : if (astate == NULL)
4337 0 : return 1;
4338 12 : datum = CStringGetDatum((char *) xpathobj->stringval);
4339 12 : datumtype = CSTRINGOID;
4340 12 : break;
4341 :
4342 0 : default:
4343 0 : elog(ERROR, "xpath expression result type %d is unsupported",
4344 : xpathobj->type);
4345 : return 0; /* keep compiler quiet */
4346 : }
4347 :
4348 : /* Common code for scalar-value cases */
4349 30 : result_str = map_sql_value_to_xml_value(datum, datumtype, true);
4350 30 : datum = PointerGetDatum(cstring_to_xmltype(result_str));
4351 30 : (void) accumArrayResult(astate, datum, false,
4352 : XMLOID, CurrentMemoryContext);
4353 30 : return 1;
4354 : }
4355 :
4356 :
4357 : /*
4358 : * Common code for xpath() and xmlexists()
4359 : *
4360 : * Evaluate XPath expression and return number of nodes in res_nitems
4361 : * and array of XML values in astate. Either of those pointers can be
4362 : * NULL if the corresponding result isn't wanted.
4363 : *
4364 : * It is up to the user to ensure that the XML passed is in fact
4365 : * an XML document - XPath doesn't work easily on fragments without
4366 : * a context node being known.
4367 : */
4368 : static void
4369 558 : xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
4370 : int *res_nitems, ArrayBuildState *astate)
4371 : {
4372 : PgXmlErrorContext *xmlerrcxt;
4373 558 : volatile xmlParserCtxtPtr ctxt = NULL;
4374 558 : volatile xmlDocPtr doc = NULL;
4375 558 : volatile xmlXPathContextPtr xpathctx = NULL;
4376 558 : volatile xmlXPathCompExprPtr xpathcomp = NULL;
4377 558 : volatile xmlXPathObjectPtr xpathobj = NULL;
4378 : char *datastr;
4379 : int32 len;
4380 : int32 xpath_len;
4381 : xmlChar *string;
4382 : xmlChar *xpath_expr;
4383 558 : size_t xmldecl_len = 0;
4384 : int i;
4385 : int ndim;
4386 : Datum *ns_names_uris;
4387 : bool *ns_names_uris_nulls;
4388 : int ns_count;
4389 :
4390 : /*
4391 : * Namespace mappings are passed as text[]. If an empty array is passed
4392 : * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
4393 : * Else, a 2-dimensional array with length of the second axis being equal
4394 : * to 2 should be passed, i.e., every subarray contains 2 elements, the
4395 : * first element defining the name, the second one the URI. Example:
4396 : * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
4397 : * 'http://example2.com']].
4398 : */
4399 558 : ndim = namespaces ? ARR_NDIM(namespaces) : 0;
4400 558 : if (ndim != 0)
4401 : {
4402 : int *dims;
4403 :
4404 126 : dims = ARR_DIMS(namespaces);
4405 :
4406 126 : if (ndim != 2 || dims[1] != 2)
4407 0 : ereport(ERROR,
4408 : (errcode(ERRCODE_DATA_EXCEPTION),
4409 : errmsg("invalid array for XML namespace mapping"),
4410 : errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
4411 :
4412 : Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
4413 :
4414 126 : deconstruct_array_builtin(namespaces, TEXTOID,
4415 : &ns_names_uris, &ns_names_uris_nulls,
4416 : &ns_count);
4417 :
4418 : Assert((ns_count % 2) == 0); /* checked above */
4419 126 : ns_count /= 2; /* count pairs only */
4420 : }
4421 : else
4422 : {
4423 432 : ns_names_uris = NULL;
4424 432 : ns_names_uris_nulls = NULL;
4425 432 : ns_count = 0;
4426 : }
4427 :
4428 558 : datastr = VARDATA(data);
4429 558 : len = VARSIZE(data) - VARHDRSZ;
4430 558 : xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
4431 558 : if (xpath_len == 0)
4432 6 : ereport(ERROR,
4433 : (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4434 : errmsg("empty XPath expression")));
4435 :
4436 552 : string = pg_xmlCharStrndup(datastr, len);
4437 552 : xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
4438 :
4439 : /*
4440 : * In a UTF8 database, skip any xml declaration, which might assert
4441 : * another encoding. Ignore parse_xml_decl() failure, letting
4442 : * xmlCtxtReadMemory() report parse errors. Documentation disclaims
4443 : * xpath() support for non-ASCII data in non-UTF8 databases, so leave
4444 : * those scenarios bug-compatible with historical behavior.
4445 : */
4446 552 : if (GetDatabaseEncoding() == PG_UTF8)
4447 552 : parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
4448 :
4449 552 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4450 :
4451 552 : PG_TRY();
4452 : {
4453 552 : xmlInitParser();
4454 :
4455 : /*
4456 : * redundant XML parsing (two parsings for the same value during one
4457 : * command execution are possible)
4458 : */
4459 552 : ctxt = xmlNewParserCtxt();
4460 552 : if (ctxt == NULL || xmlerrcxt->err_occurred)
4461 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4462 : "could not allocate parser context");
4463 1104 : doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4464 552 : len - xmldecl_len, NULL, NULL, 0);
4465 552 : if (doc == NULL || xmlerrcxt->err_occurred)
4466 12 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4467 : "could not parse XML document");
4468 540 : xpathctx = xmlXPathNewContext(doc);
4469 540 : if (xpathctx == NULL || xmlerrcxt->err_occurred)
4470 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4471 : "could not allocate XPath context");
4472 540 : xpathctx->node = (xmlNodePtr) doc;
4473 :
4474 : /* register namespaces, if any */
4475 540 : if (ns_count > 0)
4476 : {
4477 252 : for (i = 0; i < ns_count; i++)
4478 : {
4479 : char *ns_name;
4480 : char *ns_uri;
4481 :
4482 126 : if (ns_names_uris_nulls[i * 2] ||
4483 126 : ns_names_uris_nulls[i * 2 + 1])
4484 0 : ereport(ERROR,
4485 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4486 : errmsg("neither namespace name nor URI may be null")));
4487 126 : ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
4488 126 : ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
4489 126 : if (xmlXPathRegisterNs(xpathctx,
4490 : (xmlChar *) ns_name,
4491 : (xmlChar *) ns_uri) != 0)
4492 0 : ereport(ERROR, /* is this an internal error??? */
4493 : (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
4494 : ns_name, ns_uri)));
4495 : }
4496 : }
4497 :
4498 : /*
4499 : * Note: here and elsewhere, be careful to use xmlXPathCtxtCompile not
4500 : * xmlXPathCompile. In libxml2 2.13.3 and older, the latter function
4501 : * fails to defend itself against recursion-to-stack-overflow. See
4502 : * https://gitlab.gnome.org/GNOME/libxml2/-/issues/799
4503 : */
4504 540 : xpathcomp = xmlXPathCtxtCompile(xpathctx, xpath_expr);
4505 540 : if (xpathcomp == NULL || xmlerrcxt->err_occurred)
4506 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4507 : "invalid XPath expression");
4508 :
4509 : /*
4510 : * Version 2.6.27 introduces a function named
4511 : * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
4512 : * but we can derive the existence by whether any nodes are returned,
4513 : * thereby preventing a library version upgrade and keeping the code
4514 : * the same.
4515 : */
4516 540 : xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
4517 540 : if (xpathobj == NULL || xmlerrcxt->err_occurred)
4518 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4519 : "could not create XPath object");
4520 :
4521 : /*
4522 : * Extract the results as requested.
4523 : */
4524 540 : if (res_nitems != NULL)
4525 432 : *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4526 : else
4527 108 : (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4528 : }
4529 12 : PG_CATCH();
4530 : {
4531 12 : if (xpathobj)
4532 0 : xmlXPathFreeObject(xpathobj);
4533 12 : if (xpathcomp)
4534 0 : xmlXPathFreeCompExpr(xpathcomp);
4535 12 : if (xpathctx)
4536 0 : xmlXPathFreeContext(xpathctx);
4537 12 : if (doc)
4538 12 : xmlFreeDoc(doc);
4539 12 : if (ctxt)
4540 12 : xmlFreeParserCtxt(ctxt);
4541 :
4542 12 : pg_xml_done(xmlerrcxt, true);
4543 :
4544 12 : PG_RE_THROW();
4545 : }
4546 540 : PG_END_TRY();
4547 :
4548 540 : xmlXPathFreeObject(xpathobj);
4549 540 : xmlXPathFreeCompExpr(xpathcomp);
4550 540 : xmlXPathFreeContext(xpathctx);
4551 540 : xmlFreeDoc(doc);
4552 540 : xmlFreeParserCtxt(ctxt);
4553 :
4554 540 : pg_xml_done(xmlerrcxt, false);
4555 540 : }
4556 : #endif /* USE_LIBXML */
4557 :
4558 : /*
4559 : * Evaluate XPath expression and return array of XML values.
4560 : *
4561 : * As we have no support of XQuery sequences yet, this function seems
4562 : * to be the most useful one (array of XML functions plays a role of
4563 : * some kind of substitution for XQuery sequences).
4564 : */
4565 : Datum
4566 126 : xpath(PG_FUNCTION_ARGS)
4567 : {
4568 : #ifdef USE_LIBXML
4569 126 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4570 126 : xmltype *data = PG_GETARG_XML_P(1);
4571 126 : ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4572 : ArrayBuildState *astate;
4573 :
4574 126 : astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4575 126 : xpath_internal(xpath_expr_text, data, namespaces,
4576 : NULL, astate);
4577 108 : PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
4578 : #else
4579 : NO_XML_SUPPORT();
4580 : return 0;
4581 : #endif
4582 : }
4583 :
4584 : /*
4585 : * Determines if the node specified by the supplied XPath exists
4586 : * in a given XML document, returning a boolean.
4587 : */
4588 : Datum
4589 198 : xmlexists(PG_FUNCTION_ARGS)
4590 : {
4591 : #ifdef USE_LIBXML
4592 198 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4593 198 : xmltype *data = PG_GETARG_XML_P(1);
4594 : int res_nitems;
4595 :
4596 198 : xpath_internal(xpath_expr_text, data, NULL,
4597 : &res_nitems, NULL);
4598 :
4599 198 : PG_RETURN_BOOL(res_nitems > 0);
4600 : #else
4601 : NO_XML_SUPPORT();
4602 : return 0;
4603 : #endif
4604 : }
4605 :
4606 : /*
4607 : * Determines if the node specified by the supplied XPath exists
4608 : * in a given XML document, returning a boolean. Differs from
4609 : * xmlexists as it supports namespaces and is not defined in SQL/XML.
4610 : */
4611 : Datum
4612 234 : xpath_exists(PG_FUNCTION_ARGS)
4613 : {
4614 : #ifdef USE_LIBXML
4615 234 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4616 234 : xmltype *data = PG_GETARG_XML_P(1);
4617 234 : ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4618 : int res_nitems;
4619 :
4620 234 : xpath_internal(xpath_expr_text, data, namespaces,
4621 : &res_nitems, NULL);
4622 :
4623 234 : PG_RETURN_BOOL(res_nitems > 0);
4624 : #else
4625 : NO_XML_SUPPORT();
4626 : return 0;
4627 : #endif
4628 : }
4629 :
4630 : /*
4631 : * Functions for checking well-formed-ness
4632 : */
4633 :
4634 : #ifdef USE_LIBXML
4635 : static bool
4636 114 : wellformed_xml(text *data, XmlOptionType xmloption_arg)
4637 : {
4638 : xmlDocPtr doc;
4639 114 : ErrorSaveContext escontext = {T_ErrorSaveContext};
4640 :
4641 : /*
4642 : * We'll report "true" if no soft error is reported by xml_parse().
4643 : */
4644 114 : doc = xml_parse(data, xmloption_arg, true,
4645 : GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
4646 114 : if (doc)
4647 60 : xmlFreeDoc(doc);
4648 :
4649 114 : return !escontext.error_occurred;
4650 : }
4651 : #endif
4652 :
4653 : Datum
4654 90 : xml_is_well_formed(PG_FUNCTION_ARGS)
4655 : {
4656 : #ifdef USE_LIBXML
4657 90 : text *data = PG_GETARG_TEXT_PP(0);
4658 :
4659 90 : PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4660 : #else
4661 : NO_XML_SUPPORT();
4662 : return 0;
4663 : #endif /* not USE_LIBXML */
4664 : }
4665 :
4666 : Datum
4667 12 : xml_is_well_formed_document(PG_FUNCTION_ARGS)
4668 : {
4669 : #ifdef USE_LIBXML
4670 12 : text *data = PG_GETARG_TEXT_PP(0);
4671 :
4672 12 : PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4673 : #else
4674 : NO_XML_SUPPORT();
4675 : return 0;
4676 : #endif /* not USE_LIBXML */
4677 : }
4678 :
4679 : Datum
4680 12 : xml_is_well_formed_content(PG_FUNCTION_ARGS)
4681 : {
4682 : #ifdef USE_LIBXML
4683 12 : text *data = PG_GETARG_TEXT_PP(0);
4684 :
4685 12 : PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4686 : #else
4687 : NO_XML_SUPPORT();
4688 : return 0;
4689 : #endif /* not USE_LIBXML */
4690 : }
4691 :
4692 : /*
4693 : * support functions for XMLTABLE
4694 : *
4695 : */
4696 : #ifdef USE_LIBXML
4697 :
4698 : /*
4699 : * Returns private data from executor state. Ensure validity by check with
4700 : * MAGIC number.
4701 : */
4702 : static inline XmlTableBuilderData *
4703 160214 : GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4704 : {
4705 : XmlTableBuilderData *result;
4706 :
4707 160214 : if (!IsA(state, TableFuncScanState))
4708 0 : elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4709 160214 : result = (XmlTableBuilderData *) state->opaque;
4710 160214 : if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4711 0 : elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4712 :
4713 160214 : return result;
4714 : }
4715 : #endif
4716 :
4717 : /*
4718 : * XmlTableInitOpaque
4719 : * Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4720 : * the XML parser.
4721 : *
4722 : * Note: Because we call pg_xml_init() here and pg_xml_done() in
4723 : * XmlTableDestroyOpaque, it is critical for robustness that no other
4724 : * executor nodes run until this node is processed to completion. Caller
4725 : * must execute this to completion (probably filling a tuplestore to exhaust
4726 : * this node in a single pass) instead of using row-per-call mode.
4727 : */
4728 : static void
4729 264 : XmlTableInitOpaque(TableFuncScanState *state, int natts)
4730 : {
4731 : #ifdef USE_LIBXML
4732 264 : volatile xmlParserCtxtPtr ctxt = NULL;
4733 : XmlTableBuilderData *xtCxt;
4734 : PgXmlErrorContext *xmlerrcxt;
4735 :
4736 264 : xtCxt = palloc0(sizeof(XmlTableBuilderData));
4737 264 : xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4738 264 : xtCxt->natts = natts;
4739 264 : xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4740 :
4741 264 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4742 :
4743 264 : PG_TRY();
4744 : {
4745 264 : xmlInitParser();
4746 :
4747 264 : ctxt = xmlNewParserCtxt();
4748 264 : if (ctxt == NULL || xmlerrcxt->err_occurred)
4749 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4750 : "could not allocate parser context");
4751 : }
4752 0 : PG_CATCH();
4753 : {
4754 0 : if (ctxt != NULL)
4755 0 : xmlFreeParserCtxt(ctxt);
4756 :
4757 0 : pg_xml_done(xmlerrcxt, true);
4758 :
4759 0 : PG_RE_THROW();
4760 : }
4761 264 : PG_END_TRY();
4762 :
4763 264 : xtCxt->xmlerrcxt = xmlerrcxt;
4764 264 : xtCxt->ctxt = ctxt;
4765 :
4766 264 : state->opaque = xtCxt;
4767 : #else
4768 : NO_XML_SUPPORT();
4769 : #endif /* not USE_LIBXML */
4770 264 : }
4771 :
4772 : /*
4773 : * XmlTableSetDocument
4774 : * Install the input document
4775 : */
4776 : static void
4777 264 : XmlTableSetDocument(TableFuncScanState *state, Datum value)
4778 : {
4779 : #ifdef USE_LIBXML
4780 : XmlTableBuilderData *xtCxt;
4781 264 : xmltype *xmlval = DatumGetXmlP(value);
4782 : char *str;
4783 : xmlChar *xstr;
4784 : int length;
4785 264 : volatile xmlDocPtr doc = NULL;
4786 264 : volatile xmlXPathContextPtr xpathcxt = NULL;
4787 :
4788 264 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4789 :
4790 : /*
4791 : * Use out function for casting to string (remove encoding property). See
4792 : * comment in xml_out.
4793 : */
4794 264 : str = xml_out_internal(xmlval, 0);
4795 :
4796 264 : length = strlen(str);
4797 264 : xstr = pg_xmlCharStrndup(str, length);
4798 :
4799 264 : PG_TRY();
4800 : {
4801 264 : doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4802 264 : if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4803 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4804 : "could not parse XML document");
4805 264 : xpathcxt = xmlXPathNewContext(doc);
4806 264 : if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4807 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4808 : "could not allocate XPath context");
4809 264 : xpathcxt->node = (xmlNodePtr) doc;
4810 : }
4811 0 : PG_CATCH();
4812 : {
4813 0 : if (xpathcxt != NULL)
4814 0 : xmlXPathFreeContext(xpathcxt);
4815 0 : if (doc != NULL)
4816 0 : xmlFreeDoc(doc);
4817 :
4818 0 : PG_RE_THROW();
4819 : }
4820 264 : PG_END_TRY();
4821 :
4822 264 : xtCxt->doc = doc;
4823 264 : xtCxt->xpathcxt = xpathcxt;
4824 : #else
4825 : NO_XML_SUPPORT();
4826 : #endif /* not USE_LIBXML */
4827 264 : }
4828 :
4829 : /*
4830 : * XmlTableSetNamespace
4831 : * Add a namespace declaration
4832 : */
4833 : static void
4834 18 : XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
4835 : {
4836 : #ifdef USE_LIBXML
4837 : XmlTableBuilderData *xtCxt;
4838 :
4839 18 : if (name == NULL)
4840 6 : ereport(ERROR,
4841 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4842 : errmsg("DEFAULT namespace is not supported")));
4843 12 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4844 :
4845 12 : if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4846 12 : pg_xmlCharStrndup(name, strlen(name)),
4847 12 : pg_xmlCharStrndup(uri, strlen(uri))))
4848 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4849 : "could not set XML namespace");
4850 : #else
4851 : NO_XML_SUPPORT();
4852 : #endif /* not USE_LIBXML */
4853 12 : }
4854 :
4855 : /*
4856 : * XmlTableSetRowFilter
4857 : * Install the row-filter Xpath expression.
4858 : */
4859 : static void
4860 258 : XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4861 : {
4862 : #ifdef USE_LIBXML
4863 : XmlTableBuilderData *xtCxt;
4864 : xmlChar *xstr;
4865 :
4866 258 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4867 :
4868 258 : if (*path == '\0')
4869 0 : ereport(ERROR,
4870 : (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4871 : errmsg("row path filter must not be empty string")));
4872 :
4873 258 : xstr = pg_xmlCharStrndup(path, strlen(path));
4874 :
4875 : /* We require XmlTableSetDocument to have been done already */
4876 : Assert(xtCxt->xpathcxt != NULL);
4877 :
4878 258 : xtCxt->xpathcomp = xmlXPathCtxtCompile(xtCxt->xpathcxt, xstr);
4879 258 : if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4880 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4881 : "invalid XPath expression");
4882 : #else
4883 : NO_XML_SUPPORT();
4884 : #endif /* not USE_LIBXML */
4885 258 : }
4886 :
4887 : /*
4888 : * XmlTableSetColumnFilter
4889 : * Install the column-filter Xpath expression, for the given column.
4890 : */
4891 : static void
4892 774 : XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
4893 : {
4894 : #ifdef USE_LIBXML
4895 : XmlTableBuilderData *xtCxt;
4896 : xmlChar *xstr;
4897 :
4898 : Assert(PointerIsValid(path));
4899 :
4900 774 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4901 :
4902 774 : if (*path == '\0')
4903 0 : ereport(ERROR,
4904 : (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4905 : errmsg("column path filter must not be empty string")));
4906 :
4907 774 : xstr = pg_xmlCharStrndup(path, strlen(path));
4908 :
4909 : /* We require XmlTableSetDocument to have been done already */
4910 : Assert(xtCxt->xpathcxt != NULL);
4911 :
4912 774 : xtCxt->xpathscomp[colnum] = xmlXPathCtxtCompile(xtCxt->xpathcxt, xstr);
4913 774 : if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4914 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4915 : "invalid XPath expression");
4916 : #else
4917 : NO_XML_SUPPORT();
4918 : #endif /* not USE_LIBXML */
4919 774 : }
4920 :
4921 : /*
4922 : * XmlTableFetchRow
4923 : * Prepare the next "current" tuple for upcoming GetValue calls.
4924 : * Returns false if the row-filter expression returned no more rows.
4925 : */
4926 : static bool
4927 23024 : XmlTableFetchRow(TableFuncScanState *state)
4928 : {
4929 : #ifdef USE_LIBXML
4930 : XmlTableBuilderData *xtCxt;
4931 :
4932 23024 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4933 :
4934 : /* Propagate our own error context to libxml2 */
4935 23024 : xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
4936 :
4937 23024 : if (xtCxt->xpathobj == NULL)
4938 : {
4939 258 : xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4940 258 : if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4941 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4942 : "could not create XPath object");
4943 :
4944 258 : xtCxt->row_count = 0;
4945 : }
4946 :
4947 23024 : if (xtCxt->xpathobj->type == XPATH_NODESET)
4948 : {
4949 23024 : if (xtCxt->xpathobj->nodesetval != NULL)
4950 : {
4951 23024 : if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4952 22778 : return true;
4953 : }
4954 : }
4955 :
4956 246 : return false;
4957 : #else
4958 : NO_XML_SUPPORT();
4959 : return false;
4960 : #endif /* not USE_LIBXML */
4961 : }
4962 :
4963 : /*
4964 : * XmlTableGetValue
4965 : * Return the value for column number 'colnum' for the current row. If
4966 : * column -1 is requested, return representation of the whole row. (NOTE(review): no colnum == -1 handling is visible in this body, which indexes xpathscomp[colnum] directly - confirm this sentence is still accurate.)
4967 : * The column's compiled XPath is evaluated relative to the current row node, and the resulting string is converted with the column's input function; *isnull is set to true when the XPath matches no nodes.
4968 : * This leaks memory, so be sure to reset often the context in which it's
4969 : * called.
4970 : */
4971 : static Datum
4972 135618 : XmlTableGetValue(TableFuncScanState *state, int colnum,
4973 : Oid typid, int32 typmod, bool *isnull)
4974 : {
4975 : #ifdef USE_LIBXML
4976 135618 : Datum result = (Datum) 0; /* stays 0 when *isnull ends up true */
4977 : XmlTableBuilderData *xtCxt;
4978 135618 : volatile xmlXPathObjectPtr xpathobj = NULL; /* assigned inside PG_TRY and read in PG_FINALLY, presumably why it is volatile */
4979 :
4980 135618 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4981 :
4982 : Assert(xtCxt->xpathobj &&
4983 : xtCxt->xpathobj->type == XPATH_NODESET &&
4984 : xtCxt->xpathobj->nodesetval != NULL); /* precondition: the row node set has already been evaluated and cached */
4985 :
4986 : /* Propagate our own error context to libxml2 */
4987 135618 : xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
4988 :
4989 135618 : *isnull = false; /* default; flipped to true only for an empty node set below */
4990 :
4991 : Assert(xtCxt->xpathscomp[colnum] != NULL); /* the column's XPath expression must already be compiled */
4992 :
4993 135618 : PG_TRY();
4994 : {
4995 : xmlNodePtr cur;
4996 135618 : char *cstr = NULL; /* text form of the value, handed to the input function at the end */
4997 :
4998 : /* Set current node as entry point for XPath evaluation */
4999 135618 : cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1]; /* XmlTableFetchRow post-increments row_count, so the current row is at row_count - 1 */
5000 135618 : xtCxt->xpathcxt->node = cur;
5001 :
5002 : /* Evaluate column path */
5003 135618 : xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
5004 135618 : if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred) /* a NULL result or an error flagged in our error context both count as failure */
5005 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
5006 : "could not create XPath object");
5007 :
5008 : /*
5009 : * There are four possible cases, depending on the number of nodes
5010 : * returned by the XPath expression and the type of the target column:
5011 : * a) XPath returns no nodes. b) The target type is XML (return all
5012 : * as XML). For non-XML return types: c) One node (return content).
5013 : * d) Multiple nodes (error).
5014 : */
5015 135618 : if (xpathobj->type == XPATH_NODESET)
5016 : {
5017 135588 : int count = 0; /* number of matched nodes; 0 if there is no node set at all */
5018 :
5019 135588 : if (xpathobj->nodesetval != NULL)
5020 135378 : count = xpathobj->nodesetval->nodeNr;
5021 :
5022 135588 : if (xpathobj->nodesetval == NULL || count == 0)
5023 : {
5024 22710 : *isnull = true; /* case a: nothing matched, cstr stays NULL */
5025 : }
5026 : else
5027 : {
5028 112878 : if (typid == XMLOID)
5029 : {
5030 : text *textstr;
5031 : StringInfoData str;
5032 :
5033 : /* Concatenate serialized values */
5034 72 : initStringInfo(&str); /* case b: XML target, every matched node is kept */
5035 174 : for (int i = 0; i < count; i++)
5036 : {
5037 : textstr =
5038 102 : xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
5039 : xtCxt->xmlerrcxt);
5040 :
5041 102 : appendStringInfoText(&str, textstr); /* textstr is not released here (see the leak note in the header) */
5042 : }
5043 72 : cstr = str.data;
5044 : }
5045 : else
5046 : {
5047 : xmlChar *str;
5048 :
5049 112806 : if (count > 1) /* case d: a non-XML column accepts at most one node */
5050 6 : ereport(ERROR,
5051 : (errcode(ERRCODE_CARDINALITY_VIOLATION),
5052 : errmsg("more than one value returned by column XPath expression")));
5053 :
5054 112800 : str = xmlXPathCastNodeSetToString(xpathobj->nodesetval); /* case c: exactly one node, take its string value */
5055 112800 : cstr = str ? xml_pstrdup_and_free(str) : ""; /* a NULL string from libxml2 is mapped to the empty string */
5056 : }
5057 : }
5058 : }
5059 30 : else if (xpathobj->type == XPATH_STRING)
5060 : {
5061 : /* Content should be escaped when target will be XML */
5062 18 : if (typid == XMLOID)
5063 6 : cstr = escape_xml((char *) xpathobj->stringval);
5064 : else
5065 12 : cstr = (char *) xpathobj->stringval; /* no copy: cstr aliases xpathobj->stringval, which is consumed by InputFunctionCall below before xpathobj is released in PG_FINALLY */
5066 : }
5067 12 : else if (xpathobj->type == XPATH_BOOLEAN)
5068 : {
5069 : char typcategory;
5070 : bool typispreferred; /* filled in by the lookup below but not used afterwards */
5071 : xmlChar *str;
5072 :
5073 : /* Allow implicit casting from boolean to numbers */
5074 6 : get_type_category_preferred(typid, &typcategory, &typispreferred);
5075 :
5076 6 : if (typcategory != TYPCATEGORY_NUMERIC)
5077 6 : str = xmlXPathCastBooleanToString(xpathobj->boolval); /* non-numeric target: use the boolean's string form */
5078 : else
5079 0 : str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval)); /* numeric target: convert boolean to number first, then to string */
5080 :
5081 6 : cstr = xml_pstrdup_and_free(str); /* NOTE(review): unlike the node-set branch, str is not checked for NULL here - confirm xml_pstrdup_and_free tolerates NULL */
5082 : }
5083 6 : else if (xpathobj->type == XPATH_NUMBER)
5084 : {
5085 : xmlChar *str;
5086 :
5087 6 : str = xmlXPathCastNumberToString(xpathobj->floatval);
5088 6 : cstr = xml_pstrdup_and_free(str); /* NOTE(review): same unchecked-NULL question as in the boolean branch */
5089 : }
5090 : else
5091 0 : elog(ERROR, "unexpected XPath object type %u", xpathobj->type); /* any other XPath result type is rejected */
5092 :
5093 : /*
5094 : * By here, either cstr contains the result value, or the isnull flag
5095 : * has been set.
5096 : */
5097 : Assert(cstr || *isnull);
5098 :
5099 135612 : if (!*isnull) /* convert the text form to the column's Datum; skipped for a null value, leaving result at 0 */
5100 112902 : result = InputFunctionCall(&state->in_functions[colnum],
5101 : cstr,
5102 112902 : state->typioparams[colnum],
5103 : typmod);
5104 : }
5105 6 : PG_FINALLY();
5106 : {
5107 135618 : if (xpathobj != NULL) /* NULL only if evaluation itself failed before producing an object */
5108 135618 : xmlXPathFreeObject(xpathobj); /* release the per-column XPath result obtained above */
5109 : }
5110 135618 : PG_END_TRY();
5111 :
5112 135612 : return result;
5113 : #else
5114 : NO_XML_SUPPORT(); /* build without libxml: the macro's behavior is defined outside this view */
5115 : return 0;
5116 : #endif /* not USE_LIBXML */
5117 : }
5118 :
5119 : /*
5120 : * XmlTableDestroyOpaque
5121 : * Release all libxml2 resources held by the XmlTableBuilderData attached to 'state', finish the XML error context, and clear state->opaque.
5122 : */
5123 : static void
5124 264 : XmlTableDestroyOpaque(TableFuncScanState *state)
5125 : {
5126 : #ifdef USE_LIBXML
5127 : XmlTableBuilderData *xtCxt;
5128 :
5129 264 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
5130 :
5131 : /* Propagate our own error context to libxml2 */
5132 264 : xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
5133 :
5134 264 : if (xtCxt->xpathscomp != NULL) /* array of per-column compiled XPath expressions, natts entries */
5135 : {
5136 : int i;
5137 :
5138 1116 : for (i = 0; i < xtCxt->natts; i++)
5139 852 : if (xtCxt->xpathscomp[i] != NULL) /* individual entries may be NULL, so each one is checked */
5140 774 : xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
5141 : }
5142 :
5143 264 : if (xtCxt->xpathobj != NULL) /* row node set; only present once XmlTableFetchRow has evaluated the row expression */
5144 258 : xmlXPathFreeObject(xtCxt->xpathobj);
5145 264 : if (xtCxt->xpathcomp != NULL) /* compiled row XPath expression */
5146 258 : xmlXPathFreeCompExpr(xtCxt->xpathcomp);
5147 264 : if (xtCxt->xpathcxt != NULL) /* XPath evaluation context, freed after the objects evaluated with it */
5148 264 : xmlXPathFreeContext(xtCxt->xpathcxt);
5149 264 : if (xtCxt->doc != NULL) /* the XML document */
5150 264 : xmlFreeDoc(xtCxt->doc);
5151 264 : if (xtCxt->ctxt != NULL) /* the parser context */
5152 264 : xmlFreeParserCtxt(xtCxt->ctxt);
5153 :
5154 264 : pg_xml_done(xtCxt->xmlerrcxt, true); /* NOTE(review): meaning of the 'true' argument is not visible here - see pg_xml_done */
5155 :
5156 : /* not valid anymore */
5157 264 : xtCxt->magic = 0; /* presumably the value GetXmlTableBuilderPrivateData validates - confirm; the freed pointers themselves are not reset */
5158 264 : state->opaque = NULL;
5159 :
5160 : #else
5161 : NO_XML_SUPPORT(); /* build without libxml: the macro's behavior is defined outside this view */
5162 : #endif /* not USE_LIBXML */
5163 264 : }
|