Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * xml.c
4 : * XML data type support.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * src/backend/utils/adt/xml.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : /*
16 : * Generally, XML type support is only available when libxml use was
17 : * configured during the build. But even if that is not done, the
18 : * type and all the functions are available, but most of them will
19 : * fail. For one thing, this avoids having to manage variant catalog
20 : * installations. But it also has nice effects such as that you can
21 : * dump a database containing XML type data even if the server is not
22 : * linked with libxml. Thus, make sure xml_out() works even if nothing
23 : * else does.
24 : */
25 :
26 : /*
27 : * Notes on memory management:
28 : *
29 : * Sometimes libxml allocates global structures in the hope that it can reuse
30 : * them later on. This makes it impractical to change the xmlMemSetup
31 : * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32 : * allocated with malloc() or vice versa. Since libxml might be used by
33 : * loadable modules, eg libperl, our only safe choices are to change the
34 : * functions at postmaster/backend launch or not at all. Since we'd rather
35 : * not activate libxml in sessions that might never use it, the latter choice
36 : * is the preferred one. However, for debugging purposes it can be awfully
37 : * handy to constrain libxml's allocations to be done in a specific palloc
38 : * context, where they're easy to track. Therefore there is code here that
39 : * can be enabled in debug builds to redirect libxml's allocations into a
40 : * special context LibxmlContext. It's not recommended to turn this on in
41 : * a production build because of the possibility of bad interactions with
42 : * external modules.
43 : */
44 : /* #define USE_LIBXMLCONTEXT */
45 :
46 : #include "postgres.h"
47 :
48 : #ifdef USE_LIBXML
49 : #include <libxml/chvalid.h>
50 : #include <libxml/entities.h>
51 : #include <libxml/parser.h>
52 : #include <libxml/parserInternals.h>
53 : #include <libxml/tree.h>
54 : #include <libxml/uri.h>
55 : #include <libxml/xmlerror.h>
56 : #include <libxml/xmlsave.h>
57 : #include <libxml/xmlversion.h>
58 : #include <libxml/xmlwriter.h>
59 : #include <libxml/xpath.h>
60 : #include <libxml/xpathInternals.h>
61 :
62 : /*
63 : * We used to check for xmlStructuredErrorContext via a configure test; but
64 : * that doesn't work on Windows, so instead use this grottier method of
65 : * testing the library version number.
66 : */
67 : #if LIBXML_VERSION >= 20704
68 : #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
69 : #endif
70 :
71 : /*
72 : * libxml2 2.12 decided to insert "const" into the error handler API.
73 : */
74 : #if LIBXML_VERSION >= 21200
75 : #define PgXmlErrorPtr const xmlError *
76 : #else
77 : #define PgXmlErrorPtr xmlErrorPtr
78 : #endif
79 :
80 : #endif /* USE_LIBXML */
81 :
82 : #include "access/htup_details.h"
83 : #include "access/table.h"
84 : #include "catalog/namespace.h"
85 : #include "catalog/pg_class.h"
86 : #include "catalog/pg_type.h"
87 : #include "executor/spi.h"
88 : #include "executor/tablefunc.h"
89 : #include "fmgr.h"
90 : #include "lib/stringinfo.h"
91 : #include "libpq/pqformat.h"
92 : #include "mb/pg_wchar.h"
93 : #include "miscadmin.h"
94 : #include "nodes/execnodes.h"
95 : #include "nodes/miscnodes.h"
96 : #include "nodes/nodeFuncs.h"
97 : #include "utils/array.h"
98 : #include "utils/builtins.h"
99 : #include "utils/date.h"
100 : #include "utils/datetime.h"
101 : #include "utils/lsyscache.h"
102 : #include "utils/rel.h"
103 : #include "utils/syscache.h"
104 : #include "utils/xml.h"
105 :
106 :
107 : /* GUC variables */
108 : int xmlbinary = XMLBINARY_BASE64;
109 : int xmloption = XMLOPTION_CONTENT;
110 :
111 : #ifdef USE_LIBXML
112 :
113 : /* random number to identify PgXmlErrorContext */
114 : #define ERRCXT_MAGIC 68275028
115 :
116 : struct PgXmlErrorContext
117 : {
118 : int magic;
119 : /* strictness argument passed to pg_xml_init */
120 : PgXmlStrictness strictness;
121 : /* current error status and accumulated message, if any */
122 : bool err_occurred;
123 : StringInfoData err_buf;
124 : /* previous libxml error handling state (saved by pg_xml_init) */
125 : xmlStructuredErrorFunc saved_errfunc;
126 : void *saved_errcxt;
127 : /* previous libxml entity handler (saved by pg_xml_init) */
128 : xmlExternalEntityLoader saved_entityfunc;
129 : };
130 :
131 : static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
132 : xmlParserCtxtPtr ctxt);
133 : static void xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
134 : int sqlcode, const char *msg);
135 : static void xml_errorHandler(void *data, PgXmlErrorPtr error);
136 : static int errdetail_for_xml_code(int code);
137 : static void chopStringInfoNewlines(StringInfo str);
138 : static void appendStringInfoLineSeparator(StringInfo str);
139 :
140 : #ifdef USE_LIBXMLCONTEXT
141 :
142 : static MemoryContext LibxmlContext = NULL;
143 :
144 : static void xml_memory_init(void);
145 : static void *xml_palloc(size_t size);
146 : static void *xml_repalloc(void *ptr, size_t size);
147 : static void xml_pfree(void *ptr);
148 : static char *xml_pstrdup(const char *string);
149 : #endif /* USE_LIBXMLCONTEXT */
150 :
151 : static xmlChar *xml_text2xmlChar(text *in);
152 : static int parse_xml_decl(const xmlChar *str, size_t *lenp,
153 : xmlChar **version, xmlChar **encoding, int *standalone);
154 : static bool print_xml_decl(StringInfo buf, const xmlChar *version,
155 : pg_enc encoding, int standalone);
156 : static bool xml_doctype_in_content(const xmlChar *str);
157 : static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
158 : bool preserve_whitespace, int encoding,
159 : XmlOptionType *parsed_xmloptiontype,
160 : xmlNodePtr *parsed_nodes,
161 : Node *escontext);
162 : static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
163 : static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
164 : ArrayBuildState *astate,
165 : PgXmlErrorContext *xmlerrcxt);
166 : static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
167 : #endif /* USE_LIBXML */
168 :
169 : static void xmldata_root_element_start(StringInfo result, const char *eltname,
170 : const char *xmlschema, const char *targetns,
171 : bool top_level);
172 : static void xmldata_root_element_end(StringInfo result, const char *eltname);
173 : static StringInfo query_to_xml_internal(const char *query, char *tablename,
174 : const char *xmlschema, bool nulls, bool tableforest,
175 : const char *targetns, bool top_level);
176 : static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
177 : bool nulls, bool tableforest, const char *targetns);
178 : static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
179 : List *relid_list, bool nulls,
180 : bool tableforest, const char *targetns);
181 : static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
182 : bool nulls, bool tableforest,
183 : const char *targetns);
184 : static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
185 : static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
186 : static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
187 : static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
188 : char *tablename, bool nulls, bool tableforest,
189 : const char *targetns, bool top_level);
190 :
/* XMLTABLE support */
#ifdef USE_LIBXML

/* Arbitrary tag value used to recognize an XmlTableContext */
#define XMLTABLE_CONTEXT_MAGIC	46922182

/*
 * Working state for XMLTABLE: bookkeeping counters, the error context, and
 * the libxml parser, document and XPath objects in use.
 */
typedef struct XmlTableBuilderData
{
	int			magic;			/* identification tag, see above */
	int			natts;
	long int	row_count;
	PgXmlErrorContext *xmlerrcxt;
	xmlParserCtxtPtr ctxt;
	xmlDocPtr	doc;
	xmlXPathContextPtr xpathcxt;
	xmlXPathCompExprPtr xpathcomp;
	xmlXPathObjectPtr xpathobj;
	xmlXPathCompExprPtr *xpathscomp;
} XmlTableBuilderData;
#endif
209 :
210 : static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
211 : static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
212 : static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
213 : const char *uri);
214 : static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
215 : static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
216 : const char *path, int colnum);
217 : static bool XmlTableFetchRow(struct TableFuncScanState *state);
218 : static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
219 : Oid typid, int32 typmod, bool *isnull);
220 : static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
221 :
222 : const TableFuncRoutine XmlTableRoutine =
223 : {
224 : .InitOpaque = XmlTableInitOpaque,
225 : .SetDocument = XmlTableSetDocument,
226 : .SetNamespace = XmlTableSetNamespace,
227 : .SetRowFilter = XmlTableSetRowFilter,
228 : .SetColumnFilter = XmlTableSetColumnFilter,
229 : .FetchRow = XmlTableFetchRow,
230 : .GetValue = XmlTableGetValue,
231 : .DestroyOpaque = XmlTableDestroyOpaque
232 : };
233 :
/*
 * Raise the standard "feature not supported" error.  The functions in this
 * file use it in their non-libxml (#else) branches.
 */
#define NO_XML_SUPPORT() \
	ereport(ERROR, \
			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
			 errmsg("unsupported XML feature"), \
			 errdetail("This functionality requires the server to be built with libxml support.")))


/* Namespace URIs, from SQL/XML:2008 section 4.9 */
#define NAMESPACE_XSD		"http://www.w3.org/2001/XMLSchema"
#define NAMESPACE_XSI		"http://www.w3.org/2001/XMLSchema-instance"
#define NAMESPACE_SQLXML	"http://standards.iso.org/iso/9075/2003/sqlxml"
245 :
246 :
247 : #ifdef USE_LIBXML
248 :
249 : static int
250 0 : xmlChar_to_encoding(const xmlChar *encoding_name)
251 : {
252 0 : int encoding = pg_char_to_encoding((const char *) encoding_name);
253 :
254 0 : if (encoding < 0)
255 0 : ereport(ERROR,
256 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
257 : errmsg("invalid encoding name \"%s\"",
258 : (const char *) encoding_name)));
259 0 : return encoding;
260 : }
261 : #endif
262 :
263 :
264 : /*
265 : * xml_in uses a plain C string to VARDATA conversion, so for the time being
266 : * we use the conversion function for the text datatype.
267 : *
268 : * This is only acceptable so long as xmltype and text use the same
269 : * representation.
270 : */
271 : Datum
272 567 : xml_in(PG_FUNCTION_ARGS)
273 : {
274 : #ifdef USE_LIBXML
275 567 : char *s = PG_GETARG_CSTRING(0);
276 : xmltype *vardata;
277 : xmlDocPtr doc;
278 :
279 : /* Build the result object. */
280 567 : vardata = (xmltype *) cstring_to_text(s);
281 :
282 : /*
283 : * Parse the data to check if it is well-formed XML data.
284 : *
285 : * Note: we don't need to worry about whether a soft error is detected.
286 : */
287 567 : doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding(),
288 : NULL, NULL, fcinfo->context);
289 535 : if (doc != NULL)
290 527 : xmlFreeDoc(doc);
291 :
292 535 : PG_RETURN_XML_P(vardata);
293 : #else
294 : NO_XML_SUPPORT();
295 : return 0;
296 : #endif
297 : }
298 :
299 :
300 : #define PG_XML_DEFAULT_VERSION "1.0"
301 :
302 :
303 : /*
304 : * xml_out_internal uses a plain VARDATA to C string conversion, so for the
305 : * time being we use the conversion function for the text datatype.
306 : *
307 : * This is only acceptable so long as xmltype and text use the same
308 : * representation.
309 : */
310 : static char *
311 15975 : xml_out_internal(xmltype *x, pg_enc target_encoding)
312 : {
313 15975 : char *str = text_to_cstring((text *) x);
314 :
315 : #ifdef USE_LIBXML
316 15975 : size_t len = strlen(str);
317 : xmlChar *version;
318 : int standalone;
319 : int res_code;
320 :
321 15975 : if ((res_code = parse_xml_decl((xmlChar *) str,
322 : &len, &version, NULL, &standalone)) == 0)
323 : {
324 : StringInfoData buf;
325 :
326 15975 : initStringInfo(&buf);
327 :
328 15975 : if (!print_xml_decl(&buf, version, target_encoding, standalone))
329 : {
330 : /*
331 : * If we are not going to produce an XML declaration, eat a single
332 : * newline in the original string to prevent empty first lines in
333 : * the output.
334 : */
335 15943 : if (*(str + len) == '\n')
336 4 : len += 1;
337 : }
338 15975 : appendStringInfoString(&buf, str + len);
339 :
340 15975 : pfree(str);
341 :
342 15975 : return buf.data;
343 : }
344 :
345 0 : ereport(WARNING,
346 : errcode(ERRCODE_DATA_CORRUPTED),
347 : errmsg_internal("could not parse XML declaration in stored value"),
348 : errdetail_for_xml_code(res_code));
349 : #endif
350 0 : return str;
351 : }
352 :
353 :
354 : Datum
355 15799 : xml_out(PG_FUNCTION_ARGS)
356 : {
357 15799 : xmltype *x = PG_GETARG_XML_P(0);
358 :
359 : /*
360 : * xml_out removes the encoding property in all cases. This is because we
361 : * cannot control from here whether the datum will be converted to a
362 : * different client encoding, so we'd do more harm than good by including
363 : * it.
364 : */
365 15799 : PG_RETURN_CSTRING(xml_out_internal(x, 0));
366 : }
367 :
368 :
369 : Datum
370 0 : xml_recv(PG_FUNCTION_ARGS)
371 : {
372 : #ifdef USE_LIBXML
373 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
374 : xmltype *result;
375 : const char *input;
376 : char *str;
377 : char *newstr;
378 : int nbytes;
379 : xmlDocPtr doc;
380 0 : xmlChar *encodingStr = NULL;
381 : int encoding;
382 :
383 : /*
384 : * Read the data in raw format. We don't know yet what the encoding is, as
385 : * that information is embedded in the xml declaration; so we have to
386 : * parse that before converting to server encoding.
387 : */
388 0 : nbytes = buf->len - buf->cursor;
389 0 : input = pq_getmsgbytes(buf, nbytes);
390 :
391 : /*
392 : * We need a null-terminated string to pass to parse_xml_decl(). Rather
393 : * than make a separate copy, make the temporary result one byte bigger
394 : * than it needs to be.
395 : */
396 0 : result = palloc(nbytes + 1 + VARHDRSZ);
397 0 : SET_VARSIZE(result, nbytes + VARHDRSZ);
398 0 : memcpy(VARDATA(result), input, nbytes);
399 0 : str = VARDATA(result);
400 0 : str[nbytes] = '\0';
401 :
402 0 : parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
403 :
404 : /*
405 : * If encoding wasn't explicitly specified in the XML header, treat it as
406 : * UTF-8, as that's the default in XML. This is different from xml_in(),
407 : * where the input has to go through the normal client to server encoding
408 : * conversion.
409 : */
410 0 : encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
411 :
412 : /*
413 : * Parse the data to check if it is well-formed XML data. Assume that
414 : * xml_parse will throw ERROR if not.
415 : */
416 0 : doc = xml_parse(result, xmloption, true, encoding, NULL, NULL, NULL);
417 0 : xmlFreeDoc(doc);
418 :
419 : /* Now that we know what we're dealing with, convert to server encoding */
420 0 : newstr = pg_any_to_server(str, nbytes, encoding);
421 :
422 0 : if (newstr != str)
423 : {
424 0 : pfree(result);
425 0 : result = (xmltype *) cstring_to_text(newstr);
426 0 : pfree(newstr);
427 : }
428 :
429 0 : PG_RETURN_XML_P(result);
430 : #else
431 : NO_XML_SUPPORT();
432 : return 0;
433 : #endif
434 : }
435 :
436 :
437 : Datum
438 0 : xml_send(PG_FUNCTION_ARGS)
439 : {
440 0 : xmltype *x = PG_GETARG_XML_P(0);
441 : char *outval;
442 : StringInfoData buf;
443 :
444 : /*
445 : * xml_out_internal doesn't convert the encoding, it just prints the right
446 : * declaration. pq_sendtext will do the conversion.
447 : */
448 0 : outval = xml_out_internal(x, pg_get_client_encoding());
449 :
450 0 : pq_begintypsend(&buf);
451 0 : pq_sendtext(&buf, outval, strlen(outval));
452 0 : pfree(outval);
453 0 : PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
454 : }
455 :
456 :
457 : #ifdef USE_LIBXML
458 : static void
459 93 : appendStringInfoText(StringInfo str, const text *t)
460 : {
461 93 : appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
462 93 : }
463 : #endif
464 :
465 :
466 : static xmltype *
467 15306 : stringinfo_to_xmltype(StringInfo buf)
468 : {
469 15306 : return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
470 : }
471 :
472 :
473 : static xmltype *
474 57 : cstring_to_xmltype(const char *string)
475 : {
476 57 : return (xmltype *) cstring_to_text(string);
477 : }
478 :
479 :
480 : #ifdef USE_LIBXML
481 : static xmltype *
482 15377 : xmlBuffer_to_xmltype(xmlBufferPtr buf)
483 : {
484 15377 : return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
485 : xmlBufferLength(buf));
486 : }
487 : #endif
488 :
489 :
490 : Datum
491 33 : xmlcomment(PG_FUNCTION_ARGS)
492 : {
493 : #ifdef USE_LIBXML
494 33 : text *arg = PG_GETARG_TEXT_PP(0);
495 33 : char *argdata = VARDATA_ANY(arg);
496 33 : int len = VARSIZE_ANY_EXHDR(arg);
497 : StringInfoData buf;
498 : int i;
499 :
500 : /* check for "--" in string or "-" at the end */
501 144 : for (i = 1; i < len; i++)
502 : {
503 115 : if (argdata[i] == '-' && argdata[i - 1] == '-')
504 4 : ereport(ERROR,
505 : (errcode(ERRCODE_INVALID_XML_COMMENT),
506 : errmsg("invalid XML comment")));
507 : }
508 29 : if (len > 0 && argdata[len - 1] == '-')
509 4 : ereport(ERROR,
510 : (errcode(ERRCODE_INVALID_XML_COMMENT),
511 : errmsg("invalid XML comment")));
512 :
513 25 : initStringInfo(&buf);
514 25 : appendStringInfoString(&buf, "<!--");
515 25 : appendStringInfoText(&buf, arg);
516 25 : appendStringInfoString(&buf, "-->");
517 :
518 25 : PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
519 : #else
520 : NO_XML_SUPPORT();
521 : return 0;
522 : #endif
523 : }
524 :
525 :
526 : Datum
527 25 : xmltext(PG_FUNCTION_ARGS)
528 : {
529 : #ifdef USE_LIBXML
530 25 : text *arg = PG_GETARG_TEXT_PP(0);
531 : text *result;
532 25 : xmlChar *volatile xmlbuf = NULL;
533 : PgXmlErrorContext *xmlerrcxt;
534 :
535 : /* First we gotta spin up some error handling. */
536 25 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
537 :
538 25 : PG_TRY();
539 : {
540 25 : xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg));
541 :
542 25 : if (xmlbuf == NULL || xmlerrcxt->err_occurred)
543 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
544 : "could not allocate xmlChar");
545 :
546 25 : result = cstring_to_text_with_len((const char *) xmlbuf,
547 : xmlStrlen(xmlbuf));
548 : }
549 0 : PG_CATCH();
550 : {
551 0 : if (xmlbuf)
552 0 : xmlFree(xmlbuf);
553 :
554 0 : pg_xml_done(xmlerrcxt, true);
555 0 : PG_RE_THROW();
556 : }
557 25 : PG_END_TRY();
558 :
559 25 : xmlFree(xmlbuf);
560 25 : pg_xml_done(xmlerrcxt, false);
561 :
562 25 : PG_RETURN_XML_P(result);
563 : #else
564 : NO_XML_SUPPORT();
565 : return 0;
566 : #endif /* not USE_LIBXML */
567 : }
568 :
569 :
570 : /*
571 : * TODO: xmlconcat needs to merge the notations and unparsed entities
572 : * of the argument values. Not very important in practice, though.
573 : */
574 : xmltype *
575 15135 : xmlconcat(List *args)
576 : {
577 : #ifdef USE_LIBXML
578 15135 : int global_standalone = 1;
579 15135 : xmlChar *global_version = NULL;
580 15135 : bool global_version_no_value = false;
581 : StringInfoData buf;
582 : ListCell *v;
583 :
584 15135 : initStringInfo(&buf);
585 45409 : foreach(v, args)
586 : {
587 30274 : xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
588 : size_t len;
589 : xmlChar *version;
590 : int standalone;
591 : char *str;
592 :
593 30274 : len = VARSIZE(x) - VARHDRSZ;
594 30274 : str = text_to_cstring((text *) x);
595 :
596 30274 : parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
597 :
598 30274 : if (standalone == 0 && global_standalone == 1)
599 0 : global_standalone = 0;
600 30274 : if (standalone < 0)
601 30266 : global_standalone = -1;
602 :
603 30274 : if (!version)
604 30262 : global_version_no_value = true;
605 12 : else if (!global_version)
606 8 : global_version = version;
607 4 : else if (xmlStrcmp(version, global_version) != 0)
608 0 : global_version_no_value = true;
609 :
610 30274 : appendStringInfoString(&buf, str + len);
611 30274 : pfree(str);
612 : }
613 :
614 15135 : if (!global_version_no_value || global_standalone >= 0)
615 : {
616 : StringInfoData buf2;
617 :
618 4 : initStringInfo(&buf2);
619 :
620 4 : print_xml_decl(&buf2,
621 4 : (!global_version_no_value) ? global_version : NULL,
622 : 0,
623 : global_standalone);
624 :
625 4 : appendBinaryStringInfo(&buf2, buf.data, buf.len);
626 4 : buf = buf2;
627 : }
628 :
629 15135 : return stringinfo_to_xmltype(&buf);
630 : #else
631 : NO_XML_SUPPORT();
632 : return NULL;
633 : #endif
634 : }
635 :
636 :
637 : /*
638 : * XMLAGG support
639 : */
640 : Datum
641 15119 : xmlconcat2(PG_FUNCTION_ARGS)
642 : {
643 15119 : if (PG_ARGISNULL(0))
644 : {
645 12 : if (PG_ARGISNULL(1))
646 0 : PG_RETURN_NULL();
647 : else
648 12 : PG_RETURN_XML_P(PG_GETARG_XML_P(1));
649 : }
650 15107 : else if (PG_ARGISNULL(1))
651 0 : PG_RETURN_XML_P(PG_GETARG_XML_P(0));
652 : else
653 15107 : PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
654 : PG_GETARG_XML_P(1))));
655 : }
656 :
657 :
658 : Datum
659 4 : texttoxml(PG_FUNCTION_ARGS)
660 : {
661 4 : text *data = PG_GETARG_TEXT_PP(0);
662 :
663 4 : PG_RETURN_XML_P(xmlparse(data, xmloption, true, fcinfo->context));
664 : }
665 :
666 :
667 : Datum
668 0 : xmltotext(PG_FUNCTION_ARGS)
669 : {
670 0 : xmltype *data = PG_GETARG_XML_P(0);
671 :
672 : /* It's actually binary compatible. */
673 0 : PG_RETURN_TEXT_P((text *) data);
674 : }
675 :
676 :
677 : text *
678 120 : xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
679 : {
680 : #ifdef USE_LIBXML
681 : text *volatile result;
682 : xmlDocPtr doc;
683 : XmlOptionType parsed_xmloptiontype;
684 : xmlNodePtr content_nodes;
685 120 : volatile xmlBufferPtr buf = NULL;
686 120 : volatile xmlSaveCtxtPtr ctxt = NULL;
687 120 : ErrorSaveContext escontext = {T_ErrorSaveContext};
688 120 : PgXmlErrorContext *volatile xmlerrcxt = NULL;
689 : #endif
690 :
691 120 : if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
692 : {
693 : /*
694 : * We don't actually need to do anything, so just return the
695 : * binary-compatible input. For backwards-compatibility reasons,
696 : * allow such cases to succeed even without USE_LIBXML.
697 : */
698 24 : return (text *) data;
699 : }
700 :
701 : #ifdef USE_LIBXML
702 :
703 : /*
704 : * Parse the input according to the xmloption.
705 : *
706 : * preserve_whitespace is set to false in case we are indenting, otherwise
707 : * libxml2 will fail to indent elements that have whitespace between them.
708 : */
709 96 : doc = xml_parse(data, xmloption_arg, !indent, GetDatabaseEncoding(),
710 : &parsed_xmloptiontype, &content_nodes,
711 96 : (Node *) &escontext);
712 96 : if (doc == NULL || escontext.error_occurred)
713 : {
714 20 : if (doc)
715 0 : xmlFreeDoc(doc);
716 : /* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
717 20 : ereport(ERROR,
718 : (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
719 : errmsg("not an XML document")));
720 : }
721 :
722 : /* If we weren't asked to indent, we're done. */
723 76 : if (!indent)
724 : {
725 12 : xmlFreeDoc(doc);
726 12 : return (text *) data;
727 : }
728 :
729 : /*
730 : * Otherwise, we gotta spin up some error handling. Unlike most other
731 : * routines in this module, we already have a libxml "doc" structure to
732 : * free, so we need to call pg_xml_init() inside the PG_TRY and be
733 : * prepared for it to fail (typically due to palloc OOM).
734 : */
735 64 : PG_TRY();
736 : {
737 64 : size_t decl_len = 0;
738 :
739 64 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
740 :
741 : /* The serialized data will go into this buffer. */
742 64 : buf = xmlBufferCreate();
743 :
744 64 : if (buf == NULL || xmlerrcxt->err_occurred)
745 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
746 : "could not allocate xmlBuffer");
747 :
748 : /* Detect whether there's an XML declaration */
749 64 : parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
750 :
751 : /*
752 : * Emit declaration only if the input had one. Note: some versions of
753 : * xmlSaveToBuffer leak memory if a non-null encoding argument is
754 : * passed, so don't do that. We don't want any encoding conversion
755 : * anyway.
756 : */
757 64 : if (decl_len == 0)
758 56 : ctxt = xmlSaveToBuffer(buf, NULL,
759 : XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
760 : else
761 8 : ctxt = xmlSaveToBuffer(buf, NULL,
762 : XML_SAVE_FORMAT);
763 :
764 64 : if (ctxt == NULL || xmlerrcxt->err_occurred)
765 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
766 : "could not allocate xmlSaveCtxt");
767 :
768 64 : if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
769 : {
770 : /* If it's a document, saving is easy. */
771 28 : if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
772 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
773 : "could not save document to xmlBuffer");
774 : }
775 36 : else if (content_nodes != NULL)
776 : {
777 : /*
778 : * Deal with the case where we have non-singly-rooted XML.
779 : * libxml's dump functions don't work well for that without help.
780 : * We build a fake root node that serves as a container for the
781 : * content nodes, and then iterate over the nodes.
782 : */
783 : xmlNodePtr root;
784 : xmlNodePtr oldroot;
785 : xmlNodePtr newline;
786 :
787 32 : root = xmlNewNode(NULL, (const xmlChar *) "content-root");
788 32 : if (root == NULL || xmlerrcxt->err_occurred)
789 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
790 : "could not allocate xml node");
791 :
792 : /*
793 : * This attaches root to doc, so we need not free it separately...
794 : * but instead, we have to free the old root if there was one.
795 : */
796 32 : oldroot = xmlDocSetRootElement(doc, root);
797 32 : if (oldroot != NULL)
798 0 : xmlFreeNode(oldroot);
799 :
800 32 : if (xmlAddChildList(root, content_nodes) == NULL ||
801 32 : xmlerrcxt->err_occurred)
802 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
803 : "could not append xml node list");
804 :
805 : /*
806 : * We use this node to insert newlines in the dump. Note: in at
807 : * least some libxml versions, xmlNewDocText would not attach the
808 : * node to the document even if we passed it. Therefore, manage
809 : * freeing of this node manually, and pass NULL here to make sure
810 : * there's not a dangling link.
811 : */
812 32 : newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
813 32 : if (newline == NULL || xmlerrcxt->err_occurred)
814 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
815 : "could not allocate xml node");
816 :
817 84 : for (xmlNodePtr node = root->children; node; node = node->next)
818 : {
819 : /* insert newlines between nodes */
820 52 : if (node->type != XML_TEXT_NODE && node->prev != NULL)
821 : {
822 16 : if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
823 : {
824 0 : xmlFreeNode(newline);
825 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
826 : "could not save newline to xmlBuffer");
827 : }
828 : }
829 :
830 52 : if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
831 : {
832 0 : xmlFreeNode(newline);
833 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
834 : "could not save content to xmlBuffer");
835 : }
836 : }
837 :
838 32 : xmlFreeNode(newline);
839 : }
840 :
841 64 : if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
842 : {
843 0 : ctxt = NULL; /* don't try to close it again */
844 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
845 : "could not close xmlSaveCtxtPtr");
846 : }
847 :
848 : /*
849 : * xmlDocContentDumpOutput may add a trailing newline, so remove that.
850 : */
851 64 : if (xmloption_arg == XMLOPTION_DOCUMENT)
852 : {
853 24 : const char *str = (const char *) xmlBufferContent(buf);
854 24 : int len = xmlBufferLength(buf);
855 :
856 48 : while (len > 0 && (str[len - 1] == '\n' ||
857 24 : str[len - 1] == '\r'))
858 24 : len--;
859 :
860 24 : result = cstring_to_text_with_len(str, len);
861 : }
862 : else
863 40 : result = (text *) xmlBuffer_to_xmltype(buf);
864 : }
865 0 : PG_CATCH();
866 : {
867 0 : if (ctxt)
868 0 : xmlSaveClose(ctxt);
869 0 : if (buf)
870 0 : xmlBufferFree(buf);
871 0 : xmlFreeDoc(doc);
872 :
873 0 : if (xmlerrcxt)
874 0 : pg_xml_done(xmlerrcxt, true);
875 :
876 0 : PG_RE_THROW();
877 : }
878 64 : PG_END_TRY();
879 :
880 64 : xmlBufferFree(buf);
881 64 : xmlFreeDoc(doc);
882 :
883 64 : pg_xml_done(xmlerrcxt, false);
884 :
885 64 : return result;
886 : #else
887 : NO_XML_SUPPORT();
888 : return NULL;
889 : #endif
890 : }
891 :
892 :
893 : xmltype *
894 15223 : xmlelement(XmlExpr *xexpr,
895 : const Datum *named_argvalue, const bool *named_argnull,
896 : const Datum *argvalue, const bool *argnull)
897 : {
898 : #ifdef USE_LIBXML
899 : xmltype *result;
900 : List *named_arg_strings;
901 : List *arg_strings;
902 : int i;
903 : ListCell *arg;
904 : ListCell *narg;
905 : PgXmlErrorContext *xmlerrcxt;
906 15223 : volatile xmlBufferPtr buf = NULL;
907 15223 : volatile xmlTextWriterPtr writer = NULL;
908 :
909 : /*
910 : * All arguments are already evaluated, and their values are passed in the
911 : * named_argvalue/named_argnull or argvalue/argnull arrays. This avoids
912 : * issues if one of the arguments involves a call to some other function
913 : * or subsystem that wants to use libxml on its own terms. We examine the
914 : * original XmlExpr to identify the numbers and types of the arguments.
915 : */
916 15223 : named_arg_strings = NIL;
917 15223 : i = 0;
918 15255 : foreach(arg, xexpr->named_args)
919 : {
920 36 : Expr *e = (Expr *) lfirst(arg);
921 : char *str;
922 :
923 36 : if (named_argnull[i])
924 0 : str = NULL;
925 : else
926 36 : str = map_sql_value_to_xml_value(named_argvalue[i],
927 : exprType((Node *) e),
928 : false);
929 32 : named_arg_strings = lappend(named_arg_strings, str);
930 32 : i++;
931 : }
932 :
933 15219 : arg_strings = NIL;
934 15219 : i = 0;
935 30422 : foreach(arg, xexpr->args)
936 : {
937 15203 : Expr *e = (Expr *) lfirst(arg);
938 : char *str;
939 :
940 : /* here we can just forget NULL elements immediately */
941 15203 : if (!argnull[i])
942 : {
943 15203 : str = map_sql_value_to_xml_value(argvalue[i],
944 : exprType((Node *) e),
945 : true);
946 15203 : arg_strings = lappend(arg_strings, str);
947 : }
948 15203 : i++;
949 : }
950 :
951 15219 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
952 :
953 15219 : PG_TRY();
954 : {
955 15219 : buf = xmlBufferCreate();
956 15219 : if (buf == NULL || xmlerrcxt->err_occurred)
957 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
958 : "could not allocate xmlBuffer");
959 15219 : writer = xmlNewTextWriterMemory(buf, 0);
960 15219 : if (writer == NULL || xmlerrcxt->err_occurred)
961 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
962 : "could not allocate xmlTextWriter");
963 :
964 15219 : if (xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name) < 0 ||
965 15219 : xmlerrcxt->err_occurred)
966 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
967 : "could not start xml element");
968 :
969 15251 : forboth(arg, named_arg_strings, narg, xexpr->arg_names)
970 : {
971 32 : char *str = (char *) lfirst(arg);
972 32 : char *argname = strVal(lfirst(narg));
973 :
974 32 : if (str)
975 : {
976 32 : if (xmlTextWriterWriteAttribute(writer,
977 : (xmlChar *) argname,
978 32 : (xmlChar *) str) < 0 ||
979 32 : xmlerrcxt->err_occurred)
980 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
981 : "could not write xml attribute");
982 : }
983 : }
984 :
985 30422 : foreach(arg, arg_strings)
986 : {
987 15203 : char *str = (char *) lfirst(arg);
988 :
989 15203 : if (xmlTextWriterWriteRaw(writer, (xmlChar *) str) < 0 ||
990 15203 : xmlerrcxt->err_occurred)
991 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
992 : "could not write raw xml text");
993 : }
994 :
995 15219 : if (xmlTextWriterEndElement(writer) < 0 ||
996 15219 : xmlerrcxt->err_occurred)
997 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
998 : "could not end xml element");
999 :
1000 : /* we MUST do this now to flush data out to the buffer ... */
1001 15219 : xmlFreeTextWriter(writer);
1002 15219 : writer = NULL;
1003 :
1004 15219 : result = xmlBuffer_to_xmltype(buf);
1005 : }
1006 0 : PG_CATCH();
1007 : {
1008 0 : if (writer)
1009 0 : xmlFreeTextWriter(writer);
1010 0 : if (buf)
1011 0 : xmlBufferFree(buf);
1012 :
1013 0 : pg_xml_done(xmlerrcxt, true);
1014 :
1015 0 : PG_RE_THROW();
1016 : }
1017 15219 : PG_END_TRY();
1018 :
1019 15219 : xmlBufferFree(buf);
1020 :
1021 15219 : pg_xml_done(xmlerrcxt, false);
1022 :
1023 15219 : return result;
1024 : #else
1025 : NO_XML_SUPPORT();
1026 : return NULL;
1027 : #endif
1028 : }
1029 :
1030 :
1031 : xmltype *
1032 92 : xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace, Node *escontext)
1033 : {
1034 : #ifdef USE_LIBXML
1035 : xmlDocPtr doc;
1036 :
1037 92 : doc = xml_parse(data, xmloption_arg, preserve_whitespace,
1038 : GetDatabaseEncoding(), NULL, NULL, escontext);
1039 60 : if (doc)
1040 60 : xmlFreeDoc(doc);
1041 :
1042 60 : if (SOFT_ERROR_OCCURRED(escontext))
1043 0 : return NULL;
1044 :
1045 60 : return (xmltype *) data;
1046 : #else
1047 : NO_XML_SUPPORT();
1048 : return NULL;
1049 : #endif
1050 : }
1051 :
1052 :
1053 : xmltype *
1054 48 : xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
1055 : {
1056 : #ifdef USE_LIBXML
1057 : xmltype *result;
1058 : StringInfoData buf;
1059 :
1060 48 : if (pg_strcasecmp(target, "xml") == 0)
1061 8 : ereport(ERROR,
1062 : (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
1063 : errmsg("invalid XML processing instruction"),
1064 : errdetail("XML processing instruction target name cannot be \"%s\".", target)));
1065 :
1066 : /*
1067 : * Following the SQL standard, the null check comes after the syntax check
1068 : * above.
1069 : */
1070 40 : *result_is_null = arg_is_null;
1071 40 : if (*result_is_null)
1072 8 : return NULL;
1073 :
1074 32 : initStringInfo(&buf);
1075 :
1076 32 : appendStringInfo(&buf, "<?%s", target);
1077 :
1078 32 : if (arg != NULL)
1079 : {
1080 : char *string;
1081 :
1082 16 : string = text_to_cstring(arg);
1083 16 : if (strstr(string, "?>") != NULL)
1084 4 : ereport(ERROR,
1085 : (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
1086 : errmsg("invalid XML processing instruction"),
1087 : errdetail("XML processing instruction cannot contain \"?>\".")));
1088 :
1089 12 : appendStringInfoChar(&buf, ' ');
1090 12 : appendStringInfoString(&buf, string + strspn(string, " "));
1091 12 : pfree(string);
1092 : }
1093 28 : appendStringInfoString(&buf, "?>");
1094 :
1095 28 : result = stringinfo_to_xmltype(&buf);
1096 28 : pfree(buf.data);
1097 28 : return result;
1098 : #else
1099 : NO_XML_SUPPORT();
1100 : return NULL;
1101 : #endif
1102 : }
1103 :
1104 :
1105 : xmltype *
1106 40 : xmlroot(xmltype *data, text *version, int standalone)
1107 : {
1108 : #ifdef USE_LIBXML
1109 : char *str;
1110 : size_t len;
1111 : xmlChar *orig_version;
1112 : int orig_standalone;
1113 : StringInfoData buf;
1114 :
1115 40 : len = VARSIZE(data) - VARHDRSZ;
1116 40 : str = text_to_cstring((text *) data);
1117 :
1118 40 : parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
1119 :
1120 40 : if (version)
1121 16 : orig_version = xml_text2xmlChar(version);
1122 : else
1123 24 : orig_version = NULL;
1124 :
1125 40 : switch (standalone)
1126 : {
1127 12 : case XML_STANDALONE_YES:
1128 12 : orig_standalone = 1;
1129 12 : break;
1130 8 : case XML_STANDALONE_NO:
1131 8 : orig_standalone = 0;
1132 8 : break;
1133 8 : case XML_STANDALONE_NO_VALUE:
1134 8 : orig_standalone = -1;
1135 8 : break;
1136 12 : case XML_STANDALONE_OMITTED:
1137 : /* leave original value */
1138 12 : break;
1139 : }
1140 :
1141 40 : initStringInfo(&buf);
1142 40 : print_xml_decl(&buf, orig_version, 0, orig_standalone);
1143 40 : appendStringInfoString(&buf, str + len);
1144 :
1145 40 : return stringinfo_to_xmltype(&buf);
1146 : #else
1147 : NO_XML_SUPPORT();
1148 : return NULL;
1149 : #endif
1150 : }
1151 :
1152 :
1153 : /*
1154 : * Validate document (given as string) against DTD (given as external link)
1155 : *
1156 : * This has been removed because it is a security hole: unprivileged users
1157 : * should not be able to use Postgres to fetch arbitrary external files,
1158 : * which unfortunately is exactly what libxml is willing to do with the DTD
1159 : * parameter.
1160 : */
1161 : Datum
1162 0 : xmlvalidate(PG_FUNCTION_ARGS)
1163 : {
1164 0 : ereport(ERROR,
1165 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1166 : errmsg("xmlvalidate is not implemented")));
1167 : return 0;
1168 : }
1169 :
1170 :
1171 : bool
1172 16 : xml_is_document(xmltype *arg)
1173 : {
1174 : #ifdef USE_LIBXML
1175 : xmlDocPtr doc;
1176 16 : ErrorSaveContext escontext = {T_ErrorSaveContext};
1177 :
1178 : /*
1179 : * We'll report "true" if no soft error is reported by xml_parse().
1180 : */
1181 16 : doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
1182 : GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
1183 16 : if (doc)
1184 8 : xmlFreeDoc(doc);
1185 :
1186 16 : return !escontext.error_occurred;
1187 : #else /* not USE_LIBXML */
1188 : NO_XML_SUPPORT();
1189 : return false;
1190 : #endif /* not USE_LIBXML */
1191 : }
1192 :
1193 :
1194 : #ifdef USE_LIBXML
1195 :
1196 : /*
1197 : * pg_xml_init_library --- set up for use of libxml
1198 : *
1199 : * This should be called by each function that is about to use libxml
1200 : * facilities but doesn't require error handling. It initializes libxml
1201 : * and verifies compatibility with the loaded libxml version. These are
1202 : * once-per-session activities.
1203 : *
1204 : * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
1205 : * check)
1206 : */
1207 : void
1208 64172 : pg_xml_init_library(void)
1209 : {
1210 : static bool first_time = true;
1211 :
1212 64172 : if (first_time)
1213 : {
1214 : /* Stuff we need do only once per session */
1215 :
1216 : /*
1217 : * Currently, we have no pure UTF-8 support for internals -- check if
1218 : * we can work.
1219 : */
1220 : if (sizeof(char) != sizeof(xmlChar))
1221 : ereport(ERROR,
1222 : (errmsg("could not initialize XML library"),
1223 : errdetail("libxml2 has incompatible char type: sizeof(char)=%zu, sizeof(xmlChar)=%zu.",
1224 : sizeof(char), sizeof(xmlChar))));
1225 :
1226 : #ifdef USE_LIBXMLCONTEXT
1227 : /* Set up libxml's memory allocation our way */
1228 : xml_memory_init();
1229 : #endif
1230 :
1231 : /* Check library compatibility */
1232 15 : LIBXML_TEST_VERSION;
1233 :
1234 15 : first_time = false;
1235 : }
1236 64172 : }
1237 :
1238 : /*
1239 : * pg_xml_init --- set up for use of libxml and register an error handler
1240 : *
1241 : * This should be called by each function that is about to use libxml
1242 : * facilities and requires error handling. It initializes libxml with
1243 : * pg_xml_init_library() and establishes our libxml error handler.
1244 : *
1245 : * strictness determines which errors are reported and which are ignored.
1246 : *
1247 : * Calls to this function MUST be followed by a PG_TRY block that guarantees
1248 : * that pg_xml_done() is called during either normal or error exit.
1249 : *
1250 : * This is exported for use by contrib/xml2, as well as other code that might
1251 : * wish to share use of this module's libxml error handler.
1252 : */
1253 : PgXmlErrorContext *
1254 16773 : pg_xml_init(PgXmlStrictness strictness)
1255 : {
1256 : PgXmlErrorContext *errcxt;
1257 : void *new_errcxt;
1258 :
1259 : /* Do one-time setup if needed */
1260 16773 : pg_xml_init_library();
1261 :
1262 : /* Create error handling context structure */
1263 16773 : errcxt = palloc_object(PgXmlErrorContext);
1264 16773 : errcxt->magic = ERRCXT_MAGIC;
1265 16773 : errcxt->strictness = strictness;
1266 16773 : errcxt->err_occurred = false;
1267 16773 : initStringInfo(&errcxt->err_buf);
1268 :
1269 : /*
1270 : * Save original error handler and install ours. libxml originally didn't
1271 : * distinguish between the contexts for generic and for structured error
1272 : * handlers. If we're using an old libxml version, we must thus save the
1273 : * generic error context, even though we're using a structured error
1274 : * handler.
1275 : */
1276 16773 : errcxt->saved_errfunc = xmlStructuredError;
1277 :
1278 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1279 16773 : errcxt->saved_errcxt = xmlStructuredErrorContext;
1280 : #else
1281 : errcxt->saved_errcxt = xmlGenericErrorContext;
1282 : #endif
1283 :
1284 16773 : xmlSetStructuredErrorFunc(errcxt, xml_errorHandler);
1285 :
1286 : /*
1287 : * Verify that xmlSetStructuredErrorFunc set the context variable we
1288 : * expected it to. If not, the error context pointer we just saved is not
1289 : * the correct thing to restore, and since that leaves us without a way to
1290 : * restore the context in pg_xml_done, we must fail.
1291 : *
1292 : * The only known situation in which this test fails is if we compile with
1293 : * headers from a libxml2 that doesn't track the structured error context
1294 : * separately (< 2.7.4), but at runtime use a version that does, or vice
1295 : * versa. The libxml2 authors did not treat that change as constituting
1296 : * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1297 : * fails to protect us from this.
1298 : */
1299 :
1300 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1301 16773 : new_errcxt = xmlStructuredErrorContext;
1302 : #else
1303 : new_errcxt = xmlGenericErrorContext;
1304 : #endif
1305 :
1306 16773 : if (new_errcxt != errcxt)
1307 0 : ereport(ERROR,
1308 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1309 : errmsg("could not set up XML error handler"),
1310 : errhint("This probably indicates that the version of libxml2"
1311 : " being used is not compatible with the libxml2"
1312 : " header files that PostgreSQL was built with.")));
1313 :
1314 : /*
1315 : * Also, install an entity loader to prevent unwanted fetches of external
1316 : * files and URLs.
1317 : */
1318 16773 : errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1319 16773 : xmlSetExternalEntityLoader(xmlPgEntityLoader);
1320 :
1321 16773 : return errcxt;
1322 : }
1323 :
1324 :
1325 : /*
1326 : * pg_xml_done --- restore previous libxml error handling
1327 : *
1328 : * Resets libxml's global error-handling state to what it was before
1329 : * pg_xml_init() was called.
1330 : *
1331 : * This routine verifies that all pending errors have been dealt with
1332 : * (in assert-enabled builds, anyway).
1333 : */
1334 : void
1335 16773 : pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1336 : {
1337 : void *cur_errcxt;
1338 :
1339 : /* An assert seems like enough protection here */
1340 : Assert(errcxt->magic == ERRCXT_MAGIC);
1341 :
1342 : /*
1343 : * In a normal exit, there should be no un-handled libxml errors. But we
1344 : * shouldn't try to enforce this during error recovery, since the longjmp
1345 : * could have been thrown before xml_ereport had a chance to run.
1346 : */
1347 : Assert(!errcxt->err_occurred || isError);
1348 :
1349 : /*
1350 : * Check that libxml's global state is correct, warn if not. This is a
1351 : * real test and not an Assert because it has a higher probability of
1352 : * happening.
1353 : */
1354 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1355 16773 : cur_errcxt = xmlStructuredErrorContext;
1356 : #else
1357 : cur_errcxt = xmlGenericErrorContext;
1358 : #endif
1359 :
1360 16773 : if (cur_errcxt != errcxt)
1361 0 : elog(WARNING, "libxml error handling state is out of sync with xml.c");
1362 :
1363 : /* Restore the saved handlers */
1364 16773 : xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1365 16773 : xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1366 :
1367 : /*
1368 : * Mark the struct as invalid, just in case somebody somehow manages to
1369 : * call xml_errorHandler or xml_ereport with it.
1370 : */
1371 16773 : errcxt->magic = 0;
1372 :
1373 : /* Release memory */
1374 16773 : pfree(errcxt->err_buf.data);
1375 16773 : pfree(errcxt);
1376 16773 : }
1377 :
1378 :
1379 : /*
1380 : * pg_xml_error_occurred() --- test the error flag
1381 : */
1382 : bool
1383 39 : pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1384 : {
1385 39 : return errcxt->err_occurred;
1386 : }
1387 :
1388 :
1389 : /*
1390 : * SQL/XML allows storing "XML documents" or "XML content". "XML
1391 : * documents" are specified by the XML specification and are parsed
1392 : * easily by libxml. "XML content" is specified by SQL/XML as the
1393 : * production "XMLDecl? content". But libxml can only parse the
1394 : * "content" part, so we have to parse the XML declaration ourselves
1395 : * to complete this.
1396 : */
1397 :
/*
 * CHECK_XML_SPACE: make the *enclosing function* return
 * XML_ERR_SPACE_REQUIRED unless *p is an XML blank character.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/*
 * SKIP_XML_SPACE: advance p past any run of XML blank characters.
 * Wrapped in do/while so that it always behaves as a single statement.
 */
#define SKIP_XML_SPACE(p) \
	do { \
		while (xmlIsBlank_ch(*(p))) \
			(p)++; \
	} while (0)

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/*
 * Beware of multiple evaluations of argument!  The argument is parenthesized
 * at each use, so passing an expression such as "x & 0xff" is safe as far as
 * operator precedence goes, but it must still be free of side effects.
 */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
			|| xmlIsDigit_ch(c) \
			|| (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
			|| xmlIsCombiningQ(c) \
			|| xmlIsExtender_ch(c))
1415 :
1416 : /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1417 : static xmlChar *
1418 128 : xml_pnstrdup(const xmlChar *str, size_t len)
1419 : {
1420 : xmlChar *result;
1421 :
1422 128 : result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1423 128 : memcpy(result, str, len * sizeof(xmlChar));
1424 128 : result[len] = 0;
1425 128 : return result;
1426 : }
1427 :
1428 : /* Ditto, except input is char* */
1429 : static xmlChar *
1430 1666 : pg_xmlCharStrndup(const char *str, size_t len)
1431 : {
1432 : xmlChar *result;
1433 :
1434 1666 : result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1435 1666 : memcpy(result, str, len);
1436 1666 : result[len] = '\0';
1437 :
1438 1666 : return result;
1439 : }
1440 :
1441 : /*
1442 : * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
1443 : *
1444 : * The input xmlChar is freed regardless of success of the copy.
1445 : */
1446 : static char *
1447 76548 : xml_pstrdup_and_free(xmlChar *str)
1448 : {
1449 : char *result;
1450 :
1451 76548 : if (str)
1452 : {
1453 76548 : PG_TRY();
1454 : {
1455 76548 : result = pstrdup((char *) str);
1456 : }
1457 0 : PG_FINALLY();
1458 : {
1459 76548 : xmlFree(str);
1460 : }
1461 76548 : PG_END_TRY();
1462 : }
1463 : else
1464 0 : result = NULL;
1465 :
1466 76548 : return result;
1467 : }
1468 :
1469 : /*
 * parse_xml_decl --- recognize and measure a leading XML declaration
 *
 * Looks for "<?xml version=... [encoding=...] [standalone=...] ?>" at the
 * very start of str.  A string that does not start with "<?xml", or that
 * starts with a PI whose target merely begins with "xml" (such as
 * <?xml-stylesheet ...?>), is treated as having no declaration, which is
 * not an error.
 *
1470 : * str is the null-terminated input string. Remaining arguments are
1471 : * output arguments; each can be NULL if value is not wanted.
1472 : * version and encoding are returned as locally-palloc'd strings.
 * *standalone is set to 1 for "yes", 0 for "no", or -1 if not specified.
 * *lenp receives the length of the declaration in xmlChars (0 if there is
 * none); it is stored only when the function succeeds.
1473 : * Result is 0 if OK, an error code if not.
 * On an error return the outputs still hold their "not present" values or
 * whatever had been parsed up to the point of failure.
1474 : */
1475 : static int
1476 47399 : parse_xml_decl(const xmlChar *str, size_t *lenp,
1477 : xmlChar **version, xmlChar **encoding, int *standalone)
1478 : {
1479 : const xmlChar *p;
1480 : const xmlChar *save_p;
1481 : size_t len;
1482 : int utf8char;
1483 : int utf8len;
1484 :
1485 : /*
1486 : * Only initialize libxml. We don't need error handling here, but we do
1487 : * need to make sure libxml is initialized before calling any of its
1488 : * functions. Note that this is safe (and a no-op) if caller has already
1489 : * done pg_xml_init().
1490 : */
1491 47399 : pg_xml_init_library();
1492 :
1493 : /* Initialize output arguments to "not present" */
1494 47399 : if (version)
1495 46942 : *version = NULL;
1496 47399 : if (encoding)
1497 0 : *encoding = NULL;
1498 47399 : if (standalone)
1499 46942 : *standalone = -1;
1500 :
1501 47399 : p = str;
1502 :
/* No "<?xml" prefix: no declaration; p still equals str, so length is 0 */
1503 47399 : if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1504 47251 : goto finished;
1505 :
1506 : /*
1507 : * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
1508 : * rather than an XMLDecl, so we have done what we came to do and found no
1509 : * XMLDecl.
1510 : *
1511 : * We need an input length value for xmlGetUTF8Char, but there's no need
1512 : * to count the whole document size, so use strnlen not strlen.
1513 : */
1514 148 : utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
1515 148 : utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1516 148 : if (PG_XMLISNAMECHAR(utf8char))
1517 8 : goto finished;
1518 :
1519 140 : p += 5;
1520 :
1521 : /* version */
/* (CHECK_XML_SPACE returns XML_ERR_SPACE_REQUIRED from this function if *p is not blank) */
1522 140 : CHECK_XML_SPACE(p);
1523 280 : SKIP_XML_SPACE(p);
1524 140 : if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1525 0 : return XML_ERR_VERSION_MISSING;
1526 140 : p += 7;
1527 140 : SKIP_XML_SPACE(p);
1528 140 : if (*p != '=')
1529 0 : return XML_ERR_VERSION_MISSING;
1530 140 : p += 1;
1531 140 : SKIP_XML_SPACE(p);
1532 :
/* the value must be quoted; q finds the matching closing quote character */
1533 140 : if (*p == '\'' || *p == '"')
1534 140 : {
1535 : const xmlChar *q;
1536 :
1537 140 : q = xmlStrchr(p + 1, *p);
1538 140 : if (!q)
1539 0 : return XML_ERR_VERSION_MISSING;
1540 :
1541 140 : if (version)
1542 128 : *version = xml_pnstrdup(p + 1, q - p - 1);
1543 140 : p = q + 1;
1544 : }
1545 : else
1546 0 : return XML_ERR_VERSION_MISSING;
1547 :
1548 : /* encoding */
/*
 * save_p remembers where we were before skipping whitespace.  If the
 * keyword is present, at least one blank must have preceded it (checked at
 * save_p); if it is absent, we rewind to save_p so the next section starts
 * from the same place.
 */
1549 140 : save_p = p;
1550 248 : SKIP_XML_SPACE(p);
1551 140 : if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1552 : {
1553 36 : CHECK_XML_SPACE(save_p);
1554 36 : p += 8;
1555 36 : SKIP_XML_SPACE(p);
1556 36 : if (*p != '=')
1557 0 : return XML_ERR_MISSING_ENCODING;
1558 36 : p += 1;
1559 36 : SKIP_XML_SPACE(p);
1560 :
1561 36 : if (*p == '\'' || *p == '"')
1562 36 : {
1563 : const xmlChar *q;
1564 :
1565 36 : q = xmlStrchr(p + 1, *p);
1566 36 : if (!q)
1567 0 : return XML_ERR_MISSING_ENCODING;
1568 :
1569 36 : if (encoding)
1570 0 : *encoding = xml_pnstrdup(p + 1, q - p - 1);
1571 36 : p = q + 1;
1572 : }
1573 : else
1574 0 : return XML_ERR_MISSING_ENCODING;
1575 : }
1576 : else
1577 : {
1578 104 : p = save_p;
1579 : }
1580 :
1581 : /* standalone */
/* same save/rewind scheme as for encoding; only the exact quoted words yes and no are accepted */
1582 140 : save_p = p;
1583 212 : SKIP_XML_SPACE(p);
1584 140 : if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1585 : {
1586 72 : CHECK_XML_SPACE(save_p);
1587 72 : p += 10;
1588 72 : SKIP_XML_SPACE(p);
1589 72 : if (*p != '=')
1590 0 : return XML_ERR_STANDALONE_VALUE;
1591 72 : p += 1;
1592 72 : SKIP_XML_SPACE(p);
1593 144 : if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1594 72 : xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1595 : {
1596 40 : if (standalone)
1597 40 : *standalone = 1;
1598 40 : p += 5;
1599 : }
1600 64 : else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1601 32 : xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1602 : {
1603 24 : if (standalone)
1604 24 : *standalone = 0;
1605 24 : p += 4;
1606 : }
1607 : else
1608 8 : return XML_ERR_STANDALONE_VALUE;
1609 : }
1610 : else
1611 : {
1612 68 : p = save_p;
1613 : }
1614 :
/* the declaration must now close with "?>", optionally preceded by blanks */
1615 132 : SKIP_XML_SPACE(p);
1616 132 : if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1617 0 : return XML_ERR_XMLDECL_NOT_FINISHED;
1618 132 : p += 2;
1619 :
/* p now points just past the declaration, or at str if there was none */
1620 47391 : finished:
1621 47391 : len = p - str;
1622 :
/* reject any byte above 127 within the span that was consumed */
1623 51879 : for (p = str; p < str + len; p++)
1624 4488 : if (*p > 127)
1625 0 : return XML_ERR_INVALID_CHAR;
1626 :
1627 47391 : if (lenp)
1628 47391 : *lenp = len;
1629 :
1630 47391 : return XML_ERR_OK;
1631 : }
1632 :
1633 :
1634 : /*
1635 : * Write an XML declaration. On output, we adjust the XML declaration
1636 : * as follows. (These rules are the moral equivalent of the clause
1637 : * "Serialization of an XML value" in the SQL standard.)
1638 : *
1639 : * We try to avoid generating an XML declaration if possible. This is
1640 : * so that you don't get trivial things like xml '<foo/>' resulting in
1641 : * '<?xml version="1.0"?><foo/>', which would surely be annoying. We
1642 : * must provide a declaration if the standalone property is specified
1643 : * or if we include an encoding declaration. If we have a
1644 : * declaration, we must specify a version (XML requires this).
1645 : * Otherwise we only make a declaration if the version is not "1.0",
1646 : * which is the default version specified in SQL:2003.
1647 : */
1648 : static bool
1649 16019 : print_xml_decl(StringInfo buf, const xmlChar *version,
1650 : pg_enc encoding, int standalone)
1651 : {
1652 16019 : if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1653 15995 : || (encoding && encoding != PG_UTF8)
1654 15995 : || standalone != -1)
1655 : {
1656 64 : appendStringInfoString(buf, "<?xml");
1657 :
1658 64 : if (version)
1659 48 : appendStringInfo(buf, " version=\"%s\"", version);
1660 : else
1661 16 : appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1662 :
1663 64 : if (encoding && encoding != PG_UTF8)
1664 : {
1665 : /*
1666 : * XXX might be useful to convert this to IANA names (ISO-8859-1
1667 : * instead of LATIN1 etc.); needs field experience
1668 : */
1669 0 : appendStringInfo(buf, " encoding=\"%s\"",
1670 : pg_encoding_to_char(encoding));
1671 : }
1672 :
1673 64 : if (standalone == 1)
1674 32 : appendStringInfoString(buf, " standalone=\"yes\"");
1675 32 : else if (standalone == 0)
1676 16 : appendStringInfoString(buf, " standalone=\"no\"");
1677 64 : appendStringInfoString(buf, "?>");
1678 :
1679 64 : return true;
1680 : }
1681 : else
1682 15955 : return false;
1683 : }
1684 :
1685 : /*
1686 : * Test whether an input that is to be parsed as CONTENT contains a DTD.
1687 : *
1688 : * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1689 : * satisfied by a document with a DTD, which is a bit of a wart, as it means
1690 : * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and
1691 : * later fix that, by redefining content with reference to the "more
1692 : * permissive" Document Node of the XQuery/XPath Data Model, such that any
1693 : * DOCUMENT value is indeed also a CONTENT value. That definition is more
1694 : * useful, as CONTENT becomes usable for parsing input of unknown form (think
1695 : * pg_restore).
1696 : *
1697 : * As used below in parse_xml when parsing for CONTENT, libxml does not give
1698 : * us the 2006+ behavior, but only the 2003; it will choke if the input has
1699 : * a DTD. But we can provide the 2006+ definition of CONTENT easily enough,
1700 : * by detecting this case first and simply doing the parse as DOCUMENT.
1701 : *
1702 : * A DTD can be found arbitrarily far in, but that would be a contrived case;
1703 : * it will ordinarily start within a few dozen characters. The only things
1704 : * that can precede it are an XMLDecl (here, the caller will have called
1705 : * parse_xml_decl already), whitespace, comments, and processing instructions.
1706 : * This function need only return true if it sees a valid sequence of such
1707 : * things leading to <!DOCTYPE. It can simply return false in any other
1708 : * cases, including malformed input; that will mean the input gets parsed as
1709 : * CONTENT as originally planned, with libxml reporting any errors.
1710 : *
1711 : * This is only to be called from xml_parse, when pg_xml_init has already
1712 : * been called. The input is already in UTF8 encoding.
1713 : */
1714 : static bool
1715 645 : xml_doctype_in_content(const xmlChar *str)
1716 : {
1717 645 : const xmlChar *p = str;
1718 :
1719 : for (;;)
1720 24 : {
1721 : const xmlChar *e;
1722 :
1723 729 : SKIP_XML_SPACE(p);
1724 669 : if (*p != '<')
1725 137 : return false;
1726 532 : p++;
1727 :
1728 532 : if (*p == '!')
1729 : {
1730 48 : p++;
1731 :
1732 : /* if we see <!DOCTYPE, we can return true */
1733 48 : if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1734 28 : return true;
1735 :
1736 : /* otherwise, if it's not a comment, fail */
1737 20 : if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1738 0 : return false;
1739 : /* find end of comment: find -- and a > must follow */
1740 20 : p = xmlStrstr(p + 2, (xmlChar *) "--");
1741 20 : if (!p || p[2] != '>')
1742 0 : return false;
1743 : /* advance over comment, and keep scanning */
1744 20 : p += 3;
1745 20 : continue;
1746 : }
1747 :
1748 : /* otherwise, if it's not a PI <?target something?>, fail */
1749 484 : if (*p != '?')
1750 480 : return false;
1751 4 : p++;
1752 :
1753 : /* find end of PI (the string ?> is forbidden within a PI) */
1754 4 : e = xmlStrstr(p, (xmlChar *) "?>");
1755 4 : if (!e)
1756 0 : return false;
1757 :
1758 : /* advance over PI, keep scanning */
1759 4 : p = e + 2;
1760 : }
1761 : }
1762 :
1763 :
1764 : /*
1765 : * Convert a text object to XML internal representation
1766 : *
1767 : * data is the source data (must not be toasted!), encoding is its encoding,
1768 : * and xmloption_arg and preserve_whitespace are options for the
1769 : * transformation.
1770 : *
1771 : * If parsed_xmloptiontype isn't NULL, *parsed_xmloptiontype is set to the
1772 : * XmlOptionType actually used to parse the input (typically the same as
1773 : * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
1774 : *
1775 : * If parsed_nodes isn't NULL and we parse in CONTENT mode, the list
1776 : * of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
1777 : * to *parsed_nodes. (It is caller's responsibility to free that.)
1778 : *
1779 : * Errors normally result in ereport(ERROR), but if escontext is an
1780 : * ErrorSaveContext, then "safe" errors are reported there instead, and the
1781 : * caller must check SOFT_ERROR_OCCURRED() to see whether that happened.
1782 : *
1783 : * Note: it is caller's responsibility to xmlFreeDoc() the result,
1784 : * else a permanent memory leak will ensue! But note the result could
1785 : * be NULL after a soft error.
1786 : *
 * Outline: the input is converted to UTF8, then parsed either as a whole
 * document with xmlCtxtReadDoc (DOCUMENT mode, or CONTENT input that turns
 * out to contain a DOCTYPE), or as content with xmlParseBalancedChunkMemory
 * after our own parse_xml_decl has skipped any XML declaration.
 *
1787 : * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1788 : * yet do not use SAX - see xmlreader.c)
1789 : */
1790 : static xmlDocPtr
1791 851 : xml_parse(text *data, XmlOptionType xmloption_arg,
1792 : bool preserve_whitespace, int encoding,
1793 : XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
1794 : Node *escontext)
1795 : {
1796 : int32 len;
1797 : xmlChar *string;
1798 : xmlChar *utf8string;
1799 : PgXmlErrorContext *xmlerrcxt;
/*
 * These three are assigned inside the PG_TRY block and examined in
 * PG_CATCH and after PG_END_TRY, hence volatile.  save_keep_blanks == -1
 * means xmlKeepBlanksDefault was not changed and needs no restoring.
 */
1800 851 : volatile xmlParserCtxtPtr ctxt = NULL;
1801 851 : volatile xmlDocPtr doc = NULL;
1802 851 : volatile int save_keep_blanks = -1;
1803 :
1804 : /*
1805 : * This step looks annoyingly redundant, but we must do it to have a
1806 : * null-terminated string in case encoding conversion isn't required.
1807 : */
1808 851 : len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
1809 851 : string = xml_text2xmlChar(data);
1810 :
1811 : /*
1812 : * If the data isn't UTF8, we must translate before giving it to libxml.
1813 : *
1814 : * XXX ideally, we'd catch any encoding conversion failure and return a
1815 : * soft error. However, failure to convert to UTF8 should be pretty darn
1816 : * rare, so for now this is left undone.
1817 : */
1818 851 : utf8string = pg_do_encoding_conversion(string,
1819 : len,
1820 : encoding,
1821 : PG_UTF8);
1822 :
1823 : /* Start up libxml and its parser */
1824 851 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1825 :
1826 : /* Use a TRY block to ensure we clean up correctly */
1827 851 : PG_TRY();
1828 : {
1829 851 : bool parse_as_document = false;
1830 : int res_code;
/* count is the length of the XML declaration skipped in CONTENT mode; it stays 0 otherwise */
1831 851 : size_t count = 0;
1832 851 : xmlChar *version = NULL;
1833 851 : int standalone = 0;
1834 :
1835 : /* Any errors here are reported as hard ereport's */
1836 851 : xmlInitParser();
1837 :
1838 : /* Decide whether to parse as document or content */
1839 851 : if (xmloption_arg == XMLOPTION_DOCUMENT)
1840 198 : parse_as_document = true;
1841 : else
1842 : {
1843 : /* Parse and skip over the XML declaration, if any */
1844 653 : res_code = parse_xml_decl(utf8string,
1845 : &count, &version, NULL, &standalone);
1846 653 : if (res_code != 0)
1847 : {
/* errsave comes back here only for a soft error; then leave via "fail" with doc still NULL */
1848 8 : errsave(escontext,
1849 : errcode(ERRCODE_INVALID_XML_CONTENT),
1850 : errmsg_internal("invalid XML content: invalid XML declaration"),
1851 : errdetail_for_xml_code(res_code));
1852 8 : goto fail;
1853 : }
1854 :
1855 : /* Is there a DOCTYPE element? */
1856 645 : if (xml_doctype_in_content(utf8string + count))
1857 28 : parse_as_document = true;
1858 : }
1859 :
1860 : /* initialize output parameters */
1861 843 : if (parsed_xmloptiontype != NULL)
1862 96 : *parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
1863 : XMLOPTION_CONTENT;
1864 843 : if (parsed_nodes != NULL)
1865 96 : *parsed_nodes = NULL;
1866 :
1867 843 : if (parse_as_document)
1868 : {
1869 : int options;
1870 :
1871 : /* set up parser context used by xmlCtxtReadDoc */
1872 226 : ctxt = xmlNewParserCtxt();
1873 226 : if (ctxt == NULL || xmlerrcxt->err_occurred)
1874 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1875 : "could not allocate parser context");
1876 :
1877 : /*
1878 : * Select parse options.
1879 : *
1880 : * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1881 : * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined
1882 : * by internal DTD are applied'. As for external DTDs, we try to
1883 : * support them too (see SQL/XML:2008 GR 10.16.7.e), but that
1884 : * doesn't really happen because xmlPgEntityLoader prevents it.
1885 : */
1886 226 : options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1887 226 : | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1888 :
/* the whole string, XML declaration included, is handed to libxml here */
1889 226 : doc = xmlCtxtReadDoc(ctxt, utf8string,
1890 : NULL, /* no URL */
1891 : "UTF-8",
1892 : options);
1893 :
1894 226 : if (doc == NULL || xmlerrcxt->err_occurred)
1895 : {
1896 : /* Use original option to decide which error code to report */
1897 97 : if (xmloption_arg == XMLOPTION_DOCUMENT)
1898 93 : xml_errsave(escontext, xmlerrcxt,
1899 : ERRCODE_INVALID_XML_DOCUMENT,
1900 : "invalid XML document");
1901 : else
1902 4 : xml_errsave(escontext, xmlerrcxt,
1903 : ERRCODE_INVALID_XML_CONTENT,
1904 : "invalid XML content");
1905 65 : goto fail;
1906 : }
1907 : }
1908 : else
1909 : {
1910 : /* set up document that xmlParseBalancedChunkMemory will add to */
1911 617 : doc = xmlNewDoc(version);
1912 617 : if (doc == NULL || xmlerrcxt->err_occurred)
1913 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1914 : "could not allocate XML document");
1915 :
1916 : Assert(doc->encoding == NULL);
1917 617 : doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1918 617 : if (doc->encoding == NULL || xmlerrcxt->err_occurred)
1919 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1920 : "could not allocate XML document");
1921 617 : doc->standalone = standalone;
1922 :
1923 : /* set parse options --- have to do this the ugly way */
/* this changes a libxml-wide default; the old value is kept so it can be put back on every exit path */
1924 617 : save_keep_blanks = xmlKeepBlanksDefault(preserve_whitespace ? 1 : 0);
1925 :
1926 : /* allow empty content */
1927 617 : if (*(utf8string + count))
1928 : {
1929 1202 : res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1930 601 : utf8string + count,
1931 : parsed_nodes);
1932 601 : if (res_code != 0 || xmlerrcxt->err_occurred)
1933 : {
1934 40 : xml_errsave(escontext, xmlerrcxt,
1935 : ERRCODE_INVALID_XML_CONTENT,
1936 : "invalid XML content");
1937 8 : goto fail;
1938 : }
1939 : }
1940 : }
1941 :
/*
 * Both successful parses and soft-error exits arrive here; the common
 * cleanup is done after PG_END_TRY.  Note that doc is returned as-is, so
 * after a soft error in content mode it can be non-NULL.
 */
1942 787 : fail:
1943 : ;
1944 : }
1945 64 : PG_CATCH();
1946 : {
/* hard error: undo whatever had been set up so far, then re-throw */
1947 64 : if (save_keep_blanks != -1)
1948 32 : xmlKeepBlanksDefault(save_keep_blanks);
1949 64 : if (doc != NULL)
1950 32 : xmlFreeDoc(doc);
1951 64 : if (ctxt != NULL)
1952 32 : xmlFreeParserCtxt(ctxt);
1953 :
1954 64 : pg_xml_done(xmlerrcxt, true);
1955 :
1956 64 : PG_RE_THROW();
1957 : }
1958 787 : PG_END_TRY();
1959 :
/* normal exit: restore the blanks setting and free the parser context, but keep doc for the caller */
1960 787 : if (save_keep_blanks != -1)
1961 585 : xmlKeepBlanksDefault(save_keep_blanks);
1962 :
1963 787 : if (ctxt != NULL)
1964 194 : xmlFreeParserCtxt(ctxt);
1965 :
1966 787 : pg_xml_done(xmlerrcxt, false);
1967 :
1968 787 : return doc;
1969 : }
1970 :
1971 :
1972 : /*
1973 : * xmlChar<->text conversions
1974 : */
1975 : static xmlChar *
1976 956 : xml_text2xmlChar(text *in)
1977 : {
1978 956 : return (xmlChar *) text_to_cstring(in);
1979 : }
1980 :
1981 :
1982 : #ifdef USE_LIBXMLCONTEXT
1983 :
1984 : /*
1985 : * Manage the special context used for all libxml allocations (but only
1986 : * in special debug builds; see notes at top of file)
1987 : */
1988 : static void
1989 : xml_memory_init(void)
1990 : {
1991 : /* Create memory context if not there already */
1992 : if (LibxmlContext == NULL)
1993 : LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1994 : "Libxml context",
1995 : ALLOCSET_DEFAULT_SIZES);
1996 :
1997 : /* Re-establish the callbacks even if already set */
1998 : xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1999 : }
2000 :
2001 : /*
2002 : * Wrappers for memory management functions
2003 : */
2004 : static void *
2005 : xml_palloc(size_t size)
2006 : {
2007 : return MemoryContextAlloc(LibxmlContext, size);
2008 : }
2009 :
2010 :
static void *
xml_repalloc(void *ptr, size_t size)
{
	/* Reallocation callback: simply forwards to repalloc() */
	void	   *resized = repalloc(ptr, size);

	return resized;
}
2016 :
2017 :
static void
xml_pfree(void *ptr)
{
	/* At least some parts of libxml assume xmlFree(NULL) is allowed */
	if (ptr == NULL)
		return;

	pfree(ptr);
}
2025 :
2026 :
2027 : static char *
2028 : xml_pstrdup(const char *string)
2029 : {
2030 : return MemoryContextStrdup(LibxmlContext, string);
2031 : }
2032 : #endif /* USE_LIBXMLCONTEXT */
2033 :
2034 :
2035 : /*
2036 : * xmlPgEntityLoader --- entity loader callback function
2037 : *
2038 : * Silently prevent any external entity URL from being loaded. We don't want
2039 : * to throw an error, so instead make the entity appear to expand to an empty
2040 : * string.
2041 : *
2042 : * We would prefer to allow loading entities that exist in the system's
2043 : * global XML catalog; but the available libxml2 APIs make that a complex
2044 : * and fragile task. For now, just shut down all external access.
2045 : */
2046 : static xmlParserInputPtr
2047 12 : xmlPgEntityLoader(const char *URL, const char *ID,
2048 : xmlParserCtxtPtr ctxt)
2049 : {
2050 12 : return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
2051 : }
2052 :
2053 :
2054 : /*
2055 : * xml_ereport --- report an XML-related error
2056 : *
2057 : * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
2058 : * standard. This function adds libxml's native error message, if any, as
2059 : * detail.
2060 : *
2061 : * This is exported for modules that want to share the core libxml error
2062 : * handler. Note that pg_xml_init() *must* have been called previously.
2063 : */
2064 : void
2065 8 : xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
2066 : {
2067 : char *detail;
2068 :
2069 : /* Defend against someone passing us a bogus context struct */
2070 8 : if (errcxt->magic != ERRCXT_MAGIC)
2071 0 : elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
2072 :
2073 : /* Flag that the current libxml error has been reported */
2074 8 : errcxt->err_occurred = false;
2075 :
2076 : /* Include detail only if we have some text from libxml */
2077 8 : if (errcxt->err_buf.len > 0)
2078 8 : detail = errcxt->err_buf.data;
2079 : else
2080 0 : detail = NULL;
2081 :
2082 8 : ereport(level,
2083 : (errcode(sqlcode),
2084 : errmsg_internal("%s", msg),
2085 : detail ? errdetail_internal("%s", detail) : 0));
2086 0 : }
2087 :
2088 :
2089 : /*
2090 : * xml_errsave --- save an XML-related error
2091 : *
2092 : * If escontext is an ErrorSaveContext, error details are saved into it,
2093 : * and control returns normally.
2094 : *
2095 : * Otherwise, the error is thrown, so that this is equivalent to
2096 : * xml_ereport() with level == ERROR.
2097 : *
2098 : * This should be used only for errors that we're sure we do not need
2099 : * a transaction abort to clean up after.
2100 : */
2101 : static void
2102 137 : xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
2103 : int sqlcode, const char *msg)
2104 : {
2105 : char *detail;
2106 :
2107 : /* Defend against someone passing us a bogus context struct */
2108 137 : if (errcxt->magic != ERRCXT_MAGIC)
2109 0 : elog(ERROR, "xml_errsave called with invalid PgXmlErrorContext");
2110 :
2111 : /* Flag that the current libxml error has been reported */
2112 137 : errcxt->err_occurred = false;
2113 :
2114 : /* Include detail only if we have some text from libxml */
2115 137 : if (errcxt->err_buf.len > 0)
2116 137 : detail = errcxt->err_buf.data;
2117 : else
2118 0 : detail = NULL;
2119 :
2120 137 : errsave(escontext,
2121 : (errcode(sqlcode),
2122 : errmsg_internal("%s", msg),
2123 : detail ? errdetail_internal("%s", detail) : 0));
2124 73 : }
2125 :
2126 :
2127 : /*
2128 : * Error handler for libxml errors and warnings
2129 : */
static void
xml_errorHandler(void *data, PgXmlErrorPtr error)
{
	/* "data" is treated as our error context; its magic is verified below */
	PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
	xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
	/* parser input is available only when the error carries a context */
	xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
	xmlNodePtr	node = error->node;
	/* an element name is used only when the error points at an element node */
	const xmlChar *name = (node != NULL &&
						   node->type == XML_ELEMENT_NODE) ? node->name : NULL;
	/* local copies, because the compensation switch below may override them */
	int			domain = error->domain;
	int			level = error->level;
	StringInfoData errorBuf;

	/*
	 * Defend against someone passing us a bogus context struct.
	 *
	 * We force a backend exit if this check fails because longjmp'ing out of
	 * libxml would likely render it unsafe to use further.
	 */
	if (xmlerrcxt->magic != ERRCXT_MAGIC)
		elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");

	/*----------
	 * Older libxml versions report some errors differently.
	 * First, some errors were previously reported as coming from the parser
	 * domain but are now reported as coming from the namespace domain.
	 * Second, some warnings were upgraded to errors.
	 * We attempt to compensate for that here.
	 *----------
	 */
	switch (error->code)
	{
			/* this code gets both its level and its domain overridden */
		case XML_WAR_NS_URI:
			level = XML_ERR_ERROR;
			domain = XML_FROM_NAMESPACE;
			break;

			/* these codes only get their domain overridden */
		case XML_ERR_NS_DECL_ERROR:
		case XML_WAR_NS_URI_RELATIVE:
		case XML_WAR_NS_COLUMN:
		case XML_NS_ERR_XML_NAMESPACE:
		case XML_NS_ERR_UNDEFINED_NAMESPACE:
		case XML_NS_ERR_QNAME:
		case XML_NS_ERR_ATTRIBUTE_REDEFINED:
		case XML_NS_ERR_EMPTY:
			domain = XML_FROM_NAMESPACE;
			break;
	}

	/* Decide whether to act on the error or not */
	switch (domain)
	{
		case XML_FROM_PARSER:

			/*
			 * XML_ERR_NOT_WELL_BALANCED is typically reported after some
			 * other, more on-point error.  Furthermore, libxml2 2.13 reports
			 * it under a completely different set of rules than prior
			 * versions.  To avoid cross-version behavioral differences,
			 * suppress it so long as we already logged some error.
			 */
			if (error->code == XML_ERR_NOT_WELL_BALANCED &&
				xmlerrcxt->err_occurred)
				return;
			pg_fallthrough;

		case XML_FROM_NONE:
		case XML_FROM_MEMORY:
		case XML_FROM_IO:

			/*
			 * Suppress warnings about undeclared entities.  We need to do
			 * this to avoid problems due to not loading DTD definitions.
			 */
			if (error->code == XML_WAR_UNDECLARED_ENTITY)
				return;

			/* Otherwise, accept error regardless of the parsing purpose */
			break;

		default:
			/* Ignore error if only doing well-formedness check */
			if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
				return;
			break;
	}

	/*
	 * Prepare error message in errorBuf.  The text is assembled as an
	 * optional "line N: " prefix, an optional "element NAME: " prefix, and
	 * then libxml's message (or a placeholder if it supplied none).
	 */
	initStringInfo(&errorBuf);

	if (error->line > 0)
		appendStringInfo(&errorBuf, "line %d: ", error->line);
	if (name != NULL)
		appendStringInfo(&errorBuf, "element %s: ", name);
	if (error->message != NULL)
		appendStringInfoString(&errorBuf, error->message);
	else
		appendStringInfoString(&errorBuf, "(no message provided)");

	/*
	 * Append context information to errorBuf.
	 *
	 * xmlParserPrintFileContext() uses libxml's "generic" error handler to
	 * write the context.  Since we don't want to duplicate libxml
	 * functionality here, we set up a generic error handler temporarily.
	 *
	 * We use appendStringInfo() directly as libxml's generic error handler.
	 * This should work because it has essentially the same signature as
	 * libxml expects, namely (void *ptr, const char *msg, ...).
	 */
	if (input != NULL)
	{
		/* remember the current generic handler so it can be put back below */
		xmlGenericErrorFunc errFuncSaved = xmlGenericError;
		void	   *errCtxSaved = xmlGenericErrorContext;

		xmlSetGenericErrorFunc(&errorBuf,
							   (xmlGenericErrorFunc) appendStringInfo);

		/* Add context information to errorBuf */
		appendStringInfoLineSeparator(&errorBuf);

		xmlParserPrintFileContext(input);

		/* Restore generic error func */
		xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
	}

	/* Get rid of any trailing newlines in errorBuf */
	chopStringInfoNewlines(&errorBuf);

	/*
	 * Legacy error handling mode.  err_occurred is never set, we just add the
	 * message to err_buf.  This mode exists because the xml2 contrib module
	 * uses our error-handling infrastructure, but we don't want to change its
	 * behaviour since it's deprecated anyway.  This is also why we don't
	 * distinguish between notices, warnings and errors here --- the old-style
	 * generic error handler wouldn't have done that either.
	 */
	if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
	{
		appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
		appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf.data,
							   errorBuf.len);

		/* the local buffer has been copied, so release it before returning */
		pfree(errorBuf.data);
		return;
	}

	/*
	 * We don't want to ereport() here because that'd probably leave libxml in
	 * an inconsistent state.  Instead, we remember the error and ereport()
	 * from xml_ereport().
	 *
	 * Warnings and notices can be reported immediately since they won't cause
	 * a longjmp() out of libxml.
	 */
	if (level >= XML_ERR_ERROR)
	{
		/* errors: accumulate the text in err_buf and raise the flag */
		appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
		appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf.data,
							   errorBuf.len);

		xmlerrcxt->err_occurred = true;
	}
	else if (level >= XML_ERR_WARNING)
	{
		ereport(WARNING,
				(errmsg_internal("%s", errorBuf.data)));
	}
	else
	{
		/* anything below warning level is reported as a NOTICE */
		ereport(NOTICE,
				(errmsg_internal("%s", errorBuf.data)));
	}

	pfree(errorBuf.data);
}
2307 :
2308 :
2309 : /*
2310 : * Convert libxml error codes into textual errdetail messages.
2311 : *
2312 : * This should be called within an ereport or errsave invocation,
2313 : * just as errdetail would be.
2314 : *
2315 : * At the moment, we only need to cover those codes that we
2316 : * may raise in this file.
2317 : */
2318 : static int
2319 4 : errdetail_for_xml_code(int code)
2320 : {
2321 : const char *det;
2322 :
2323 4 : switch (code)
2324 : {
2325 0 : case XML_ERR_INVALID_CHAR:
2326 0 : det = gettext_noop("Invalid character value.");
2327 0 : break;
2328 0 : case XML_ERR_SPACE_REQUIRED:
2329 0 : det = gettext_noop("Space required.");
2330 0 : break;
2331 4 : case XML_ERR_STANDALONE_VALUE:
2332 4 : det = gettext_noop("standalone accepts only 'yes' or 'no'.");
2333 4 : break;
2334 0 : case XML_ERR_VERSION_MISSING:
2335 0 : det = gettext_noop("Malformed declaration: missing version.");
2336 0 : break;
2337 0 : case XML_ERR_MISSING_ENCODING:
2338 0 : det = gettext_noop("Missing encoding in text declaration.");
2339 0 : break;
2340 0 : case XML_ERR_XMLDECL_NOT_FINISHED:
2341 0 : det = gettext_noop("Parsing XML declaration: '?>' expected.");
2342 0 : break;
2343 0 : default:
2344 0 : det = gettext_noop("Unrecognized libxml error code: %d.");
2345 0 : break;
2346 : }
2347 :
2348 4 : return errdetail(det, code);
2349 : }
2350 :
2351 :
2352 : /*
2353 : * Remove all trailing newlines from a StringInfo string
2354 : */
2355 : static void
2356 592 : chopStringInfoNewlines(StringInfo str)
2357 : {
2358 990 : while (str->len > 0 && str->data[str->len - 1] == '\n')
2359 398 : str->data[--str->len] = '\0';
2360 592 : }
2361 :
2362 :
2363 : /*
2364 : * Append a newline after removing any existing trailing newlines
2365 : */
2366 : static void
2367 393 : appendStringInfoLineSeparator(StringInfo str)
2368 : {
2369 393 : chopStringInfoNewlines(str);
2370 393 : if (str->len > 0)
2371 247 : appendStringInfoChar(str, '\n');
2372 393 : }
2373 :
2374 :
2375 : /*
2376 : * Convert one char in the current server encoding to a Unicode codepoint.
2377 : */
2378 : static pg_wchar
2379 12173 : sqlchar_to_unicode(const char *s)
2380 : {
2381 : char *utf8string;
2382 : pg_wchar ret[2]; /* need space for trailing zero */
2383 :
2384 12173 : utf8string = pg_server_to_any(s, pg_mblen_cstr(s), PG_UTF8);
2385 :
2386 12173 : pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
2387 : pg_encoding_mblen(PG_UTF8, utf8string));
2388 :
2389 12173 : if (utf8string != s)
2390 0 : pfree(utf8string);
2391 :
2392 12173 : return ret[0];
2393 : }
2394 :
2395 :
2396 : static bool
2397 2421 : is_valid_xml_namefirst(pg_wchar c)
2398 : {
2399 : /* (Letter | '_' | ':') */
2400 2425 : return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2401 4846 : || c == '_' || c == ':');
2402 : }
2403 :
2404 :
2405 : static bool
2406 9752 : is_valid_xml_namechar(pg_wchar c)
2407 : {
2408 : /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
2409 10345 : return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2410 593 : || xmlIsDigitQ(c)
2411 169 : || c == '.' || c == '-' || c == '_' || c == ':'
2412 8 : || xmlIsCombiningQ(c)
2413 20690 : || xmlIsExtenderQ(c));
2414 : }
2415 : #endif /* USE_LIBXML */
2416 :
2417 :
2418 : /*
2419 : * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
2420 : */
2421 : char *
2422 2430 : map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
2423 : bool escape_period)
2424 : {
2425 : #ifdef USE_LIBXML
2426 : StringInfoData buf;
2427 : const char *p;
2428 :
2429 : /*
2430 : * SQL/XML doesn't make use of this case anywhere, so it's probably a
2431 : * mistake.
2432 : */
2433 : Assert(fully_escaped || !escape_period);
2434 :
2435 2430 : initStringInfo(&buf);
2436 :
2437 14616 : for (p = ident; *p; p += pg_mblen_cstr(p))
2438 : {
2439 12186 : if (*p == ':' && (p == ident || fully_escaped))
2440 9 : appendStringInfoString(&buf, "_x003A_");
2441 12177 : else if (*p == '_' && *(p + 1) == 'x')
2442 4 : appendStringInfoString(&buf, "_x005F_");
2443 14363 : else if (fully_escaped && p == ident &&
2444 2190 : pg_strncasecmp(p, "xml", 3) == 0)
2445 : {
2446 0 : if (*p == 'x')
2447 0 : appendStringInfoString(&buf, "_x0078_");
2448 : else
2449 0 : appendStringInfoString(&buf, "_x0058_");
2450 : }
2451 12173 : else if (escape_period && *p == '.')
2452 0 : appendStringInfoString(&buf, "_x002E_");
2453 : else
2454 : {
2455 12173 : pg_wchar u = sqlchar_to_unicode(p);
2456 :
2457 24346 : if ((p == ident)
2458 2421 : ? !is_valid_xml_namefirst(u)
2459 9752 : : !is_valid_xml_namechar(u))
2460 12 : appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
2461 : else
2462 12161 : appendBinaryStringInfo(&buf, p, pg_mblen_cstr(p));
2463 : }
2464 : }
2465 :
2466 2430 : return buf.data;
2467 : #else /* not USE_LIBXML */
2468 : NO_XML_SUPPORT();
2469 : return NULL;
2470 : #endif /* not USE_LIBXML */
2471 : }
2472 :
2473 :
2474 : /*
2475 : * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2476 : */
2477 : char *
2478 72 : map_xml_name_to_sql_identifier(const char *name)
2479 : {
2480 : StringInfoData buf;
2481 : const char *p;
2482 :
2483 72 : initStringInfo(&buf);
2484 :
2485 396 : for (p = name; *p; p += pg_mblen_cstr(p))
2486 : {
2487 324 : if (*p == '_' && *(p + 1) == 'x'
2488 9 : && isxdigit((unsigned char) *(p + 2))
2489 9 : && isxdigit((unsigned char) *(p + 3))
2490 9 : && isxdigit((unsigned char) *(p + 4))
2491 9 : && isxdigit((unsigned char) *(p + 5))
2492 9 : && *(p + 6) == '_')
2493 9 : {
2494 : char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
2495 : unsigned int u;
2496 :
2497 9 : sscanf(p + 2, "%X", &u);
2498 9 : pg_unicode_to_server(u, (unsigned char *) cbuf);
2499 9 : appendStringInfoString(&buf, cbuf);
2500 9 : p += 6;
2501 : }
2502 : else
2503 315 : appendBinaryStringInfo(&buf, p, pg_mblen_cstr(p));
2504 : }
2505 :
2506 72 : return buf.data;
2507 : }
2508 :
/*
 * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
 *
 * When xml_escape_strings is true, then certain characters in string
 * values are replaced by entity references (&lt; etc.), as specified
 * in SQL/XML:2008 section 9.8 GR 9) a) iii).  This is normally what is
 * wanted.  The false case is mainly useful when the resulting value
 * is used with xmlTextWriterWriteAttribute() to write out an
 * attribute, because that function does the escaping itself.
 *
 * The result is a C string.  Note that the boolean case returns a string
 * constant rather than an allocated copy.
 */
char *
map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
{
	if (type_is_array_domain(type))
	{
		/*
		 * Array case: each non-null element is mapped recursively and wrapped
		 * in <element>...</element>.
		 */
		ArrayType  *array;
		Oid			elmtype;
		int16		elmlen;
		bool		elmbyval;
		char		elmalign;
		int			num_elems;
		Datum	   *elem_values;
		bool	   *elem_nulls;
		StringInfoData buf;
		int			i;

		array = DatumGetArrayTypeP(value);
		elmtype = ARR_ELEMTYPE(array);
		get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);

		deconstruct_array(array, elmtype,
						  elmlen, elmbyval, elmalign,
						  &elem_values, &elem_nulls,
						  &num_elems);

		initStringInfo(&buf);

		for (i = 0; i < num_elems; i++)
		{
			/* null elements produce no output at all */
			if (elem_nulls[i])
				continue;
			appendStringInfoString(&buf, "<element>");
			/* elements are always escaped, whatever the caller's flag says */
			appendStringInfoString(&buf,
								   map_sql_value_to_xml_value(elem_values[i],
															  elmtype, true));
			appendStringInfoString(&buf, "</element>");
		}

		pfree(elem_values);
		pfree(elem_nulls);

		return buf.data;
	}
	else
	{
		Oid			typeOut;
		bool		isvarlena;
		char	   *str;

		/*
		 * Flatten domains; the special-case treatments below should apply to,
		 * eg, domains over boolean not just boolean.
		 */
		type = getBaseType(type);

		/*
		 * Special XSD formatting for some data types
		 */
		switch (type)
		{
			case BOOLOID:
				/* these are string constants, not allocated copies */
				if (DatumGetBool(value))
					return "true";
				else
					return "false";

			case DATEOID:
				{
					DateADT		date;
					struct pg_tm tm;
					char		buf[MAXDATELEN + 1];

					date = DatumGetDateADT(value);
					/* XSD doesn't support infinite values */
					if (DATE_NOT_FINITE(date))
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("date out of range"),
								 errdetail("XML does not support infinite date values.")));
					/* only the year/month/day fields of tm are filled in here */
					j2date(date + POSTGRES_EPOCH_JDATE,
						   &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
					EncodeDateOnly(&tm, USE_XSD_DATES, buf);

					return pstrdup(buf);
				}

			case TIMESTAMPOID:
				{
					Timestamp	timestamp;
					struct pg_tm tm;
					fsec_t		fsec;
					char		buf[MAXDATELEN + 1];

					timestamp = DatumGetTimestamp(value);

					/* XSD doesn't support infinite values */
					if (TIMESTAMP_NOT_FINITE(timestamp))
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("timestamp out of range"),
								 errdetail("XML does not support infinite timestamp values.")));
					else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
						EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
					else
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("timestamp out of range")));

					return pstrdup(buf);
				}

			case TIMESTAMPTZOID:
				{
					TimestampTz timestamp;
					struct pg_tm tm;
					int			tz;
					fsec_t		fsec;
					const char *tzn = NULL;
					char		buf[MAXDATELEN + 1];

					/*
					 * NOTE(review): DatumGetTimestamp is used for a
					 * TimestampTz variable; presumably both share one
					 * representation --- confirm.
					 */
					timestamp = DatumGetTimestamp(value);

					/* XSD doesn't support infinite values */
					if (TIMESTAMP_NOT_FINITE(timestamp))
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("timestamp out of range"),
								 errdetail("XML does not support infinite timestamp values.")));
					else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
						EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
					else
						ereport(ERROR,
								(errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
								 errmsg("timestamp out of range")));

					return pstrdup(buf);
				}

#ifdef USE_LIBXML
			case BYTEAOID:
				{
					bytea	   *bstr = DatumGetByteaPP(value);
					PgXmlErrorContext *xmlerrcxt;
					/* volatile because the PG_CATCH block inspects these */
					volatile xmlBufferPtr buf = NULL;
					volatile xmlTextWriterPtr writer = NULL;
					char	   *result;

					xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);

					PG_TRY();
					{
						buf = xmlBufferCreate();
						if (buf == NULL || xmlerrcxt->err_occurred)
							xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
										"could not allocate xmlBuffer");
						writer = xmlNewTextWriterMemory(buf, 0);
						if (writer == NULL || xmlerrcxt->err_occurred)
							xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
										"could not allocate xmlTextWriter");

						/* the xmlbinary setting picks base64 or hex output */
						if (xmlbinary == XMLBINARY_BASE64)
							xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
													 0, VARSIZE_ANY_EXHDR(bstr));
						else
							xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
													 0, VARSIZE_ANY_EXHDR(bstr));

						/* we MUST do this now to flush data out to the buffer */
						xmlFreeTextWriter(writer);
						writer = NULL;

						/* copy the encoded text before the buffer is freed */
						result = pstrdup((const char *) xmlBufferContent(buf));
					}
					PG_CATCH();
					{
						/* release whatever libxml objects were created */
						if (writer)
							xmlFreeTextWriter(writer);
						if (buf)
							xmlBufferFree(buf);

						pg_xml_done(xmlerrcxt, true);

						PG_RE_THROW();
					}
					PG_END_TRY();

					xmlBufferFree(buf);

					pg_xml_done(xmlerrcxt, false);

					return result;
				}
#endif							/* USE_LIBXML */

		}

		/*
		 * otherwise, just use the type's native text representation
		 */
		getTypeOutputInfo(type, &typeOut, &isvarlena);
		str = OidOutputFunctionCall(typeOut, value);

		/* ... exactly as-is for XML, and when escaping is not wanted */
		if (type == XMLOID || !xml_escape_strings)
			return str;

		/* otherwise, translate special characters as needed */
		return escape_xml(str);
	}
}
2729 :
2730 :
2731 : /*
2732 : * Escape characters in text that have special meanings in XML.
2733 : *
2734 : * Returns a palloc'd string.
2735 : *
2736 : * NB: this is intentionally not dependent on libxml.
2737 : */
2738 : char *
2739 76510 : escape_xml(const char *str)
2740 : {
2741 : StringInfoData buf;
2742 : const char *p;
2743 :
2744 76510 : initStringInfo(&buf);
2745 483356 : for (p = str; *p; p++)
2746 : {
2747 406846 : switch (*p)
2748 : {
2749 0 : case '&':
2750 0 : appendStringInfoString(&buf, "&");
2751 0 : break;
2752 28 : case '<':
2753 28 : appendStringInfoString(&buf, "<");
2754 28 : break;
2755 18 : case '>':
2756 18 : appendStringInfoString(&buf, ">");
2757 18 : break;
2758 0 : case '\r':
2759 0 : appendStringInfoString(&buf, "
");
2760 0 : break;
2761 406800 : default:
2762 406800 : appendStringInfoCharMacro(&buf, *p);
2763 406800 : break;
2764 : }
2765 : }
2766 76510 : return buf.data;
2767 : }
2768 :
2769 :
static char *
_SPI_strdup(const char *s)
{
	/* Byte count includes the terminating NUL */
	size_t		nbytes = strlen(s) + 1;
	char	   *copy;

	/* Allocate with SPI_palloc and copy the string, terminator included */
	copy = SPI_palloc(nbytes);
	memcpy(copy, s, nbytes);

	return copy;
}
2779 :
2780 :
2781 : /*
2782 : * SQL to XML mapping functions
2783 : *
2784 : * What follows below was at one point intentionally organized so that
2785 : * you can read along in the SQL/XML standard. The functions are
2786 : * mostly split up the way the clauses lay out in the standards
2787 : * document, and the identifiers are also aligned with the standard
2788 : * text. Unfortunately, SQL/XML:2006 reordered the clauses
2789 : * differently than SQL/XML:2003, so the order below doesn't make much
2790 : * sense anymore.
2791 : *
2792 : * There are many things going on there:
2793 : *
2794 : * There are two kinds of mappings: Mapping SQL data (table contents)
2795 : * to XML documents, and mapping SQL structure (the "schema") to XML
2796 : * Schema. And there are functions that do both at the same time.
2797 : *
2798 : * Then you can map a database, a schema, or a table, each in both
2799 : * ways. This breaks down recursively: Mapping a database invokes
2800 : * mapping schemas, which invokes mapping tables, which invokes
2801 : * mapping rows, which invokes mapping columns, although you can't
2802 : * call the last two from the outside. Because of this, there are a
2803 : * number of xyz_internal() functions which are to be called both from
2804 : * the function manager wrapper and from some upper layer in a
2805 : * recursive call.
2806 : *
2807 : * See the documentation about what the common function arguments
2808 : * nulls, tableforest, and targetns mean.
2809 : *
2810 : * Some style guidelines for XML output: Use double quotes for quoting
2811 : * XML attributes. Indent XML elements by two spaces, but remember
2812 : * that a lot of code is called recursively at different levels, so
2813 : * it's better not to indent rather than create output that indents
2814 : * and outdents weirdly. Add newlines to make the output look nice.
2815 : */
2816 :
2817 :
2818 : /*
2819 : * Visibility of objects for XML mappings; see SQL/XML:2008 section
2820 : * 4.10.8.
2821 : */
2822 :
2823 : /*
2824 : * Given a query, which must return type oid as first column, produce
2825 : * a list of Oids with the query results.
2826 : */
2827 : static List *
2828 24 : query_to_oid_list(const char *query)
2829 : {
2830 : uint64 i;
2831 24 : List *list = NIL;
2832 : int spi_result;
2833 :
2834 24 : spi_result = SPI_execute(query, true, 0);
2835 24 : if (spi_result != SPI_OK_SELECT)
2836 0 : elog(ERROR, "SPI_execute returned %s for %s",
2837 : SPI_result_code_string(spi_result), query);
2838 :
2839 72 : for (i = 0; i < SPI_processed; i++)
2840 : {
2841 : Datum oid;
2842 : bool isnull;
2843 :
2844 48 : oid = SPI_getbinval(SPI_tuptable->vals[i],
2845 48 : SPI_tuptable->tupdesc,
2846 : 1,
2847 : &isnull);
2848 48 : if (!isnull)
2849 48 : list = lappend_oid(list, DatumGetObjectId(oid));
2850 : }
2851 :
2852 24 : return list;
2853 : }
2854 :
2855 :
2856 : static List *
2857 24 : schema_get_xml_visible_tables(Oid nspid)
2858 : {
2859 : StringInfoData query;
2860 :
2861 24 : initStringInfo(&query);
2862 24 : appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2863 : " WHERE relnamespace = %u AND relkind IN ("
2864 : CppAsString2(RELKIND_RELATION) ","
2865 : CppAsString2(RELKIND_MATVIEW) ","
2866 : CppAsString2(RELKIND_VIEW) ")"
2867 : " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2868 : " ORDER BY relname;", nspid);
2869 :
2870 24 : return query_to_oid_list(query.data);
2871 : }
2872 :
2873 :
2874 : /*
2875 : * Including the system schemas is probably not useful for a database
2876 : * mapping.
2877 : */
/* SQL predicate matching schemas named "pg_*" or "information_schema" */
#define XML_VISIBLE_SCHEMAS_EXCLUDE "(nspname ~ '^pg_' OR nspname = 'information_schema')"

/*
 * SQL query (without a trailing semicolon, so callers can append to it or
 * embed it as a subquery) selecting the OIDs of schemas for which
 * has_schema_privilege reports USAGE and which the predicate above does not
 * match.
 */
#define XML_VISIBLE_SCHEMAS "SELECT oid FROM pg_catalog.pg_namespace WHERE pg_catalog.has_schema_privilege (oid, 'USAGE') AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2881 :
2882 :
2883 : static List *
2884 0 : database_get_xml_visible_schemas(void)
2885 : {
2886 0 : return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2887 : }
2888 :
2889 :
2890 : static List *
2891 0 : database_get_xml_visible_tables(void)
2892 : {
2893 : /* At the moment there is no order required here. */
2894 0 : return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2895 : " WHERE relkind IN ("
2896 : CppAsString2(RELKIND_RELATION) ","
2897 : CppAsString2(RELKIND_MATVIEW) ","
2898 : CppAsString2(RELKIND_VIEW) ")"
2899 : " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2900 : " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2901 : }
2902 :
2903 :
2904 : /*
2905 : * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2906 : * section 9.11.
2907 : */
2908 :
2909 : static StringInfo
2910 64 : table_to_xml_internal(Oid relid,
2911 : const char *xmlschema, bool nulls, bool tableforest,
2912 : const char *targetns, bool top_level)
2913 : {
2914 : StringInfoData query;
2915 :
2916 64 : initStringInfo(&query);
2917 64 : appendStringInfo(&query, "SELECT * FROM %s",
2918 : DatumGetCString(DirectFunctionCall1(regclassout,
2919 : ObjectIdGetDatum(relid))));
2920 64 : return query_to_xml_internal(query.data, get_rel_name(relid),
2921 : xmlschema, nulls, tableforest,
2922 : targetns, top_level);
2923 : }
2924 :
2925 :
2926 : Datum
2927 24 : table_to_xml(PG_FUNCTION_ARGS)
2928 : {
2929 24 : Oid relid = PG_GETARG_OID(0);
2930 24 : bool nulls = PG_GETARG_BOOL(1);
2931 24 : bool tableforest = PG_GETARG_BOOL(2);
2932 24 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2933 :
2934 24 : PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2935 : nulls, tableforest,
2936 : targetns, true)));
2937 : }
2938 :
2939 :
2940 : Datum
2941 6 : query_to_xml(PG_FUNCTION_ARGS)
2942 : {
2943 6 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2944 6 : bool nulls = PG_GETARG_BOOL(1);
2945 6 : bool tableforest = PG_GETARG_BOOL(2);
2946 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2947 :
2948 6 : PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2949 : NULL, nulls, tableforest,
2950 : targetns, true)));
2951 : }
2952 :
2953 :
2954 : Datum
2955 8 : cursor_to_xml(PG_FUNCTION_ARGS)
2956 : {
2957 8 : char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2958 8 : int32 count = PG_GETARG_INT32(1);
2959 8 : bool nulls = PG_GETARG_BOOL(2);
2960 8 : bool tableforest = PG_GETARG_BOOL(3);
2961 8 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2962 :
2963 : StringInfoData result;
2964 : Portal portal;
2965 : uint64 i;
2966 :
2967 8 : initStringInfo(&result);
2968 :
2969 8 : if (!tableforest)
2970 : {
2971 4 : xmldata_root_element_start(&result, "table", NULL, targetns, true);
2972 4 : appendStringInfoChar(&result, '\n');
2973 : }
2974 :
2975 8 : SPI_connect();
2976 8 : portal = SPI_cursor_find(name);
2977 8 : if (portal == NULL)
2978 0 : ereport(ERROR,
2979 : (errcode(ERRCODE_UNDEFINED_CURSOR),
2980 : errmsg("cursor \"%s\" does not exist", name)));
2981 :
2982 8 : SPI_cursor_fetch(portal, true, count);
2983 32 : for (i = 0; i < SPI_processed; i++)
2984 24 : SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2985 : tableforest, targetns, true);
2986 :
2987 8 : SPI_finish();
2988 :
2989 8 : if (!tableforest)
2990 4 : xmldata_root_element_end(&result, "table");
2991 :
2992 8 : PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2993 : }
2994 :
2995 :
2996 : /*
2997 : * Write the start tag of the root element of a data mapping.
2998 : *
2999 : * top_level means that this is the very top level of the eventual
3000 : * output. For example, when the user calls table_to_xml, then a call
3001 : * with a table name to this function is the top level. When the user
3002 : * calls database_to_xml, then a call with a schema name to this
3003 : * function is not the top level. If top_level is false, then the XML
3004 : * namespace declarations are omitted, because they supposedly already
3005 : * appeared earlier in the output. Repeating them is not wrong, but
3006 : * it looks ugly.
3007 : */
3008 : static void
3009 158 : xmldata_root_element_start(StringInfo result, const char *eltname,
3010 : const char *xmlschema, const char *targetns,
3011 : bool top_level)
3012 : {
3013 : /* This isn't really wrong but currently makes no sense. */
3014 : Assert(top_level || !xmlschema);
3015 :
3016 158 : appendStringInfo(result, "<%s", eltname);
3017 158 : if (top_level)
3018 : {
3019 118 : appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
3020 118 : if (strlen(targetns) > 0)
3021 20 : appendStringInfo(result, " xmlns=\"%s\"", targetns);
3022 : }
3023 158 : if (xmlschema)
3024 : {
3025 : /* FIXME: better targets */
3026 12 : if (strlen(targetns) > 0)
3027 4 : appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
3028 : else
3029 8 : appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
3030 : }
3031 158 : appendStringInfoString(result, ">\n");
3032 158 : }
3033 :
3034 :
3035 : static void
3036 158 : xmldata_root_element_end(StringInfo result, const char *eltname)
3037 : {
3038 158 : appendStringInfo(result, "</%s>\n", eltname);
3039 158 : }
3040 :
3041 :
3042 : static StringInfo
3043 74 : query_to_xml_internal(const char *query, char *tablename,
3044 : const char *xmlschema, bool nulls, bool tableforest,
3045 : const char *targetns, bool top_level)
3046 : {
3047 : StringInfo result;
3048 : char *xmltn;
3049 : uint64 i;
3050 :
3051 74 : if (tablename)
3052 64 : xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3053 : else
3054 10 : xmltn = "table";
3055 :
3056 74 : result = makeStringInfo();
3057 :
3058 74 : SPI_connect();
3059 74 : if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
3060 0 : ereport(ERROR,
3061 : (errcode(ERRCODE_DATA_EXCEPTION),
3062 : errmsg("invalid query")));
3063 :
3064 74 : if (!tableforest)
3065 : {
3066 34 : xmldata_root_element_start(result, xmltn, xmlschema,
3067 : targetns, top_level);
3068 34 : appendStringInfoChar(result, '\n');
3069 : }
3070 :
3071 74 : if (xmlschema)
3072 20 : appendStringInfo(result, "%s\n\n", xmlschema);
3073 :
3074 256 : for (i = 0; i < SPI_processed; i++)
3075 182 : SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
3076 : tableforest, targetns, top_level);
3077 :
3078 74 : if (!tableforest)
3079 34 : xmldata_root_element_end(result, xmltn);
3080 :
3081 74 : SPI_finish();
3082 :
3083 74 : return result;
3084 : }
3085 :
3086 :
3087 : Datum
3088 20 : table_to_xmlschema(PG_FUNCTION_ARGS)
3089 : {
3090 20 : Oid relid = PG_GETARG_OID(0);
3091 20 : bool nulls = PG_GETARG_BOOL(1);
3092 20 : bool tableforest = PG_GETARG_BOOL(2);
3093 20 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3094 : const char *result;
3095 : Relation rel;
3096 :
3097 20 : rel = table_open(relid, AccessShareLock);
3098 20 : result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3099 : tableforest, targetns);
3100 20 : table_close(rel, NoLock);
3101 :
3102 20 : PG_RETURN_XML_P(cstring_to_xmltype(result));
3103 : }
3104 :
3105 :
3106 : Datum
3107 4 : query_to_xmlschema(PG_FUNCTION_ARGS)
3108 : {
3109 4 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
3110 4 : bool nulls = PG_GETARG_BOOL(1);
3111 4 : bool tableforest = PG_GETARG_BOOL(2);
3112 4 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3113 : const char *result;
3114 : SPIPlanPtr plan;
3115 : Portal portal;
3116 :
3117 4 : SPI_connect();
3118 :
3119 4 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
3120 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3121 :
3122 4 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
3123 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3124 :
3125 4 : result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3126 : InvalidOid, nulls,
3127 : tableforest, targetns));
3128 4 : SPI_cursor_close(portal);
3129 4 : SPI_finish();
3130 :
3131 4 : PG_RETURN_XML_P(cstring_to_xmltype(result));
3132 : }
3133 :
3134 :
3135 : Datum
3136 8 : cursor_to_xmlschema(PG_FUNCTION_ARGS)
3137 : {
3138 8 : char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
3139 8 : bool nulls = PG_GETARG_BOOL(1);
3140 8 : bool tableforest = PG_GETARG_BOOL(2);
3141 8 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3142 : const char *xmlschema;
3143 : Portal portal;
3144 :
3145 8 : SPI_connect();
3146 8 : portal = SPI_cursor_find(name);
3147 8 : if (portal == NULL)
3148 0 : ereport(ERROR,
3149 : (errcode(ERRCODE_UNDEFINED_CURSOR),
3150 : errmsg("cursor \"%s\" does not exist", name)));
3151 8 : if (portal->tupDesc == NULL)
3152 0 : ereport(ERROR,
3153 : (errcode(ERRCODE_INVALID_CURSOR_STATE),
3154 : errmsg("portal \"%s\" does not return tuples", name)));
3155 :
3156 8 : xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3157 : InvalidOid, nulls,
3158 : tableforest, targetns));
3159 8 : SPI_finish();
3160 :
3161 8 : PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
3162 : }
3163 :
3164 :
3165 : Datum
3166 16 : table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3167 : {
3168 16 : Oid relid = PG_GETARG_OID(0);
3169 16 : bool nulls = PG_GETARG_BOOL(1);
3170 16 : bool tableforest = PG_GETARG_BOOL(2);
3171 16 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3172 : Relation rel;
3173 : const char *xmlschema;
3174 :
3175 16 : rel = table_open(relid, AccessShareLock);
3176 16 : xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3177 : tableforest, targetns);
3178 16 : table_close(rel, NoLock);
3179 :
3180 16 : PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
3181 : xmlschema, nulls, tableforest,
3182 : targetns, true)));
3183 : }
3184 :
3185 :
3186 : Datum
3187 4 : query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3188 : {
3189 4 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
3190 4 : bool nulls = PG_GETARG_BOOL(1);
3191 4 : bool tableforest = PG_GETARG_BOOL(2);
3192 4 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3193 :
3194 : const char *xmlschema;
3195 : SPIPlanPtr plan;
3196 : Portal portal;
3197 :
3198 4 : SPI_connect();
3199 :
3200 4 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
3201 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3202 :
3203 4 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
3204 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3205 :
3206 4 : xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3207 : InvalidOid, nulls, tableforest, targetns));
3208 4 : SPI_cursor_close(portal);
3209 4 : SPI_finish();
3210 :
3211 4 : PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
3212 : xmlschema, nulls, tableforest,
3213 : targetns, true)));
3214 : }
3215 :
3216 :
3217 : /*
3218 : * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
3219 : * sections 9.13, 9.14.
3220 : */
3221 :
3222 : static StringInfo
3223 12 : schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
3224 : bool tableforest, const char *targetns, bool top_level)
3225 : {
3226 : StringInfo result;
3227 : char *xmlsn;
3228 : List *relid_list;
3229 : ListCell *cell;
3230 :
3231 12 : xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
3232 : true, false);
3233 12 : result = makeStringInfo();
3234 :
3235 12 : xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
3236 12 : appendStringInfoChar(result, '\n');
3237 :
3238 12 : if (xmlschema)
3239 4 : appendStringInfo(result, "%s\n\n", xmlschema);
3240 :
3241 12 : SPI_connect();
3242 :
3243 12 : relid_list = schema_get_xml_visible_tables(nspid);
3244 :
3245 36 : foreach(cell, relid_list)
3246 : {
3247 24 : Oid relid = lfirst_oid(cell);
3248 : StringInfo subres;
3249 :
3250 24 : subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
3251 : targetns, false);
3252 :
3253 24 : appendBinaryStringInfo(result, subres->data, subres->len);
3254 24 : appendStringInfoChar(result, '\n');
3255 : }
3256 :
3257 12 : SPI_finish();
3258 :
3259 12 : xmldata_root_element_end(result, xmlsn);
3260 :
3261 12 : return result;
3262 : }
3263 :
3264 :
3265 : Datum
3266 8 : schema_to_xml(PG_FUNCTION_ARGS)
3267 : {
3268 8 : Name name = PG_GETARG_NAME(0);
3269 8 : bool nulls = PG_GETARG_BOOL(1);
3270 8 : bool tableforest = PG_GETARG_BOOL(2);
3271 8 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3272 :
3273 : char *schemaname;
3274 : Oid nspid;
3275 :
3276 8 : schemaname = NameStr(*name);
3277 8 : nspid = LookupExplicitNamespace(schemaname, false);
3278 :
3279 8 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
3280 : nulls, tableforest, targetns, true)));
3281 : }
3282 :
3283 :
3284 : /*
3285 : * Write the start element of the root element of an XML Schema mapping.
3286 : */
3287 : static void
3288 64 : xsd_schema_element_start(StringInfo result, const char *targetns)
3289 : {
3290 64 : appendStringInfoString(result,
3291 : "<xsd:schema\n"
3292 : " xmlns:xsd=\"" NAMESPACE_XSD "\"");
3293 64 : if (strlen(targetns) > 0)
3294 12 : appendStringInfo(result,
3295 : "\n"
3296 : " targetNamespace=\"%s\"\n"
3297 : " elementFormDefault=\"qualified\"",
3298 : targetns);
3299 64 : appendStringInfoString(result,
3300 : ">\n\n");
3301 64 : }
3302 :
3303 :
3304 : static void
3305 64 : xsd_schema_element_end(StringInfo result)
3306 : {
3307 64 : appendStringInfoString(result, "</xsd:schema>");
3308 64 : }
3309 :
3310 :
3311 : static StringInfo
3312 12 : schema_to_xmlschema_internal(const char *schemaname, bool nulls,
3313 : bool tableforest, const char *targetns)
3314 : {
3315 : Oid nspid;
3316 : List *relid_list;
3317 : List *tupdesc_list;
3318 : ListCell *cell;
3319 : StringInfo result;
3320 :
3321 12 : result = makeStringInfo();
3322 :
3323 12 : nspid = LookupExplicitNamespace(schemaname, false);
3324 :
3325 12 : xsd_schema_element_start(result, targetns);
3326 :
3327 12 : SPI_connect();
3328 :
3329 12 : relid_list = schema_get_xml_visible_tables(nspid);
3330 :
3331 12 : tupdesc_list = NIL;
3332 36 : foreach(cell, relid_list)
3333 : {
3334 : Relation rel;
3335 :
3336 24 : rel = table_open(lfirst_oid(cell), AccessShareLock);
3337 24 : tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3338 24 : table_close(rel, NoLock);
3339 : }
3340 :
3341 12 : appendStringInfoString(result,
3342 : map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3343 :
3344 12 : appendStringInfoString(result,
3345 : map_sql_schema_to_xmlschema_types(nspid, relid_list,
3346 : nulls, tableforest, targetns));
3347 :
3348 12 : xsd_schema_element_end(result);
3349 :
3350 12 : SPI_finish();
3351 :
3352 12 : return result;
3353 : }
3354 :
3355 :
3356 : Datum
3357 8 : schema_to_xmlschema(PG_FUNCTION_ARGS)
3358 : {
3359 8 : Name name = PG_GETARG_NAME(0);
3360 8 : bool nulls = PG_GETARG_BOOL(1);
3361 8 : bool tableforest = PG_GETARG_BOOL(2);
3362 8 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3363 :
3364 8 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
3365 : nulls, tableforest, targetns)));
3366 : }
3367 :
3368 :
3369 : Datum
3370 4 : schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3371 : {
3372 4 : Name name = PG_GETARG_NAME(0);
3373 4 : bool nulls = PG_GETARG_BOOL(1);
3374 4 : bool tableforest = PG_GETARG_BOOL(2);
3375 4 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3376 : char *schemaname;
3377 : Oid nspid;
3378 : StringInfo xmlschema;
3379 :
3380 4 : schemaname = NameStr(*name);
3381 4 : nspid = LookupExplicitNamespace(schemaname, false);
3382 :
3383 4 : xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
3384 : tableforest, targetns);
3385 :
3386 4 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
3387 : xmlschema->data, nulls,
3388 : tableforest, targetns, true)));
3389 : }
3390 :
3391 :
3392 : /*
3393 : * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
3394 : * sections 9.16, 9.17.
3395 : */
3396 :
3397 : static StringInfo
3398 0 : database_to_xml_internal(const char *xmlschema, bool nulls,
3399 : bool tableforest, const char *targetns)
3400 : {
3401 : StringInfo result;
3402 : List *nspid_list;
3403 : ListCell *cell;
3404 : char *xmlcn;
3405 :
3406 0 : xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
3407 : true, false);
3408 0 : result = makeStringInfo();
3409 :
3410 0 : xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
3411 0 : appendStringInfoChar(result, '\n');
3412 :
3413 0 : if (xmlschema)
3414 0 : appendStringInfo(result, "%s\n\n", xmlschema);
3415 :
3416 0 : SPI_connect();
3417 :
3418 0 : nspid_list = database_get_xml_visible_schemas();
3419 :
3420 0 : foreach(cell, nspid_list)
3421 : {
3422 0 : Oid nspid = lfirst_oid(cell);
3423 : StringInfo subres;
3424 :
3425 0 : subres = schema_to_xml_internal(nspid, NULL, nulls,
3426 : tableforest, targetns, false);
3427 :
3428 0 : appendBinaryStringInfo(result, subres->data, subres->len);
3429 0 : appendStringInfoChar(result, '\n');
3430 : }
3431 :
3432 0 : SPI_finish();
3433 :
3434 0 : xmldata_root_element_end(result, xmlcn);
3435 :
3436 0 : return result;
3437 : }
3438 :
3439 :
3440 : Datum
3441 0 : database_to_xml(PG_FUNCTION_ARGS)
3442 : {
3443 0 : bool nulls = PG_GETARG_BOOL(0);
3444 0 : bool tableforest = PG_GETARG_BOOL(1);
3445 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3446 :
3447 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3448 : tableforest, targetns)));
3449 : }
3450 :
3451 :
3452 : static StringInfo
3453 0 : database_to_xmlschema_internal(bool nulls, bool tableforest,
3454 : const char *targetns)
3455 : {
3456 : List *relid_list;
3457 : List *nspid_list;
3458 : List *tupdesc_list;
3459 : ListCell *cell;
3460 : StringInfo result;
3461 :
3462 0 : result = makeStringInfo();
3463 :
3464 0 : xsd_schema_element_start(result, targetns);
3465 :
3466 0 : SPI_connect();
3467 :
3468 0 : relid_list = database_get_xml_visible_tables();
3469 0 : nspid_list = database_get_xml_visible_schemas();
3470 :
3471 0 : tupdesc_list = NIL;
3472 0 : foreach(cell, relid_list)
3473 : {
3474 : Relation rel;
3475 :
3476 0 : rel = table_open(lfirst_oid(cell), AccessShareLock);
3477 0 : tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3478 0 : table_close(rel, NoLock);
3479 : }
3480 :
3481 0 : appendStringInfoString(result,
3482 : map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3483 :
3484 0 : appendStringInfoString(result,
3485 : map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
3486 :
3487 0 : xsd_schema_element_end(result);
3488 :
3489 0 : SPI_finish();
3490 :
3491 0 : return result;
3492 : }
3493 :
3494 :
3495 : Datum
3496 0 : database_to_xmlschema(PG_FUNCTION_ARGS)
3497 : {
3498 0 : bool nulls = PG_GETARG_BOOL(0);
3499 0 : bool tableforest = PG_GETARG_BOOL(1);
3500 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3501 :
3502 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3503 : tableforest, targetns)));
3504 : }
3505 :
3506 :
3507 : Datum
3508 0 : database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3509 : {
3510 0 : bool nulls = PG_GETARG_BOOL(0);
3511 0 : bool tableforest = PG_GETARG_BOOL(1);
3512 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3513 : StringInfo xmlschema;
3514 :
3515 0 : xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3516 :
3517 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3518 : nulls, tableforest, targetns)));
3519 : }
3520 :
3521 :
3522 : /*
3523 : * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3524 : * 9.2.
3525 : */
3526 : static char *
3527 256 : map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
3528 : {
3529 : StringInfoData result;
3530 :
3531 256 : initStringInfo(&result);
3532 :
3533 256 : if (a)
3534 256 : appendStringInfoString(&result,
3535 256 : map_sql_identifier_to_xml_name(a, true, true));
3536 256 : if (b)
3537 256 : appendStringInfo(&result, ".%s",
3538 : map_sql_identifier_to_xml_name(b, true, true));
3539 256 : if (c)
3540 256 : appendStringInfo(&result, ".%s",
3541 : map_sql_identifier_to_xml_name(c, true, true));
3542 256 : if (d)
3543 244 : appendStringInfo(&result, ".%s",
3544 : map_sql_identifier_to_xml_name(d, true, true));
3545 :
3546 256 : return result.data;
3547 : }
3548 :
3549 :
3550 : /*
3551 : * Map an SQL table to an XML Schema document; see SQL/XML:2008
3552 : * section 9.11.
3553 : *
3554 : * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3555 : * 9.9.
3556 : */
3557 : static const char *
3558 52 : map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3559 : bool tableforest, const char *targetns)
3560 : {
3561 : int i;
3562 : char *xmltn;
3563 : char *tabletypename;
3564 : char *rowtypename;
3565 : StringInfoData result;
3566 :
3567 52 : initStringInfo(&result);
3568 :
3569 52 : if (OidIsValid(relid))
3570 : {
3571 : HeapTuple tuple;
3572 : Form_pg_class reltuple;
3573 :
3574 36 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3575 36 : if (!HeapTupleIsValid(tuple))
3576 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
3577 36 : reltuple = (Form_pg_class) GETSTRUCT(tuple);
3578 :
3579 36 : xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3580 : true, false);
3581 :
3582 36 : tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3583 36 : get_database_name(MyDatabaseId),
3584 36 : get_namespace_name(reltuple->relnamespace),
3585 36 : NameStr(reltuple->relname));
3586 :
3587 36 : rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3588 36 : get_database_name(MyDatabaseId),
3589 36 : get_namespace_name(reltuple->relnamespace),
3590 36 : NameStr(reltuple->relname));
3591 :
3592 36 : ReleaseSysCache(tuple);
3593 : }
3594 : else
3595 : {
3596 16 : if (tableforest)
3597 8 : xmltn = "row";
3598 : else
3599 8 : xmltn = "table";
3600 :
3601 16 : tabletypename = "TableType";
3602 16 : rowtypename = "RowType";
3603 : }
3604 :
3605 52 : xsd_schema_element_start(&result, targetns);
3606 :
3607 52 : appendStringInfoString(&result,
3608 : map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3609 :
3610 52 : appendStringInfo(&result,
3611 : "<xsd:complexType name=\"%s\">\n"
3612 : " <xsd:sequence>\n",
3613 : rowtypename);
3614 :
3615 216 : for (i = 0; i < tupdesc->natts; i++)
3616 : {
3617 164 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3618 :
3619 164 : if (att->attisdropped)
3620 4 : continue;
3621 320 : appendStringInfo(&result,
3622 : " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3623 160 : map_sql_identifier_to_xml_name(NameStr(att->attname),
3624 : true, false),
3625 : map_sql_type_to_xml_name(att->atttypid, -1),
3626 : nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3627 : }
3628 :
3629 52 : appendStringInfoString(&result,
3630 : " </xsd:sequence>\n"
3631 : "</xsd:complexType>\n\n");
3632 :
3633 52 : if (!tableforest)
3634 : {
3635 28 : appendStringInfo(&result,
3636 : "<xsd:complexType name=\"%s\">\n"
3637 : " <xsd:sequence>\n"
3638 : " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3639 : " </xsd:sequence>\n"
3640 : "</xsd:complexType>\n\n",
3641 : tabletypename, rowtypename);
3642 :
3643 28 : appendStringInfo(&result,
3644 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3645 : xmltn, tabletypename);
3646 : }
3647 : else
3648 24 : appendStringInfo(&result,
3649 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3650 : xmltn, rowtypename);
3651 :
3652 52 : xsd_schema_element_end(&result);
3653 :
3654 52 : return result.data;
3655 : }
3656 :
3657 :
3658 : /*
3659 : * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3660 : * section 9.12.
3661 : */
3662 : static const char *
3663 12 : map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3664 : bool tableforest, const char *targetns)
3665 : {
3666 : char *dbname;
3667 : char *nspname;
3668 : char *xmlsn;
3669 : char *schematypename;
3670 : StringInfoData result;
3671 : ListCell *cell;
3672 :
3673 12 : dbname = get_database_name(MyDatabaseId);
3674 12 : nspname = get_namespace_name(nspid);
3675 :
3676 12 : initStringInfo(&result);
3677 :
3678 12 : xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3679 :
3680 12 : schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3681 : dbname,
3682 : nspname,
3683 : NULL);
3684 :
3685 12 : appendStringInfo(&result,
3686 : "<xsd:complexType name=\"%s\">\n", schematypename);
3687 12 : if (!tableforest)
3688 4 : appendStringInfoString(&result,
3689 : " <xsd:all>\n");
3690 : else
3691 8 : appendStringInfoString(&result,
3692 : " <xsd:sequence>\n");
3693 :
3694 36 : foreach(cell, relid_list)
3695 : {
3696 24 : Oid relid = lfirst_oid(cell);
3697 24 : char *relname = get_rel_name(relid);
3698 24 : char *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3699 24 : char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3700 : dbname,
3701 : nspname,
3702 : relname);
3703 :
3704 24 : if (!tableforest)
3705 8 : appendStringInfo(&result,
3706 : " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3707 : xmltn, tabletypename);
3708 : else
3709 16 : appendStringInfo(&result,
3710 : " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3711 : xmltn, tabletypename);
3712 : }
3713 :
3714 12 : if (!tableforest)
3715 4 : appendStringInfoString(&result,
3716 : " </xsd:all>\n");
3717 : else
3718 8 : appendStringInfoString(&result,
3719 : " </xsd:sequence>\n");
3720 12 : appendStringInfoString(&result,
3721 : "</xsd:complexType>\n\n");
3722 :
3723 12 : appendStringInfo(&result,
3724 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3725 : xmlsn, schematypename);
3726 :
3727 12 : return result.data;
3728 : }
3729 :
3730 :
3731 : /*
3732 : * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3733 : * section 9.15.
3734 : */
3735 : static const char *
3736 0 : map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3737 : bool tableforest, const char *targetns)
3738 : {
3739 : char *dbname;
3740 : char *xmlcn;
3741 : char *catalogtypename;
3742 : StringInfoData result;
3743 : ListCell *cell;
3744 :
3745 0 : dbname = get_database_name(MyDatabaseId);
3746 :
3747 0 : initStringInfo(&result);
3748 :
3749 0 : xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3750 :
3751 0 : catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3752 : dbname,
3753 : NULL,
3754 : NULL);
3755 :
3756 0 : appendStringInfo(&result,
3757 : "<xsd:complexType name=\"%s\">\n", catalogtypename);
3758 0 : appendStringInfoString(&result,
3759 : " <xsd:all>\n");
3760 :
3761 0 : foreach(cell, nspid_list)
3762 : {
3763 0 : Oid nspid = lfirst_oid(cell);
3764 0 : char *nspname = get_namespace_name(nspid);
3765 0 : char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3766 0 : char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3767 : dbname,
3768 : nspname,
3769 : NULL);
3770 :
3771 0 : appendStringInfo(&result,
3772 : " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3773 : xmlsn, schematypename);
3774 : }
3775 :
3776 0 : appendStringInfoString(&result,
3777 : " </xsd:all>\n");
3778 0 : appendStringInfoString(&result,
3779 : "</xsd:complexType>\n\n");
3780 :
3781 0 : appendStringInfo(&result,
3782 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3783 : xmlcn, catalogtypename);
3784 :
3785 0 : return result.data;
3786 : }
3787 :
3788 :
3789 : /*
3790 : * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3791 : */
3792 : static const char *
3793 540 : map_sql_type_to_xml_name(Oid typeoid, int typmod)
3794 : {
3795 : StringInfoData result;
3796 :
3797 540 : initStringInfo(&result);
3798 :
3799 540 : switch (typeoid)
3800 : {
3801 20 : case BPCHAROID:
3802 20 : if (typmod == -1)
3803 20 : appendStringInfoString(&result, "CHAR");
3804 : else
3805 0 : appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3806 20 : break;
3807 36 : case VARCHAROID:
3808 36 : if (typmod == -1)
3809 36 : appendStringInfoString(&result, "VARCHAR");
3810 : else
3811 0 : appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3812 36 : break;
3813 20 : case NUMERICOID:
3814 20 : if (typmod == -1)
3815 20 : appendStringInfoString(&result, "NUMERIC");
3816 : else
3817 0 : appendStringInfo(&result, "NUMERIC_%d_%d",
3818 0 : ((typmod - VARHDRSZ) >> 16) & 0xffff,
3819 0 : (typmod - VARHDRSZ) & 0xffff);
3820 20 : break;
3821 116 : case INT4OID:
3822 116 : appendStringInfoString(&result, "INTEGER");
3823 116 : break;
3824 20 : case INT2OID:
3825 20 : appendStringInfoString(&result, "SMALLINT");
3826 20 : break;
3827 20 : case INT8OID:
3828 20 : appendStringInfoString(&result, "BIGINT");
3829 20 : break;
3830 20 : case FLOAT4OID:
3831 20 : appendStringInfoString(&result, "REAL");
3832 20 : break;
3833 0 : case FLOAT8OID:
3834 0 : appendStringInfoString(&result, "DOUBLE");
3835 0 : break;
3836 20 : case BOOLOID:
3837 20 : appendStringInfoString(&result, "BOOLEAN");
3838 20 : break;
3839 20 : case TIMEOID:
3840 20 : if (typmod == -1)
3841 20 : appendStringInfoString(&result, "TIME");
3842 : else
3843 0 : appendStringInfo(&result, "TIME_%d", typmod);
3844 20 : break;
3845 20 : case TIMETZOID:
3846 20 : if (typmod == -1)
3847 20 : appendStringInfoString(&result, "TIME_WTZ");
3848 : else
3849 0 : appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3850 20 : break;
3851 20 : case TIMESTAMPOID:
3852 20 : if (typmod == -1)
3853 20 : appendStringInfoString(&result, "TIMESTAMP");
3854 : else
3855 0 : appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3856 20 : break;
3857 20 : case TIMESTAMPTZOID:
3858 20 : if (typmod == -1)
3859 20 : appendStringInfoString(&result, "TIMESTAMP_WTZ");
3860 : else
3861 0 : appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3862 20 : break;
3863 20 : case DATEOID:
3864 20 : appendStringInfoString(&result, "DATE");
3865 20 : break;
3866 20 : case XMLOID:
3867 20 : appendStringInfoString(&result, "XML");
3868 20 : break;
3869 148 : default:
3870 : {
3871 : HeapTuple tuple;
3872 : Form_pg_type typtuple;
3873 :
3874 148 : tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3875 148 : if (!HeapTupleIsValid(tuple))
3876 0 : elog(ERROR, "cache lookup failed for type %u", typeoid);
3877 148 : typtuple = (Form_pg_type) GETSTRUCT(tuple);
3878 :
3879 148 : appendStringInfoString(&result,
3880 148 : map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3881 148 : get_database_name(MyDatabaseId),
3882 148 : get_namespace_name(typtuple->typnamespace),
3883 148 : NameStr(typtuple->typname)));
3884 :
3885 148 : ReleaseSysCache(tuple);
3886 : }
3887 : }
3888 :
3889 540 : return result.data;
3890 : }
3891 :
3892 :
3893 : /*
3894 : * Map a collection of SQL data types to XML Schema data types; see
3895 : * SQL/XML:2008 section 9.7.
3896 : */
3897 : static const char *
3898 64 : map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3899 : {
3900 64 : List *uniquetypes = NIL;
3901 : int i;
3902 : StringInfoData result;
3903 : ListCell *cell0;
3904 :
3905 : /* extract all column types used in the set of TupleDescs */
3906 140 : foreach(cell0, tupdesc_list)
3907 : {
3908 76 : TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
3909 :
3910 468 : for (i = 0; i < tupdesc->natts; i++)
3911 : {
3912 392 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3913 :
3914 392 : if (att->attisdropped)
3915 16 : continue;
3916 376 : uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3917 : }
3918 : }
3919 :
3920 : /* add base types of domains */
3921 428 : foreach(cell0, uniquetypes)
3922 : {
3923 364 : Oid typid = lfirst_oid(cell0);
3924 364 : Oid basetypid = getBaseType(typid);
3925 :
3926 364 : if (basetypid != typid)
3927 16 : uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3928 : }
3929 :
3930 : /* Convert to textual form */
3931 64 : initStringInfo(&result);
3932 :
3933 428 : foreach(cell0, uniquetypes)
3934 : {
3935 364 : appendStringInfo(&result, "%s\n",
3936 : map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3937 : -1));
3938 : }
3939 :
3940 64 : return result.data;
3941 : }
3942 :
3943 :
3944 : /*
3945 : * Map an SQL data type to a named XML Schema data type; see
3946 : * SQL/XML:2008 sections 9.5 and 9.6.
3947 : *
3948 : * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3949 : * a name attribute, which this function does. The name-less version
3950 : * 9.5 doesn't appear to be required anywhere.)
3951 : */
3952 : static const char *
3953 364 : map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3954 : {
3955 : StringInfoData result;
3956 364 : const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3957 :
3958 364 : initStringInfo(&result);
3959 :
3960 364 : if (typeoid == XMLOID)
3961 : {
3962 16 : appendStringInfoString(&result,
3963 : "<xsd:complexType mixed=\"true\">\n"
3964 : " <xsd:sequence>\n"
3965 : " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3966 : " </xsd:sequence>\n"
3967 : "</xsd:complexType>\n");
3968 : }
3969 : else
3970 : {
3971 348 : appendStringInfo(&result,
3972 : "<xsd:simpleType name=\"%s\">\n", typename);
3973 :
3974 348 : switch (typeoid)
3975 : {
3976 92 : case BPCHAROID:
3977 : case VARCHAROID:
3978 : case TEXTOID:
3979 92 : appendStringInfoString(&result,
3980 : " <xsd:restriction base=\"xsd:string\">\n");
3981 92 : if (typmod != -1)
3982 0 : appendStringInfo(&result,
3983 : " <xsd:maxLength value=\"%d\"/>\n",
3984 : typmod - VARHDRSZ);
3985 92 : appendStringInfoString(&result, " </xsd:restriction>\n");
3986 92 : break;
3987 :
3988 16 : case BYTEAOID:
3989 16 : appendStringInfo(&result,
3990 : " <xsd:restriction base=\"xsd:%s\">\n"
3991 : " </xsd:restriction>\n",
3992 16 : xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3993 16 : break;
3994 :
3995 16 : case NUMERICOID:
3996 16 : if (typmod != -1)
3997 0 : appendStringInfo(&result,
3998 : " <xsd:restriction base=\"xsd:decimal\">\n"
3999 : " <xsd:totalDigits value=\"%d\"/>\n"
4000 : " <xsd:fractionDigits value=\"%d\"/>\n"
4001 : " </xsd:restriction>\n",
4002 0 : ((typmod - VARHDRSZ) >> 16) & 0xffff,
4003 0 : (typmod - VARHDRSZ) & 0xffff);
4004 16 : break;
4005 :
4006 16 : case INT2OID:
4007 16 : appendStringInfo(&result,
4008 : " <xsd:restriction base=\"xsd:short\">\n"
4009 : " <xsd:maxInclusive value=\"%d\"/>\n"
4010 : " <xsd:minInclusive value=\"%d\"/>\n"
4011 : " </xsd:restriction>\n",
4012 : SHRT_MAX, SHRT_MIN);
4013 16 : break;
4014 :
4015 64 : case INT4OID:
4016 64 : appendStringInfo(&result,
4017 : " <xsd:restriction base=\"xsd:int\">\n"
4018 : " <xsd:maxInclusive value=\"%d\"/>\n"
4019 : " <xsd:minInclusive value=\"%d\"/>\n"
4020 : " </xsd:restriction>\n",
4021 : INT_MAX, INT_MIN);
4022 64 : break;
4023 :
4024 16 : case INT8OID:
4025 16 : appendStringInfo(&result,
4026 : " <xsd:restriction base=\"xsd:long\">\n"
4027 : " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
4028 : " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
4029 : " </xsd:restriction>\n",
4030 : PG_INT64_MAX,
4031 : PG_INT64_MIN);
4032 16 : break;
4033 :
4034 16 : case FLOAT4OID:
4035 16 : appendStringInfoString(&result,
4036 : " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
4037 16 : break;
4038 :
4039 0 : case FLOAT8OID:
4040 0 : appendStringInfoString(&result,
4041 : " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
4042 0 : break;
4043 :
4044 16 : case BOOLOID:
4045 16 : appendStringInfoString(&result,
4046 : " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
4047 16 : break;
4048 :
4049 32 : case TIMEOID:
4050 : case TIMETZOID:
4051 : {
4052 32 : const char *tz = (typeoid == TIMETZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
4053 :
4054 32 : if (typmod == -1)
4055 32 : appendStringInfo(&result,
4056 : " <xsd:restriction base=\"xsd:time\">\n"
4057 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
4058 : " </xsd:restriction>\n", tz);
4059 0 : else if (typmod == 0)
4060 0 : appendStringInfo(&result,
4061 : " <xsd:restriction base=\"xsd:time\">\n"
4062 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
4063 : " </xsd:restriction>\n", tz);
4064 : else
4065 0 : appendStringInfo(&result,
4066 : " <xsd:restriction base=\"xsd:time\">\n"
4067 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
4068 : " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
4069 32 : break;
4070 : }
4071 :
4072 32 : case TIMESTAMPOID:
4073 : case TIMESTAMPTZOID:
4074 : {
4075 32 : const char *tz = (typeoid == TIMESTAMPTZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
4076 :
4077 32 : if (typmod == -1)
4078 32 : appendStringInfo(&result,
4079 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4080 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
4081 : " </xsd:restriction>\n", tz);
4082 0 : else if (typmod == 0)
4083 0 : appendStringInfo(&result,
4084 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4085 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
4086 : " </xsd:restriction>\n", tz);
4087 : else
4088 0 : appendStringInfo(&result,
4089 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4090 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
4091 : " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
4092 32 : break;
4093 : }
4094 :
4095 16 : case DATEOID:
4096 16 : appendStringInfoString(&result,
4097 : " <xsd:restriction base=\"xsd:date\">\n"
4098 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
4099 : " </xsd:restriction>\n");
4100 16 : break;
4101 :
4102 16 : default:
4103 16 : if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
4104 : {
4105 : Oid base_typeoid;
4106 16 : int32 base_typmod = -1;
4107 :
4108 16 : base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
4109 :
4110 16 : appendStringInfo(&result,
4111 : " <xsd:restriction base=\"%s\"/>\n",
4112 : map_sql_type_to_xml_name(base_typeoid, base_typmod));
4113 : }
4114 16 : break;
4115 : }
4116 348 : appendStringInfoString(&result, "</xsd:simpleType>\n");
4117 : }
4118 :
4119 364 : return result.data;
4120 : }
4121 :
4122 :
4123 : /*
4124 : * Map an SQL row to an XML element, taking the row from the active
4125 : * SPI cursor. See also SQL/XML:2008 section 9.10.
4126 : */
4127 : static void
4128 206 : SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
4129 : bool nulls, bool tableforest,
4130 : const char *targetns, bool top_level)
4131 : {
4132 : int i;
4133 : char *xmltn;
4134 :
4135 206 : if (tablename)
4136 152 : xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
4137 : else
4138 : {
4139 54 : if (tableforest)
4140 24 : xmltn = "row";
4141 : else
4142 30 : xmltn = "table";
4143 : }
4144 :
4145 206 : if (tableforest)
4146 108 : xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
4147 : else
4148 98 : appendStringInfoString(result, "<row>\n");
4149 :
4150 844 : for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
4151 : {
4152 : char *colname;
4153 : Datum colval;
4154 : bool isnull;
4155 :
4156 638 : colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
4157 : true, false);
4158 638 : colval = SPI_getbinval(SPI_tuptable->vals[rownum],
4159 638 : SPI_tuptable->tupdesc,
4160 : i,
4161 : &isnull);
4162 638 : if (isnull)
4163 : {
4164 76 : if (nulls)
4165 40 : appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname);
4166 : }
4167 : else
4168 562 : appendStringInfo(result, " <%s>%s</%s>\n",
4169 : colname,
4170 : map_sql_value_to_xml_value(colval,
4171 562 : SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
4172 : colname);
4173 : }
4174 :
4175 206 : if (tableforest)
4176 : {
4177 108 : xmldata_root_element_end(result, xmltn);
4178 108 : appendStringInfoChar(result, '\n');
4179 : }
4180 : else
4181 98 : appendStringInfoString(result, "</row>\n\n");
4182 206 : }
4183 :
4184 :
4185 : /*
4186 : * XPath related functions
4187 : */
4188 :
4189 : #ifdef USE_LIBXML
4190 :
4191 : /*
4192 : * Convert XML node to text.
4193 : *
4194 : * For attribute and text nodes, return the escaped text. For anything else,
4195 : * dump the whole subtree.
4196 : */
4197 : static text *
4198 144 : xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
4199 : {
4200 144 : xmltype *result = NULL;
4201 :
4202 144 : if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
4203 118 : {
4204 118 : void (*volatile nodefree) (xmlNodePtr) = NULL;
4205 118 : volatile xmlBufferPtr buf = NULL;
4206 118 : volatile xmlNodePtr cur_copy = NULL;
4207 :
4208 118 : PG_TRY();
4209 : {
4210 : int bytes;
4211 :
4212 118 : buf = xmlBufferCreate();
4213 118 : if (buf == NULL || xmlerrcxt->err_occurred)
4214 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4215 : "could not allocate xmlBuffer");
4216 :
4217 : /*
4218 : * Produce a dump of the node that we can serialize. xmlNodeDump
4219 : * does that, but the result of that function won't contain
4220 : * namespace definitions from ancestor nodes, so we first do a
4221 : * xmlCopyNode() which duplicates the node along with its required
4222 : * namespace definitions.
4223 : *
4224 : * Some old libxml2 versions such as 2.7.6 produce partially
4225 : * broken XML_DOCUMENT_NODE nodes (unset content field) when
4226 : * copying them. xmlNodeDump of such a node works fine, but
4227 : * xmlFreeNode crashes; set us up to call xmlFreeDoc instead.
4228 : */
4229 118 : cur_copy = xmlCopyNode(cur, 1);
4230 118 : if (cur_copy == NULL || xmlerrcxt->err_occurred)
4231 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4232 : "could not copy node");
4233 236 : nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ?
4234 118 : (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode;
4235 :
4236 118 : bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0);
4237 118 : if (bytes == -1 || xmlerrcxt->err_occurred)
4238 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4239 : "could not dump node");
4240 :
4241 118 : result = xmlBuffer_to_xmltype(buf);
4242 : }
4243 0 : PG_FINALLY();
4244 : {
4245 118 : if (nodefree)
4246 118 : nodefree(cur_copy);
4247 118 : if (buf)
4248 118 : xmlBufferFree(buf);
4249 : }
4250 118 : PG_END_TRY();
4251 : }
4252 : else
4253 : {
4254 26 : xmlChar *volatile str = NULL;
4255 :
4256 26 : PG_TRY();
4257 : {
4258 : char *escaped;
4259 :
4260 26 : str = xmlXPathCastNodeToString(cur);
4261 26 : if (str == NULL || xmlerrcxt->err_occurred)
4262 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4263 : "could not allocate xmlChar");
4264 :
4265 : /* Here we rely on XML having the same representation as TEXT */
4266 26 : escaped = escape_xml((char *) str);
4267 :
4268 26 : result = (xmltype *) cstring_to_text(escaped);
4269 26 : pfree(escaped);
4270 : }
4271 0 : PG_FINALLY();
4272 : {
4273 26 : if (str)
4274 26 : xmlFree(str);
4275 : }
4276 26 : PG_END_TRY();
4277 : }
4278 :
4279 144 : return result;
4280 : }
4281 :
4282 : /*
4283 : * Convert an XML XPath object (the result of evaluating an XPath expression)
4284 : * to an array of xml values, which are appended to astate. The function
4285 : * result value is the number of elements in the array.
4286 : *
4287 : * If "astate" is NULL then we don't generate the array value, but we still
4288 : * return the number of elements it would have had.
4289 : *
4290 : * Nodesets are converted to an array containing the nodes' textual
4291 : * representations. Primitive values (float, double, string) are converted
4292 : * to a single-element array containing the value's string representation.
4293 : */
4294 : static int
4295 385 : xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
4296 : ArrayBuildState *astate,
4297 : PgXmlErrorContext *xmlerrcxt)
4298 : {
4299 385 : int result = 0;
4300 : Datum datum;
4301 : Oid datumtype;
4302 : char *result_str;
4303 :
4304 385 : switch (xpathobj->type)
4305 : {
4306 350 : case XPATH_NODESET:
4307 350 : if (xpathobj->nodesetval != NULL)
4308 : {
4309 254 : result = xpathobj->nodesetval->nodeNr;
4310 254 : if (astate != NULL)
4311 : {
4312 : int i;
4313 :
4314 140 : for (i = 0; i < result; i++)
4315 : {
4316 76 : datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4317 : xmlerrcxt));
4318 76 : (void) accumArrayResult(astate, datum, false,
4319 : XMLOID, CurrentMemoryContext);
4320 : }
4321 : }
4322 : }
4323 350 : return result;
4324 :
4325 10 : case XPATH_BOOLEAN:
4326 10 : if (astate == NULL)
4327 0 : return 1;
4328 10 : datum = BoolGetDatum(xpathobj->boolval);
4329 10 : datumtype = BOOLOID;
4330 10 : break;
4331 :
4332 15 : case XPATH_NUMBER:
4333 15 : if (astate == NULL)
4334 10 : return 1;
4335 5 : datum = Float8GetDatum(xpathobj->floatval);
4336 5 : datumtype = FLOAT8OID;
4337 5 : break;
4338 :
4339 10 : case XPATH_STRING:
4340 10 : if (astate == NULL)
4341 0 : return 1;
4342 10 : datum = CStringGetDatum((char *) xpathobj->stringval);
4343 10 : datumtype = CSTRINGOID;
4344 10 : break;
4345 :
4346 0 : default:
4347 0 : elog(ERROR, "xpath expression result type %d is unsupported",
4348 : xpathobj->type);
4349 : return 0; /* keep compiler quiet */
4350 : }
4351 :
4352 : /* Common code for scalar-value cases */
4353 25 : result_str = map_sql_value_to_xml_value(datum, datumtype, true);
4354 25 : datum = PointerGetDatum(cstring_to_xmltype(result_str));
4355 25 : (void) accumArrayResult(astate, datum, false,
4356 : XMLOID, CurrentMemoryContext);
4357 25 : return 1;
4358 : }
4359 :
4360 :
4361 : /*
4362 : * Common code for xpath() and xmlexists()
4363 : *
4364 : * Evaluate XPath expression and return number of nodes in res_nitems
4365 : * and array of XML values in astate. Either of those pointers can be
4366 : * NULL if the corresponding result isn't wanted.
4367 : *
4368 : * It is up to the user to ensure that the XML passed is in fact
4369 : * an XML document - XPath doesn't work easily on fragments without
4370 : * a context node being known.
4371 : */
4372 : static void
4373 397 : xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
4374 : int *res_nitems, ArrayBuildState *astate)
4375 : {
4376 : PgXmlErrorContext *xmlerrcxt;
4377 397 : volatile xmlParserCtxtPtr ctxt = NULL;
4378 397 : volatile xmlDocPtr doc = NULL;
4379 397 : volatile xmlXPathContextPtr xpathctx = NULL;
4380 397 : volatile xmlXPathCompExprPtr xpathcomp = NULL;
4381 397 : volatile xmlXPathObjectPtr xpathobj = NULL;
4382 : char *datastr;
4383 : int32 len;
4384 : int32 xpath_len;
4385 : xmlChar *string;
4386 : xmlChar *xpath_expr;
4387 397 : size_t xmldecl_len = 0;
4388 : int i;
4389 : int ndim;
4390 : Datum *ns_names_uris;
4391 : bool *ns_names_uris_nulls;
4392 : int ns_count;
4393 :
4394 : /*
4395 : * Namespace mappings are passed as text[]. If an empty array is passed
4396 : * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
4397 : * Else, a 2-dimensional array with length of the second axis being equal
4398 : * to 2 should be passed, i.e., every subarray contains 2 elements, the
4399 : * first element defining the name, the second one the URI. Example:
4400 : * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
4401 : * 'http://example2.com']].
4402 : */
4403 397 : ndim = namespaces ? ARR_NDIM(namespaces) : 0;
4404 397 : if (ndim != 0)
4405 : {
4406 : int *dims;
4407 :
4408 87 : dims = ARR_DIMS(namespaces);
4409 :
4410 87 : if (ndim != 2 || dims[1] != 2)
4411 0 : ereport(ERROR,
4412 : (errcode(ERRCODE_DATA_EXCEPTION),
4413 : errmsg("invalid array for XML namespace mapping"),
4414 : errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
4415 :
4416 : Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
4417 :
4418 87 : deconstruct_array_builtin(namespaces, TEXTOID,
4419 : &ns_names_uris, &ns_names_uris_nulls,
4420 : &ns_count);
4421 :
4422 : Assert((ns_count % 2) == 0); /* checked above */
4423 87 : ns_count /= 2; /* count pairs only */
4424 : }
4425 : else
4426 : {
4427 310 : ns_names_uris = NULL;
4428 310 : ns_names_uris_nulls = NULL;
4429 310 : ns_count = 0;
4430 : }
4431 :
4432 397 : datastr = VARDATA(data);
4433 397 : len = VARSIZE(data) - VARHDRSZ;
4434 397 : xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
4435 397 : if (xpath_len == 0)
4436 4 : ereport(ERROR,
4437 : (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4438 : errmsg("empty XPath expression")));
4439 :
4440 393 : string = pg_xmlCharStrndup(datastr, len);
4441 393 : xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
4442 :
4443 : /*
4444 : * In a UTF8 database, skip any xml declaration, which might assert
4445 : * another encoding. Ignore parse_xml_decl() failure, letting
4446 : * xmlCtxtReadMemory() report parse errors. Documentation disclaims
4447 : * xpath() support for non-ASCII data in non-UTF8 databases, so leave
4448 : * those scenarios bug-compatible with historical behavior.
4449 : */
4450 393 : if (GetDatabaseEncoding() == PG_UTF8)
4451 393 : parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
4452 :
4453 393 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4454 :
4455 393 : PG_TRY();
4456 : {
4457 393 : xmlInitParser();
4458 :
4459 : /*
4460 : * redundant XML parsing (two parsings for the same value during one
4461 : * command execution are possible)
4462 : */
4463 393 : ctxt = xmlNewParserCtxt();
4464 393 : if (ctxt == NULL || xmlerrcxt->err_occurred)
4465 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4466 : "could not allocate parser context");
4467 786 : doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4468 393 : len - xmldecl_len, NULL, NULL, 0);
4469 393 : if (doc == NULL || xmlerrcxt->err_occurred)
4470 8 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4471 : "could not parse XML document");
4472 385 : xpathctx = xmlXPathNewContext(doc);
4473 385 : if (xpathctx == NULL || xmlerrcxt->err_occurred)
4474 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4475 : "could not allocate XPath context");
4476 385 : xpathctx->node = (xmlNodePtr) doc;
4477 :
4478 : /* register namespaces, if any */
4479 385 : if (ns_count > 0)
4480 : {
4481 174 : for (i = 0; i < ns_count; i++)
4482 : {
4483 : char *ns_name;
4484 : char *ns_uri;
4485 :
4486 87 : if (ns_names_uris_nulls[i * 2] ||
4487 87 : ns_names_uris_nulls[i * 2 + 1])
4488 0 : ereport(ERROR,
4489 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4490 : errmsg("neither namespace name nor URI may be null")));
4491 87 : ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
4492 87 : ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
4493 87 : if (xmlXPathRegisterNs(xpathctx,
4494 : (xmlChar *) ns_name,
4495 : (xmlChar *) ns_uri) != 0)
4496 0 : ereport(ERROR, /* is this an internal error??? */
4497 : (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
4498 : ns_name, ns_uri)));
4499 : }
4500 : }
4501 :
4502 : /*
4503 : * Note: here and elsewhere, be careful to use xmlXPathCtxtCompile not
4504 : * xmlXPathCompile. In libxml2 2.13.3 and older, the latter function
4505 : * fails to defend itself against recursion-to-stack-overflow. See
4506 : * https://gitlab.gnome.org/GNOME/libxml2/-/issues/799
4507 : */
4508 385 : xpathcomp = xmlXPathCtxtCompile(xpathctx, xpath_expr);
4509 385 : if (xpathcomp == NULL || xmlerrcxt->err_occurred)
4510 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4511 : "invalid XPath expression");
4512 :
4513 : /*
4514 : * Version 2.6.27 introduces a function named
4515 : * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
4516 : * but we can derive the existence by whether any nodes are returned,
4517 : * thereby preventing a library version upgrade and keeping the code
4518 : * the same.
4519 : */
4520 385 : xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
4521 385 : if (xpathobj == NULL || xmlerrcxt->err_occurred)
4522 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4523 : "could not create XPath object");
4524 :
4525 : /*
4526 : * Extract the results as requested.
4527 : */
4528 385 : if (res_nitems != NULL)
4529 296 : *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4530 : else
4531 89 : (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4532 : }
4533 8 : PG_CATCH();
4534 : {
4535 8 : if (xpathobj)
4536 0 : xmlXPathFreeObject(xpathobj);
4537 8 : if (xpathcomp)
4538 0 : xmlXPathFreeCompExpr(xpathcomp);
4539 8 : if (xpathctx)
4540 0 : xmlXPathFreeContext(xpathctx);
4541 8 : if (doc)
4542 8 : xmlFreeDoc(doc);
4543 8 : if (ctxt)
4544 8 : xmlFreeParserCtxt(ctxt);
4545 :
4546 8 : pg_xml_done(xmlerrcxt, true);
4547 :
4548 8 : PG_RE_THROW();
4549 : }
4550 385 : PG_END_TRY();
4551 :
4552 385 : xmlXPathFreeObject(xpathobj);
4553 385 : xmlXPathFreeCompExpr(xpathcomp);
4554 385 : xmlXPathFreeContext(xpathctx);
4555 385 : xmlFreeDoc(doc);
4556 385 : xmlFreeParserCtxt(ctxt);
4557 :
4558 385 : pg_xml_done(xmlerrcxt, false);
4559 385 : }
4560 : #endif /* USE_LIBXML */
4561 :
4562 : /*
4563 : * Evaluate XPath expression and return array of XML values.
4564 : *
4565 : * As we have no support of XQuery sequences yet, this function seems
4566 : * to be the most useful one (array of XML functions plays a role of
4567 : * some kind of substitution for XQuery sequences).
4568 : */
4569 : Datum
4570 101 : xpath(PG_FUNCTION_ARGS)
4571 : {
4572 : #ifdef USE_LIBXML
4573 101 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4574 101 : xmltype *data = PG_GETARG_XML_P(1);
4575 101 : ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4576 : ArrayBuildState *astate;
4577 :
4578 101 : astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4579 101 : xpath_internal(xpath_expr_text, data, namespaces,
4580 : NULL, astate);
4581 89 : PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
4582 : #else
4583 : NO_XML_SUPPORT();
4584 : return 0;
4585 : #endif
4586 : }
4587 :
4588 : /*
4589 : * Determines if the node specified by the supplied XPath exists
4590 : * in a given XML document, returning a boolean.
4591 : */
4592 : Datum
4593 135 : xmlexists(PG_FUNCTION_ARGS)
4594 : {
4595 : #ifdef USE_LIBXML
4596 135 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4597 135 : xmltype *data = PG_GETARG_XML_P(1);
4598 : int res_nitems;
4599 :
4600 135 : xpath_internal(xpath_expr_text, data, NULL,
4601 : &res_nitems, NULL);
4602 :
4603 135 : PG_RETURN_BOOL(res_nitems > 0);
4604 : #else
4605 : NO_XML_SUPPORT();
4606 : return 0;
4607 : #endif
4608 : }
4609 :
4610 : /*
4611 : * Determines if the node specified by the supplied XPath exists
4612 : * in a given XML document, returning a boolean. Differs from
4613 : * xmlexists as it supports namespaces and is not defined in SQL/XML.
4614 : */
4615 : Datum
4616 161 : xpath_exists(PG_FUNCTION_ARGS)
4617 : {
4618 : #ifdef USE_LIBXML
4619 161 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4620 161 : xmltype *data = PG_GETARG_XML_P(1);
4621 161 : ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4622 : int res_nitems;
4623 :
4624 161 : xpath_internal(xpath_expr_text, data, namespaces,
4625 : &res_nitems, NULL);
4626 :
4627 161 : PG_RETURN_BOOL(res_nitems > 0);
4628 : #else
4629 : NO_XML_SUPPORT();
4630 : return 0;
4631 : #endif
4632 : }
4633 :
4634 : /*
4635 : * Functions for checking well-formed-ness
4636 : */
4637 :
4638 : #ifdef USE_LIBXML
4639 : static bool
4640 80 : wellformed_xml(text *data, XmlOptionType xmloption_arg)
4641 : {
4642 : xmlDocPtr doc;
4643 80 : ErrorSaveContext escontext = {T_ErrorSaveContext};
4644 :
4645 : /*
4646 : * We'll report "true" if no soft error is reported by xml_parse().
4647 : */
4648 80 : doc = xml_parse(data, xmloption_arg, true,
4649 : GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
4650 80 : if (doc)
4651 43 : xmlFreeDoc(doc);
4652 :
4653 80 : return !escontext.error_occurred;
4654 : }
4655 : #endif
4656 :
4657 : Datum
4658 60 : xml_is_well_formed(PG_FUNCTION_ARGS)
4659 : {
4660 : #ifdef USE_LIBXML
4661 60 : text *data = PG_GETARG_TEXT_PP(0);
4662 :
4663 60 : PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4664 : #else
4665 : NO_XML_SUPPORT();
4666 : return 0;
4667 : #endif /* not USE_LIBXML */
4668 : }
4669 :
4670 : Datum
4671 10 : xml_is_well_formed_document(PG_FUNCTION_ARGS)
4672 : {
4673 : #ifdef USE_LIBXML
4674 10 : text *data = PG_GETARG_TEXT_PP(0);
4675 :
4676 10 : PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4677 : #else
4678 : NO_XML_SUPPORT();
4679 : return 0;
4680 : #endif /* not USE_LIBXML */
4681 : }
4682 :
4683 : Datum
4684 10 : xml_is_well_formed_content(PG_FUNCTION_ARGS)
4685 : {
4686 : #ifdef USE_LIBXML
4687 10 : text *data = PG_GETARG_TEXT_PP(0);
4688 :
4689 10 : PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4690 : #else
4691 : NO_XML_SUPPORT();
4692 : return 0;
4693 : #endif /* not USE_LIBXML */
4694 : }
4695 :
4696 : /*
4697 : * support functions for XMLTABLE
4698 : *
4699 : */
4700 : #ifdef USE_LIBXML
4701 :
4702 : /*
4703 : * Returns private data from executor state. Ensure validity by check with
4704 : * MAGIC number.
4705 : */
4706 : static inline XmlTableBuilderData *
4707 108641 : GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4708 : {
4709 : XmlTableBuilderData *result;
4710 :
4711 108641 : if (!IsA(state, TableFuncScanState))
4712 0 : elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4713 108641 : result = (XmlTableBuilderData *) state->opaque;
4714 108641 : if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4715 0 : elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4716 :
4717 108641 : return result;
4718 : }
4719 : #endif
4720 :
4721 : /*
4722 : * XmlTableInitOpaque
4723 : * Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4724 : * the XML parser.
4725 : *
4726 : * Note: Because we call pg_xml_init() here and pg_xml_done() in
4727 : * XmlTableDestroyOpaque, it is critical for robustness that no other
4728 : * executor nodes run until this node is processed to completion. Caller
4729 : * must execute this to completion (probably filling a tuplestore to exhaust
4730 : * this node in a single pass) instead of using row-per-call mode.
4731 : */
4732 : static void
4733 176 : XmlTableInitOpaque(TableFuncScanState *state, int natts)
4734 : {
4735 : #ifdef USE_LIBXML
4736 176 : volatile xmlParserCtxtPtr ctxt = NULL;
4737 : XmlTableBuilderData *xtCxt;
4738 : PgXmlErrorContext *xmlerrcxt;
4739 :
4740 176 : xtCxt = palloc0_object(XmlTableBuilderData);
4741 176 : xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4742 176 : xtCxt->natts = natts;
4743 176 : xtCxt->xpathscomp = palloc0_array(xmlXPathCompExprPtr, natts);
4744 :
4745 176 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4746 :
4747 176 : PG_TRY();
4748 : {
4749 176 : xmlInitParser();
4750 :
4751 176 : ctxt = xmlNewParserCtxt();
4752 176 : if (ctxt == NULL || xmlerrcxt->err_occurred)
4753 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4754 : "could not allocate parser context");
4755 : }
4756 0 : PG_CATCH();
4757 : {
4758 0 : if (ctxt != NULL)
4759 0 : xmlFreeParserCtxt(ctxt);
4760 :
4761 0 : pg_xml_done(xmlerrcxt, true);
4762 :
4763 0 : PG_RE_THROW();
4764 : }
4765 176 : PG_END_TRY();
4766 :
4767 176 : xtCxt->xmlerrcxt = xmlerrcxt;
4768 176 : xtCxt->ctxt = ctxt;
4769 :
4770 176 : state->opaque = xtCxt;
4771 : #else
4772 : NO_XML_SUPPORT();
4773 : #endif /* not USE_LIBXML */
4774 176 : }
4775 :
4776 : /*
4777 : * XmlTableSetDocument
4778 : * Install the input document
4779 : */
4780 : static void
4781 176 : XmlTableSetDocument(TableFuncScanState *state, Datum value)
4782 : {
4783 : #ifdef USE_LIBXML
4784 : XmlTableBuilderData *xtCxt;
4785 176 : xmltype *xmlval = DatumGetXmlP(value);
4786 : char *str;
4787 : xmlChar *xstr;
4788 : int length;
4789 176 : volatile xmlDocPtr doc = NULL;
4790 176 : volatile xmlXPathContextPtr xpathcxt = NULL;
4791 :
4792 176 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4793 :
4794 : /*
4795 : * Use out function for casting to string (remove encoding property). See
4796 : * comment in xml_out.
4797 : */
4798 176 : str = xml_out_internal(xmlval, 0);
4799 :
4800 176 : length = strlen(str);
4801 176 : xstr = pg_xmlCharStrndup(str, length);
4802 :
4803 176 : PG_TRY();
4804 : {
4805 176 : doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4806 176 : if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4807 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4808 : "could not parse XML document");
4809 176 : xpathcxt = xmlXPathNewContext(doc);
4810 176 : if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4811 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4812 : "could not allocate XPath context");
4813 176 : xpathcxt->node = (xmlNodePtr) doc;
4814 : }
4815 0 : PG_CATCH();
4816 : {
4817 0 : if (xpathcxt != NULL)
4818 0 : xmlXPathFreeContext(xpathcxt);
4819 0 : if (doc != NULL)
4820 0 : xmlFreeDoc(doc);
4821 :
4822 0 : PG_RE_THROW();
4823 : }
4824 176 : PG_END_TRY();
4825 :
4826 176 : xtCxt->doc = doc;
4827 176 : xtCxt->xpathcxt = xpathcxt;
4828 : #else
4829 : NO_XML_SUPPORT();
4830 : #endif /* not USE_LIBXML */
4831 176 : }
4832 :
4833 : /*
4834 : * XmlTableSetNamespace
4835 : * Add a namespace declaration
4836 : */
4837 : static void
4838 12 : XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
4839 : {
4840 : #ifdef USE_LIBXML
4841 : XmlTableBuilderData *xtCxt;
4842 :
4843 12 : if (name == NULL)
4844 4 : ereport(ERROR,
4845 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4846 : errmsg("DEFAULT namespace is not supported")));
4847 8 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4848 :
4849 8 : if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4850 8 : pg_xmlCharStrndup(name, strlen(name)),
4851 8 : pg_xmlCharStrndup(uri, strlen(uri))))
4852 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4853 : "could not set XML namespace");
4854 : #else
4855 : NO_XML_SUPPORT();
4856 : #endif /* not USE_LIBXML */
4857 8 : }
4858 :
4859 : /*
4860 : * XmlTableSetRowFilter
4861 : * Install the row-filter Xpath expression.
4862 : */
4863 : static void
4864 172 : XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4865 : {
4866 : #ifdef USE_LIBXML
4867 : XmlTableBuilderData *xtCxt;
4868 : xmlChar *xstr;
4869 :
4870 172 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4871 :
4872 172 : if (*path == '\0')
4873 0 : ereport(ERROR,
4874 : (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4875 : errmsg("row path filter must not be empty string")));
4876 :
4877 172 : xstr = pg_xmlCharStrndup(path, strlen(path));
4878 :
4879 : /* We require XmlTableSetDocument to have been done already */
4880 : Assert(xtCxt->xpathcxt != NULL);
4881 :
4882 172 : xtCxt->xpathcomp = xmlXPathCtxtCompile(xtCxt->xpathcxt, xstr);
4883 172 : if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4884 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4885 : "invalid XPath expression");
4886 : #else
4887 : NO_XML_SUPPORT();
4888 : #endif /* not USE_LIBXML */
4889 172 : }
4890 :
4891 : /*
4892 : * XmlTableSetColumnFilter
4893 : * Install the column-filter Xpath expression, for the given column.
4894 : */
4895 : static void
4896 516 : XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
4897 : {
4898 : #ifdef USE_LIBXML
4899 : XmlTableBuilderData *xtCxt;
4900 : xmlChar *xstr;
4901 :
4902 : Assert(path);
4903 :
4904 516 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4905 :
4906 516 : if (*path == '\0')
4907 0 : ereport(ERROR,
4908 : (errcode(ERRCODE_INVALID_ARGUMENT_FOR_XQUERY),
4909 : errmsg("column path filter must not be empty string")));
4910 :
4911 516 : xstr = pg_xmlCharStrndup(path, strlen(path));
4912 :
4913 : /* We require XmlTableSetDocument to have been done already */
4914 : Assert(xtCxt->xpathcxt != NULL);
4915 :
4916 516 : xtCxt->xpathscomp[colnum] = xmlXPathCtxtCompile(xtCxt->xpathcxt, xstr);
4917 516 : if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4918 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4919 : "invalid XPath expression");
4920 : #else
4921 : NO_XML_SUPPORT();
4922 : #endif /* not USE_LIBXML */
4923 516 : }
4924 :
4925 : /*
4926 : * XmlTableFetchRow
4927 : * Prepare the next "current" tuple for upcoming GetValue calls.
4928 : * Returns false if the row-filter expression returned no more rows.
4929 : */
4930 : static bool
4931 15611 : XmlTableFetchRow(TableFuncScanState *state)
4932 : {
4933 : #ifdef USE_LIBXML
4934 : XmlTableBuilderData *xtCxt;
4935 :
4936 15611 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4937 :
4938 : /* Propagate our own error context to libxml2 */
4939 15611 : xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
4940 :
4941 15611 : if (xtCxt->xpathobj == NULL)
4942 : {
4943 172 : xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4944 172 : if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4945 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
4946 : "could not create XPath object");
4947 :
4948 172 : xtCxt->row_count = 0;
4949 : }
4950 :
4951 15611 : if (xtCxt->xpathobj->type == XPATH_NODESET)
4952 : {
4953 15611 : if (xtCxt->xpathobj->nodesetval != NULL)
4954 : {
4955 15611 : if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4956 15447 : return true;
4957 : }
4958 : }
4959 :
4960 164 : return false;
4961 : #else
4962 : NO_XML_SUPPORT();
4963 : return false;
4964 : #endif /* not USE_LIBXML */
4965 : }
4966 :
4967 : /*
4968 : * XmlTableGetValue
4969 : * Return the value for column number 'colnum' for the current row. If
4970 : * column -1 is requested, return representation of the whole row.
4971 : *
4972 : * This leaks memory, so be sure to reset often the context in which it's
4973 : * called.
4974 : */
4975 : static Datum
4976 91982 : XmlTableGetValue(TableFuncScanState *state, int colnum,
4977 : Oid typid, int32 typmod, bool *isnull)
4978 : {
4979 : #ifdef USE_LIBXML
4980 91982 : Datum result = (Datum) 0;
4981 : XmlTableBuilderData *xtCxt;
4982 91982 : volatile xmlXPathObjectPtr xpathobj = NULL;
4983 :
4984 91982 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4985 :
4986 : Assert(xtCxt->xpathobj &&
4987 : xtCxt->xpathobj->type == XPATH_NODESET &&
4988 : xtCxt->xpathobj->nodesetval != NULL);
4989 :
4990 : /* Propagate our own error context to libxml2 */
4991 91982 : xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
4992 :
4993 91982 : *isnull = false;
4994 :
4995 : Assert(xtCxt->xpathscomp[colnum] != NULL);
4996 :
4997 91982 : PG_TRY();
4998 : {
4999 : xmlNodePtr cur;
5000 91982 : char *cstr = NULL;
5001 :
5002 : /* Set current node as entry point for XPath evaluation */
5003 91982 : cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
5004 91982 : xtCxt->xpathcxt->node = cur;
5005 :
5006 : /* Evaluate column path */
5007 91982 : xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
5008 91982 : if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
5009 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_ARGUMENT_FOR_XQUERY,
5010 : "could not create XPath object");
5011 :
5012 : /*
5013 : * There are four possible cases, depending on the number of nodes
5014 : * returned by the XPath expression and the type of the target column:
5015 : * a) XPath returns no nodes. b) The target type is XML (return all
5016 : * as XML). For non-XML return types: c) One node (return content).
5017 : * d) Multiple nodes (error).
5018 : */
5019 91982 : if (xpathobj->type == XPATH_NODESET)
5020 : {
5021 91962 : int count = 0;
5022 :
5023 91962 : if (xpathobj->nodesetval != NULL)
5024 91822 : count = xpathobj->nodesetval->nodeNr;
5025 :
5026 91962 : if (xpathobj->nodesetval == NULL || count == 0)
5027 : {
5028 15370 : *isnull = true;
5029 : }
5030 : else
5031 : {
5032 76592 : if (typid == XMLOID)
5033 : {
5034 : text *textstr;
5035 : StringInfoData str;
5036 :
5037 : /* Concatenate serialized values */
5038 48 : initStringInfo(&str);
5039 116 : for (int i = 0; i < count; i++)
5040 : {
5041 : textstr =
5042 68 : xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
5043 : xtCxt->xmlerrcxt);
5044 :
5045 68 : appendStringInfoText(&str, textstr);
5046 : }
5047 48 : cstr = str.data;
5048 : }
5049 : else
5050 : {
5051 : xmlChar *str;
5052 :
5053 76544 : if (count > 1)
5054 4 : ereport(ERROR,
5055 : (errcode(ERRCODE_CARDINALITY_VIOLATION),
5056 : errmsg("more than one value returned by column XPath expression")));
5057 :
5058 76540 : str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
5059 76540 : cstr = str ? xml_pstrdup_and_free(str) : "";
5060 : }
5061 : }
5062 : }
5063 20 : else if (xpathobj->type == XPATH_STRING)
5064 : {
5065 : /* Content should be escaped when target will be XML */
5066 12 : if (typid == XMLOID)
5067 4 : cstr = escape_xml((char *) xpathobj->stringval);
5068 : else
5069 8 : cstr = (char *) xpathobj->stringval;
5070 : }
5071 8 : else if (xpathobj->type == XPATH_BOOLEAN)
5072 : {
5073 : char typcategory;
5074 : bool typispreferred;
5075 : xmlChar *str;
5076 :
5077 : /* Allow implicit casting from boolean to numbers */
5078 4 : get_type_category_preferred(typid, &typcategory, &typispreferred);
5079 :
5080 4 : if (typcategory != TYPCATEGORY_NUMERIC)
5081 4 : str = xmlXPathCastBooleanToString(xpathobj->boolval);
5082 : else
5083 0 : str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
5084 :
5085 4 : cstr = xml_pstrdup_and_free(str);
5086 : }
5087 4 : else if (xpathobj->type == XPATH_NUMBER)
5088 : {
5089 : xmlChar *str;
5090 :
5091 4 : str = xmlXPathCastNumberToString(xpathobj->floatval);
5092 4 : cstr = xml_pstrdup_and_free(str);
5093 : }
5094 : else
5095 0 : elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
5096 :
5097 : /*
5098 : * By here, either cstr contains the result value, or the isnull flag
5099 : * has been set.
5100 : */
5101 : Assert(cstr || *isnull);
5102 :
5103 91978 : if (!*isnull)
5104 76608 : result = InputFunctionCall(&state->in_functions[colnum],
5105 : cstr,
5106 76608 : state->typioparams[colnum],
5107 : typmod);
5108 : }
5109 4 : PG_FINALLY();
5110 : {
5111 91982 : if (xpathobj != NULL)
5112 91982 : xmlXPathFreeObject(xpathobj);
5113 : }
5114 91982 : PG_END_TRY();
5115 :
5116 91978 : return result;
5117 : #else
5118 : NO_XML_SUPPORT();
5119 : return 0;
5120 : #endif /* not USE_LIBXML */
5121 : }
5122 :
5123 : /*
5124 : * XmlTableDestroyOpaque
5125 : * Release all libxml2 resources
5126 : */
5127 : static void
5128 176 : XmlTableDestroyOpaque(TableFuncScanState *state)
5129 : {
5130 : #ifdef USE_LIBXML
5131 : XmlTableBuilderData *xtCxt;
5132 :
5133 176 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
5134 :
5135 : /* Propagate our own error context to libxml2 */
5136 176 : xmlSetStructuredErrorFunc(xtCxt->xmlerrcxt, xml_errorHandler);
5137 :
5138 176 : if (xtCxt->xpathscomp != NULL)
5139 : {
5140 : int i;
5141 :
5142 744 : for (i = 0; i < xtCxt->natts; i++)
5143 568 : if (xtCxt->xpathscomp[i] != NULL)
5144 516 : xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
5145 : }
5146 :
5147 176 : if (xtCxt->xpathobj != NULL)
5148 172 : xmlXPathFreeObject(xtCxt->xpathobj);
5149 176 : if (xtCxt->xpathcomp != NULL)
5150 172 : xmlXPathFreeCompExpr(xtCxt->xpathcomp);
5151 176 : if (xtCxt->xpathcxt != NULL)
5152 176 : xmlXPathFreeContext(xtCxt->xpathcxt);
5153 176 : if (xtCxt->doc != NULL)
5154 176 : xmlFreeDoc(xtCxt->doc);
5155 176 : if (xtCxt->ctxt != NULL)
5156 176 : xmlFreeParserCtxt(xtCxt->ctxt);
5157 :
5158 176 : pg_xml_done(xtCxt->xmlerrcxt, true);
5159 :
5160 : /* not valid anymore */
5161 176 : xtCxt->magic = 0;
5162 176 : state->opaque = NULL;
5163 :
5164 : #else
5165 : NO_XML_SUPPORT();
5166 : #endif /* not USE_LIBXML */
5167 176 : }
|