Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * xml.c
4 : * XML data type support.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * src/backend/utils/adt/xml.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : /*
16 : * Generally, XML type support is only available when libxml use was
17 : * configured during the build. But even if that is not done, the
18 : * type and all the functions are available, but most of them will
19 : * fail. For one thing, this avoids having to manage variant catalog
20 : * installations. But it also has nice effects such as that you can
21 : * dump a database containing XML type data even if the server is not
22 : * linked with libxml. Thus, make sure xml_out() works even if nothing
23 : * else does.
24 : */
25 :
26 : /*
27 : * Notes on memory management:
28 : *
29 : * Sometimes libxml allocates global structures in the hope that it can reuse
30 : * them later on. This makes it impractical to change the xmlMemSetup
31 : * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32 : * allocated with malloc() or vice versa. Since libxml might be used by
33 : * loadable modules, eg libperl, our only safe choices are to change the
34 : * functions at postmaster/backend launch or not at all. Since we'd rather
35 : * not activate libxml in sessions that might never use it, the latter choice
36 : * is the preferred one. However, for debugging purposes it can be awfully
37 : * handy to constrain libxml's allocations to be done in a specific palloc
38 : * context, where they're easy to track. Therefore there is code here that
39 : * can be enabled in debug builds to redirect libxml's allocations into a
40 : * special context LibxmlContext. It's not recommended to turn this on in
41 : * a production build because of the possibility of bad interactions with
42 : * external modules.
43 : */
44 : /* #define USE_LIBXMLCONTEXT */
45 :
46 : #include "postgres.h"
47 :
48 : #ifdef USE_LIBXML
49 : #include <libxml/chvalid.h>
50 : #include <libxml/entities.h>
51 : #include <libxml/parser.h>
52 : #include <libxml/parserInternals.h>
53 : #include <libxml/tree.h>
54 : #include <libxml/uri.h>
55 : #include <libxml/xmlerror.h>
56 : #include <libxml/xmlsave.h>
57 : #include <libxml/xmlversion.h>
58 : #include <libxml/xmlwriter.h>
59 : #include <libxml/xpath.h>
60 : #include <libxml/xpathInternals.h>
61 :
62 : /*
63 : * We used to check for xmlStructuredErrorContext via a configure test; but
64 : * that doesn't work on Windows, so instead use this grottier method of
65 : * testing the library version number.
66 : */
67 : #if LIBXML_VERSION >= 20704
68 : #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
69 : #endif
70 :
71 : /*
72 : * libxml2 2.12 decided to insert "const" into the error handler API.
73 : */
74 : #if LIBXML_VERSION >= 21200
75 : #define PgXmlErrorPtr const xmlError *
76 : #else
77 : #define PgXmlErrorPtr xmlErrorPtr
78 : #endif
79 :
80 : #endif /* USE_LIBXML */
81 :
82 : #include "access/htup_details.h"
83 : #include "access/table.h"
84 : #include "catalog/namespace.h"
85 : #include "catalog/pg_class.h"
86 : #include "catalog/pg_type.h"
87 : #include "commands/dbcommands.h"
88 : #include "executor/spi.h"
89 : #include "executor/tablefunc.h"
90 : #include "fmgr.h"
91 : #include "lib/stringinfo.h"
92 : #include "libpq/pqformat.h"
93 : #include "mb/pg_wchar.h"
94 : #include "miscadmin.h"
95 : #include "nodes/execnodes.h"
96 : #include "nodes/miscnodes.h"
97 : #include "nodes/nodeFuncs.h"
98 : #include "utils/array.h"
99 : #include "utils/builtins.h"
100 : #include "utils/date.h"
101 : #include "utils/datetime.h"
102 : #include "utils/lsyscache.h"
103 : #include "utils/rel.h"
104 : #include "utils/syscache.h"
105 : #include "utils/xml.h"
106 :
107 :
108 : /* GUC variables */
109 : int xmlbinary = XMLBINARY_BASE64;
110 : int xmloption = XMLOPTION_CONTENT;
111 :
112 : #ifdef USE_LIBXML
113 :
114 : /* random number to identify PgXmlErrorContext */
115 : #define ERRCXT_MAGIC 68275028
116 :
/*
 * Error-handling state shared between this file and libxml.
 *
 * Functions in this file obtain one of these from pg_xml_init(), test
 * err_occurred after libxml calls, report problems through xml_ereport(),
 * and finally release it with pg_xml_done().
 *
 * NOTE(review): "magic" presumably holds ERRCXT_MAGIC so that the error
 * handler can recognize its context pointer -- confirm against
 * pg_xml_init()/xml_errorHandler(), which are not visible here.
 */
117 : struct PgXmlErrorContext
118 : {
119 : int magic;
120 : /* strictness argument passed to pg_xml_init */
121 : PgXmlStrictness strictness;
122 : /* current error status and accumulated message, if any */
123 : bool err_occurred;
124 : StringInfoData err_buf;
125 : /* previous libxml error handling state (saved by pg_xml_init) */
126 : xmlStructuredErrorFunc saved_errfunc;
127 : void *saved_errcxt;
128 : /* previous libxml entity handler (saved by pg_xml_init) */
129 : xmlExternalEntityLoader saved_entityfunc;
130 : };
131 :
132 : static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
133 : xmlParserCtxtPtr ctxt);
134 : static void xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
135 : int sqlcode, const char *msg);
136 : static void xml_errorHandler(void *data, PgXmlErrorPtr error);
137 : static int errdetail_for_xml_code(int code);
138 : static void chopStringInfoNewlines(StringInfo str);
139 : static void appendStringInfoLineSeparator(StringInfo str);
140 :
141 : #ifdef USE_LIBXMLCONTEXT
142 :
143 : static MemoryContext LibxmlContext = NULL;
144 :
145 : static void xml_memory_init(void);
146 : static void *xml_palloc(size_t size);
147 : static void *xml_repalloc(void *ptr, size_t size);
148 : static void xml_pfree(void *ptr);
149 : static char *xml_pstrdup(const char *string);
150 : #endif /* USE_LIBXMLCONTEXT */
151 :
152 : static xmlChar *xml_text2xmlChar(text *in);
153 : static int parse_xml_decl(const xmlChar *str, size_t *lenp,
154 : xmlChar **version, xmlChar **encoding, int *standalone);
155 : static bool print_xml_decl(StringInfo buf, const xmlChar *version,
156 : pg_enc encoding, int standalone);
157 : static bool xml_doctype_in_content(const xmlChar *str);
158 : static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
159 : bool preserve_whitespace, int encoding,
160 : XmlOptionType *parsed_xmloptiontype,
161 : xmlNodePtr *parsed_nodes,
162 : Node *escontext);
163 : static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
164 : static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
165 : ArrayBuildState *astate,
166 : PgXmlErrorContext *xmlerrcxt);
167 : static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
168 : #endif /* USE_LIBXML */
169 :
170 : static void xmldata_root_element_start(StringInfo result, const char *eltname,
171 : const char *xmlschema, const char *targetns,
172 : bool top_level);
173 : static void xmldata_root_element_end(StringInfo result, const char *eltname);
174 : static StringInfo query_to_xml_internal(const char *query, char *tablename,
175 : const char *xmlschema, bool nulls, bool tableforest,
176 : const char *targetns, bool top_level);
177 : static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
178 : bool nulls, bool tableforest, const char *targetns);
179 : static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
180 : List *relid_list, bool nulls,
181 : bool tableforest, const char *targetns);
182 : static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
183 : bool nulls, bool tableforest,
184 : const char *targetns);
185 : static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
186 : static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
187 : static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
188 : static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
189 : char *tablename, bool nulls, bool tableforest,
190 : const char *targetns, bool top_level);
191 :
192 : /* XMLTABLE support */
193 : #ifdef USE_LIBXML
194 : /* random number to identify XmlTableContext */
195 : #define XMLTABLE_CONTEXT_MAGIC 46922182
/*
 * Working state for the XMLTABLE callbacks declared below.
 *
 * It bundles the libxml objects used by one XMLTABLE evaluation: parser
 * context, parsed document, XPath context, compiled XPath expressions and
 * the current XPath result, plus the error context used for reporting.
 *
 * NOTE(review): presumably "magic" is XMLTABLE_CONTEXT_MAGIC, "natts" is
 * the column count handed to XmlTableInitOpaque(), "xpathcomp" is the row
 * filter and "xpathscomp" the per-column filters -- the code that fills
 * these fields lies outside this view, so confirm before relying on it.
 */
195 : typedef struct XmlTableBuilderData
196 : {
197 : int magic;
198 : int natts;
199 : long int row_count;
200 : PgXmlErrorContext *xmlerrcxt;
201 : xmlParserCtxtPtr ctxt;
202 : xmlDocPtr doc;
203 : xmlXPathContextPtr xpathcxt;
204 : xmlXPathCompExprPtr xpathcomp;
205 : xmlXPathObjectPtr xpathobj;
206 : xmlXPathCompExprPtr *xpathscomp;
207 : } XmlTableBuilderData;
209 : #endif
210 :
211 : static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts);
212 : static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
213 : static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
214 : const char *uri);
215 : static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
216 : static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
217 : const char *path, int colnum);
218 : static bool XmlTableFetchRow(struct TableFuncScanState *state);
219 : static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
220 : Oid typid, int32 typmod, bool *isnull);
221 : static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
222 :
/*
 * Callback table for XMLTABLE: each TableFuncRoutine slot is bound to the
 * static XmlTable* function of the same name declared just above.
 */
222 : const TableFuncRoutine XmlTableRoutine =
223 : {
224 : .InitOpaque = XmlTableInitOpaque,
225 : .SetDocument = XmlTableSetDocument,
226 : .SetNamespace = XmlTableSetNamespace,
227 : .SetRowFilter = XmlTableSetRowFilter,
228 : .SetColumnFilter = XmlTableSetColumnFilter,
229 : .FetchRow = XmlTableFetchRow,
230 : .GetValue = XmlTableGetValue,
231 : .DestroyOpaque = XmlTableDestroyOpaque
232 : };
234 :
/*
 * Raise the "unsupported XML feature" ERROR.  The functions in this file
 * invoke it from their #else branches, i.e. when USE_LIBXML is not defined.
 */
234 : #define NO_XML_SUPPORT() \
235 : ereport(ERROR, \
236 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
237 : errmsg("unsupported XML feature"), \
238 : errdetail("This functionality requires the server to be built with libxml support.")))
240 :
241 :
242 : /* from SQL/XML:2008 section 4.9 */
243 : #define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
244 : #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
245 : #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
246 :
247 :
248 : #ifdef USE_LIBXML
249 :
250 : static int
251 0 : xmlChar_to_encoding(const xmlChar *encoding_name)
252 : {
253 0 : int encoding = pg_char_to_encoding((const char *) encoding_name);
254 :
255 0 : if (encoding < 0)
256 0 : ereport(ERROR,
257 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
258 : errmsg("invalid encoding name \"%s\"",
259 : (const char *) encoding_name)));
260 0 : return encoding;
261 : }
262 : #endif
263 :
264 :
265 : /*
266 : * xml_in uses a plain C string to VARDATA conversion, so for the time being
267 : * we use the conversion function for the text datatype.
268 : *
269 : * This is only acceptable so long as xmltype and text use the same
270 : * representation.
271 : */
272 : Datum
273 828 : xml_in(PG_FUNCTION_ARGS)
274 : {
275 : #ifdef USE_LIBXML
276 828 : char *s = PG_GETARG_CSTRING(0);
277 : xmltype *vardata;
278 : xmlDocPtr doc;
279 :
280 : /* Build the result object. */
281 828 : vardata = (xmltype *) cstring_to_text(s);
282 :
283 : /*
284 : * Parse the data to check if it is well-formed XML data.
285 : *
286 : * Note: we don't need to worry about whether a soft error is detected.
287 : */
288 828 : doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding(),
289 828 : NULL, NULL, fcinfo->context);
290 780 : if (doc != NULL)
291 768 : xmlFreeDoc(doc);
292 :
293 780 : PG_RETURN_XML_P(vardata);
294 : #else
295 : NO_XML_SUPPORT();
296 : return 0;
297 : #endif
298 : }
299 :
300 :
301 : #define PG_XML_DEFAULT_VERSION "1.0"
302 :
303 :
304 : /*
305 : * xml_out_internal uses a plain VARDATA to C string conversion, so for the
306 : * time being we use the conversion function for the text datatype.
307 : *
308 : * This is only acceptable so long as xmltype and text use the same
309 : * representation.
310 : */
311 : static char *
312 22862 : xml_out_internal(xmltype *x, pg_enc target_encoding)
313 : {
314 22862 : char *str = text_to_cstring((text *) x);
315 :
316 : #ifdef USE_LIBXML
317 22862 : size_t len = strlen(str);
318 : xmlChar *version;
319 : int standalone;
320 : int res_code;
321 :
322 22862 : if ((res_code = parse_xml_decl((xmlChar *) str,
323 : &len, &version, NULL, &standalone)) == 0)
324 : {
325 : StringInfoData buf;
326 :
327 22862 : initStringInfo(&buf);
328 :
329 22862 : if (!print_xml_decl(&buf, version, target_encoding, standalone))
330 : {
331 : /*
332 : * If we are not going to produce an XML declaration, eat a single
333 : * newline in the original string to prevent empty first lines in
334 : * the output.
335 : */
336 22814 : if (*(str + len) == '\n')
337 6 : len += 1;
338 : }
339 22862 : appendStringInfoString(&buf, str + len);
340 :
341 22862 : pfree(str);
342 :
343 22862 : return buf.data;
344 : }
345 :
346 0 : ereport(WARNING,
347 : errcode(ERRCODE_INTERNAL_ERROR),
348 : errmsg_internal("could not parse XML declaration in stored value"),
349 : errdetail_for_xml_code(res_code));
350 : #endif
351 0 : return str;
352 : }
353 :
354 :
355 : Datum
356 22598 : xml_out(PG_FUNCTION_ARGS)
357 : {
358 22598 : xmltype *x = PG_GETARG_XML_P(0);
359 :
360 : /*
361 : * xml_out removes the encoding property in all cases. This is because we
362 : * cannot control from here whether the datum will be converted to a
363 : * different client encoding, so we'd do more harm than good by including
364 : * it.
365 : */
366 22598 : PG_RETURN_CSTRING(xml_out_internal(x, 0));
367 : }
368 :
369 :
370 : Datum
371 0 : xml_recv(PG_FUNCTION_ARGS)
372 : {
373 : #ifdef USE_LIBXML
374 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
375 : xmltype *result;
376 : char *str;
377 : char *newstr;
378 : int nbytes;
379 : xmlDocPtr doc;
380 0 : xmlChar *encodingStr = NULL;
381 : int encoding;
382 :
383 : /*
384 : * Read the data in raw format. We don't know yet what the encoding is, as
385 : * that information is embedded in the xml declaration; so we have to
386 : * parse that before converting to server encoding.
387 : */
388 0 : nbytes = buf->len - buf->cursor;
389 0 : str = (char *) pq_getmsgbytes(buf, nbytes);
390 :
391 : /*
392 : * We need a null-terminated string to pass to parse_xml_decl(). Rather
393 : * than make a separate copy, make the temporary result one byte bigger
394 : * than it needs to be.
395 : */
396 0 : result = palloc(nbytes + 1 + VARHDRSZ);
397 0 : SET_VARSIZE(result, nbytes + VARHDRSZ);
398 0 : memcpy(VARDATA(result), str, nbytes);
399 0 : str = VARDATA(result);
400 0 : str[nbytes] = '\0';
401 :
402 0 : parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
403 :
404 : /*
405 : * If encoding wasn't explicitly specified in the XML header, treat it as
406 : * UTF-8, as that's the default in XML. This is different from xml_in(),
407 : * where the input has to go through the normal client to server encoding
408 : * conversion.
409 : */
410 0 : encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
411 :
412 : /*
413 : * Parse the data to check if it is well-formed XML data. Assume that
414 : * xml_parse will throw ERROR if not.
415 : */
416 0 : doc = xml_parse(result, xmloption, true, encoding, NULL, NULL, NULL);
417 0 : xmlFreeDoc(doc);
418 :
419 : /* Now that we know what we're dealing with, convert to server encoding */
420 0 : newstr = pg_any_to_server(str, nbytes, encoding);
421 :
422 0 : if (newstr != str)
423 : {
424 0 : pfree(result);
425 0 : result = (xmltype *) cstring_to_text(newstr);
426 0 : pfree(newstr);
427 : }
428 :
429 0 : PG_RETURN_XML_P(result);
430 : #else
431 : NO_XML_SUPPORT();
432 : return 0;
433 : #endif
434 : }
435 :
436 :
437 : Datum
438 0 : xml_send(PG_FUNCTION_ARGS)
439 : {
440 0 : xmltype *x = PG_GETARG_XML_P(0);
441 : char *outval;
442 : StringInfoData buf;
443 :
444 : /*
445 : * xml_out_internal doesn't convert the encoding, it just prints the right
446 : * declaration. pq_sendtext will do the conversion.
447 : */
448 0 : outval = xml_out_internal(x, pg_get_client_encoding());
449 :
450 0 : pq_begintypsend(&buf);
451 0 : pq_sendtext(&buf, outval, strlen(outval));
452 0 : pfree(outval);
453 0 : PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
454 : }
455 :
456 :
457 : #ifdef USE_LIBXML
458 : static void
459 132 : appendStringInfoText(StringInfo str, const text *t)
460 : {
461 132 : appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
462 132 : }
463 : #endif
464 :
465 :
466 : static xmltype *
467 21842 : stringinfo_to_xmltype(StringInfo buf)
468 : {
469 21842 : return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
470 : }
471 :
472 :
473 : static xmltype *
474 78 : cstring_to_xmltype(const char *string)
475 : {
476 78 : return (xmltype *) cstring_to_text(string);
477 : }
478 :
479 :
480 : #ifdef USE_LIBXML
481 : static xmltype *
482 21964 : xmlBuffer_to_xmltype(xmlBufferPtr buf)
483 : {
484 21964 : return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
485 : xmlBufferLength(buf));
486 : }
487 : #endif
488 :
489 :
490 : Datum
491 42 : xmlcomment(PG_FUNCTION_ARGS)
492 : {
493 : #ifdef USE_LIBXML
494 42 : text *arg = PG_GETARG_TEXT_PP(0);
495 42 : char *argdata = VARDATA_ANY(arg);
496 42 : int len = VARSIZE_ANY_EXHDR(arg);
497 : StringInfoData buf;
498 : int i;
499 :
500 : /* check for "--" in string or "-" at the end */
501 180 : for (i = 1; i < len; i++)
502 : {
503 144 : if (argdata[i] == '-' && argdata[i - 1] == '-')
504 6 : ereport(ERROR,
505 : (errcode(ERRCODE_INVALID_XML_COMMENT),
506 : errmsg("invalid XML comment")));
507 : }
508 36 : if (len > 0 && argdata[len - 1] == '-')
509 6 : ereport(ERROR,
510 : (errcode(ERRCODE_INVALID_XML_COMMENT),
511 : errmsg("invalid XML comment")));
512 :
513 30 : initStringInfo(&buf);
514 30 : appendStringInfoString(&buf, "<!--");
515 30 : appendStringInfoText(&buf, arg);
516 30 : appendStringInfoString(&buf, "-->");
517 :
518 30 : PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
519 : #else
520 : NO_XML_SUPPORT();
521 : return 0;
522 : #endif
523 : }
524 :
525 :
526 : Datum
527 30 : xmltext(PG_FUNCTION_ARGS)
528 : {
529 : #ifdef USE_LIBXML
530 30 : text *arg = PG_GETARG_TEXT_PP(0);
531 : text *result;
532 30 : xmlChar *xmlbuf = NULL;
533 :
534 30 : xmlbuf = xmlEncodeSpecialChars(NULL, xml_text2xmlChar(arg));
535 :
536 : Assert(xmlbuf);
537 :
538 30 : result = cstring_to_text_with_len((const char *) xmlbuf, xmlStrlen(xmlbuf));
539 30 : xmlFree(xmlbuf);
540 30 : PG_RETURN_XML_P(result);
541 : #else
542 : NO_XML_SUPPORT();
543 : return 0;
544 : #endif /* not USE_LIBXML */
545 : }
546 :
547 :
548 : /*
549 : * TODO: xmlconcat needs to merge the notations and unparsed entities
550 : * of the argument values. Not very important in practice, though.
551 : */
552 : xmltype *
553 21592 : xmlconcat(List *args)
554 : {
555 : #ifdef USE_LIBXML
556 21592 : int global_standalone = 1;
557 21592 : xmlChar *global_version = NULL;
558 21592 : bool global_version_no_value = false;
559 : StringInfoData buf;
560 : ListCell *v;
561 :
562 21592 : initStringInfo(&buf);
563 64782 : foreach(v, args)
564 : {
565 43190 : xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
566 : size_t len;
567 : xmlChar *version;
568 : int standalone;
569 : char *str;
570 :
571 43190 : len = VARSIZE(x) - VARHDRSZ;
572 43190 : str = text_to_cstring((text *) x);
573 :
574 43190 : parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
575 :
576 43190 : if (standalone == 0 && global_standalone == 1)
577 0 : global_standalone = 0;
578 43190 : if (standalone < 0)
579 43178 : global_standalone = -1;
580 :
581 43190 : if (!version)
582 43172 : global_version_no_value = true;
583 18 : else if (!global_version)
584 12 : global_version = version;
585 6 : else if (xmlStrcmp(version, global_version) != 0)
586 0 : global_version_no_value = true;
587 :
588 43190 : appendStringInfoString(&buf, str + len);
589 43190 : pfree(str);
590 : }
591 :
592 21592 : if (!global_version_no_value || global_standalone >= 0)
593 : {
594 : StringInfoData buf2;
595 :
596 6 : initStringInfo(&buf2);
597 :
598 6 : print_xml_decl(&buf2,
599 6 : (!global_version_no_value) ? global_version : NULL,
600 : 0,
601 : global_standalone);
602 :
603 6 : appendBinaryStringInfo(&buf2, buf.data, buf.len);
604 6 : buf = buf2;
605 : }
606 :
607 21592 : return stringinfo_to_xmltype(&buf);
608 : #else
609 : NO_XML_SUPPORT();
610 : return NULL;
611 : #endif
612 : }
613 :
614 :
615 : /*
616 : * XMLAGG support
617 : */
618 : Datum
619 21568 : xmlconcat2(PG_FUNCTION_ARGS)
620 : {
621 21568 : if (PG_ARGISNULL(0))
622 : {
623 18 : if (PG_ARGISNULL(1))
624 0 : PG_RETURN_NULL();
625 : else
626 18 : PG_RETURN_XML_P(PG_GETARG_XML_P(1));
627 : }
628 21550 : else if (PG_ARGISNULL(1))
629 0 : PG_RETURN_XML_P(PG_GETARG_XML_P(0));
630 : else
631 21550 : PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
632 : PG_GETARG_XML_P(1))));
633 : }
634 :
635 :
636 : Datum
637 6 : texttoxml(PG_FUNCTION_ARGS)
638 : {
639 6 : text *data = PG_GETARG_TEXT_PP(0);
640 :
641 6 : PG_RETURN_XML_P(xmlparse(data, xmloption, true));
642 : }
643 :
644 :
645 : Datum
646 0 : xmltotext(PG_FUNCTION_ARGS)
647 : {
648 0 : xmltype *data = PG_GETARG_XML_P(0);
649 :
650 : /* It's actually binary compatible. */
651 0 : PG_RETURN_TEXT_P((text *) data);
652 : }
653 :
654 :
655 : text *
656 168 : xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
657 : {
658 : #ifdef USE_LIBXML
659 : text *volatile result;
660 : xmlDocPtr doc;
661 : XmlOptionType parsed_xmloptiontype;
662 : xmlNodePtr content_nodes;
663 168 : volatile xmlBufferPtr buf = NULL;
664 168 : volatile xmlSaveCtxtPtr ctxt = NULL;
665 168 : ErrorSaveContext escontext = {T_ErrorSaveContext};
666 : PgXmlErrorContext *xmlerrcxt;
667 : #endif
668 :
669 168 : if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
670 : {
671 : /*
672 : * We don't actually need to do anything, so just return the
673 : * binary-compatible input. For backwards-compatibility reasons,
674 : * allow such cases to succeed even without USE_LIBXML.
675 : */
676 36 : return (text *) data;
677 : }
678 :
679 : #ifdef USE_LIBXML
680 : /* Parse the input according to the xmloption */
681 132 : doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
682 : &parsed_xmloptiontype, &content_nodes,
683 : (Node *) &escontext);
684 132 : if (doc == NULL || escontext.error_occurred)
685 : {
686 30 : if (doc)
687 0 : xmlFreeDoc(doc);
688 : /* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
689 30 : ereport(ERROR,
690 : (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
691 : errmsg("not an XML document")));
692 : }
693 :
694 : /* If we weren't asked to indent, we're done. */
695 102 : if (!indent)
696 : {
697 18 : xmlFreeDoc(doc);
698 18 : return (text *) data;
699 : }
700 :
701 : /* Otherwise, we gotta spin up some error handling. */
702 84 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
703 :
704 84 : PG_TRY();
705 : {
706 84 : size_t decl_len = 0;
707 :
708 : /* The serialized data will go into this buffer. */
709 84 : buf = xmlBufferCreate();
710 :
711 84 : if (buf == NULL || xmlerrcxt->err_occurred)
712 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
713 : "could not allocate xmlBuffer");
714 :
715 : /* Detect whether there's an XML declaration */
716 84 : parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
717 :
718 : /*
719 : * Emit declaration only if the input had one. Note: some versions of
720 : * xmlSaveToBuffer leak memory if a non-null encoding argument is
721 : * passed, so don't do that. We don't want any encoding conversion
722 : * anyway.
723 : */
724 84 : if (decl_len == 0)
725 72 : ctxt = xmlSaveToBuffer(buf, NULL,
726 : XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
727 : else
728 12 : ctxt = xmlSaveToBuffer(buf, NULL,
729 : XML_SAVE_FORMAT);
730 :
731 84 : if (ctxt == NULL || xmlerrcxt->err_occurred)
732 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
733 : "could not allocate xmlSaveCtxt");
734 :
735 84 : if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
736 : {
737 : /* If it's a document, saving is easy. */
738 36 : if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
739 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
740 : "could not save document to xmlBuffer");
741 : }
742 48 : else if (content_nodes != NULL)
743 : {
744 : /*
745 : * Deal with the case where we have non-singly-rooted XML.
746 : * libxml's dump functions don't work well for that without help.
747 : * We build a fake root node that serves as a container for the
748 : * content nodes, and then iterate over the nodes.
749 : */
750 : xmlNodePtr root;
751 : xmlNodePtr newline;
752 :
753 42 : root = xmlNewNode(NULL, (const xmlChar *) "content-root");
754 42 : if (root == NULL || xmlerrcxt->err_occurred)
755 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
756 : "could not allocate xml node");
757 :
758 : /* This attaches root to doc, so we need not free it separately. */
759 42 : xmlDocSetRootElement(doc, root);
760 42 : xmlAddChildList(root, content_nodes);
761 :
762 : /*
763 : * We use this node to insert newlines in the dump. Note: in at
764 : * least some libxml versions, xmlNewDocText would not attach the
765 : * node to the document even if we passed it. Therefore, manage
766 : * freeing of this node manually, and pass NULL here to make sure
767 : * there's not a dangling link.
768 : */
769 42 : newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
770 42 : if (newline == NULL || xmlerrcxt->err_occurred)
771 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
772 : "could not allocate xml node");
773 :
774 108 : for (xmlNodePtr node = root->children; node; node = node->next)
775 : {
776 : /* insert newlines between nodes */
777 66 : if (node->type != XML_TEXT_NODE && node->prev != NULL)
778 : {
779 18 : if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
780 : {
781 0 : xmlFreeNode(newline);
782 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
783 : "could not save newline to xmlBuffer");
784 : }
785 : }
786 :
787 66 : if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
788 : {
789 0 : xmlFreeNode(newline);
790 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
791 : "could not save content to xmlBuffer");
792 : }
793 : }
794 :
795 42 : xmlFreeNode(newline);
796 : }
797 :
798 84 : if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
799 : {
800 0 : ctxt = NULL; /* don't try to close it again */
801 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
802 : "could not close xmlSaveCtxtPtr");
803 : }
804 :
805 84 : result = (text *) xmlBuffer_to_xmltype(buf);
806 : }
807 0 : PG_CATCH();
808 : {
809 0 : if (ctxt)
810 0 : xmlSaveClose(ctxt);
811 0 : if (buf)
812 0 : xmlBufferFree(buf);
813 0 : if (doc)
814 0 : xmlFreeDoc(doc);
815 :
816 0 : pg_xml_done(xmlerrcxt, true);
817 :
818 0 : PG_RE_THROW();
819 : }
820 84 : PG_END_TRY();
821 :
822 84 : xmlBufferFree(buf);
823 84 : xmlFreeDoc(doc);
824 :
825 84 : pg_xml_done(xmlerrcxt, false);
826 :
827 84 : return result;
828 : #else
829 : NO_XML_SUPPORT();
830 : return NULL;
831 : #endif
832 : }
833 :
834 :
835 : xmltype *
836 21724 : xmlelement(XmlExpr *xexpr,
837 : Datum *named_argvalue, bool *named_argnull,
838 : Datum *argvalue, bool *argnull)
839 : {
840 : #ifdef USE_LIBXML
841 : xmltype *result;
842 : List *named_arg_strings;
843 : List *arg_strings;
844 : int i;
845 : ListCell *arg;
846 : ListCell *narg;
847 : PgXmlErrorContext *xmlerrcxt;
848 21724 : volatile xmlBufferPtr buf = NULL;
849 21724 : volatile xmlTextWriterPtr writer = NULL;
850 :
851 : /*
852 : * All arguments are already evaluated, and their values are passed in the
853 : * named_argvalue/named_argnull or argvalue/argnull arrays. This avoids
854 : * issues if one of the arguments involves a call to some other function
855 : * or subsystem that wants to use libxml on its own terms. We examine the
856 : * original XmlExpr to identify the numbers and types of the arguments.
857 : */
858 21724 : named_arg_strings = NIL;
859 21724 : i = 0;
860 21772 : foreach(arg, xexpr->named_args)
861 : {
862 54 : Expr *e = (Expr *) lfirst(arg);
863 : char *str;
864 :
865 54 : if (named_argnull[i])
866 0 : str = NULL;
867 : else
868 54 : str = map_sql_value_to_xml_value(named_argvalue[i],
869 : exprType((Node *) e),
870 : false);
871 48 : named_arg_strings = lappend(named_arg_strings, str);
872 48 : i++;
873 : }
874 :
875 21718 : arg_strings = NIL;
876 21718 : i = 0;
877 43412 : foreach(arg, xexpr->args)
878 : {
879 21694 : Expr *e = (Expr *) lfirst(arg);
880 : char *str;
881 :
882 : /* here we can just forget NULL elements immediately */
883 21694 : if (!argnull[i])
884 : {
885 21694 : str = map_sql_value_to_xml_value(argvalue[i],
886 : exprType((Node *) e),
887 : true);
888 21694 : arg_strings = lappend(arg_strings, str);
889 : }
890 21694 : i++;
891 : }
892 :
893 21718 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
894 :
895 21718 : PG_TRY();
896 : {
897 21718 : buf = xmlBufferCreate();
898 21718 : if (buf == NULL || xmlerrcxt->err_occurred)
899 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
900 : "could not allocate xmlBuffer");
901 21718 : writer = xmlNewTextWriterMemory(buf, 0);
902 21718 : if (writer == NULL || xmlerrcxt->err_occurred)
903 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
904 : "could not allocate xmlTextWriter");
905 :
906 21718 : xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
907 :
908 21766 : forboth(arg, named_arg_strings, narg, xexpr->arg_names)
909 : {
910 48 : char *str = (char *) lfirst(arg);
911 48 : char *argname = strVal(lfirst(narg));
912 :
913 48 : if (str)
914 48 : xmlTextWriterWriteAttribute(writer,
915 : (xmlChar *) argname,
916 : (xmlChar *) str);
917 : }
918 :
919 43412 : foreach(arg, arg_strings)
920 : {
921 21694 : char *str = (char *) lfirst(arg);
922 :
923 21694 : xmlTextWriterWriteRaw(writer, (xmlChar *) str);
924 : }
925 :
926 21718 : xmlTextWriterEndElement(writer);
927 :
928 : /* we MUST do this now to flush data out to the buffer ... */
929 21718 : xmlFreeTextWriter(writer);
930 21718 : writer = NULL;
931 :
932 21718 : result = xmlBuffer_to_xmltype(buf);
933 : }
934 0 : PG_CATCH();
935 : {
936 0 : if (writer)
937 0 : xmlFreeTextWriter(writer);
938 0 : if (buf)
939 0 : xmlBufferFree(buf);
940 :
941 0 : pg_xml_done(xmlerrcxt, true);
942 :
943 0 : PG_RE_THROW();
944 : }
945 21718 : PG_END_TRY();
946 :
947 21718 : xmlBufferFree(buf);
948 :
949 21718 : pg_xml_done(xmlerrcxt, false);
950 :
951 21718 : return result;
952 : #else
953 : NO_XML_SUPPORT();
954 : return NULL;
955 : #endif
956 : }
957 :
958 :
959 : xmltype *
960 138 : xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
961 : {
962 : #ifdef USE_LIBXML
963 : xmlDocPtr doc;
964 :
965 138 : doc = xml_parse(data, xmloption_arg, preserve_whitespace,
966 : GetDatabaseEncoding(), NULL, NULL, NULL);
967 90 : xmlFreeDoc(doc);
968 :
969 90 : return (xmltype *) data;
970 : #else
971 : NO_XML_SUPPORT();
972 : return NULL;
973 : #endif
974 : }
975 :
976 :
977 : xmltype *
978 72 : xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
979 : {
980 : #ifdef USE_LIBXML
981 : xmltype *result;
982 : StringInfoData buf;
983 :
984 72 : if (pg_strcasecmp(target, "xml") == 0)
985 12 : ereport(ERROR,
986 : (errcode(ERRCODE_SYNTAX_ERROR), /* really */
987 : errmsg("invalid XML processing instruction"),
988 : errdetail("XML processing instruction target name cannot be \"%s\".", target)));
989 :
990 : /*
991 : * Following the SQL standard, the null check comes after the syntax check
992 : * above.
993 : */
994 60 : *result_is_null = arg_is_null;
995 60 : if (*result_is_null)
996 12 : return NULL;
997 :
998 48 : initStringInfo(&buf);
999 :
1000 48 : appendStringInfo(&buf, "<?%s", target);
1001 :
1002 48 : if (arg != NULL)
1003 : {
1004 : char *string;
1005 :
1006 24 : string = text_to_cstring(arg);
1007 24 : if (strstr(string, "?>") != NULL)
1008 6 : ereport(ERROR,
1009 : (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
1010 : errmsg("invalid XML processing instruction"),
1011 : errdetail("XML processing instruction cannot contain \"?>\".")));
1012 :
1013 18 : appendStringInfoChar(&buf, ' ');
1014 18 : appendStringInfoString(&buf, string + strspn(string, " "));
1015 18 : pfree(string);
1016 : }
1017 42 : appendStringInfoString(&buf, "?>");
1018 :
1019 42 : result = stringinfo_to_xmltype(&buf);
1020 42 : pfree(buf.data);
1021 42 : return result;
1022 : #else
1023 : NO_XML_SUPPORT();
1024 : return NULL;
1025 : #endif
1026 : }
1027 :
1028 :
1029 : xmltype *
1030 60 : xmlroot(xmltype *data, text *version, int standalone)
1031 : {
1032 : #ifdef USE_LIBXML
1033 : char *str;
1034 : size_t len;
1035 : xmlChar *orig_version;
1036 : int orig_standalone;
1037 : StringInfoData buf;
1038 :
1039 60 : len = VARSIZE(data) - VARHDRSZ;
1040 60 : str = text_to_cstring((text *) data);
1041 :
1042 60 : parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
1043 :
1044 60 : if (version)
1045 24 : orig_version = xml_text2xmlChar(version);
1046 : else
1047 36 : orig_version = NULL;
1048 :
1049 60 : switch (standalone)
1050 : {
1051 18 : case XML_STANDALONE_YES:
1052 18 : orig_standalone = 1;
1053 18 : break;
1054 12 : case XML_STANDALONE_NO:
1055 12 : orig_standalone = 0;
1056 12 : break;
1057 12 : case XML_STANDALONE_NO_VALUE:
1058 12 : orig_standalone = -1;
1059 12 : break;
1060 18 : case XML_STANDALONE_OMITTED:
1061 : /* leave original value */
1062 18 : break;
1063 : }
1064 :
1065 60 : initStringInfo(&buf);
1066 60 : print_xml_decl(&buf, orig_version, 0, orig_standalone);
1067 60 : appendStringInfoString(&buf, str + len);
1068 :
1069 60 : return stringinfo_to_xmltype(&buf);
1070 : #else
1071 : NO_XML_SUPPORT();
1072 : return NULL;
1073 : #endif
1074 : }
1075 :
1076 :
1077 : /*
1078 : * Validate document (given as string) against DTD (given as external link)
1079 : *
1080 : * This has been removed because it is a security hole: unprivileged users
1081 : * should not be able to use Postgres to fetch arbitrary external files,
1082 : * which unfortunately is exactly what libxml is willing to do with the DTD
1083 : * parameter.
1084 : */
/*
 * SQL-callable stub: unconditionally reports ERRCODE_FEATURE_NOT_SUPPORTED.
 * None of the function-call arguments are examined.
 */
1085 : Datum
1086 0 : xmlvalidate(PG_FUNCTION_ARGS)
1087 : {
1088 0 : ereport(ERROR,
1089 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1090 : errmsg("xmlvalidate is not implemented")));
/* presumably never reached at run time; gives the function a return value */
1091 : return 0;
1092 : }
1093 :
1094 :
1095 : bool
1096 24 : xml_is_document(xmltype *arg)
1097 : {
1098 : #ifdef USE_LIBXML
1099 : xmlDocPtr doc;
1100 24 : ErrorSaveContext escontext = {T_ErrorSaveContext};
1101 :
1102 : /*
1103 : * We'll report "true" if no soft error is reported by xml_parse().
1104 : */
1105 24 : doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
1106 : GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
1107 24 : if (doc)
1108 12 : xmlFreeDoc(doc);
1109 :
1110 24 : return !escontext.error_occurred;
1111 : #else /* not USE_LIBXML */
1112 : NO_XML_SUPPORT();
1113 : return false;
1114 : #endif /* not USE_LIBXML */
1115 : }
1116 :
1117 :
1118 : #ifdef USE_LIBXML
1119 :
1120 : /*
1121 : * pg_xml_init_library --- set up for use of libxml
1122 : *
1123 : * This should be called by each function that is about to use libxml
1124 : * facilities but doesn't require error handling. It initializes libxml
1125 : * and verifies compatibility with the loaded libxml version. These are
1126 : * once-per-session activities.
1127 : *
1128 : * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
1129 : * check)
1130 : */
1131 : void
1132 91598 : pg_xml_init_library(void)
1133 : {
1134 : static bool first_time = true;
1135 :
1136 91598 : if (first_time)
1137 : {
1138 : /* Stuff we need do only once per session */
1139 :
1140 : /*
1141 : * Currently, we have no pure UTF-8 support for internals -- check if
1142 : * we can work.
1143 : */
1144 : if (sizeof(char) != sizeof(xmlChar))
1145 : ereport(ERROR,
1146 : (errmsg("could not initialize XML library"),
1147 : errdetail("libxml2 has incompatible char type: sizeof(char)=%zu, sizeof(xmlChar)=%zu.",
1148 : sizeof(char), sizeof(xmlChar))));
1149 :
1150 : #ifdef USE_LIBXMLCONTEXT
1151 : /* Set up libxml's memory allocation our way */
1152 : xml_memory_init();
1153 : #endif
1154 :
1155 : /* Check library compatibility */
1156 26 : LIBXML_TEST_VERSION;
1157 :
1158 26 : first_time = false;
1159 : }
1160 91598 : }
1161 :
1162 : /*
1163 : * pg_xml_init --- set up for use of libxml and register an error handler
1164 : *
1165 : * This should be called by each function that is about to use libxml
1166 : * facilities and requires error handling. It initializes libxml with
1167 : * pg_xml_init_library() and establishes our libxml error handler.
1168 : *
1169 : * strictness determines which errors are reported and which are ignored.
1170 : *
1171 : * Calls to this function MUST be followed by a PG_TRY block that guarantees
1172 : * that pg_xml_done() is called during either normal or error exit.
1173 : *
1174 : * This is exported for use by contrib/xml2, as well as other code that might
1175 : * wish to share use of this module's libxml error handler.
1176 : */
1177 : PgXmlErrorContext *
1178 23902 : pg_xml_init(PgXmlStrictness strictness)
1179 : {
1180 : PgXmlErrorContext *errcxt;
1181 : void *new_errcxt;
1182 :
1183 : /* Do one-time setup if needed */
1184 23902 : pg_xml_init_library();
1185 :
1186 : /* Create error handling context structure */
1187 23902 : errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
1188 23902 : errcxt->magic = ERRCXT_MAGIC;
1189 23902 : errcxt->strictness = strictness;
1190 23902 : errcxt->err_occurred = false;
1191 23902 : initStringInfo(&errcxt->err_buf);
1192 :
1193 : /*
1194 : * Save original error handler and install ours. libxml originally didn't
1195 : * distinguish between the contexts for generic and for structured error
1196 : * handlers. If we're using an old libxml version, we must thus save the
1197 : * generic error context, even though we're using a structured error
1198 : * handler.
1199 : */
1200 23902 : errcxt->saved_errfunc = xmlStructuredError;
1201 :
1202 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1203 23902 : errcxt->saved_errcxt = xmlStructuredErrorContext;
1204 : #else
1205 : errcxt->saved_errcxt = xmlGenericErrorContext;
1206 : #endif
1207 :
1208 23902 : xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
1209 :
1210 : /*
1211 : * Verify that xmlSetStructuredErrorFunc set the context variable we
1212 : * expected it to. If not, the error context pointer we just saved is not
1213 : * the correct thing to restore, and since that leaves us without a way to
1214 : * restore the context in pg_xml_done, we must fail.
1215 : *
1216 : * The only known situation in which this test fails is if we compile with
1217 : * headers from a libxml2 that doesn't track the structured error context
1218 : * separately (< 2.7.4), but at runtime use a version that does, or vice
1219 : * versa. The libxml2 authors did not treat that change as constituting
1220 : * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1221 : * fails to protect us from this.
1222 : */
1223 :
1224 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1225 23902 : new_errcxt = xmlStructuredErrorContext;
1226 : #else
1227 : new_errcxt = xmlGenericErrorContext;
1228 : #endif
1229 :
1230 23902 : if (new_errcxt != (void *) errcxt)
1231 0 : ereport(ERROR,
1232 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1233 : errmsg("could not set up XML error handler"),
1234 : errhint("This probably indicates that the version of libxml2"
1235 : " being used is not compatible with the libxml2"
1236 : " header files that PostgreSQL was built with.")));
1237 :
1238 : /*
1239 : * Also, install an entity loader to prevent unwanted fetches of external
1240 : * files and URLs.
1241 : */
1242 23902 : errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1243 23902 : xmlSetExternalEntityLoader(xmlPgEntityLoader);
1244 :
1245 23902 : return errcxt;
1246 : }
1247 :
1248 :
1249 : /*
1250 : * pg_xml_done --- restore previous libxml error handling
1251 : *
1252 : * Resets libxml's global error-handling state to what it was before
1253 : * pg_xml_init() was called.
1254 : *
1255 : * This routine verifies that all pending errors have been dealt with
1256 : * (in assert-enabled builds, anyway).
1257 : */
1258 : void
1259 23902 : pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1260 : {
1261 : void *cur_errcxt;
1262 :
1263 : /* An assert seems like enough protection here */
1264 : Assert(errcxt->magic == ERRCXT_MAGIC);
1265 :
1266 : /*
1267 : * In a normal exit, there should be no un-handled libxml errors. But we
1268 : * shouldn't try to enforce this during error recovery, since the longjmp
1269 : * could have been thrown before xml_ereport had a chance to run.
1270 : */
1271 : Assert(!errcxt->err_occurred || isError);
1272 :
1273 : /*
1274 : * Check that libxml's global state is correct, warn if not. This is a
1275 : * real test and not an Assert because it has a higher probability of
1276 : * happening.
1277 : */
1278 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1279 23902 : cur_errcxt = xmlStructuredErrorContext;
1280 : #else
1281 : cur_errcxt = xmlGenericErrorContext;
1282 : #endif
1283 :
1284 23902 : if (cur_errcxt != (void *) errcxt)
1285 0 : elog(WARNING, "libxml error handling state is out of sync with xml.c");
1286 :
1287 : /* Restore the saved handlers */
1288 23902 : xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1289 23902 : xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1290 :
1291 : /*
1292 : * Mark the struct as invalid, just in case somebody somehow manages to
1293 : * call xml_errorHandler or xml_ereport with it.
1294 : */
1295 23902 : errcxt->magic = 0;
1296 :
1297 : /* Release memory */
1298 23902 : pfree(errcxt->err_buf.data);
1299 23902 : pfree(errcxt);
1300 23902 : }
1301 :
1302 :
1303 : /*
1304 : * pg_xml_error_occurred() --- test the error flag
 *
 * Returns the current value of errcxt->err_occurred.  Within this file the
 * flag is cleared by pg_xml_init() and by xml_ereport(); it is presumably
 * set by the libxml error handler installed by pg_xml_init().  No validity
 * check is made on errcxt.
1305 : */
1306 : bool
1307 0 : pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1308 : {
1309 0 : return errcxt->err_occurred;
1310 : }
1311 :
1312 :
1313 : /*
1314 : * SQL/XML allows storing "XML documents" or "XML content". "XML
1315 : * documents" are specified by the XML specification and are parsed
1316 : * easily by libxml. "XML content" is specified by SQL/XML as the
1317 : * production "XMLDecl? content". But libxml can only parse the
1318 : * "content" part, so we have to parse the XML declaration ourselves
1319 : * to complete this.
1320 : */
1321 :
/*
 * Fail with XML_ERR_SPACE_REQUIRED unless *p is an XML blank.  Note that
 * this expands to a "return" from the calling function.
 */
#define CHECK_XML_SPACE(p) \
	do { \
		if (!xmlIsBlank_ch(*(p))) \
			return XML_ERR_SPACE_REQUIRED; \
	} while (0)

/* Advance p past any run of XML blanks.  p must be a modifiable lvalue. */
#define SKIP_XML_SPACE(p) \
	do { \
		while (xmlIsBlank_ch(*(p))) \
			(p)++; \
	} while (0)

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/* Beware of multiple evaluations of argument! */
#define PG_XMLISNAMECHAR(c) \
	(xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
	 || xmlIsDigit_ch(c) \
	 || (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
	 || xmlIsCombiningQ(c) \
	 || xmlIsExtender_ch(c))
1339 :
1340 : /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1341 : static xmlChar *
1342 192 : xml_pnstrdup(const xmlChar *str, size_t len)
1343 : {
1344 : xmlChar *result;
1345 :
1346 192 : result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1347 192 : memcpy(result, str, len * sizeof(xmlChar));
1348 192 : result[len] = 0;
1349 192 : return result;
1350 : }
1351 :
1352 : /* Ditto, except input is char* */
1353 : static xmlChar *
1354 2424 : pg_xmlCharStrndup(const char *str, size_t len)
1355 : {
1356 : xmlChar *result;
1357 :
1358 2424 : result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1359 2424 : memcpy(result, str, len);
1360 2424 : result[len] = '\0';
1361 :
1362 2424 : return result;
1363 : }
1364 :
1365 : /*
1366 : * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
1367 : *
1368 : * The input xmlChar is freed regardless of success of the copy.
1369 : */
1370 : static char *
1371 109242 : xml_pstrdup_and_free(xmlChar *str)
1372 : {
1373 : char *result;
1374 :
1375 109242 : if (str)
1376 : {
1377 109242 : PG_TRY();
1378 : {
1379 109242 : result = pstrdup((char *) str);
1380 : }
1381 0 : PG_FINALLY();
1382 : {
1383 109242 : xmlFree(str);
1384 : }
1385 109242 : PG_END_TRY();
1386 : }
1387 : else
1388 0 : result = NULL;
1389 :
1390 109242 : return result;
1391 : }
1392 :
1393 : /*
1394 : * str is the null-terminated input string. Remaining arguments are
1395 : * output arguments; each can be NULL if value is not wanted.
1396 : * version and encoding are returned as locally-palloc'd strings.
1397 : * Result is 0 if OK, an error code if not.
1398 : */
1399 : static int
1400 67696 : parse_xml_decl(const xmlChar *str, size_t *lenp,
1401 : xmlChar **version, xmlChar **encoding, int *standalone)
1402 : {
1403 : const xmlChar *p;
1404 : const xmlChar *save_p;
1405 : size_t len;
1406 : int utf8char;
1407 : int utf8len;
1408 :
1409 : /*
1410 : * Only initialize libxml. We don't need error handling here, but we do
1411 : * need to make sure libxml is initialized before calling any of its
1412 : * functions. Note that this is safe (and a no-op) if caller has already
1413 : * done pg_xml_init().
1414 : */
1415 67696 : pg_xml_init_library();
1416 :
1417 : /* Initialize output arguments to "not present" */
1418 67696 : if (version)
1419 67060 : *version = NULL;
1420 67696 : if (encoding)
1421 0 : *encoding = NULL;
1422 67696 : if (standalone)
1423 67060 : *standalone = -1;
1424 :
1425 67696 : p = str;
1426 :
1427 67696 : if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1428 67474 : goto finished;
1429 :
1430 : /*
1431 : * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
1432 : * rather than an XMLDecl, so we have done what we came to do and found no
1433 : * XMLDecl.
1434 : *
1435 : * We need an input length value for xmlGetUTF8Char, but there's no need
1436 : * to count the whole document size, so use strnlen not strlen.
1437 : */
1438 222 : utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
1439 222 : utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1440 222 : if (PG_XMLISNAMECHAR(utf8char))
1441 12 : goto finished;
1442 :
1443 210 : p += 5;
1444 :
1445 : /* version */
1446 210 : CHECK_XML_SPACE(p);
1447 420 : SKIP_XML_SPACE(p);
1448 210 : if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1449 0 : return XML_ERR_VERSION_MISSING;
1450 210 : p += 7;
1451 210 : SKIP_XML_SPACE(p);
1452 210 : if (*p != '=')
1453 0 : return XML_ERR_VERSION_MISSING;
1454 210 : p += 1;
1455 210 : SKIP_XML_SPACE(p);
1456 :
1457 210 : if (*p == '\'' || *p == '"')
1458 210 : {
1459 : const xmlChar *q;
1460 :
1461 210 : q = xmlStrchr(p + 1, *p);
1462 210 : if (!q)
1463 0 : return XML_ERR_VERSION_MISSING;
1464 :
1465 210 : if (version)
1466 192 : *version = xml_pnstrdup(p + 1, q - p - 1);
1467 210 : p = q + 1;
1468 : }
1469 : else
1470 0 : return XML_ERR_VERSION_MISSING;
1471 :
1472 : /* encoding */
1473 210 : save_p = p;
1474 372 : SKIP_XML_SPACE(p);
1475 210 : if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1476 : {
1477 54 : CHECK_XML_SPACE(save_p);
1478 54 : p += 8;
1479 54 : SKIP_XML_SPACE(p);
1480 54 : if (*p != '=')
1481 0 : return XML_ERR_MISSING_ENCODING;
1482 54 : p += 1;
1483 54 : SKIP_XML_SPACE(p);
1484 :
1485 54 : if (*p == '\'' || *p == '"')
1486 54 : {
1487 : const xmlChar *q;
1488 :
1489 54 : q = xmlStrchr(p + 1, *p);
1490 54 : if (!q)
1491 0 : return XML_ERR_MISSING_ENCODING;
1492 :
1493 54 : if (encoding)
1494 0 : *encoding = xml_pnstrdup(p + 1, q - p - 1);
1495 54 : p = q + 1;
1496 : }
1497 : else
1498 0 : return XML_ERR_MISSING_ENCODING;
1499 : }
1500 : else
1501 : {
1502 156 : p = save_p;
1503 : }
1504 :
1505 : /* standalone */
1506 210 : save_p = p;
1507 318 : SKIP_XML_SPACE(p);
1508 210 : if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1509 : {
1510 108 : CHECK_XML_SPACE(save_p);
1511 108 : p += 10;
1512 108 : SKIP_XML_SPACE(p);
1513 108 : if (*p != '=')
1514 0 : return XML_ERR_STANDALONE_VALUE;
1515 108 : p += 1;
1516 108 : SKIP_XML_SPACE(p);
1517 216 : if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1518 108 : xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1519 : {
1520 60 : if (standalone)
1521 60 : *standalone = 1;
1522 60 : p += 5;
1523 : }
1524 96 : else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1525 48 : xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1526 : {
1527 36 : if (standalone)
1528 36 : *standalone = 0;
1529 36 : p += 4;
1530 : }
1531 : else
1532 12 : return XML_ERR_STANDALONE_VALUE;
1533 : }
1534 : else
1535 : {
1536 102 : p = save_p;
1537 : }
1538 :
1539 198 : SKIP_XML_SPACE(p);
1540 198 : if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1541 0 : return XML_ERR_XMLDECL_NOT_FINISHED;
1542 198 : p += 2;
1543 :
1544 67684 : finished:
1545 67684 : len = p - str;
1546 :
1547 74416 : for (p = str; p < str + len; p++)
1548 6732 : if (*p > 127)
1549 0 : return XML_ERR_INVALID_CHAR;
1550 :
1551 67684 : if (lenp)
1552 67684 : *lenp = len;
1553 :
1554 67684 : return XML_ERR_OK;
1555 : }
1556 :
1557 :
1558 : /*
1559 : * Write an XML declaration. On output, we adjust the XML declaration
1560 : * as follows. (These rules are the moral equivalent of the clause
1561 : * "Serialization of an XML value" in the SQL standard.)
1562 : *
1563 : * We try to avoid generating an XML declaration if possible. This is
1564 : * so that you don't get trivial things like xml '<foo/>' resulting in
1565 : * '<?xml version="1.0"?><foo/>', which would surely be annoying. We
1566 : * must provide a declaration if the standalone property is specified
1567 : * or if we include an encoding declaration. If we have a
1568 : * declaration, we must specify a version (XML requires this).
1569 : * Otherwise we only make a declaration if the version is not "1.0",
1570 : * which is the default version specified in SQL:2003.
1571 : */
1572 : static bool
1573 22928 : print_xml_decl(StringInfo buf, const xmlChar *version,
1574 : pg_enc encoding, int standalone)
1575 : {
1576 22928 : if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1577 22892 : || (encoding && encoding != PG_UTF8)
1578 22892 : || standalone != -1)
1579 : {
1580 96 : appendStringInfoString(buf, "<?xml");
1581 :
1582 96 : if (version)
1583 72 : appendStringInfo(buf, " version=\"%s\"", version);
1584 : else
1585 24 : appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1586 :
1587 96 : if (encoding && encoding != PG_UTF8)
1588 : {
1589 : /*
1590 : * XXX might be useful to convert this to IANA names (ISO-8859-1
1591 : * instead of LATIN1 etc.); needs field experience
1592 : */
1593 0 : appendStringInfo(buf, " encoding=\"%s\"",
1594 : pg_encoding_to_char(encoding));
1595 : }
1596 :
1597 96 : if (standalone == 1)
1598 48 : appendStringInfoString(buf, " standalone=\"yes\"");
1599 48 : else if (standalone == 0)
1600 24 : appendStringInfoString(buf, " standalone=\"no\"");
1601 96 : appendStringInfoString(buf, "?>");
1602 :
1603 96 : return true;
1604 : }
1605 : else
1606 22832 : return false;
1607 : }
1608 :
1609 : /*
1610 : * Test whether an input that is to be parsed as CONTENT contains a DTD.
1611 : *
1612 : * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1613 : * satisfied by a document with a DTD, which is a bit of a wart, as it means
1614 : * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and
1615 : * later fix that, by redefining content with reference to the "more
1616 : * permissive" Document Node of the XQuery/XPath Data Model, such that any
1617 : * DOCUMENT value is indeed also a CONTENT value. That definition is more
1618 : * useful, as CONTENT becomes usable for parsing input of unknown form (think
1619 : * pg_restore).
1620 : *
1621 : * As used below in parse_xml when parsing for CONTENT, libxml does not give
1622 : * us the 2006+ behavior, but only the 2003; it will choke if the input has
1623 : * a DTD. But we can provide the 2006+ definition of CONTENT easily enough,
1624 : * by detecting this case first and simply doing the parse as DOCUMENT.
1625 : *
1626 : * A DTD can be found arbitrarily far in, but that would be a contrived case;
1627 : * it will ordinarily start within a few dozen characters. The only things
1628 : * that can precede it are an XMLDecl (here, the caller will have called
1629 : * parse_xml_decl already), whitespace, comments, and processing instructions.
1630 : * This function need only return true if it sees a valid sequence of such
1631 : * things leading to <!DOCTYPE. It can simply return false in any other
1632 : * cases, including malformed input; that will mean the input gets parsed as
1633 : * CONTENT as originally planned, with libxml reporting any errors.
1634 : *
1635 : * This is only to be called from xml_parse, when pg_xml_init has already
1636 : * been called. The input is already in UTF8 encoding.
1637 : */
1638 : static bool
1639 936 : xml_doctype_in_content(const xmlChar *str)
1640 : {
1641 936 : const xmlChar *p = str;
1642 :
1643 : for (;;)
1644 36 : {
1645 : const xmlChar *e;
1646 :
1647 1062 : SKIP_XML_SPACE(p);
1648 972 : if (*p != '<')
1649 194 : return false;
1650 778 : p++;
1651 :
1652 778 : if (*p == '!')
1653 : {
1654 72 : p++;
1655 :
1656 : /* if we see <!DOCTYPE, we can return true */
1657 72 : if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1658 42 : return true;
1659 :
1660 : /* otherwise, if it's not a comment, fail */
1661 30 : if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1662 0 : return false;
1663 : /* find end of comment: find -- and a > must follow */
1664 30 : p = xmlStrstr(p + 2, (xmlChar *) "--");
1665 30 : if (!p || p[2] != '>')
1666 0 : return false;
1667 : /* advance over comment, and keep scanning */
1668 30 : p += 3;
1669 30 : continue;
1670 : }
1671 :
1672 : /* otherwise, if it's not a PI <?target something?>, fail */
1673 706 : if (*p != '?')
1674 700 : return false;
1675 6 : p++;
1676 :
1677 : /* find end of PI (the string ?> is forbidden within a PI) */
1678 6 : e = xmlStrstr(p, (xmlChar *) "?>");
1679 6 : if (!e)
1680 0 : return false;
1681 :
1682 : /* advance over PI, keep scanning */
1683 6 : p = e + 2;
1684 : }
1685 : }
1686 :
1687 :
1688 : /*
1689 : * Convert a text object to XML internal representation
1690 : *
1691 : * data is the source data (must not be toasted!), encoding is its encoding,
1692 : * and xmloption_arg and preserve_whitespace are options for the
1693 : * transformation.
1694 : *
1695 : * If parsed_xmloptiontype isn't NULL, *parsed_xmloptiontype is set to the
1696 : * XmlOptionType actually used to parse the input (typically the same as
1697 : * xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
1698 : *
1699 : * If parsed_nodes isn't NULL and we parse in CONTENT mode, the list
1700 : * of parsed nodes from the xmlParseInNodeContext call will be returned
1701 : * to *parsed_nodes. (It is caller's responsibility to free that.)
1702 : *
1703 : * Errors normally result in ereport(ERROR), but if escontext is an
1704 : * ErrorSaveContext, then "safe" errors are reported there instead, and the
1705 : * caller must check SOFT_ERROR_OCCURRED() to see whether that happened.
1706 : *
1707 : * Note: it is caller's responsibility to xmlFreeDoc() the result,
1708 : * else a permanent memory leak will ensue! But note the result could
1709 : * be NULL after a soft error.
1710 : *
1711 : * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1712 : * yet do not use SAX - see xmlreader.c)
1713 : */
1714 : static xmlDocPtr
1715 1236 : xml_parse(text *data, XmlOptionType xmloption_arg,
1716 : bool preserve_whitespace, int encoding,
1717 : XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
1718 : Node *escontext)
1719 : {
/*
 * Outline: (1) make a NUL-terminated UTF-8 copy of the input; (2) install
 * the libxml error handler; (3) inside PG_TRY, decide between DOCUMENT and
 * CONTENT mode and run the matching libxml parser; (4) release the parser
 * context, restore the error handler, and return the document.
 */
1720 : int32 len;
1721 : xmlChar *string;
1722 : xmlChar *utf8string;
1723 : PgXmlErrorContext *xmlerrcxt;
/*
 * ctxt and doc are assigned inside the PG_TRY block and examined in
 * PG_CATCH and after PG_END_TRY; presumably they are volatile so that
 * their values are reliable after a longjmp.
 */
1724 1236 : volatile xmlParserCtxtPtr ctxt = NULL;
1725 1236 : volatile xmlDocPtr doc = NULL;
1726 :
1727 : /*
1728 : * This step looks annoyingly redundant, but we must do it to have a
1729 : * null-terminated string in case encoding conversion isn't required.
1730 : */
1731 1236 : len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
1732 1236 : string = xml_text2xmlChar(data);
1733 :
1734 : /*
1735 : * If the data isn't UTF8, we must translate before giving it to libxml.
1736 : *
1737 : * XXX ideally, we'd catch any encoding conversion failure and return a
1738 : * soft error. However, failure to convert to UTF8 should be pretty darn
1739 : * rare, so for now this is left undone.
1740 : */
1741 1236 : utf8string = pg_do_encoding_conversion(string,
1742 : len,
1743 : encoding,
1744 : PG_UTF8);
1745 :
1746 : /* Start up libxml and its parser */
1747 1236 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1748 :
1749 : /* Use a TRY block to ensure we clean up correctly */
1750 1236 : PG_TRY();
1751 : {
1752 1236 : bool parse_as_document = false;
1753 : int options;
1754 : int res_code;
/* count: length of the XML declaration skipped in CONTENT mode (stays 0 otherwise) */
1755 1236 : size_t count = 0;
1756 1236 : xmlChar *version = NULL;
1757 1236 : int standalone = 0;
1758 :
1759 : /* Any errors here are reported as hard ereport's */
1760 1236 : xmlInitParser();
1761 :
1762 : /* Decide whether to parse as document or content */
1763 1236 : if (xmloption_arg == XMLOPTION_DOCUMENT)
1764 288 : parse_as_document = true;
1765 : else
1766 : {
1767 : /* Parse and skip over the XML declaration, if any */
1768 948 : res_code = parse_xml_decl(utf8string,
1769 : &count, &version, NULL, &standalone);
1770 948 : if (res_code != 0)
1771 : {
1772 12 : errsave(escontext,
1773 : errcode(ERRCODE_INVALID_XML_CONTENT),
1774 : errmsg_internal("invalid XML content: invalid XML declaration"),
1775 : errdetail_for_xml_code(res_code));
/* reached only if errsave() returned, i.e. the error was saved softly */
1776 12 : goto fail;
1777 : }
1778 :
1779 : /* Is there a DOCTYPE element? */
1780 936 : if (xml_doctype_in_content(utf8string + count))
1781 42 : parse_as_document = true;
1782 : }
1783 :
1784 : /*
1785 : * Select parse options.
1786 : *
1787 : * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
1788 : * according to SQL/XML:2008 GR 10.16.7.d: 'Default values defined by
1789 : * internal DTD are applied'. As for external DTDs, we try to support
1790 : * them too (see SQL/XML:2008 GR 10.16.7.e), but that doesn't really
1791 : * happen because xmlPgEntityLoader prevents it.
1792 : */
1793 1224 : options = XML_PARSE_NOENT | XML_PARSE_DTDATTR
1794 1224 : | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS);
1795 :
1796 : /* initialize output parameters */
1797 1224 : if (parsed_xmloptiontype != NULL)
1798 132 : *parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
1799 : XMLOPTION_CONTENT;
1800 1224 : if (parsed_nodes != NULL)
1801 132 : *parsed_nodes = NULL;
1802 :
1803 1224 : if (parse_as_document)
1804 : {
/* DOCUMENT mode: libxml parses the whole string, XML declaration included */
1805 330 : ctxt = xmlNewParserCtxt();
1806 330 : if (ctxt == NULL || xmlerrcxt->err_occurred)
1807 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1808 : "could not allocate parser context");
1809 :
1810 330 : doc = xmlCtxtReadDoc(ctxt, utf8string,
1811 : NULL, /* no URL */
1812 : "UTF-8",
1813 : options);
1814 :
1815 330 : if (doc == NULL || xmlerrcxt->err_occurred)
1816 : {
1817 : /* Use original option to decide which error code to report */
1818 144 : if (xmloption_arg == XMLOPTION_DOCUMENT)
1819 138 : xml_errsave(escontext, xmlerrcxt,
1820 : ERRCODE_INVALID_XML_DOCUMENT,
1821 : "invalid XML document");
1822 : else
1823 6 : xml_errsave(escontext, xmlerrcxt,
1824 : ERRCODE_INVALID_XML_CONTENT,
1825 : "invalid XML content");
1826 96 : goto fail;
1827 : }
1828 : }
1829 : else
1830 : {
/*
 * CONTENT mode: build an empty document by hand and parse the text after
 * the XML declaration as children of a dummy root node.
 */
1831 : xmlNodePtr root;
1832 :
1833 : /* set up document with empty root node to be the context node */
1834 894 : doc = xmlNewDoc(version);
1835 894 : if (doc == NULL || xmlerrcxt->err_occurred)
1836 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1837 : "could not allocate XML document");
1838 :
1839 : Assert(doc->encoding == NULL);
1840 894 : doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1841 894 : if (doc->encoding == NULL || xmlerrcxt->err_occurred)
1842 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1843 : "could not allocate XML document");
1844 894 : doc->standalone = standalone;
1845 :
1846 894 : root = xmlNewNode(NULL, (const xmlChar *) "content-root");
1847 894 : if (root == NULL || xmlerrcxt->err_occurred)
1848 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1849 : "could not allocate xml node");
1850 : /* This attaches root to doc, so we need not free it separately. */
1851 894 : xmlDocSetRootElement(doc, root);
1852 :
1853 : /* allow empty content */
1854 894 : if (*(utf8string + count))
1855 : {
1856 870 : xmlNodePtr node_list = NULL;
1857 : xmlParserErrors res;
1858 :
1859 1740 : res = xmlParseInNodeContext(root,
1860 : (char *) utf8string + count,
1861 870 : strlen((char *) utf8string + count),
1862 : options,
1863 : &node_list);
1864 :
1865 870 : if (res != XML_ERR_OK || xmlerrcxt->err_occurred)
1866 : {
/* free the partial node list before reporting, since xml_errsave may not return */
1867 60 : xmlFreeNodeList(node_list);
1868 60 : xml_errsave(escontext, xmlerrcxt,
1869 : ERRCODE_INVALID_XML_CONTENT,
1870 : "invalid XML content");
1871 12 : goto fail;
1872 : }
1873 :
/* hand the node list to the caller if wanted, else discard it */
1874 810 : if (parsed_nodes != NULL)
1875 42 : *parsed_nodes = node_list;
1876 : else
1877 768 : xmlFreeNodeList(node_list);
1878 : }
1879 : }
1880 :
/*
 * Soft-error exits jump here and then leave the PG_TRY block normally, so
 * the cleanup after PG_END_TRY runs.  doc keeps whatever value it had at
 * that point (NULL, or a document built before the failure).
 */
1881 1140 : fail:
1882 : ;
1883 : }
1884 96 : PG_CATCH();
1885 : {
/* A hard error was thrown: release everything we own, then re-throw */
1886 96 : if (doc != NULL)
1887 48 : xmlFreeDoc(doc);
1888 96 : if (ctxt != NULL)
1889 48 : xmlFreeParserCtxt(ctxt);
1890 :
1891 96 : pg_xml_done(xmlerrcxt, true);
1892 :
1893 96 : PG_RE_THROW();
1894 : }
1895 1140 : PG_END_TRY();
1896 :
/* Normal or soft-error exit: free the parser context, but doc goes to the caller */
1897 1140 : if (ctxt != NULL)
1898 282 : xmlFreeParserCtxt(ctxt);
1899 :
1900 1140 : pg_xml_done(xmlerrcxt, false);
1901 :
1902 1140 : return doc;
1903 : }
1904 :
1905 :
1906 : /*
1907 : * xmlChar<->text conversions
1908 : */
/*
 * Convert a text datum into an xmlChar string by way of text_to_cstring(),
 * simply casting the returned pointer.  No encoding conversion is done
 * here.
 */
1909 : static xmlChar *
1910 1374 : xml_text2xmlChar(text *in)
1911 : {
1912 1374 : return (xmlChar *) text_to_cstring(in);
1913 : }
1914 :
1915 :
1916 : #ifdef USE_LIBXMLCONTEXT
1917 :
1918 : /*
1919 : * Manage the special context used for all libxml allocations (but only
1920 : * in special debug builds; see notes at top of file)
1921 : */
1922 : static void
1923 : xml_memory_init(void)
1924 : {
1925 : /* Create memory context if not there already */
1926 : if (LibxmlContext == NULL)
1927 : LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1928 : "Libxml context",
1929 : ALLOCSET_DEFAULT_SIZES);
1930 :
1931 : /* Re-establish the callbacks even if already set */
1932 : xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1933 : }
1934 :
1935 : /*
1936 : * Wrappers for memory management functions
1937 : */
/* Allocation callback registered with xmlMemSetup: allocate in LibxmlContext */
1938 : static void *
1939 : xml_palloc(size_t size)
1940 : {
1941 : return MemoryContextAlloc(LibxmlContext, size);
1942 : }
1943 :
1944 :
/*
 * Reallocation callback registered with xmlMemSetup: resize via repalloc().
 * NOTE(review): ptr is passed straight through, so this assumes libxml
 * never calls it with a NULL pointer -- confirm.
 */
1945 : static void *
1946 : xml_repalloc(void *ptr, size_t size)
1947 : {
1948 : return repalloc(ptr, size);
1949 : }
1950 :
1951 :
/* Free callback registered with xmlMemSetup: pfree() the chunk, ignoring NULL */
1952 : static void
1953 : xml_pfree(void *ptr)
1954 : {
1955 : /* At least some parts of libxml assume xmlFree(NULL) is allowed */
1956 : if (ptr)
1957 : pfree(ptr);
1958 : }
1959 :
1960 :
/* String-duplication callback registered with xmlMemSetup: copy into LibxmlContext */
1961 : static char *
1962 : xml_pstrdup(const char *string)
1963 : {
1964 : return MemoryContextStrdup(LibxmlContext, string);
1965 : }
1966 : #endif /* USE_LIBXMLCONTEXT */
1967 :
1968 :
1969 : /*
1970 : * xmlPgEntityLoader --- entity loader callback function
1971 : *
1972 : * Silently prevent any external entity URL from being loaded. We don't want
1973 : * to throw an error, so instead make the entity appear to expand to an empty
1974 : * string.
1975 : *
1976 : * We would prefer to allow loading entities that exist in the system's
1977 : * global XML catalog; but the available libxml2 APIs make that a complex
1978 : * and fragile task. For now, just shut down all external access.
1979 : */
/*
 * Installed by pg_xml_init() via xmlSetExternalEntityLoader().  URL and ID
 * are ignored; the result is always a string input stream over "" created
 * in the given parser context.
 */
1980 : static xmlParserInputPtr
1981 30 : xmlPgEntityLoader(const char *URL, const char *ID,
1982 : xmlParserCtxtPtr ctxt)
1983 : {
1984 30 : return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
1985 : }
1986 :
1987 :
1988 : /*
1989 : * xml_ereport --- report an XML-related error
1990 : *
1991 : * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
1992 : * standard. This function adds libxml's native error message, if any, as
1993 : * detail.
1994 : *
1995 : * This is exported for modules that want to share the core libxml error
1996 : * handler. Note that pg_xml_init() *must* have been called previously.
1997 : */
1998 : void
1999 12 : xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
2000 : {
2001 : char *detail;
2002 :
2003 : /* Defend against someone passing us a bogus context struct */
2004 12 : if (errcxt->magic != ERRCXT_MAGIC)
2005 0 : elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
2006 :
2007 : /* Flag that the current libxml error has been reported */
2008 12 : errcxt->err_occurred = false;
2009 :
2010 : /* Include detail only if we have some text from libxml */
2011 12 : if (errcxt->err_buf.len > 0)
2012 12 : detail = errcxt->err_buf.data;
2013 : else
2014 0 : detail = NULL;
2015 :
2016 12 : ereport(level,
2017 : (errcode(sqlcode),
2018 : errmsg_internal("%s", msg),
2019 : detail ? errdetail_internal("%s", detail) : 0));
2020 0 : }
2021 :
2022 :
2023 : /*
2024 : * xml_errsave --- save an XML-related error
2025 : *
2026 : * If escontext is an ErrorSaveContext, error details are saved into it,
2027 : * and control returns normally.
2028 : *
2029 : * Otherwise, the error is thrown, so that this is equivalent to
2030 : * xml_ereport() with level == ERROR.
2031 : *
2032 : * This should be used only for errors that we're sure we do not need
2033 : * a transaction abort to clean up after.
2034 : */
2035 : static void
2036 204 : xml_errsave(Node *escontext, PgXmlErrorContext *errcxt,
2037 : int sqlcode, const char *msg)
2038 : {
2039 : char *detail;
2040 :
2041 : /* Defend against someone passing us a bogus context struct */
2042 204 : if (errcxt->magic != ERRCXT_MAGIC)
2043 0 : elog(ERROR, "xml_errsave called with invalid PgXmlErrorContext");
2044 :
2045 : /* Flag that the current libxml error has been reported */
2046 204 : errcxt->err_occurred = false;
2047 :
2048 : /* Include detail only if we have some text from libxml */
2049 204 : if (errcxt->err_buf.len > 0)
2050 204 : detail = errcxt->err_buf.data;
2051 : else
2052 0 : detail = NULL;
2053 :
2054 204 : errsave(escontext,
2055 : (errcode(sqlcode),
2056 : errmsg_internal("%s", msg),
2057 : detail ? errdetail_internal("%s", detail) : 0));
2058 108 : }
2059 :
2060 :
2061 : /*
2062 : * Error handler for libxml errors and warnings
2063 : */
2064 : static void
2065 398 : xml_errorHandler(void *data, PgXmlErrorPtr error)
2066 : {
2067 398 : PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
2068 398 : xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
2069 398 : xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
2070 398 : xmlNodePtr node = error->node;
2071 398 : const xmlChar *name = (node != NULL &&
2072 398 : node->type == XML_ELEMENT_NODE) ? node->name : NULL;
2073 398 : int domain = error->domain;
2074 398 : int level = error->level;
2075 : StringInfo errorBuf;
2076 :
2077 : /*
2078 : * Defend against someone passing us a bogus context struct.
2079 : *
2080 : * We force a backend exit if this check fails because longjmp'ing out of
2081 : * libxml would likely render it unsafe to use further.
2082 : */
2083 398 : if (xmlerrcxt->magic != ERRCXT_MAGIC)
2084 0 : elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
2085 :
2086 : /*----------
2087 : * Older libxml versions report some errors differently.
2088 : * First, some errors were previously reported as coming from the parser
2089 : * domain but are now reported as coming from the namespace domain.
2090 : * Second, some warnings were upgraded to errors.
2091 : * We attempt to compensate for that here.
2092 : *----------
2093 : */
2094 398 : switch (error->code)
2095 : {
2096 30 : case XML_WAR_NS_URI:
2097 30 : level = XML_ERR_ERROR;
2098 30 : domain = XML_FROM_NAMESPACE;
2099 30 : break;
2100 :
2101 54 : case XML_ERR_NS_DECL_ERROR:
2102 : case XML_WAR_NS_URI_RELATIVE:
2103 : case XML_WAR_NS_COLUMN:
2104 : case XML_NS_ERR_XML_NAMESPACE:
2105 : case XML_NS_ERR_UNDEFINED_NAMESPACE:
2106 : case XML_NS_ERR_QNAME:
2107 : case XML_NS_ERR_ATTRIBUTE_REDEFINED:
2108 : case XML_NS_ERR_EMPTY:
2109 54 : domain = XML_FROM_NAMESPACE;
2110 54 : break;
2111 : }
2112 :
2113 : /* Decide whether to act on the error or not */
2114 398 : switch (domain)
2115 : {
2116 314 : case XML_FROM_PARSER:
2117 :
2118 : /*
2119 : * XML_ERR_NOT_WELL_BALANCED is typically reported after some
2120 : * other, more on-point error. Furthermore, libxml2 2.13 reports
2121 : * it under a completely different set of rules than prior
2122 : * versions. To avoid cross-version behavioral differences,
2123 : * suppress it so long as we already logged some error.
2124 : */
2125 314 : if (error->code == XML_ERR_NOT_WELL_BALANCED &&
2126 30 : xmlerrcxt->err_occurred)
2127 30 : return;
2128 : /* fall through */
2129 :
2130 : case XML_FROM_NONE:
2131 : case XML_FROM_MEMORY:
2132 : case XML_FROM_IO:
2133 :
2134 : /*
2135 : * Suppress warnings about undeclared entities. We need to do
2136 : * this to avoid problems due to not loading DTD definitions.
2137 : */
2138 284 : if (error->code == XML_WAR_UNDECLARED_ENTITY)
2139 6 : return;
2140 :
2141 : /* Otherwise, accept error regardless of the parsing purpose */
2142 278 : break;
2143 :
2144 84 : default:
2145 : /* Ignore error if only doing well-formedness check */
2146 84 : if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
2147 66 : return;
2148 18 : break;
2149 : }
2150 :
2151 : /* Prepare error message in errorBuf */
2152 296 : errorBuf = makeStringInfo();
2153 :
2154 296 : if (error->line > 0)
2155 296 : appendStringInfo(errorBuf, "line %d: ", error->line);
2156 296 : if (name != NULL)
2157 0 : appendStringInfo(errorBuf, "element %s: ", name);
2158 296 : if (error->message != NULL)
2159 296 : appendStringInfoString(errorBuf, error->message);
2160 : else
2161 0 : appendStringInfoString(errorBuf, "(no message provided)");
2162 :
2163 : /*
2164 : * Append context information to errorBuf.
2165 : *
2166 : * xmlParserPrintFileContext() uses libxml's "generic" error handler to
2167 : * write the context. Since we don't want to duplicate libxml
2168 : * functionality here, we set up a generic error handler temporarily.
2169 : *
2170 : * We use appendStringInfo() directly as libxml's generic error handler.
2171 : * This should work because it has essentially the same signature as
2172 : * libxml expects, namely (void *ptr, const char *msg, ...).
2173 : */
2174 296 : if (input != NULL)
2175 : {
2176 296 : xmlGenericErrorFunc errFuncSaved = xmlGenericError;
2177 296 : void *errCtxSaved = xmlGenericErrorContext;
2178 :
2179 296 : xmlSetGenericErrorFunc((void *) errorBuf,
2180 : (xmlGenericErrorFunc) appendStringInfo);
2181 :
2182 : /* Add context information to errorBuf */
2183 296 : appendStringInfoLineSeparator(errorBuf);
2184 :
2185 296 : xmlParserPrintFileContext(input);
2186 :
2187 : /* Restore generic error func */
2188 296 : xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
2189 : }
2190 :
2191 : /* Get rid of any trailing newlines in errorBuf */
2192 296 : chopStringInfoNewlines(errorBuf);
2193 :
2194 : /*
2195 : * Legacy error handling mode. err_occurred is never set, we just add the
2196 : * message to err_buf. This mode exists because the xml2 contrib module
2197 : * uses our error-handling infrastructure, but we don't want to change its
2198 : * behaviour since it's deprecated anyway. This is also why we don't
2199 : * distinguish between notices, warnings and errors here --- the old-style
2200 : * generic error handler wouldn't have done that either.
2201 : */
2202 296 : if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
2203 : {
2204 2 : appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
2205 2 : appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
2206 : errorBuf->len);
2207 :
2208 2 : destroyStringInfo(errorBuf);
2209 2 : return;
2210 : }
2211 :
2212 : /*
2213 : * We don't want to ereport() here because that'd probably leave libxml in
2214 : * an inconsistent state. Instead, we remember the error and ereport()
2215 : * from xml_ereport().
2216 : *
2217 : * Warnings and notices can be reported immediately since they won't cause
2218 : * a longjmp() out of libxml.
2219 : */
2220 294 : if (level >= XML_ERR_ERROR)
2221 : {
2222 288 : appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
2223 288 : appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
2224 : errorBuf->len);
2225 :
2226 288 : xmlerrcxt->err_occurred = true;
2227 : }
2228 6 : else if (level >= XML_ERR_WARNING)
2229 : {
2230 6 : ereport(WARNING,
2231 : (errmsg_internal("%s", errorBuf->data)));
2232 : }
2233 : else
2234 : {
2235 0 : ereport(NOTICE,
2236 : (errmsg_internal("%s", errorBuf->data)));
2237 : }
2238 :
2239 294 : destroyStringInfo(errorBuf);
2240 : }
2241 :
2242 :
2243 : /*
2244 : * Convert libxml error codes into textual errdetail messages.
2245 : *
2246 : * This should be called within an ereport or errsave invocation,
2247 : * just as errdetail would be.
2248 : *
2249 : * At the moment, we only need to cover those codes that we
2250 : * may raise in this file.
2251 : */
2252 : static int
2253 6 : errdetail_for_xml_code(int code)
2254 : {
2255 : const char *det;
2256 :
2257 6 : switch (code)
2258 : {
2259 0 : case XML_ERR_INVALID_CHAR:
2260 0 : det = gettext_noop("Invalid character value.");
2261 0 : break;
2262 0 : case XML_ERR_SPACE_REQUIRED:
2263 0 : det = gettext_noop("Space required.");
2264 0 : break;
2265 6 : case XML_ERR_STANDALONE_VALUE:
2266 6 : det = gettext_noop("standalone accepts only 'yes' or 'no'.");
2267 6 : break;
2268 0 : case XML_ERR_VERSION_MISSING:
2269 0 : det = gettext_noop("Malformed declaration: missing version.");
2270 0 : break;
2271 0 : case XML_ERR_MISSING_ENCODING:
2272 0 : det = gettext_noop("Missing encoding in text declaration.");
2273 0 : break;
2274 0 : case XML_ERR_XMLDECL_NOT_FINISHED:
2275 0 : det = gettext_noop("Parsing XML declaration: '?>' expected.");
2276 0 : break;
2277 0 : default:
2278 0 : det = gettext_noop("Unrecognized libxml error code: %d.");
2279 0 : break;
2280 : }
2281 :
2282 6 : return errdetail(det, code);
2283 : }
2284 :
2285 :
2286 : /*
2287 : * Remove all trailing newlines from a StringInfo string
2288 : */
2289 : static void
2290 882 : chopStringInfoNewlines(StringInfo str)
2291 : {
2292 1474 : while (str->len > 0 && str->data[str->len - 1] == '\n')
2293 592 : str->data[--str->len] = '\0';
2294 882 : }
2295 :
2296 :
2297 : /*
2298 : * Append a newline after removing any existing trailing newlines
2299 : */
2300 : static void
2301 586 : appendStringInfoLineSeparator(StringInfo str)
2302 : {
2303 586 : chopStringInfoNewlines(str);
2304 586 : if (str->len > 0)
2305 368 : appendStringInfoChar(str, '\n');
2306 586 : }
2307 :
2308 :
2309 : /*
2310 : * Convert one char in the current server encoding to a Unicode codepoint.
2311 : */
2312 : static pg_wchar
2313 18280 : sqlchar_to_unicode(const char *s)
2314 : {
2315 : char *utf8string;
2316 : pg_wchar ret[2]; /* need space for trailing zero */
2317 :
2318 : /* note we're not assuming s is null-terminated */
2319 18280 : utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
2320 :
2321 18280 : pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
2322 : pg_encoding_mblen(PG_UTF8, utf8string));
2323 :
2324 18280 : if (utf8string != s)
2325 0 : pfree(utf8string);
2326 :
2327 18280 : return ret[0];
2328 : }
2329 :
2330 :
2331 : static bool
2332 3638 : is_valid_xml_namefirst(pg_wchar c)
2333 : {
2334 : /* (Letter | '_' | ':') */
2335 3644 : return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2336 7282 : || c == '_' || c == ':');
2337 : }
2338 :
2339 :
2340 : static bool
2341 14642 : is_valid_xml_namechar(pg_wchar c)
2342 : {
2343 : /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
2344 15532 : return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2345 890 : || xmlIsDigitQ(c)
2346 254 : || c == '.' || c == '-' || c == '_' || c == ':'
2347 12 : || xmlIsCombiningQ(c)
2348 31064 : || xmlIsExtenderQ(c));
2349 : }
2350 : #endif /* USE_LIBXML */
2351 :
2352 :
/*
 * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
 *
 * Walks "ident" one (possibly multibyte) character at a time and returns a
 * newly built string in which characters that cannot be used as-is are
 * written as _xHHHH_ escapes.  "fully_escaped" additionally escapes every
 * ':' and a leading "xml" (any case); "escape_period" escapes '.'.
 *
 * Without libxml support this raises the no-XML-support error.
 */
char *
map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
							   bool escape_period)
{
#ifdef USE_LIBXML
	StringInfoData out;
	const char *cur;

	/*
	 * SQL/XML doesn't make use of this case anywhere, so it's probably a
	 * mistake.
	 */
	Assert(fully_escaped || !escape_period);

	initStringInfo(&out);

	cur = ident;
	while (*cur != '\0')
	{
		bool		at_start = (cur == ident);
		int			chlen = pg_mblen(cur);

		if (*cur == ':' && (at_start || fully_escaped))
		{
			appendStringInfoString(&out, "_x003A_");
		}
		else if (*cur == '_' && cur[1] == 'x')
		{
			/* "_x" would look like one of our own escapes; escape the '_' */
			appendStringInfoString(&out, "_x005F_");
		}
		else if (fully_escaped && at_start &&
				 pg_strncasecmp(cur, "xml", 3) == 0)
		{
			/* Leading "xml" in any case: escape its first letter */
			appendStringInfoString(&out,
								   (*cur == 'x') ? "_x0078_" : "_x0058_");
		}
		else if (escape_period && *cur == '.')
		{
			appendStringInfoString(&out, "_x002E_");
		}
		else
		{
			pg_wchar	u = sqlchar_to_unicode(cur);
			bool		usable;

			usable = at_start
				? is_valid_xml_namefirst(u)
				: is_valid_xml_namechar(u);

			if (usable)
				appendBinaryStringInfo(&out, cur, chlen);
			else
				appendStringInfo(&out, "_x%04X_", (unsigned int) u);
		}

		cur += chlen;
	}

	return out.data;
#else							/* not USE_LIBXML */
	NO_XML_SUPPORT();
	return NULL;
#endif							/* not USE_LIBXML */
}
2407 :
2408 :
2409 : /*
2410 : * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2411 : */
2412 : char *
2413 128 : map_xml_name_to_sql_identifier(const char *name)
2414 : {
2415 : StringInfoData buf;
2416 : const char *p;
2417 :
2418 128 : initStringInfo(&buf);
2419 :
2420 704 : for (p = name; *p; p += pg_mblen(p))
2421 : {
2422 576 : if (*p == '_' && *(p + 1) == 'x'
2423 16 : && isxdigit((unsigned char) *(p + 2))
2424 16 : && isxdigit((unsigned char) *(p + 3))
2425 16 : && isxdigit((unsigned char) *(p + 4))
2426 16 : && isxdigit((unsigned char) *(p + 5))
2427 16 : && *(p + 6) == '_')
2428 16 : {
2429 : char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
2430 : unsigned int u;
2431 :
2432 16 : sscanf(p + 2, "%X", &u);
2433 16 : pg_unicode_to_server(u, (unsigned char *) cbuf);
2434 16 : appendStringInfoString(&buf, cbuf);
2435 16 : p += 6;
2436 : }
2437 : else
2438 560 : appendBinaryStringInfo(&buf, p, pg_mblen(p));
2439 : }
2440 :
2441 128 : return buf.data;
2442 : }
2443 :
2444 : /*
2445 : * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2446 : *
2447 : * When xml_escape_strings is true, then certain characters in string
2448 : * values are replaced by entity references (< etc.), as specified
2449 : * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is
2450 : * wanted. The false case is mainly useful when the resulting value
2451 : * is used with xmlTextWriterWriteAttribute() to write out an
2452 : * attribute, because that function does the escaping itself.
2453 : */
2454 : char *
2455 130606 : map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2456 : {
2457 130606 : if (type_is_array_domain(type))
2458 : {
2459 : ArrayType *array;
2460 : Oid elmtype;
2461 : int16 elmlen;
2462 : bool elmbyval;
2463 : char elmalign;
2464 : int num_elems;
2465 : Datum *elem_values;
2466 : bool *elem_nulls;
2467 : StringInfoData buf;
2468 : int i;
2469 :
2470 6 : array = DatumGetArrayTypeP(value);
2471 6 : elmtype = ARR_ELEMTYPE(array);
2472 6 : get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2473 :
2474 6 : deconstruct_array(array, elmtype,
2475 : elmlen, elmbyval, elmalign,
2476 : &elem_values, &elem_nulls,
2477 : &num_elems);
2478 :
2479 6 : initStringInfo(&buf);
2480 :
2481 24 : for (i = 0; i < num_elems; i++)
2482 : {
2483 18 : if (elem_nulls[i])
2484 0 : continue;
2485 18 : appendStringInfoString(&buf, "<element>");
2486 18 : appendStringInfoString(&buf,
2487 18 : map_sql_value_to_xml_value(elem_values[i],
2488 : elmtype, true));
2489 18 : appendStringInfoString(&buf, "</element>");
2490 : }
2491 :
2492 6 : pfree(elem_values);
2493 6 : pfree(elem_nulls);
2494 :
2495 6 : return buf.data;
2496 : }
2497 : else
2498 : {
2499 : Oid typeOut;
2500 : bool isvarlena;
2501 : char *str;
2502 :
2503 : /*
2504 : * Flatten domains; the special-case treatments below should apply to,
2505 : * eg, domains over boolean not just boolean.
2506 : */
2507 130600 : type = getBaseType(type);
2508 :
2509 : /*
2510 : * Special XSD formatting for some data types
2511 : */
2512 130600 : switch (type)
2513 : {
2514 66 : case BOOLOID:
2515 66 : if (DatumGetBool(value))
2516 60 : return "true";
2517 : else
2518 6 : return "false";
2519 :
2520 48 : case DATEOID:
2521 : {
2522 : DateADT date;
2523 : struct pg_tm tm;
2524 : char buf[MAXDATELEN + 1];
2525 :
2526 48 : date = DatumGetDateADT(value);
2527 : /* XSD doesn't support infinite values */
2528 48 : if (DATE_NOT_FINITE(date))
2529 0 : ereport(ERROR,
2530 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2531 : errmsg("date out of range"),
2532 : errdetail("XML does not support infinite date values.")));
2533 48 : j2date(date + POSTGRES_EPOCH_JDATE,
2534 : &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2535 48 : EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2536 :
2537 48 : return pstrdup(buf);
2538 : }
2539 :
2540 36 : case TIMESTAMPOID:
2541 : {
2542 : Timestamp timestamp;
2543 : struct pg_tm tm;
2544 : fsec_t fsec;
2545 : char buf[MAXDATELEN + 1];
2546 :
2547 36 : timestamp = DatumGetTimestamp(value);
2548 :
2549 : /* XSD doesn't support infinite values */
2550 36 : if (TIMESTAMP_NOT_FINITE(timestamp))
2551 6 : ereport(ERROR,
2552 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2553 : errmsg("timestamp out of range"),
2554 : errdetail("XML does not support infinite timestamp values.")));
2555 30 : else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2556 30 : EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2557 : else
2558 0 : ereport(ERROR,
2559 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2560 : errmsg("timestamp out of range")));
2561 :
2562 30 : return pstrdup(buf);
2563 : }
2564 :
2565 24 : case TIMESTAMPTZOID:
2566 : {
2567 : TimestampTz timestamp;
2568 : struct pg_tm tm;
2569 : int tz;
2570 : fsec_t fsec;
2571 24 : const char *tzn = NULL;
2572 : char buf[MAXDATELEN + 1];
2573 :
2574 24 : timestamp = DatumGetTimestamp(value);
2575 :
2576 : /* XSD doesn't support infinite values */
2577 24 : if (TIMESTAMP_NOT_FINITE(timestamp))
2578 0 : ereport(ERROR,
2579 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2580 : errmsg("timestamp out of range"),
2581 : errdetail("XML does not support infinite timestamp values.")));
2582 24 : else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2583 24 : EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2584 : else
2585 0 : ereport(ERROR,
2586 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2587 : errmsg("timestamp out of range")));
2588 :
2589 24 : return pstrdup(buf);
2590 : }
2591 :
2592 : #ifdef USE_LIBXML
2593 36 : case BYTEAOID:
2594 : {
2595 36 : bytea *bstr = DatumGetByteaPP(value);
2596 : PgXmlErrorContext *xmlerrcxt;
2597 36 : volatile xmlBufferPtr buf = NULL;
2598 36 : volatile xmlTextWriterPtr writer = NULL;
2599 : char *result;
2600 :
2601 36 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2602 :
2603 36 : PG_TRY();
2604 : {
2605 36 : buf = xmlBufferCreate();
2606 36 : if (buf == NULL || xmlerrcxt->err_occurred)
2607 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2608 : "could not allocate xmlBuffer");
2609 36 : writer = xmlNewTextWriterMemory(buf, 0);
2610 36 : if (writer == NULL || xmlerrcxt->err_occurred)
2611 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2612 : "could not allocate xmlTextWriter");
2613 :
2614 36 : if (xmlbinary == XMLBINARY_BASE64)
2615 30 : xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2616 30 : 0, VARSIZE_ANY_EXHDR(bstr));
2617 : else
2618 6 : xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2619 6 : 0, VARSIZE_ANY_EXHDR(bstr));
2620 :
2621 : /* we MUST do this now to flush data out to the buffer */
2622 36 : xmlFreeTextWriter(writer);
2623 36 : writer = NULL;
2624 :
2625 36 : result = pstrdup((const char *) xmlBufferContent(buf));
2626 : }
2627 0 : PG_CATCH();
2628 : {
2629 0 : if (writer)
2630 0 : xmlFreeTextWriter(writer);
2631 0 : if (buf)
2632 0 : xmlBufferFree(buf);
2633 :
2634 0 : pg_xml_done(xmlerrcxt, true);
2635 :
2636 0 : PG_RE_THROW();
2637 : }
2638 36 : PG_END_TRY();
2639 :
2640 36 : xmlBufferFree(buf);
2641 :
2642 36 : pg_xml_done(xmlerrcxt, false);
2643 :
2644 36 : return result;
2645 : }
2646 : #endif /* USE_LIBXML */
2647 :
2648 : }
2649 :
2650 : /*
2651 : * otherwise, just use the type's native text representation
2652 : */
2653 130390 : getTypeOutputInfo(type, &typeOut, &isvarlena);
2654 130390 : str = OidOutputFunctionCall(typeOut, value);
2655 :
2656 : /* ... exactly as-is for XML, and when escaping is not wanted */
2657 130390 : if (type == XMLOID || !xml_escape_strings)
2658 21634 : return str;
2659 :
2660 : /* otherwise, translate special characters as needed */
2661 108756 : return escape_xml(str);
2662 : }
2663 : }
2664 :
2665 :
2666 : /*
2667 : * Escape characters in text that have special meanings in XML.
2668 : *
2669 : * Returns a palloc'd string.
2670 : *
2671 : * NB: this is intentionally not dependent on libxml.
2672 : */
2673 : char *
2674 109002 : escape_xml(const char *str)
2675 : {
2676 : StringInfoData buf;
2677 : const char *p;
2678 :
2679 109002 : initStringInfo(&buf);
2680 680500 : for (p = str; *p; p++)
2681 : {
2682 571498 : switch (*p)
2683 : {
2684 0 : case '&':
2685 0 : appendStringInfoString(&buf, "&");
2686 0 : break;
2687 36 : case '<':
2688 36 : appendStringInfoString(&buf, "<");
2689 36 : break;
2690 24 : case '>':
2691 24 : appendStringInfoString(&buf, ">");
2692 24 : break;
2693 0 : case '\r':
2694 0 : appendStringInfoString(&buf, "
");
2695 0 : break;
2696 571438 : default:
2697 571438 : appendStringInfoCharMacro(&buf, *p);
2698 571438 : break;
2699 : }
2700 : }
2701 109002 : return buf.data;
2702 : }
2703 :
2704 :
/*
 * Duplicate a null-terminated string into memory obtained from SPI_palloc().
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes = strlen(s) + 1; /* include the terminator */
	char	   *copy;

	copy = SPI_palloc(nbytes);
	return memcpy(copy, s, nbytes);
}
2714 :
2715 :
2716 : /*
2717 : * SQL to XML mapping functions
2718 : *
2719 : * What follows below was at one point intentionally organized so that
2720 : * you can read along in the SQL/XML standard. The functions are
2721 : * mostly split up the way the clauses lay out in the standards
2722 : * document, and the identifiers are also aligned with the standard
2723 : * text. Unfortunately, SQL/XML:2006 reordered the clauses
2724 : * differently than SQL/XML:2003, so the order below doesn't make much
2725 : * sense anymore.
2726 : *
2727 : * There are many things going on there:
2728 : *
2729 : * There are two kinds of mappings: Mapping SQL data (table contents)
2730 : * to XML documents, and mapping SQL structure (the "schema") to XML
2731 : * Schema. And there are functions that do both at the same time.
2732 : *
2733 : * Then you can map a database, a schema, or a table, each in both
2734 : * ways. This breaks down recursively: Mapping a database invokes
2735 : * mapping schemas, which invokes mapping tables, which invokes
2736 : * mapping rows, which invokes mapping columns, although you can't
2737 : * call the last two from the outside. Because of this, there are a
2738 : * number of xyz_internal() functions which are to be called both from
2739 : * the function manager wrapper and from some upper layer in a
2740 : * recursive call.
2741 : *
2742 : * See the documentation about what the common function arguments
2743 : * nulls, tableforest, and targetns mean.
2744 : *
2745 : * Some style guidelines for XML output: Use double quotes for quoting
2746 : * XML attributes. Indent XML elements by two spaces, but remember
2747 : * that a lot of code is called recursively at different levels, so
2748 : * it's better not to indent rather than create output that indents
2749 : * and outdents weirdly. Add newlines to make the output look nice.
2750 : */
2751 :
2752 :
2753 : /*
2754 : * Visibility of objects for XML mappings; see SQL/XML:2008 section
2755 : * 4.10.8.
2756 : */
2757 :
2758 : /*
2759 : * Given a query, which must return type oid as first column, produce
2760 : * a list of Oids with the query results.
2761 : */
2762 : static List *
2763 36 : query_to_oid_list(const char *query)
2764 : {
2765 : uint64 i;
2766 36 : List *list = NIL;
2767 : int spi_result;
2768 :
2769 36 : spi_result = SPI_execute(query, true, 0);
2770 36 : if (spi_result != SPI_OK_SELECT)
2771 0 : elog(ERROR, "SPI_execute returned %s for %s",
2772 : SPI_result_code_string(spi_result), query);
2773 :
2774 108 : for (i = 0; i < SPI_processed; i++)
2775 : {
2776 : Datum oid;
2777 : bool isnull;
2778 :
2779 72 : oid = SPI_getbinval(SPI_tuptable->vals[i],
2780 72 : SPI_tuptable->tupdesc,
2781 : 1,
2782 : &isnull);
2783 72 : if (!isnull)
2784 72 : list = lappend_oid(list, DatumGetObjectId(oid));
2785 : }
2786 :
2787 36 : return list;
2788 : }
2789 :
2790 :
2791 : static List *
2792 36 : schema_get_xml_visible_tables(Oid nspid)
2793 : {
2794 : StringInfoData query;
2795 :
2796 36 : initStringInfo(&query);
2797 36 : appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2798 : " WHERE relnamespace = %u AND relkind IN ("
2799 : CppAsString2(RELKIND_RELATION) ","
2800 : CppAsString2(RELKIND_MATVIEW) ","
2801 : CppAsString2(RELKIND_VIEW) ")"
2802 : " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2803 : " ORDER BY relname;", nspid);
2804 :
2805 36 : return query_to_oid_list(query.data);
2806 : }
2807 :
2808 :
/*
 * Schemas left out of a database mapping: including the system schemas is
 * probably not useful there.
 */
#define XML_VISIBLE_SCHEMAS_EXCLUDE \
	"(nspname ~ '^pg_'" \
	" OR nspname = 'information_schema')"

/*
 * Query selecting the OIDs of all non-excluded schemas on which the current
 * user has USAGE privilege.  Deliberately has no trailing semicolon, so
 * that it can be extended or embedded as a subquery.
 */
#define XML_VISIBLE_SCHEMAS \
	"SELECT oid FROM pg_catalog.pg_namespace" \
	" WHERE pg_catalog.has_schema_privilege (oid, 'USAGE')" \
	" AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2816 :
2817 :
2818 : static List *
2819 0 : database_get_xml_visible_schemas(void)
2820 : {
2821 0 : return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2822 : }
2823 :
2824 :
2825 : static List *
2826 0 : database_get_xml_visible_tables(void)
2827 : {
2828 : /* At the moment there is no order required here. */
2829 0 : return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2830 : " WHERE relkind IN ("
2831 : CppAsString2(RELKIND_RELATION) ","
2832 : CppAsString2(RELKIND_MATVIEW) ","
2833 : CppAsString2(RELKIND_VIEW) ")"
2834 : " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2835 : " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2836 : }
2837 :
2838 :
2839 : /*
2840 : * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2841 : * section 9.11.
2842 : */
2843 :
2844 : static StringInfo
2845 96 : table_to_xml_internal(Oid relid,
2846 : const char *xmlschema, bool nulls, bool tableforest,
2847 : const char *targetns, bool top_level)
2848 : {
2849 : StringInfoData query;
2850 :
2851 96 : initStringInfo(&query);
2852 96 : appendStringInfo(&query, "SELECT * FROM %s",
2853 : DatumGetCString(DirectFunctionCall1(regclassout,
2854 : ObjectIdGetDatum(relid))));
2855 96 : return query_to_xml_internal(query.data, get_rel_name(relid),
2856 : xmlschema, nulls, tableforest,
2857 : targetns, top_level);
2858 : }
2859 :
2860 :
2861 : Datum
2862 36 : table_to_xml(PG_FUNCTION_ARGS)
2863 : {
2864 36 : Oid relid = PG_GETARG_OID(0);
2865 36 : bool nulls = PG_GETARG_BOOL(1);
2866 36 : bool tableforest = PG_GETARG_BOOL(2);
2867 36 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2868 :
2869 36 : PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2870 : nulls, tableforest,
2871 : targetns, true)));
2872 : }
2873 :
2874 :
2875 : Datum
2876 10 : query_to_xml(PG_FUNCTION_ARGS)
2877 : {
2878 10 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2879 10 : bool nulls = PG_GETARG_BOOL(1);
2880 10 : bool tableforest = PG_GETARG_BOOL(2);
2881 10 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2882 :
2883 10 : PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2884 : NULL, nulls, tableforest,
2885 : targetns, true)));
2886 : }
2887 :
2888 :
2889 : Datum
2890 12 : cursor_to_xml(PG_FUNCTION_ARGS)
2891 : {
2892 12 : char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2893 12 : int32 count = PG_GETARG_INT32(1);
2894 12 : bool nulls = PG_GETARG_BOOL(2);
2895 12 : bool tableforest = PG_GETARG_BOOL(3);
2896 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2897 :
2898 : StringInfoData result;
2899 : Portal portal;
2900 : uint64 i;
2901 :
2902 12 : initStringInfo(&result);
2903 :
2904 12 : if (!tableforest)
2905 : {
2906 6 : xmldata_root_element_start(&result, "table", NULL, targetns, true);
2907 6 : appendStringInfoChar(&result, '\n');
2908 : }
2909 :
2910 12 : SPI_connect();
2911 12 : portal = SPI_cursor_find(name);
2912 12 : if (portal == NULL)
2913 0 : ereport(ERROR,
2914 : (errcode(ERRCODE_UNDEFINED_CURSOR),
2915 : errmsg("cursor \"%s\" does not exist", name)));
2916 :
2917 12 : SPI_cursor_fetch(portal, true, count);
2918 48 : for (i = 0; i < SPI_processed; i++)
2919 36 : SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2920 : tableforest, targetns, true);
2921 :
2922 12 : SPI_finish();
2923 :
2924 12 : if (!tableforest)
2925 6 : xmldata_root_element_end(&result, "table");
2926 :
2927 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2928 : }
2929 :
2930 :
2931 : /*
2932 : * Write the start tag of the root element of a data mapping.
2933 : *
2934 : * top_level means that this is the very top level of the eventual
2935 : * output. For example, when the user calls table_to_xml, then a call
2936 : * with a table name to this function is the top level. When the user
2937 : * calls database_to_xml, then a call with a schema name to this
2938 : * function is not the top level. If top_level is false, then the XML
2939 : * namespace declarations are omitted, because they supposedly already
2940 : * appeared earlier in the output. Repeating them is not wrong, but
2941 : * it looks ugly.
2942 : */
2943 : static void
2944 238 : xmldata_root_element_start(StringInfo result, const char *eltname,
2945 : const char *xmlschema, const char *targetns,
2946 : bool top_level)
2947 : {
2948 : /* This isn't really wrong but currently makes no sense. */
2949 : Assert(top_level || !xmlschema);
2950 :
2951 238 : appendStringInfo(result, "<%s", eltname);
2952 238 : if (top_level)
2953 : {
2954 178 : appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2955 178 : if (strlen(targetns) > 0)
2956 30 : appendStringInfo(result, " xmlns=\"%s\"", targetns);
2957 : }
2958 238 : if (xmlschema)
2959 : {
2960 : /* FIXME: better targets */
2961 18 : if (strlen(targetns) > 0)
2962 6 : appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2963 : else
2964 12 : appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2965 : }
2966 238 : appendStringInfoString(result, ">\n");
2967 238 : }
2968 :
2969 :
2970 : static void
2971 238 : xmldata_root_element_end(StringInfo result, const char *eltname)
2972 : {
2973 238 : appendStringInfo(result, "</%s>\n", eltname);
2974 238 : }
2975 :
2976 :
2977 : static StringInfo
2978 112 : query_to_xml_internal(const char *query, char *tablename,
2979 : const char *xmlschema, bool nulls, bool tableforest,
2980 : const char *targetns, bool top_level)
2981 : {
2982 : StringInfo result;
2983 : char *xmltn;
2984 : uint64 i;
2985 :
2986 112 : if (tablename)
2987 96 : xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
2988 : else
2989 16 : xmltn = "table";
2990 :
2991 112 : result = makeStringInfo();
2992 :
2993 112 : SPI_connect();
2994 112 : if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
2995 0 : ereport(ERROR,
2996 : (errcode(ERRCODE_DATA_EXCEPTION),
2997 : errmsg("invalid query")));
2998 :
2999 112 : if (!tableforest)
3000 : {
3001 52 : xmldata_root_element_start(result, xmltn, xmlschema,
3002 : targetns, top_level);
3003 52 : appendStringInfoChar(result, '\n');
3004 : }
3005 :
3006 112 : if (xmlschema)
3007 30 : appendStringInfo(result, "%s\n\n", xmlschema);
3008 :
3009 388 : for (i = 0; i < SPI_processed; i++)
3010 276 : SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
3011 : tableforest, targetns, top_level);
3012 :
3013 112 : if (!tableforest)
3014 52 : xmldata_root_element_end(result, xmltn);
3015 :
3016 112 : SPI_finish();
3017 :
3018 112 : return result;
3019 : }
3020 :
3021 :
3022 : Datum
3023 30 : table_to_xmlschema(PG_FUNCTION_ARGS)
3024 : {
3025 30 : Oid relid = PG_GETARG_OID(0);
3026 30 : bool nulls = PG_GETARG_BOOL(1);
3027 30 : bool tableforest = PG_GETARG_BOOL(2);
3028 30 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3029 : const char *result;
3030 : Relation rel;
3031 :
3032 30 : rel = table_open(relid, AccessShareLock);
3033 30 : result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3034 : tableforest, targetns);
3035 30 : table_close(rel, NoLock);
3036 :
3037 30 : PG_RETURN_XML_P(cstring_to_xmltype(result));
3038 : }
3039 :
3040 :
3041 : Datum
3042 6 : query_to_xmlschema(PG_FUNCTION_ARGS)
3043 : {
3044 6 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
3045 6 : bool nulls = PG_GETARG_BOOL(1);
3046 6 : bool tableforest = PG_GETARG_BOOL(2);
3047 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3048 : const char *result;
3049 : SPIPlanPtr plan;
3050 : Portal portal;
3051 :
3052 6 : SPI_connect();
3053 :
3054 6 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
3055 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3056 :
3057 6 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
3058 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3059 :
3060 6 : result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3061 : InvalidOid, nulls,
3062 : tableforest, targetns));
3063 6 : SPI_cursor_close(portal);
3064 6 : SPI_finish();
3065 :
3066 6 : PG_RETURN_XML_P(cstring_to_xmltype(result));
3067 : }
3068 :
3069 :
3070 : Datum
3071 12 : cursor_to_xmlschema(PG_FUNCTION_ARGS)
3072 : {
3073 12 : char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
3074 12 : bool nulls = PG_GETARG_BOOL(1);
3075 12 : bool tableforest = PG_GETARG_BOOL(2);
3076 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3077 : const char *xmlschema;
3078 : Portal portal;
3079 :
3080 12 : SPI_connect();
3081 12 : portal = SPI_cursor_find(name);
3082 12 : if (portal == NULL)
3083 0 : ereport(ERROR,
3084 : (errcode(ERRCODE_UNDEFINED_CURSOR),
3085 : errmsg("cursor \"%s\" does not exist", name)));
3086 12 : if (portal->tupDesc == NULL)
3087 0 : ereport(ERROR,
3088 : (errcode(ERRCODE_INVALID_CURSOR_STATE),
3089 : errmsg("portal \"%s\" does not return tuples", name)));
3090 :
3091 12 : xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3092 : InvalidOid, nulls,
3093 : tableforest, targetns));
3094 12 : SPI_finish();
3095 :
3096 12 : PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
3097 : }
3098 :
3099 :
3100 : Datum
3101 24 : table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3102 : {
3103 24 : Oid relid = PG_GETARG_OID(0);
3104 24 : bool nulls = PG_GETARG_BOOL(1);
3105 24 : bool tableforest = PG_GETARG_BOOL(2);
3106 24 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3107 : Relation rel;
3108 : const char *xmlschema;
3109 :
3110 24 : rel = table_open(relid, AccessShareLock);
3111 24 : xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
3112 : tableforest, targetns);
3113 24 : table_close(rel, NoLock);
3114 :
3115 24 : PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
3116 : xmlschema, nulls, tableforest,
3117 : targetns, true)));
3118 : }
3119 :
3120 :
3121 : Datum
3122 6 : query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3123 : {
3124 6 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
3125 6 : bool nulls = PG_GETARG_BOOL(1);
3126 6 : bool tableforest = PG_GETARG_BOOL(2);
3127 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3128 :
3129 : const char *xmlschema;
3130 : SPIPlanPtr plan;
3131 : Portal portal;
3132 :
3133 6 : SPI_connect();
3134 :
3135 6 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
3136 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
3137 :
3138 6 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
3139 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
3140 :
3141 6 : xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
3142 : InvalidOid, nulls, tableforest, targetns));
3143 6 : SPI_cursor_close(portal);
3144 6 : SPI_finish();
3145 :
3146 6 : PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
3147 : xmlschema, nulls, tableforest,
3148 : targetns, true)));
3149 : }
3150 :
3151 :
3152 : /*
3153 : * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
3154 : * sections 9.13, 9.14.
3155 : */
3156 :
3157 : static StringInfo
3158 18 : schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
3159 : bool tableforest, const char *targetns, bool top_level)
3160 : {
3161 : StringInfo result;
3162 : char *xmlsn;
3163 : List *relid_list;
3164 : ListCell *cell;
3165 :
3166 18 : xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
3167 : true, false);
3168 18 : result = makeStringInfo();
3169 :
3170 18 : xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
3171 18 : appendStringInfoChar(result, '\n');
3172 :
3173 18 : if (xmlschema)
3174 6 : appendStringInfo(result, "%s\n\n", xmlschema);
3175 :
3176 18 : SPI_connect();
3177 :
3178 18 : relid_list = schema_get_xml_visible_tables(nspid);
3179 :
3180 54 : foreach(cell, relid_list)
3181 : {
3182 36 : Oid relid = lfirst_oid(cell);
3183 : StringInfo subres;
3184 :
3185 36 : subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
3186 : targetns, false);
3187 :
3188 36 : appendBinaryStringInfo(result, subres->data, subres->len);
3189 36 : appendStringInfoChar(result, '\n');
3190 : }
3191 :
3192 18 : SPI_finish();
3193 :
3194 18 : xmldata_root_element_end(result, xmlsn);
3195 :
3196 18 : return result;
3197 : }
3198 :
3199 :
3200 : Datum
3201 12 : schema_to_xml(PG_FUNCTION_ARGS)
3202 : {
3203 12 : Name name = PG_GETARG_NAME(0);
3204 12 : bool nulls = PG_GETARG_BOOL(1);
3205 12 : bool tableforest = PG_GETARG_BOOL(2);
3206 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3207 :
3208 : char *schemaname;
3209 : Oid nspid;
3210 :
3211 12 : schemaname = NameStr(*name);
3212 12 : nspid = LookupExplicitNamespace(schemaname, false);
3213 :
3214 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
3215 : nulls, tableforest, targetns, true)));
3216 : }
3217 :
3218 :
3219 : /*
3220 : * Write the start element of the root element of an XML Schema mapping.
3221 : */
3222 : static void
3223 96 : xsd_schema_element_start(StringInfo result, const char *targetns)
3224 : {
3225 96 : appendStringInfoString(result,
3226 : "<xsd:schema\n"
3227 : " xmlns:xsd=\"" NAMESPACE_XSD "\"");
3228 96 : if (strlen(targetns) > 0)
3229 18 : appendStringInfo(result,
3230 : "\n"
3231 : " targetNamespace=\"%s\"\n"
3232 : " elementFormDefault=\"qualified\"",
3233 : targetns);
3234 96 : appendStringInfoString(result,
3235 : ">\n\n");
3236 96 : }
3237 :
3238 :
3239 : static void
3240 96 : xsd_schema_element_end(StringInfo result)
3241 : {
3242 96 : appendStringInfoString(result, "</xsd:schema>");
3243 96 : }
3244 :
3245 :
3246 : static StringInfo
3247 18 : schema_to_xmlschema_internal(const char *schemaname, bool nulls,
3248 : bool tableforest, const char *targetns)
3249 : {
3250 : Oid nspid;
3251 : List *relid_list;
3252 : List *tupdesc_list;
3253 : ListCell *cell;
3254 : StringInfo result;
3255 :
3256 18 : result = makeStringInfo();
3257 :
3258 18 : nspid = LookupExplicitNamespace(schemaname, false);
3259 :
3260 18 : xsd_schema_element_start(result, targetns);
3261 :
3262 18 : SPI_connect();
3263 :
3264 18 : relid_list = schema_get_xml_visible_tables(nspid);
3265 :
3266 18 : tupdesc_list = NIL;
3267 54 : foreach(cell, relid_list)
3268 : {
3269 : Relation rel;
3270 :
3271 36 : rel = table_open(lfirst_oid(cell), AccessShareLock);
3272 36 : tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3273 36 : table_close(rel, NoLock);
3274 : }
3275 :
3276 18 : appendStringInfoString(result,
3277 : map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3278 :
3279 18 : appendStringInfoString(result,
3280 : map_sql_schema_to_xmlschema_types(nspid, relid_list,
3281 : nulls, tableforest, targetns));
3282 :
3283 18 : xsd_schema_element_end(result);
3284 :
3285 18 : SPI_finish();
3286 :
3287 18 : return result;
3288 : }
3289 :
3290 :
3291 : Datum
3292 12 : schema_to_xmlschema(PG_FUNCTION_ARGS)
3293 : {
3294 12 : Name name = PG_GETARG_NAME(0);
3295 12 : bool nulls = PG_GETARG_BOOL(1);
3296 12 : bool tableforest = PG_GETARG_BOOL(2);
3297 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3298 :
3299 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
3300 : nulls, tableforest, targetns)));
3301 : }
3302 :
3303 :
3304 : Datum
3305 6 : schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3306 : {
3307 6 : Name name = PG_GETARG_NAME(0);
3308 6 : bool nulls = PG_GETARG_BOOL(1);
3309 6 : bool tableforest = PG_GETARG_BOOL(2);
3310 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
3311 : char *schemaname;
3312 : Oid nspid;
3313 : StringInfo xmlschema;
3314 :
3315 6 : schemaname = NameStr(*name);
3316 6 : nspid = LookupExplicitNamespace(schemaname, false);
3317 :
3318 6 : xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
3319 : tableforest, targetns);
3320 :
3321 6 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
3322 : xmlschema->data, nulls,
3323 : tableforest, targetns, true)));
3324 : }
3325 :
3326 :
3327 : /*
3328 : * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
3329 : * sections 9.16, 9.17.
3330 : */
3331 :
3332 : static StringInfo
3333 0 : database_to_xml_internal(const char *xmlschema, bool nulls,
3334 : bool tableforest, const char *targetns)
3335 : {
3336 : StringInfo result;
3337 : List *nspid_list;
3338 : ListCell *cell;
3339 : char *xmlcn;
3340 :
3341 0 : xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
3342 : true, false);
3343 0 : result = makeStringInfo();
3344 :
3345 0 : xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
3346 0 : appendStringInfoChar(result, '\n');
3347 :
3348 0 : if (xmlschema)
3349 0 : appendStringInfo(result, "%s\n\n", xmlschema);
3350 :
3351 0 : SPI_connect();
3352 :
3353 0 : nspid_list = database_get_xml_visible_schemas();
3354 :
3355 0 : foreach(cell, nspid_list)
3356 : {
3357 0 : Oid nspid = lfirst_oid(cell);
3358 : StringInfo subres;
3359 :
3360 0 : subres = schema_to_xml_internal(nspid, NULL, nulls,
3361 : tableforest, targetns, false);
3362 :
3363 0 : appendBinaryStringInfo(result, subres->data, subres->len);
3364 0 : appendStringInfoChar(result, '\n');
3365 : }
3366 :
3367 0 : SPI_finish();
3368 :
3369 0 : xmldata_root_element_end(result, xmlcn);
3370 :
3371 0 : return result;
3372 : }
3373 :
3374 :
3375 : Datum
3376 0 : database_to_xml(PG_FUNCTION_ARGS)
3377 : {
3378 0 : bool nulls = PG_GETARG_BOOL(0);
3379 0 : bool tableforest = PG_GETARG_BOOL(1);
3380 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3381 :
3382 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3383 : tableforest, targetns)));
3384 : }
3385 :
3386 :
3387 : static StringInfo
3388 0 : database_to_xmlschema_internal(bool nulls, bool tableforest,
3389 : const char *targetns)
3390 : {
3391 : List *relid_list;
3392 : List *nspid_list;
3393 : List *tupdesc_list;
3394 : ListCell *cell;
3395 : StringInfo result;
3396 :
3397 0 : result = makeStringInfo();
3398 :
3399 0 : xsd_schema_element_start(result, targetns);
3400 :
3401 0 : SPI_connect();
3402 :
3403 0 : relid_list = database_get_xml_visible_tables();
3404 0 : nspid_list = database_get_xml_visible_schemas();
3405 :
3406 0 : tupdesc_list = NIL;
3407 0 : foreach(cell, relid_list)
3408 : {
3409 : Relation rel;
3410 :
3411 0 : rel = table_open(lfirst_oid(cell), AccessShareLock);
3412 0 : tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3413 0 : table_close(rel, NoLock);
3414 : }
3415 :
3416 0 : appendStringInfoString(result,
3417 : map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3418 :
3419 0 : appendStringInfoString(result,
3420 : map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
3421 :
3422 0 : xsd_schema_element_end(result);
3423 :
3424 0 : SPI_finish();
3425 :
3426 0 : return result;
3427 : }
3428 :
3429 :
3430 : Datum
3431 0 : database_to_xmlschema(PG_FUNCTION_ARGS)
3432 : {
3433 0 : bool nulls = PG_GETARG_BOOL(0);
3434 0 : bool tableforest = PG_GETARG_BOOL(1);
3435 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3436 :
3437 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3438 : tableforest, targetns)));
3439 : }
3440 :
3441 :
3442 : Datum
3443 0 : database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3444 : {
3445 0 : bool nulls = PG_GETARG_BOOL(0);
3446 0 : bool tableforest = PG_GETARG_BOOL(1);
3447 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3448 : StringInfo xmlschema;
3449 :
3450 0 : xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3451 :
3452 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3453 : nulls, tableforest, targetns)));
3454 : }
3455 :
3456 :
3457 : /*
3458 : * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3459 : * 9.2.
3460 : */
3461 : static char *
3462 384 : map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
3463 : {
3464 : StringInfoData result;
3465 :
3466 384 : initStringInfo(&result);
3467 :
3468 384 : if (a)
3469 384 : appendStringInfoString(&result,
3470 384 : map_sql_identifier_to_xml_name(a, true, true));
3471 384 : if (b)
3472 384 : appendStringInfo(&result, ".%s",
3473 : map_sql_identifier_to_xml_name(b, true, true));
3474 384 : if (c)
3475 384 : appendStringInfo(&result, ".%s",
3476 : map_sql_identifier_to_xml_name(c, true, true));
3477 384 : if (d)
3478 366 : appendStringInfo(&result, ".%s",
3479 : map_sql_identifier_to_xml_name(d, true, true));
3480 :
3481 384 : return result.data;
3482 : }
3483 :
3484 :
3485 : /*
3486 : * Map an SQL table to an XML Schema document; see SQL/XML:2008
3487 : * section 9.11.
3488 : *
3489 : * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3490 : * 9.9.
3491 : */
3492 : static const char *
3493 78 : map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3494 : bool tableforest, const char *targetns)
3495 : {
3496 : int i;
3497 : char *xmltn;
3498 : char *tabletypename;
3499 : char *rowtypename;
3500 : StringInfoData result;
3501 :
3502 78 : initStringInfo(&result);
3503 :
3504 78 : if (OidIsValid(relid))
3505 : {
3506 : HeapTuple tuple;
3507 : Form_pg_class reltuple;
3508 :
3509 54 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3510 54 : if (!HeapTupleIsValid(tuple))
3511 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
3512 54 : reltuple = (Form_pg_class) GETSTRUCT(tuple);
3513 :
3514 54 : xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3515 : true, false);
3516 :
3517 54 : tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3518 54 : get_database_name(MyDatabaseId),
3519 54 : get_namespace_name(reltuple->relnamespace),
3520 54 : NameStr(reltuple->relname));
3521 :
3522 54 : rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3523 54 : get_database_name(MyDatabaseId),
3524 54 : get_namespace_name(reltuple->relnamespace),
3525 54 : NameStr(reltuple->relname));
3526 :
3527 54 : ReleaseSysCache(tuple);
3528 : }
3529 : else
3530 : {
3531 24 : if (tableforest)
3532 12 : xmltn = "row";
3533 : else
3534 12 : xmltn = "table";
3535 :
3536 24 : tabletypename = "TableType";
3537 24 : rowtypename = "RowType";
3538 : }
3539 :
3540 78 : xsd_schema_element_start(&result, targetns);
3541 :
3542 78 : appendStringInfoString(&result,
3543 78 : map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3544 :
3545 78 : appendStringInfo(&result,
3546 : "<xsd:complexType name=\"%s\">\n"
3547 : " <xsd:sequence>\n",
3548 : rowtypename);
3549 :
3550 324 : for (i = 0; i < tupdesc->natts; i++)
3551 : {
3552 246 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3553 :
3554 246 : if (att->attisdropped)
3555 6 : continue;
3556 480 : appendStringInfo(&result,
3557 : " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3558 240 : map_sql_identifier_to_xml_name(NameStr(att->attname),
3559 : true, false),
3560 : map_sql_type_to_xml_name(att->atttypid, -1),
3561 : nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3562 : }
3563 :
3564 78 : appendStringInfoString(&result,
3565 : " </xsd:sequence>\n"
3566 : "</xsd:complexType>\n\n");
3567 :
3568 78 : if (!tableforest)
3569 : {
3570 42 : appendStringInfo(&result,
3571 : "<xsd:complexType name=\"%s\">\n"
3572 : " <xsd:sequence>\n"
3573 : " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3574 : " </xsd:sequence>\n"
3575 : "</xsd:complexType>\n\n",
3576 : tabletypename, rowtypename);
3577 :
3578 42 : appendStringInfo(&result,
3579 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3580 : xmltn, tabletypename);
3581 : }
3582 : else
3583 36 : appendStringInfo(&result,
3584 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3585 : xmltn, rowtypename);
3586 :
3587 78 : xsd_schema_element_end(&result);
3588 :
3589 78 : return result.data;
3590 : }
3591 :
3592 :
3593 : /*
3594 : * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3595 : * section 9.12.
3596 : */
3597 : static const char *
3598 18 : map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3599 : bool tableforest, const char *targetns)
3600 : {
3601 : char *dbname;
3602 : char *nspname;
3603 : char *xmlsn;
3604 : char *schematypename;
3605 : StringInfoData result;
3606 : ListCell *cell;
3607 :
3608 18 : dbname = get_database_name(MyDatabaseId);
3609 18 : nspname = get_namespace_name(nspid);
3610 :
3611 18 : initStringInfo(&result);
3612 :
3613 18 : xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3614 :
3615 18 : schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3616 : dbname,
3617 : nspname,
3618 : NULL);
3619 :
3620 18 : appendStringInfo(&result,
3621 : "<xsd:complexType name=\"%s\">\n", schematypename);
3622 18 : if (!tableforest)
3623 6 : appendStringInfoString(&result,
3624 : " <xsd:all>\n");
3625 : else
3626 12 : appendStringInfoString(&result,
3627 : " <xsd:sequence>\n");
3628 :
3629 54 : foreach(cell, relid_list)
3630 : {
3631 36 : Oid relid = lfirst_oid(cell);
3632 36 : char *relname = get_rel_name(relid);
3633 36 : char *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3634 36 : char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3635 : dbname,
3636 : nspname,
3637 : relname);
3638 :
3639 36 : if (!tableforest)
3640 12 : appendStringInfo(&result,
3641 : " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3642 : xmltn, tabletypename);
3643 : else
3644 24 : appendStringInfo(&result,
3645 : " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3646 : xmltn, tabletypename);
3647 : }
3648 :
3649 18 : if (!tableforest)
3650 6 : appendStringInfoString(&result,
3651 : " </xsd:all>\n");
3652 : else
3653 12 : appendStringInfoString(&result,
3654 : " </xsd:sequence>\n");
3655 18 : appendStringInfoString(&result,
3656 : "</xsd:complexType>\n\n");
3657 :
3658 18 : appendStringInfo(&result,
3659 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3660 : xmlsn, schematypename);
3661 :
3662 18 : return result.data;
3663 : }
3664 :
3665 :
3666 : /*
3667 : * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3668 : * section 9.15.
3669 : */
3670 : static const char *
3671 0 : map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3672 : bool tableforest, const char *targetns)
3673 : {
3674 : char *dbname;
3675 : char *xmlcn;
3676 : char *catalogtypename;
3677 : StringInfoData result;
3678 : ListCell *cell;
3679 :
3680 0 : dbname = get_database_name(MyDatabaseId);
3681 :
3682 0 : initStringInfo(&result);
3683 :
3684 0 : xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3685 :
3686 0 : catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3687 : dbname,
3688 : NULL,
3689 : NULL);
3690 :
3691 0 : appendStringInfo(&result,
3692 : "<xsd:complexType name=\"%s\">\n", catalogtypename);
3693 0 : appendStringInfoString(&result,
3694 : " <xsd:all>\n");
3695 :
3696 0 : foreach(cell, nspid_list)
3697 : {
3698 0 : Oid nspid = lfirst_oid(cell);
3699 0 : char *nspname = get_namespace_name(nspid);
3700 0 : char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3701 0 : char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3702 : dbname,
3703 : nspname,
3704 : NULL);
3705 :
3706 0 : appendStringInfo(&result,
3707 : " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3708 : xmlsn, schematypename);
3709 : }
3710 :
3711 0 : appendStringInfoString(&result,
3712 : " </xsd:all>\n");
3713 0 : appendStringInfoString(&result,
3714 : "</xsd:complexType>\n\n");
3715 :
3716 0 : appendStringInfo(&result,
3717 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3718 : xmlcn, catalogtypename);
3719 :
3720 0 : return result.data;
3721 : }
3722 :
3723 :
3724 : /*
3725 : * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3726 : */
3727 : static const char *
3728 810 : map_sql_type_to_xml_name(Oid typeoid, int typmod)
3729 : {
3730 : StringInfoData result;
3731 :
3732 810 : initStringInfo(&result);
3733 :
3734 810 : switch (typeoid)
3735 : {
3736 30 : case BPCHAROID:
3737 30 : if (typmod == -1)
3738 30 : appendStringInfoString(&result, "CHAR");
3739 : else
3740 0 : appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3741 30 : break;
3742 54 : case VARCHAROID:
3743 54 : if (typmod == -1)
3744 54 : appendStringInfoString(&result, "VARCHAR");
3745 : else
3746 0 : appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3747 54 : break;
3748 30 : case NUMERICOID:
3749 30 : if (typmod == -1)
3750 30 : appendStringInfoString(&result, "NUMERIC");
3751 : else
3752 0 : appendStringInfo(&result, "NUMERIC_%d_%d",
3753 0 : ((typmod - VARHDRSZ) >> 16) & 0xffff,
3754 0 : (typmod - VARHDRSZ) & 0xffff);
3755 30 : break;
3756 174 : case INT4OID:
3757 174 : appendStringInfoString(&result, "INTEGER");
3758 174 : break;
3759 30 : case INT2OID:
3760 30 : appendStringInfoString(&result, "SMALLINT");
3761 30 : break;
3762 30 : case INT8OID:
3763 30 : appendStringInfoString(&result, "BIGINT");
3764 30 : break;
3765 30 : case FLOAT4OID:
3766 30 : appendStringInfoString(&result, "REAL");
3767 30 : break;
3768 0 : case FLOAT8OID:
3769 0 : appendStringInfoString(&result, "DOUBLE");
3770 0 : break;
3771 30 : case BOOLOID:
3772 30 : appendStringInfoString(&result, "BOOLEAN");
3773 30 : break;
3774 30 : case TIMEOID:
3775 30 : if (typmod == -1)
3776 30 : appendStringInfoString(&result, "TIME");
3777 : else
3778 0 : appendStringInfo(&result, "TIME_%d", typmod);
3779 30 : break;
3780 30 : case TIMETZOID:
3781 30 : if (typmod == -1)
3782 30 : appendStringInfoString(&result, "TIME_WTZ");
3783 : else
3784 0 : appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3785 30 : break;
3786 30 : case TIMESTAMPOID:
3787 30 : if (typmod == -1)
3788 30 : appendStringInfoString(&result, "TIMESTAMP");
3789 : else
3790 0 : appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3791 30 : break;
3792 30 : case TIMESTAMPTZOID:
3793 30 : if (typmod == -1)
3794 30 : appendStringInfoString(&result, "TIMESTAMP_WTZ");
3795 : else
3796 0 : appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3797 30 : break;
3798 30 : case DATEOID:
3799 30 : appendStringInfoString(&result, "DATE");
3800 30 : break;
3801 30 : case XMLOID:
3802 30 : appendStringInfoString(&result, "XML");
3803 30 : break;
3804 222 : default:
3805 : {
3806 : HeapTuple tuple;
3807 : Form_pg_type typtuple;
3808 :
3809 222 : tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3810 222 : if (!HeapTupleIsValid(tuple))
3811 0 : elog(ERROR, "cache lookup failed for type %u", typeoid);
3812 222 : typtuple = (Form_pg_type) GETSTRUCT(tuple);
3813 :
3814 222 : appendStringInfoString(&result,
3815 222 : map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3816 222 : get_database_name(MyDatabaseId),
3817 222 : get_namespace_name(typtuple->typnamespace),
3818 222 : NameStr(typtuple->typname)));
3819 :
3820 222 : ReleaseSysCache(tuple);
3821 : }
3822 : }
3823 :
3824 810 : return result.data;
3825 : }
3826 :
3827 :
3828 : /*
3829 : * Map a collection of SQL data types to XML Schema data types; see
3830 : * SQL/XML:2008 section 9.7.
3831 : */
3832 : static const char *
3833 96 : map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3834 : {
3835 96 : List *uniquetypes = NIL;
3836 : int i;
3837 : StringInfoData result;
3838 : ListCell *cell0;
3839 :
3840 : /* extract all column types used in the set of TupleDescs */
3841 210 : foreach(cell0, tupdesc_list)
3842 : {
3843 114 : TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
3844 :
3845 702 : for (i = 0; i < tupdesc->natts; i++)
3846 : {
3847 588 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3848 :
3849 588 : if (att->attisdropped)
3850 24 : continue;
3851 564 : uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3852 : }
3853 : }
3854 :
3855 : /* add base types of domains */
3856 642 : foreach(cell0, uniquetypes)
3857 : {
3858 546 : Oid typid = lfirst_oid(cell0);
3859 546 : Oid basetypid = getBaseType(typid);
3860 :
3861 546 : if (basetypid != typid)
3862 24 : uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3863 : }
3864 :
3865 : /* Convert to textual form */
3866 96 : initStringInfo(&result);
3867 :
3868 642 : foreach(cell0, uniquetypes)
3869 : {
3870 546 : appendStringInfo(&result, "%s\n",
3871 : map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3872 : -1));
3873 : }
3874 :
3875 96 : return result.data;
3876 : }
3877 :
3878 :
3879 : /*
3880 : * Map an SQL data type to a named XML Schema data type; see
3881 : * SQL/XML:2008 sections 9.5 and 9.6.
3882 : *
3883 : * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3884 : * a name attribute, which this function does. The name-less version
3885 : * 9.5 doesn't appear to be required anywhere.)
3886 : */
3887 : static const char *
3888 546 : map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3889 : {
3890 : StringInfoData result;
3891 546 : const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3892 :
3893 546 : initStringInfo(&result);
3894 :
3895 546 : if (typeoid == XMLOID)
3896 : {
3897 24 : appendStringInfoString(&result,
3898 : "<xsd:complexType mixed=\"true\">\n"
3899 : " <xsd:sequence>\n"
3900 : " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3901 : " </xsd:sequence>\n"
3902 : "</xsd:complexType>\n");
3903 : }
3904 : else
3905 : {
3906 522 : appendStringInfo(&result,
3907 : "<xsd:simpleType name=\"%s\">\n", typename);
3908 :
3909 522 : switch (typeoid)
3910 : {
3911 138 : case BPCHAROID:
3912 : case VARCHAROID:
3913 : case TEXTOID:
3914 138 : appendStringInfoString(&result,
3915 : " <xsd:restriction base=\"xsd:string\">\n");
3916 138 : if (typmod != -1)
3917 0 : appendStringInfo(&result,
3918 : " <xsd:maxLength value=\"%d\"/>\n",
3919 : typmod - VARHDRSZ);
3920 138 : appendStringInfoString(&result, " </xsd:restriction>\n");
3921 138 : break;
3922 :
3923 24 : case BYTEAOID:
3924 24 : appendStringInfo(&result,
3925 : " <xsd:restriction base=\"xsd:%s\">\n"
3926 : " </xsd:restriction>\n",
3927 24 : xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3928 24 : break;
3929 :
3930 24 : case NUMERICOID:
3931 24 : if (typmod != -1)
3932 0 : appendStringInfo(&result,
3933 : " <xsd:restriction base=\"xsd:decimal\">\n"
3934 : " <xsd:totalDigits value=\"%d\"/>\n"
3935 : " <xsd:fractionDigits value=\"%d\"/>\n"
3936 : " </xsd:restriction>\n",
3937 0 : ((typmod - VARHDRSZ) >> 16) & 0xffff,
3938 0 : (typmod - VARHDRSZ) & 0xffff);
3939 24 : break;
3940 :
3941 24 : case INT2OID:
3942 24 : appendStringInfo(&result,
3943 : " <xsd:restriction base=\"xsd:short\">\n"
3944 : " <xsd:maxInclusive value=\"%d\"/>\n"
3945 : " <xsd:minInclusive value=\"%d\"/>\n"
3946 : " </xsd:restriction>\n",
3947 : SHRT_MAX, SHRT_MIN);
3948 24 : break;
3949 :
3950 96 : case INT4OID:
3951 96 : appendStringInfo(&result,
3952 : " <xsd:restriction base=\"xsd:int\">\n"
3953 : " <xsd:maxInclusive value=\"%d\"/>\n"
3954 : " <xsd:minInclusive value=\"%d\"/>\n"
3955 : " </xsd:restriction>\n",
3956 : INT_MAX, INT_MIN);
3957 96 : break;
3958 :
3959 24 : case INT8OID:
3960 24 : appendStringInfo(&result,
3961 : " <xsd:restriction base=\"xsd:long\">\n"
3962 : " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3963 : " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3964 : " </xsd:restriction>\n",
3965 : PG_INT64_MAX,
3966 : PG_INT64_MIN);
3967 24 : break;
3968 :
3969 24 : case FLOAT4OID:
3970 24 : appendStringInfoString(&result,
3971 : " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
3972 24 : break;
3973 :
3974 0 : case FLOAT8OID:
3975 0 : appendStringInfoString(&result,
3976 : " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
3977 0 : break;
3978 :
3979 24 : case BOOLOID:
3980 24 : appendStringInfoString(&result,
3981 : " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
3982 24 : break;
3983 :
3984 48 : case TIMEOID:
3985 : case TIMETZOID:
3986 : {
3987 48 : const char *tz = (typeoid == TIMETZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3988 :
3989 48 : if (typmod == -1)
3990 48 : appendStringInfo(&result,
3991 : " <xsd:restriction base=\"xsd:time\">\n"
3992 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3993 : " </xsd:restriction>\n", tz);
3994 0 : else if (typmod == 0)
3995 0 : appendStringInfo(&result,
3996 : " <xsd:restriction base=\"xsd:time\">\n"
3997 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3998 : " </xsd:restriction>\n", tz);
3999 : else
4000 0 : appendStringInfo(&result,
4001 : " <xsd:restriction base=\"xsd:time\">\n"
4002 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
4003 : " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
4004 48 : break;
4005 : }
4006 :
4007 48 : case TIMESTAMPOID:
4008 : case TIMESTAMPTZOID:
4009 : {
4010 48 : const char *tz = (typeoid == TIMESTAMPTZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
4011 :
4012 48 : if (typmod == -1)
4013 48 : appendStringInfo(&result,
4014 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4015 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
4016 : " </xsd:restriction>\n", tz);
4017 0 : else if (typmod == 0)
4018 0 : appendStringInfo(&result,
4019 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4020 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
4021 : " </xsd:restriction>\n", tz);
4022 : else
4023 0 : appendStringInfo(&result,
4024 : " <xsd:restriction base=\"xsd:dateTime\">\n"
4025 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
4026 : " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
4027 48 : break;
4028 : }
4029 :
4030 24 : case DATEOID:
4031 24 : appendStringInfoString(&result,
4032 : " <xsd:restriction base=\"xsd:date\">\n"
4033 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
4034 : " </xsd:restriction>\n");
4035 24 : break;
4036 :
4037 24 : default:
4038 24 : if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
4039 : {
4040 : Oid base_typeoid;
4041 24 : int32 base_typmod = -1;
4042 :
4043 24 : base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
4044 :
4045 24 : appendStringInfo(&result,
4046 : " <xsd:restriction base=\"%s\"/>\n",
4047 : map_sql_type_to_xml_name(base_typeoid, base_typmod));
4048 : }
4049 24 : break;
4050 : }
4051 522 : appendStringInfoString(&result, "</xsd:simpleType>\n");
4052 : }
4053 :
4054 546 : return result.data;
4055 : }
4056 :
4057 :
4058 : /*
4059 : * Map an SQL row to an XML element, taking the row from the active
4060 : * SPI cursor. See also SQL/XML:2008 section 9.10.
4061 : */
4062 : static void
4063 312 : SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
4064 : bool nulls, bool tableforest,
4065 : const char *targetns, bool top_level)
4066 : {
4067 : int i;
4068 : char *xmltn;
4069 :
4070 312 : if (tablename)
4071 228 : xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
4072 : else
4073 : {
4074 84 : if (tableforest)
4075 36 : xmltn = "row";
4076 : else
4077 48 : xmltn = "table";
4078 : }
4079 :
4080 312 : if (tableforest)
4081 162 : xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
4082 : else
4083 150 : appendStringInfoString(result, "<row>\n");
4084 :
4085 1272 : for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
4086 : {
4087 : char *colname;
4088 : Datum colval;
4089 : bool isnull;
4090 :
4091 960 : colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
4092 : true, false);
4093 960 : colval = SPI_getbinval(SPI_tuptable->vals[rownum],
4094 960 : SPI_tuptable->tupdesc,
4095 : i,
4096 : &isnull);
4097 960 : if (isnull)
4098 : {
4099 114 : if (nulls)
4100 60 : appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname);
4101 : }
4102 : else
4103 846 : appendStringInfo(result, " <%s>%s</%s>\n",
4104 : colname,
4105 : map_sql_value_to_xml_value(colval,
4106 846 : SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
4107 : colname);
4108 : }
4109 :
4110 312 : if (tableforest)
4111 : {
4112 162 : xmldata_root_element_end(result, xmltn);
4113 162 : appendStringInfoChar(result, '\n');
4114 : }
4115 : else
4116 150 : appendStringInfoString(result, "</row>\n\n");
4117 312 : }
4118 :
4119 :
4120 : /*
4121 : * XPath related functions
4122 : */
4123 :
4124 : #ifdef USE_LIBXML
4125 :
4126 : /*
4127 : * Convert XML node to text.
4128 : *
4129 : * For attribute and text nodes, return the escaped text. For anything else,
4130 : * dump the whole subtree.
4131 : */
4132 : static text *
4133 192 : xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
4134 : {
4135 192 : xmltype *result = NULL;
4136 :
4137 192 : if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
4138 162 : {
4139 162 : void (*volatile nodefree) (xmlNodePtr) = NULL;
4140 162 : volatile xmlBufferPtr buf = NULL;
4141 162 : volatile xmlNodePtr cur_copy = NULL;
4142 :
4143 162 : PG_TRY();
4144 : {
4145 : int bytes;
4146 :
4147 162 : buf = xmlBufferCreate();
4148 162 : if (buf == NULL || xmlerrcxt->err_occurred)
4149 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4150 : "could not allocate xmlBuffer");
4151 :
4152 : /*
4153 : * Produce a dump of the node that we can serialize. xmlNodeDump
4154 : * does that, but the result of that function won't contain
4155 : * namespace definitions from ancestor nodes, so we first do a
4156 : * xmlCopyNode() which duplicates the node along with its required
4157 : * namespace definitions.
4158 : *
4159 : * Some old libxml2 versions such as 2.7.6 produce partially
4160 : * broken XML_DOCUMENT_NODE nodes (unset content field) when
4161 : * copying them. xmlNodeDump of such a node works fine, but
4162 : * xmlFreeNode crashes; set us up to call xmlFreeDoc instead.
4163 : */
4164 162 : cur_copy = xmlCopyNode(cur, 1);
4165 162 : if (cur_copy == NULL || xmlerrcxt->err_occurred)
4166 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4167 : "could not copy node");
4168 324 : nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ?
4169 162 : (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode;
4170 :
4171 162 : bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0);
4172 162 : if (bytes == -1 || xmlerrcxt->err_occurred)
4173 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4174 : "could not dump node");
4175 :
4176 162 : result = xmlBuffer_to_xmltype(buf);
4177 : }
4178 0 : PG_FINALLY();
4179 : {
4180 162 : if (nodefree)
4181 162 : nodefree(cur_copy);
4182 162 : if (buf)
4183 162 : xmlBufferFree(buf);
4184 : }
4185 162 : PG_END_TRY();
4186 : }
4187 : else
4188 : {
4189 : xmlChar *str;
4190 :
4191 30 : str = xmlXPathCastNodeToString(cur);
4192 30 : PG_TRY();
4193 : {
4194 : /* Here we rely on XML having the same representation as TEXT */
4195 30 : char *escaped = escape_xml((char *) str);
4196 :
4197 30 : result = (xmltype *) cstring_to_text(escaped);
4198 30 : pfree(escaped);
4199 : }
4200 0 : PG_FINALLY();
4201 : {
4202 30 : xmlFree(str);
4203 : }
4204 30 : PG_END_TRY();
4205 : }
4206 :
4207 192 : return result;
4208 : }
4209 :
4210 : /*
4211 : * Convert an XML XPath object (the result of evaluating an XPath expression)
4212 : * to an array of xml values, which are appended to astate. The function
4213 : * result value is the number of elements in the array.
4214 : *
4215 : * If "astate" is NULL then we don't generate the array value, but we still
4216 : * return the number of elements it would have had.
4217 : *
4218 : * Nodesets are converted to an array containing the nodes' textual
4219 : * representations. Primitive values (float, double, string) are converted
4220 : * to a single-element array containing the value's string representation.
4221 : */
4222 : static int
4223 540 : xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
4224 : ArrayBuildState *astate,
4225 : PgXmlErrorContext *xmlerrcxt)
4226 : {
4227 540 : int result = 0;
4228 : Datum datum;
4229 : Oid datumtype;
4230 : char *result_str;
4231 :
4232 540 : switch (xpathobj->type)
4233 : {
4234 498 : case XPATH_NODESET:
4235 498 : if (xpathobj->nodesetval != NULL)
4236 : {
4237 354 : result = xpathobj->nodesetval->nodeNr;
4238 354 : if (astate != NULL)
4239 : {
4240 : int i;
4241 :
4242 168 : for (i = 0; i < result; i++)
4243 : {
4244 90 : datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4245 : xmlerrcxt));
4246 90 : (void) accumArrayResult(astate, datum, false,
4247 : XMLOID, CurrentMemoryContext);
4248 : }
4249 : }
4250 : }
4251 498 : return result;
4252 :
4253 12 : case XPATH_BOOLEAN:
4254 12 : if (astate == NULL)
4255 0 : return 1;
4256 12 : datum = BoolGetDatum(xpathobj->boolval);
4257 12 : datumtype = BOOLOID;
4258 12 : break;
4259 :
4260 18 : case XPATH_NUMBER:
4261 18 : if (astate == NULL)
4262 12 : return 1;
4263 6 : datum = Float8GetDatum(xpathobj->floatval);
4264 6 : datumtype = FLOAT8OID;
4265 6 : break;
4266 :
4267 12 : case XPATH_STRING:
4268 12 : if (astate == NULL)
4269 0 : return 1;
4270 12 : datum = CStringGetDatum((char *) xpathobj->stringval);
4271 12 : datumtype = CSTRINGOID;
4272 12 : break;
4273 :
4274 0 : default:
4275 0 : elog(ERROR, "xpath expression result type %d is unsupported",
4276 : xpathobj->type);
4277 : return 0; /* keep compiler quiet */
4278 : }
4279 :
4280 : /* Common code for scalar-value cases */
4281 30 : result_str = map_sql_value_to_xml_value(datum, datumtype, true);
4282 30 : datum = PointerGetDatum(cstring_to_xmltype(result_str));
4283 30 : (void) accumArrayResult(astate, datum, false,
4284 : XMLOID, CurrentMemoryContext);
4285 30 : return 1;
4286 : }
4287 :
4288 :
4289 : /*
4290 : * Common code for xpath() and xmlexists()
4291 : *
4292 : * Evaluate XPath expression and return number of nodes in res_nitems
4293 : * and array of XML values in astate. Either of those pointers can be
4294 : * NULL if the corresponding result isn't wanted.
4295 : *
4296 : * It is up to the user to ensure that the XML passed is in fact
4297 : * an XML document - XPath doesn't work easily on fragments without
4298 : * a context node being known.
4299 : */
4300 : static void
4301 558 : xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
4302 : int *res_nitems, ArrayBuildState *astate)
4303 : {
4304 : PgXmlErrorContext *xmlerrcxt;
4305 558 : volatile xmlParserCtxtPtr ctxt = NULL;
4306 558 : volatile xmlDocPtr doc = NULL;
4307 558 : volatile xmlXPathContextPtr xpathctx = NULL;
4308 558 : volatile xmlXPathCompExprPtr xpathcomp = NULL;
4309 558 : volatile xmlXPathObjectPtr xpathobj = NULL;
4310 : char *datastr;
4311 : int32 len;
4312 : int32 xpath_len;
4313 : xmlChar *string;
4314 : xmlChar *xpath_expr;
4315 558 : size_t xmldecl_len = 0;
4316 : int i;
4317 : int ndim;
4318 : Datum *ns_names_uris;
4319 : bool *ns_names_uris_nulls;
4320 : int ns_count;
4321 :
4322 : /*
4323 : * Namespace mappings are passed as text[]. If an empty array is passed
4324 : * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
4325 : * Else, a 2-dimensional array with length of the second axis being equal
4326 : * to 2 should be passed, i.e., every subarray contains 2 elements, the
4327 : * first element defining the name, the second one the URI. Example:
4328 : * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
4329 : * 'http://example2.com']].
4330 : */
4331 558 : ndim = namespaces ? ARR_NDIM(namespaces) : 0;
4332 558 : if (ndim != 0)
4333 : {
4334 : int *dims;
4335 :
4336 126 : dims = ARR_DIMS(namespaces);
4337 :
4338 126 : if (ndim != 2 || dims[1] != 2)
4339 0 : ereport(ERROR,
4340 : (errcode(ERRCODE_DATA_EXCEPTION),
4341 : errmsg("invalid array for XML namespace mapping"),
4342 : errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
4343 :
4344 : Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
4345 :
4346 126 : deconstruct_array_builtin(namespaces, TEXTOID,
4347 : &ns_names_uris, &ns_names_uris_nulls,
4348 : &ns_count);
4349 :
4350 : Assert((ns_count % 2) == 0); /* checked above */
4351 126 : ns_count /= 2; /* count pairs only */
4352 : }
4353 : else
4354 : {
4355 432 : ns_names_uris = NULL;
4356 432 : ns_names_uris_nulls = NULL;
4357 432 : ns_count = 0;
4358 : }
4359 :
4360 558 : datastr = VARDATA(data);
4361 558 : len = VARSIZE(data) - VARHDRSZ;
4362 558 : xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
4363 558 : if (xpath_len == 0)
4364 6 : ereport(ERROR,
4365 : (errcode(ERRCODE_DATA_EXCEPTION),
4366 : errmsg("empty XPath expression")));
4367 :
4368 552 : string = pg_xmlCharStrndup(datastr, len);
4369 552 : xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
4370 :
4371 : /*
4372 : * In a UTF8 database, skip any xml declaration, which might assert
4373 : * another encoding. Ignore parse_xml_decl() failure, letting
4374 : * xmlCtxtReadMemory() report parse errors. Documentation disclaims
4375 : * xpath() support for non-ASCII data in non-UTF8 databases, so leave
4376 : * those scenarios bug-compatible with historical behavior.
4377 : */
4378 552 : if (GetDatabaseEncoding() == PG_UTF8)
4379 552 : parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
4380 :
4381 552 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4382 :
4383 552 : PG_TRY();
4384 : {
4385 552 : xmlInitParser();
4386 :
4387 : /*
4388 : * redundant XML parsing (two parsings for the same value during one
4389 : * command execution are possible)
4390 : */
4391 552 : ctxt = xmlNewParserCtxt();
4392 552 : if (ctxt == NULL || xmlerrcxt->err_occurred)
4393 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4394 : "could not allocate parser context");
4395 1104 : doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4396 552 : len - xmldecl_len, NULL, NULL, 0);
4397 552 : if (doc == NULL || xmlerrcxt->err_occurred)
4398 12 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4399 : "could not parse XML document");
4400 540 : xpathctx = xmlXPathNewContext(doc);
4401 540 : if (xpathctx == NULL || xmlerrcxt->err_occurred)
4402 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4403 : "could not allocate XPath context");
4404 540 : xpathctx->node = (xmlNodePtr) doc;
4405 :
4406 : /* register namespaces, if any */
4407 540 : if (ns_count > 0)
4408 : {
4409 252 : for (i = 0; i < ns_count; i++)
4410 : {
4411 : char *ns_name;
4412 : char *ns_uri;
4413 :
4414 126 : if (ns_names_uris_nulls[i * 2] ||
4415 126 : ns_names_uris_nulls[i * 2 + 1])
4416 0 : ereport(ERROR,
4417 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4418 : errmsg("neither namespace name nor URI may be null")));
4419 126 : ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
4420 126 : ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
4421 126 : if (xmlXPathRegisterNs(xpathctx,
4422 : (xmlChar *) ns_name,
4423 : (xmlChar *) ns_uri) != 0)
4424 0 : ereport(ERROR, /* is this an internal error??? */
4425 : (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
4426 : ns_name, ns_uri)));
4427 : }
4428 : }
4429 :
4430 540 : xpathcomp = xmlXPathCompile(xpath_expr);
4431 540 : if (xpathcomp == NULL || xmlerrcxt->err_occurred)
4432 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4433 : "invalid XPath expression");
4434 :
4435 : /*
4436 : * Version 2.6.27 introduces a function named
4437 : * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
4438 : * but we can derive the existence by whether any nodes are returned,
4439 : * thereby preventing a library version upgrade and keeping the code
4440 : * the same.
4441 : */
4442 540 : xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
4443 540 : if (xpathobj == NULL || xmlerrcxt->err_occurred)
4444 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4445 : "could not create XPath object");
4446 :
4447 : /*
4448 : * Extract the results as requested.
4449 : */
4450 540 : if (res_nitems != NULL)
4451 432 : *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4452 : else
4453 108 : (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4454 : }
4455 12 : PG_CATCH();
4456 : {
4457 12 : if (xpathobj)
4458 0 : xmlXPathFreeObject(xpathobj);
4459 12 : if (xpathcomp)
4460 0 : xmlXPathFreeCompExpr(xpathcomp);
4461 12 : if (xpathctx)
4462 0 : xmlXPathFreeContext(xpathctx);
4463 12 : if (doc)
4464 12 : xmlFreeDoc(doc);
4465 12 : if (ctxt)
4466 12 : xmlFreeParserCtxt(ctxt);
4467 :
4468 12 : pg_xml_done(xmlerrcxt, true);
4469 :
4470 12 : PG_RE_THROW();
4471 : }
4472 540 : PG_END_TRY();
4473 :
4474 540 : xmlXPathFreeObject(xpathobj);
4475 540 : xmlXPathFreeCompExpr(xpathcomp);
4476 540 : xmlXPathFreeContext(xpathctx);
4477 540 : xmlFreeDoc(doc);
4478 540 : xmlFreeParserCtxt(ctxt);
4479 :
4480 540 : pg_xml_done(xmlerrcxt, false);
4481 540 : }
4482 : #endif /* USE_LIBXML */
4483 :
4484 : /*
4485 : * Evaluate XPath expression and return array of XML values.
4486 : *
4487 : * As we have no support of XQuery sequences yet, this function seems
4488 : * to be the most useful one (array of XML functions plays a role of
4489 : * some kind of substitution for XQuery sequences).
4490 : */
4491 : Datum
4492 126 : xpath(PG_FUNCTION_ARGS)
4493 : {
4494 : #ifdef USE_LIBXML
4495 126 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4496 126 : xmltype *data = PG_GETARG_XML_P(1);
4497 126 : ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4498 : ArrayBuildState *astate;
4499 :
4500 126 : astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4501 126 : xpath_internal(xpath_expr_text, data, namespaces,
4502 : NULL, astate);
4503 108 : PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
4504 : #else
4505 : NO_XML_SUPPORT();
4506 : return 0;
4507 : #endif
4508 : }
4509 :
4510 : /*
4511 : * Determines if the node specified by the supplied XPath exists
4512 : * in a given XML document, returning a boolean.
4513 : */
4514 : Datum
4515 198 : xmlexists(PG_FUNCTION_ARGS)
4516 : {
4517 : #ifdef USE_LIBXML
4518 198 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4519 198 : xmltype *data = PG_GETARG_XML_P(1);
4520 : int res_nitems;
4521 :
4522 198 : xpath_internal(xpath_expr_text, data, NULL,
4523 : &res_nitems, NULL);
4524 :
4525 198 : PG_RETURN_BOOL(res_nitems > 0);
4526 : #else
4527 : NO_XML_SUPPORT();
4528 : return 0;
4529 : #endif
4530 : }
4531 :
4532 : /*
4533 : * Determines if the node specified by the supplied XPath exists
4534 : * in a given XML document, returning a boolean. Differs from
4535 : * xmlexists as it supports namespaces and is not defined in SQL/XML.
4536 : */
4537 : Datum
4538 234 : xpath_exists(PG_FUNCTION_ARGS)
4539 : {
4540 : #ifdef USE_LIBXML
4541 234 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4542 234 : xmltype *data = PG_GETARG_XML_P(1);
4543 234 : ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4544 : int res_nitems;
4545 :
4546 234 : xpath_internal(xpath_expr_text, data, namespaces,
4547 : &res_nitems, NULL);
4548 :
4549 234 : PG_RETURN_BOOL(res_nitems > 0);
4550 : #else
4551 : NO_XML_SUPPORT();
4552 : return 0;
4553 : #endif
4554 : }
4555 :
4556 : /*
4557 : * Functions for checking well-formed-ness
4558 : */
4559 :
4560 : #ifdef USE_LIBXML
4561 : static bool
4562 114 : wellformed_xml(text *data, XmlOptionType xmloption_arg)
4563 : {
4564 : xmlDocPtr doc;
4565 114 : ErrorSaveContext escontext = {T_ErrorSaveContext};
4566 :
4567 : /*
4568 : * We'll report "true" if no soft error is reported by xml_parse().
4569 : */
4570 114 : doc = xml_parse(data, xmloption_arg, true,
4571 : GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
4572 114 : if (doc)
4573 60 : xmlFreeDoc(doc);
4574 :
4575 114 : return !escontext.error_occurred;
4576 : }
4577 : #endif
4578 :
4579 : Datum
4580 90 : xml_is_well_formed(PG_FUNCTION_ARGS)
4581 : {
4582 : #ifdef USE_LIBXML
4583 90 : text *data = PG_GETARG_TEXT_PP(0);
4584 :
4585 90 : PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4586 : #else
4587 : NO_XML_SUPPORT();
4588 : return 0;
4589 : #endif /* not USE_LIBXML */
4590 : }
4591 :
4592 : Datum
4593 12 : xml_is_well_formed_document(PG_FUNCTION_ARGS)
4594 : {
4595 : #ifdef USE_LIBXML
4596 12 : text *data = PG_GETARG_TEXT_PP(0);
4597 :
4598 12 : PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4599 : #else
4600 : NO_XML_SUPPORT();
4601 : return 0;
4602 : #endif /* not USE_LIBXML */
4603 : }
4604 :
4605 : Datum
4606 12 : xml_is_well_formed_content(PG_FUNCTION_ARGS)
4607 : {
4608 : #ifdef USE_LIBXML
4609 12 : text *data = PG_GETARG_TEXT_PP(0);
4610 :
4611 12 : PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4612 : #else
4613 : NO_XML_SUPPORT();
4614 : return 0;
4615 : #endif /* not USE_LIBXML */
4616 : }
4617 :
4618 : /*
4619 : * support functions for XMLTABLE
4620 : *
4621 : */
4622 : #ifdef USE_LIBXML
4623 :
4624 : /*
4625 : * Returns private data from executor state. Ensure validity by check with
4626 : * MAGIC number.
4627 : */
4628 : static inline XmlTableBuilderData *
4629 155188 : GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4630 : {
4631 : XmlTableBuilderData *result;
4632 :
4633 155188 : if (!IsA(state, TableFuncScanState))
4634 0 : elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4635 155188 : result = (XmlTableBuilderData *) state->opaque;
4636 155188 : if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4637 0 : elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4638 :
4639 155188 : return result;
4640 : }
4641 : #endif
4642 :
4643 : /*
4644 : * XmlTableInitOpaque
4645 : * Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4646 : * the XML parser.
4647 : *
4648 : * Note: Because we call pg_xml_init() here and pg_xml_done() in
4649 : * XmlTableDestroyOpaque, it is critical for robustness that no other
4650 : * executor nodes run until this node is processed to completion. Caller
4651 : * must execute this to completion (probably filling a tuplestore to exhaust
4652 : * this node in a single pass) instead of using row-per-call mode.
4653 : */
4654 : static void
4655 264 : XmlTableInitOpaque(TableFuncScanState *state, int natts)
4656 : {
4657 : #ifdef USE_LIBXML
4658 264 : volatile xmlParserCtxtPtr ctxt = NULL;
4659 : XmlTableBuilderData *xtCxt;
4660 : PgXmlErrorContext *xmlerrcxt;
4661 :
4662 264 : xtCxt = palloc0(sizeof(XmlTableBuilderData));
4663 264 : xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4664 264 : xtCxt->natts = natts;
4665 264 : xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4666 :
4667 264 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4668 :
4669 264 : PG_TRY();
4670 : {
4671 264 : xmlInitParser();
4672 :
4673 264 : ctxt = xmlNewParserCtxt();
4674 264 : if (ctxt == NULL || xmlerrcxt->err_occurred)
4675 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4676 : "could not allocate parser context");
4677 : }
4678 0 : PG_CATCH();
4679 : {
4680 0 : if (ctxt != NULL)
4681 0 : xmlFreeParserCtxt(ctxt);
4682 :
4683 0 : pg_xml_done(xmlerrcxt, true);
4684 :
4685 0 : PG_RE_THROW();
4686 : }
4687 264 : PG_END_TRY();
4688 :
4689 264 : xtCxt->xmlerrcxt = xmlerrcxt;
4690 264 : xtCxt->ctxt = ctxt;
4691 :
4692 264 : state->opaque = xtCxt;
4693 : #else
4694 : NO_XML_SUPPORT();
4695 : #endif /* not USE_LIBXML */
4696 264 : }
4697 :
4698 : /*
4699 : * XmlTableSetDocument
4700 : * Install the input document
4701 : */
4702 : static void
4703 264 : XmlTableSetDocument(TableFuncScanState *state, Datum value)
4704 : {
4705 : #ifdef USE_LIBXML
4706 : XmlTableBuilderData *xtCxt;
4707 264 : xmltype *xmlval = DatumGetXmlP(value);
4708 : char *str;
4709 : xmlChar *xstr;
4710 : int length;
4711 264 : volatile xmlDocPtr doc = NULL;
4712 264 : volatile xmlXPathContextPtr xpathcxt = NULL;
4713 :
4714 264 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4715 :
4716 : /*
4717 : * Use out function for casting to string (remove encoding property). See
4718 : * comment in xml_out.
4719 : */
4720 264 : str = xml_out_internal(xmlval, 0);
4721 :
4722 264 : length = strlen(str);
4723 264 : xstr = pg_xmlCharStrndup(str, length);
4724 :
4725 264 : PG_TRY();
4726 : {
4727 264 : doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4728 264 : if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4729 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4730 : "could not parse XML document");
4731 264 : xpathcxt = xmlXPathNewContext(doc);
4732 264 : if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4733 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4734 : "could not allocate XPath context");
4735 264 : xpathcxt->node = (xmlNodePtr) doc;
4736 : }
4737 0 : PG_CATCH();
4738 : {
4739 0 : if (xpathcxt != NULL)
4740 0 : xmlXPathFreeContext(xpathcxt);
4741 0 : if (doc != NULL)
4742 0 : xmlFreeDoc(doc);
4743 :
4744 0 : PG_RE_THROW();
4745 : }
4746 264 : PG_END_TRY();
4747 :
4748 264 : xtCxt->doc = doc;
4749 264 : xtCxt->xpathcxt = xpathcxt;
4750 : #else
4751 : NO_XML_SUPPORT();
4752 : #endif /* not USE_LIBXML */
4753 264 : }
4754 :
4755 : /*
4756 : * XmlTableSetNamespace
4757 : * Add a namespace declaration
4758 : */
4759 : static void
4760 18 : XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
4761 : {
4762 : #ifdef USE_LIBXML
4763 : XmlTableBuilderData *xtCxt;
4764 :
4765 18 : if (name == NULL)
4766 6 : ereport(ERROR,
4767 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4768 : errmsg("DEFAULT namespace is not supported")));
4769 12 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4770 :
4771 12 : if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4772 12 : pg_xmlCharStrndup(name, strlen(name)),
4773 12 : pg_xmlCharStrndup(uri, strlen(uri))))
4774 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4775 : "could not set XML namespace");
4776 : #else
4777 : NO_XML_SUPPORT();
4778 : #endif /* not USE_LIBXML */
4779 12 : }
4780 :
4781 : /*
4782 : * XmlTableSetRowFilter
4783 : * Install the row-filter Xpath expression.
4784 : */
4785 : static void
4786 258 : XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4787 : {
4788 : #ifdef USE_LIBXML
4789 : XmlTableBuilderData *xtCxt;
4790 : xmlChar *xstr;
4791 :
4792 258 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4793 :
4794 258 : if (*path == '\0')
4795 0 : ereport(ERROR,
4796 : (errcode(ERRCODE_DATA_EXCEPTION),
4797 : errmsg("row path filter must not be empty string")));
4798 :
4799 258 : xstr = pg_xmlCharStrndup(path, strlen(path));
4800 :
4801 258 : xtCxt->xpathcomp = xmlXPathCompile(xstr);
4802 258 : if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4803 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR,
4804 : "invalid XPath expression");
4805 : #else
4806 : NO_XML_SUPPORT();
4807 : #endif /* not USE_LIBXML */
4808 258 : }
4809 :
4810 : /*
4811 : * XmlTableSetColumnFilter
4812 : * Install the column-filter Xpath expression, for the given column.
4813 : */
4814 : static void
4815 774 : XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
4816 : {
4817 : #ifdef USE_LIBXML
4818 : XmlTableBuilderData *xtCxt;
4819 : xmlChar *xstr;
4820 :
4821 : Assert(PointerIsValid(path));
4822 :
4823 774 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4824 :
4825 774 : if (*path == '\0')
4826 0 : ereport(ERROR,
4827 : (errcode(ERRCODE_DATA_EXCEPTION),
4828 : errmsg("column path filter must not be empty string")));
4829 :
4830 774 : xstr = pg_xmlCharStrndup(path, strlen(path));
4831 :
4832 774 : xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
4833 774 : if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4834 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4835 : "invalid XPath expression");
4836 : #else
4837 : NO_XML_SUPPORT();
4838 : #endif /* not USE_LIBXML */
4839 774 : }
4840 :
4841 : /*
4842 : * XmlTableFetchRow
4843 : * Prepare the next "current" tuple for upcoming GetValue calls.
4844 : * Returns false if the row-filter expression returned no more rows.
4845 : */
4846 : static bool
4847 22306 : XmlTableFetchRow(TableFuncScanState *state)
4848 : {
4849 : #ifdef USE_LIBXML
4850 : XmlTableBuilderData *xtCxt;
4851 :
4852 22306 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4853 :
4854 : /* Propagate our own error context to libxml2 */
4855 22306 : xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4856 :
4857 22306 : if (xtCxt->xpathobj == NULL)
4858 : {
4859 258 : xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4860 258 : if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4861 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4862 : "could not create XPath object");
4863 :
4864 258 : xtCxt->row_count = 0;
4865 : }
4866 :
4867 22306 : if (xtCxt->xpathobj->type == XPATH_NODESET)
4868 : {
4869 22306 : if (xtCxt->xpathobj->nodesetval != NULL)
4870 : {
4871 22306 : if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4872 22060 : return true;
4873 : }
4874 : }
4875 :
4876 246 : return false;
4877 : #else
4878 : NO_XML_SUPPORT();
4879 : return false;
4880 : #endif /* not USE_LIBXML */
4881 : }
4882 :
4883 : /*
4884 : * XmlTableGetValue
4885 : * Return the value for column number 'colnum' for the current row. If
4886 : * column -1 is requested, return representation of the whole row.
4887 : *
4888 : * This leaks memory, so be sure to reset often the context in which it's
4889 : * called.
4890 : */
4891 : static Datum
4892 131310 : XmlTableGetValue(TableFuncScanState *state, int colnum,
4893 : Oid typid, int32 typmod, bool *isnull)
4894 : {
4895 : #ifdef USE_LIBXML
4896 : XmlTableBuilderData *xtCxt;
4897 131310 : Datum result = (Datum) 0;
4898 : xmlNodePtr cur;
4899 131310 : char *cstr = NULL;
4900 131310 : volatile xmlXPathObjectPtr xpathobj = NULL;
4901 :
4902 131310 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4903 :
4904 : Assert(xtCxt->xpathobj &&
4905 : xtCxt->xpathobj->type == XPATH_NODESET &&
4906 : xtCxt->xpathobj->nodesetval != NULL);
4907 :
4908 : /* Propagate our own error context to libxml2 */
4909 131310 : xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4910 :
4911 131310 : *isnull = false;
4912 :
4913 131310 : cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
4914 :
4915 : Assert(xtCxt->xpathscomp[colnum] != NULL);
4916 :
4917 131310 : PG_TRY();
4918 : {
4919 : /* Set current node as entry point for XPath evaluation */
4920 131310 : xtCxt->xpathcxt->node = cur;
4921 :
4922 : /* Evaluate column path */
4923 131310 : xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
4924 131310 : if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4925 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4926 : "could not create XPath object");
4927 :
4928 : /*
4929 : * There are four possible cases, depending on the number of nodes
4930 : * returned by the XPath expression and the type of the target column:
4931 : * a) XPath returns no nodes. b) The target type is XML (return all
4932 : * as XML). For non-XML return types: c) One node (return content).
4933 : * d) Multiple nodes (error).
4934 : */
4935 131310 : if (xpathobj->type == XPATH_NODESET)
4936 : {
4937 131280 : int count = 0;
4938 :
4939 131280 : if (xpathobj->nodesetval != NULL)
4940 131070 : count = xpathobj->nodesetval->nodeNr;
4941 :
4942 131280 : if (xpathobj->nodesetval == NULL || count == 0)
4943 : {
4944 21972 : *isnull = true;
4945 : }
4946 : else
4947 : {
4948 109308 : if (typid == XMLOID)
4949 : {
4950 : text *textstr;
4951 : StringInfoData str;
4952 :
4953 : /* Concatenate serialized values */
4954 72 : initStringInfo(&str);
4955 174 : for (int i = 0; i < count; i++)
4956 : {
4957 : textstr =
4958 102 : xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4959 : xtCxt->xmlerrcxt);
4960 :
4961 102 : appendStringInfoText(&str, textstr);
4962 : }
4963 72 : cstr = str.data;
4964 : }
4965 : else
4966 : {
4967 : xmlChar *str;
4968 :
4969 109236 : if (count > 1)
4970 6 : ereport(ERROR,
4971 : (errcode(ERRCODE_CARDINALITY_VIOLATION),
4972 : errmsg("more than one value returned by column XPath expression")));
4973 :
4974 109230 : str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
4975 109230 : cstr = str ? xml_pstrdup_and_free(str) : "";
4976 : }
4977 : }
4978 : }
4979 30 : else if (xpathobj->type == XPATH_STRING)
4980 : {
4981 : /* Content should be escaped when target will be XML */
4982 18 : if (typid == XMLOID)
4983 6 : cstr = escape_xml((char *) xpathobj->stringval);
4984 : else
4985 12 : cstr = (char *) xpathobj->stringval;
4986 : }
4987 12 : else if (xpathobj->type == XPATH_BOOLEAN)
4988 : {
4989 : char typcategory;
4990 : bool typispreferred;
4991 : xmlChar *str;
4992 :
4993 : /* Allow implicit casting from boolean to numbers */
4994 6 : get_type_category_preferred(typid, &typcategory, &typispreferred);
4995 :
4996 6 : if (typcategory != TYPCATEGORY_NUMERIC)
4997 6 : str = xmlXPathCastBooleanToString(xpathobj->boolval);
4998 : else
4999 0 : str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
5000 :
5001 6 : cstr = xml_pstrdup_and_free(str);
5002 : }
5003 6 : else if (xpathobj->type == XPATH_NUMBER)
5004 : {
5005 : xmlChar *str;
5006 :
5007 6 : str = xmlXPathCastNumberToString(xpathobj->floatval);
5008 6 : cstr = xml_pstrdup_and_free(str);
5009 : }
5010 : else
5011 0 : elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
5012 :
5013 : /*
5014 : * By here, either cstr contains the result value, or the isnull flag
5015 : * has been set.
5016 : */
5017 : Assert(cstr || *isnull);
5018 :
5019 131304 : if (!*isnull)
5020 109332 : result = InputFunctionCall(&state->in_functions[colnum],
5021 : cstr,
5022 109332 : state->typioparams[colnum],
5023 : typmod);
5024 : }
5025 6 : PG_FINALLY();
5026 : {
5027 131310 : if (xpathobj != NULL)
5028 131310 : xmlXPathFreeObject(xpathobj);
5029 : }
5030 131310 : PG_END_TRY();
5031 :
5032 131304 : return result;
5033 : #else
5034 : NO_XML_SUPPORT();
5035 : return 0;
5036 : #endif /* not USE_LIBXML */
5037 : }
5038 :
5039 : /*
5040 : * XmlTableDestroyOpaque
5041 : * Release all libxml2 resources
5042 : */
5043 : static void
5044 264 : XmlTableDestroyOpaque(TableFuncScanState *state)
5045 : {
5046 : #ifdef USE_LIBXML
5047 : XmlTableBuilderData *xtCxt;
5048 :
5049 264 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
5050 :
5051 : /* Propagate our own error context to libxml2 */
5052 264 : xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
5053 :
5054 264 : if (xtCxt->xpathscomp != NULL)
5055 : {
5056 : int i;
5057 :
5058 1116 : for (i = 0; i < xtCxt->natts; i++)
5059 852 : if (xtCxt->xpathscomp[i] != NULL)
5060 774 : xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
5061 : }
5062 :
5063 264 : if (xtCxt->xpathobj != NULL)
5064 258 : xmlXPathFreeObject(xtCxt->xpathobj);
5065 264 : if (xtCxt->xpathcomp != NULL)
5066 258 : xmlXPathFreeCompExpr(xtCxt->xpathcomp);
5067 264 : if (xtCxt->xpathcxt != NULL)
5068 264 : xmlXPathFreeContext(xtCxt->xpathcxt);
5069 264 : if (xtCxt->doc != NULL)
5070 264 : xmlFreeDoc(xtCxt->doc);
5071 264 : if (xtCxt->ctxt != NULL)
5072 264 : xmlFreeParserCtxt(xtCxt->ctxt);
5073 :
5074 264 : pg_xml_done(xtCxt->xmlerrcxt, true);
5075 :
5076 : /* not valid anymore */
5077 264 : xtCxt->magic = 0;
5078 264 : state->opaque = NULL;
5079 :
5080 : #else
5081 : NO_XML_SUPPORT();
5082 : #endif /* not USE_LIBXML */
5083 264 : }
|