Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * xml.c
4 : * XML data type support.
5 : *
6 : *
7 : * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
8 : * Portions Copyright (c) 1994, Regents of the University of California
9 : *
10 : * src/backend/utils/adt/xml.c
11 : *
12 : *-------------------------------------------------------------------------
13 : */
14 :
15 : /*
16 : * Generally, XML type support is only available when libxml use was
17 : * configured during the build. But even if that is not done, the
18 : * type and all the functions are available, but most of them will
19 : * fail. For one thing, this avoids having to manage variant catalog
20 : * installations. But it also has nice effects such as that you can
21 : * dump a database containing XML type data even if the server is not
22 : * linked with libxml. Thus, make sure xml_out() works even if nothing
23 : * else does.
24 : */
25 :
26 : /*
27 : * Notes on memory management:
28 : *
29 : * Sometimes libxml allocates global structures in the hope that it can reuse
30 : * them later on. This makes it impractical to change the xmlMemSetup
31 : * functions on-the-fly; that is likely to lead to trying to pfree() chunks
32 : * allocated with malloc() or vice versa. Since libxml might be used by
33 : * loadable modules, eg libperl, our only safe choices are to change the
34 : * functions at postmaster/backend launch or not at all. Since we'd rather
35 : * not activate libxml in sessions that might never use it, the latter choice
36 : * is the preferred one. However, for debugging purposes it can be awfully
37 : * handy to constrain libxml's allocations to be done in a specific palloc
38 : * context, where they're easy to track. Therefore there is code here that
39 : * can be enabled in debug builds to redirect libxml's allocations into a
40 : * special context LibxmlContext. It's not recommended to turn this on in
41 : * a production build because of the possibility of bad interactions with
42 : * external modules.
43 : */
44 : /* #define USE_LIBXMLCONTEXT */
45 :
46 : #include "postgres.h"
47 :
48 : #ifdef USE_LIBXML
49 : #include <libxml/chvalid.h>
50 : #include <libxml/parser.h>
51 : #include <libxml/parserInternals.h>
52 : #include <libxml/tree.h>
53 : #include <libxml/uri.h>
54 : #include <libxml/xmlerror.h>
55 : #include <libxml/xmlversion.h>
56 : #include <libxml/xmlwriter.h>
57 : #include <libxml/xpath.h>
58 : #include <libxml/xpathInternals.h>
59 :
60 : /*
61 : * We used to check for xmlStructuredErrorContext via a configure test; but
62 : * that doesn't work on Windows, so instead use this grottier method of
63 : * testing the library version number.
64 : */
65 : #if LIBXML_VERSION >= 20704
66 : #define HAVE_XMLSTRUCTUREDERRORCONTEXT 1
67 : #endif
68 : #endif /* USE_LIBXML */
69 :
70 : #include "access/htup_details.h"
71 : #include "access/table.h"
72 : #include "catalog/namespace.h"
73 : #include "catalog/pg_class.h"
74 : #include "catalog/pg_type.h"
75 : #include "commands/dbcommands.h"
76 : #include "executor/spi.h"
77 : #include "executor/tablefunc.h"
78 : #include "fmgr.h"
79 : #include "lib/stringinfo.h"
80 : #include "libpq/pqformat.h"
81 : #include "mb/pg_wchar.h"
82 : #include "miscadmin.h"
83 : #include "nodes/execnodes.h"
84 : #include "nodes/nodeFuncs.h"
85 : #include "utils/array.h"
86 : #include "utils/builtins.h"
87 : #include "utils/date.h"
88 : #include "utils/datetime.h"
89 : #include "utils/lsyscache.h"
90 : #include "utils/memutils.h"
91 : #include "utils/rel.h"
92 : #include "utils/syscache.h"
93 : #include "utils/xml.h"
94 :
95 :
96 : /* GUC variables */
97 : int xmlbinary; /* NOTE(review): not referenced in the visible part of this file */
98 : int xmloption; /* XmlOptionType that xml_in, xml_recv and texttoxml hand on to the parser */
99 :
100 : #ifdef USE_LIBXML
101 :
102 : /* random number to identify PgXmlErrorContext; pg_xml_init() stores it in ->magic and pg_xml_done() Asserts on it */
103 : #define ERRCXT_MAGIC 68275028
104 :
105 : struct PgXmlErrorContext /* libxml error-handling state; palloc'd and filled in by pg_xml_init() */
106 : {
107 : int magic; /* set to ERRCXT_MAGIC */
108 : /* strictness argument passed to pg_xml_init */
109 : PgXmlStrictness strictness;
110 : /* current error status and accumulated message, if any */
111 : bool err_occurred; /* starts out false */
112 : StringInfoData err_buf; /* starts out empty (initStringInfo) */
113 : /* previous libxml error handling state (saved by pg_xml_init) */
114 : xmlStructuredErrorFunc saved_errfunc; /* value of xmlStructuredError at init time */
115 : void *saved_errcxt; /* xmlStructuredErrorContext, or xmlGenericErrorContext when built against libxml2 older than 2.7.4 */
116 : /* previous libxml entity handler (saved by pg_xml_init) */
117 : xmlExternalEntityLoader saved_entityfunc; /* result of xmlGetExternalEntityLoader() at init time */
118 : };
119 :
120 : static xmlParserInputPtr xmlPgEntityLoader(const char *URL, const char *ID,
121 : xmlParserCtxtPtr ctxt); /* installed by pg_xml_init() via xmlSetExternalEntityLoader() */
122 : static void xml_errorHandler(void *data, xmlErrorPtr error); /* installed by pg_xml_init() via xmlSetStructuredErrorFunc() */
123 : static void xml_ereport_by_code(int level, int sqlcode,
124 : const char *msg, int errcode); /* used by xml_out_internal() to report a parse_xml_decl() result code */
125 : static void chopStringInfoNewlines(StringInfo str);
126 : static void appendStringInfoLineSeparator(StringInfo str);
127 :
128 : #ifdef USE_LIBXMLCONTEXT
129 :
130 : static MemoryContext LibxmlContext = NULL; /* debug-only context for libxml allocations; see the memory management notes in the file header */
131 :
132 : static void xml_memory_init(void); /* called once from pg_xml_init_library() */
133 : static void *xml_palloc(size_t size);
134 : static void *xml_repalloc(void *ptr, size_t size);
135 : static void xml_pfree(void *ptr);
136 : static char *xml_pstrdup(const char *string);
137 : #endif /* USE_LIBXMLCONTEXT */
138 :
139 : static xmlChar *xml_text2xmlChar(text *in);
140 : static int parse_xml_decl(const xmlChar *str, size_t *lenp,
141 : xmlChar **version, xmlChar **encoding, int *standalone); /* callers in this file pass NULL for outputs they do not need; 0 is treated as success */
142 : static bool print_xml_decl(StringInfo buf, const xmlChar *version,
143 : pg_enc encoding, int standalone); /* xml_out_internal() treats a false result as "no declaration was produced" */
144 : static bool xml_doctype_in_content(const xmlChar *str);
145 : static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
146 : bool preserve_whitespace, int encoding); /* callers rely on it raising ERROR for malformed input and free the result with xmlFreeDoc() */
147 : static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
148 : static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
149 : ArrayBuildState *astate,
150 : PgXmlErrorContext *xmlerrcxt);
151 : static xmlChar *pg_xmlCharStrndup(const char *str, size_t len);
152 : #endif /* USE_LIBXML */
153 : /* Forward declarations of static helpers that are compiled whether or not libxml is available (they sit outside the USE_LIBXML guard). */
154 : static void xmldata_root_element_start(StringInfo result, const char *eltname,
155 : const char *xmlschema, const char *targetns,
156 : bool top_level);
157 : static void xmldata_root_element_end(StringInfo result, const char *eltname);
158 : static StringInfo query_to_xml_internal(const char *query, char *tablename,
159 : const char *xmlschema, bool nulls, bool tableforest,
160 : const char *targetns, bool top_level);
161 : static const char *map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid,
162 : bool nulls, bool tableforest, const char *targetns);
163 : static const char *map_sql_schema_to_xmlschema_types(Oid nspid,
164 : List *relid_list, bool nulls,
165 : bool tableforest, const char *targetns);
166 : static const char *map_sql_catalog_to_xmlschema_types(List *nspid_list,
167 : bool nulls, bool tableforest,
168 : const char *targetns);
169 : static const char *map_sql_type_to_xml_name(Oid typeoid, int typmod);
170 : static const char *map_sql_typecoll_to_xmlschema_types(List *tupdesc_list);
171 : static const char *map_sql_type_to_xmlschema_type(Oid typeoid, int typmod);
172 : static void SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result,
173 : char *tablename, bool nulls, bool tableforest,
174 : const char *targetns, bool top_level);
175 :
176 : /* XMLTABLE support */
177 : #ifdef USE_LIBXML
178 : /* random number to identify XmlTableBuilderData (no type named "XmlTableContext" appears in the visible code) */
179 : #define XMLTABLE_CONTEXT_MAGIC 46922182
180 : typedef struct XmlTableBuilderData /* working state for the XMLTABLE callbacks; only defined when built with libxml */
181 : {
182 : int magic; /* presumably XMLTABLE_CONTEXT_MAGIC -- TODO confirm where it is set */
183 : int natts; /* NOTE(review): looks like the natts value given to XmlTableInitOpaque -- confirm */
184 : long int row_count;
185 : PgXmlErrorContext *xmlerrcxt;
186 : xmlParserCtxtPtr ctxt;
187 : xmlDocPtr doc;
188 : xmlXPathContextPtr xpathcxt;
189 : xmlXPathCompExprPtr xpathcomp;
190 : xmlXPathObjectPtr xpathobj;
191 : xmlXPathCompExprPtr *xpathscomp; /* pointer to compiled expressions; presumably one per column -- TODO confirm */
192 : } XmlTableBuilderData;
193 : #endif
194 :
195 : static void XmlTableInitOpaque(struct TableFuncScanState *state, int natts); /* XMLTABLE callbacks; declared unconditionally, i.e. also in builds without libxml */
196 : static void XmlTableSetDocument(struct TableFuncScanState *state, Datum value);
197 : static void XmlTableSetNamespace(struct TableFuncScanState *state, const char *name,
198 : const char *uri);
199 : static void XmlTableSetRowFilter(struct TableFuncScanState *state, const char *path);
200 : static void XmlTableSetColumnFilter(struct TableFuncScanState *state,
201 : const char *path, int colnum);
202 : static bool XmlTableFetchRow(struct TableFuncScanState *state);
203 : static Datum XmlTableGetValue(struct TableFuncScanState *state, int colnum,
204 : Oid typid, int32 typmod, bool *isnull);
205 : static void XmlTableDestroyOpaque(struct TableFuncScanState *state);
206 :
207 : const TableFuncRoutine XmlTableRoutine = /* non-static callback table; positional initializer, so entries must follow TableFuncRoutine's member order */
208 : {
209 : XmlTableInitOpaque,
210 : XmlTableSetDocument,
211 : XmlTableSetNamespace,
212 : XmlTableSetRowFilter,
213 : XmlTableSetColumnFilter,
214 : XmlTableFetchRow,
215 : XmlTableGetValue,
216 : XmlTableDestroyOpaque
217 : };
218 : /* NO_XML_SUPPORT: raise ERROR (feature not supported); used in the #else branches of functions that need libxml, followed by a dummy return */
219 : #define NO_XML_SUPPORT() \
220 : ereport(ERROR, \
221 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), \
222 : errmsg("unsupported XML feature"), \
223 : errdetail("This functionality requires the server to be built with libxml support.")))
224 :
225 :
226 : /* XML namespace URIs, from SQL/XML:2008 section 4.9 */
227 : #define NAMESPACE_XSD "http://www.w3.org/2001/XMLSchema"
228 : #define NAMESPACE_XSI "http://www.w3.org/2001/XMLSchema-instance"
229 : #define NAMESPACE_SQLXML "http://standards.iso.org/iso/9075/2003/sqlxml"
230 :
231 :
232 : #ifdef USE_LIBXML
233 :
234 : static int
235 0 : xmlChar_to_encoding(const xmlChar *encoding_name)
236 : {
237 0 : int encoding = pg_char_to_encoding((const char *) encoding_name);
238 :
239 0 : if (encoding < 0)
240 0 : ereport(ERROR,
241 : (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
242 : errmsg("invalid encoding name \"%s\"",
243 : (const char *) encoding_name)));
244 0 : return encoding;
245 : }
246 : #endif
247 :
248 :
249 : /*
250 : * xml_in uses a plain C string to VARDATA conversion, so for the time being
251 : * we use the conversion function for the text datatype.
252 : *
253 : * This is only acceptable so long as xmltype and text use the same
254 : * representation.
255 : */
256 : Datum
257 648 : xml_in(PG_FUNCTION_ARGS)
258 : {
259 : #ifdef USE_LIBXML
260 648 : char *s = PG_GETARG_CSTRING(0);
261 : xmltype *vardata;
262 : xmlDocPtr doc;
263 :
264 648 : vardata = (xmltype *) cstring_to_text(s);
265 :
266 : /*
267 : * Parse the data to check if it is well-formed XML data. Assume that
268 : * ERROR occurred if parsing failed.
269 : */
270 648 : doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding());
271 600 : xmlFreeDoc(doc);
272 :
273 600 : PG_RETURN_XML_P(vardata);
274 : #else
275 : NO_XML_SUPPORT();
276 : return 0;
277 : #endif
278 : }
279 :
280 :
281 : #define PG_XML_DEFAULT_VERSION "1.0" /* NOTE(review): presumably the XML version assumed when a declaration names none; not used in the visible part of the file -- confirm */
282 :
283 :
284 : /*
285 : * xml_out_internal uses a plain VARDATA to C string conversion, so for the
286 : * time being we use the conversion function for the text datatype.
287 : *
288 : * This is only acceptable so long as xmltype and text use the same
289 : * representation.
290 : */
291 : static char *
292 22328 : xml_out_internal(xmltype *x, pg_enc target_encoding)
293 : {
294 22328 : char *str = text_to_cstring((text *) x);
295 :
296 : #ifdef USE_LIBXML
297 22328 : size_t len = strlen(str);
298 : xmlChar *version;
299 : int standalone;
300 : int res_code;
301 :
302 22328 : if ((res_code = parse_xml_decl((xmlChar *) str,
303 : &len, &version, NULL, &standalone)) == 0)
304 : {
305 : StringInfoData buf;
306 :
307 22328 : initStringInfo(&buf);
308 :
309 22328 : if (!print_xml_decl(&buf, version, target_encoding, standalone))
310 : {
311 : /*
312 : * If we are not going to produce an XML declaration, eat a single
313 : * newline in the original string to prevent empty first lines in
314 : * the output.
315 : */
316 22280 : if (*(str + len) == '\n')
317 6 : len += 1;
318 : }
319 22328 : appendStringInfoString(&buf, str + len);
320 :
321 22328 : pfree(str);
322 :
323 22328 : return buf.data;
324 : }
325 :
326 0 : xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
327 : "could not parse XML declaration in stored value",
328 : res_code);
329 : #endif
330 0 : return str;
331 : }
332 :
333 :
334 : Datum
335 22064 : xml_out(PG_FUNCTION_ARGS)
336 : {
337 22064 : xmltype *x = PG_GETARG_XML_P(0);
338 :
339 : /*
340 : * xml_out removes the encoding property in all cases. This is because we
341 : * cannot control from here whether the datum will be converted to a
342 : * different client encoding, so we'd do more harm than good by including
343 : * it.
344 : */
345 22064 : PG_RETURN_CSTRING(xml_out_internal(x, 0));
346 : }
347 :
348 :
349 : Datum
350 0 : xml_recv(PG_FUNCTION_ARGS)
351 : {
352 : #ifdef USE_LIBXML
353 0 : StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
354 : xmltype *result;
355 : char *str;
356 : char *newstr;
357 : int nbytes;
358 : xmlDocPtr doc;
359 0 : xmlChar *encodingStr = NULL;
360 : int encoding;
361 :
362 : /*
363 : * Read the data in raw format. We don't know yet what the encoding is, as
364 : * that information is embedded in the xml declaration; so we have to
365 : * parse that before converting to server encoding.
366 : */
367 0 : nbytes = buf->len - buf->cursor;
368 0 : str = (char *) pq_getmsgbytes(buf, nbytes);
369 :
370 : /*
371 : * We need a null-terminated string to pass to parse_xml_decl(). Rather
372 : * than make a separate copy, make the temporary result one byte bigger
373 : * than it needs to be.
374 : */
375 0 : result = palloc(nbytes + 1 + VARHDRSZ);
376 0 : SET_VARSIZE(result, nbytes + VARHDRSZ);
377 0 : memcpy(VARDATA(result), str, nbytes);
378 0 : str = VARDATA(result);
379 0 : str[nbytes] = '\0';
380 :
381 0 : parse_xml_decl((const xmlChar *) str, NULL, NULL, &encodingStr, NULL);
382 :
383 : /*
384 : * If encoding wasn't explicitly specified in the XML header, treat it as
385 : * UTF-8, as that's the default in XML. This is different from xml_in(),
386 : * where the input has to go through the normal client to server encoding
387 : * conversion.
388 : */
389 0 : encoding = encodingStr ? xmlChar_to_encoding(encodingStr) : PG_UTF8;
390 :
391 : /*
392 : * Parse the data to check if it is well-formed XML data. Assume that
393 : * xml_parse will throw ERROR if not.
394 : */
395 0 : doc = xml_parse(result, xmloption, true, encoding);
396 0 : xmlFreeDoc(doc);
397 :
398 : /* Now that we know what we're dealing with, convert to server encoding */
399 0 : newstr = pg_any_to_server(str, nbytes, encoding);
400 :
401 0 : if (newstr != str)
402 : {
403 0 : pfree(result);
404 0 : result = (xmltype *) cstring_to_text(newstr);
405 0 : pfree(newstr);
406 : }
407 :
408 0 : PG_RETURN_XML_P(result);
409 : #else
410 : NO_XML_SUPPORT();
411 : return 0;
412 : #endif
413 : }
414 :
415 :
416 : Datum
417 0 : xml_send(PG_FUNCTION_ARGS)
418 : {
419 0 : xmltype *x = PG_GETARG_XML_P(0);
420 : char *outval;
421 : StringInfoData buf;
422 :
423 : /*
424 : * xml_out_internal doesn't convert the encoding, it just prints the right
425 : * declaration. pq_sendtext will do the conversion.
426 : */
427 0 : outval = xml_out_internal(x, pg_get_client_encoding());
428 :
429 0 : pq_begintypsend(&buf);
430 0 : pq_sendtext(&buf, outval, strlen(outval));
431 0 : pfree(outval);
432 0 : PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
433 : }
434 :
435 :
436 : #ifdef USE_LIBXML
437 : static void
438 162 : appendStringInfoText(StringInfo str, const text *t)
439 : {
440 162 : appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
441 162 : }
442 : #endif
443 :
444 :
445 : static xmltype *
446 21338 : stringinfo_to_xmltype(StringInfo buf)
447 : {
448 21338 : return (xmltype *) cstring_to_text_with_len(buf->data, buf->len);
449 : }
450 :
451 :
452 : static xmltype *
453 78 : cstring_to_xmltype(const char *string)
454 : {
455 78 : return (xmltype *) cstring_to_text(string);
456 : }
457 :
458 :
459 : #ifdef USE_LIBXML
460 : static xmltype *
461 21394 : xmlBuffer_to_xmltype(xmlBufferPtr buf)
462 : {
463 21394 : return (xmltype *) cstring_to_text_with_len((const char *) xmlBufferContent(buf),
464 : xmlBufferLength(buf));
465 : }
466 : #endif
467 :
468 :
469 : Datum
470 42 : xmlcomment(PG_FUNCTION_ARGS)
471 : {
472 : #ifdef USE_LIBXML
473 42 : text *arg = PG_GETARG_TEXT_PP(0);
474 42 : char *argdata = VARDATA_ANY(arg);
475 42 : int len = VARSIZE_ANY_EXHDR(arg);
476 : StringInfoData buf;
477 : int i;
478 :
479 : /* check for "--" in string or "-" at the end */
480 180 : for (i = 1; i < len; i++)
481 : {
482 144 : if (argdata[i] == '-' && argdata[i - 1] == '-')
483 6 : ereport(ERROR,
484 : (errcode(ERRCODE_INVALID_XML_COMMENT),
485 : errmsg("invalid XML comment")));
486 : }
487 36 : if (len > 0 && argdata[len - 1] == '-')
488 6 : ereport(ERROR,
489 : (errcode(ERRCODE_INVALID_XML_COMMENT),
490 : errmsg("invalid XML comment")));
491 :
492 30 : initStringInfo(&buf);
493 30 : appendStringInfoString(&buf, "<!--");
494 30 : appendStringInfoText(&buf, arg);
495 30 : appendStringInfoString(&buf, "-->");
496 :
497 30 : PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
498 : #else
499 : NO_XML_SUPPORT();
500 : return 0;
501 : #endif
502 : }
503 :
504 :
505 :
506 : /*
507 : * TODO: xmlconcat needs to merge the notations and unparsed entities
508 : * of the argument values. Not very important in practice, though.
509 : */
510 : xmltype *
511 21088 : xmlconcat(List *args)
512 : {
513 : #ifdef USE_LIBXML
514 21088 : int global_standalone = 1;
515 21088 : xmlChar *global_version = NULL;
516 21088 : bool global_version_no_value = false;
517 : StringInfoData buf;
518 : ListCell *v;
519 :
520 21088 : initStringInfo(&buf);
521 63270 : foreach(v, args)
522 : {
523 42182 : xmltype *x = DatumGetXmlP(PointerGetDatum(lfirst(v)));
524 : size_t len;
525 : xmlChar *version;
526 : int standalone;
527 : char *str;
528 :
529 42182 : len = VARSIZE(x) - VARHDRSZ;
530 42182 : str = text_to_cstring((text *) x);
531 :
532 42182 : parse_xml_decl((xmlChar *) str, &len, &version, NULL, &standalone);
533 :
534 42182 : if (standalone == 0 && global_standalone == 1)
535 0 : global_standalone = 0;
536 42182 : if (standalone < 0)
537 42170 : global_standalone = -1;
538 :
539 42182 : if (!version)
540 42164 : global_version_no_value = true;
541 18 : else if (!global_version)
542 12 : global_version = version;
543 6 : else if (xmlStrcmp(version, global_version) != 0)
544 0 : global_version_no_value = true;
545 :
546 42182 : appendStringInfoString(&buf, str + len);
547 42182 : pfree(str);
548 : }
549 :
550 21088 : if (!global_version_no_value || global_standalone >= 0)
551 : {
552 : StringInfoData buf2;
553 :
554 6 : initStringInfo(&buf2);
555 :
556 6 : print_xml_decl(&buf2,
557 6 : (!global_version_no_value) ? global_version : NULL,
558 : 0,
559 : global_standalone);
560 :
561 6 : appendBinaryStringInfo(&buf2, buf.data, buf.len);
562 6 : buf = buf2;
563 : }
564 :
565 21088 : return stringinfo_to_xmltype(&buf);
566 : #else
567 : NO_XML_SUPPORT();
568 : return NULL;
569 : #endif
570 : }
571 :
572 :
573 : /*
574 : * XMLAGG support
575 : */
576 : Datum
577 21064 : xmlconcat2(PG_FUNCTION_ARGS)
578 : {
579 21064 : if (PG_ARGISNULL(0))
580 : {
581 18 : if (PG_ARGISNULL(1))
582 0 : PG_RETURN_NULL();
583 : else
584 18 : PG_RETURN_XML_P(PG_GETARG_XML_P(1));
585 : }
586 21046 : else if (PG_ARGISNULL(1))
587 0 : PG_RETURN_XML_P(PG_GETARG_XML_P(0));
588 : else
589 21046 : PG_RETURN_XML_P(xmlconcat(list_make2(PG_GETARG_XML_P(0),
590 : PG_GETARG_XML_P(1))));
591 : }
592 :
593 :
594 : Datum
595 6 : texttoxml(PG_FUNCTION_ARGS)
596 : {
597 6 : text *data = PG_GETARG_TEXT_PP(0);
598 :
599 6 : PG_RETURN_XML_P(xmlparse(data, xmloption, true));
600 : }
601 :
602 :
603 : Datum
604 0 : xmltotext(PG_FUNCTION_ARGS)
605 : {
606 0 : xmltype *data = PG_GETARG_XML_P(0);
607 :
608 : /* It's actually binary compatible. */
609 0 : PG_RETURN_TEXT_P((text *) data);
610 : }
611 :
612 :
613 : text *
614 24 : xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
615 : {
616 24 : if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
617 6 : ereport(ERROR,
618 : (errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
619 : errmsg("not an XML document")));
620 :
621 : /* It's actually binary compatible, save for the above check. */
622 18 : return (text *) data;
623 : }
624 :
625 :
626 : xmltype *
627 21220 : xmlelement(XmlExpr *xexpr,
628 : Datum *named_argvalue, bool *named_argnull,
629 : Datum *argvalue, bool *argnull)
630 : {
631 : #ifdef USE_LIBXML
632 : xmltype *result;
633 : List *named_arg_strings;
634 : List *arg_strings;
635 : int i;
636 : ListCell *arg;
637 : ListCell *narg;
638 : PgXmlErrorContext *xmlerrcxt;
639 21220 : volatile xmlBufferPtr buf = NULL;
640 21220 : volatile xmlTextWriterPtr writer = NULL;
641 :
642 : /*
643 : * All arguments are already evaluated, and their values are passed in the
644 : * named_argvalue/named_argnull or argvalue/argnull arrays. This avoids
645 : * issues if one of the arguments involves a call to some other function
646 : * or subsystem that wants to use libxml on its own terms. We examine the
647 : * original XmlExpr to identify the numbers and types of the arguments.
648 : */
649 21220 : named_arg_strings = NIL;
650 21220 : i = 0;
651 21268 : foreach(arg, xexpr->named_args)
652 : {
653 54 : Expr *e = (Expr *) lfirst(arg);
654 : char *str;
655 :
656 54 : if (named_argnull[i])
657 0 : str = NULL;
658 : else
659 54 : str = map_sql_value_to_xml_value(named_argvalue[i],
660 : exprType((Node *) e),
661 : false);
662 48 : named_arg_strings = lappend(named_arg_strings, str);
663 48 : i++;
664 : }
665 :
666 21214 : arg_strings = NIL;
667 21214 : i = 0;
668 42404 : foreach(arg, xexpr->args)
669 : {
670 21190 : Expr *e = (Expr *) lfirst(arg);
671 : char *str;
672 :
673 : /* here we can just forget NULL elements immediately */
674 21190 : if (!argnull[i])
675 : {
676 21190 : str = map_sql_value_to_xml_value(argvalue[i],
677 : exprType((Node *) e),
678 : true);
679 21190 : arg_strings = lappend(arg_strings, str);
680 : }
681 21190 : i++;
682 : }
683 :
684 21214 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
685 :
686 21214 : PG_TRY();
687 : {
688 21214 : buf = xmlBufferCreate();
689 21214 : if (buf == NULL || xmlerrcxt->err_occurred)
690 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
691 : "could not allocate xmlBuffer");
692 21214 : writer = xmlNewTextWriterMemory(buf, 0);
693 21214 : if (writer == NULL || xmlerrcxt->err_occurred)
694 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
695 : "could not allocate xmlTextWriter");
696 :
697 21214 : xmlTextWriterStartElement(writer, (xmlChar *) xexpr->name);
698 :
699 21262 : forboth(arg, named_arg_strings, narg, xexpr->arg_names)
700 : {
701 48 : char *str = (char *) lfirst(arg);
702 48 : char *argname = strVal(lfirst(narg));
703 :
704 48 : if (str)
705 48 : xmlTextWriterWriteAttribute(writer,
706 : (xmlChar *) argname,
707 : (xmlChar *) str);
708 : }
709 :
710 42404 : foreach(arg, arg_strings)
711 : {
712 21190 : char *str = (char *) lfirst(arg);
713 :
714 21190 : xmlTextWriterWriteRaw(writer, (xmlChar *) str);
715 : }
716 :
717 21214 : xmlTextWriterEndElement(writer);
718 :
719 : /* we MUST do this now to flush data out to the buffer ... */
720 21214 : xmlFreeTextWriter(writer);
721 21214 : writer = NULL;
722 :
723 21214 : result = xmlBuffer_to_xmltype(buf);
724 : }
725 0 : PG_CATCH();
726 : {
727 0 : if (writer)
728 0 : xmlFreeTextWriter(writer);
729 0 : if (buf)
730 0 : xmlBufferFree(buf);
731 :
732 0 : pg_xml_done(xmlerrcxt, true);
733 :
734 0 : PG_RE_THROW();
735 : }
736 21214 : PG_END_TRY();
737 :
738 21214 : xmlBufferFree(buf);
739 :
740 21214 : pg_xml_done(xmlerrcxt, false);
741 :
742 21214 : return result;
743 : #else
744 : NO_XML_SUPPORT();
745 : return NULL;
746 : #endif
747 : }
748 :
749 :
750 : xmltype *
751 138 : xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
752 : {
753 : #ifdef USE_LIBXML
754 : xmlDocPtr doc;
755 :
756 138 : doc = xml_parse(data, xmloption_arg, preserve_whitespace,
757 : GetDatabaseEncoding());
758 90 : xmlFreeDoc(doc);
759 :
760 90 : return (xmltype *) data;
761 : #else
762 : NO_XML_SUPPORT();
763 : return NULL;
764 : #endif
765 : }
766 :
767 :
768 : xmltype *
769 72 : xmlpi(const char *target, text *arg, bool arg_is_null, bool *result_is_null)
770 : {
771 : #ifdef USE_LIBXML
772 : xmltype *result;
773 : StringInfoData buf;
774 :
775 72 : if (pg_strcasecmp(target, "xml") == 0)
776 12 : ereport(ERROR,
777 : (errcode(ERRCODE_SYNTAX_ERROR), /* really */
778 : errmsg("invalid XML processing instruction"),
779 : errdetail("XML processing instruction target name cannot be \"%s\".", target)));
780 :
781 : /*
782 : * Following the SQL standard, the null check comes after the syntax check
783 : * above.
784 : */
785 60 : *result_is_null = arg_is_null;
786 60 : if (*result_is_null)
787 12 : return NULL;
788 :
789 48 : initStringInfo(&buf);
790 :
791 48 : appendStringInfo(&buf, "<?%s", target);
792 :
793 48 : if (arg != NULL)
794 : {
795 : char *string;
796 :
797 24 : string = text_to_cstring(arg);
798 24 : if (strstr(string, "?>") != NULL)
799 6 : ereport(ERROR,
800 : (errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
801 : errmsg("invalid XML processing instruction"),
802 : errdetail("XML processing instruction cannot contain \"?>\".")));
803 :
804 18 : appendStringInfoChar(&buf, ' ');
805 18 : appendStringInfoString(&buf, string + strspn(string, " "));
806 18 : pfree(string);
807 : }
808 42 : appendStringInfoString(&buf, "?>");
809 :
810 42 : result = stringinfo_to_xmltype(&buf);
811 42 : pfree(buf.data);
812 42 : return result;
813 : #else
814 : NO_XML_SUPPORT();
815 : return NULL;
816 : #endif
817 : }
818 :
819 :
820 : xmltype *
821 60 : xmlroot(xmltype *data, text *version, int standalone)
822 : {
823 : #ifdef USE_LIBXML
824 : char *str;
825 : size_t len;
826 : xmlChar *orig_version;
827 : int orig_standalone;
828 : StringInfoData buf;
829 :
830 60 : len = VARSIZE(data) - VARHDRSZ;
831 60 : str = text_to_cstring((text *) data);
832 :
833 60 : parse_xml_decl((xmlChar *) str, &len, &orig_version, NULL, &orig_standalone);
834 :
835 60 : if (version)
836 24 : orig_version = xml_text2xmlChar(version);
837 : else
838 36 : orig_version = NULL;
839 :
840 60 : switch (standalone)
841 : {
842 18 : case XML_STANDALONE_YES:
843 18 : orig_standalone = 1;
844 18 : break;
845 12 : case XML_STANDALONE_NO:
846 12 : orig_standalone = 0;
847 12 : break;
848 12 : case XML_STANDALONE_NO_VALUE:
849 12 : orig_standalone = -1;
850 12 : break;
851 18 : case XML_STANDALONE_OMITTED:
852 : /* leave original value */
853 18 : break;
854 : }
855 :
856 60 : initStringInfo(&buf);
857 60 : print_xml_decl(&buf, orig_version, 0, orig_standalone);
858 60 : appendStringInfoString(&buf, str + len);
859 :
860 60 : return stringinfo_to_xmltype(&buf);
861 : #else
862 : NO_XML_SUPPORT();
863 : return NULL;
864 : #endif
865 : }
866 :
867 :
868 : /*
869 : * Validate document (given as string) against DTD (given as external link)
870 : *
871 : * This has been removed because it is a security hole: unprivileged users
872 : * should not be able to use Postgres to fetch arbitrary external files,
873 : * which unfortunately is exactly what libxml is willing to do with the DTD
874 : * parameter.
875 : */
876 : Datum
877 0 : xmlvalidate(PG_FUNCTION_ARGS)
878 : {
879 0 : ereport(ERROR,
880 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
881 : errmsg("xmlvalidate is not implemented")));
882 : return 0;
883 : }
884 :
885 :
886 : bool
887 30 : xml_is_document(xmltype *arg)
888 : {
889 : #ifdef USE_LIBXML
890 : bool result;
891 30 : volatile xmlDocPtr doc = NULL;
892 30 : MemoryContext ccxt = CurrentMemoryContext;
893 :
894 : /* We want to catch ereport(INVALID_XML_DOCUMENT) and return false */
895 30 : PG_TRY();
896 : {
897 30 : doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
898 : GetDatabaseEncoding());
899 12 : result = true;
900 : }
901 18 : PG_CATCH();
902 : {
903 : ErrorData *errdata;
904 : MemoryContext ecxt;
905 :
906 18 : ecxt = MemoryContextSwitchTo(ccxt);
907 18 : errdata = CopyErrorData();
908 18 : if (errdata->sqlerrcode == ERRCODE_INVALID_XML_DOCUMENT)
909 : {
910 18 : FlushErrorState();
911 18 : result = false;
912 : }
913 : else
914 : {
915 0 : MemoryContextSwitchTo(ecxt);
916 0 : PG_RE_THROW();
917 : }
918 : }
919 30 : PG_END_TRY();
920 :
921 30 : if (doc)
922 12 : xmlFreeDoc(doc);
923 :
924 30 : return result;
925 : #else /* not USE_LIBXML */
926 : NO_XML_SUPPORT();
927 : return false;
928 : #endif /* not USE_LIBXML */
929 : }
930 :
931 :
932 : #ifdef USE_LIBXML
933 :
934 : /*
935 : * pg_xml_init_library --- set up for use of libxml
936 : *
937 : * This should be called by each function that is about to use libxml
938 : * facilities but doesn't require error handling. It initializes libxml
939 : * and verifies compatibility with the loaded libxml version. These are
940 : * once-per-session activities.
941 : *
942 : * TODO: xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
943 : * check)
944 : */
945 : void
946 88844 : pg_xml_init_library(void)
947 : {
948 : static bool first_time = true;
949 :
950 88844 : if (first_time)
951 : {
952 : /* Stuff we need do only once per session */
953 :
954 : /*
955 : * Currently, we have no pure UTF-8 support for internals -- check if
956 : * we can work.
957 : */
958 : if (sizeof(char) != sizeof(xmlChar))
959 : ereport(ERROR,
960 : (errmsg("could not initialize XML library"),
961 : errdetail("libxml2 has incompatible char type: sizeof(char)=%zu, sizeof(xmlChar)=%zu.",
962 : sizeof(char), sizeof(xmlChar))));
963 :
964 : #ifdef USE_LIBXMLCONTEXT
965 : /* Set up libxml's memory allocation our way */
966 : xml_memory_init();
967 : #endif
968 :
969 : /* Check library compatibility */
970 26 : LIBXML_TEST_VERSION;
971 :
972 26 : first_time = false;
973 : }
974 88844 : }
975 :
976 : /*
977 : * pg_xml_init --- set up for use of libxml and register an error handler
978 : *
979 : * This should be called by each function that is about to use libxml
980 : * facilities and requires error handling. It initializes libxml with
981 : * pg_xml_init_library() and establishes our libxml error handler.
982 : *
983 : * strictness determines which errors are reported and which are ignored.
984 : *
985 : * Calls to this function MUST be followed by a PG_TRY block that guarantees
986 : * that pg_xml_done() is called during either normal or error exit.
987 : *
988 : * This is exported for use by contrib/xml2, as well as other code that might
989 : * wish to share use of this module's libxml error handler.
990 : */
991 : PgXmlErrorContext *
992 23008 : pg_xml_init(PgXmlStrictness strictness)
993 : {
994 : PgXmlErrorContext *errcxt;
995 : void *new_errcxt;
996 :
997 : /* Do one-time setup if needed */
998 23008 : pg_xml_init_library();
999 :
1000 : /* Create error handling context structure */
1001 23008 : errcxt = (PgXmlErrorContext *) palloc(sizeof(PgXmlErrorContext));
1002 23008 : errcxt->magic = ERRCXT_MAGIC;
1003 23008 : errcxt->strictness = strictness;
1004 23008 : errcxt->err_occurred = false;
1005 23008 : initStringInfo(&errcxt->err_buf);
1006 :
1007 : /*
1008 : * Save original error handler and install ours. libxml originally didn't
1009 : * distinguish between the contexts for generic and for structured error
1010 : * handlers. If we're using an old libxml version, we must thus save the
1011 : * generic error context, even though we're using a structured error
1012 : * handler.
1013 : */
1014 23008 : errcxt->saved_errfunc = xmlStructuredError;
1015 :
1016 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1017 23008 : errcxt->saved_errcxt = xmlStructuredErrorContext;
1018 : #else
1019 : errcxt->saved_errcxt = xmlGenericErrorContext;
1020 : #endif
1021 :
1022 23008 : xmlSetStructuredErrorFunc((void *) errcxt, xml_errorHandler);
1023 :
1024 : /*
1025 : * Verify that xmlSetStructuredErrorFunc set the context variable we
1026 : * expected it to. If not, the error context pointer we just saved is not
1027 : * the correct thing to restore, and since that leaves us without a way to
1028 : * restore the context in pg_xml_done, we must fail.
1029 : *
1030 : * The only known situation in which this test fails is if we compile with
1031 : * headers from a libxml2 that doesn't track the structured error context
1032 : * separately (< 2.7.4), but at runtime use a version that does, or vice
1033 : * versa. The libxml2 authors did not treat that change as constituting
1034 : * an ABI break, so the LIBXML_TEST_VERSION test in pg_xml_init_library
1035 : * fails to protect us from this.
1036 : */
1037 :
1038 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1039 23008 : new_errcxt = xmlStructuredErrorContext;
1040 : #else
1041 : new_errcxt = xmlGenericErrorContext;
1042 : #endif
1043 :
1044 23008 : if (new_errcxt != (void *) errcxt)
1045 0 : ereport(ERROR,
1046 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1047 : errmsg("could not set up XML error handler"),
1048 : errhint("This probably indicates that the version of libxml2"
1049 : " being used is not compatible with the libxml2"
1050 : " header files that PostgreSQL was built with.")));
1051 :
1052 : /*
1053 : * Also, install an entity loader to prevent unwanted fetches of external
1054 : * files and URLs.
1055 : */
1056 23008 : errcxt->saved_entityfunc = xmlGetExternalEntityLoader();
1057 23008 : xmlSetExternalEntityLoader(xmlPgEntityLoader);
1058 :
1059 23008 : return errcxt;
1060 : }
1061 :
1062 :
1063 : /*
1064 : * pg_xml_done --- restore previous libxml error handling
1065 : *
1066 : * Resets libxml's global error-handling state to what it was before
1067 : * pg_xml_init() was called.
1068 : *
1069 : * This routine verifies that all pending errors have been dealt with
1070 : * (in assert-enabled builds, anyway).
1071 : */
1072 : void
1073 23008 : pg_xml_done(PgXmlErrorContext *errcxt, bool isError)
1074 : {
1075 : void *cur_errcxt;
1076 :
1077 : /* An assert seems like enough protection here */
1078 : Assert(errcxt->magic == ERRCXT_MAGIC);
1079 :
1080 : /*
1081 : * In a normal exit, there should be no un-handled libxml errors. But we
1082 : * shouldn't try to enforce this during error recovery, since the longjmp
1083 : * could have been thrown before xml_ereport had a chance to run.
1084 : */
1085 : Assert(!errcxt->err_occurred || isError);
1086 :
1087 : /*
1088 : * Check that libxml's global state is correct, warn if not. This is a
1089 : * real test and not an Assert because it has a higher probability of
1090 : * happening.
1091 : */
1092 : #ifdef HAVE_XMLSTRUCTUREDERRORCONTEXT
1093 23008 : cur_errcxt = xmlStructuredErrorContext;
1094 : #else
1095 : cur_errcxt = xmlGenericErrorContext;
1096 : #endif
1097 :
1098 23008 : if (cur_errcxt != (void *) errcxt)
1099 0 : elog(WARNING, "libxml error handling state is out of sync with xml.c");
1100 :
1101 : /* Restore the saved handlers */
1102 23008 : xmlSetStructuredErrorFunc(errcxt->saved_errcxt, errcxt->saved_errfunc);
1103 23008 : xmlSetExternalEntityLoader(errcxt->saved_entityfunc);
1104 :
1105 : /*
1106 : * Mark the struct as invalid, just in case somebody somehow manages to
1107 : * call xml_errorHandler or xml_ereport with it.
1108 : */
1109 23008 : errcxt->magic = 0;
1110 :
1111 : /* Release memory */
1112 23008 : pfree(errcxt->err_buf.data);
1113 23008 : pfree(errcxt);
1114 23008 : }
1115 :
1116 :
1117 : /*
1118 : * pg_xml_error_occurred() --- test the error flag
1119 : */
1120 : bool
1121 0 : pg_xml_error_occurred(PgXmlErrorContext *errcxt)
1122 : {
1123 0 : return errcxt->err_occurred;
1124 : }
1125 :
1126 :
1127 : /*
1128 : * SQL/XML allows storing "XML documents" or "XML content". "XML
1129 : * documents" are specified by the XML specification and are parsed
1130 : * easily by libxml. "XML content" is specified by SQL/XML as the
1131 : * production "XMLDecl? content". But libxml can only parse the
1132 : * "content" part, so we have to parse the XML declaration ourselves
1133 : * to complete this.
1134 : */
1135 :
/*
 * Require an XML blank character at *p.  Note that this expands to a
 * "return XML_ERR_SPACE_REQUIRED" out of the *calling* function when the
 * character is not blank, so it is only usable inside functions that return
 * a libxml error code.
 */
#define CHECK_XML_SPACE(p) \
    do { \
        if (!xmlIsBlank_ch(*(p))) \
            return XML_ERR_SPACE_REQUIRED; \
    } while (0)

/* Advance p past any run of XML blank characters (possibly none) */
#define SKIP_XML_SPACE(p) \
    while (xmlIsBlank_ch(*(p))) (p)++

/* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
/*
 * Beware of multiple evaluations of argument!  The argument is parenthesized
 * at every use so that an expression argument (e.g. a conditional
 * expression) is compared as a whole rather than being re-associated with
 * the surrounding == and || operators.
 */
#define PG_XMLISNAMECHAR(c) \
    (xmlIsBaseChar_ch(c) || xmlIsIdeographicQ(c) \
     || xmlIsDigit_ch(c) \
     || (c) == '.' || (c) == '-' || (c) == '_' || (c) == ':' \
     || xmlIsCombiningQ(c) \
     || xmlIsExtender_ch(c))
1153 :
1154 : /* pnstrdup, but deal with xmlChar not char; len is measured in xmlChars */
1155 : static xmlChar *
1156 162 : xml_pnstrdup(const xmlChar *str, size_t len)
1157 : {
1158 : xmlChar *result;
1159 :
1160 162 : result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1161 162 : memcpy(result, str, len * sizeof(xmlChar));
1162 162 : result[len] = 0;
1163 162 : return result;
1164 : }
1165 :
1166 : /* Ditto, except input is char* */
1167 : static xmlChar *
1168 2424 : pg_xmlCharStrndup(const char *str, size_t len)
1169 : {
1170 : xmlChar *result;
1171 :
1172 2424 : result = (xmlChar *) palloc((len + 1) * sizeof(xmlChar));
1173 2424 : memcpy(result, str, len);
1174 2424 : result[len] = '\0';
1175 :
1176 2424 : return result;
1177 : }
1178 :
1179 : /*
1180 : * Copy xmlChar string to PostgreSQL-owned memory, freeing the input.
1181 : *
1182 : * The input xmlChar is freed regardless of success of the copy.
1183 : */
1184 : static char *
1185 106472 : xml_pstrdup_and_free(xmlChar *str)
1186 : {
1187 : char *result;
1188 :
1189 106472 : if (str)
1190 : {
1191 106472 : PG_TRY();
1192 : {
1193 106472 : result = pstrdup((char *) str);
1194 : }
1195 0 : PG_FINALLY();
1196 : {
1197 106472 : xmlFree(str);
1198 : }
1199 106472 : PG_END_TRY();
1200 : }
1201 : else
1202 0 : result = NULL;
1203 :
1204 106472 : return result;
1205 : }
1206 :
1207 : /*
1208 : * str is the null-terminated input string. Remaining arguments are
1209 : * output arguments; each can be NULL if value is not wanted.
1210 : * version and encoding are returned as locally-palloc'd strings.
1211 : * Result is 0 if OK, an error code if not.
1212 : */
1213 : static int
1214 65836 : parse_xml_decl(const xmlChar *str, size_t *lenp,
1215 : xmlChar **version, xmlChar **encoding, int *standalone)
1216 : {
1217 : const xmlChar *p;
1218 : const xmlChar *save_p;
1219 : size_t len;
1220 : int utf8char;
1221 : int utf8len;
1222 :
1223 : /*
1224 : * Only initialize libxml. We don't need error handling here, but we do
1225 : * need to make sure libxml is initialized before calling any of its
1226 : * functions. Note that this is safe (and a no-op) if caller has already
1227 : * done pg_xml_init().
1228 : */
1229 65836 : pg_xml_init_library();
1230 :
1231 : /* Initialize output arguments to "not present" */
1232 65836 : if (version)
1233 65284 : *version = NULL;
1234 65836 : if (encoding)
1235 0 : *encoding = NULL;
1236 65836 : if (standalone)
1237 65284 : *standalone = -1;
1238 :
1239 65836 : p = str;
1240 :
1241 65836 : if (xmlStrncmp(p, (xmlChar *) "<?xml", 5) != 0)
1242 65656 : goto finished;
1243 :
1244 : /*
1245 : * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
1246 : * rather than an XMLDecl, so we have done what we came to do and found no
1247 : * XMLDecl.
1248 : *
1249 : * We need an input length value for xmlGetUTF8Char, but there's no need
1250 : * to count the whole document size, so use strnlen not strlen.
1251 : */
1252 180 : utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
1253 180 : utf8char = xmlGetUTF8Char(p + 5, &utf8len);
1254 180 : if (PG_XMLISNAMECHAR(utf8char))
1255 12 : goto finished;
1256 :
1257 168 : p += 5;
1258 :
1259 : /* version */
1260 168 : CHECK_XML_SPACE(p);
1261 336 : SKIP_XML_SPACE(p);
1262 168 : if (xmlStrncmp(p, (xmlChar *) "version", 7) != 0)
1263 0 : return XML_ERR_VERSION_MISSING;
1264 168 : p += 7;
1265 168 : SKIP_XML_SPACE(p);
1266 168 : if (*p != '=')
1267 0 : return XML_ERR_VERSION_MISSING;
1268 168 : p += 1;
1269 168 : SKIP_XML_SPACE(p);
1270 :
1271 168 : if (*p == '\'' || *p == '"')
1272 168 : {
1273 : const xmlChar *q;
1274 :
1275 168 : q = xmlStrchr(p + 1, *p);
1276 168 : if (!q)
1277 0 : return XML_ERR_VERSION_MISSING;
1278 :
1279 168 : if (version)
1280 162 : *version = xml_pnstrdup(p + 1, q - p - 1);
1281 168 : p = q + 1;
1282 : }
1283 : else
1284 0 : return XML_ERR_VERSION_MISSING;
1285 :
1286 : /* encoding */
1287 168 : save_p = p;
1288 288 : SKIP_XML_SPACE(p);
1289 168 : if (xmlStrncmp(p, (xmlChar *) "encoding", 8) == 0)
1290 : {
1291 24 : CHECK_XML_SPACE(save_p);
1292 24 : p += 8;
1293 24 : SKIP_XML_SPACE(p);
1294 24 : if (*p != '=')
1295 0 : return XML_ERR_MISSING_ENCODING;
1296 24 : p += 1;
1297 24 : SKIP_XML_SPACE(p);
1298 :
1299 24 : if (*p == '\'' || *p == '"')
1300 24 : {
1301 : const xmlChar *q;
1302 :
1303 24 : q = xmlStrchr(p + 1, *p);
1304 24 : if (!q)
1305 0 : return XML_ERR_MISSING_ENCODING;
1306 :
1307 24 : if (encoding)
1308 0 : *encoding = xml_pnstrdup(p + 1, q - p - 1);
1309 24 : p = q + 1;
1310 : }
1311 : else
1312 0 : return XML_ERR_MISSING_ENCODING;
1313 : }
1314 : else
1315 : {
1316 144 : p = save_p;
1317 : }
1318 :
1319 : /* standalone */
1320 168 : save_p = p;
1321 264 : SKIP_XML_SPACE(p);
1322 168 : if (xmlStrncmp(p, (xmlChar *) "standalone", 10) == 0)
1323 : {
1324 96 : CHECK_XML_SPACE(save_p);
1325 96 : p += 10;
1326 96 : SKIP_XML_SPACE(p);
1327 96 : if (*p != '=')
1328 0 : return XML_ERR_STANDALONE_VALUE;
1329 96 : p += 1;
1330 96 : SKIP_XML_SPACE(p);
1331 192 : if (xmlStrncmp(p, (xmlChar *) "'yes'", 5) == 0 ||
1332 96 : xmlStrncmp(p, (xmlChar *) "\"yes\"", 5) == 0)
1333 : {
1334 60 : if (standalone)
1335 60 : *standalone = 1;
1336 60 : p += 5;
1337 : }
1338 72 : else if (xmlStrncmp(p, (xmlChar *) "'no'", 4) == 0 ||
1339 36 : xmlStrncmp(p, (xmlChar *) "\"no\"", 4) == 0)
1340 : {
1341 36 : if (standalone)
1342 36 : *standalone = 0;
1343 36 : p += 4;
1344 : }
1345 : else
1346 0 : return XML_ERR_STANDALONE_VALUE;
1347 : }
1348 : else
1349 : {
1350 72 : p = save_p;
1351 : }
1352 :
1353 168 : SKIP_XML_SPACE(p);
1354 168 : if (xmlStrncmp(p, (xmlChar *) "?>", 2) != 0)
1355 0 : return XML_ERR_XMLDECL_NOT_FINISHED;
1356 168 : p += 2;
1357 :
1358 65836 : finished:
1359 65836 : len = p - str;
1360 :
1361 71428 : for (p = str; p < str + len; p++)
1362 5592 : if (*p > 127)
1363 0 : return XML_ERR_INVALID_CHAR;
1364 :
1365 65836 : if (lenp)
1366 65836 : *lenp = len;
1367 :
1368 65836 : return XML_ERR_OK;
1369 : }
1370 :
1371 :
1372 : /*
1373 : * Write an XML declaration. On output, we adjust the XML declaration
1374 : * as follows. (These rules are the moral equivalent of the clause
1375 : * "Serialization of an XML value" in the SQL standard.)
1376 : *
1377 : * We try to avoid generating an XML declaration if possible. This is
1378 : * so that you don't get trivial things like xml '<foo/>' resulting in
1379 : * '<?xml version="1.0"?><foo/>', which would surely be annoying. We
1380 : * must provide a declaration if the standalone property is specified
1381 : * or if we include an encoding declaration. If we have a
1382 : * declaration, we must specify a version (XML requires this).
1383 : * Otherwise we only make a declaration if the version is not "1.0",
1384 : * which is the default version specified in SQL:2003.
1385 : */
1386 : static bool
1387 22394 : print_xml_decl(StringInfo buf, const xmlChar *version,
1388 : pg_enc encoding, int standalone)
1389 : {
1390 22394 : if ((version && strcmp((const char *) version, PG_XML_DEFAULT_VERSION) != 0)
1391 22358 : || (encoding && encoding != PG_UTF8)
1392 22358 : || standalone != -1)
1393 : {
1394 96 : appendStringInfoString(buf, "<?xml");
1395 :
1396 96 : if (version)
1397 72 : appendStringInfo(buf, " version=\"%s\"", version);
1398 : else
1399 24 : appendStringInfo(buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
1400 :
1401 96 : if (encoding && encoding != PG_UTF8)
1402 : {
1403 : /*
1404 : * XXX might be useful to convert this to IANA names (ISO-8859-1
1405 : * instead of LATIN1 etc.); needs field experience
1406 : */
1407 0 : appendStringInfo(buf, " encoding=\"%s\"",
1408 : pg_encoding_to_char(encoding));
1409 : }
1410 :
1411 96 : if (standalone == 1)
1412 48 : appendStringInfoString(buf, " standalone=\"yes\"");
1413 48 : else if (standalone == 0)
1414 24 : appendStringInfoString(buf, " standalone=\"no\"");
1415 96 : appendStringInfoString(buf, "?>");
1416 :
1417 96 : return true;
1418 : }
1419 : else
1420 22298 : return false;
1421 : }
1422 :
1423 : /*
1424 : * Test whether an input that is to be parsed as CONTENT contains a DTD.
1425 : *
1426 : * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
1427 : * satisfied by a document with a DTD, which is a bit of a wart, as it means
1428 : * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and
1429 : * later fix that, by redefining content with reference to the "more
1430 : * permissive" Document Node of the XQuery/XPath Data Model, such that any
1431 : * DOCUMENT value is indeed also a CONTENT value. That definition is more
1432 : * useful, as CONTENT becomes usable for parsing input of unknown form (think
1433 : * pg_restore).
1434 : *
1435 : * As used below in parse_xml when parsing for CONTENT, libxml does not give
1436 : * us the 2006+ behavior, but only the 2003; it will choke if the input has
1437 : * a DTD. But we can provide the 2006+ definition of CONTENT easily enough,
1438 : * by detecting this case first and simply doing the parse as DOCUMENT.
1439 : *
1440 : * A DTD can be found arbitrarily far in, but that would be a contrived case;
1441 : * it will ordinarily start within a few dozen characters. The only things
1442 : * that can precede it are an XMLDecl (here, the caller will have called
1443 : * parse_xml_decl already), whitespace, comments, and processing instructions.
1444 : * This function need only return true if it sees a valid sequence of such
1445 : * things leading to <!DOCTYPE. It can simply return false in any other
1446 : * cases, including malformed input; that will mean the input gets parsed as
1447 : * CONTENT as originally planned, with libxml reporting any errors.
1448 : *
1449 : * This is only to be called from xml_parse, when pg_xml_init has already
1450 : * been called. The input is already in UTF8 encoding.
1451 : */
1452 : static bool
1453 714 : xml_doctype_in_content(const xmlChar *str)
1454 : {
1455 714 : const xmlChar *p = str;
1456 :
1457 : for (;;)
1458 36 : {
1459 : const xmlChar *e;
1460 :
1461 804 : SKIP_XML_SPACE(p);
1462 750 : if (*p != '<')
1463 146 : return false;
1464 604 : p++;
1465 :
1466 604 : if (*p == '!')
1467 : {
1468 54 : p++;
1469 :
1470 : /* if we see <!DOCTYPE, we can return true */
1471 54 : if (xmlStrncmp(p, (xmlChar *) "DOCTYPE", 7) == 0)
1472 24 : return true;
1473 :
1474 : /* otherwise, if it's not a comment, fail */
1475 30 : if (xmlStrncmp(p, (xmlChar *) "--", 2) != 0)
1476 0 : return false;
1477 : /* find end of comment: find -- and a > must follow */
1478 30 : p = xmlStrstr(p + 2, (xmlChar *) "--");
1479 30 : if (!p || p[2] != '>')
1480 0 : return false;
1481 : /* advance over comment, and keep scanning */
1482 30 : p += 3;
1483 30 : continue;
1484 : }
1485 :
1486 : /* otherwise, if it's not a PI <?target something?>, fail */
1487 550 : if (*p != '?')
1488 544 : return false;
1489 6 : p++;
1490 :
1491 : /* find end of PI (the string ?> is forbidden within a PI) */
1492 6 : e = xmlStrstr(p, (xmlChar *) "?>");
1493 6 : if (!e)
1494 0 : return false;
1495 :
1496 : /* advance over PI, keep scanning */
1497 6 : p = e + 2;
1498 : }
1499 : }
1500 :
1501 :
1502 : /*
1503 : * Convert a C string to XML internal representation
1504 : *
1505 : * Note: it is caller's responsibility to xmlFreeDoc() the result,
1506 : * else a permanent memory leak will ensue!
1507 : *
1508 : * TODO maybe libxml2's xmlreader is better? (do not construct DOM,
1509 : * yet do not use SAX - see xmlreader.c)
1510 : */
1511 : static xmlDocPtr
1512 930 : xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
1513 : int encoding)
1514 : {
1515 : int32 len;
1516 : xmlChar *string;
1517 : xmlChar *utf8string;
1518 : PgXmlErrorContext *xmlerrcxt;
1519 930 : volatile xmlParserCtxtPtr ctxt = NULL;
1520 930 : volatile xmlDocPtr doc = NULL;
1521 :
1522 930 : len = VARSIZE_ANY_EXHDR(data); /* will be useful later */
1523 930 : string = xml_text2xmlChar(data);
1524 :
1525 930 : utf8string = pg_do_encoding_conversion(string,
1526 : len,
1527 : encoding,
1528 : PG_UTF8);
1529 :
1530 : /* Start up libxml and its parser */
1531 930 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_WELLFORMED);
1532 :
1533 : /* Use a TRY block to ensure we clean up correctly */
1534 930 : PG_TRY();
1535 : {
1536 930 : bool parse_as_document = false;
1537 : int res_code;
1538 930 : size_t count = 0;
1539 930 : xmlChar *version = NULL;
1540 930 : int standalone = 0;
1541 :
1542 930 : xmlInitParser();
1543 :
1544 930 : ctxt = xmlNewParserCtxt();
1545 930 : if (ctxt == NULL || xmlerrcxt->err_occurred)
1546 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
1547 : "could not allocate parser context");
1548 :
1549 : /* Decide whether to parse as document or content */
1550 930 : if (xmloption_arg == XMLOPTION_DOCUMENT)
1551 216 : parse_as_document = true;
1552 : else
1553 : {
1554 : /* Parse and skip over the XML declaration, if any */
1555 714 : res_code = parse_xml_decl(utf8string,
1556 : &count, &version, NULL, &standalone);
1557 714 : if (res_code != 0)
1558 0 : xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
1559 : "invalid XML content: invalid XML declaration",
1560 : res_code);
1561 :
1562 : /* Is there a DOCTYPE element? */
1563 714 : if (xml_doctype_in_content(utf8string + count))
1564 24 : parse_as_document = true;
1565 : }
1566 :
1567 930 : if (parse_as_document)
1568 : {
1569 : /*
1570 : * Note, that here we try to apply DTD defaults
1571 : * (XML_PARSE_DTDATTR) according to SQL/XML:2008 GR 10.16.7.d:
1572 : * 'Default values defined by internal DTD are applied'. As for
1573 : * external DTDs, we try to support them too, (see SQL/XML:2008 GR
1574 : * 10.16.7.e)
1575 : */
1576 240 : doc = xmlCtxtReadDoc(ctxt, utf8string,
1577 : NULL,
1578 : "UTF-8",
1579 : XML_PARSE_NOENT | XML_PARSE_DTDATTR
1580 : | (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
1581 240 : if (doc == NULL || xmlerrcxt->err_occurred)
1582 : {
1583 : /* Use original option to decide which error code to throw */
1584 120 : if (xmloption_arg == XMLOPTION_DOCUMENT)
1585 114 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
1586 : "invalid XML document");
1587 : else
1588 6 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1589 : "invalid XML content");
1590 : }
1591 : }
1592 : else
1593 : {
1594 690 : doc = xmlNewDoc(version);
1595 : Assert(doc->encoding == NULL);
1596 690 : doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
1597 690 : doc->standalone = standalone;
1598 :
1599 : /* allow empty content */
1600 690 : if (*(utf8string + count))
1601 : {
1602 1368 : res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
1603 684 : utf8string + count, NULL);
1604 684 : if (res_code != 0 || xmlerrcxt->err_occurred)
1605 48 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
1606 : "invalid XML content");
1607 : }
1608 : }
1609 : }
1610 168 : PG_CATCH();
1611 : {
1612 168 : if (doc != NULL)
1613 48 : xmlFreeDoc(doc);
1614 168 : if (ctxt != NULL)
1615 168 : xmlFreeParserCtxt(ctxt);
1616 :
1617 168 : pg_xml_done(xmlerrcxt, true);
1618 :
1619 168 : PG_RE_THROW();
1620 : }
1621 762 : PG_END_TRY();
1622 :
1623 762 : xmlFreeParserCtxt(ctxt);
1624 :
1625 762 : pg_xml_done(xmlerrcxt, false);
1626 :
1627 762 : return doc;
1628 : }
1629 :
1630 :
1631 : /*
1632 : * xmlChar<->text conversions
1633 : */
1634 : static xmlChar *
1635 954 : xml_text2xmlChar(text *in)
1636 : {
1637 954 : return (xmlChar *) text_to_cstring(in);
1638 : }
1639 :
1640 :
1641 : #ifdef USE_LIBXMLCONTEXT
1642 :
1643 : /*
1644 : * Manage the special context used for all libxml allocations (but only
1645 : * in special debug builds; see notes at top of file)
1646 : */
1647 : static void
1648 : xml_memory_init(void)
1649 : {
1650 : /* Create memory context if not there already */
1651 : if (LibxmlContext == NULL)
1652 : LibxmlContext = AllocSetContextCreate(TopMemoryContext,
1653 : "Libxml context",
1654 : ALLOCSET_DEFAULT_SIZES);
1655 :
1656 : /* Re-establish the callbacks even if already set */
1657 : xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
1658 : }
1659 :
1660 : /*
1661 : * Wrappers for memory management functions
1662 : */
1663 : static void *
1664 : xml_palloc(size_t size)
1665 : {
1666 : return MemoryContextAlloc(LibxmlContext, size);
1667 : }
1668 :
1669 :
1670 : static void *
1671 : xml_repalloc(void *ptr, size_t size)
1672 : {
1673 : return repalloc(ptr, size);
1674 : }
1675 :
1676 :
/* Free callback for libxml */
static void
xml_pfree(void *ptr)
{
    /* At least some parts of libxml assume xmlFree(NULL) is allowed */
    if (ptr == NULL)
        return;

    pfree(ptr);
}
1684 :
1685 :
1686 : static char *
1687 : xml_pstrdup(const char *string)
1688 : {
1689 : return MemoryContextStrdup(LibxmlContext, string);
1690 : }
1691 : #endif /* USE_LIBXMLCONTEXT */
1692 :
1693 :
1694 : /*
1695 : * xmlPgEntityLoader --- entity loader callback function
1696 : *
1697 : * Silently prevent any external entity URL from being loaded. We don't want
1698 : * to throw an error, so instead make the entity appear to expand to an empty
1699 : * string.
1700 : *
1701 : * We would prefer to allow loading entities that exist in the system's
1702 : * global XML catalog; but the available libxml2 APIs make that a complex
1703 : * and fragile task. For now, just shut down all external access.
1704 : */
1705 : static xmlParserInputPtr
1706 30 : xmlPgEntityLoader(const char *URL, const char *ID,
1707 : xmlParserCtxtPtr ctxt)
1708 : {
1709 30 : return xmlNewStringInputStream(ctxt, (const xmlChar *) "");
1710 : }
1711 :
1712 :
1713 : /*
1714 : * xml_ereport --- report an XML-related error
1715 : *
1716 : * The "msg" is the SQL-level message; some can be adopted from the SQL/XML
1717 : * standard. This function adds libxml's native error message, if any, as
1718 : * detail.
1719 : *
1720 : * This is exported for modules that want to share the core libxml error
1721 : * handler. Note that pg_xml_init() *must* have been called previously.
1722 : */
1723 : void
1724 180 : xml_ereport(PgXmlErrorContext *errcxt, int level, int sqlcode, const char *msg)
1725 : {
1726 : char *detail;
1727 :
1728 : /* Defend against someone passing us a bogus context struct */
1729 180 : if (errcxt->magic != ERRCXT_MAGIC)
1730 0 : elog(ERROR, "xml_ereport called with invalid PgXmlErrorContext");
1731 :
1732 : /* Flag that the current libxml error has been reported */
1733 180 : errcxt->err_occurred = false;
1734 :
1735 : /* Include detail only if we have some text from libxml */
1736 180 : if (errcxt->err_buf.len > 0)
1737 180 : detail = errcxt->err_buf.data;
1738 : else
1739 0 : detail = NULL;
1740 :
1741 180 : ereport(level,
1742 : (errcode(sqlcode),
1743 : errmsg_internal("%s", msg),
1744 : detail ? errdetail_internal("%s", detail) : 0));
1745 0 : }
1746 :
1747 :
1748 : /*
1749 : * Error handler for libxml errors and warnings
1750 : */
1751 : static void
1752 332 : xml_errorHandler(void *data, xmlErrorPtr error)
1753 : {
1754 332 : PgXmlErrorContext *xmlerrcxt = (PgXmlErrorContext *) data;
1755 332 : xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) error->ctxt;
1756 332 : xmlParserInputPtr input = (ctxt != NULL) ? ctxt->input : NULL;
1757 332 : xmlNodePtr node = error->node;
1758 332 : const xmlChar *name = (node != NULL &&
1759 332 : node->type == XML_ELEMENT_NODE) ? node->name : NULL;
1760 332 : int domain = error->domain;
1761 332 : int level = error->level;
1762 : StringInfo errorBuf;
1763 :
1764 : /*
1765 : * Defend against someone passing us a bogus context struct.
1766 : *
1767 : * We force a backend exit if this check fails because longjmp'ing out of
1768 : * libxml would likely render it unsafe to use further.
1769 : */
1770 332 : if (xmlerrcxt->magic != ERRCXT_MAGIC)
1771 0 : elog(FATAL, "xml_errorHandler called with invalid PgXmlErrorContext");
1772 :
1773 : /*----------
1774 : * Older libxml versions report some errors differently.
1775 : * First, some errors were previously reported as coming from the parser
1776 : * domain but are now reported as coming from the namespace domain.
1777 : * Second, some warnings were upgraded to errors.
1778 : * We attempt to compensate for that here.
1779 : *----------
1780 : */
1781 332 : switch (error->code)
1782 : {
1783 30 : case XML_WAR_NS_URI:
1784 30 : level = XML_ERR_ERROR;
1785 30 : domain = XML_FROM_NAMESPACE;
1786 30 : break;
1787 :
1788 54 : case XML_ERR_NS_DECL_ERROR:
1789 : case XML_WAR_NS_URI_RELATIVE:
1790 : case XML_WAR_NS_COLUMN:
1791 : case XML_NS_ERR_XML_NAMESPACE:
1792 : case XML_NS_ERR_UNDEFINED_NAMESPACE:
1793 : case XML_NS_ERR_QNAME:
1794 : case XML_NS_ERR_ATTRIBUTE_REDEFINED:
1795 : case XML_NS_ERR_EMPTY:
1796 54 : domain = XML_FROM_NAMESPACE;
1797 54 : break;
1798 : }
1799 :
1800 : /* Decide whether to act on the error or not */
1801 332 : switch (domain)
1802 : {
1803 248 : case XML_FROM_PARSER:
1804 : case XML_FROM_NONE:
1805 : case XML_FROM_MEMORY:
1806 : case XML_FROM_IO:
1807 :
1808 : /*
1809 : * Suppress warnings about undeclared entities. We need to do
1810 : * this to avoid problems due to not loading DTD definitions.
1811 : */
1812 248 : if (error->code == XML_WAR_UNDECLARED_ENTITY)
1813 6 : return;
1814 :
1815 : /* Otherwise, accept error regardless of the parsing purpose */
1816 242 : break;
1817 :
1818 84 : default:
1819 : /* Ignore error if only doing well-formedness check */
1820 84 : if (xmlerrcxt->strictness == PG_XML_STRICTNESS_WELLFORMED)
1821 66 : return;
1822 18 : break;
1823 : }
1824 :
1825 : /* Prepare error message in errorBuf */
1826 260 : errorBuf = makeStringInfo();
1827 :
1828 260 : if (error->line > 0)
1829 260 : appendStringInfo(errorBuf, "line %d: ", error->line);
1830 260 : if (name != NULL)
1831 0 : appendStringInfo(errorBuf, "element %s: ", name);
1832 260 : if (error->message != NULL)
1833 260 : appendStringInfoString(errorBuf, error->message);
1834 : else
1835 0 : appendStringInfoString(errorBuf, "(no message provided)");
1836 :
1837 : /*
1838 : * Append context information to errorBuf.
1839 : *
1840 : * xmlParserPrintFileContext() uses libxml's "generic" error handler to
1841 : * write the context. Since we don't want to duplicate libxml
1842 : * functionality here, we set up a generic error handler temporarily.
1843 : *
1844 : * We use appendStringInfo() directly as libxml's generic error handler.
1845 : * This should work because it has essentially the same signature as
1846 : * libxml expects, namely (void *ptr, const char *msg, ...).
1847 : */
1848 260 : if (input != NULL)
1849 : {
1850 260 : xmlGenericErrorFunc errFuncSaved = xmlGenericError;
1851 260 : void *errCtxSaved = xmlGenericErrorContext;
1852 :
1853 260 : xmlSetGenericErrorFunc((void *) errorBuf,
1854 : (xmlGenericErrorFunc) appendStringInfo);
1855 :
1856 : /* Add context information to errorBuf */
1857 260 : appendStringInfoLineSeparator(errorBuf);
1858 :
1859 260 : xmlParserPrintFileContext(input);
1860 :
1861 : /* Restore generic error func */
1862 260 : xmlSetGenericErrorFunc(errCtxSaved, errFuncSaved);
1863 : }
1864 :
1865 : /* Get rid of any trailing newlines in errorBuf */
1866 260 : chopStringInfoNewlines(errorBuf);
1867 :
1868 : /*
1869 : * Legacy error handling mode. err_occurred is never set, we just add the
1870 : * message to err_buf. This mode exists because the xml2 contrib module
1871 : * uses our error-handling infrastructure, but we don't want to change its
1872 : * behaviour since it's deprecated anyway. This is also why we don't
1873 : * distinguish between notices, warnings and errors here --- the old-style
1874 : * generic error handler wouldn't have done that either.
1875 : */
1876 260 : if (xmlerrcxt->strictness == PG_XML_STRICTNESS_LEGACY)
1877 : {
1878 2 : appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1879 2 : appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
1880 : errorBuf->len);
1881 :
1882 2 : pfree(errorBuf->data);
1883 2 : pfree(errorBuf);
1884 2 : return;
1885 : }
1886 :
1887 : /*
1888 : * We don't want to ereport() here because that'd probably leave libxml in
1889 : * an inconsistent state. Instead, we remember the error and ereport()
1890 : * from xml_ereport().
1891 : *
1892 : * Warnings and notices can be reported immediately since they won't cause
1893 : * a longjmp() out of libxml.
1894 : */
1895 258 : if (level >= XML_ERR_ERROR)
1896 : {
1897 252 : appendStringInfoLineSeparator(&xmlerrcxt->err_buf);
1898 252 : appendBinaryStringInfo(&xmlerrcxt->err_buf, errorBuf->data,
1899 : errorBuf->len);
1900 :
1901 252 : xmlerrcxt->err_occurred = true;
1902 : }
1903 6 : else if (level >= XML_ERR_WARNING)
1904 : {
1905 6 : ereport(WARNING,
1906 : (errmsg_internal("%s", errorBuf->data)));
1907 : }
1908 : else
1909 : {
1910 0 : ereport(NOTICE,
1911 : (errmsg_internal("%s", errorBuf->data)));
1912 : }
1913 :
1914 258 : pfree(errorBuf->data);
1915 258 : pfree(errorBuf);
1916 : }
1917 :
1918 :
1919 : /*
1920 : * Wrapper for "ereport" function for XML-related errors. The "msg"
1921 : * is the SQL-level message; some can be adopted from the SQL/XML
1922 : * standard. This function uses "code" to create a textual detail
1923 : * message. At the moment, we only need to cover those codes that we
1924 : * may raise in this file.
1925 : */
1926 : static void
1927 0 : xml_ereport_by_code(int level, int sqlcode,
1928 : const char *msg, int code)
1929 : {
1930 : const char *det;
1931 :
1932 0 : switch (code)
1933 : {
1934 0 : case XML_ERR_INVALID_CHAR:
1935 0 : det = gettext_noop("Invalid character value.");
1936 0 : break;
1937 0 : case XML_ERR_SPACE_REQUIRED:
1938 0 : det = gettext_noop("Space required.");
1939 0 : break;
1940 0 : case XML_ERR_STANDALONE_VALUE:
1941 0 : det = gettext_noop("standalone accepts only 'yes' or 'no'.");
1942 0 : break;
1943 0 : case XML_ERR_VERSION_MISSING:
1944 0 : det = gettext_noop("Malformed declaration: missing version.");
1945 0 : break;
1946 0 : case XML_ERR_MISSING_ENCODING:
1947 0 : det = gettext_noop("Missing encoding in text declaration.");
1948 0 : break;
1949 0 : case XML_ERR_XMLDECL_NOT_FINISHED:
1950 0 : det = gettext_noop("Parsing XML declaration: '?>' expected.");
1951 0 : break;
1952 0 : default:
1953 0 : det = gettext_noop("Unrecognized libxml error code: %d.");
1954 0 : break;
1955 : }
1956 :
1957 0 : ereport(level,
1958 : (errcode(sqlcode),
1959 : errmsg_internal("%s", msg),
1960 : errdetail(det, code)));
1961 0 : }
1962 :
1963 :
1964 : /*
1965 : * Remove all trailing newlines from a StringInfo string
1966 : */
1967 : static void
1968 774 : chopStringInfoNewlines(StringInfo str)
1969 : {
1970 1294 : while (str->len > 0 && str->data[str->len - 1] == '\n')
1971 520 : str->data[--str->len] = '\0';
1972 774 : }
1973 :
1974 :
1975 : /*
1976 : * Append a newline after removing any existing trailing newlines
1977 : */
1978 : static void
1979 514 : appendStringInfoLineSeparator(StringInfo str)
1980 : {
1981 514 : chopStringInfoNewlines(str);
1982 514 : if (str->len > 0)
1983 332 : appendStringInfoChar(str, '\n');
1984 514 : }
1985 :
1986 :
1987 : /*
1988 : * Convert one char in the current server encoding to a Unicode codepoint.
1989 : */
1990 : static pg_wchar
1991 18280 : sqlchar_to_unicode(const char *s)
1992 : {
1993 : char *utf8string;
1994 : pg_wchar ret[2]; /* need space for trailing zero */
1995 :
1996 : /* note we're not assuming s is null-terminated */
1997 18280 : utf8string = pg_server_to_any(s, pg_mblen(s), PG_UTF8);
1998 :
1999 18280 : pg_encoding_mb2wchar_with_len(PG_UTF8, utf8string, ret,
2000 : pg_encoding_mblen(PG_UTF8, utf8string));
2001 :
2002 18280 : if (utf8string != s)
2003 0 : pfree(utf8string);
2004 :
2005 18280 : return ret[0];
2006 : }
2007 :
2008 :
2009 : static bool
2010 3638 : is_valid_xml_namefirst(pg_wchar c)
2011 : {
2012 : /* (Letter | '_' | ':') */
2013 3644 : return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2014 7282 : || c == '_' || c == ':');
2015 : }
2016 :
2017 :
2018 : static bool
2019 14642 : is_valid_xml_namechar(pg_wchar c)
2020 : {
2021 : /* Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender */
2022 15532 : return (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c)
2023 890 : || xmlIsDigitQ(c)
2024 254 : || c == '.' || c == '-' || c == '_' || c == ':'
2025 12 : || xmlIsCombiningQ(c)
2026 31064 : || xmlIsExtenderQ(c));
2027 : }
2028 : #endif /* USE_LIBXML */
2029 :
2030 :
2031 : /*
2032 : * Map SQL identifier to XML name; see SQL/XML:2008 section 9.1.
2033 : */
2034 : char *
2035 3652 : map_sql_identifier_to_xml_name(const char *ident, bool fully_escaped,
2036 : bool escape_period)
2037 : {
2038 : #ifdef USE_LIBXML
2039 : StringInfoData buf;
2040 : const char *p;
2041 :
2042 : /*
2043 : * SQL/XML doesn't make use of this case anywhere, so it's probably a
2044 : * mistake.
2045 : */
2046 : Assert(fully_escaped || !escape_period);
2047 :
2048 3652 : initStringInfo(&buf);
2049 :
2050 21952 : for (p = ident; *p; p += pg_mblen(p))
2051 : {
2052 18300 : if (*p == ':' && (p == ident || fully_escaped))
2053 14 : appendStringInfoString(&buf, "_x003A_");
2054 18286 : else if (*p == '_' && *(p + 1) == 'x')
2055 6 : appendStringInfoString(&buf, "_x005F_");
2056 21568 : else if (fully_escaped && p == ident &&
2057 3288 : pg_strncasecmp(p, "xml", 3) == 0)
2058 : {
2059 0 : if (*p == 'x')
2060 0 : appendStringInfoString(&buf, "_x0078_");
2061 : else
2062 0 : appendStringInfoString(&buf, "_x0058_");
2063 : }
2064 18280 : else if (escape_period && *p == '.')
2065 0 : appendStringInfoString(&buf, "_x002E_");
2066 : else
2067 : {
2068 18280 : pg_wchar u = sqlchar_to_unicode(p);
2069 :
2070 36560 : if ((p == ident)
2071 3638 : ? !is_valid_xml_namefirst(u)
2072 14642 : : !is_valid_xml_namechar(u))
2073 18 : appendStringInfo(&buf, "_x%04X_", (unsigned int) u);
2074 : else
2075 18262 : appendBinaryStringInfo(&buf, p, pg_mblen(p));
2076 : }
2077 : }
2078 :
2079 3652 : return buf.data;
2080 : #else /* not USE_LIBXML */
2081 : NO_XML_SUPPORT();
2082 : return NULL;
2083 : #endif /* not USE_LIBXML */
2084 : }
2085 :
2086 :
2087 : /*
2088 : * Map XML name to SQL identifier; see SQL/XML:2008 section 9.3.
2089 : */
2090 : char *
2091 128 : map_xml_name_to_sql_identifier(const char *name)
2092 : {
2093 : StringInfoData buf;
2094 : const char *p;
2095 :
2096 128 : initStringInfo(&buf);
2097 :
2098 704 : for (p = name; *p; p += pg_mblen(p))
2099 : {
2100 576 : if (*p == '_' && *(p + 1) == 'x'
2101 16 : && isxdigit((unsigned char) *(p + 2))
2102 16 : && isxdigit((unsigned char) *(p + 3))
2103 16 : && isxdigit((unsigned char) *(p + 4))
2104 16 : && isxdigit((unsigned char) *(p + 5))
2105 16 : && *(p + 6) == '_')
2106 16 : {
2107 : char cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
2108 : unsigned int u;
2109 :
2110 16 : sscanf(p + 2, "%X", &u);
2111 16 : pg_unicode_to_server(u, (unsigned char *) cbuf);
2112 16 : appendStringInfoString(&buf, cbuf);
2113 16 : p += 6;
2114 : }
2115 : else
2116 560 : appendBinaryStringInfo(&buf, p, pg_mblen(p));
2117 : }
2118 :
2119 128 : return buf.data;
2120 : }
2121 :
2122 : /*
2123 : * Map SQL value to XML value; see SQL/XML:2008 section 9.8.
2124 : *
2125 : * When xml_escape_strings is true, then certain characters in string
2126 : * values are replaced by entity references (< etc.), as specified
2127 : * in SQL/XML:2008 section 9.8 GR 9) a) iii). This is normally what is
2128 : * wanted. The false case is mainly useful when the resulting value
2129 : * is used with xmlTextWriterWriteAttribute() to write out an
2130 : * attribute, because that function does the escaping itself.
2131 : */
2132 : char *
2133 127332 : map_sql_value_to_xml_value(Datum value, Oid type, bool xml_escape_strings)
2134 : {
2135 127332 : if (type_is_array_domain(type))
2136 : {
2137 : ArrayType *array;
2138 : Oid elmtype;
2139 : int16 elmlen;
2140 : bool elmbyval;
2141 : char elmalign;
2142 : int num_elems;
2143 : Datum *elem_values;
2144 : bool *elem_nulls;
2145 : StringInfoData buf;
2146 : int i;
2147 :
2148 6 : array = DatumGetArrayTypeP(value);
2149 6 : elmtype = ARR_ELEMTYPE(array);
2150 6 : get_typlenbyvalalign(elmtype, &elmlen, &elmbyval, &elmalign);
2151 :
2152 6 : deconstruct_array(array, elmtype,
2153 : elmlen, elmbyval, elmalign,
2154 : &elem_values, &elem_nulls,
2155 : &num_elems);
2156 :
2157 6 : initStringInfo(&buf);
2158 :
2159 24 : for (i = 0; i < num_elems; i++)
2160 : {
2161 18 : if (elem_nulls[i])
2162 0 : continue;
2163 18 : appendStringInfoString(&buf, "<element>");
2164 18 : appendStringInfoString(&buf,
2165 18 : map_sql_value_to_xml_value(elem_values[i],
2166 : elmtype, true));
2167 18 : appendStringInfoString(&buf, "</element>");
2168 : }
2169 :
2170 6 : pfree(elem_values);
2171 6 : pfree(elem_nulls);
2172 :
2173 6 : return buf.data;
2174 : }
2175 : else
2176 : {
2177 : Oid typeOut;
2178 : bool isvarlena;
2179 : char *str;
2180 :
2181 : /*
2182 : * Flatten domains; the special-case treatments below should apply to,
2183 : * eg, domains over boolean not just boolean.
2184 : */
2185 127326 : type = getBaseType(type);
2186 :
2187 : /*
2188 : * Special XSD formatting for some data types
2189 : */
2190 127326 : switch (type)
2191 : {
2192 66 : case BOOLOID:
2193 66 : if (DatumGetBool(value))
2194 60 : return "true";
2195 : else
2196 6 : return "false";
2197 :
2198 48 : case DATEOID:
2199 : {
2200 : DateADT date;
2201 : struct pg_tm tm;
2202 : char buf[MAXDATELEN + 1];
2203 :
2204 48 : date = DatumGetDateADT(value);
2205 : /* XSD doesn't support infinite values */
2206 48 : if (DATE_NOT_FINITE(date))
2207 0 : ereport(ERROR,
2208 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2209 : errmsg("date out of range"),
2210 : errdetail("XML does not support infinite date values.")));
2211 48 : j2date(date + POSTGRES_EPOCH_JDATE,
2212 : &(tm.tm_year), &(tm.tm_mon), &(tm.tm_mday));
2213 48 : EncodeDateOnly(&tm, USE_XSD_DATES, buf);
2214 :
2215 48 : return pstrdup(buf);
2216 : }
2217 :
2218 36 : case TIMESTAMPOID:
2219 : {
2220 : Timestamp timestamp;
2221 : struct pg_tm tm;
2222 : fsec_t fsec;
2223 : char buf[MAXDATELEN + 1];
2224 :
2225 36 : timestamp = DatumGetTimestamp(value);
2226 :
2227 : /* XSD doesn't support infinite values */
2228 36 : if (TIMESTAMP_NOT_FINITE(timestamp))
2229 6 : ereport(ERROR,
2230 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2231 : errmsg("timestamp out of range"),
2232 : errdetail("XML does not support infinite timestamp values.")));
2233 30 : else if (timestamp2tm(timestamp, NULL, &tm, &fsec, NULL, NULL) == 0)
2234 30 : EncodeDateTime(&tm, fsec, false, 0, NULL, USE_XSD_DATES, buf);
2235 : else
2236 0 : ereport(ERROR,
2237 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2238 : errmsg("timestamp out of range")));
2239 :
2240 30 : return pstrdup(buf);
2241 : }
2242 :
2243 24 : case TIMESTAMPTZOID:
2244 : {
2245 : TimestampTz timestamp;
2246 : struct pg_tm tm;
2247 : int tz;
2248 : fsec_t fsec;
2249 24 : const char *tzn = NULL;
2250 : char buf[MAXDATELEN + 1];
2251 :
2252 24 : timestamp = DatumGetTimestamp(value);
2253 :
2254 : /* XSD doesn't support infinite values */
2255 24 : if (TIMESTAMP_NOT_FINITE(timestamp))
2256 0 : ereport(ERROR,
2257 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2258 : errmsg("timestamp out of range"),
2259 : errdetail("XML does not support infinite timestamp values.")));
2260 24 : else if (timestamp2tm(timestamp, &tz, &tm, &fsec, &tzn, NULL) == 0)
2261 24 : EncodeDateTime(&tm, fsec, true, tz, tzn, USE_XSD_DATES, buf);
2262 : else
2263 0 : ereport(ERROR,
2264 : (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2265 : errmsg("timestamp out of range")));
2266 :
2267 24 : return pstrdup(buf);
2268 : }
2269 :
2270 : #ifdef USE_LIBXML
2271 36 : case BYTEAOID:
2272 : {
2273 36 : bytea *bstr = DatumGetByteaPP(value);
2274 : PgXmlErrorContext *xmlerrcxt;
2275 36 : volatile xmlBufferPtr buf = NULL;
2276 36 : volatile xmlTextWriterPtr writer = NULL;
2277 : char *result;
2278 :
2279 36 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
2280 :
2281 36 : PG_TRY();
2282 : {
2283 36 : buf = xmlBufferCreate();
2284 36 : if (buf == NULL || xmlerrcxt->err_occurred)
2285 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2286 : "could not allocate xmlBuffer");
2287 36 : writer = xmlNewTextWriterMemory(buf, 0);
2288 36 : if (writer == NULL || xmlerrcxt->err_occurred)
2289 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
2290 : "could not allocate xmlTextWriter");
2291 :
2292 36 : if (xmlbinary == XMLBINARY_BASE64)
2293 30 : xmlTextWriterWriteBase64(writer, VARDATA_ANY(bstr),
2294 30 : 0, VARSIZE_ANY_EXHDR(bstr));
2295 : else
2296 6 : xmlTextWriterWriteBinHex(writer, VARDATA_ANY(bstr),
2297 6 : 0, VARSIZE_ANY_EXHDR(bstr));
2298 :
2299 : /* we MUST do this now to flush data out to the buffer */
2300 36 : xmlFreeTextWriter(writer);
2301 36 : writer = NULL;
2302 :
2303 36 : result = pstrdup((const char *) xmlBufferContent(buf));
2304 : }
2305 0 : PG_CATCH();
2306 : {
2307 0 : if (writer)
2308 0 : xmlFreeTextWriter(writer);
2309 0 : if (buf)
2310 0 : xmlBufferFree(buf);
2311 :
2312 0 : pg_xml_done(xmlerrcxt, true);
2313 :
2314 0 : PG_RE_THROW();
2315 : }
2316 36 : PG_END_TRY();
2317 :
2318 36 : xmlBufferFree(buf);
2319 :
2320 36 : pg_xml_done(xmlerrcxt, false);
2321 :
2322 36 : return result;
2323 : }
2324 : #endif /* USE_LIBXML */
2325 :
2326 : }
2327 :
2328 : /*
2329 : * otherwise, just use the type's native text representation
2330 : */
2331 127116 : getTypeOutputInfo(type, &typeOut, &isvarlena);
2332 127116 : str = OidOutputFunctionCall(typeOut, value);
2333 :
2334 : /* ... exactly as-is for XML, and when escaping is not wanted */
2335 127116 : if (type == XMLOID || !xml_escape_strings)
2336 21130 : return str;
2337 :
2338 : /* otherwise, translate special characters as needed */
2339 105986 : return escape_xml(str);
2340 : }
2341 : }
2342 :
2343 :
2344 : /*
2345 : * Escape characters in text that have special meanings in XML.
2346 : *
2347 : * Returns a palloc'd string.
2348 : *
2349 : * NB: this is intentionally not dependent on libxml.
2350 : */
2351 : char *
2352 106244 : escape_xml(const char *str)
2353 : {
2354 : StringInfoData buf;
2355 : const char *p;
2356 :
2357 106244 : initStringInfo(&buf);
2358 654380 : for (p = str; *p; p++)
2359 : {
2360 548136 : switch (*p)
2361 : {
2362 0 : case '&':
2363 0 : appendStringInfoString(&buf, "&");
2364 0 : break;
2365 36 : case '<':
2366 36 : appendStringInfoString(&buf, "<");
2367 36 : break;
2368 24 : case '>':
2369 24 : appendStringInfoString(&buf, ">");
2370 24 : break;
2371 0 : case '\r':
2372 0 : appendStringInfoString(&buf, "
");
2373 0 : break;
2374 548076 : default:
2375 548076 : appendStringInfoCharMacro(&buf, *p);
2376 548076 : break;
2377 : }
2378 : }
2379 106244 : return buf.data;
2380 : }
2381 :
2382 :
/*
 * Duplicate a null-terminated string into memory obtained from
 * SPI_palloc().
 */
static char *
_SPI_strdup(const char *s)
{
	size_t		nbytes = strlen(s) + 1; /* include the terminator */
	char	   *copy;

	copy = SPI_palloc(nbytes);
	memcpy(copy, s, nbytes);

	return copy;
}
2392 :
2393 :
2394 : /*
2395 : * SQL to XML mapping functions
2396 : *
2397 : * What follows below was at one point intentionally organized so that
2398 : * you can read along in the SQL/XML standard. The functions are
2399 : * mostly split up the way the clauses lay out in the standards
2400 : * document, and the identifiers are also aligned with the standard
2401 : * text. Unfortunately, SQL/XML:2006 reordered the clauses
2402 : * differently than SQL/XML:2003, so the order below doesn't make much
2403 : * sense anymore.
2404 : *
2405 : * There are many things going on there:
2406 : *
2407 : * There are two kinds of mappings: Mapping SQL data (table contents)
2408 : * to XML documents, and mapping SQL structure (the "schema") to XML
2409 : * Schema. And there are functions that do both at the same time.
2410 : *
2411 : * Then you can map a database, a schema, or a table, each in both
2412 : * ways. This breaks down recursively: Mapping a database invokes
2413 : * mapping schemas, which invokes mapping tables, which invokes
2414 : * mapping rows, which invokes mapping columns, although you can't
2415 : * call the last two from the outside. Because of this, there are a
2416 : * number of xyz_internal() functions which are to be called both from
2417 : * the function manager wrapper and from some upper layer in a
2418 : * recursive call.
2419 : *
2420 : * See the documentation about what the common function arguments
2421 : * nulls, tableforest, and targetns mean.
2422 : *
2423 : * Some style guidelines for XML output: Use double quotes for quoting
2424 : * XML attributes. Indent XML elements by two spaces, but remember
2425 : * that a lot of code is called recursively at different levels, so
2426 : * it's better not to indent rather than create output that indents
2427 : * and outdents weirdly. Add newlines to make the output look nice.
2428 : */
2429 :
2430 :
2431 : /*
2432 : * Visibility of objects for XML mappings; see SQL/XML:2008 section
2433 : * 4.10.8.
2434 : */
2435 :
2436 : /*
2437 : * Given a query, which must return type oid as first column, produce
2438 : * a list of Oids with the query results.
2439 : */
2440 : static List *
2441 36 : query_to_oid_list(const char *query)
2442 : {
2443 : uint64 i;
2444 36 : List *list = NIL;
2445 : int spi_result;
2446 :
2447 36 : spi_result = SPI_execute(query, true, 0);
2448 36 : if (spi_result != SPI_OK_SELECT)
2449 0 : elog(ERROR, "SPI_execute returned %s for %s",
2450 : SPI_result_code_string(spi_result), query);
2451 :
2452 108 : for (i = 0; i < SPI_processed; i++)
2453 : {
2454 : Datum oid;
2455 : bool isnull;
2456 :
2457 72 : oid = SPI_getbinval(SPI_tuptable->vals[i],
2458 72 : SPI_tuptable->tupdesc,
2459 : 1,
2460 : &isnull);
2461 72 : if (!isnull)
2462 72 : list = lappend_oid(list, DatumGetObjectId(oid));
2463 : }
2464 :
2465 36 : return list;
2466 : }
2467 :
2468 :
2469 : static List *
2470 36 : schema_get_xml_visible_tables(Oid nspid)
2471 : {
2472 : StringInfoData query;
2473 :
2474 36 : initStringInfo(&query);
2475 36 : appendStringInfo(&query, "SELECT oid FROM pg_catalog.pg_class"
2476 : " WHERE relnamespace = %u AND relkind IN ("
2477 : CppAsString2(RELKIND_RELATION) ","
2478 : CppAsString2(RELKIND_MATVIEW) ","
2479 : CppAsString2(RELKIND_VIEW) ")"
2480 : " AND pg_catalog.has_table_privilege (oid, 'SELECT')"
2481 : " ORDER BY relname;", nspid);
2482 :
2483 36 : return query_to_oid_list(query.data);
2484 : }
2485 :
2486 :
/*
 * SQL fragments used to select the schemas that take part in a database
 * mapping.  Including the system schemas is probably not useful there, so
 * XML_VISIBLE_SCHEMAS_EXCLUDE matches them and XML_VISIBLE_SCHEMAS selects
 * the remaining schemas for which has_schema_privilege() reports USAGE.
 */
#define XML_VISIBLE_SCHEMAS_EXCLUDE \
	"(nspname ~ '^pg_' OR nspname = 'information_schema')"

#define XML_VISIBLE_SCHEMAS \
	"SELECT oid FROM pg_catalog.pg_namespace" \
	" WHERE pg_catalog.has_schema_privilege (oid, 'USAGE')" \
	" AND NOT " XML_VISIBLE_SCHEMAS_EXCLUDE
2494 :
2495 :
2496 : static List *
2497 0 : database_get_xml_visible_schemas(void)
2498 : {
2499 0 : return query_to_oid_list(XML_VISIBLE_SCHEMAS " ORDER BY nspname;");
2500 : }
2501 :
2502 :
2503 : static List *
2504 0 : database_get_xml_visible_tables(void)
2505 : {
2506 : /* At the moment there is no order required here. */
2507 0 : return query_to_oid_list("SELECT oid FROM pg_catalog.pg_class"
2508 : " WHERE relkind IN ("
2509 : CppAsString2(RELKIND_RELATION) ","
2510 : CppAsString2(RELKIND_MATVIEW) ","
2511 : CppAsString2(RELKIND_VIEW) ")"
2512 : " AND pg_catalog.has_table_privilege(pg_class.oid, 'SELECT')"
2513 : " AND relnamespace IN (" XML_VISIBLE_SCHEMAS ");");
2514 : }
2515 :
2516 :
2517 : /*
2518 : * Map SQL table to XML and/or XML Schema document; see SQL/XML:2008
2519 : * section 9.11.
2520 : */
2521 :
2522 : static StringInfo
2523 96 : table_to_xml_internal(Oid relid,
2524 : const char *xmlschema, bool nulls, bool tableforest,
2525 : const char *targetns, bool top_level)
2526 : {
2527 : StringInfoData query;
2528 :
2529 96 : initStringInfo(&query);
2530 96 : appendStringInfo(&query, "SELECT * FROM %s",
2531 96 : DatumGetCString(DirectFunctionCall1(regclassout,
2532 : ObjectIdGetDatum(relid))));
2533 96 : return query_to_xml_internal(query.data, get_rel_name(relid),
2534 : xmlschema, nulls, tableforest,
2535 : targetns, top_level);
2536 : }
2537 :
2538 :
2539 : Datum
2540 36 : table_to_xml(PG_FUNCTION_ARGS)
2541 : {
2542 36 : Oid relid = PG_GETARG_OID(0);
2543 36 : bool nulls = PG_GETARG_BOOL(1);
2544 36 : bool tableforest = PG_GETARG_BOOL(2);
2545 36 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2546 :
2547 36 : PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid, NULL,
2548 : nulls, tableforest,
2549 : targetns, true)));
2550 : }
2551 :
2552 :
2553 : Datum
2554 10 : query_to_xml(PG_FUNCTION_ARGS)
2555 : {
2556 10 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2557 10 : bool nulls = PG_GETARG_BOOL(1);
2558 10 : bool tableforest = PG_GETARG_BOOL(2);
2559 10 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2560 :
2561 10 : PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2562 : NULL, nulls, tableforest,
2563 : targetns, true)));
2564 : }
2565 :
2566 :
2567 : Datum
2568 12 : cursor_to_xml(PG_FUNCTION_ARGS)
2569 : {
2570 12 : char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2571 12 : int32 count = PG_GETARG_INT32(1);
2572 12 : bool nulls = PG_GETARG_BOOL(2);
2573 12 : bool tableforest = PG_GETARG_BOOL(3);
2574 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(4));
2575 :
2576 : StringInfoData result;
2577 : Portal portal;
2578 : uint64 i;
2579 :
2580 12 : initStringInfo(&result);
2581 :
2582 12 : if (!tableforest)
2583 : {
2584 6 : xmldata_root_element_start(&result, "table", NULL, targetns, true);
2585 6 : appendStringInfoChar(&result, '\n');
2586 : }
2587 :
2588 12 : SPI_connect();
2589 12 : portal = SPI_cursor_find(name);
2590 12 : if (portal == NULL)
2591 0 : ereport(ERROR,
2592 : (errcode(ERRCODE_UNDEFINED_CURSOR),
2593 : errmsg("cursor \"%s\" does not exist", name)));
2594 :
2595 12 : SPI_cursor_fetch(portal, true, count);
2596 48 : for (i = 0; i < SPI_processed; i++)
2597 36 : SPI_sql_row_to_xmlelement(i, &result, NULL, nulls,
2598 : tableforest, targetns, true);
2599 :
2600 12 : SPI_finish();
2601 :
2602 12 : if (!tableforest)
2603 6 : xmldata_root_element_end(&result, "table");
2604 :
2605 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(&result));
2606 : }
2607 :
2608 :
2609 : /*
2610 : * Write the start tag of the root element of a data mapping.
2611 : *
2612 : * top_level means that this is the very top level of the eventual
2613 : * output. For example, when the user calls table_to_xml, then a call
2614 : * with a table name to this function is the top level. When the user
2615 : * calls database_to_xml, then a call with a schema name to this
2616 : * function is not the top level. If top_level is false, then the XML
2617 : * namespace declarations are omitted, because they supposedly already
2618 : * appeared earlier in the output. Repeating them is not wrong, but
2619 : * it looks ugly.
2620 : */
2621 : static void
2622 238 : xmldata_root_element_start(StringInfo result, const char *eltname,
2623 : const char *xmlschema, const char *targetns,
2624 : bool top_level)
2625 : {
2626 : /* This isn't really wrong but currently makes no sense. */
2627 : Assert(top_level || !xmlschema);
2628 :
2629 238 : appendStringInfo(result, "<%s", eltname);
2630 238 : if (top_level)
2631 : {
2632 178 : appendStringInfoString(result, " xmlns:xsi=\"" NAMESPACE_XSI "\"");
2633 178 : if (strlen(targetns) > 0)
2634 30 : appendStringInfo(result, " xmlns=\"%s\"", targetns);
2635 : }
2636 238 : if (xmlschema)
2637 : {
2638 : /* FIXME: better targets */
2639 18 : if (strlen(targetns) > 0)
2640 6 : appendStringInfo(result, " xsi:schemaLocation=\"%s #\"", targetns);
2641 : else
2642 12 : appendStringInfoString(result, " xsi:noNamespaceSchemaLocation=\"#\"");
2643 : }
2644 238 : appendStringInfoString(result, ">\n");
2645 238 : }
2646 :
2647 :
2648 : static void
2649 238 : xmldata_root_element_end(StringInfo result, const char *eltname)
2650 : {
2651 238 : appendStringInfo(result, "</%s>\n", eltname);
2652 238 : }
2653 :
2654 :
2655 : static StringInfo
2656 112 : query_to_xml_internal(const char *query, char *tablename,
2657 : const char *xmlschema, bool nulls, bool tableforest,
2658 : const char *targetns, bool top_level)
2659 : {
2660 : StringInfo result;
2661 : char *xmltn;
2662 : uint64 i;
2663 :
2664 112 : if (tablename)
2665 96 : xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
2666 : else
2667 16 : xmltn = "table";
2668 :
2669 112 : result = makeStringInfo();
2670 :
2671 112 : SPI_connect();
2672 112 : if (SPI_execute(query, true, 0) != SPI_OK_SELECT)
2673 0 : ereport(ERROR,
2674 : (errcode(ERRCODE_DATA_EXCEPTION),
2675 : errmsg("invalid query")));
2676 :
2677 112 : if (!tableforest)
2678 : {
2679 52 : xmldata_root_element_start(result, xmltn, xmlschema,
2680 : targetns, top_level);
2681 52 : appendStringInfoChar(result, '\n');
2682 : }
2683 :
2684 112 : if (xmlschema)
2685 30 : appendStringInfo(result, "%s\n\n", xmlschema);
2686 :
2687 388 : for (i = 0; i < SPI_processed; i++)
2688 276 : SPI_sql_row_to_xmlelement(i, result, tablename, nulls,
2689 : tableforest, targetns, top_level);
2690 :
2691 112 : if (!tableforest)
2692 52 : xmldata_root_element_end(result, xmltn);
2693 :
2694 112 : SPI_finish();
2695 :
2696 112 : return result;
2697 : }
2698 :
2699 :
2700 : Datum
2701 30 : table_to_xmlschema(PG_FUNCTION_ARGS)
2702 : {
2703 30 : Oid relid = PG_GETARG_OID(0);
2704 30 : bool nulls = PG_GETARG_BOOL(1);
2705 30 : bool tableforest = PG_GETARG_BOOL(2);
2706 30 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2707 : const char *result;
2708 : Relation rel;
2709 :
2710 30 : rel = table_open(relid, AccessShareLock);
2711 30 : result = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2712 : tableforest, targetns);
2713 30 : table_close(rel, NoLock);
2714 :
2715 30 : PG_RETURN_XML_P(cstring_to_xmltype(result));
2716 : }
2717 :
2718 :
2719 : Datum
2720 6 : query_to_xmlschema(PG_FUNCTION_ARGS)
2721 : {
2722 6 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2723 6 : bool nulls = PG_GETARG_BOOL(1);
2724 6 : bool tableforest = PG_GETARG_BOOL(2);
2725 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2726 : const char *result;
2727 : SPIPlanPtr plan;
2728 : Portal portal;
2729 :
2730 6 : SPI_connect();
2731 :
2732 6 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2733 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2734 :
2735 6 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2736 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2737 :
2738 6 : result = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2739 : InvalidOid, nulls,
2740 : tableforest, targetns));
2741 6 : SPI_cursor_close(portal);
2742 6 : SPI_finish();
2743 :
2744 6 : PG_RETURN_XML_P(cstring_to_xmltype(result));
2745 : }
2746 :
2747 :
2748 : Datum
2749 12 : cursor_to_xmlschema(PG_FUNCTION_ARGS)
2750 : {
2751 12 : char *name = text_to_cstring(PG_GETARG_TEXT_PP(0));
2752 12 : bool nulls = PG_GETARG_BOOL(1);
2753 12 : bool tableforest = PG_GETARG_BOOL(2);
2754 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2755 : const char *xmlschema;
2756 : Portal portal;
2757 :
2758 12 : SPI_connect();
2759 12 : portal = SPI_cursor_find(name);
2760 12 : if (portal == NULL)
2761 0 : ereport(ERROR,
2762 : (errcode(ERRCODE_UNDEFINED_CURSOR),
2763 : errmsg("cursor \"%s\" does not exist", name)));
2764 :
2765 12 : xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2766 : InvalidOid, nulls,
2767 : tableforest, targetns));
2768 12 : SPI_finish();
2769 :
2770 12 : PG_RETURN_XML_P(cstring_to_xmltype(xmlschema));
2771 : }
2772 :
2773 :
2774 : Datum
2775 24 : table_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2776 : {
2777 24 : Oid relid = PG_GETARG_OID(0);
2778 24 : bool nulls = PG_GETARG_BOOL(1);
2779 24 : bool tableforest = PG_GETARG_BOOL(2);
2780 24 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2781 : Relation rel;
2782 : const char *xmlschema;
2783 :
2784 24 : rel = table_open(relid, AccessShareLock);
2785 24 : xmlschema = map_sql_table_to_xmlschema(rel->rd_att, relid, nulls,
2786 : tableforest, targetns);
2787 24 : table_close(rel, NoLock);
2788 :
2789 24 : PG_RETURN_XML_P(stringinfo_to_xmltype(table_to_xml_internal(relid,
2790 : xmlschema, nulls, tableforest,
2791 : targetns, true)));
2792 : }
2793 :
2794 :
2795 : Datum
2796 6 : query_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2797 : {
2798 6 : char *query = text_to_cstring(PG_GETARG_TEXT_PP(0));
2799 6 : bool nulls = PG_GETARG_BOOL(1);
2800 6 : bool tableforest = PG_GETARG_BOOL(2);
2801 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2802 :
2803 : const char *xmlschema;
2804 : SPIPlanPtr plan;
2805 : Portal portal;
2806 :
2807 6 : SPI_connect();
2808 :
2809 6 : if ((plan = SPI_prepare(query, 0, NULL)) == NULL)
2810 0 : elog(ERROR, "SPI_prepare(\"%s\") failed", query);
2811 :
2812 6 : if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL)
2813 0 : elog(ERROR, "SPI_cursor_open(\"%s\") failed", query);
2814 :
2815 6 : xmlschema = _SPI_strdup(map_sql_table_to_xmlschema(portal->tupDesc,
2816 : InvalidOid, nulls, tableforest, targetns));
2817 6 : SPI_cursor_close(portal);
2818 6 : SPI_finish();
2819 :
2820 6 : PG_RETURN_XML_P(stringinfo_to_xmltype(query_to_xml_internal(query, NULL,
2821 : xmlschema, nulls, tableforest,
2822 : targetns, true)));
2823 : }
2824 :
2825 :
2826 : /*
2827 : * Map SQL schema to XML and/or XML Schema document; see SQL/XML:2008
2828 : * sections 9.13, 9.14.
2829 : */
2830 :
2831 : static StringInfo
2832 18 : schema_to_xml_internal(Oid nspid, const char *xmlschema, bool nulls,
2833 : bool tableforest, const char *targetns, bool top_level)
2834 : {
2835 : StringInfo result;
2836 : char *xmlsn;
2837 : List *relid_list;
2838 : ListCell *cell;
2839 :
2840 18 : xmlsn = map_sql_identifier_to_xml_name(get_namespace_name(nspid),
2841 : true, false);
2842 18 : result = makeStringInfo();
2843 :
2844 18 : xmldata_root_element_start(result, xmlsn, xmlschema, targetns, top_level);
2845 18 : appendStringInfoChar(result, '\n');
2846 :
2847 18 : if (xmlschema)
2848 6 : appendStringInfo(result, "%s\n\n", xmlschema);
2849 :
2850 18 : SPI_connect();
2851 :
2852 18 : relid_list = schema_get_xml_visible_tables(nspid);
2853 :
2854 54 : foreach(cell, relid_list)
2855 : {
2856 36 : Oid relid = lfirst_oid(cell);
2857 : StringInfo subres;
2858 :
2859 36 : subres = table_to_xml_internal(relid, NULL, nulls, tableforest,
2860 : targetns, false);
2861 :
2862 36 : appendBinaryStringInfo(result, subres->data, subres->len);
2863 36 : appendStringInfoChar(result, '\n');
2864 : }
2865 :
2866 18 : SPI_finish();
2867 :
2868 18 : xmldata_root_element_end(result, xmlsn);
2869 :
2870 18 : return result;
2871 : }
2872 :
2873 :
2874 : Datum
2875 12 : schema_to_xml(PG_FUNCTION_ARGS)
2876 : {
2877 12 : Name name = PG_GETARG_NAME(0);
2878 12 : bool nulls = PG_GETARG_BOOL(1);
2879 12 : bool tableforest = PG_GETARG_BOOL(2);
2880 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2881 :
2882 : char *schemaname;
2883 : Oid nspid;
2884 :
2885 12 : schemaname = NameStr(*name);
2886 12 : nspid = LookupExplicitNamespace(schemaname, false);
2887 :
2888 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid, NULL,
2889 : nulls, tableforest, targetns, true)));
2890 : }
2891 :
2892 :
2893 : /*
2894 : * Write the start element of the root element of an XML Schema mapping.
2895 : */
2896 : static void
2897 96 : xsd_schema_element_start(StringInfo result, const char *targetns)
2898 : {
2899 96 : appendStringInfoString(result,
2900 : "<xsd:schema\n"
2901 : " xmlns:xsd=\"" NAMESPACE_XSD "\"");
2902 96 : if (strlen(targetns) > 0)
2903 18 : appendStringInfo(result,
2904 : "\n"
2905 : " targetNamespace=\"%s\"\n"
2906 : " elementFormDefault=\"qualified\"",
2907 : targetns);
2908 96 : appendStringInfoString(result,
2909 : ">\n\n");
2910 96 : }
2911 :
2912 :
2913 : static void
2914 96 : xsd_schema_element_end(StringInfo result)
2915 : {
2916 96 : appendStringInfoString(result, "</xsd:schema>");
2917 96 : }
2918 :
2919 :
2920 : static StringInfo
2921 18 : schema_to_xmlschema_internal(const char *schemaname, bool nulls,
2922 : bool tableforest, const char *targetns)
2923 : {
2924 : Oid nspid;
2925 : List *relid_list;
2926 : List *tupdesc_list;
2927 : ListCell *cell;
2928 : StringInfo result;
2929 :
2930 18 : result = makeStringInfo();
2931 :
2932 18 : nspid = LookupExplicitNamespace(schemaname, false);
2933 :
2934 18 : xsd_schema_element_start(result, targetns);
2935 :
2936 18 : SPI_connect();
2937 :
2938 18 : relid_list = schema_get_xml_visible_tables(nspid);
2939 :
2940 18 : tupdesc_list = NIL;
2941 54 : foreach(cell, relid_list)
2942 : {
2943 : Relation rel;
2944 :
2945 36 : rel = table_open(lfirst_oid(cell), AccessShareLock);
2946 36 : tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
2947 36 : table_close(rel, NoLock);
2948 : }
2949 :
2950 18 : appendStringInfoString(result,
2951 : map_sql_typecoll_to_xmlschema_types(tupdesc_list));
2952 :
2953 18 : appendStringInfoString(result,
2954 : map_sql_schema_to_xmlschema_types(nspid, relid_list,
2955 : nulls, tableforest, targetns));
2956 :
2957 18 : xsd_schema_element_end(result);
2958 :
2959 18 : SPI_finish();
2960 :
2961 18 : return result;
2962 : }
2963 :
2964 :
2965 : Datum
2966 12 : schema_to_xmlschema(PG_FUNCTION_ARGS)
2967 : {
2968 12 : Name name = PG_GETARG_NAME(0);
2969 12 : bool nulls = PG_GETARG_BOOL(1);
2970 12 : bool tableforest = PG_GETARG_BOOL(2);
2971 12 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2972 :
2973 12 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xmlschema_internal(NameStr(*name),
2974 : nulls, tableforest, targetns)));
2975 : }
2976 :
2977 :
2978 : Datum
2979 6 : schema_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
2980 : {
2981 6 : Name name = PG_GETARG_NAME(0);
2982 6 : bool nulls = PG_GETARG_BOOL(1);
2983 6 : bool tableforest = PG_GETARG_BOOL(2);
2984 6 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(3));
2985 : char *schemaname;
2986 : Oid nspid;
2987 : StringInfo xmlschema;
2988 :
2989 6 : schemaname = NameStr(*name);
2990 6 : nspid = LookupExplicitNamespace(schemaname, false);
2991 :
2992 6 : xmlschema = schema_to_xmlschema_internal(schemaname, nulls,
2993 : tableforest, targetns);
2994 :
2995 6 : PG_RETURN_XML_P(stringinfo_to_xmltype(schema_to_xml_internal(nspid,
2996 : xmlschema->data, nulls,
2997 : tableforest, targetns, true)));
2998 : }
2999 :
3000 :
3001 : /*
3002 : * Map SQL database to XML and/or XML Schema document; see SQL/XML:2008
3003 : * sections 9.16, 9.17.
3004 : */
3005 :
3006 : static StringInfo
3007 0 : database_to_xml_internal(const char *xmlschema, bool nulls,
3008 : bool tableforest, const char *targetns)
3009 : {
3010 : StringInfo result;
3011 : List *nspid_list;
3012 : ListCell *cell;
3013 : char *xmlcn;
3014 :
3015 0 : xmlcn = map_sql_identifier_to_xml_name(get_database_name(MyDatabaseId),
3016 : true, false);
3017 0 : result = makeStringInfo();
3018 :
3019 0 : xmldata_root_element_start(result, xmlcn, xmlschema, targetns, true);
3020 0 : appendStringInfoChar(result, '\n');
3021 :
3022 0 : if (xmlschema)
3023 0 : appendStringInfo(result, "%s\n\n", xmlschema);
3024 :
3025 0 : SPI_connect();
3026 :
3027 0 : nspid_list = database_get_xml_visible_schemas();
3028 :
3029 0 : foreach(cell, nspid_list)
3030 : {
3031 0 : Oid nspid = lfirst_oid(cell);
3032 : StringInfo subres;
3033 :
3034 0 : subres = schema_to_xml_internal(nspid, NULL, nulls,
3035 : tableforest, targetns, false);
3036 :
3037 0 : appendBinaryStringInfo(result, subres->data, subres->len);
3038 0 : appendStringInfoChar(result, '\n');
3039 : }
3040 :
3041 0 : SPI_finish();
3042 :
3043 0 : xmldata_root_element_end(result, xmlcn);
3044 :
3045 0 : return result;
3046 : }
3047 :
3048 :
3049 : Datum
3050 0 : database_to_xml(PG_FUNCTION_ARGS)
3051 : {
3052 0 : bool nulls = PG_GETARG_BOOL(0);
3053 0 : bool tableforest = PG_GETARG_BOOL(1);
3054 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3055 :
3056 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(NULL, nulls,
3057 : tableforest, targetns)));
3058 : }
3059 :
3060 :
3061 : static StringInfo
3062 0 : database_to_xmlschema_internal(bool nulls, bool tableforest,
3063 : const char *targetns)
3064 : {
3065 : List *relid_list;
3066 : List *nspid_list;
3067 : List *tupdesc_list;
3068 : ListCell *cell;
3069 : StringInfo result;
3070 :
3071 0 : result = makeStringInfo();
3072 :
3073 0 : xsd_schema_element_start(result, targetns);
3074 :
3075 0 : SPI_connect();
3076 :
3077 0 : relid_list = database_get_xml_visible_tables();
3078 0 : nspid_list = database_get_xml_visible_schemas();
3079 :
3080 0 : tupdesc_list = NIL;
3081 0 : foreach(cell, relid_list)
3082 : {
3083 : Relation rel;
3084 :
3085 0 : rel = table_open(lfirst_oid(cell), AccessShareLock);
3086 0 : tupdesc_list = lappend(tupdesc_list, CreateTupleDescCopy(rel->rd_att));
3087 0 : table_close(rel, NoLock);
3088 : }
3089 :
3090 0 : appendStringInfoString(result,
3091 : map_sql_typecoll_to_xmlschema_types(tupdesc_list));
3092 :
3093 0 : appendStringInfoString(result,
3094 : map_sql_catalog_to_xmlschema_types(nspid_list, nulls, tableforest, targetns));
3095 :
3096 0 : xsd_schema_element_end(result);
3097 :
3098 0 : SPI_finish();
3099 :
3100 0 : return result;
3101 : }
3102 :
3103 :
3104 : Datum
3105 0 : database_to_xmlschema(PG_FUNCTION_ARGS)
3106 : {
3107 0 : bool nulls = PG_GETARG_BOOL(0);
3108 0 : bool tableforest = PG_GETARG_BOOL(1);
3109 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3110 :
3111 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xmlschema_internal(nulls,
3112 : tableforest, targetns)));
3113 : }
3114 :
3115 :
3116 : Datum
3117 0 : database_to_xml_and_xmlschema(PG_FUNCTION_ARGS)
3118 : {
3119 0 : bool nulls = PG_GETARG_BOOL(0);
3120 0 : bool tableforest = PG_GETARG_BOOL(1);
3121 0 : const char *targetns = text_to_cstring(PG_GETARG_TEXT_PP(2));
3122 : StringInfo xmlschema;
3123 :
3124 0 : xmlschema = database_to_xmlschema_internal(nulls, tableforest, targetns);
3125 :
3126 0 : PG_RETURN_XML_P(stringinfo_to_xmltype(database_to_xml_internal(xmlschema->data,
3127 : nulls, tableforest, targetns)));
3128 : }
3129 :
3130 :
3131 : /*
3132 : * Map a multi-part SQL name to an XML name; see SQL/XML:2008 section
3133 : * 9.2.
3134 : */
3135 : static char *
3136 384 : map_multipart_sql_identifier_to_xml_name(const char *a, const char *b, const char *c, const char *d)
3137 : {
3138 : StringInfoData result;
3139 :
3140 384 : initStringInfo(&result);
3141 :
3142 384 : if (a)
3143 384 : appendStringInfoString(&result,
3144 384 : map_sql_identifier_to_xml_name(a, true, true));
3145 384 : if (b)
3146 384 : appendStringInfo(&result, ".%s",
3147 : map_sql_identifier_to_xml_name(b, true, true));
3148 384 : if (c)
3149 384 : appendStringInfo(&result, ".%s",
3150 : map_sql_identifier_to_xml_name(c, true, true));
3151 384 : if (d)
3152 366 : appendStringInfo(&result, ".%s",
3153 : map_sql_identifier_to_xml_name(d, true, true));
3154 :
3155 384 : return result.data;
3156 : }
3157 :
3158 :
3159 : /*
3160 : * Map an SQL table to an XML Schema document; see SQL/XML:2008
3161 : * section 9.11.
3162 : *
3163 : * Map an SQL table to XML Schema data types; see SQL/XML:2008 section
3164 : * 9.9.
3165 : */
3166 : static const char *
3167 78 : map_sql_table_to_xmlschema(TupleDesc tupdesc, Oid relid, bool nulls,
3168 : bool tableforest, const char *targetns)
3169 : {
3170 : int i;
3171 : char *xmltn;
3172 : char *tabletypename;
3173 : char *rowtypename;
3174 : StringInfoData result;
3175 :
3176 78 : initStringInfo(&result);
3177 :
3178 78 : if (OidIsValid(relid))
3179 : {
3180 : HeapTuple tuple;
3181 : Form_pg_class reltuple;
3182 :
3183 54 : tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
3184 54 : if (!HeapTupleIsValid(tuple))
3185 0 : elog(ERROR, "cache lookup failed for relation %u", relid);
3186 54 : reltuple = (Form_pg_class) GETSTRUCT(tuple);
3187 :
3188 54 : xmltn = map_sql_identifier_to_xml_name(NameStr(reltuple->relname),
3189 : true, false);
3190 :
3191 54 : tabletypename = map_multipart_sql_identifier_to_xml_name("TableType",
3192 54 : get_database_name(MyDatabaseId),
3193 54 : get_namespace_name(reltuple->relnamespace),
3194 54 : NameStr(reltuple->relname));
3195 :
3196 54 : rowtypename = map_multipart_sql_identifier_to_xml_name("RowType",
3197 54 : get_database_name(MyDatabaseId),
3198 54 : get_namespace_name(reltuple->relnamespace),
3199 54 : NameStr(reltuple->relname));
3200 :
3201 54 : ReleaseSysCache(tuple);
3202 : }
3203 : else
3204 : {
3205 24 : if (tableforest)
3206 12 : xmltn = "row";
3207 : else
3208 12 : xmltn = "table";
3209 :
3210 24 : tabletypename = "TableType";
3211 24 : rowtypename = "RowType";
3212 : }
3213 :
3214 78 : xsd_schema_element_start(&result, targetns);
3215 :
3216 78 : appendStringInfoString(&result,
3217 78 : map_sql_typecoll_to_xmlschema_types(list_make1(tupdesc)));
3218 :
3219 78 : appendStringInfo(&result,
3220 : "<xsd:complexType name=\"%s\">\n"
3221 : " <xsd:sequence>\n",
3222 : rowtypename);
3223 :
3224 324 : for (i = 0; i < tupdesc->natts; i++)
3225 : {
3226 246 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3227 :
3228 246 : if (att->attisdropped)
3229 6 : continue;
3230 480 : appendStringInfo(&result,
3231 : " <xsd:element name=\"%s\" type=\"%s\"%s></xsd:element>\n",
3232 240 : map_sql_identifier_to_xml_name(NameStr(att->attname),
3233 : true, false),
3234 : map_sql_type_to_xml_name(att->atttypid, -1),
3235 : nulls ? " nillable=\"true\"" : " minOccurs=\"0\"");
3236 : }
3237 :
3238 78 : appendStringInfoString(&result,
3239 : " </xsd:sequence>\n"
3240 : "</xsd:complexType>\n\n");
3241 :
3242 78 : if (!tableforest)
3243 : {
3244 42 : appendStringInfo(&result,
3245 : "<xsd:complexType name=\"%s\">\n"
3246 : " <xsd:sequence>\n"
3247 : " <xsd:element name=\"row\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n"
3248 : " </xsd:sequence>\n"
3249 : "</xsd:complexType>\n\n",
3250 : tabletypename, rowtypename);
3251 :
3252 42 : appendStringInfo(&result,
3253 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3254 : xmltn, tabletypename);
3255 : }
3256 : else
3257 36 : appendStringInfo(&result,
3258 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3259 : xmltn, rowtypename);
3260 :
3261 78 : xsd_schema_element_end(&result);
3262 :
3263 78 : return result.data;
3264 : }
3265 :
3266 :
3267 : /*
3268 : * Map an SQL schema to XML Schema data types; see SQL/XML:2008
3269 : * section 9.12.
3270 : */
3271 : static const char *
3272 18 : map_sql_schema_to_xmlschema_types(Oid nspid, List *relid_list, bool nulls,
3273 : bool tableforest, const char *targetns)
3274 : {
3275 : char *dbname;
3276 : char *nspname;
3277 : char *xmlsn;
3278 : char *schematypename;
3279 : StringInfoData result;
3280 : ListCell *cell;
3281 :
3282 18 : dbname = get_database_name(MyDatabaseId);
3283 18 : nspname = get_namespace_name(nspid);
3284 :
3285 18 : initStringInfo(&result);
3286 :
3287 18 : xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3288 :
3289 18 : schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3290 : dbname,
3291 : nspname,
3292 : NULL);
3293 :
3294 18 : appendStringInfo(&result,
3295 : "<xsd:complexType name=\"%s\">\n", schematypename);
3296 18 : if (!tableforest)
3297 6 : appendStringInfoString(&result,
3298 : " <xsd:all>\n");
3299 : else
3300 12 : appendStringInfoString(&result,
3301 : " <xsd:sequence>\n");
3302 :
3303 54 : foreach(cell, relid_list)
3304 : {
3305 36 : Oid relid = lfirst_oid(cell);
3306 36 : char *relname = get_rel_name(relid);
3307 36 : char *xmltn = map_sql_identifier_to_xml_name(relname, true, false);
3308 36 : char *tabletypename = map_multipart_sql_identifier_to_xml_name(tableforest ? "RowType" : "TableType",
3309 : dbname,
3310 : nspname,
3311 : relname);
3312 :
3313 36 : if (!tableforest)
3314 12 : appendStringInfo(&result,
3315 : " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3316 : xmltn, tabletypename);
3317 : else
3318 24 : appendStringInfo(&result,
3319 : " <xsd:element name=\"%s\" type=\"%s\" minOccurs=\"0\" maxOccurs=\"unbounded\"/>\n",
3320 : xmltn, tabletypename);
3321 : }
3322 :
3323 18 : if (!tableforest)
3324 6 : appendStringInfoString(&result,
3325 : " </xsd:all>\n");
3326 : else
3327 12 : appendStringInfoString(&result,
3328 : " </xsd:sequence>\n");
3329 18 : appendStringInfoString(&result,
3330 : "</xsd:complexType>\n\n");
3331 :
3332 18 : appendStringInfo(&result,
3333 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3334 : xmlsn, schematypename);
3335 :
3336 18 : return result.data;
3337 : }
3338 :
3339 :
3340 : /*
3341 : * Map an SQL catalog to XML Schema data types; see SQL/XML:2008
3342 : * section 9.15.
3343 : */
3344 : static const char *
3345 0 : map_sql_catalog_to_xmlschema_types(List *nspid_list, bool nulls,
3346 : bool tableforest, const char *targetns)
3347 : {
3348 : char *dbname;
3349 : char *xmlcn;
3350 : char *catalogtypename;
3351 : StringInfoData result;
3352 : ListCell *cell;
3353 :
3354 0 : dbname = get_database_name(MyDatabaseId);
3355 :
3356 0 : initStringInfo(&result);
3357 :
3358 0 : xmlcn = map_sql_identifier_to_xml_name(dbname, true, false);
3359 :
3360 0 : catalogtypename = map_multipart_sql_identifier_to_xml_name("CatalogType",
3361 : dbname,
3362 : NULL,
3363 : NULL);
3364 :
3365 0 : appendStringInfo(&result,
3366 : "<xsd:complexType name=\"%s\">\n", catalogtypename);
3367 0 : appendStringInfoString(&result,
3368 : " <xsd:all>\n");
3369 :
3370 0 : foreach(cell, nspid_list)
3371 : {
3372 0 : Oid nspid = lfirst_oid(cell);
3373 0 : char *nspname = get_namespace_name(nspid);
3374 0 : char *xmlsn = map_sql_identifier_to_xml_name(nspname, true, false);
3375 0 : char *schematypename = map_multipart_sql_identifier_to_xml_name("SchemaType",
3376 : dbname,
3377 : nspname,
3378 : NULL);
3379 :
3380 0 : appendStringInfo(&result,
3381 : " <xsd:element name=\"%s\" type=\"%s\"/>\n",
3382 : xmlsn, schematypename);
3383 : }
3384 :
3385 0 : appendStringInfoString(&result,
3386 : " </xsd:all>\n");
3387 0 : appendStringInfoString(&result,
3388 : "</xsd:complexType>\n\n");
3389 :
3390 0 : appendStringInfo(&result,
3391 : "<xsd:element name=\"%s\" type=\"%s\"/>\n\n",
3392 : xmlcn, catalogtypename);
3393 :
3394 0 : return result.data;
3395 : }
3396 :
3397 :
3398 : /*
3399 : * Map an SQL data type to an XML name; see SQL/XML:2008 section 9.4.
3400 : */
3401 : static const char *
3402 810 : map_sql_type_to_xml_name(Oid typeoid, int typmod)
3403 : {
3404 : StringInfoData result;
3405 :
3406 810 : initStringInfo(&result);
3407 :
3408 810 : switch (typeoid)
3409 : {
3410 30 : case BPCHAROID:
3411 30 : if (typmod == -1)
3412 30 : appendStringInfoString(&result, "CHAR");
3413 : else
3414 0 : appendStringInfo(&result, "CHAR_%d", typmod - VARHDRSZ);
3415 30 : break;
3416 54 : case VARCHAROID:
3417 54 : if (typmod == -1)
3418 54 : appendStringInfoString(&result, "VARCHAR");
3419 : else
3420 0 : appendStringInfo(&result, "VARCHAR_%d", typmod - VARHDRSZ);
3421 54 : break;
3422 30 : case NUMERICOID:
3423 30 : if (typmod == -1)
3424 30 : appendStringInfoString(&result, "NUMERIC");
3425 : else
3426 0 : appendStringInfo(&result, "NUMERIC_%d_%d",
3427 0 : ((typmod - VARHDRSZ) >> 16) & 0xffff,
3428 0 : (typmod - VARHDRSZ) & 0xffff);
3429 30 : break;
3430 174 : case INT4OID:
3431 174 : appendStringInfoString(&result, "INTEGER");
3432 174 : break;
3433 30 : case INT2OID:
3434 30 : appendStringInfoString(&result, "SMALLINT");
3435 30 : break;
3436 30 : case INT8OID:
3437 30 : appendStringInfoString(&result, "BIGINT");
3438 30 : break;
3439 30 : case FLOAT4OID:
3440 30 : appendStringInfoString(&result, "REAL");
3441 30 : break;
3442 0 : case FLOAT8OID:
3443 0 : appendStringInfoString(&result, "DOUBLE");
3444 0 : break;
3445 30 : case BOOLOID:
3446 30 : appendStringInfoString(&result, "BOOLEAN");
3447 30 : break;
3448 30 : case TIMEOID:
3449 30 : if (typmod == -1)
3450 30 : appendStringInfoString(&result, "TIME");
3451 : else
3452 0 : appendStringInfo(&result, "TIME_%d", typmod);
3453 30 : break;
3454 30 : case TIMETZOID:
3455 30 : if (typmod == -1)
3456 30 : appendStringInfoString(&result, "TIME_WTZ");
3457 : else
3458 0 : appendStringInfo(&result, "TIME_WTZ_%d", typmod);
3459 30 : break;
3460 30 : case TIMESTAMPOID:
3461 30 : if (typmod == -1)
3462 30 : appendStringInfoString(&result, "TIMESTAMP");
3463 : else
3464 0 : appendStringInfo(&result, "TIMESTAMP_%d", typmod);
3465 30 : break;
3466 30 : case TIMESTAMPTZOID:
3467 30 : if (typmod == -1)
3468 30 : appendStringInfoString(&result, "TIMESTAMP_WTZ");
3469 : else
3470 0 : appendStringInfo(&result, "TIMESTAMP_WTZ_%d", typmod);
3471 30 : break;
3472 30 : case DATEOID:
3473 30 : appendStringInfoString(&result, "DATE");
3474 30 : break;
3475 30 : case XMLOID:
3476 30 : appendStringInfoString(&result, "XML");
3477 30 : break;
3478 222 : default:
3479 : {
3480 : HeapTuple tuple;
3481 : Form_pg_type typtuple;
3482 :
3483 222 : tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeoid));
3484 222 : if (!HeapTupleIsValid(tuple))
3485 0 : elog(ERROR, "cache lookup failed for type %u", typeoid);
3486 222 : typtuple = (Form_pg_type) GETSTRUCT(tuple);
3487 :
3488 222 : appendStringInfoString(&result,
3489 222 : map_multipart_sql_identifier_to_xml_name((typtuple->typtype == TYPTYPE_DOMAIN) ? "Domain" : "UDT",
3490 222 : get_database_name(MyDatabaseId),
3491 222 : get_namespace_name(typtuple->typnamespace),
3492 222 : NameStr(typtuple->typname)));
3493 :
3494 222 : ReleaseSysCache(tuple);
3495 : }
3496 : }
3497 :
3498 810 : return result.data;
3499 : }
3500 :
3501 :
3502 : /*
3503 : * Map a collection of SQL data types to XML Schema data types; see
3504 : * SQL/XML:2008 section 9.7.
3505 : */
3506 : static const char *
3507 96 : map_sql_typecoll_to_xmlschema_types(List *tupdesc_list)
3508 : {
3509 96 : List *uniquetypes = NIL;
3510 : int i;
3511 : StringInfoData result;
3512 : ListCell *cell0;
3513 :
3514 : /* extract all column types used in the set of TupleDescs */
3515 210 : foreach(cell0, tupdesc_list)
3516 : {
3517 114 : TupleDesc tupdesc = (TupleDesc) lfirst(cell0);
3518 :
3519 702 : for (i = 0; i < tupdesc->natts; i++)
3520 : {
3521 588 : Form_pg_attribute att = TupleDescAttr(tupdesc, i);
3522 :
3523 588 : if (att->attisdropped)
3524 24 : continue;
3525 564 : uniquetypes = list_append_unique_oid(uniquetypes, att->atttypid);
3526 : }
3527 : }
3528 :
3529 : /* add base types of domains */
3530 642 : foreach(cell0, uniquetypes)
3531 : {
3532 546 : Oid typid = lfirst_oid(cell0);
3533 546 : Oid basetypid = getBaseType(typid);
3534 :
3535 546 : if (basetypid != typid)
3536 24 : uniquetypes = list_append_unique_oid(uniquetypes, basetypid);
3537 : }
3538 :
3539 : /* Convert to textual form */
3540 96 : initStringInfo(&result);
3541 :
3542 642 : foreach(cell0, uniquetypes)
3543 : {
3544 546 : appendStringInfo(&result, "%s\n",
3545 : map_sql_type_to_xmlschema_type(lfirst_oid(cell0),
3546 : -1));
3547 : }
3548 :
3549 96 : return result.data;
3550 : }
3551 :
3552 :
3553 : /*
3554 : * Map an SQL data type to a named XML Schema data type; see
3555 : * SQL/XML:2008 sections 9.5 and 9.6.
3556 : *
3557 : * (The distinction between 9.5 and 9.6 is basically that 9.6 adds
3558 : * a name attribute, which this function does. The name-less version
3559 : * 9.5 doesn't appear to be required anywhere.)
3560 : */
3561 : static const char *
3562 546 : map_sql_type_to_xmlschema_type(Oid typeoid, int typmod)
3563 : {
3564 : StringInfoData result;
3565 546 : const char *typename = map_sql_type_to_xml_name(typeoid, typmod);
3566 :
3567 546 : initStringInfo(&result);
3568 :
3569 546 : if (typeoid == XMLOID)
3570 : {
3571 24 : appendStringInfoString(&result,
3572 : "<xsd:complexType mixed=\"true\">\n"
3573 : " <xsd:sequence>\n"
3574 : " <xsd:any name=\"element\" minOccurs=\"0\" maxOccurs=\"unbounded\" processContents=\"skip\"/>\n"
3575 : " </xsd:sequence>\n"
3576 : "</xsd:complexType>\n");
3577 : }
3578 : else
3579 : {
3580 522 : appendStringInfo(&result,
3581 : "<xsd:simpleType name=\"%s\">\n", typename);
3582 :
3583 522 : switch (typeoid)
3584 : {
3585 138 : case BPCHAROID:
3586 : case VARCHAROID:
3587 : case TEXTOID:
3588 138 : appendStringInfoString(&result,
3589 : " <xsd:restriction base=\"xsd:string\">\n");
3590 138 : if (typmod != -1)
3591 0 : appendStringInfo(&result,
3592 : " <xsd:maxLength value=\"%d\"/>\n",
3593 : typmod - VARHDRSZ);
3594 138 : appendStringInfoString(&result, " </xsd:restriction>\n");
3595 138 : break;
3596 :
3597 24 : case BYTEAOID:
3598 24 : appendStringInfo(&result,
3599 : " <xsd:restriction base=\"xsd:%s\">\n"
3600 : " </xsd:restriction>\n",
3601 24 : xmlbinary == XMLBINARY_BASE64 ? "base64Binary" : "hexBinary");
3602 24 : break;
3603 :
3604 24 : case NUMERICOID:
3605 24 : if (typmod != -1)
3606 0 : appendStringInfo(&result,
3607 : " <xsd:restriction base=\"xsd:decimal\">\n"
3608 : " <xsd:totalDigits value=\"%d\"/>\n"
3609 : " <xsd:fractionDigits value=\"%d\"/>\n"
3610 : " </xsd:restriction>\n",
3611 0 : ((typmod - VARHDRSZ) >> 16) & 0xffff,
3612 0 : (typmod - VARHDRSZ) & 0xffff);
3613 24 : break;
3614 :
3615 24 : case INT2OID:
3616 24 : appendStringInfo(&result,
3617 : " <xsd:restriction base=\"xsd:short\">\n"
3618 : " <xsd:maxInclusive value=\"%d\"/>\n"
3619 : " <xsd:minInclusive value=\"%d\"/>\n"
3620 : " </xsd:restriction>\n",
3621 : SHRT_MAX, SHRT_MIN);
3622 24 : break;
3623 :
3624 96 : case INT4OID:
3625 96 : appendStringInfo(&result,
3626 : " <xsd:restriction base=\"xsd:int\">\n"
3627 : " <xsd:maxInclusive value=\"%d\"/>\n"
3628 : " <xsd:minInclusive value=\"%d\"/>\n"
3629 : " </xsd:restriction>\n",
3630 : INT_MAX, INT_MIN);
3631 96 : break;
3632 :
3633 24 : case INT8OID:
3634 24 : appendStringInfo(&result,
3635 : " <xsd:restriction base=\"xsd:long\">\n"
3636 : " <xsd:maxInclusive value=\"" INT64_FORMAT "\"/>\n"
3637 : " <xsd:minInclusive value=\"" INT64_FORMAT "\"/>\n"
3638 : " </xsd:restriction>\n",
3639 : PG_INT64_MAX,
3640 : PG_INT64_MIN);
3641 24 : break;
3642 :
3643 24 : case FLOAT4OID:
3644 24 : appendStringInfoString(&result,
3645 : " <xsd:restriction base=\"xsd:float\"></xsd:restriction>\n");
3646 24 : break;
3647 :
3648 0 : case FLOAT8OID:
3649 0 : appendStringInfoString(&result,
3650 : " <xsd:restriction base=\"xsd:double\"></xsd:restriction>\n");
3651 0 : break;
3652 :
3653 24 : case BOOLOID:
3654 24 : appendStringInfoString(&result,
3655 : " <xsd:restriction base=\"xsd:boolean\"></xsd:restriction>\n");
3656 24 : break;
3657 :
3658 48 : case TIMEOID:
3659 : case TIMETZOID:
3660 : {
3661 48 : const char *tz = (typeoid == TIMETZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3662 :
3663 48 : if (typmod == -1)
3664 48 : appendStringInfo(&result,
3665 : " <xsd:restriction base=\"xsd:time\">\n"
3666 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3667 : " </xsd:restriction>\n", tz);
3668 0 : else if (typmod == 0)
3669 0 : appendStringInfo(&result,
3670 : " <xsd:restriction base=\"xsd:time\">\n"
3671 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3672 : " </xsd:restriction>\n", tz);
3673 : else
3674 0 : appendStringInfo(&result,
3675 : " <xsd:restriction base=\"xsd:time\">\n"
3676 : " <xsd:pattern value=\"\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3677 : " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3678 48 : break;
3679 : }
3680 :
3681 48 : case TIMESTAMPOID:
3682 : case TIMESTAMPTZOID:
3683 : {
3684 48 : const char *tz = (typeoid == TIMESTAMPTZOID ? "(\\+|-)\\p{Nd}{2}:\\p{Nd}{2}" : "");
3685 :
3686 48 : if (typmod == -1)
3687 48 : appendStringInfo(&result,
3688 : " <xsd:restriction base=\"xsd:dateTime\">\n"
3689 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}(.\\p{Nd}+)?%s\"/>\n"
3690 : " </xsd:restriction>\n", tz);
3691 0 : else if (typmod == 0)
3692 0 : appendStringInfo(&result,
3693 : " <xsd:restriction base=\"xsd:dateTime\">\n"
3694 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}%s\"/>\n"
3695 : " </xsd:restriction>\n", tz);
3696 : else
3697 0 : appendStringInfo(&result,
3698 : " <xsd:restriction base=\"xsd:dateTime\">\n"
3699 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}T\\p{Nd}{2}:\\p{Nd}{2}:\\p{Nd}{2}.\\p{Nd}{%d}%s\"/>\n"
3700 : " </xsd:restriction>\n", typmod - VARHDRSZ, tz);
3701 48 : break;
3702 : }
3703 :
3704 24 : case DATEOID:
3705 24 : appendStringInfoString(&result,
3706 : " <xsd:restriction base=\"xsd:date\">\n"
3707 : " <xsd:pattern value=\"\\p{Nd}{4}-\\p{Nd}{2}-\\p{Nd}{2}\"/>\n"
3708 : " </xsd:restriction>\n");
3709 24 : break;
3710 :
3711 24 : default:
3712 24 : if (get_typtype(typeoid) == TYPTYPE_DOMAIN)
3713 : {
3714 : Oid base_typeoid;
3715 24 : int32 base_typmod = -1;
3716 :
3717 24 : base_typeoid = getBaseTypeAndTypmod(typeoid, &base_typmod);
3718 :
3719 24 : appendStringInfo(&result,
3720 : " <xsd:restriction base=\"%s\"/>\n",
3721 : map_sql_type_to_xml_name(base_typeoid, base_typmod));
3722 : }
3723 24 : break;
3724 : }
3725 522 : appendStringInfoString(&result, "</xsd:simpleType>\n");
3726 : }
3727 :
3728 546 : return result.data;
3729 : }
3730 :
3731 :
3732 : /*
3733 : * Map an SQL row to an XML element, taking the row from the active
3734 : * SPI cursor. See also SQL/XML:2008 section 9.10.
3735 : */
3736 : static void
3737 312 : SPI_sql_row_to_xmlelement(uint64 rownum, StringInfo result, char *tablename,
3738 : bool nulls, bool tableforest,
3739 : const char *targetns, bool top_level)
3740 : {
3741 : int i;
3742 : char *xmltn;
3743 :
3744 312 : if (tablename)
3745 228 : xmltn = map_sql_identifier_to_xml_name(tablename, true, false);
3746 : else
3747 : {
3748 84 : if (tableforest)
3749 36 : xmltn = "row";
3750 : else
3751 48 : xmltn = "table";
3752 : }
3753 :
3754 312 : if (tableforest)
3755 162 : xmldata_root_element_start(result, xmltn, NULL, targetns, top_level);
3756 : else
3757 150 : appendStringInfoString(result, "<row>\n");
3758 :
3759 1272 : for (i = 1; i <= SPI_tuptable->tupdesc->natts; i++)
3760 : {
3761 : char *colname;
3762 : Datum colval;
3763 : bool isnull;
3764 :
3765 960 : colname = map_sql_identifier_to_xml_name(SPI_fname(SPI_tuptable->tupdesc, i),
3766 : true, false);
3767 960 : colval = SPI_getbinval(SPI_tuptable->vals[rownum],
3768 960 : SPI_tuptable->tupdesc,
3769 : i,
3770 : &isnull);
3771 960 : if (isnull)
3772 : {
3773 114 : if (nulls)
3774 60 : appendStringInfo(result, " <%s xsi:nil=\"true\"/>\n", colname);
3775 : }
3776 : else
3777 846 : appendStringInfo(result, " <%s>%s</%s>\n",
3778 : colname,
3779 : map_sql_value_to_xml_value(colval,
3780 846 : SPI_gettypeid(SPI_tuptable->tupdesc, i), true),
3781 : colname);
3782 : }
3783 :
3784 312 : if (tableforest)
3785 : {
3786 162 : xmldata_root_element_end(result, xmltn);
3787 162 : appendStringInfoChar(result, '\n');
3788 : }
3789 : else
3790 150 : appendStringInfoString(result, "</row>\n\n");
3791 312 : }
3792 :
3793 :
3794 : /*
3795 : * XPath related functions
3796 : */
3797 :
3798 : #ifdef USE_LIBXML
3799 :
3800 : /*
3801 : * Convert XML node to text.
3802 : *
3803 : * For attribute and text nodes, return the escaped text. For anything else,
3804 : * dump the whole subtree.
3805 : */
3806 : static text *
3807 222 : xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt)
3808 : {
3809 222 : xmltype *result = NULL;
3810 :
3811 222 : if (cur->type != XML_ATTRIBUTE_NODE && cur->type != XML_TEXT_NODE)
3812 180 : {
3813 180 : void (*volatile nodefree) (xmlNodePtr) = NULL;
3814 180 : volatile xmlBufferPtr buf = NULL;
3815 180 : volatile xmlNodePtr cur_copy = NULL;
3816 :
3817 180 : PG_TRY();
3818 : {
3819 : int bytes;
3820 :
3821 180 : buf = xmlBufferCreate();
3822 180 : if (buf == NULL || xmlerrcxt->err_occurred)
3823 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3824 : "could not allocate xmlBuffer");
3825 :
3826 : /*
3827 : * Produce a dump of the node that we can serialize. xmlNodeDump
3828 : * does that, but the result of that function won't contain
3829 : * namespace definitions from ancestor nodes, so we first do a
3830 : * xmlCopyNode() which duplicates the node along with its required
3831 : * namespace definitions.
3832 : *
3833 : * Some old libxml2 versions such as 2.7.6 produce partially
3834 : * broken XML_DOCUMENT_NODE nodes (unset content field) when
3835 : * copying them. xmlNodeDump of such a node works fine, but
3836 : * xmlFreeNode crashes; set us up to call xmlFreeDoc instead.
3837 : */
3838 180 : cur_copy = xmlCopyNode(cur, 1);
3839 180 : if (cur_copy == NULL || xmlerrcxt->err_occurred)
3840 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3841 : "could not copy node");
3842 360 : nodefree = (cur_copy->type == XML_DOCUMENT_NODE) ?
3843 180 : (void (*) (xmlNodePtr)) xmlFreeDoc : xmlFreeNode;
3844 :
3845 180 : bytes = xmlNodeDump(buf, NULL, cur_copy, 0, 0);
3846 180 : if (bytes == -1 || xmlerrcxt->err_occurred)
3847 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
3848 : "could not dump node");
3849 :
3850 180 : result = xmlBuffer_to_xmltype(buf);
3851 : }
3852 0 : PG_FINALLY();
3853 : {
3854 180 : if (nodefree)
3855 180 : nodefree(cur_copy);
3856 180 : if (buf)
3857 180 : xmlBufferFree(buf);
3858 : }
3859 180 : PG_END_TRY();
3860 : }
3861 : else
3862 : {
3863 : xmlChar *str;
3864 :
3865 42 : str = xmlXPathCastNodeToString(cur);
3866 42 : PG_TRY();
3867 : {
3868 : /* Here we rely on XML having the same representation as TEXT */
3869 42 : char *escaped = escape_xml((char *) str);
3870 :
3871 42 : result = (xmltype *) cstring_to_text(escaped);
3872 42 : pfree(escaped);
3873 : }
3874 0 : PG_FINALLY();
3875 : {
3876 42 : xmlFree(str);
3877 : }
3878 42 : PG_END_TRY();
3879 : }
3880 :
3881 222 : return result;
3882 : }
3883 :
3884 : /*
3885 : * Convert an XML XPath object (the result of evaluating an XPath expression)
3886 : * to an array of xml values, which are appended to astate. The function
3887 : * result value is the number of elements in the array.
3888 : *
3889 : * If "astate" is NULL then we don't generate the array value, but we still
3890 : * return the number of elements it would have had.
3891 : *
3892 : * Nodesets are converted to an array containing the nodes' textual
3893 : * representations. Primitive values (float, double, string) are converted
3894 : * to a single-element array containing the value's string representation.
3895 : */
3896 : static int
3897 540 : xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
3898 : ArrayBuildState *astate,
3899 : PgXmlErrorContext *xmlerrcxt)
3900 : {
3901 540 : int result = 0;
3902 : Datum datum;
3903 : Oid datumtype;
3904 : char *result_str;
3905 :
3906 540 : switch (xpathobj->type)
3907 : {
3908 498 : case XPATH_NODESET:
3909 498 : if (xpathobj->nodesetval != NULL)
3910 : {
3911 354 : result = xpathobj->nodesetval->nodeNr;
3912 354 : if (astate != NULL)
3913 : {
3914 : int i;
3915 :
3916 168 : for (i = 0; i < result; i++)
3917 : {
3918 90 : datum = PointerGetDatum(xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
3919 : xmlerrcxt));
3920 90 : (void) accumArrayResult(astate, datum, false,
3921 : XMLOID, CurrentMemoryContext);
3922 : }
3923 : }
3924 : }
3925 498 : return result;
3926 :
3927 12 : case XPATH_BOOLEAN:
3928 12 : if (astate == NULL)
3929 0 : return 1;
3930 12 : datum = BoolGetDatum(xpathobj->boolval);
3931 12 : datumtype = BOOLOID;
3932 12 : break;
3933 :
3934 18 : case XPATH_NUMBER:
3935 18 : if (astate == NULL)
3936 12 : return 1;
3937 6 : datum = Float8GetDatum(xpathobj->floatval);
3938 6 : datumtype = FLOAT8OID;
3939 6 : break;
3940 :
3941 12 : case XPATH_STRING:
3942 12 : if (astate == NULL)
3943 0 : return 1;
3944 12 : datum = CStringGetDatum((char *) xpathobj->stringval);
3945 12 : datumtype = CSTRINGOID;
3946 12 : break;
3947 :
3948 0 : default:
3949 0 : elog(ERROR, "xpath expression result type %d is unsupported",
3950 : xpathobj->type);
3951 : return 0; /* keep compiler quiet */
3952 : }
3953 :
3954 : /* Common code for scalar-value cases */
3955 30 : result_str = map_sql_value_to_xml_value(datum, datumtype, true);
3956 30 : datum = PointerGetDatum(cstring_to_xmltype(result_str));
3957 30 : (void) accumArrayResult(astate, datum, false,
3958 : XMLOID, CurrentMemoryContext);
3959 30 : return 1;
3960 : }
3961 :
3962 :
3963 : /*
3964 : * Common code for xpath() and xmlexists()
3965 : *
3966 : * Evaluate XPath expression and return number of nodes in res_nitems
3967 : * and array of XML values in astate. Either of those pointers can be
3968 : * NULL if the corresponding result isn't wanted.
3969 : *
3970 : * It is up to the user to ensure that the XML passed is in fact
3971 : * an XML document - XPath doesn't work easily on fragments without
3972 : * a context node being known.
3973 : */
3974 : static void
3975 558 : xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
3976 : int *res_nitems, ArrayBuildState *astate)
3977 : {
3978 : PgXmlErrorContext *xmlerrcxt;
3979 558 : volatile xmlParserCtxtPtr ctxt = NULL;
3980 558 : volatile xmlDocPtr doc = NULL;
3981 558 : volatile xmlXPathContextPtr xpathctx = NULL;
3982 558 : volatile xmlXPathCompExprPtr xpathcomp = NULL;
3983 558 : volatile xmlXPathObjectPtr xpathobj = NULL;
3984 : char *datastr;
3985 : int32 len;
3986 : int32 xpath_len;
3987 : xmlChar *string;
3988 : xmlChar *xpath_expr;
3989 558 : size_t xmldecl_len = 0;
3990 : int i;
3991 : int ndim;
3992 : Datum *ns_names_uris;
3993 : bool *ns_names_uris_nulls;
3994 : int ns_count;
3995 :
3996 : /*
3997 : * Namespace mappings are passed as text[]. If an empty array is passed
3998 : * (ndim = 0, "0-dimensional"), then there are no namespace mappings.
3999 : * Else, a 2-dimensional array with length of the second axis being equal
4000 : * to 2 should be passed, i.e., every subarray contains 2 elements, the
4001 : * first element defining the name, the second one the URI. Example:
4002 : * ARRAY[ARRAY['myns', 'http://example.com'], ARRAY['myns2',
4003 : * 'http://example2.com']].
4004 : */
4005 558 : ndim = namespaces ? ARR_NDIM(namespaces) : 0;
4006 558 : if (ndim != 0)
4007 : {
4008 : int *dims;
4009 :
4010 126 : dims = ARR_DIMS(namespaces);
4011 :
4012 126 : if (ndim != 2 || dims[1] != 2)
4013 0 : ereport(ERROR,
4014 : (errcode(ERRCODE_DATA_EXCEPTION),
4015 : errmsg("invalid array for XML namespace mapping"),
4016 : errdetail("The array must be two-dimensional with length of the second axis equal to 2.")));
4017 :
4018 : Assert(ARR_ELEMTYPE(namespaces) == TEXTOID);
4019 :
4020 126 : deconstruct_array(namespaces, TEXTOID, -1, false, TYPALIGN_INT,
4021 : &ns_names_uris, &ns_names_uris_nulls,
4022 : &ns_count);
4023 :
4024 : Assert((ns_count % 2) == 0); /* checked above */
4025 126 : ns_count /= 2; /* count pairs only */
4026 : }
4027 : else
4028 : {
4029 432 : ns_names_uris = NULL;
4030 432 : ns_names_uris_nulls = NULL;
4031 432 : ns_count = 0;
4032 : }
4033 :
4034 558 : datastr = VARDATA(data);
4035 558 : len = VARSIZE(data) - VARHDRSZ;
4036 558 : xpath_len = VARSIZE_ANY_EXHDR(xpath_expr_text);
4037 558 : if (xpath_len == 0)
4038 6 : ereport(ERROR,
4039 : (errcode(ERRCODE_DATA_EXCEPTION),
4040 : errmsg("empty XPath expression")));
4041 :
4042 552 : string = pg_xmlCharStrndup(datastr, len);
4043 552 : xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
4044 :
4045 : /*
4046 : * In a UTF8 database, skip any xml declaration, which might assert
4047 : * another encoding. Ignore parse_xml_decl() failure, letting
4048 : * xmlCtxtReadMemory() report parse errors. Documentation disclaims
4049 : * xpath() support for non-ASCII data in non-UTF8 databases, so leave
4050 : * those scenarios bug-compatible with historical behavior.
4051 : */
4052 552 : if (GetDatabaseEncoding() == PG_UTF8)
4053 552 : parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
4054 :
4055 552 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4056 :
4057 552 : PG_TRY();
4058 : {
4059 552 : xmlInitParser();
4060 :
4061 : /*
4062 : * redundant XML parsing (two parsings for the same value during one
4063 : * command execution are possible)
4064 : */
4065 552 : ctxt = xmlNewParserCtxt();
4066 552 : if (ctxt == NULL || xmlerrcxt->err_occurred)
4067 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4068 : "could not allocate parser context");
4069 1104 : doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
4070 552 : len - xmldecl_len, NULL, NULL, 0);
4071 552 : if (doc == NULL || xmlerrcxt->err_occurred)
4072 12 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4073 : "could not parse XML document");
4074 540 : xpathctx = xmlXPathNewContext(doc);
4075 540 : if (xpathctx == NULL || xmlerrcxt->err_occurred)
4076 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4077 : "could not allocate XPath context");
4078 540 : xpathctx->node = (xmlNodePtr) doc;
4079 :
4080 : /* register namespaces, if any */
4081 540 : if (ns_count > 0)
4082 : {
4083 252 : for (i = 0; i < ns_count; i++)
4084 : {
4085 : char *ns_name;
4086 : char *ns_uri;
4087 :
4088 126 : if (ns_names_uris_nulls[i * 2] ||
4089 126 : ns_names_uris_nulls[i * 2 + 1])
4090 0 : ereport(ERROR,
4091 : (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
4092 : errmsg("neither namespace name nor URI may be null")));
4093 126 : ns_name = TextDatumGetCString(ns_names_uris[i * 2]);
4094 126 : ns_uri = TextDatumGetCString(ns_names_uris[i * 2 + 1]);
4095 126 : if (xmlXPathRegisterNs(xpathctx,
4096 : (xmlChar *) ns_name,
4097 : (xmlChar *) ns_uri) != 0)
4098 0 : ereport(ERROR, /* is this an internal error??? */
4099 : (errmsg("could not register XML namespace with name \"%s\" and URI \"%s\"",
4100 : ns_name, ns_uri)));
4101 : }
4102 : }
4103 :
4104 540 : xpathcomp = xmlXPathCompile(xpath_expr);
4105 540 : if (xpathcomp == NULL || xmlerrcxt->err_occurred)
4106 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4107 : "invalid XPath expression");
4108 :
4109 : /*
4110 : * Version 2.6.27 introduces a function named
4111 : * xmlXPathCompiledEvalToBoolean, which would be enough for xmlexists,
4112 : * but we can derive the existence by whether any nodes are returned,
4113 : * thereby preventing a library version upgrade and keeping the code
4114 : * the same.
4115 : */
4116 540 : xpathobj = xmlXPathCompiledEval(xpathcomp, xpathctx);
4117 540 : if (xpathobj == NULL || xmlerrcxt->err_occurred)
4118 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4119 : "could not create XPath object");
4120 :
4121 : /*
4122 : * Extract the results as requested.
4123 : */
4124 540 : if (res_nitems != NULL)
4125 432 : *res_nitems = xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4126 : else
4127 108 : (void) xml_xpathobjtoxmlarray(xpathobj, astate, xmlerrcxt);
4128 : }
4129 12 : PG_CATCH();
4130 : {
4131 12 : if (xpathobj)
4132 0 : xmlXPathFreeObject(xpathobj);
4133 12 : if (xpathcomp)
4134 0 : xmlXPathFreeCompExpr(xpathcomp);
4135 12 : if (xpathctx)
4136 0 : xmlXPathFreeContext(xpathctx);
4137 12 : if (doc)
4138 12 : xmlFreeDoc(doc);
4139 12 : if (ctxt)
4140 12 : xmlFreeParserCtxt(ctxt);
4141 :
4142 12 : pg_xml_done(xmlerrcxt, true);
4143 :
4144 12 : PG_RE_THROW();
4145 : }
4146 540 : PG_END_TRY();
4147 :
4148 540 : xmlXPathFreeObject(xpathobj);
4149 540 : xmlXPathFreeCompExpr(xpathcomp);
4150 540 : xmlXPathFreeContext(xpathctx);
4151 540 : xmlFreeDoc(doc);
4152 540 : xmlFreeParserCtxt(ctxt);
4153 :
4154 540 : pg_xml_done(xmlerrcxt, false);
4155 540 : }
4156 : #endif /* USE_LIBXML */
4157 :
4158 : /*
4159 : * Evaluate XPath expression and return array of XML values.
4160 : *
4161 : * As we have no support of XQuery sequences yet, this function seems
4162 : * to be the most useful one (array of XML functions plays a role of
4163 : * some kind of substitution for XQuery sequences).
4164 : */
4165 : Datum
4166 126 : xpath(PG_FUNCTION_ARGS)
4167 : {
4168 : #ifdef USE_LIBXML
4169 126 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4170 126 : xmltype *data = PG_GETARG_XML_P(1);
4171 126 : ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4172 : ArrayBuildState *astate;
4173 :
4174 126 : astate = initArrayResult(XMLOID, CurrentMemoryContext, true);
4175 126 : xpath_internal(xpath_expr_text, data, namespaces,
4176 : NULL, astate);
4177 108 : PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext));
4178 : #else
4179 : NO_XML_SUPPORT();
4180 : return 0;
4181 : #endif
4182 : }
4183 :
4184 : /*
4185 : * Determines if the node specified by the supplied XPath exists
4186 : * in a given XML document, returning a boolean.
4187 : */
4188 : Datum
4189 198 : xmlexists(PG_FUNCTION_ARGS)
4190 : {
4191 : #ifdef USE_LIBXML
4192 198 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4193 198 : xmltype *data = PG_GETARG_XML_P(1);
4194 : int res_nitems;
4195 :
4196 198 : xpath_internal(xpath_expr_text, data, NULL,
4197 : &res_nitems, NULL);
4198 :
4199 198 : PG_RETURN_BOOL(res_nitems > 0);
4200 : #else
4201 : NO_XML_SUPPORT();
4202 : return 0;
4203 : #endif
4204 : }
4205 :
4206 : /*
4207 : * Determines if the node specified by the supplied XPath exists
4208 : * in a given XML document, returning a boolean. Differs from
4209 : * xmlexists as it supports namespaces and is not defined in SQL/XML.
4210 : */
4211 : Datum
4212 234 : xpath_exists(PG_FUNCTION_ARGS)
4213 : {
4214 : #ifdef USE_LIBXML
4215 234 : text *xpath_expr_text = PG_GETARG_TEXT_PP(0);
4216 234 : xmltype *data = PG_GETARG_XML_P(1);
4217 234 : ArrayType *namespaces = PG_GETARG_ARRAYTYPE_P(2);
4218 : int res_nitems;
4219 :
4220 234 : xpath_internal(xpath_expr_text, data, namespaces,
4221 : &res_nitems, NULL);
4222 :
4223 234 : PG_RETURN_BOOL(res_nitems > 0);
4224 : #else
4225 : NO_XML_SUPPORT();
4226 : return 0;
4227 : #endif
4228 : }
4229 :
4230 : /*
4231 : * Functions for checking well-formed-ness
4232 : */
4233 :
4234 : #ifdef USE_LIBXML
4235 : static bool
4236 114 : wellformed_xml(text *data, XmlOptionType xmloption_arg)
4237 : {
4238 : bool result;
4239 114 : volatile xmlDocPtr doc = NULL;
4240 :
4241 : /* We want to catch any exceptions and return false */
4242 114 : PG_TRY();
4243 : {
4244 114 : doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding());
4245 60 : result = true;
4246 : }
4247 54 : PG_CATCH();
4248 : {
4249 54 : FlushErrorState();
4250 54 : result = false;
4251 : }
4252 114 : PG_END_TRY();
4253 :
4254 114 : if (doc)
4255 60 : xmlFreeDoc(doc);
4256 :
4257 114 : return result;
4258 : }
4259 : #endif
4260 :
4261 : Datum
4262 90 : xml_is_well_formed(PG_FUNCTION_ARGS)
4263 : {
4264 : #ifdef USE_LIBXML
4265 90 : text *data = PG_GETARG_TEXT_PP(0);
4266 :
4267 90 : PG_RETURN_BOOL(wellformed_xml(data, xmloption));
4268 : #else
4269 : NO_XML_SUPPORT();
4270 : return 0;
4271 : #endif /* not USE_LIBXML */
4272 : }
4273 :
4274 : Datum
4275 12 : xml_is_well_formed_document(PG_FUNCTION_ARGS)
4276 : {
4277 : #ifdef USE_LIBXML
4278 12 : text *data = PG_GETARG_TEXT_PP(0);
4279 :
4280 12 : PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_DOCUMENT));
4281 : #else
4282 : NO_XML_SUPPORT();
4283 : return 0;
4284 : #endif /* not USE_LIBXML */
4285 : }
4286 :
4287 : Datum
4288 12 : xml_is_well_formed_content(PG_FUNCTION_ARGS)
4289 : {
4290 : #ifdef USE_LIBXML
4291 12 : text *data = PG_GETARG_TEXT_PP(0);
4292 :
4293 12 : PG_RETURN_BOOL(wellformed_xml(data, XMLOPTION_CONTENT));
4294 : #else
4295 : NO_XML_SUPPORT();
4296 : return 0;
4297 : #endif /* not USE_LIBXML */
4298 : }
4299 :
4300 : /*
4301 : * support functions for XMLTABLE
4302 : *
4303 : */
4304 : #ifdef USE_LIBXML
4305 :
4306 : /*
4307 : * Returns private data from executor state. Ensure validity by check with
4308 : * MAGIC number.
4309 : */
4310 : static inline XmlTableBuilderData *
4311 151660 : GetXmlTableBuilderPrivateData(TableFuncScanState *state, const char *fname)
4312 : {
4313 : XmlTableBuilderData *result;
4314 :
4315 151660 : if (!IsA(state, TableFuncScanState))
4316 0 : elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4317 151660 : result = (XmlTableBuilderData *) state->opaque;
4318 151660 : if (result->magic != XMLTABLE_CONTEXT_MAGIC)
4319 0 : elog(ERROR, "%s called with invalid TableFuncScanState", fname);
4320 :
4321 151660 : return result;
4322 : }
4323 : #endif
4324 :
4325 : /*
4326 : * XmlTableInitOpaque
4327 : * Fill in TableFuncScanState->opaque for XmlTable processor; initialize
4328 : * the XML parser.
4329 : *
4330 : * Note: Because we call pg_xml_init() here and pg_xml_done() in
4331 : * XmlTableDestroyOpaque, it is critical for robustness that no other
4332 : * executor nodes run until this node is processed to completion. Caller
4333 : * must execute this to completion (probably filling a tuplestore to exhaust
4334 : * this node in a single pass) instead of using row-per-call mode.
4335 : */
4336 : static void
4337 264 : XmlTableInitOpaque(TableFuncScanState *state, int natts)
4338 : {
4339 : #ifdef USE_LIBXML
4340 264 : volatile xmlParserCtxtPtr ctxt = NULL;
4341 : XmlTableBuilderData *xtCxt;
4342 : PgXmlErrorContext *xmlerrcxt;
4343 :
4344 264 : xtCxt = palloc0(sizeof(XmlTableBuilderData));
4345 264 : xtCxt->magic = XMLTABLE_CONTEXT_MAGIC;
4346 264 : xtCxt->natts = natts;
4347 264 : xtCxt->xpathscomp = palloc0(sizeof(xmlXPathCompExprPtr) * natts);
4348 :
4349 264 : xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
4350 :
4351 264 : PG_TRY();
4352 : {
4353 264 : xmlInitParser();
4354 :
4355 264 : ctxt = xmlNewParserCtxt();
4356 264 : if (ctxt == NULL || xmlerrcxt->err_occurred)
4357 0 : xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4358 : "could not allocate parser context");
4359 : }
4360 0 : PG_CATCH();
4361 : {
4362 0 : if (ctxt != NULL)
4363 0 : xmlFreeParserCtxt(ctxt);
4364 :
4365 0 : pg_xml_done(xmlerrcxt, true);
4366 :
4367 0 : PG_RE_THROW();
4368 : }
4369 264 : PG_END_TRY();
4370 :
4371 264 : xtCxt->xmlerrcxt = xmlerrcxt;
4372 264 : xtCxt->ctxt = ctxt;
4373 :
4374 264 : state->opaque = xtCxt;
4375 : #else
4376 : NO_XML_SUPPORT();
4377 : #endif /* not USE_LIBXML */
4378 264 : }
4379 :
4380 : /*
4381 : * XmlTableSetDocument
4382 : * Install the input document
4383 : */
4384 : static void
4385 264 : XmlTableSetDocument(TableFuncScanState *state, Datum value)
4386 : {
4387 : #ifdef USE_LIBXML
4388 : XmlTableBuilderData *xtCxt;
4389 264 : xmltype *xmlval = DatumGetXmlP(value);
4390 : char *str;
4391 : xmlChar *xstr;
4392 : int length;
4393 264 : volatile xmlDocPtr doc = NULL;
4394 264 : volatile xmlXPathContextPtr xpathcxt = NULL;
4395 :
4396 264 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetDocument");
4397 :
4398 : /*
4399 : * Use out function for casting to string (remove encoding property). See
4400 : * comment in xml_out.
4401 : */
4402 264 : str = xml_out_internal(xmlval, 0);
4403 :
4404 264 : length = strlen(str);
4405 264 : xstr = pg_xmlCharStrndup(str, length);
4406 :
4407 264 : PG_TRY();
4408 : {
4409 264 : doc = xmlCtxtReadMemory(xtCxt->ctxt, (char *) xstr, length, NULL, NULL, 0);
4410 264 : if (doc == NULL || xtCxt->xmlerrcxt->err_occurred)
4411 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
4412 : "could not parse XML document");
4413 264 : xpathcxt = xmlXPathNewContext(doc);
4414 264 : if (xpathcxt == NULL || xtCxt->xmlerrcxt->err_occurred)
4415 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
4416 : "could not allocate XPath context");
4417 264 : xpathcxt->node = (xmlNodePtr) doc;
4418 : }
4419 0 : PG_CATCH();
4420 : {
4421 0 : if (xpathcxt != NULL)
4422 0 : xmlXPathFreeContext(xpathcxt);
4423 0 : if (doc != NULL)
4424 0 : xmlFreeDoc(doc);
4425 :
4426 0 : PG_RE_THROW();
4427 : }
4428 264 : PG_END_TRY();
4429 :
4430 264 : xtCxt->doc = doc;
4431 264 : xtCxt->xpathcxt = xpathcxt;
4432 : #else
4433 : NO_XML_SUPPORT();
4434 : #endif /* not USE_LIBXML */
4435 264 : }
4436 :
4437 : /*
4438 : * XmlTableSetNamespace
4439 : * Add a namespace declaration
4440 : */
4441 : static void
4442 18 : XmlTableSetNamespace(TableFuncScanState *state, const char *name, const char *uri)
4443 : {
4444 : #ifdef USE_LIBXML
4445 : XmlTableBuilderData *xtCxt;
4446 :
4447 18 : if (name == NULL)
4448 6 : ereport(ERROR,
4449 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
4450 : errmsg("DEFAULT namespace is not supported")));
4451 12 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetNamespace");
4452 :
4453 12 : if (xmlXPathRegisterNs(xtCxt->xpathcxt,
4454 12 : pg_xmlCharStrndup(name, strlen(name)),
4455 12 : pg_xmlCharStrndup(uri, strlen(uri))))
4456 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4457 : "could not set XML namespace");
4458 : #else
4459 : NO_XML_SUPPORT();
4460 : #endif /* not USE_LIBXML */
4461 12 : }
4462 :
4463 : /*
4464 : * XmlTableSetRowFilter
4465 : * Install the row-filter Xpath expression.
4466 : */
4467 : static void
4468 258 : XmlTableSetRowFilter(TableFuncScanState *state, const char *path)
4469 : {
4470 : #ifdef USE_LIBXML
4471 : XmlTableBuilderData *xtCxt;
4472 : xmlChar *xstr;
4473 :
4474 258 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetRowFilter");
4475 :
4476 258 : if (*path == '\0')
4477 0 : ereport(ERROR,
4478 : (errcode(ERRCODE_DATA_EXCEPTION),
4479 : errmsg("row path filter must not be empty string")));
4480 :
4481 258 : xstr = pg_xmlCharStrndup(path, strlen(path));
4482 :
4483 258 : xtCxt->xpathcomp = xmlXPathCompile(xstr);
4484 258 : if (xtCxt->xpathcomp == NULL || xtCxt->xmlerrcxt->err_occurred)
4485 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_SYNTAX_ERROR,
4486 : "invalid XPath expression");
4487 : #else
4488 : NO_XML_SUPPORT();
4489 : #endif /* not USE_LIBXML */
4490 258 : }
4491 :
4492 : /*
4493 : * XmlTableSetColumnFilter
4494 : * Install the column-filter Xpath expression, for the given column.
4495 : */
4496 : static void
4497 774 : XmlTableSetColumnFilter(TableFuncScanState *state, const char *path, int colnum)
4498 : {
4499 : #ifdef USE_LIBXML
4500 : XmlTableBuilderData *xtCxt;
4501 : xmlChar *xstr;
4502 :
4503 : AssertArg(PointerIsValid(path));
4504 :
4505 774 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableSetColumnFilter");
4506 :
4507 774 : if (*path == '\0')
4508 0 : ereport(ERROR,
4509 : (errcode(ERRCODE_DATA_EXCEPTION),
4510 : errmsg("column path filter must not be empty string")));
4511 :
4512 774 : xstr = pg_xmlCharStrndup(path, strlen(path));
4513 :
4514 774 : xtCxt->xpathscomp[colnum] = xmlXPathCompile(xstr);
4515 774 : if (xtCxt->xpathscomp[colnum] == NULL || xtCxt->xmlerrcxt->err_occurred)
4516 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_DATA_EXCEPTION,
4517 : "invalid XPath expression");
4518 : #else
4519 : NO_XML_SUPPORT();
4520 : #endif /* not USE_LIBXML */
4521 774 : }
4522 :
4523 : /*
4524 : * XmlTableFetchRow
4525 : * Prepare the next "current" tuple for upcoming GetValue calls.
4526 : * Returns false if the row-filter expression returned no more rows.
4527 : */
4528 : static bool
4529 21802 : XmlTableFetchRow(TableFuncScanState *state)
4530 : {
4531 : #ifdef USE_LIBXML
4532 : XmlTableBuilderData *xtCxt;
4533 :
4534 21802 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableFetchRow");
4535 :
4536 : /* Propagate our own error context to libxml2 */
4537 21802 : xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4538 :
4539 21802 : if (xtCxt->xpathobj == NULL)
4540 : {
4541 258 : xtCxt->xpathobj = xmlXPathCompiledEval(xtCxt->xpathcomp, xtCxt->xpathcxt);
4542 258 : if (xtCxt->xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4543 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4544 : "could not create XPath object");
4545 :
4546 258 : xtCxt->row_count = 0;
4547 : }
4548 :
4549 21802 : if (xtCxt->xpathobj->type == XPATH_NODESET)
4550 : {
4551 21802 : if (xtCxt->xpathobj->nodesetval != NULL)
4552 : {
4553 21802 : if (xtCxt->row_count++ < xtCxt->xpathobj->nodesetval->nodeNr)
4554 21556 : return true;
4555 : }
4556 : }
4557 :
4558 246 : return false;
4559 : #else
4560 : NO_XML_SUPPORT();
4561 : return false;
4562 : #endif /* not USE_LIBXML */
4563 : }
4564 :
4565 : /*
4566 : * XmlTableGetValue
4567 : * Return the value for column number 'colnum' for the current row. If
4568 : * column -1 is requested, return representation of the whole row.
4569 : *
4570 : * This leaks memory, so be sure to reset often the context in which it's
4571 : * called.
4572 : */
4573 : static Datum
4574 128286 : XmlTableGetValue(TableFuncScanState *state, int colnum,
4575 : Oid typid, int32 typmod, bool *isnull)
4576 : {
4577 : #ifdef USE_LIBXML
4578 : XmlTableBuilderData *xtCxt;
4579 128286 : Datum result = (Datum) 0;
4580 : xmlNodePtr cur;
4581 128286 : char *cstr = NULL;
4582 128286 : volatile xmlXPathObjectPtr xpathobj = NULL;
4583 :
4584 128286 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableGetValue");
4585 :
4586 : Assert(xtCxt->xpathobj &&
4587 : xtCxt->xpathobj->type == XPATH_NODESET &&
4588 : xtCxt->xpathobj->nodesetval != NULL);
4589 :
4590 : /* Propagate our own error context to libxml2 */
4591 128286 : xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4592 :
4593 128286 : *isnull = false;
4594 :
4595 128286 : cur = xtCxt->xpathobj->nodesetval->nodeTab[xtCxt->row_count - 1];
4596 :
4597 : Assert(xtCxt->xpathscomp[colnum] != NULL);
4598 :
4599 128286 : PG_TRY();
4600 : {
4601 : /* Set current node as entry point for XPath evaluation */
4602 128286 : xtCxt->xpathcxt->node = cur;
4603 :
4604 : /* Evaluate column path */
4605 128286 : xpathobj = xmlXPathCompiledEval(xtCxt->xpathscomp[colnum], xtCxt->xpathcxt);
4606 128286 : if (xpathobj == NULL || xtCxt->xmlerrcxt->err_occurred)
4607 0 : xml_ereport(xtCxt->xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
4608 : "could not create XPath object");
4609 :
4610 : /*
4611 : * There are four possible cases, depending on the number of nodes
4612 : * returned by the XPath expression and the type of the target column:
4613 : * a) XPath returns no nodes. b) The target type is XML (return all
4614 : * as XML). For non-XML return types: c) One node (return content).
4615 : * d) Multiple nodes (error).
4616 : */
4617 128286 : if (xpathobj->type == XPATH_NODESET)
4618 : {
4619 128256 : int count = 0;
4620 :
4621 128256 : if (xpathobj->nodesetval != NULL)
4622 128046 : count = xpathobj->nodesetval->nodeNr;
4623 :
4624 128256 : if (xpathobj->nodesetval == NULL || count == 0)
4625 : {
4626 21718 : *isnull = true;
4627 : }
4628 : else
4629 : {
4630 106538 : if (typid == XMLOID)
4631 : {
4632 : text *textstr;
4633 : StringInfoData str;
4634 :
4635 : /* Concatenate serialized values */
4636 72 : initStringInfo(&str);
4637 204 : for (int i = 0; i < count; i++)
4638 : {
4639 : textstr =
4640 132 : xml_xmlnodetoxmltype(xpathobj->nodesetval->nodeTab[i],
4641 : xtCxt->xmlerrcxt);
4642 :
4643 132 : appendStringInfoText(&str, textstr);
4644 : }
4645 72 : cstr = str.data;
4646 : }
4647 : else
4648 : {
4649 : xmlChar *str;
4650 :
4651 106466 : if (count > 1)
4652 6 : ereport(ERROR,
4653 : (errcode(ERRCODE_CARDINALITY_VIOLATION),
4654 : errmsg("more than one value returned by column XPath expression")));
4655 :
4656 106460 : str = xmlXPathCastNodeSetToString(xpathobj->nodesetval);
4657 106460 : cstr = str ? xml_pstrdup_and_free(str) : "";
4658 : }
4659 : }
4660 : }
4661 30 : else if (xpathobj->type == XPATH_STRING)
4662 : {
4663 : /* Content should be escaped when target will be XML */
4664 18 : if (typid == XMLOID)
4665 6 : cstr = escape_xml((char *) xpathobj->stringval);
4666 : else
4667 12 : cstr = (char *) xpathobj->stringval;
4668 : }
4669 12 : else if (xpathobj->type == XPATH_BOOLEAN)
4670 : {
4671 : char typcategory;
4672 : bool typispreferred;
4673 : xmlChar *str;
4674 :
4675 : /* Allow implicit casting from boolean to numbers */
4676 6 : get_type_category_preferred(typid, &typcategory, &typispreferred);
4677 :
4678 6 : if (typcategory != TYPCATEGORY_NUMERIC)
4679 6 : str = xmlXPathCastBooleanToString(xpathobj->boolval);
4680 : else
4681 0 : str = xmlXPathCastNumberToString(xmlXPathCastBooleanToNumber(xpathobj->boolval));
4682 :
4683 6 : cstr = xml_pstrdup_and_free(str);
4684 : }
4685 6 : else if (xpathobj->type == XPATH_NUMBER)
4686 : {
4687 : xmlChar *str;
4688 :
4689 6 : str = xmlXPathCastNumberToString(xpathobj->floatval);
4690 6 : cstr = xml_pstrdup_and_free(str);
4691 : }
4692 : else
4693 0 : elog(ERROR, "unexpected XPath object type %u", xpathobj->type);
4694 :
4695 : /*
4696 : * By here, either cstr contains the result value, or the isnull flag
4697 : * has been set.
4698 : */
4699 : Assert(cstr || *isnull);
4700 :
4701 128280 : if (!*isnull)
4702 106562 : result = InputFunctionCall(&state->in_functions[colnum],
4703 : cstr,
4704 106562 : state->typioparams[colnum],
4705 : typmod);
4706 : }
4707 6 : PG_FINALLY();
4708 : {
4709 128286 : if (xpathobj != NULL)
4710 128286 : xmlXPathFreeObject(xpathobj);
4711 : }
4712 128286 : PG_END_TRY();
4713 :
4714 128280 : return result;
4715 : #else
4716 : NO_XML_SUPPORT();
4717 : return 0;
4718 : #endif /* not USE_LIBXML */
4719 : }
4720 :
4721 : /*
4722 : * XmlTableDestroyOpaque
4723 : * Release all libxml2 resources
4724 : */
4725 : static void
4726 264 : XmlTableDestroyOpaque(TableFuncScanState *state)
4727 : {
4728 : #ifdef USE_LIBXML
4729 : XmlTableBuilderData *xtCxt;
4730 :
4731 264 : xtCxt = GetXmlTableBuilderPrivateData(state, "XmlTableDestroyOpaque");
4732 :
4733 : /* Propagate our own error context to libxml2 */
4734 264 : xmlSetStructuredErrorFunc((void *) xtCxt->xmlerrcxt, xml_errorHandler);
4735 :
4736 264 : if (xtCxt->xpathscomp != NULL)
4737 : {
4738 : int i;
4739 :
4740 1116 : for (i = 0; i < xtCxt->natts; i++)
4741 852 : if (xtCxt->xpathscomp[i] != NULL)
4742 774 : xmlXPathFreeCompExpr(xtCxt->xpathscomp[i]);
4743 : }
4744 :
4745 264 : if (xtCxt->xpathobj != NULL)
4746 258 : xmlXPathFreeObject(xtCxt->xpathobj);
4747 264 : if (xtCxt->xpathcomp != NULL)
4748 258 : xmlXPathFreeCompExpr(xtCxt->xpathcomp);
4749 264 : if (xtCxt->xpathcxt != NULL)
4750 264 : xmlXPathFreeContext(xtCxt->xpathcxt);
4751 264 : if (xtCxt->doc != NULL)
4752 264 : xmlFreeDoc(xtCxt->doc);
4753 264 : if (xtCxt->ctxt != NULL)
4754 264 : xmlFreeParserCtxt(xtCxt->ctxt);
4755 :
4756 264 : pg_xml_done(xtCxt->xmlerrcxt, true);
4757 :
4758 : /* not valid anymore */
4759 264 : xtCxt->magic = 0;
4760 264 : state->opaque = NULL;
4761 :
4762 : #else
4763 : NO_XML_SUPPORT();
4764 : #endif /* not USE_LIBXML */
4765 264 : }
|