Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * copyto.c
4 : * COPY <table> TO file/program/client
5 : *
6 : * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/commands/copyto.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <ctype.h>
18 : #include <unistd.h>
19 : #include <sys/stat.h>
20 :
21 : #include "access/table.h"
22 : #include "access/tableam.h"
23 : #include "access/tupconvert.h"
24 : #include "catalog/pg_inherits.h"
25 : #include "commands/copyapi.h"
26 : #include "commands/progress.h"
27 : #include "executor/execdesc.h"
28 : #include "executor/executor.h"
29 : #include "executor/tuptable.h"
30 : #include "funcapi.h"
31 : #include "libpq/libpq.h"
32 : #include "libpq/pqformat.h"
33 : #include "mb/pg_wchar.h"
34 : #include "miscadmin.h"
35 : #include "pgstat.h"
36 : #include "storage/fd.h"
37 : #include "tcop/tcopprot.h"
38 : #include "utils/json.h"
39 : #include "utils/lsyscache.h"
40 : #include "utils/memutils.h"
41 : #include "utils/rel.h"
42 : #include "utils/snapmgr.h"
43 : #include "utils/wait_event.h"
44 :
/*
 * The kinds of output destination that the lowest-level send routines must
 * distinguish between.
 */
typedef enum CopyDest
{
	COPY_FILE,					/* server-side file, or pipe to a program */
	COPY_FRONTEND,				/* the connected frontend */
	COPY_CALLBACK,				/* caller-supplied callback function */
} CopyDest;
55 :
56 : /*
57 : * This struct contains all the state variables used throughout a COPY TO
58 : * operation.
59 : *
60 : * Multi-byte encodings: all supported client-side encodings encode multi-byte
61 : * characters by having the first byte's high bit set. Subsequent bytes of the
62 : * character can have the high bit not set. When scanning data in such an
63 : * encoding to look for a match to a single-byte (ie ASCII) character, we must
64 : * use the full pg_encoding_mblen() machinery to skip over multibyte
65 : * characters, else we might find a false match to a trailing byte. In
66 : * supported server encodings, there is no possibility of a false match, and
67 : * it's faster to make useless comparisons to trailing bytes than it is to
68 : * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true
69 : * when we have to do it the hard way.
70 : */
71 : typedef struct CopyToStateData
72 : {
73 : /* format-specific routines */
74 : const CopyToRoutine *routine;
75 :
76 : /* low-level state data */
77 : CopyDest copy_dest; /* type of copy source/destination */
78 : FILE *copy_file; /* used if copy_dest == COPY_FILE */
79 : StringInfo fe_msgbuf; /* used for all dests during COPY TO */
80 :
81 : int file_encoding; /* file or remote side's character encoding */
82 : bool need_transcoding; /* file encoding diff from server? */
83 : bool encoding_embeds_ascii; /* ASCII can be non-first byte? */
84 :
85 : /* parameters from the COPY command */
86 : Relation rel; /* relation to copy to */
87 : QueryDesc *queryDesc; /* executable query to copy from */
88 : List *attnumlist; /* integer list of attnums to copy */
89 : char *filename; /* filename, or NULL for STDOUT */
90 : bool is_program; /* is 'filename' a program to popen? */
91 : bool json_row_delim_needed; /* need delimiter before next row */
92 : StringInfo json_buf; /* reusable buffer for JSON output,
93 : * initialized in BeginCopyTo */
94 : TupleDesc tupDesc; /* Descriptor for JSON output; for a column
95 : * list this is a projected descriptor */
96 : Datum *json_projvalues; /* pre-allocated projection values, or
97 : * NULL */
98 : bool *json_projnulls; /* pre-allocated projection nulls, or NULL */
99 : copy_data_dest_cb data_dest_cb; /* function for writing data */
100 :
101 : CopyFormatOptions opts;
102 : Node *whereClause; /* WHERE condition (or NULL) */
103 : List *partitions; /* OID list of partitions to copy data from */
104 :
105 : /*
106 : * Working state
107 : */
108 : MemoryContext copycontext; /* per-copy execution context */
109 :
110 : FmgrInfo *out_functions; /* lookup info for output functions */
111 : MemoryContext rowcontext; /* per-row evaluation context */
112 : uint64 bytes_processed; /* number of bytes processed so far */
113 : } CopyToStateData;
114 :
115 : /* DestReceiver for COPY (query) TO */
116 : typedef struct
117 : {
118 : DestReceiver pub; /* publicly-known function pointers */
119 : CopyToState cstate; /* CopyToStateData for the command */
120 : uint64 processed; /* # of tuples processed */
121 : } DR_copy;
122 :
/*
 * Signature written at the start of binary-format output (see
 * CopyToBinaryStart).  NOTE: copyfromparse.c has its own copy of this.
 */
static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
125 :
126 :
127 : /* non-export function prototypes */
128 : static void EndCopy(CopyToState cstate);
129 : static void ClosePipeToProgram(CopyToState cstate);
130 : static void CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot);
131 : static void CopyAttributeOutText(CopyToState cstate, const char *string);
132 : static void CopyAttributeOutCSV(CopyToState cstate, const char *string,
133 : bool use_quote);
134 : static void CopyRelationTo(CopyToState cstate, Relation rel, Relation root_rel,
135 : uint64 *processed);
136 :
137 : /* built-in format-specific routines */
138 : static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc);
139 : static void CopyToTextLikeOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo);
140 : static void CopyToTextOneRow(CopyToState cstate, TupleTableSlot *slot);
141 : static void CopyToCSVOneRow(CopyToState cstate, TupleTableSlot *slot);
142 : static void CopyToTextLikeOneRow(CopyToState cstate, TupleTableSlot *slot,
143 : bool is_csv);
144 : static void CopyToTextLikeEnd(CopyToState cstate);
145 : static void CopyToJsonOneRow(CopyToState cstate, TupleTableSlot *slot);
146 : static void CopyToJsonEnd(CopyToState cstate);
147 : static void CopyToBinaryStart(CopyToState cstate, TupleDesc tupDesc);
148 : static void CopyToBinaryOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo);
149 : static void CopyToBinaryOneRow(CopyToState cstate, TupleTableSlot *slot);
150 : static void CopyToBinaryEnd(CopyToState cstate);
151 :
152 : /* Low-level communications functions */
153 : static void SendCopyBegin(CopyToState cstate);
154 : static void SendCopyEnd(CopyToState cstate);
155 : static void CopySendData(CopyToState cstate, const void *databuf, int datasize);
156 : static void CopySendString(CopyToState cstate, const char *str);
157 : static void CopySendChar(CopyToState cstate, char c);
158 : static void CopySendEndOfRow(CopyToState cstate);
159 : static void CopySendTextLikeEndOfRow(CopyToState cstate);
160 : static void CopySendInt32(CopyToState cstate, int32 val);
161 : static void CopySendInt16(CopyToState cstate, int16 val);
162 :
163 : /*
164 : * COPY TO routines for built-in formats.
165 : */
166 :
167 : /* text format */
168 : static const CopyToRoutine CopyToRoutineText = {
169 : .CopyToStart = CopyToTextLikeStart,
170 : .CopyToOutFunc = CopyToTextLikeOutFunc,
171 : .CopyToOneRow = CopyToTextOneRow,
172 : .CopyToEnd = CopyToTextLikeEnd,
173 : };
174 :
175 : /* CSV format */
176 : static const CopyToRoutine CopyToRoutineCSV = {
177 : .CopyToStart = CopyToTextLikeStart,
178 : .CopyToOutFunc = CopyToTextLikeOutFunc,
179 : .CopyToOneRow = CopyToCSVOneRow,
180 : .CopyToEnd = CopyToTextLikeEnd,
181 : };
182 :
183 : /* json format */
184 : static const CopyToRoutine CopyToRoutineJson = {
185 : .CopyToStart = CopyToTextLikeStart,
186 : .CopyToOutFunc = CopyToTextLikeOutFunc,
187 : .CopyToOneRow = CopyToJsonOneRow,
188 : .CopyToEnd = CopyToJsonEnd,
189 : };
190 :
191 : /* binary format */
192 : static const CopyToRoutine CopyToRoutineBinary = {
193 : .CopyToStart = CopyToBinaryStart,
194 : .CopyToOutFunc = CopyToBinaryOutFunc,
195 : .CopyToOneRow = CopyToBinaryOneRow,
196 : .CopyToEnd = CopyToBinaryEnd,
197 : };
198 :
199 : /* Return a COPY TO routine for the given options */
200 : static const CopyToRoutine *
201 5259 : CopyToGetRoutine(const CopyFormatOptions *opts)
202 : {
203 5259 : if (opts->format == COPY_FORMAT_CSV)
204 88 : return &CopyToRoutineCSV;
205 5171 : else if (opts->format == COPY_FORMAT_BINARY)
206 9 : return &CopyToRoutineBinary;
207 5162 : else if (opts->format == COPY_FORMAT_JSON)
208 100 : return &CopyToRoutineJson;
209 :
210 : /* default is text */
211 5062 : return &CopyToRoutineText;
212 : }
213 :
214 : /* Implementation of the start callback for text, CSV, and json formats */
215 : static void
216 5161 : CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
217 : {
218 : /*
219 : * For non-binary copy, we need to convert null_print to file encoding,
220 : * because it will be sent directly with CopySendString.
221 : */
222 5161 : if (cstate->need_transcoding)
223 17 : cstate->opts.null_print_client = pg_server_to_any(cstate->opts.null_print,
224 : cstate->opts.null_print_len,
225 : cstate->file_encoding);
226 :
227 : /* if a header has been requested send the line */
228 5161 : if (cstate->opts.header_line == COPY_HEADER_TRUE)
229 : {
230 : ListCell *cur;
231 28 : bool hdr_delim = false;
232 :
233 : Assert(cstate->opts.format != COPY_FORMAT_JSON);
234 :
235 76 : foreach(cur, cstate->attnumlist)
236 : {
237 48 : int attnum = lfirst_int(cur);
238 : char *colname;
239 :
240 48 : if (hdr_delim)
241 20 : CopySendChar(cstate, cstate->opts.delim[0]);
242 48 : hdr_delim = true;
243 :
244 48 : colname = NameStr(TupleDescAttr(tupDesc, attnum - 1)->attname);
245 :
246 48 : if (cstate->opts.format == COPY_FORMAT_CSV)
247 16 : CopyAttributeOutCSV(cstate, colname, false);
248 : else
249 32 : CopyAttributeOutText(cstate, colname);
250 : }
251 :
252 28 : CopySendTextLikeEndOfRow(cstate);
253 : }
254 :
255 : /*
256 : * If FORCE_ARRAY has been specified, send the opening bracket.
257 : */
258 5161 : if (cstate->opts.format == COPY_FORMAT_JSON && cstate->opts.force_array)
259 : {
260 20 : CopySendChar(cstate, '[');
261 20 : CopySendTextLikeEndOfRow(cstate);
262 : }
263 5161 : }
264 :
265 : /*
266 : * Implementation of the outfunc callback for text, CSV, and json formats. Assign
267 : * the output function data to the given *finfo.
268 : */
269 : static void
270 17380 : CopyToTextLikeOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo)
271 : {
272 : Oid func_oid;
273 : bool is_varlena;
274 :
275 : /* Set output function for an attribute */
276 17380 : getTypeOutputInfo(atttypid, &func_oid, &is_varlena);
277 17380 : fmgr_info(func_oid, finfo);
278 17380 : }
279 :
280 : /* Implementation of the per-row callback for text format */
281 : static void
282 1840495 : CopyToTextOneRow(CopyToState cstate, TupleTableSlot *slot)
283 : {
284 1840495 : CopyToTextLikeOneRow(cstate, slot, false);
285 1840495 : }
286 :
287 : /* Implementation of the per-row callback for CSV format */
288 : static void
289 224 : CopyToCSVOneRow(CopyToState cstate, TupleTableSlot *slot)
290 : {
291 224 : CopyToTextLikeOneRow(cstate, slot, true);
292 224 : }
293 :
294 : /*
295 : * Workhorse for CopyToTextOneRow() and CopyToCSVOneRow().
296 : *
297 : * We use pg_attribute_always_inline to reduce function call overhead
298 : * and to help compilers to optimize away the 'is_csv' condition.
299 : */
300 : static pg_attribute_always_inline void
301 1840719 : CopyToTextLikeOneRow(CopyToState cstate,
302 : TupleTableSlot *slot,
303 : bool is_csv)
304 : {
305 1840719 : bool need_delim = false;
306 1840719 : FmgrInfo *out_functions = cstate->out_functions;
307 :
308 10916188 : foreach_int(attnum, cstate->attnumlist)
309 : {
310 7234750 : Datum value = slot->tts_values[attnum - 1];
311 7234750 : bool isnull = slot->tts_isnull[attnum - 1];
312 :
313 7234750 : if (need_delim)
314 5394100 : CopySendChar(cstate, cstate->opts.delim[0]);
315 7234750 : need_delim = true;
316 :
317 7234750 : if (isnull)
318 : {
319 613145 : CopySendString(cstate, cstate->opts.null_print_client);
320 : }
321 : else
322 : {
323 : char *string;
324 :
325 6621605 : string = OutputFunctionCall(&out_functions[attnum - 1],
326 : value);
327 :
328 6621605 : if (is_csv)
329 400 : CopyAttributeOutCSV(cstate, string,
330 400 : cstate->opts.force_quote_flags[attnum - 1]);
331 : else
332 6621205 : CopyAttributeOutText(cstate, string);
333 : }
334 : }
335 :
336 1840719 : CopySendTextLikeEndOfRow(cstate);
337 1840719 : }
338 :
339 : /* Implementation of the end callback for text and CSV formats */
340 : static void
341 5060 : CopyToTextLikeEnd(CopyToState cstate)
342 : {
343 : /* Nothing to do here */
344 5060 : }
345 :
346 : /* Implementation of the end callback for json format */
347 : static void
348 100 : CopyToJsonEnd(CopyToState cstate)
349 : {
350 100 : if (cstate->opts.force_array)
351 : {
352 20 : CopySendChar(cstate, ']');
353 20 : CopySendTextLikeEndOfRow(cstate);
354 : }
355 100 : }
356 :
357 : /* Implementation of per-row callback for json format */
358 : static void
359 328 : CopyToJsonOneRow(CopyToState cstate, TupleTableSlot *slot)
360 : {
361 : Datum rowdata;
362 :
363 328 : resetStringInfo(cstate->json_buf);
364 :
365 328 : if (cstate->json_projvalues != NULL)
366 : {
367 : /*
368 : * Column list case: project selected column values into sequential
369 : * positions matching the custom TupleDesc, then form a new tuple.
370 : */
371 : HeapTuple tup;
372 120 : int i = 0;
373 :
374 460 : foreach_int(attnum, cstate->attnumlist)
375 : {
376 220 : cstate->json_projvalues[i] = slot->tts_values[attnum - 1];
377 220 : cstate->json_projnulls[i] = slot->tts_isnull[attnum - 1];
378 220 : i++;
379 : }
380 :
381 120 : tup = heap_form_tuple(cstate->tupDesc,
382 120 : cstate->json_projvalues,
383 120 : cstate->json_projnulls);
384 :
385 : /*
386 : * heap_form_tuple already stamps the datum-length, type-id, and
387 : * type-mod fields on t_data, so we can use it directly as a composite
388 : * Datum without the extra pallocmemcpy that heap_copy_tuple_as_datum
389 : * would do. Any TOAST pointers in the projected values will be
390 : * detoasted by the per-column output functions called from
391 : * composite_to_json.
392 : */
393 120 : rowdata = HeapTupleGetDatum(tup);
394 : }
395 : else
396 : {
397 : /*
398 : * Full table or query without column list. For queries, the slot's
399 : * TupleDesc may carry RECORDOID, which is not registered in the type
400 : * cache and would cause composite_to_json's lookup_rowtype_tupdesc
401 : * call to fail. Build a HeapTuple stamped with the blessed
402 : * descriptor so the type can be looked up correctly.
403 : */
404 208 : if (!cstate->rel && slot->tts_tupleDescriptor->tdtypeid == RECORDOID)
405 48 : {
406 48 : HeapTuple tup = heap_form_tuple(cstate->tupDesc,
407 48 : slot->tts_values,
408 48 : slot->tts_isnull);
409 :
410 48 : rowdata = HeapTupleGetDatum(tup);
411 : }
412 : else
413 160 : rowdata = ExecFetchSlotHeapTupleDatum(slot);
414 : }
415 :
416 328 : composite_to_json(rowdata, cstate->json_buf, false);
417 :
418 328 : if (cstate->opts.force_array)
419 : {
420 48 : if (cstate->json_row_delim_needed)
421 32 : CopySendChar(cstate, ',');
422 : else
423 : {
424 : /* first row needs no delimiter */
425 16 : CopySendChar(cstate, ' ');
426 16 : cstate->json_row_delim_needed = true;
427 : }
428 : }
429 :
430 : /*
431 : * Convert the JSON output to the target encoding if needed. Unlike the
432 : * text and CSV paths which convert per-attribute via CopyAttributeOut*,
433 : * composite_to_json() emits the whole row as one buffer, so we transcode
434 : * it here in a single call before sending.
435 : */
436 328 : if (cstate->need_transcoding)
437 : {
438 : char *converted;
439 :
440 8 : converted = pg_server_to_any(cstate->json_buf->data,
441 8 : cstate->json_buf->len,
442 : cstate->file_encoding);
443 8 : CopySendData(cstate, converted, strlen(converted));
444 8 : if (converted != cstate->json_buf->data)
445 8 : pfree(converted);
446 : }
447 : else
448 320 : CopySendData(cstate, cstate->json_buf->data, cstate->json_buf->len);
449 :
450 328 : CopySendTextLikeEndOfRow(cstate);
451 328 : }
452 :
453 : /*
454 : * Implementation of the start callback for binary format. Send a header
455 : * for a binary copy.
456 : */
457 : static void
458 8 : CopyToBinaryStart(CopyToState cstate, TupleDesc tupDesc)
459 : {
460 : int32 tmp;
461 :
462 : /* Signature */
463 8 : CopySendData(cstate, BinarySignature, 11);
464 : /* Flags field */
465 8 : tmp = 0;
466 8 : CopySendInt32(cstate, tmp);
467 : /* No header extension */
468 8 : tmp = 0;
469 8 : CopySendInt32(cstate, tmp);
470 8 : }
471 :
472 : /*
473 : * Implementation of the outfunc callback for binary format. Assign
474 : * the binary output function to the given *finfo.
475 : */
476 : static void
477 38 : CopyToBinaryOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo)
478 : {
479 : Oid func_oid;
480 : bool is_varlena;
481 :
482 : /* Set output function for an attribute */
483 38 : getTypeBinaryOutputInfo(atttypid, &func_oid, &is_varlena);
484 37 : fmgr_info(func_oid, finfo);
485 37 : }
486 :
487 : /* Implementation of the per-row callback for binary format */
488 : static void
489 19 : CopyToBinaryOneRow(CopyToState cstate, TupleTableSlot *slot)
490 : {
491 19 : FmgrInfo *out_functions = cstate->out_functions;
492 :
493 : /* Binary per-tuple header */
494 19 : CopySendInt16(cstate, list_length(cstate->attnumlist));
495 :
496 139 : foreach_int(attnum, cstate->attnumlist)
497 : {
498 101 : Datum value = slot->tts_values[attnum - 1];
499 101 : bool isnull = slot->tts_isnull[attnum - 1];
500 :
501 101 : if (isnull)
502 : {
503 20 : CopySendInt32(cstate, -1);
504 : }
505 : else
506 : {
507 : bytea *outputbytes;
508 :
509 81 : outputbytes = SendFunctionCall(&out_functions[attnum - 1],
510 : value);
511 81 : CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
512 81 : CopySendData(cstate, VARDATA(outputbytes),
513 81 : VARSIZE(outputbytes) - VARHDRSZ);
514 : }
515 : }
516 :
517 19 : CopySendEndOfRow(cstate);
518 19 : }
519 :
520 : /* Implementation of the end callback for binary format */
521 : static void
522 8 : CopyToBinaryEnd(CopyToState cstate)
523 : {
524 : /* Generate trailer for a binary copy */
525 8 : CopySendInt16(cstate, -1);
526 : /* Need to flush out the trailer */
527 8 : CopySendEndOfRow(cstate);
528 8 : }
529 :
530 : /*
531 : * Send copy start/stop messages for frontend copies. These have changed
532 : * in past protocol redesigns.
533 : */
534 : static void
535 5112 : SendCopyBegin(CopyToState cstate)
536 : {
537 : StringInfoData buf;
538 5112 : int natts = list_length(cstate->attnumlist);
539 5112 : int16 format = (cstate->opts.format == COPY_FORMAT_BINARY ? 1 : 0);
540 : int i;
541 :
542 5112 : pq_beginmessage(&buf, PqMsg_CopyOutResponse);
543 5112 : pq_sendbyte(&buf, format); /* overall format */
544 5112 : if (cstate->opts.format != COPY_FORMAT_JSON)
545 : {
546 5020 : pq_sendint16(&buf, natts);
547 22061 : for (i = 0; i < natts; i++)
548 17041 : pq_sendint16(&buf, format); /* per-column formats */
549 : }
550 : else
551 : {
552 : /*
553 : * For JSON format, report one text-format column. Each CopyData
554 : * message contains one complete JSON object, not individual column
555 : * values, so the per-column count is always 1.
556 : */
557 92 : pq_sendint16(&buf, 1);
558 92 : pq_sendint16(&buf, 0);
559 : }
560 :
561 5112 : pq_endmessage(&buf);
562 5112 : cstate->copy_dest = COPY_FRONTEND;
563 5112 : }
564 :
565 : static void
566 5110 : SendCopyEnd(CopyToState cstate)
567 : {
568 : /* Shouldn't have any unsent data */
569 : Assert(cstate->fe_msgbuf->len == 0);
570 : /* Send Copy Done message */
571 5110 : pq_putemptymessage(PqMsg_CopyDone);
572 5110 : }
573 :
574 : /*----------
575 : * CopySendData sends output data to the destination (file or frontend)
576 : * CopySendString does the same for null-terminated strings
577 : * CopySendChar does the same for single characters
578 : * CopySendEndOfRow does the appropriate thing at end of each data row
579 : * (data is not actually flushed except by CopySendEndOfRow)
580 : *
581 : * NB: no data conversion is applied by these functions
582 : *----------
583 : */
584 : static void
585 6514871 : CopySendData(CopyToState cstate, const void *databuf, int datasize)
586 : {
587 6514871 : appendBinaryStringInfo(cstate->fe_msgbuf, databuf, datasize);
588 6514871 : }
589 :
590 : static void
591 613341 : CopySendString(CopyToState cstate, const char *str)
592 : {
593 613341 : appendBinaryStringInfo(cstate->fe_msgbuf, str, strlen(str));
594 613341 : }
595 :
596 : static void
597 7252515 : CopySendChar(CopyToState cstate, char c)
598 : {
599 7252515 : appendStringInfoCharMacro(cstate->fe_msgbuf, c);
600 7252515 : }
601 :
602 : static void
603 1841142 : CopySendEndOfRow(CopyToState cstate)
604 : {
605 1841142 : StringInfo fe_msgbuf = cstate->fe_msgbuf;
606 :
607 1841142 : switch (cstate->copy_dest)
608 : {
609 8203 : case COPY_FILE:
610 8203 : pgstat_report_wait_start(WAIT_EVENT_COPY_TO_WRITE);
611 8203 : if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1,
612 8203 : cstate->copy_file) != 1 ||
613 8203 : ferror(cstate->copy_file))
614 : {
615 0 : if (cstate->is_program)
616 : {
617 0 : if (errno == EPIPE)
618 : {
619 : /*
620 : * The pipe will be closed automatically on error at
621 : * the end of transaction, but we might get a better
622 : * error message from the subprocess' exit code than
623 : * just "Broken Pipe"
624 : */
625 0 : ClosePipeToProgram(cstate);
626 :
627 : /*
628 : * If ClosePipeToProgram() didn't throw an error, the
629 : * program terminated normally, but closed the pipe
630 : * first. Restore errno, and throw an error.
631 : */
632 0 : errno = EPIPE;
633 : }
634 0 : ereport(ERROR,
635 : (errcode_for_file_access(),
636 : errmsg("could not write to COPY program: %m")));
637 : }
638 : else
639 0 : ereport(ERROR,
640 : (errcode_for_file_access(),
641 : errmsg("could not write to COPY file: %m")));
642 : }
643 8203 : pgstat_report_wait_end();
644 8203 : break;
645 1832936 : case COPY_FRONTEND:
646 : /* Dump the accumulated row as one CopyData message */
647 1832936 : (void) pq_putmessage(PqMsg_CopyData, fe_msgbuf->data, fe_msgbuf->len);
648 1832936 : break;
649 3 : case COPY_CALLBACK:
650 3 : cstate->data_dest_cb(fe_msgbuf->data, fe_msgbuf->len);
651 3 : break;
652 : }
653 :
654 : /* Update the progress */
655 1841142 : cstate->bytes_processed += fe_msgbuf->len;
656 1841142 : pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
657 :
658 1841142 : resetStringInfo(fe_msgbuf);
659 1841142 : }
660 :
661 : /*
662 : * Wrapper function of CopySendEndOfRow for text, CSV, and json formats. Sends the
663 : * line termination and do common appropriate things for the end of row.
664 : */
665 : static inline void
666 1841115 : CopySendTextLikeEndOfRow(CopyToState cstate)
667 : {
668 1841115 : switch (cstate->copy_dest)
669 : {
670 8187 : case COPY_FILE:
671 : /* Default line termination depends on platform */
672 : #ifndef WIN32
673 8187 : CopySendChar(cstate, '\n');
674 : #else
675 : CopySendString(cstate, "\r\n");
676 : #endif
677 8187 : break;
678 1832925 : case COPY_FRONTEND:
679 : /* The FE/BE protocol uses \n as newline for all platforms */
680 1832925 : CopySendChar(cstate, '\n');
681 1832925 : break;
682 3 : default:
683 3 : break;
684 : }
685 :
686 : /* Now take the actions related to the end of a row */
687 1841115 : CopySendEndOfRow(cstate);
688 1841115 : }
689 :
690 : /*
691 : * These functions do apply some data conversion
692 : */
693 :
694 : /*
695 : * CopySendInt32 sends an int32 in network byte order
696 : */
697 : static inline void
698 117 : CopySendInt32(CopyToState cstate, int32 val)
699 : {
700 : uint32 buf;
701 :
702 117 : buf = pg_hton32((uint32) val);
703 117 : CopySendData(cstate, &buf, sizeof(buf));
704 117 : }
705 :
706 : /*
707 : * CopySendInt16 sends an int16 in network byte order
708 : */
709 : static inline void
710 27 : CopySendInt16(CopyToState cstate, int16 val)
711 : {
712 : uint16 buf;
713 :
714 27 : buf = pg_hton16((uint16) val);
715 27 : CopySendData(cstate, &buf, sizeof(buf));
716 27 : }
717 :
718 : /*
719 : * Closes the pipe to an external program, checking the pclose() return code.
720 : */
721 : static void
722 0 : ClosePipeToProgram(CopyToState cstate)
723 : {
724 : int pclose_rc;
725 :
726 : Assert(cstate->is_program);
727 :
728 0 : pclose_rc = ClosePipeStream(cstate->copy_file);
729 0 : if (pclose_rc == -1)
730 0 : ereport(ERROR,
731 : (errcode_for_file_access(),
732 : errmsg("could not close pipe to external command: %m")));
733 0 : else if (pclose_rc != 0)
734 : {
735 0 : ereport(ERROR,
736 : (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
737 : errmsg("program \"%s\" failed",
738 : cstate->filename),
739 : errdetail_internal("%s", wait_result_to_str(pclose_rc))));
740 : }
741 0 : }
742 :
743 : /*
744 : * Release resources allocated in a cstate for COPY TO.
745 : */
746 : static void
747 5168 : EndCopy(CopyToState cstate)
748 : {
749 5168 : if (cstate->is_program)
750 : {
751 0 : ClosePipeToProgram(cstate);
752 : }
753 : else
754 : {
755 5168 : if (cstate->filename != NULL && FreeFile(cstate->copy_file))
756 0 : ereport(ERROR,
757 : (errcode_for_file_access(),
758 : errmsg("could not close file \"%s\": %m",
759 : cstate->filename)));
760 : }
761 :
762 5168 : pgstat_progress_end_command();
763 :
764 5168 : MemoryContextDelete(cstate->copycontext);
765 :
766 5168 : if (cstate->partitions)
767 22 : list_free(cstate->partitions);
768 :
769 5168 : pfree(cstate);
770 5168 : }
771 :
772 : /*
773 : * Setup CopyToState to read tuples from a table or a query for COPY TO.
774 : *
775 : * 'rel': Relation to be copied
776 : * 'raw_query': Query whose results are to be copied
777 : * 'queryRelId': OID of base relation to convert to a query (for RLS)
778 : * 'filename': Name of server-local file to write, NULL for STDOUT
779 : * 'is_program': true if 'filename' is program to execute
780 : * 'data_dest_cb': Callback that processes the output data
781 : * 'attnamelist': List of char *, columns to include. NIL selects all cols.
782 : * 'options': List of DefElem. See copy_opt_item in gram.y for selections.
783 : *
784 : * Returns a CopyToState, to be passed to DoCopyTo() and related functions.
785 : */
786 : CopyToState
787 5368 : BeginCopyTo(ParseState *pstate,
788 : Relation rel,
789 : RawStmt *raw_query,
790 : Oid queryRelId,
791 : const char *filename,
792 : bool is_program,
793 : copy_data_dest_cb data_dest_cb,
794 : List *attnamelist,
795 : List *options)
796 : {
797 : CopyToState cstate;
798 5368 : bool pipe = (filename == NULL && data_dest_cb == NULL);
799 : TupleDesc tupDesc;
800 : int num_phys_attrs;
801 : MemoryContext oldcontext;
802 5368 : const int progress_cols[] = {
803 : PROGRESS_COPY_COMMAND,
804 : PROGRESS_COPY_TYPE
805 : };
806 5368 : int64 progress_vals[] = {
807 : PROGRESS_COPY_COMMAND_TO,
808 : 0
809 : };
810 5368 : List *children = NIL;
811 :
812 5368 : if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION)
813 : {
814 39 : if (rel->rd_rel->relkind == RELKIND_VIEW)
815 8 : ereport(ERROR,
816 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
817 : errmsg("cannot copy from view \"%s\"",
818 : RelationGetRelationName(rel)),
819 : errhint("Try the COPY (SELECT ...) TO variant.")));
820 31 : else if (rel->rd_rel->relkind == RELKIND_MATVIEW)
821 : {
822 8 : if (!RelationIsPopulated(rel))
823 4 : ereport(ERROR,
824 : errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
825 : errmsg("cannot copy from unpopulated materialized view \"%s\"",
826 : RelationGetRelationName(rel)),
827 : errhint("Use the REFRESH MATERIALIZED VIEW command."));
828 : }
829 23 : else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
830 0 : ereport(ERROR,
831 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
832 : errmsg("cannot copy from foreign table \"%s\"",
833 : RelationGetRelationName(rel)),
834 : errhint("Try the COPY (SELECT ...) TO variant.")));
835 23 : else if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
836 0 : ereport(ERROR,
837 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
838 : errmsg("cannot copy from sequence \"%s\"",
839 : RelationGetRelationName(rel))));
840 23 : else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
841 : {
842 : /*
843 : * Collect OIDs of relation containing data, so that later
844 : * DoCopyTo can copy the data from them.
845 : */
846 23 : children = find_all_inheritors(RelationGetRelid(rel), AccessShareLock, NULL);
847 :
848 114 : foreach_oid(child, children)
849 : {
850 70 : char relkind = get_rel_relkind(child);
851 :
852 70 : if (relkind == RELKIND_FOREIGN_TABLE)
853 : {
854 1 : char *relation_name = get_rel_name(child);
855 :
856 1 : ereport(ERROR,
857 : errcode(ERRCODE_WRONG_OBJECT_TYPE),
858 : errmsg("cannot copy from foreign table \"%s\"", relation_name),
859 : errdetail("Partition \"%s\" is a foreign table in partitioned table \"%s\"",
860 : relation_name, RelationGetRelationName(rel)),
861 : errhint("Try the COPY (SELECT ...) TO variant."));
862 : }
863 :
864 : /* Exclude tables with no data */
865 69 : if (RELKIND_HAS_PARTITIONS(relkind))
866 33 : children = foreach_delete_current(children, child);
867 : }
868 : }
869 : else
870 0 : ereport(ERROR,
871 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
872 : errmsg("cannot copy from non-table relation \"%s\"",
873 : RelationGetRelationName(rel))));
874 : }
875 :
876 :
877 : /* Allocate workspace and zero all fields */
878 5355 : cstate = palloc0_object(CopyToStateData);
879 :
880 : /*
881 : * We allocate everything used by a cstate in a new memory context. This
882 : * avoids memory leaks during repeated use of COPY in a query.
883 : */
884 5355 : cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext,
885 : "COPY",
886 : ALLOCSET_DEFAULT_SIZES);
887 :
888 5355 : oldcontext = MemoryContextSwitchTo(cstate->copycontext);
889 :
890 : /* Extract options from the statement node tree */
891 5355 : ProcessCopyOptions(pstate, &cstate->opts, false /* is_from */ , options);
892 :
893 : /* Set format routine */
894 5259 : cstate->routine = CopyToGetRoutine(&cstate->opts);
895 :
896 : /* Process the source/target relation or query */
897 5259 : if (rel)
898 : {
899 : Assert(!raw_query);
900 :
901 4852 : cstate->rel = rel;
902 :
903 4852 : tupDesc = RelationGetDescr(cstate->rel);
904 4852 : cstate->partitions = children;
905 4852 : cstate->tupDesc = tupDesc;
906 : }
907 : else
908 : {
909 : List *rewritten;
910 : Query *query;
911 : PlannedStmt *plan;
912 : DestReceiver *dest;
913 :
914 407 : cstate->rel = NULL;
915 407 : cstate->partitions = NIL;
916 :
917 : /*
918 : * Run parse analysis and rewrite. Note this also acquires sufficient
919 : * locks on the source table(s).
920 : */
921 407 : rewritten = pg_analyze_and_rewrite_fixedparams(raw_query,
922 : pstate->p_sourcetext, NULL, 0,
923 : NULL);
924 :
925 : /* check that we got back something we can work with */
926 399 : if (rewritten == NIL)
927 : {
928 12 : ereport(ERROR,
929 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
930 : errmsg("DO INSTEAD NOTHING rules are not supported for COPY")));
931 : }
932 387 : else if (list_length(rewritten) > 1)
933 : {
934 : ListCell *lc;
935 :
936 : /* examine queries to determine which error message to issue */
937 68 : foreach(lc, rewritten)
938 : {
939 56 : Query *q = lfirst_node(Query, lc);
940 :
941 56 : if (q->querySource == QSRC_QUAL_INSTEAD_RULE)
942 12 : ereport(ERROR,
943 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
944 : errmsg("conditional DO INSTEAD rules are not supported for COPY")));
945 44 : if (q->querySource == QSRC_NON_INSTEAD_RULE)
946 12 : ereport(ERROR,
947 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
948 : errmsg("DO ALSO rules are not supported for COPY")));
949 : }
950 :
951 12 : ereport(ERROR,
952 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
953 : errmsg("multi-statement DO INSTEAD rules are not supported for COPY")));
954 : }
955 :
956 351 : query = linitial_node(Query, rewritten);
957 :
958 : /* The grammar allows SELECT INTO, but we don't support that */
959 351 : if (query->utilityStmt != NULL &&
960 12 : IsA(query->utilityStmt, CreateTableAsStmt))
961 8 : ereport(ERROR,
962 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
963 : errmsg("COPY (SELECT INTO) is not supported")));
964 :
965 : /* The only other utility command we could see is NOTIFY */
966 343 : if (query->utilityStmt != NULL)
967 4 : ereport(ERROR,
968 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
969 : errmsg("COPY query must not be a utility command")));
970 :
971 : /*
972 : * Similarly the grammar doesn't enforce the presence of a RETURNING
973 : * clause, but this is required here.
974 : */
975 339 : if (query->commandType != CMD_SELECT &&
976 69 : query->returningList == NIL)
977 : {
978 : Assert(query->commandType == CMD_INSERT ||
979 : query->commandType == CMD_UPDATE ||
980 : query->commandType == CMD_DELETE ||
981 : query->commandType == CMD_MERGE);
982 :
983 16 : ereport(ERROR,
984 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
985 : errmsg("COPY query must have a RETURNING clause")));
986 : }
987 :
988 : /* plan the query */
989 323 : plan = pg_plan_query(query, pstate->p_sourcetext,
990 : CURSOR_OPT_PARALLEL_OK, NULL, NULL);
991 :
992 : /*
993 : * With row-level security and a user using "COPY relation TO", we
994 : * have to convert the "COPY relation TO" to a query-based COPY (eg:
995 : * "COPY (SELECT * FROM ONLY relation) TO"), to allow the rewriter to
996 : * add in any RLS clauses.
997 : *
998 : * When this happens, we are passed in the relid of the originally
999 : * found relation (which we have locked). As the planner will look up
1000 : * the relation again, we double-check here to make sure it found the
1001 : * same one that we have locked.
1002 : */
1003 322 : if (queryRelId != InvalidOid)
1004 : {
1005 : /*
1006 : * Note that with RLS involved there may be multiple relations,
1007 : * and while the one we need is almost certainly first, we don't
1008 : * make any guarantees of that in the planner, so check the whole
1009 : * list and make sure we find the original relation.
1010 : */
1011 52 : if (!list_member_oid(plan->relationOids, queryRelId))
1012 0 : ereport(ERROR,
1013 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
1014 : errmsg("relation referenced by COPY statement has changed")));
1015 : }
1016 :
1017 : /*
1018 : * Use a snapshot with an updated command ID to ensure this query sees
1019 : * results of any previously executed queries.
1020 : */
1021 322 : PushCopiedSnapshot(GetActiveSnapshot());
1022 322 : UpdateActiveSnapshotCommandId();
1023 :
1024 : /* Create dest receiver for COPY OUT */
1025 322 : dest = CreateDestReceiver(DestCopyOut);
1026 322 : ((DR_copy *) dest)->cstate = cstate;
1027 :
1028 : /* Create a QueryDesc requesting no output */
1029 322 : cstate->queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext,
1030 : GetActiveSnapshot(),
1031 : InvalidSnapshot,
1032 : dest, NULL, NULL, 0);
1033 :
1034 : /*
1035 : * Call ExecutorStart to prepare the plan for execution.
1036 : *
1037 : * ExecutorStart computes a result tupdesc for us
1038 : */
1039 322 : ExecutorStart(cstate->queryDesc, 0);
1040 :
1041 318 : tupDesc = cstate->queryDesc->tupDesc;
1042 318 : tupDesc = BlessTupleDesc(tupDesc);
1043 318 : cstate->tupDesc = tupDesc;
1044 : }
1045 :
1046 : /* Generate or convert list of attributes to process */
1047 5170 : cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
1048 :
1049 : /* Set up JSON-specific state */
1050 5170 : if (cstate->opts.format == COPY_FORMAT_JSON)
1051 : {
1052 100 : cstate->json_buf = makeStringInfo();
1053 :
1054 100 : if (rel && list_length(cstate->attnumlist) < tupDesc->natts)
1055 : {
1056 36 : int natts = list_length(cstate->attnumlist);
1057 : TupleDesc resultDesc;
1058 :
1059 : /*
1060 : * Build a TupleDesc describing only the selected columns so that
1061 : * composite_to_json() emits the right column names and types.
1062 : */
1063 36 : resultDesc = CreateTemplateTupleDesc(natts);
1064 :
1065 140 : foreach_int(attnum, cstate->attnumlist)
1066 : {
1067 68 : Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
1068 :
1069 68 : TupleDescInitEntry(resultDesc,
1070 68 : foreach_current_index(attnum) + 1,
1071 68 : NameStr(attr->attname),
1072 : attr->atttypid,
1073 : attr->atttypmod,
1074 68 : attr->attndims);
1075 : }
1076 :
1077 36 : TupleDescFinalize(resultDesc);
1078 36 : cstate->tupDesc = BlessTupleDesc(resultDesc);
1079 :
1080 : /*
1081 : * Pre-allocate arrays for projecting selected column values into
1082 : * sequential positions matching the custom TupleDesc.
1083 : */
1084 36 : cstate->json_projvalues = palloc_array(Datum, natts);
1085 36 : cstate->json_projnulls = palloc_array(bool, natts);
1086 : }
1087 : }
1088 :
1089 5170 : num_phys_attrs = tupDesc->natts;
1090 :
1091 : /* Convert FORCE_QUOTE name list to per-column flags, check validity */
1092 5170 : cstate->opts.force_quote_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
1093 5170 : if (cstate->opts.force_quote_all)
1094 : {
1095 12 : MemSet(cstate->opts.force_quote_flags, true, num_phys_attrs * sizeof(bool));
1096 : }
1097 5158 : else if (cstate->opts.force_quote)
1098 : {
1099 : List *attnums;
1100 : ListCell *cur;
1101 :
1102 16 : attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_quote);
1103 :
1104 32 : foreach(cur, attnums)
1105 : {
1106 16 : int attnum = lfirst_int(cur);
1107 16 : Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
1108 :
1109 16 : if (!list_member_int(cstate->attnumlist, attnum))
1110 0 : ereport(ERROR,
1111 : (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
1112 : /*- translator: %s is the name of a COPY option, e.g. FORCE_NOT_NULL */
1113 : errmsg("%s column \"%s\" not referenced by COPY",
1114 : "FORCE_QUOTE", NameStr(attr->attname))));
1115 16 : cstate->opts.force_quote_flags[attnum - 1] = true;
1116 : }
1117 : }
1118 :
1119 : /* Use client encoding when ENCODING option is not specified. */
1120 5170 : if (cstate->opts.file_encoding < 0)
1121 5146 : cstate->file_encoding = pg_get_client_encoding();
1122 : else
1123 24 : cstate->file_encoding = cstate->opts.file_encoding;
1124 :
1125 : /*
1126 : * Set up encoding conversion info if the file and server encodings differ
1127 : * (see also pg_server_to_any).
1128 : */
1129 5170 : if (cstate->file_encoding == GetDatabaseEncoding() ||
1130 21 : cstate->file_encoding == PG_SQL_ASCII)
1131 5153 : cstate->need_transcoding = false;
1132 : else
1133 17 : cstate->need_transcoding = true;
1134 :
1135 : /* See Multibyte encoding comment above */
1136 5170 : cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding);
1137 :
1138 5170 : cstate->copy_dest = COPY_FILE; /* default */
1139 :
1140 5170 : if (data_dest_cb)
1141 : {
1142 1 : progress_vals[1] = PROGRESS_COPY_TYPE_CALLBACK;
1143 1 : cstate->copy_dest = COPY_CALLBACK;
1144 1 : cstate->data_dest_cb = data_dest_cb;
1145 : }
1146 5169 : else if (pipe)
1147 : {
1148 5112 : progress_vals[1] = PROGRESS_COPY_TYPE_PIPE;
1149 :
1150 : Assert(!is_program); /* the grammar does not allow this */
1151 5112 : if (whereToSendOutput != DestRemote)
1152 0 : cstate->copy_file = stdout;
1153 : }
1154 : else
1155 : {
1156 57 : cstate->filename = pstrdup(filename);
1157 57 : cstate->is_program = is_program;
1158 :
1159 57 : if (is_program)
1160 : {
1161 0 : progress_vals[1] = PROGRESS_COPY_TYPE_PROGRAM;
1162 0 : cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_W);
1163 0 : if (cstate->copy_file == NULL)
1164 0 : ereport(ERROR,
1165 : (errcode_for_file_access(),
1166 : errmsg("could not execute command \"%s\": %m",
1167 : cstate->filename)));
1168 : }
1169 : else
1170 : {
1171 : mode_t oumask; /* Pre-existing umask value */
1172 : struct stat st;
1173 :
1174 57 : progress_vals[1] = PROGRESS_COPY_TYPE_FILE;
1175 :
1176 : /*
1177 : * Prevent write to relative path ... too easy to shoot oneself in
1178 : * the foot by overwriting a database file ...
1179 : */
1180 57 : if (!is_absolute_path(filename))
1181 0 : ereport(ERROR,
1182 : (errcode(ERRCODE_INVALID_NAME),
1183 : errmsg("relative path not allowed for COPY to file")));
1184 :
1185 57 : oumask = umask(S_IWGRP | S_IWOTH);
1186 57 : PG_TRY();
1187 : {
1188 57 : cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_W);
1189 : }
1190 0 : PG_FINALLY();
1191 : {
1192 57 : umask(oumask);
1193 : }
1194 57 : PG_END_TRY();
1195 57 : if (cstate->copy_file == NULL)
1196 : {
1197 : /* copy errno because ereport subfunctions might change it */
1198 0 : int save_errno = errno;
1199 :
1200 0 : ereport(ERROR,
1201 : (errcode_for_file_access(),
1202 : errmsg("could not open file \"%s\" for writing: %m",
1203 : cstate->filename),
1204 : (save_errno == ENOENT || save_errno == EACCES) ?
1205 : errhint("COPY TO instructs the PostgreSQL server process to write a file. "
1206 : "You may want a client-side facility such as psql's \\copy.") : 0));
1207 : }
1208 :
1209 57 : if (fstat(fileno(cstate->copy_file), &st))
1210 0 : ereport(ERROR,
1211 : (errcode_for_file_access(),
1212 : errmsg("could not stat file \"%s\": %m",
1213 : cstate->filename)));
1214 :
1215 57 : if (S_ISDIR(st.st_mode))
1216 0 : ereport(ERROR,
1217 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
1218 : errmsg("\"%s\" is a directory", cstate->filename)));
1219 : }
1220 : }
1221 :
1222 : /* initialize progress */
1223 5170 : pgstat_progress_start_command(PROGRESS_COMMAND_COPY,
1224 5170 : cstate->rel ? RelationGetRelid(cstate->rel) : InvalidOid);
1225 5170 : pgstat_progress_update_multi_param(2, progress_cols, progress_vals);
1226 :
1227 5170 : cstate->bytes_processed = 0;
1228 :
1229 5170 : MemoryContextSwitchTo(oldcontext);
1230 :
1231 5170 : return cstate;
1232 : }
1233 :
1234 : /*
1235 : * Clean up storage and release resources for COPY TO.
1236 : */
1237 : void
1238 5168 : EndCopyTo(CopyToState cstate)
1239 : {
1240 5168 : if (cstate->queryDesc != NULL)
1241 : {
1242 : /* Close down the query and free resources. */
1243 318 : ExecutorFinish(cstate->queryDesc);
1244 318 : ExecutorEnd(cstate->queryDesc);
1245 318 : FreeQueryDesc(cstate->queryDesc);
1246 318 : PopActiveSnapshot();
1247 : }
1248 :
1249 : /* Clean up storage */
1250 5168 : EndCopy(cstate);
1251 5168 : }
1252 :
1253 : /*
1254 : * Copy from relation or query TO file.
1255 : *
1256 : * Returns the number of rows processed.
1257 : */
1258 : uint64
1259 5170 : DoCopyTo(CopyToState cstate)
1260 : {
1261 5170 : bool pipe = (cstate->filename == NULL && cstate->data_dest_cb == NULL);
1262 5170 : bool fe_copy = (pipe && whereToSendOutput == DestRemote);
1263 : TupleDesc tupDesc;
1264 : int num_phys_attrs;
1265 : ListCell *cur;
1266 5170 : uint64 processed = 0;
1267 :
1268 5170 : if (fe_copy)
1269 5112 : SendCopyBegin(cstate);
1270 :
1271 5170 : if (cstate->rel)
1272 4852 : tupDesc = RelationGetDescr(cstate->rel);
1273 : else
1274 318 : tupDesc = cstate->queryDesc->tupDesc;
1275 5170 : num_phys_attrs = tupDesc->natts;
1276 5170 : cstate->opts.null_print_client = cstate->opts.null_print; /* default */
1277 :
1278 : /* We use fe_msgbuf as a per-row buffer regardless of copy_dest */
1279 5170 : cstate->fe_msgbuf = makeStringInfo();
1280 :
1281 : /* Get info about the columns we need to process. */
1282 5170 : cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
1283 22587 : foreach(cur, cstate->attnumlist)
1284 : {
1285 17418 : int attnum = lfirst_int(cur);
1286 17418 : Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
1287 :
1288 17418 : cstate->routine->CopyToOutFunc(cstate, attr->atttypid,
1289 17418 : &cstate->out_functions[attnum - 1]);
1290 : }
1291 :
1292 : /*
1293 : * Create a temporary memory context that we can reset once per row to
1294 : * recover palloc'd memory. This avoids any problems with leaks inside
1295 : * datatype output routines, and should be faster than retail pfree's
1296 : * anyway. (We don't need a whole econtext as CopyFrom does.)
1297 : */
1298 5169 : cstate->rowcontext = AllocSetContextCreate(CurrentMemoryContext,
1299 : "COPY TO",
1300 : ALLOCSET_DEFAULT_SIZES);
1301 :
1302 5169 : cstate->routine->CopyToStart(cstate, tupDesc);
1303 :
1304 5169 : if (cstate->rel)
1305 : {
1306 : /*
1307 : * If COPY TO source table is a partitioned table, then open each
1308 : * partition and process each individual partition.
1309 : */
1310 4851 : if (cstate->rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
1311 : {
1312 80 : foreach_oid(child, cstate->partitions)
1313 : {
1314 : Relation scan_rel;
1315 :
1316 : /* We already got the lock in BeginCopyTo */
1317 36 : scan_rel = table_open(child, NoLock);
1318 36 : CopyRelationTo(cstate, scan_rel, cstate->rel, &processed);
1319 36 : table_close(scan_rel, NoLock);
1320 : }
1321 : }
1322 : else
1323 4829 : CopyRelationTo(cstate, cstate->rel, NULL, &processed);
1324 : }
1325 : else
1326 : {
1327 : /* run the plan --- the dest receiver will send tuples */
1328 318 : ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0);
1329 318 : processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
1330 : }
1331 :
1332 5168 : cstate->routine->CopyToEnd(cstate);
1333 :
1334 5168 : MemoryContextDelete(cstate->rowcontext);
1335 :
1336 5168 : if (fe_copy)
1337 5110 : SendCopyEnd(cstate);
1338 :
1339 5168 : return processed;
1340 : }
1341 :
1342 : /*
1343 : * Scans a single table and exports its rows to the COPY destination.
1344 : *
1345 : * root_rel can be set to the root table of rel if rel is a partition
1346 : * table so that we can send tuples in root_rel's rowtype, which might
1347 : * differ from individual partitions.
1348 : */
1349 : static void
1350 4865 : CopyRelationTo(CopyToState cstate, Relation rel, Relation root_rel, uint64 *processed)
1351 : {
1352 : TupleTableSlot *slot;
1353 : TableScanDesc scandesc;
1354 4865 : AttrMap *map = NULL;
1355 4865 : TupleTableSlot *root_slot = NULL;
1356 :
1357 4865 : scandesc = table_beginscan(rel, GetActiveSnapshot(), 0, NULL,
1358 : SO_NONE);
1359 4864 : slot = table_slot_create(rel, NULL);
1360 :
1361 : /*
1362 : * If we are exporting partition data here, we check if converting tuples
1363 : * to the root table's rowtype, because a partition might have column
1364 : * order different than its root table.
1365 : */
1366 4864 : if (root_rel != NULL)
1367 : {
1368 36 : root_slot = table_slot_create(root_rel, NULL);
1369 36 : map = build_attrmap_by_name_if_req(RelationGetDescr(rel),
1370 : RelationGetDescr(root_rel),
1371 : false);
1372 : }
1373 :
1374 1841148 : while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot))
1375 : {
1376 : TupleTableSlot *copyslot;
1377 :
1378 1836284 : CHECK_FOR_INTERRUPTS();
1379 :
1380 1836284 : if (map != NULL)
1381 26 : copyslot = execute_attr_map_slot(map, slot, root_slot);
1382 : else
1383 : {
1384 : /* Deconstruct the tuple */
1385 1836258 : slot_getallattrs(slot);
1386 1836258 : copyslot = slot;
1387 : }
1388 :
1389 : /* Format and send the data */
1390 1836284 : CopyOneRowTo(cstate, copyslot);
1391 :
1392 : /*
1393 : * Increment the number of processed tuples, and report the progress.
1394 : */
1395 1836284 : pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
1396 1836284 : ++(*processed));
1397 : }
1398 :
1399 4864 : ExecDropSingleTupleTableSlot(slot);
1400 :
1401 4864 : if (root_slot != NULL)
1402 36 : ExecDropSingleTupleTableSlot(root_slot);
1403 :
1404 4864 : if (map != NULL)
1405 11 : free_attrmap(map);
1406 :
1407 4864 : table_endscan(scandesc);
1408 4864 : }
1409 :
1410 : /*
1411 : * Emit one row during DoCopyTo().
1412 : */
1413 : static inline void
1414 1841066 : CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
1415 : {
1416 : MemoryContext oldcontext;
1417 :
1418 1841066 : MemoryContextReset(cstate->rowcontext);
1419 1841066 : oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
1420 :
1421 : /* Make sure the tuple is fully deconstructed */
1422 1841066 : slot_getallattrs(slot);
1423 :
1424 1841066 : cstate->routine->CopyToOneRow(cstate, slot);
1425 :
1426 1841066 : MemoryContextSwitchTo(oldcontext);
1427 1841066 : }
1428 :
1429 : /*
1430 : * Send text representation of one attribute, with conversion and escaping
1431 : */
1432 : #define DUMPSOFAR() \
1433 : do { \
1434 : if (ptr > start) \
1435 : CopySendData(cstate, start, ptr - start); \
1436 : } while (0)
1437 :
1438 : static void
1439 6621237 : CopyAttributeOutText(CopyToState cstate, const char *string)
1440 : {
1441 : const char *ptr;
1442 : const char *start;
1443 : char c;
1444 6621237 : char delimc = cstate->opts.delim[0];
1445 :
1446 6621237 : if (cstate->need_transcoding)
1447 4 : ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
1448 : else
1449 6621233 : ptr = string;
1450 :
1451 : /*
1452 : * We have to grovel through the string searching for control characters
1453 : * and instances of the delimiter character. In most cases, though, these
1454 : * are infrequent. To avoid overhead from calling CopySendData once per
1455 : * character, we dump out all characters between escaped characters in a
1456 : * single call. The loop invariant is that the data from "start" to "ptr"
1457 : * can be sent literally, but hasn't yet been.
1458 : *
1459 : * We can skip pg_encoding_mblen() overhead when encoding is safe, because
1460 : * in valid backend encodings, extra bytes of a multibyte character never
1461 : * look like ASCII. This loop is sufficiently performance-critical that
1462 : * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
1463 : * of the normal safe-encoding path.
1464 : */
1465 6621237 : if (cstate->encoding_embeds_ascii)
1466 : {
1467 4 : start = ptr;
1468 12 : while ((c = *ptr) != '\0')
1469 : {
1470 8 : if ((unsigned char) c < (unsigned char) 0x20)
1471 : {
1472 : /*
1473 : * \r and \n must be escaped, the others are traditional. We
1474 : * prefer to dump these using the C-like notation, rather than
1475 : * a backslash and the literal character, because it makes the
1476 : * dump file a bit more proof against Microsoftish data
1477 : * mangling.
1478 : */
1479 0 : switch (c)
1480 : {
1481 0 : case '\b':
1482 0 : c = 'b';
1483 0 : break;
1484 0 : case '\f':
1485 0 : c = 'f';
1486 0 : break;
1487 0 : case '\n':
1488 0 : c = 'n';
1489 0 : break;
1490 0 : case '\r':
1491 0 : c = 'r';
1492 0 : break;
1493 0 : case '\t':
1494 0 : c = 't';
1495 0 : break;
1496 0 : case '\v':
1497 0 : c = 'v';
1498 0 : break;
1499 0 : default:
1500 : /* If it's the delimiter, must backslash it */
1501 0 : if (c == delimc)
1502 0 : break;
1503 : /* All ASCII control chars are length 1 */
1504 0 : ptr++;
1505 0 : continue; /* fall to end of loop */
1506 : }
1507 : /* if we get here, we need to convert the control char */
1508 0 : DUMPSOFAR();
1509 0 : CopySendChar(cstate, '\\');
1510 0 : CopySendChar(cstate, c);
1511 0 : start = ++ptr; /* do not include char in next run */
1512 : }
1513 8 : else if (c == '\\' || c == delimc)
1514 : {
1515 0 : DUMPSOFAR();
1516 0 : CopySendChar(cstate, '\\');
1517 0 : start = ptr++; /* we include char in next run */
1518 : }
1519 8 : else if (IS_HIGHBIT_SET(c))
1520 4 : ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
1521 : else
1522 4 : ptr++;
1523 : }
1524 : }
1525 : else
1526 : {
1527 6621233 : start = ptr;
1528 72309306 : while ((c = *ptr) != '\0')
1529 : {
1530 65688073 : if ((unsigned char) c < (unsigned char) 0x20)
1531 : {
1532 : /*
1533 : * \r and \n must be escaped, the others are traditional. We
1534 : * prefer to dump these using the C-like notation, rather than
1535 : * a backslash and the literal character, because it makes the
1536 : * dump file a bit more proof against Microsoftish data
1537 : * mangling.
1538 : */
1539 7117 : switch (c)
1540 : {
1541 0 : case '\b':
1542 0 : c = 'b';
1543 0 : break;
1544 0 : case '\f':
1545 0 : c = 'f';
1546 0 : break;
1547 6043 : case '\n':
1548 6043 : c = 'n';
1549 6043 : break;
1550 0 : case '\r':
1551 0 : c = 'r';
1552 0 : break;
1553 1074 : case '\t':
1554 1074 : c = 't';
1555 1074 : break;
1556 0 : case '\v':
1557 0 : c = 'v';
1558 0 : break;
1559 0 : default:
1560 : /* If it's the delimiter, must backslash it */
1561 0 : if (c == delimc)
1562 0 : break;
1563 : /* All ASCII control chars are length 1 */
1564 0 : ptr++;
1565 0 : continue; /* fall to end of loop */
1566 : }
1567 : /* if we get here, we need to convert the control char */
1568 7117 : DUMPSOFAR();
1569 7117 : CopySendChar(cstate, '\\');
1570 7117 : CopySendChar(cstate, c);
1571 7117 : start = ++ptr; /* do not include char in next run */
1572 : }
1573 65680956 : else if (c == '\\' || c == delimc)
1574 : {
1575 2417 : DUMPSOFAR();
1576 2417 : CopySendChar(cstate, '\\');
1577 2417 : start = ptr++; /* we include char in next run */
1578 : }
1579 : else
1580 65678539 : ptr++;
1581 : }
1582 : }
1583 :
1584 6621237 : DUMPSOFAR();
1585 6621237 : }
1586 :
1587 : /*
1588 : * Send text representation of one attribute, with conversion and
1589 : * CSV-style escaping
1590 : */
1591 : static void
1592 416 : CopyAttributeOutCSV(CopyToState cstate, const char *string,
1593 : bool use_quote)
1594 : {
1595 : const char *ptr;
1596 : const char *start;
1597 : char c;
1598 416 : char delimc = cstate->opts.delim[0];
1599 416 : char quotec = cstate->opts.quote[0];
1600 416 : char escapec = cstate->opts.escape[0];
1601 416 : bool single_attr = (list_length(cstate->attnumlist) == 1);
1602 :
1603 : /* force quoting if it matches null_print (before conversion!) */
1604 416 : if (!use_quote && strcmp(string, cstate->opts.null_print) == 0)
1605 36 : use_quote = true;
1606 :
1607 416 : if (cstate->need_transcoding)
1608 4 : ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
1609 : else
1610 412 : ptr = string;
1611 :
1612 : /*
1613 : * Make a preliminary pass to discover if it needs quoting
1614 : */
1615 416 : if (!use_quote)
1616 : {
1617 : /*
1618 : * Quote '\.' if it appears alone on a line, so that it will not be
1619 : * interpreted as an end-of-data marker. (PG 18 and up will not
1620 : * interpret '\.' in CSV that way, except in embedded-in-SQL data; but
1621 : * we want the data to be loadable by older versions too. Also, this
1622 : * avoids breaking clients that are still using PQgetline().)
1623 : */
1624 292 : if (single_attr && strcmp(ptr, "\\.") == 0)
1625 4 : use_quote = true;
1626 : else
1627 : {
1628 288 : const char *tptr = ptr;
1629 :
1630 1480 : while ((c = *tptr) != '\0')
1631 : {
1632 1284 : if (c == delimc || c == quotec || c == '\n' || c == '\r')
1633 : {
1634 92 : use_quote = true;
1635 92 : break;
1636 : }
1637 1192 : if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
1638 4 : tptr += pg_encoding_mblen(cstate->file_encoding, tptr);
1639 : else
1640 1188 : tptr++;
1641 : }
1642 : }
1643 : }
1644 :
1645 416 : if (use_quote)
1646 : {
1647 220 : CopySendChar(cstate, quotec);
1648 :
1649 : /*
1650 : * We adopt the same optimization strategy as in CopyAttributeOutText
1651 : */
1652 220 : start = ptr;
1653 1704 : while ((c = *ptr) != '\0')
1654 : {
1655 1484 : if (c == quotec || c == escapec)
1656 : {
1657 104 : DUMPSOFAR();
1658 104 : CopySendChar(cstate, escapec);
1659 104 : start = ptr; /* we include char in next run */
1660 : }
1661 1484 : if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
1662 4 : ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
1663 : else
1664 1480 : ptr++;
1665 : }
1666 220 : DUMPSOFAR();
1667 :
1668 220 : CopySendChar(cstate, quotec);
1669 : }
1670 : else
1671 : {
1672 : /* If it doesn't need quoting, we can just dump it as-is */
1673 196 : CopySendString(cstate, ptr);
1674 : }
1675 416 : }
1676 :
1677 : /*
1678 : * copy_dest_startup --- executor startup
1679 : */
1680 : static void
1681 318 : copy_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
1682 : {
1683 : /* no-op */
1684 318 : }
1685 :
1686 : /*
1687 : * copy_dest_receive --- receive one tuple
1688 : */
1689 : static bool
1690 4782 : copy_dest_receive(TupleTableSlot *slot, DestReceiver *self)
1691 : {
1692 4782 : DR_copy *myState = (DR_copy *) self;
1693 4782 : CopyToState cstate = myState->cstate;
1694 :
1695 : /* Send the data */
1696 4782 : CopyOneRowTo(cstate, slot);
1697 :
1698 : /* Increment the number of processed tuples, and report the progress */
1699 4782 : pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
1700 4782 : ++myState->processed);
1701 :
1702 4782 : return true;
1703 : }
1704 :
1705 : /*
1706 : * copy_dest_shutdown --- executor end
1707 : */
1708 : static void
1709 318 : copy_dest_shutdown(DestReceiver *self)
1710 : {
1711 : /* no-op */
1712 318 : }
1713 :
1714 : /*
1715 : * copy_dest_destroy --- release DestReceiver object
1716 : */
1717 : static void
1718 0 : copy_dest_destroy(DestReceiver *self)
1719 : {
1720 0 : pfree(self);
1721 0 : }
1722 :
1723 : /*
1724 : * CreateCopyDestReceiver -- create a suitable DestReceiver object
1725 : */
1726 : DestReceiver *
1727 322 : CreateCopyDestReceiver(void)
1728 : {
1729 322 : DR_copy *self = palloc_object(DR_copy);
1730 :
1731 322 : self->pub.receiveSlot = copy_dest_receive;
1732 322 : self->pub.rStartup = copy_dest_startup;
1733 322 : self->pub.rShutdown = copy_dest_shutdown;
1734 322 : self->pub.rDestroy = copy_dest_destroy;
1735 322 : self->pub.mydest = DestCopyOut;
1736 :
1737 322 : self->cstate = NULL; /* will be set later */
1738 322 : self->processed = 0;
1739 :
1740 322 : return (DestReceiver *) self;
1741 : }
|