Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * copyto.c
4 : * COPY <table> TO file/program/client
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/commands/copyto.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <ctype.h>
18 : #include <unistd.h>
19 : #include <sys/stat.h>
20 :
21 : #include "access/tableam.h"
22 : #include "commands/copyapi.h"
23 : #include "commands/progress.h"
24 : #include "executor/execdesc.h"
25 : #include "executor/executor.h"
26 : #include "executor/tuptable.h"
27 : #include "libpq/libpq.h"
28 : #include "libpq/pqformat.h"
29 : #include "mb/pg_wchar.h"
30 : #include "miscadmin.h"
31 : #include "pgstat.h"
32 : #include "storage/fd.h"
33 : #include "tcop/tcopprot.h"
34 : #include "utils/lsyscache.h"
35 : #include "utils/memutils.h"
36 : #include "utils/rel.h"
37 : #include "utils/snapmgr.h"
38 :
39 : /*
40 : * Represents the different destination cases that the low-level send
41 : * routines (CopySendEndOfRow and CopySendTextLikeEndOfRow) must distinguish.
42 : */
43 : typedef enum CopyDest
44 : {
45 : COPY_FILE, /* to file (or a piped program) */
46 : COPY_FRONTEND, /* to frontend */
47 : COPY_CALLBACK, /* to callback function */
48 : } CopyDest;
49 :
50 : /*
51 : * This struct contains all the state variables used throughout a COPY TO
52 : * operation.
53 : *
54 : * Multi-byte encodings: all supported client-side encodings encode multi-byte
55 : * characters by having the first byte's high bit set. Subsequent bytes of the
56 : * character can have the high bit not set. When scanning data in such an
57 : * encoding to look for a match to a single-byte (ie ASCII) character, we must
58 : * use the full pg_encoding_mblen() machinery to skip over multibyte
59 : * characters, else we might find a false match to a trailing byte. In
60 : * supported server encodings, there is no possibility of a false match, and
61 : * it's faster to make useless comparisons to trailing bytes than it is to
62 : * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true
63 : * when we have to do it the hard way.
64 : */
65 : typedef struct CopyToStateData
66 : {
67 : /* format-specific routines (chosen by CopyToGetRoutine) */
68 : const CopyToRoutine *routine;
69 :
70 : /* low-level state data */
71 : CopyDest copy_dest; /* type of copy destination */
72 : FILE *copy_file; /* used if copy_dest == COPY_FILE */
73 : StringInfo fe_msgbuf; /* row buffer for all dests; flushed by CopySendEndOfRow */
74 :
75 : int file_encoding; /* file or remote side's character encoding */
76 : bool need_transcoding; /* file encoding diff from server? */
77 : bool encoding_embeds_ascii; /* ASCII can be non-first byte? */
78 :
79 : /* parameters from the COPY command */
80 : Relation rel; /* relation to copy from, or NULL when copying a query */
81 : QueryDesc *queryDesc; /* executable query to copy from */
82 : List *attnumlist; /* integer list of attnums to copy */
83 : char *filename; /* filename, or NULL for STDOUT */
84 : bool is_program; /* is 'filename' a program to popen? */
85 : copy_data_dest_cb data_dest_cb; /* function for writing data */
86 :
87 : CopyFormatOptions opts;
88 : Node *whereClause; /* WHERE condition (or NULL) */
89 :
90 : /*
91 : * Working state
92 : */
93 : MemoryContext copycontext; /* per-copy execution context */
94 :
95 : FmgrInfo *out_functions; /* lookup info for output functions */
96 : MemoryContext rowcontext; /* per-row evaluation context */
97 : uint64 bytes_processed; /* number of bytes processed so far */
98 : } CopyToStateData;
99 :
100 : /* DestReceiver for COPY (query) TO; BeginCopyTo fills in cstate after creating it */
101 : typedef struct
102 : {
103 : DestReceiver pub; /* publicly-known function pointers */
104 : CopyToState cstate; /* CopyToStateData for the command */
105 : uint64 processed; /* # of tuples processed */
106 : } DR_copy;
107 :
108 : /* Signature sent first by CopyToBinaryStart. NOTE: there's a copy of this in copyfromparse.c */
109 : static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
110 :
111 :
112 : /* non-export function prototypes */
113 : static void EndCopy(CopyToState cstate);
114 : static void ClosePipeToProgram(CopyToState cstate);
115 : static void CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot);
116 : static void CopyAttributeOutText(CopyToState cstate, const char *string);
117 : static void CopyAttributeOutCSV(CopyToState cstate, const char *string,
118 : bool use_quote);
119 :
120 : /* built-in format-specific routines */
121 : static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc);
122 : static void CopyToTextLikeOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo);
123 : static void CopyToTextOneRow(CopyToState cstate, TupleTableSlot *slot);
124 : static void CopyToCSVOneRow(CopyToState cstate, TupleTableSlot *slot);
125 : static void CopyToTextLikeOneRow(CopyToState cstate, TupleTableSlot *slot,
126 : bool is_csv);
127 : static void CopyToTextLikeEnd(CopyToState cstate);
128 : static void CopyToBinaryStart(CopyToState cstate, TupleDesc tupDesc);
129 : static void CopyToBinaryOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo);
130 : static void CopyToBinaryOneRow(CopyToState cstate, TupleTableSlot *slot);
131 : static void CopyToBinaryEnd(CopyToState cstate);
132 :
133 : /* Low-level communications functions */
134 : static void SendCopyBegin(CopyToState cstate);
135 : static void SendCopyEnd(CopyToState cstate);
136 : static void CopySendData(CopyToState cstate, const void *databuf, int datasize);
137 : static void CopySendString(CopyToState cstate, const char *str);
138 : static void CopySendChar(CopyToState cstate, char c);
139 : static void CopySendEndOfRow(CopyToState cstate);
140 : static void CopySendTextLikeEndOfRow(CopyToState cstate);
141 : static void CopySendInt32(CopyToState cstate, int32 val);
142 : static void CopySendInt16(CopyToState cstate, int16 val);
143 :
144 : /*
145 : * COPY TO routines for built-in formats.
146 : *
147 : * CSV and text formats share the same TextLike routines except for the
148 : * one-row callback.
149 : */
150 :
151 : /* text format: TextLike start/outfunc/end callbacks plus the text one-row callback */
152 : static const CopyToRoutine CopyToRoutineText = {
153 : .CopyToStart = CopyToTextLikeStart,
154 : .CopyToOutFunc = CopyToTextLikeOutFunc,
155 : .CopyToOneRow = CopyToTextOneRow,
156 : .CopyToEnd = CopyToTextLikeEnd,
157 : };
158 :
159 : /* CSV format: same as text except for the one-row callback */
160 : static const CopyToRoutine CopyToRoutineCSV = {
161 : .CopyToStart = CopyToTextLikeStart,
162 : .CopyToOutFunc = CopyToTextLikeOutFunc,
163 : .CopyToOneRow = CopyToCSVOneRow,
164 : .CopyToEnd = CopyToTextLikeEnd,
165 : };
166 :
167 : /* binary format: uses its own set of four callbacks */
168 : static const CopyToRoutine CopyToRoutineBinary = {
169 : .CopyToStart = CopyToBinaryStart,
170 : .CopyToOutFunc = CopyToBinaryOutFunc,
171 : .CopyToOneRow = CopyToBinaryOneRow,
172 : .CopyToEnd = CopyToBinaryEnd,
173 : };
174 :
175 : /* Return a COPY TO routine for the given options */
176 : static const CopyToRoutine *
177 8776 : CopyToGetRoutine(const CopyFormatOptions *opts)
178 : {
179 8776 : if (opts->csv_mode)
180 126 : return &CopyToRoutineCSV;
181 8650 : else if (opts->binary)
182 16 : return &CopyToRoutineBinary;
183 :
184 : /* default is text */
185 8634 : return &CopyToRoutineText;
186 : }
187 :
188 : /* Implementation of the start callback for text and CSV formats */
189 : static void
190 8626 : CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
191 : {
192 : /*
193 : * For non-binary copy, we need to convert null_print to file encoding,
194 : * because it will be sent directly with CopySendString.
195 : */
196 8626 : if (cstate->need_transcoding)
197 2 : cstate->opts.null_print_client = pg_server_to_any(cstate->opts.null_print,
198 : cstate->opts.null_print_len,
199 : cstate->file_encoding);
200 :
201 : /* if a header has been requested send the line */
202 8626 : if (cstate->opts.header_line)
203 : {
204 : ListCell *cur;
205 18 : bool hdr_delim = false;
206 :
207 54 : foreach(cur, cstate->attnumlist)
208 : {
209 36 : int attnum = lfirst_int(cur);
210 : char *colname;
211 :
212 36 : if (hdr_delim)
213 18 : CopySendChar(cstate, cstate->opts.delim[0]);
214 36 : hdr_delim = true;
215 :
216 36 : colname = NameStr(TupleDescAttr(tupDesc, attnum - 1)->attname);
217 :
218 36 : if (cstate->opts.csv_mode)
219 24 : CopyAttributeOutCSV(cstate, colname, false);
220 : else
221 12 : CopyAttributeOutText(cstate, colname);
222 : }
223 :
224 18 : CopySendTextLikeEndOfRow(cstate);
225 : }
226 8626 : }
227 :
228 : /*
229 : * Implementation of the outfunc callback for text and CSV formats. Assign
230 : * the output function data to the given *finfo.
231 : */
232 : static void
233 30802 : CopyToTextLikeOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo)
234 : {
235 : Oid func_oid;
236 : bool is_varlena;
237 :
238 : /* Set output function for an attribute */
239 30802 : getTypeOutputInfo(atttypid, &func_oid, &is_varlena);
240 30802 : fmgr_info(func_oid, finfo);
241 30802 : }
242 :
243 : /* Implementation of the per-row callback for text format */
244 : static void
245 3649572 : CopyToTextOneRow(CopyToState cstate, TupleTableSlot *slot)
246 : {
247 3649572 : CopyToTextLikeOneRow(cstate, slot, false);
248 3649572 : }
249 :
250 : /* Implementation of the per-row callback for CSV format */
251 : static void
252 330 : CopyToCSVOneRow(CopyToState cstate, TupleTableSlot *slot)
253 : {
254 330 : CopyToTextLikeOneRow(cstate, slot, true);
255 330 : }
256 :
257 : /*
258 : * Workhorse for CopyToTextOneRow() and CopyToCSVOneRow().
259 : *
260 : * We use pg_attribute_always_inline to reduce function call overhead
261 : * and to help compilers to optimize away the 'is_csv' condition.
262 : */
263 : static pg_attribute_always_inline void
264 3649902 : CopyToTextLikeOneRow(CopyToState cstate,
265 : TupleTableSlot *slot,
266 : bool is_csv)
267 : {
268 3649902 : bool need_delim = false;
269 3649902 : FmgrInfo *out_functions = cstate->out_functions;
270 :
271 21334666 : foreach_int(attnum, cstate->attnumlist)
272 : {
273 14034862 : Datum value = slot->tts_values[attnum - 1];
274 14034862 : bool isnull = slot->tts_isnull[attnum - 1];
275 :
276 14034862 : if (need_delim)
277 10385102 : CopySendChar(cstate, cstate->opts.delim[0]);
278 14034862 : need_delim = true;
279 :
280 14034862 : if (isnull)
281 : {
282 1172786 : CopySendString(cstate, cstate->opts.null_print_client);
283 : }
284 : else
285 : {
286 : char *string;
287 :
288 12862076 : string = OutputFunctionCall(&out_functions[attnum - 1],
289 : value);
290 :
291 12862076 : if (is_csv)
292 594 : CopyAttributeOutCSV(cstate, string,
293 594 : cstate->opts.force_quote_flags[attnum - 1]);
294 : else
295 12861482 : CopyAttributeOutText(cstate, string);
296 : }
297 : }
298 :
299 3649902 : CopySendTextLikeEndOfRow(cstate);
300 3649902 : }
301 :
302 : /* Implementation of the end callback for text and CSV formats */
303 : static void
304 8626 : CopyToTextLikeEnd(CopyToState cstate)
305 : {
306 : /* Nothing to do here */
307 8626 : }
308 :
309 : /*
310 : * Implementation of the start callback for binary format. Send a header
311 : * for a binary copy.
312 : */
313 : static void
314 14 : CopyToBinaryStart(CopyToState cstate, TupleDesc tupDesc)
315 : {
316 : int32 tmp;
317 :
318 : /* Signature */
319 14 : CopySendData(cstate, BinarySignature, 11);
320 : /* Flags field */
321 14 : tmp = 0;
322 14 : CopySendInt32(cstate, tmp);
323 : /* No header extension */
324 14 : tmp = 0;
325 14 : CopySendInt32(cstate, tmp);
326 14 : }
327 :
328 : /*
329 : * Implementation of the outfunc callback for binary format. Assign
330 : * the binary output function to the given *finfo.
331 : */
332 : static void
333 62 : CopyToBinaryOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo)
334 : {
335 : Oid func_oid;
336 : bool is_varlena;
337 :
338 : /* Set output function for an attribute */
339 62 : getTypeBinaryOutputInfo(atttypid, &func_oid, &is_varlena);
340 60 : fmgr_info(func_oid, finfo);
341 60 : }
342 :
343 : /* Implementation of the per-row callback for binary format */
344 : static void
345 32 : CopyToBinaryOneRow(CopyToState cstate, TupleTableSlot *slot)
346 : {
347 32 : FmgrInfo *out_functions = cstate->out_functions;
348 :
349 : /* Binary per-tuple header */
350 32 : CopySendInt16(cstate, list_length(cstate->attnumlist));
351 :
352 224 : foreach_int(attnum, cstate->attnumlist)
353 : {
354 160 : Datum value = slot->tts_values[attnum - 1];
355 160 : bool isnull = slot->tts_isnull[attnum - 1];
356 :
357 160 : if (isnull)
358 : {
359 30 : CopySendInt32(cstate, -1);
360 : }
361 : else
362 : {
363 : bytea *outputbytes;
364 :
365 130 : outputbytes = SendFunctionCall(&out_functions[attnum - 1],
366 : value);
367 130 : CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
368 130 : CopySendData(cstate, VARDATA(outputbytes),
369 130 : VARSIZE(outputbytes) - VARHDRSZ);
370 : }
371 : }
372 :
373 32 : CopySendEndOfRow(cstate);
374 32 : }
375 :
376 : /* Implementation of the end callback for binary format */
377 : static void
378 14 : CopyToBinaryEnd(CopyToState cstate)
379 : {
380 : /* Generate trailer for a binary copy */
381 14 : CopySendInt16(cstate, -1);
382 : /* Need to flush out the trailer */
383 14 : CopySendEndOfRow(cstate);
384 14 : }
385 :
386 : /*
387 : * Send copy start/stop messages for frontend copies. These have changed
388 : * in past protocol redesigns.
389 : */
390 : static void
391 8578 : SendCopyBegin(CopyToState cstate)
392 : {
393 : StringInfoData buf;
394 8578 : int natts = list_length(cstate->attnumlist);
395 8578 : int16 format = (cstate->opts.binary ? 1 : 0);
396 : int i;
397 :
398 8578 : pq_beginmessage(&buf, PqMsg_CopyOutResponse);
399 8578 : pq_sendbyte(&buf, format); /* overall format */
400 8578 : pq_sendint16(&buf, natts);
401 39210 : for (i = 0; i < natts; i++)
402 30632 : pq_sendint16(&buf, format); /* per-column formats */
403 8578 : pq_endmessage(&buf);
404 8578 : cstate->copy_dest = COPY_FRONTEND;
405 8578 : }
406 :
407 : static void
408 8576 : SendCopyEnd(CopyToState cstate)
409 : {
410 : /* Shouldn't have any unsent data */
411 : Assert(cstate->fe_msgbuf->len == 0);
412 : /* Send Copy Done message */
413 8576 : pq_putemptymessage(PqMsg_CopyDone);
414 8576 : }
415 :
416 : /*----------
417 : * CopySendData sends output data to the destination (file, frontend or callback)
418 : * CopySendString does the same for null-terminated strings
419 : * CopySendChar does the same for single characters
420 : * CopySendEndOfRow does the appropriate thing at end of each data row
421 : * (data is not actually flushed except by CopySendEndOfRow)
422 : *
423 : * NB: no data conversion is applied by these functions
424 : *----------
425 : */
426 : static void
427 12666762 : CopySendData(CopyToState cstate, const void *databuf, int datasize)
428 : {
429 12666762 : appendBinaryStringInfo(cstate->fe_msgbuf, databuf, datasize);
430 12666762 : }
431 :
432 : static void
433 1173080 : CopySendString(CopyToState cstate, const char *str)
434 : {
435 1173080 : appendBinaryStringInfo(cstate->fe_msgbuf, str, strlen(str));
436 1173080 : }
437 :
438 : static void
439 14067774 : CopySendChar(CopyToState cstate, char c)
440 : {
441 14067774 : appendStringInfoCharMacro(cstate->fe_msgbuf, c);
442 14067774 : }
443 :
444 : static void
445 3649966 : CopySendEndOfRow(CopyToState cstate)
446 : {
447 3649966 : StringInfo fe_msgbuf = cstate->fe_msgbuf;
448 :
449 3649966 : switch (cstate->copy_dest)
450 : {
451 12282 : case COPY_FILE:
452 12282 : if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1,
453 12282 : cstate->copy_file) != 1 ||
454 12282 : ferror(cstate->copy_file))
455 : {
456 0 : if (cstate->is_program)
457 : {
458 0 : if (errno == EPIPE)
459 : {
460 : /*
461 : * The pipe will be closed automatically on error at
462 : * the end of transaction, but we might get a better
463 : * error message from the subprocess' exit code than
464 : * just "Broken Pipe"
465 : */
466 0 : ClosePipeToProgram(cstate);
467 :
468 : /*
469 : * If ClosePipeToProgram() didn't throw an error, the
470 : * program terminated normally, but closed the pipe
471 : * first. Restore errno, and throw an error.
472 : */
473 0 : errno = EPIPE;
474 : }
475 0 : ereport(ERROR,
476 : (errcode_for_file_access(),
477 : errmsg("could not write to COPY program: %m")));
478 : }
479 : else
480 0 : ereport(ERROR,
481 : (errcode_for_file_access(),
482 : errmsg("could not write to COPY file: %m")));
483 : }
484 12282 : break;
485 3637678 : case COPY_FRONTEND:
486 : /* Dump the accumulated row as one CopyData message */
487 3637678 : (void) pq_putmessage(PqMsg_CopyData, fe_msgbuf->data, fe_msgbuf->len);
488 3637678 : break;
489 6 : case COPY_CALLBACK:
490 6 : cstate->data_dest_cb(fe_msgbuf->data, fe_msgbuf->len);
491 6 : break;
492 : }
493 :
494 : /* Update the progress */
495 3649966 : cstate->bytes_processed += fe_msgbuf->len;
496 3649966 : pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
497 :
498 3649966 : resetStringInfo(fe_msgbuf);
499 3649966 : }
500 :
501 : /*
502 : * Wrapper function of CopySendEndOfRow for text and CSV formats. Sends the
503 : * line termination and do common appropriate things for the end of row.
504 : */
505 : static inline void
506 3649920 : CopySendTextLikeEndOfRow(CopyToState cstate)
507 : {
508 3649920 : switch (cstate->copy_dest)
509 : {
510 12258 : case COPY_FILE:
511 : /* Default line termination depends on platform */
512 : #ifndef WIN32
513 12258 : CopySendChar(cstate, '\n');
514 : #else
515 : CopySendString(cstate, "\r\n");
516 : #endif
517 12258 : break;
518 3637656 : case COPY_FRONTEND:
519 : /* The FE/BE protocol uses \n as newline for all platforms */
520 3637656 : CopySendChar(cstate, '\n');
521 3637656 : break;
522 6 : default:
523 6 : break;
524 : }
525 :
526 : /* Now take the actions related to the end of a row */
527 3649920 : CopySendEndOfRow(cstate);
528 3649920 : }
529 :
530 : /*
531 : * These functions do apply some data conversion
532 : */
533 :
534 : /*
535 : * CopySendInt32 sends an int32 in network byte order
536 : */
537 : static inline void
538 188 : CopySendInt32(CopyToState cstate, int32 val)
539 : {
540 : uint32 buf;
541 :
542 188 : buf = pg_hton32((uint32) val);
543 188 : CopySendData(cstate, &buf, sizeof(buf));
544 188 : }
545 :
546 : /*
547 : * CopySendInt16 sends an int16 in network byte order
548 : */
549 : static inline void
550 46 : CopySendInt16(CopyToState cstate, int16 val)
551 : {
552 : uint16 buf;
553 :
554 46 : buf = pg_hton16((uint16) val);
555 46 : CopySendData(cstate, &buf, sizeof(buf));
556 46 : }
557 :
558 : /*
559 : * Closes the pipe to an external program, checking the pclose() return code.
560 : */
561 : static void
562 0 : ClosePipeToProgram(CopyToState cstate)
563 : {
564 : int pclose_rc;
565 :
566 : Assert(cstate->is_program);
567 :
568 0 : pclose_rc = ClosePipeStream(cstate->copy_file);
569 0 : if (pclose_rc == -1)
570 0 : ereport(ERROR,
571 : (errcode_for_file_access(),
572 : errmsg("could not close pipe to external command: %m")));
573 0 : else if (pclose_rc != 0)
574 : {
575 0 : ereport(ERROR,
576 : (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
577 : errmsg("program \"%s\" failed",
578 : cstate->filename),
579 : errdetail_internal("%s", wait_result_to_str(pclose_rc))));
580 : }
581 0 : }
582 :
583 : /*
584 : * Release resources allocated in a cstate for COPY TO/FROM.
585 : */
586 : static void
587 8640 : EndCopy(CopyToState cstate)
588 : {
589 8640 : if (cstate->is_program)
590 : {
591 0 : ClosePipeToProgram(cstate);
592 : }
593 : else
594 : {
595 8640 : if (cstate->filename != NULL && FreeFile(cstate->copy_file))
596 0 : ereport(ERROR,
597 : (errcode_for_file_access(),
598 : errmsg("could not close file \"%s\": %m",
599 : cstate->filename)));
600 : }
601 :
602 8640 : pgstat_progress_end_command();
603 :
604 8640 : MemoryContextDelete(cstate->copycontext);
605 8640 : pfree(cstate);
606 8640 : }
607 :
608 : /*
609 : * Set up a CopyToState to read tuples from a table or a query for COPY TO.
610 : *
611 : * 'rel': Relation to be copied, or NULL when copying the result of 'raw_query'
612 : * 'raw_query': Query whose results are to be copied; used only when 'rel' is NULL
613 : * 'queryRelId': OID of base relation to convert to a query (for RLS), else InvalidOid
614 : * 'filename': Name of server-local file to write, NULL for STDOUT
615 : * 'is_program': true if 'filename' is program to execute
616 : * 'data_dest_cb': Callback that processes the output data, or NULL
617 : * 'attnamelist': List of char *, columns to include. NIL selects all cols.
618 : * 'options': List of DefElem. See copy_opt_item in gram.y for selections.
619 : *
620 : * Returns a CopyToState, to be passed to DoCopyTo() and related functions.
621 : */
622 : CopyToState
623 8830 : BeginCopyTo(ParseState *pstate,
624 : Relation rel,
625 : RawStmt *raw_query,
626 : Oid queryRelId,
627 : const char *filename,
628 : bool is_program,
629 : copy_data_dest_cb data_dest_cb,
630 : List *attnamelist,
631 : List *options)
632 : {
633 : CopyToState cstate;
634 8830 : bool pipe = (filename == NULL && data_dest_cb == NULL);
635 : TupleDesc tupDesc;
636 : int num_phys_attrs;
637 : MemoryContext oldcontext;
638 8830 : const int progress_cols[] = {
639 : PROGRESS_COPY_COMMAND,
640 : PROGRESS_COPY_TYPE
641 : };
642 8830 : int64 progress_vals[] = {
643 : PROGRESS_COPY_COMMAND_TO,
644 : 0
645 : };
646 :
647 8830 : if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION)
648 : {
649 12 : if (rel->rd_rel->relkind == RELKIND_VIEW)
650 12 : ereport(ERROR,
651 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
652 : errmsg("cannot copy from view \"%s\"",
653 : RelationGetRelationName(rel)),
654 : errhint("Try the COPY (SELECT ...) TO variant.")));
655 0 : else if (rel->rd_rel->relkind == RELKIND_MATVIEW)
656 0 : ereport(ERROR,
657 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
658 : errmsg("cannot copy from materialized view \"%s\"",
659 : RelationGetRelationName(rel)),
660 : errhint("Try the COPY (SELECT ...) TO variant.")));
661 0 : else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
662 0 : ereport(ERROR,
663 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
664 : errmsg("cannot copy from foreign table \"%s\"",
665 : RelationGetRelationName(rel)),
666 : errhint("Try the COPY (SELECT ...) TO variant.")));
667 0 : else if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
668 0 : ereport(ERROR,
669 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
670 : errmsg("cannot copy from sequence \"%s\"",
671 : RelationGetRelationName(rel))));
672 0 : else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
673 0 : ereport(ERROR,
674 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
675 : errmsg("cannot copy from partitioned table \"%s\"",
676 : RelationGetRelationName(rel)),
677 : errhint("Try the COPY (SELECT ...) TO variant.")));
678 : else
679 0 : ereport(ERROR,
680 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
681 : errmsg("cannot copy from non-table relation \"%s\"",
682 : RelationGetRelationName(rel))));
683 : }
684 :
685 :
686 : /* Allocate workspace and zero all fields */
687 8818 : cstate = (CopyToStateData *) palloc0(sizeof(CopyToStateData));
688 :
689 : /*
690 : * We allocate everything used by a cstate in a new memory context. This
691 : * avoids memory leaks during repeated use of COPY in a query.
692 : */
693 8818 : cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext,
694 : "COPY",
695 : ALLOCSET_DEFAULT_SIZES);
696 :
697 8818 : oldcontext = MemoryContextSwitchTo(cstate->copycontext);
698 :
699 : /* Extract options from the statement node tree */
700 8818 : ProcessCopyOptions(pstate, &cstate->opts, false /* is_from */ , options);
701 :
702 : /* Set format routine */
703 8776 : cstate->routine = CopyToGetRoutine(&cstate->opts);
704 :
705 : /* Process the source relation or query */
706 8776 : if (rel)
707 : {
708 : Assert(!raw_query);
709 :
710 8272 : cstate->rel = rel;
711 :
712 8272 : tupDesc = RelationGetDescr(cstate->rel);
713 : }
714 : else
715 : {
716 : List *rewritten;
717 : Query *query;
718 : PlannedStmt *plan;
719 : DestReceiver *dest;
720 :
721 504 : cstate->rel = NULL;
722 :
723 : /*
724 : * Run parse analysis and rewrite. Note this also acquires sufficient
725 : * locks on the source table(s).
726 : */
727 504 : rewritten = pg_analyze_and_rewrite_fixedparams(raw_query,
728 : pstate->p_sourcetext, NULL, 0,
729 : NULL);
730 :
731 : /* check that we got back something we can work with */
732 492 : if (rewritten == NIL)
733 : {
734 18 : ereport(ERROR,
735 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
736 : errmsg("DO INSTEAD NOTHING rules are not supported for COPY")));
737 : }
738 474 : else if (list_length(rewritten) > 1)
739 : {
740 : ListCell *lc;
741 :
742 : /* examine queries to determine which error message to issue */
743 102 : foreach(lc, rewritten)
744 : {
745 84 : Query *q = lfirst_node(Query, lc);
746 :
747 84 : if (q->querySource == QSRC_QUAL_INSTEAD_RULE)
748 18 : ereport(ERROR,
749 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
750 : errmsg("conditional DO INSTEAD rules are not supported for COPY")));
751 66 : if (q->querySource == QSRC_NON_INSTEAD_RULE)
752 18 : ereport(ERROR,
753 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
754 : errmsg("DO ALSO rules are not supported for COPY")));
755 : }
756 :
757 18 : ereport(ERROR,
758 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
759 : errmsg("multi-statement DO INSTEAD rules are not supported for COPY")));
760 : }
761 :
762 420 : query = linitial_node(Query, rewritten);
763 :
764 : /* The grammar allows SELECT INTO, but we don't support that */
765 420 : if (query->utilityStmt != NULL &&
766 18 : IsA(query->utilityStmt, CreateTableAsStmt))
767 12 : ereport(ERROR,
768 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
769 : errmsg("COPY (SELECT INTO) is not supported")));
770 :
771 : /* The only other utility command we could see is NOTIFY */
772 408 : if (query->utilityStmt != NULL)
773 6 : ereport(ERROR,
774 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
775 : errmsg("COPY query must not be a utility command")));
776 :
777 : /*
778 : * Similarly the grammar doesn't enforce the presence of a RETURNING
779 : * clause, but this is required here.
780 : */
781 402 : if (query->commandType != CMD_SELECT &&
782 110 : query->returningList == NIL)
783 : {
784 : Assert(query->commandType == CMD_INSERT ||
785 : query->commandType == CMD_UPDATE ||
786 : query->commandType == CMD_DELETE ||
787 : query->commandType == CMD_MERGE);
788 :
789 24 : ereport(ERROR,
790 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
791 : errmsg("COPY query must have a RETURNING clause")));
792 : }
793 :
794 : /* plan the query */
795 378 : plan = pg_plan_query(query, pstate->p_sourcetext,
796 : CURSOR_OPT_PARALLEL_OK, NULL);
797 :
798 : /*
799 : * With row-level security and a user using "COPY relation TO", we
800 : * have to convert the "COPY relation TO" to a query-based COPY (eg:
801 : * "COPY (SELECT * FROM ONLY relation) TO"), to allow the rewriter to
802 : * add in any RLS clauses.
803 : *
804 : * When this happens, we are passed in the relid of the originally
805 : * found relation (which we have locked). As the planner will look up
806 : * the relation again, we double-check here to make sure it found the
807 : * same one that we have locked.
808 : */
809 376 : if (queryRelId != InvalidOid)
810 : {
811 : /*
812 : * Note that with RLS involved there may be multiple relations,
813 : * and while the one we need is almost certainly first, we don't
814 : * make any guarantees of that in the planner, so check the whole
815 : * list and make sure we find the original relation.
816 : */
817 54 : if (!list_member_oid(plan->relationOids, queryRelId))
818 0 : ereport(ERROR,
819 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
820 : errmsg("relation referenced by COPY statement has changed")));
821 : }
822 :
823 : /*
824 : * Use a snapshot with an updated command ID to ensure this query sees
825 : * results of any previously executed queries.
826 : */
827 376 : PushCopiedSnapshot(GetActiveSnapshot());
828 376 : UpdateActiveSnapshotCommandId();
829 :
830 : /* Create dest receiver for COPY OUT */
831 376 : dest = CreateDestReceiver(DestCopyOut);
832 376 : ((DR_copy *) dest)->cstate = cstate;
833 :
834 : /* Create a QueryDesc; the COPY OUT dest receiver created above is passed as its destination */
835 376 : cstate->queryDesc = CreateQueryDesc(plan, NULL, pstate->p_sourcetext,
836 : GetActiveSnapshot(),
837 : InvalidSnapshot,
838 : dest, NULL, NULL, 0);
839 :
840 : /*
841 : * Call ExecutorStart to prepare the plan for execution.
842 : *
843 : * ExecutorStart computes a result tupdesc for us
844 : */
845 376 : if (!ExecutorStart(cstate->queryDesc, 0))
846 0 : elog(ERROR, "ExecutorStart() failed unexpectedly");
847 :
848 370 : tupDesc = cstate->queryDesc->tupDesc;
849 : }
850 :
851 : /* Generate or convert list of attributes to process */
852 8642 : cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
853 :
854 8642 : num_phys_attrs = tupDesc->natts;
855 :
856 : /* Convert FORCE_QUOTE name list to per-column flags, check validity */
857 8642 : cstate->opts.force_quote_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
858 8642 : if (cstate->opts.force_quote_all)
859 : {
860 18 : MemSet(cstate->opts.force_quote_flags, true, num_phys_attrs * sizeof(bool));
861 : }
862 8624 : else if (cstate->opts.force_quote)
863 : {
864 : List *attnums;
865 : ListCell *cur;
866 :
867 24 : attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_quote);
868 :
869 48 : foreach(cur, attnums)
870 : {
871 24 : int attnum = lfirst_int(cur);
872 24 : Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
873 :
874 24 : if (!list_member_int(cstate->attnumlist, attnum))
875 0 : ereport(ERROR,
876 : (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
877 : /*- translator: %s is the name of a COPY option, e.g. FORCE_NOT_NULL */
878 : errmsg("%s column \"%s\" not referenced by COPY",
879 : "FORCE_QUOTE", NameStr(attr->attname))));
880 24 : cstate->opts.force_quote_flags[attnum - 1] = true;
881 : }
882 : }
883 :
884 : /* Use client encoding when ENCODING option is not specified. */
885 8642 : if (cstate->opts.file_encoding < 0)
886 8624 : cstate->file_encoding = pg_get_client_encoding();
887 : else
888 18 : cstate->file_encoding = cstate->opts.file_encoding;
889 :
890 : /*
891 : * Set up encoding conversion info if the file and server encodings differ
892 : * (see also pg_server_to_any).
893 : */
894 8642 : if (cstate->file_encoding == GetDatabaseEncoding() ||
895 8 : cstate->file_encoding == PG_SQL_ASCII)
896 8640 : cstate->need_transcoding = false;
897 : else
898 2 : cstate->need_transcoding = true;
899 :
900 : /* See Multibyte encoding comment above */
901 8642 : cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding);
902 :
903 8642 : cstate->copy_dest = COPY_FILE; /* default; the callback case overrides it below */
904 :
905 8642 : if (data_dest_cb)
906 : {
907 2 : progress_vals[1] = PROGRESS_COPY_TYPE_CALLBACK;
908 2 : cstate->copy_dest = COPY_CALLBACK;
909 2 : cstate->data_dest_cb = data_dest_cb;
910 : }
911 8640 : else if (pipe)
912 : {
913 8578 : progress_vals[1] = PROGRESS_COPY_TYPE_PIPE;
914 :
915 : Assert(!is_program); /* the grammar does not allow this */
916 8578 : if (whereToSendOutput != DestRemote)
917 0 : cstate->copy_file = stdout;
918 : }
919 : else
920 : {
921 62 : cstate->filename = pstrdup(filename);
922 62 : cstate->is_program = is_program;
923 :
924 62 : if (is_program)
925 : {
926 0 : progress_vals[1] = PROGRESS_COPY_TYPE_PROGRAM;
927 0 : cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_W);
928 0 : if (cstate->copy_file == NULL)
929 0 : ereport(ERROR,
930 : (errcode_for_file_access(),
931 : errmsg("could not execute command \"%s\": %m",
932 : cstate->filename)));
933 : }
934 : else
935 : {
936 : mode_t oumask; /* Pre-existing umask value */
937 : struct stat st;
938 :
939 62 : progress_vals[1] = PROGRESS_COPY_TYPE_FILE;
940 :
941 : /*
942 : * Prevent write to relative path ... too easy to shoot oneself in
943 : * the foot by overwriting a database file ...
944 : */
945 62 : if (!is_absolute_path(filename))
946 0 : ereport(ERROR,
947 : (errcode(ERRCODE_INVALID_NAME),
948 : errmsg("relative path not allowed for COPY to file")));
949 :
950 62 : oumask = umask(S_IWGRP | S_IWOTH);
951 62 : PG_TRY();
952 : {
953 62 : cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_W);
954 : }
955 0 : PG_FINALLY();
956 : {
957 62 : umask(oumask);
958 : }
959 62 : PG_END_TRY();
960 62 : if (cstate->copy_file == NULL)
961 : {
962 : /* copy errno because ereport subfunctions might change it */
963 0 : int save_errno = errno;
964 :
965 0 : ereport(ERROR,
966 : (errcode_for_file_access(),
967 : errmsg("could not open file \"%s\" for writing: %m",
968 : cstate->filename),
969 : (save_errno == ENOENT || save_errno == EACCES) ?
970 : errhint("COPY TO instructs the PostgreSQL server process to write a file. "
971 : "You may want a client-side facility such as psql's \\copy.") : 0));
972 : }
973 :
974 62 : if (fstat(fileno(cstate->copy_file), &st))
975 0 : ereport(ERROR,
976 : (errcode_for_file_access(),
977 : errmsg("could not stat file \"%s\": %m",
978 : cstate->filename)));
979 :
980 62 : if (S_ISDIR(st.st_mode))
981 0 : ereport(ERROR,
982 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
983 : errmsg("\"%s\" is a directory", cstate->filename)));
984 : }
985 : }
986 :
987 : /* initialize progress reporting */
988 8642 : pgstat_progress_start_command(PROGRESS_COMMAND_COPY,
989 8642 : cstate->rel ? RelationGetRelid(cstate->rel) : InvalidOid);
990 8642 : pgstat_progress_update_multi_param(2, progress_cols, progress_vals);
991 :
992 8642 : cstate->bytes_processed = 0;
993 :
994 8642 : MemoryContextSwitchTo(oldcontext);
995 :
996 8642 : return cstate;
997 : }
998 :
999 : /*
1000 : * Clean up storage and release resources for COPY TO.
1001 : */
1002 : void
1003 8640 : EndCopyTo(CopyToState cstate)
1004 : {
1005 8640 : if (cstate->queryDesc != NULL)
1006 : {
1007 : /* Close down the query and free resources. */
1008 370 : ExecutorFinish(cstate->queryDesc);
1009 370 : ExecutorEnd(cstate->queryDesc);
1010 370 : FreeQueryDesc(cstate->queryDesc);
1011 370 : PopActiveSnapshot();
1012 : }
1013 :
1014 : /* Clean up storage */
1015 8640 : EndCopy(cstate);
1016 8640 : }
1017 :
1018 : /*
1019 : * Copy from relation or query TO file.
1020 : *
1021 : * Returns the number of rows processed.
1022 : */
1023 : uint64
1024 8642 : DoCopyTo(CopyToState cstate)
1025 : {
1026 8642 : bool pipe = (cstate->filename == NULL && cstate->data_dest_cb == NULL);
1027 8642 : bool fe_copy = (pipe && whereToSendOutput == DestRemote);
1028 : TupleDesc tupDesc;
1029 : int num_phys_attrs;
1030 : ListCell *cur;
1031 : uint64 processed;
1032 :
1033 8642 : if (fe_copy)
1034 8578 : SendCopyBegin(cstate);
1035 :
1036 8642 : if (cstate->rel)
1037 8272 : tupDesc = RelationGetDescr(cstate->rel);
1038 : else
1039 370 : tupDesc = cstate->queryDesc->tupDesc;
1040 8642 : num_phys_attrs = tupDesc->natts;
1041 8642 : cstate->opts.null_print_client = cstate->opts.null_print; /* default */
1042 :
1043 : /* We use fe_msgbuf as a per-row buffer regardless of copy_dest */
1044 8642 : cstate->fe_msgbuf = makeStringInfo();
1045 :
1046 : /* Get info about the columns we need to process. */
1047 8642 : cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
1048 39504 : foreach(cur, cstate->attnumlist)
1049 : {
1050 30864 : int attnum = lfirst_int(cur);
1051 30864 : Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
1052 :
1053 30864 : cstate->routine->CopyToOutFunc(cstate, attr->atttypid,
1054 30864 : &cstate->out_functions[attnum - 1]);
1055 : }
1056 :
1057 : /*
1058 : * Create a temporary memory context that we can reset once per row to
1059 : * recover palloc'd memory. This avoids any problems with leaks inside
1060 : * datatype output routines, and should be faster than retail pfree's
1061 : * anyway. (We don't need a whole econtext as CopyFrom does.)
1062 : */
1063 8640 : cstate->rowcontext = AllocSetContextCreate(CurrentMemoryContext,
1064 : "COPY TO",
1065 : ALLOCSET_DEFAULT_SIZES);
1066 :
1067 8640 : cstate->routine->CopyToStart(cstate, tupDesc);
1068 :
1069 8640 : if (cstate->rel)
1070 : {
1071 : TupleTableSlot *slot;
1072 : TableScanDesc scandesc;
1073 :
1074 8270 : scandesc = table_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
1075 8270 : slot = table_slot_create(cstate->rel, NULL);
1076 :
1077 8270 : processed = 0;
1078 3651206 : while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot))
1079 : {
1080 3642936 : CHECK_FOR_INTERRUPTS();
1081 :
1082 : /* Deconstruct the tuple ... */
1083 3642936 : slot_getallattrs(slot);
1084 :
1085 : /* Format and send the data */
1086 3642936 : CopyOneRowTo(cstate, slot);
1087 :
1088 : /*
1089 : * Increment the number of processed tuples, and report the
1090 : * progress.
1091 : */
1092 3642936 : pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
1093 : ++processed);
1094 : }
1095 :
1096 8270 : ExecDropSingleTupleTableSlot(slot);
1097 8270 : table_endscan(scandesc);
1098 : }
1099 : else
1100 : {
1101 : /* run the plan --- the dest receiver will send tuples */
1102 370 : ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0);
1103 370 : processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
1104 : }
1105 :
1106 8640 : cstate->routine->CopyToEnd(cstate);
1107 :
1108 8640 : MemoryContextDelete(cstate->rowcontext);
1109 :
1110 8640 : if (fe_copy)
1111 8576 : SendCopyEnd(cstate);
1112 :
1113 8640 : return processed;
1114 : }
1115 :
1116 : /*
1117 : * Emit one row during DoCopyTo().
1118 : */
1119 : static inline void
1120 3649934 : CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
1121 : {
1122 : MemoryContext oldcontext;
1123 :
1124 3649934 : MemoryContextReset(cstate->rowcontext);
1125 3649934 : oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
1126 :
1127 : /* Make sure the tuple is fully deconstructed */
1128 3649934 : slot_getallattrs(slot);
1129 :
1130 3649934 : cstate->routine->CopyToOneRow(cstate, slot);
1131 :
1132 3649934 : MemoryContextSwitchTo(oldcontext);
1133 3649934 : }
1134 :
/*
 * Send text representation of one attribute, with conversion and escaping
 *
 * DUMPSOFAR() is a helper for the attribute output code: it passes the
 * bytes in [start, ptr) to CopySendData(), and does nothing when that range
 * is empty.  It expects variables named "cstate", "start" and "ptr" to be in
 * scope wherever it is used.
 */
#define DUMPSOFAR() \
	do { \
		if (start < ptr) \
			CopySendData(cstate, start, ptr - start); \
	} while (0)
1143 :
1144 : static void
1145 12861494 : CopyAttributeOutText(CopyToState cstate, const char *string)
1146 : {
1147 : const char *ptr;
1148 : const char *start;
1149 : char c;
1150 12861494 : char delimc = cstate->opts.delim[0];
1151 :
1152 12861494 : if (cstate->need_transcoding)
1153 0 : ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
1154 : else
1155 12861494 : ptr = string;
1156 :
1157 : /*
1158 : * We have to grovel through the string searching for control characters
1159 : * and instances of the delimiter character. In most cases, though, these
1160 : * are infrequent. To avoid overhead from calling CopySendData once per
1161 : * character, we dump out all characters between escaped characters in a
1162 : * single call. The loop invariant is that the data from "start" to "ptr"
1163 : * can be sent literally, but hasn't yet been.
1164 : *
1165 : * We can skip pg_encoding_mblen() overhead when encoding is safe, because
1166 : * in valid backend encodings, extra bytes of a multibyte character never
1167 : * look like ASCII. This loop is sufficiently performance-critical that
1168 : * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
1169 : * of the normal safe-encoding path.
1170 : */
1171 12861494 : if (cstate->encoding_embeds_ascii)
1172 : {
1173 0 : start = ptr;
1174 0 : while ((c = *ptr) != '\0')
1175 : {
1176 0 : if ((unsigned char) c < (unsigned char) 0x20)
1177 : {
1178 : /*
1179 : * \r and \n must be escaped, the others are traditional. We
1180 : * prefer to dump these using the C-like notation, rather than
1181 : * a backslash and the literal character, because it makes the
1182 : * dump file a bit more proof against Microsoftish data
1183 : * mangling.
1184 : */
1185 0 : switch (c)
1186 : {
1187 0 : case '\b':
1188 0 : c = 'b';
1189 0 : break;
1190 0 : case '\f':
1191 0 : c = 'f';
1192 0 : break;
1193 0 : case '\n':
1194 0 : c = 'n';
1195 0 : break;
1196 0 : case '\r':
1197 0 : c = 'r';
1198 0 : break;
1199 0 : case '\t':
1200 0 : c = 't';
1201 0 : break;
1202 0 : case '\v':
1203 0 : c = 'v';
1204 0 : break;
1205 0 : default:
1206 : /* If it's the delimiter, must backslash it */
1207 0 : if (c == delimc)
1208 0 : break;
1209 : /* All ASCII control chars are length 1 */
1210 0 : ptr++;
1211 0 : continue; /* fall to end of loop */
1212 : }
1213 : /* if we get here, we need to convert the control char */
1214 0 : DUMPSOFAR();
1215 0 : CopySendChar(cstate, '\\');
1216 0 : CopySendChar(cstate, c);
1217 0 : start = ++ptr; /* do not include char in next run */
1218 : }
1219 0 : else if (c == '\\' || c == delimc)
1220 : {
1221 0 : DUMPSOFAR();
1222 0 : CopySendChar(cstate, '\\');
1223 0 : start = ptr++; /* we include char in next run */
1224 : }
1225 0 : else if (IS_HIGHBIT_SET(c))
1226 0 : ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
1227 : else
1228 0 : ptr++;
1229 : }
1230 : }
1231 : else
1232 : {
1233 12861494 : start = ptr;
1234 135136274 : while ((c = *ptr) != '\0')
1235 : {
1236 122274780 : if ((unsigned char) c < (unsigned char) 0x20)
1237 : {
1238 : /*
1239 : * \r and \n must be escaped, the others are traditional. We
1240 : * prefer to dump these using the C-like notation, rather than
1241 : * a backslash and the literal character, because it makes the
1242 : * dump file a bit more proof against Microsoftish data
1243 : * mangling.
1244 : */
1245 13790 : switch (c)
1246 : {
1247 0 : case '\b':
1248 0 : c = 'b';
1249 0 : break;
1250 0 : case '\f':
1251 0 : c = 'f';
1252 0 : break;
1253 11676 : case '\n':
1254 11676 : c = 'n';
1255 11676 : break;
1256 0 : case '\r':
1257 0 : c = 'r';
1258 0 : break;
1259 2114 : case '\t':
1260 2114 : c = 't';
1261 2114 : break;
1262 0 : case '\v':
1263 0 : c = 'v';
1264 0 : break;
1265 0 : default:
1266 : /* If it's the delimiter, must backslash it */
1267 0 : if (c == delimc)
1268 0 : break;
1269 : /* All ASCII control chars are length 1 */
1270 0 : ptr++;
1271 0 : continue; /* fall to end of loop */
1272 : }
1273 : /* if we get here, we need to convert the control char */
1274 13790 : DUMPSOFAR();
1275 13790 : CopySendChar(cstate, '\\');
1276 13790 : CopySendChar(cstate, c);
1277 13790 : start = ++ptr; /* do not include char in next run */
1278 : }
1279 122260990 : else if (c == '\\' || c == delimc)
1280 : {
1281 4356 : DUMPSOFAR();
1282 4356 : CopySendChar(cstate, '\\');
1283 4356 : start = ptr++; /* we include char in next run */
1284 : }
1285 : else
1286 122256634 : ptr++;
1287 : }
1288 : }
1289 :
1290 12861494 : DUMPSOFAR();
1291 12861494 : }
1292 :
1293 : /*
1294 : * Send text representation of one attribute, with conversion and
1295 : * CSV-style escaping
1296 : */
1297 : static void
1298 618 : CopyAttributeOutCSV(CopyToState cstate, const char *string,
1299 : bool use_quote)
1300 : {
1301 : const char *ptr;
1302 : const char *start;
1303 : char c;
1304 618 : char delimc = cstate->opts.delim[0];
1305 618 : char quotec = cstate->opts.quote[0];
1306 618 : char escapec = cstate->opts.escape[0];
1307 618 : bool single_attr = (list_length(cstate->attnumlist) == 1);
1308 :
1309 : /* force quoting if it matches null_print (before conversion!) */
1310 618 : if (!use_quote && strcmp(string, cstate->opts.null_print) == 0)
1311 54 : use_quote = true;
1312 :
1313 618 : if (cstate->need_transcoding)
1314 0 : ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
1315 : else
1316 618 : ptr = string;
1317 :
1318 : /*
1319 : * Make a preliminary pass to discover if it needs quoting
1320 : */
1321 618 : if (!use_quote)
1322 : {
1323 : /*
1324 : * Quote '\.' if it appears alone on a line, so that it will not be
1325 : * interpreted as an end-of-data marker. (PG 18 and up will not
1326 : * interpret '\.' in CSV that way, except in embedded-in-SQL data; but
1327 : * we want the data to be loadable by older versions too. Also, this
1328 : * avoids breaking clients that are still using PQgetline().)
1329 : */
1330 432 : if (single_attr && strcmp(ptr, "\\.") == 0)
1331 6 : use_quote = true;
1332 : else
1333 : {
1334 426 : const char *tptr = ptr;
1335 :
1336 2208 : while ((c = *tptr) != '\0')
1337 : {
1338 1914 : if (c == delimc || c == quotec || c == '\n' || c == '\r')
1339 : {
1340 132 : use_quote = true;
1341 132 : break;
1342 : }
1343 1782 : if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
1344 0 : tptr += pg_encoding_mblen(cstate->file_encoding, tptr);
1345 : else
1346 1782 : tptr++;
1347 : }
1348 : }
1349 : }
1350 :
1351 618 : if (use_quote)
1352 : {
1353 324 : CopySendChar(cstate, quotec);
1354 :
1355 : /*
1356 : * We adopt the same optimization strategy as in CopyAttributeOutText
1357 : */
1358 324 : start = ptr;
1359 2538 : while ((c = *ptr) != '\0')
1360 : {
1361 2214 : if (c == quotec || c == escapec)
1362 : {
1363 156 : DUMPSOFAR();
1364 156 : CopySendChar(cstate, escapec);
1365 156 : start = ptr; /* we include char in next run */
1366 : }
1367 2214 : if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
1368 0 : ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
1369 : else
1370 2214 : ptr++;
1371 : }
1372 324 : DUMPSOFAR();
1373 :
1374 324 : CopySendChar(cstate, quotec);
1375 : }
1376 : else
1377 : {
1378 : /* If it doesn't need quoting, we can just dump it as-is */
1379 294 : CopySendString(cstate, ptr);
1380 : }
1381 618 : }
1382 :
1383 : /*
1384 : * copy_dest_startup --- executor startup
1385 : */
1386 : static void
1387 370 : copy_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
1388 : {
1389 : /* no-op */
1390 370 : }
1391 :
1392 : /*
1393 : * copy_dest_receive --- receive one tuple
1394 : */
1395 : static bool
1396 6998 : copy_dest_receive(TupleTableSlot *slot, DestReceiver *self)
1397 : {
1398 6998 : DR_copy *myState = (DR_copy *) self;
1399 6998 : CopyToState cstate = myState->cstate;
1400 :
1401 : /* Send the data */
1402 6998 : CopyOneRowTo(cstate, slot);
1403 :
1404 : /* Increment the number of processed tuples, and report the progress */
1405 6998 : pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
1406 6998 : ++myState->processed);
1407 :
1408 6998 : return true;
1409 : }
1410 :
1411 : /*
1412 : * copy_dest_shutdown --- executor end
1413 : */
1414 : static void
1415 370 : copy_dest_shutdown(DestReceiver *self)
1416 : {
1417 : /* no-op */
1418 370 : }
1419 :
1420 : /*
1421 : * copy_dest_destroy --- release DestReceiver object
1422 : */
1423 : static void
1424 0 : copy_dest_destroy(DestReceiver *self)
1425 : {
1426 0 : pfree(self);
1427 0 : }
1428 :
1429 : /*
1430 : * CreateCopyDestReceiver -- create a suitable DestReceiver object
1431 : */
1432 : DestReceiver *
1433 376 : CreateCopyDestReceiver(void)
1434 : {
1435 376 : DR_copy *self = (DR_copy *) palloc(sizeof(DR_copy));
1436 :
1437 376 : self->pub.receiveSlot = copy_dest_receive;
1438 376 : self->pub.rStartup = copy_dest_startup;
1439 376 : self->pub.rShutdown = copy_dest_shutdown;
1440 376 : self->pub.rDestroy = copy_dest_destroy;
1441 376 : self->pub.mydest = DestCopyOut;
1442 :
1443 376 : self->cstate = NULL; /* will be set later */
1444 376 : self->processed = 0;
1445 :
1446 376 : return (DestReceiver *) self;
1447 : }
|