Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * copyto.c
4 : * COPY <table> TO file/program/client
5 : *
6 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/commands/copyto.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <ctype.h>
18 : #include <unistd.h>
19 : #include <sys/stat.h>
20 :
21 : #include "access/tableam.h"
22 : #include "commands/copyapi.h"
23 : #include "commands/progress.h"
24 : #include "executor/execdesc.h"
25 : #include "executor/executor.h"
26 : #include "executor/tuptable.h"
27 : #include "libpq/libpq.h"
28 : #include "libpq/pqformat.h"
29 : #include "mb/pg_wchar.h"
30 : #include "miscadmin.h"
31 : #include "pgstat.h"
32 : #include "storage/fd.h"
33 : #include "tcop/tcopprot.h"
34 : #include "utils/lsyscache.h"
35 : #include "utils/memutils.h"
36 : #include "utils/rel.h"
37 : #include "utils/snapmgr.h"
38 :
39 : /*
40 : * The output destinations that the low-level send routines (see
41 : * CopySendEndOfRow) have to distinguish between
42 : */
43 : typedef enum CopyDest
44 : {
45 : COPY_FILE, /* written with fwrite() to a file (or a piped program) */
46 : COPY_FRONTEND, /* sent to the frontend as CopyData messages */
47 : COPY_CALLBACK, /* handed to the data_dest_cb callback function */
48 : } CopyDest;
49 :
50 : /*
51 : * This struct contains all the state variables used throughout a COPY TO
52 : * operation.
53 : *
54 : * Multi-byte encodings: all supported client-side encodings encode multi-byte
55 : * characters by having the first byte's high bit set. Subsequent bytes of the
56 : * character can have the high bit not set. When scanning data in such an
57 : * encoding to look for a match to a single-byte (i.e., ASCII) character, we
58 : * must use the full pg_encoding_mblen() machinery to skip over multibyte
59 : * characters, else we might find a false match to a trailing byte. In
60 : * supported server encodings, there is no possibility of a false match, and
61 : * it's faster to make useless comparisons to trailing bytes than it is to
62 : * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true
63 : * when we have to do it the hard way.
64 : */
65 : typedef struct CopyToStateData
66 : {
67 : /* format-specific routines (chosen by CopyToGetRoutine) */
68 : const CopyToRoutine *routine;
69 :
70 : /* low-level state data */
71 : CopyDest copy_dest; /* type of copy destination */
72 : FILE *copy_file; /* used if copy_dest == COPY_FILE */
73 : StringInfo fe_msgbuf; /* output buffer, used for all dests during COPY TO */
74 :
75 : int file_encoding; /* file or remote side's character encoding */
76 : bool need_transcoding; /* file encoding diff from server? */
77 : bool encoding_embeds_ascii; /* ASCII can be non-first byte? */
78 :
79 : /* parameters from the COPY command */
80 : Relation rel; /* relation to copy from, or NULL for a query */
81 : QueryDesc *queryDesc; /* executable query to copy from, or NULL */
82 : List *attnumlist; /* integer list of attnums to copy */
83 : char *filename; /* file or program name, or NULL for STDOUT */
84 : bool is_program; /* is 'filename' a program to popen? */
85 : copy_data_dest_cb data_dest_cb; /* function for writing data */
86 :
87 : CopyFormatOptions opts;
88 : Node *whereClause; /* WHERE condition (or NULL) */
89 :
90 : /*
91 : * Working state
92 : */
93 : MemoryContext copycontext; /* per-copy execution context */
94 :
95 : FmgrInfo *out_functions; /* lookup info for output functions */
96 : MemoryContext rowcontext; /* per-row evaluation context */
97 : uint64 bytes_processed; /* number of bytes sent so far (for progress) */
98 : } CopyToStateData;
99 :
100 : /* DestReceiver for COPY (query) TO; created in BeginCopyTo for the query case */
101 : typedef struct
102 : {
103 : DestReceiver pub; /* publicly-known function pointers */
104 : CopyToState cstate; /* CopyToStateData for the command */
105 : uint64 processed; /* # of tuples processed */
106 : } DR_copy;
107 :
108 : /* 11-byte signature sent at the start of binary-format output. NOTE: there's a copy of this in copyfromparse.c */
109 : static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
110 :
111 :
112 : /* prototypes for functions local to this file */
113 : static void EndCopy(CopyToState cstate);
114 : static void ClosePipeToProgram(CopyToState cstate);
115 : static void CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot);
116 : static void CopyAttributeOutText(CopyToState cstate, const char *string);
117 : static void CopyAttributeOutCSV(CopyToState cstate, const char *string,
118 : bool use_quote);
119 :
120 : /* callbacks implementing the built-in formats (text, CSV and binary) */
121 : static void CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc);
122 : static void CopyToTextLikeOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo);
123 : static void CopyToTextOneRow(CopyToState cstate, TupleTableSlot *slot);
124 : static void CopyToCSVOneRow(CopyToState cstate, TupleTableSlot *slot);
125 : static void CopyToTextLikeOneRow(CopyToState cstate, TupleTableSlot *slot,
126 : bool is_csv);
127 : static void CopyToTextLikeEnd(CopyToState cstate);
128 : static void CopyToBinaryStart(CopyToState cstate, TupleDesc tupDesc);
129 : static void CopyToBinaryOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo);
130 : static void CopyToBinaryOneRow(CopyToState cstate, TupleTableSlot *slot);
131 : static void CopyToBinaryEnd(CopyToState cstate);
132 :
133 : /* Low-level routines that buffer output data and send it to the destination */
134 : static void SendCopyBegin(CopyToState cstate);
135 : static void SendCopyEnd(CopyToState cstate);
136 : static void CopySendData(CopyToState cstate, const void *databuf, int datasize);
137 : static void CopySendString(CopyToState cstate, const char *str);
138 : static void CopySendChar(CopyToState cstate, char c);
139 : static void CopySendEndOfRow(CopyToState cstate);
140 : static void CopySendTextLikeEndOfRow(CopyToState cstate);
141 : static void CopySendInt32(CopyToState cstate, int32 val);
142 : static void CopySendInt16(CopyToState cstate, int16 val);
143 :
144 : /*
145 : * COPY TO routines for built-in formats.
146 : *
147 : * CSV and text formats share the same TextLike routines except for the
148 : * one-row callback. CopyToGetRoutine() picks one of these tables.
149 : */
150 :
151 : /* text format (the default) */
152 : static const CopyToRoutine CopyToRoutineText = {
153 : .CopyToStart = CopyToTextLikeStart,
154 : .CopyToOutFunc = CopyToTextLikeOutFunc,
155 : .CopyToOneRow = CopyToTextOneRow,
156 : .CopyToEnd = CopyToTextLikeEnd,
157 : };
158 :
159 : /* CSV format: differs from text only in the per-row callback */
160 : static const CopyToRoutine CopyToRoutineCSV = {
161 : .CopyToStart = CopyToTextLikeStart,
162 : .CopyToOutFunc = CopyToTextLikeOutFunc,
163 : .CopyToOneRow = CopyToCSVOneRow,
164 : .CopyToEnd = CopyToTextLikeEnd,
165 : };
166 :
167 : /* binary format: uses its own set of callbacks throughout */
168 : static const CopyToRoutine CopyToRoutineBinary = {
169 : .CopyToStart = CopyToBinaryStart,
170 : .CopyToOutFunc = CopyToBinaryOutFunc,
171 : .CopyToOneRow = CopyToBinaryOneRow,
172 : .CopyToEnd = CopyToBinaryEnd,
173 : };
174 :
175 : /* Return a COPY TO routine for the given options */
176 : static const CopyToRoutine *
177 13202 : CopyToGetRoutine(const CopyFormatOptions *opts)
178 : {
179 13202 : if (opts->csv_mode)
180 126 : return &CopyToRoutineCSV;
181 13076 : else if (opts->binary)
182 16 : return &CopyToRoutineBinary;
183 :
184 : /* default is text */
185 13060 : return &CopyToRoutineText;
186 : }
187 :
188 : /* Implementation of the start callback for text and CSV formats */
189 : static void
190 13052 : CopyToTextLikeStart(CopyToState cstate, TupleDesc tupDesc)
191 : {
192 : /*
193 : * For non-binary copy, we need to convert null_print to file encoding,
194 : * because it will be sent directly with CopySendString.
195 : */
196 13052 : if (cstate->need_transcoding)
197 2 : cstate->opts.null_print_client = pg_server_to_any(cstate->opts.null_print,
198 : cstate->opts.null_print_len,
199 : cstate->file_encoding);
200 :
201 : /* if a header has been requested send the line */
202 13052 : if (cstate->opts.header_line)
203 : {
204 : ListCell *cur;
205 24 : bool hdr_delim = false;
206 :
207 66 : foreach(cur, cstate->attnumlist)
208 : {
209 42 : int attnum = lfirst_int(cur);
210 : char *colname;
211 :
212 42 : if (hdr_delim)
213 18 : CopySendChar(cstate, cstate->opts.delim[0]);
214 42 : hdr_delim = true;
215 :
216 42 : colname = NameStr(TupleDescAttr(tupDesc, attnum - 1)->attname);
217 :
218 42 : if (cstate->opts.csv_mode)
219 24 : CopyAttributeOutCSV(cstate, colname, false);
220 : else
221 18 : CopyAttributeOutText(cstate, colname);
222 : }
223 :
224 24 : CopySendTextLikeEndOfRow(cstate);
225 : }
226 13052 : }
227 :
228 : /*
229 : * Implementation of the outfunc callback for text and CSV formats. Assign
230 : * the output function data to the given *finfo.
231 : */
232 : static void
233 47368 : CopyToTextLikeOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo)
234 : {
235 : Oid func_oid;
236 : bool is_varlena;
237 :
238 : /* Set output function for an attribute */
239 47368 : getTypeOutputInfo(atttypid, &func_oid, &is_varlena);
240 47368 : fmgr_info(func_oid, finfo);
241 47368 : }
242 :
243 : /* Implementation of the per-row callback for text format */
244 : static void
245 6164766 : CopyToTextOneRow(CopyToState cstate, TupleTableSlot *slot)
246 : {
247 6164766 : CopyToTextLikeOneRow(cstate, slot, false);
248 6164766 : }
249 :
250 : /* Implementation of the per-row callback for CSV format */
251 : static void
252 330 : CopyToCSVOneRow(CopyToState cstate, TupleTableSlot *slot)
253 : {
254 330 : CopyToTextLikeOneRow(cstate, slot, true);
255 330 : }
256 :
257 : /*
258 : * Workhorse for CopyToTextOneRow() and CopyToCSVOneRow().
259 : *
260 : * We use pg_attribute_always_inline to reduce function call overhead
261 : * and to help compilers to optimize away the 'is_csv' condition.
262 : */
263 : static pg_attribute_always_inline void
264 6165096 : CopyToTextLikeOneRow(CopyToState cstate,
265 : TupleTableSlot *slot,
266 : bool is_csv)
267 : {
268 6165096 : bool need_delim = false;
269 6165096 : FmgrInfo *out_functions = cstate->out_functions;
270 :
271 33814572 : foreach_int(attnum, cstate->attnumlist)
272 : {
273 21484380 : Datum value = slot->tts_values[attnum - 1];
274 21484380 : bool isnull = slot->tts_isnull[attnum - 1];
275 :
276 21484380 : if (need_delim)
277 15319432 : CopySendChar(cstate, cstate->opts.delim[0]);
278 21484380 : need_delim = true;
279 :
280 21484380 : if (isnull)
281 : {
282 1569508 : CopySendString(cstate, cstate->opts.null_print_client);
283 : }
284 : else
285 : {
286 : char *string;
287 :
288 19914872 : string = OutputFunctionCall(&out_functions[attnum - 1],
289 : value);
290 :
291 19914872 : if (is_csv)
292 594 : CopyAttributeOutCSV(cstate, string,
293 594 : cstate->opts.force_quote_flags[attnum - 1]);
294 : else
295 19914278 : CopyAttributeOutText(cstate, string);
296 : }
297 : }
298 :
299 6165096 : CopySendTextLikeEndOfRow(cstate);
300 6165096 : }
301 :
302 : /* Implementation of the end callback for text and CSV formats */
303 : static void
304 13052 : CopyToTextLikeEnd(CopyToState cstate)
305 : {
306 : /* Nothing to do here */
307 13052 : }
308 :
309 : /*
310 : * Implementation of the start callback for binary format. Send a header
311 : * for a binary copy.
312 : */
313 : static void
314 14 : CopyToBinaryStart(CopyToState cstate, TupleDesc tupDesc)
315 : {
316 : int32 tmp;
317 :
318 : /* Signature */
319 14 : CopySendData(cstate, BinarySignature, 11);
320 : /* Flags field */
321 14 : tmp = 0;
322 14 : CopySendInt32(cstate, tmp);
323 : /* No header extension */
324 14 : tmp = 0;
325 14 : CopySendInt32(cstate, tmp);
326 14 : }
327 :
328 : /*
329 : * Implementation of the outfunc callback for binary format. Assign
330 : * the binary output function to the given *finfo.
331 : */
332 : static void
333 62 : CopyToBinaryOutFunc(CopyToState cstate, Oid atttypid, FmgrInfo *finfo)
334 : {
335 : Oid func_oid;
336 : bool is_varlena;
337 :
338 : /* Set output function for an attribute */
339 62 : getTypeBinaryOutputInfo(atttypid, &func_oid, &is_varlena);
340 60 : fmgr_info(func_oid, finfo);
341 60 : }
342 :
343 : /* Implementation of the per-row callback for binary format */
344 : static void
345 32 : CopyToBinaryOneRow(CopyToState cstate, TupleTableSlot *slot)
346 : {
347 32 : FmgrInfo *out_functions = cstate->out_functions;
348 :
349 : /* Binary per-tuple header */
350 32 : CopySendInt16(cstate, list_length(cstate->attnumlist));
351 :
352 224 : foreach_int(attnum, cstate->attnumlist)
353 : {
354 160 : Datum value = slot->tts_values[attnum - 1];
355 160 : bool isnull = slot->tts_isnull[attnum - 1];
356 :
357 160 : if (isnull)
358 : {
359 30 : CopySendInt32(cstate, -1);
360 : }
361 : else
362 : {
363 : bytea *outputbytes;
364 :
365 130 : outputbytes = SendFunctionCall(&out_functions[attnum - 1],
366 : value);
367 130 : CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
368 130 : CopySendData(cstate, VARDATA(outputbytes),
369 130 : VARSIZE(outputbytes) - VARHDRSZ);
370 : }
371 : }
372 :
373 32 : CopySendEndOfRow(cstate);
374 32 : }
375 :
376 : /* Implementation of the end callback for binary format */
377 : static void
378 14 : CopyToBinaryEnd(CopyToState cstate)
379 : {
380 : /* Generate trailer for a binary copy */
381 14 : CopySendInt16(cstate, -1);
382 : /* Need to flush out the trailer */
383 14 : CopySendEndOfRow(cstate);
384 14 : }
385 :
386 : /*
387 : * Send copy start/stop messages for frontend copies. These have changed
388 : * in past protocol redesigns.
389 : */
390 : static void
391 13004 : SendCopyBegin(CopyToState cstate)
392 : {
393 : StringInfoData buf;
394 13004 : int natts = list_length(cstate->attnumlist);
395 13004 : int16 format = (cstate->opts.binary ? 1 : 0);
396 : int i;
397 :
398 13004 : pq_beginmessage(&buf, PqMsg_CopyOutResponse);
399 13004 : pq_sendbyte(&buf, format); /* overall format */
400 13004 : pq_sendint16(&buf, natts);
401 60202 : for (i = 0; i < natts; i++)
402 47198 : pq_sendint16(&buf, format); /* per-column formats */
403 13004 : pq_endmessage(&buf);
404 13004 : cstate->copy_dest = COPY_FRONTEND;
405 13004 : }
406 :
407 : static void
408 13002 : SendCopyEnd(CopyToState cstate)
409 : {
410 : /* Shouldn't have any unsent data */
411 : Assert(cstate->fe_msgbuf->len == 0);
412 : /* Send Copy Done message */
413 13002 : pq_putemptymessage(PqMsg_CopyDone);
414 13002 : }
415 :
416 : /*----------
417 : * CopySendData sends output data to the destination (file or frontend)
418 : * CopySendString does the same for null-terminated strings
419 : * CopySendChar does the same for single characters
420 : * CopySendEndOfRow does the appropriate thing at end of each data row
421 : * (data is not actually flushed except by CopySendEndOfRow)
422 : *
423 : * NB: no data conversion is applied by these functions
424 : *----------
425 : */
426 : static void
427 19714164 : CopySendData(CopyToState cstate, const void *databuf, int datasize)
428 : {
429 19714164 : appendBinaryStringInfo(cstate->fe_msgbuf, databuf, datasize);
430 19714164 : }
431 :
432 : static void
433 1569802 : CopySendString(CopyToState cstate, const char *str)
434 : {
435 1569802 : appendBinaryStringInfo(cstate->fe_msgbuf, str, strlen(str));
436 1569802 : }
437 :
438 : static void
439 21526940 : CopySendChar(CopyToState cstate, char c)
440 : {
441 21526940 : appendStringInfoCharMacro(cstate->fe_msgbuf, c);
442 21526940 : }
443 :
444 : static void
445 6165166 : CopySendEndOfRow(CopyToState cstate)
446 : {
447 6165166 : StringInfo fe_msgbuf = cstate->fe_msgbuf;
448 :
449 6165166 : switch (cstate->copy_dest)
450 : {
451 12282 : case COPY_FILE:
452 12282 : if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1,
453 12282 : cstate->copy_file) != 1 ||
454 12282 : ferror(cstate->copy_file))
455 : {
456 0 : if (cstate->is_program)
457 : {
458 0 : if (errno == EPIPE)
459 : {
460 : /*
461 : * The pipe will be closed automatically on error at
462 : * the end of transaction, but we might get a better
463 : * error message from the subprocess' exit code than
464 : * just "Broken Pipe"
465 : */
466 0 : ClosePipeToProgram(cstate);
467 :
468 : /*
469 : * If ClosePipeToProgram() didn't throw an error, the
470 : * program terminated normally, but closed the pipe
471 : * first. Restore errno, and throw an error.
472 : */
473 0 : errno = EPIPE;
474 : }
475 0 : ereport(ERROR,
476 : (errcode_for_file_access(),
477 : errmsg("could not write to COPY program: %m")));
478 : }
479 : else
480 0 : ereport(ERROR,
481 : (errcode_for_file_access(),
482 : errmsg("could not write to COPY file: %m")));
483 : }
484 12282 : break;
485 6152878 : case COPY_FRONTEND:
486 : /* Dump the accumulated row as one CopyData message */
487 6152878 : (void) pq_putmessage(PqMsg_CopyData, fe_msgbuf->data, fe_msgbuf->len);
488 6152878 : break;
489 6 : case COPY_CALLBACK:
490 6 : cstate->data_dest_cb(fe_msgbuf->data, fe_msgbuf->len);
491 6 : break;
492 : }
493 :
494 : /* Update the progress */
495 6165166 : cstate->bytes_processed += fe_msgbuf->len;
496 6165166 : pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
497 :
498 6165166 : resetStringInfo(fe_msgbuf);
499 6165166 : }
500 :
501 : /*
502 : * Wrapper function of CopySendEndOfRow for text and CSV formats. Sends the
503 : * line termination and do common appropriate things for the end of row.
504 : */
505 : static inline void
506 6165120 : CopySendTextLikeEndOfRow(CopyToState cstate)
507 : {
508 6165120 : switch (cstate->copy_dest)
509 : {
510 12258 : case COPY_FILE:
511 : /* Default line termination depends on platform */
512 : #ifndef WIN32
513 12258 : CopySendChar(cstate, '\n');
514 : #else
515 : CopySendString(cstate, "\r\n");
516 : #endif
517 12258 : break;
518 6152856 : case COPY_FRONTEND:
519 : /* The FE/BE protocol uses \n as newline for all platforms */
520 6152856 : CopySendChar(cstate, '\n');
521 6152856 : break;
522 6 : default:
523 6 : break;
524 : }
525 :
526 : /* Now take the actions related to the end of a row */
527 6165120 : CopySendEndOfRow(cstate);
528 6165120 : }
529 :
530 : /*
531 : * These functions do apply some data conversion
532 : */
533 :
534 : /*
535 : * CopySendInt32 sends an int32 in network byte order
536 : */
537 : static inline void
538 188 : CopySendInt32(CopyToState cstate, int32 val)
539 : {
540 : uint32 buf;
541 :
542 188 : buf = pg_hton32((uint32) val);
543 188 : CopySendData(cstate, &buf, sizeof(buf));
544 188 : }
545 :
546 : /*
547 : * CopySendInt16 sends an int16 in network byte order
548 : */
549 : static inline void
550 46 : CopySendInt16(CopyToState cstate, int16 val)
551 : {
552 : uint16 buf;
553 :
554 46 : buf = pg_hton16((uint16) val);
555 46 : CopySendData(cstate, &buf, sizeof(buf));
556 46 : }
557 :
558 : /*
559 : * Closes the pipe to an external program, checking the pclose() return code.
560 : */
561 : static void
562 0 : ClosePipeToProgram(CopyToState cstate)
563 : {
564 : int pclose_rc;
565 :
566 : Assert(cstate->is_program);
567 :
568 0 : pclose_rc = ClosePipeStream(cstate->copy_file);
569 0 : if (pclose_rc == -1)
570 0 : ereport(ERROR,
571 : (errcode_for_file_access(),
572 : errmsg("could not close pipe to external command: %m")));
573 0 : else if (pclose_rc != 0)
574 : {
575 0 : ereport(ERROR,
576 : (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
577 : errmsg("program \"%s\" failed",
578 : cstate->filename),
579 : errdetail_internal("%s", wait_result_to_str(pclose_rc))));
580 : }
581 0 : }
582 :
583 : /*
584 : * Release resources allocated in a cstate for COPY TO/FROM.
585 : */
586 : static void
587 13066 : EndCopy(CopyToState cstate)
588 : {
589 13066 : if (cstate->is_program)
590 : {
591 0 : ClosePipeToProgram(cstate);
592 : }
593 : else
594 : {
595 13066 : if (cstate->filename != NULL && FreeFile(cstate->copy_file))
596 0 : ereport(ERROR,
597 : (errcode_for_file_access(),
598 : errmsg("could not close file \"%s\": %m",
599 : cstate->filename)));
600 : }
601 :
602 13066 : pgstat_progress_end_command();
603 :
604 13066 : MemoryContextDelete(cstate->copycontext);
605 13066 : pfree(cstate);
606 13066 : }
607 :
608 : /*
609 : * Setup CopyToState to read tuples from a table or a query for COPY TO.
610 : *
611 : * 'rel': Relation to be copied
612 : * 'raw_query': Query whose results are to be copied
613 : * 'queryRelId': OID of base relation to convert to a query (for RLS)
614 : * 'filename': Name of server-local file to write, NULL for STDOUT
615 : * 'is_program': true if 'filename' is program to execute
616 : * 'data_dest_cb': Callback that processes the output data
617 : * 'attnamelist': List of char *, columns to include. NIL selects all cols.
618 : * 'options': List of DefElem. See copy_opt_item in gram.y for selections.
619 : *
620 : * Returns a CopyToState, to be passed to DoCopyTo() and related functions.
621 : */
622 : CopyToState
623 13262 : BeginCopyTo(ParseState *pstate,
624 : Relation rel,
625 : RawStmt *raw_query,
626 : Oid queryRelId,
627 : const char *filename,
628 : bool is_program,
629 : copy_data_dest_cb data_dest_cb,
630 : List *attnamelist,
631 : List *options)
632 : {
633 : CopyToState cstate;
634 13262 : bool pipe = (filename == NULL && data_dest_cb == NULL);
635 : TupleDesc tupDesc;
636 : int num_phys_attrs;
637 : MemoryContext oldcontext;
638 13262 : const int progress_cols[] = {
639 : PROGRESS_COPY_COMMAND,
640 : PROGRESS_COPY_TYPE
641 : };
642 13262 : int64 progress_vals[] = {
643 : PROGRESS_COPY_COMMAND_TO,
644 : 0
645 : };
646 :
647 13262 : if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION)
648 : {
649 24 : if (rel->rd_rel->relkind == RELKIND_VIEW)
650 12 : ereport(ERROR,
651 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
652 : errmsg("cannot copy from view \"%s\"",
653 : RelationGetRelationName(rel)),
654 : errhint("Try the COPY (SELECT ...) TO variant.")));
655 12 : else if (rel->rd_rel->relkind == RELKIND_MATVIEW)
656 : {
657 12 : if (!RelationIsPopulated(rel))
658 6 : ereport(ERROR,
659 : errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
660 : errmsg("cannot copy from unpopulated materialized view \"%s\"",
661 : RelationGetRelationName(rel)),
662 : errhint("Use the REFRESH MATERIALIZED VIEW command."));
663 : }
664 0 : else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
665 0 : ereport(ERROR,
666 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
667 : errmsg("cannot copy from foreign table \"%s\"",
668 : RelationGetRelationName(rel)),
669 : errhint("Try the COPY (SELECT ...) TO variant.")));
670 0 : else if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
671 0 : ereport(ERROR,
672 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
673 : errmsg("cannot copy from sequence \"%s\"",
674 : RelationGetRelationName(rel))));
675 0 : else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
676 0 : ereport(ERROR,
677 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
678 : errmsg("cannot copy from partitioned table \"%s\"",
679 : RelationGetRelationName(rel)),
680 : errhint("Try the COPY (SELECT ...) TO variant.")));
681 : else
682 0 : ereport(ERROR,
683 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
684 : errmsg("cannot copy from non-table relation \"%s\"",
685 : RelationGetRelationName(rel))));
686 : }
687 :
688 :
689 : /* Allocate workspace and zero all fields */
690 13244 : cstate = (CopyToStateData *) palloc0(sizeof(CopyToStateData));
691 :
692 : /*
693 : * We allocate everything used by a cstate in a new memory context. This
694 : * avoids memory leaks during repeated use of COPY in a query.
695 : */
696 13244 : cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext,
697 : "COPY",
698 : ALLOCSET_DEFAULT_SIZES);
699 :
700 13244 : oldcontext = MemoryContextSwitchTo(cstate->copycontext);
701 :
702 : /* Extract options from the statement node tree */
703 13244 : ProcessCopyOptions(pstate, &cstate->opts, false /* is_from */ , options);
704 :
705 : /* Set format routine */
706 13202 : cstate->routine = CopyToGetRoutine(&cstate->opts);
707 :
708 : /* Process the source/target relation or query */
709 13202 : if (rel)
710 : {
711 : Assert(!raw_query);
712 :
713 12696 : cstate->rel = rel;
714 :
715 12696 : tupDesc = RelationGetDescr(cstate->rel);
716 : }
717 : else
718 : {
719 : List *rewritten;
720 : Query *query;
721 : PlannedStmt *plan;
722 : DestReceiver *dest;
723 :
724 506 : cstate->rel = NULL;
725 :
726 : /*
727 : * Run parse analysis and rewrite. Note this also acquires sufficient
728 : * locks on the source table(s).
729 : */
730 506 : rewritten = pg_analyze_and_rewrite_fixedparams(raw_query,
731 : pstate->p_sourcetext, NULL, 0,
732 : NULL);
733 :
734 : /* check that we got back something we can work with */
735 494 : if (rewritten == NIL)
736 : {
737 18 : ereport(ERROR,
738 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
739 : errmsg("DO INSTEAD NOTHING rules are not supported for COPY")));
740 : }
741 476 : else if (list_length(rewritten) > 1)
742 : {
743 : ListCell *lc;
744 :
745 : /* examine queries to determine which error message to issue */
746 102 : foreach(lc, rewritten)
747 : {
748 84 : Query *q = lfirst_node(Query, lc);
749 :
750 84 : if (q->querySource == QSRC_QUAL_INSTEAD_RULE)
751 18 : ereport(ERROR,
752 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
753 : errmsg("conditional DO INSTEAD rules are not supported for COPY")));
754 66 : if (q->querySource == QSRC_NON_INSTEAD_RULE)
755 18 : ereport(ERROR,
756 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
757 : errmsg("DO ALSO rules are not supported for COPY")));
758 : }
759 :
760 18 : ereport(ERROR,
761 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
762 : errmsg("multi-statement DO INSTEAD rules are not supported for COPY")));
763 : }
764 :
765 422 : query = linitial_node(Query, rewritten);
766 :
767 : /* The grammar allows SELECT INTO, but we don't support that */
768 422 : if (query->utilityStmt != NULL &&
769 18 : IsA(query->utilityStmt, CreateTableAsStmt))
770 12 : ereport(ERROR,
771 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
772 : errmsg("COPY (SELECT INTO) is not supported")));
773 :
774 : /* The only other utility command we could see is NOTIFY */
775 410 : if (query->utilityStmt != NULL)
776 6 : ereport(ERROR,
777 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
778 : errmsg("COPY query must not be a utility command")));
779 :
780 : /*
781 : * Similarly the grammar doesn't enforce the presence of a RETURNING
782 : * clause, but this is required here.
783 : */
784 404 : if (query->commandType != CMD_SELECT &&
785 110 : query->returningList == NIL)
786 : {
787 : Assert(query->commandType == CMD_INSERT ||
788 : query->commandType == CMD_UPDATE ||
789 : query->commandType == CMD_DELETE ||
790 : query->commandType == CMD_MERGE);
791 :
792 24 : ereport(ERROR,
793 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
794 : errmsg("COPY query must have a RETURNING clause")));
795 : }
796 :
797 : /* plan the query */
798 380 : plan = pg_plan_query(query, pstate->p_sourcetext,
799 : CURSOR_OPT_PARALLEL_OK, NULL);
800 :
801 : /*
802 : * With row-level security and a user using "COPY relation TO", we
803 : * have to convert the "COPY relation TO" to a query-based COPY (eg:
804 : * "COPY (SELECT * FROM ONLY relation) TO"), to allow the rewriter to
805 : * add in any RLS clauses.
806 : *
807 : * When this happens, we are passed in the relid of the originally
808 : * found relation (which we have locked). As the planner will look up
809 : * the relation again, we double-check here to make sure it found the
810 : * same one that we have locked.
811 : */
812 378 : if (queryRelId != InvalidOid)
813 : {
814 : /*
815 : * Note that with RLS involved there may be multiple relations,
816 : * and while the one we need is almost certainly first, we don't
817 : * make any guarantees of that in the planner, so check the whole
818 : * list and make sure we find the original relation.
819 : */
820 54 : if (!list_member_oid(plan->relationOids, queryRelId))
821 0 : ereport(ERROR,
822 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
823 : errmsg("relation referenced by COPY statement has changed")));
824 : }
825 :
826 : /*
827 : * Use a snapshot with an updated command ID to ensure this query sees
828 : * results of any previously executed queries.
829 : */
830 378 : PushCopiedSnapshot(GetActiveSnapshot());
831 378 : UpdateActiveSnapshotCommandId();
832 :
833 : /* Create dest receiver for COPY OUT */
834 378 : dest = CreateDestReceiver(DestCopyOut);
835 378 : ((DR_copy *) dest)->cstate = cstate;
836 :
837 : /* Create a QueryDesc requesting no output */
838 378 : cstate->queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext,
839 : GetActiveSnapshot(),
840 : InvalidSnapshot,
841 : dest, NULL, NULL, 0);
842 :
843 : /*
844 : * Call ExecutorStart to prepare the plan for execution.
845 : *
846 : * ExecutorStart computes a result tupdesc for us
847 : */
848 378 : ExecutorStart(cstate->queryDesc, 0);
849 :
850 372 : tupDesc = cstate->queryDesc->tupDesc;
851 : }
852 :
853 : /* Generate or convert list of attributes to process */
854 13068 : cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
855 :
856 13068 : num_phys_attrs = tupDesc->natts;
857 :
858 : /* Convert FORCE_QUOTE name list to per-column flags, check validity */
859 13068 : cstate->opts.force_quote_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
860 13068 : if (cstate->opts.force_quote_all)
861 : {
862 18 : MemSet(cstate->opts.force_quote_flags, true, num_phys_attrs * sizeof(bool));
863 : }
864 13050 : else if (cstate->opts.force_quote)
865 : {
866 : List *attnums;
867 : ListCell *cur;
868 :
869 24 : attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_quote);
870 :
871 48 : foreach(cur, attnums)
872 : {
873 24 : int attnum = lfirst_int(cur);
874 24 : Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
875 :
876 24 : if (!list_member_int(cstate->attnumlist, attnum))
877 0 : ereport(ERROR,
878 : (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
879 : /*- translator: %s is the name of a COPY option, e.g. FORCE_NOT_NULL */
880 : errmsg("%s column \"%s\" not referenced by COPY",
881 : "FORCE_QUOTE", NameStr(attr->attname))));
882 24 : cstate->opts.force_quote_flags[attnum - 1] = true;
883 : }
884 : }
885 :
886 : /* Use client encoding when ENCODING option is not specified. */
887 13068 : if (cstate->opts.file_encoding < 0)
888 13050 : cstate->file_encoding = pg_get_client_encoding();
889 : else
890 18 : cstate->file_encoding = cstate->opts.file_encoding;
891 :
892 : /*
893 : * Set up encoding conversion info if the file and server encodings differ
894 : * (see also pg_server_to_any).
895 : */
896 13068 : if (cstate->file_encoding == GetDatabaseEncoding() ||
897 8 : cstate->file_encoding == PG_SQL_ASCII)
898 13066 : cstate->need_transcoding = false;
899 : else
900 2 : cstate->need_transcoding = true;
901 :
902 : /* See Multibyte encoding comment above */
903 13068 : cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding);
904 :
905 13068 : cstate->copy_dest = COPY_FILE; /* default */
906 :
907 13068 : if (data_dest_cb)
908 : {
909 2 : progress_vals[1] = PROGRESS_COPY_TYPE_CALLBACK;
910 2 : cstate->copy_dest = COPY_CALLBACK;
911 2 : cstate->data_dest_cb = data_dest_cb;
912 : }
913 13066 : else if (pipe)
914 : {
915 13004 : progress_vals[1] = PROGRESS_COPY_TYPE_PIPE;
916 :
917 : Assert(!is_program); /* the grammar does not allow this */
918 13004 : if (whereToSendOutput != DestRemote)
919 0 : cstate->copy_file = stdout;
920 : }
921 : else
922 : {
923 62 : cstate->filename = pstrdup(filename);
924 62 : cstate->is_program = is_program;
925 :
926 62 : if (is_program)
927 : {
928 0 : progress_vals[1] = PROGRESS_COPY_TYPE_PROGRAM;
929 0 : cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_W);
930 0 : if (cstate->copy_file == NULL)
931 0 : ereport(ERROR,
932 : (errcode_for_file_access(),
933 : errmsg("could not execute command \"%s\": %m",
934 : cstate->filename)));
935 : }
936 : else
937 : {
938 : mode_t oumask; /* Pre-existing umask value */
939 : struct stat st;
940 :
941 62 : progress_vals[1] = PROGRESS_COPY_TYPE_FILE;
942 :
943 : /*
944 : * Prevent write to relative path ... too easy to shoot oneself in
945 : * the foot by overwriting a database file ...
946 : */
947 62 : if (!is_absolute_path(filename))
948 0 : ereport(ERROR,
949 : (errcode(ERRCODE_INVALID_NAME),
950 : errmsg("relative path not allowed for COPY to file")));
951 :
952 62 : oumask = umask(S_IWGRP | S_IWOTH);
953 62 : PG_TRY();
954 : {
955 62 : cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_W);
956 : }
957 0 : PG_FINALLY();
958 : {
959 62 : umask(oumask);
960 : }
961 62 : PG_END_TRY();
962 62 : if (cstate->copy_file == NULL)
963 : {
964 : /* copy errno because ereport subfunctions might change it */
965 0 : int save_errno = errno;
966 :
967 0 : ereport(ERROR,
968 : (errcode_for_file_access(),
969 : errmsg("could not open file \"%s\" for writing: %m",
970 : cstate->filename),
971 : (save_errno == ENOENT || save_errno == EACCES) ?
972 : errhint("COPY TO instructs the PostgreSQL server process to write a file. "
973 : "You may want a client-side facility such as psql's \\copy.") : 0));
974 : }
975 :
976 62 : if (fstat(fileno(cstate->copy_file), &st))
977 0 : ereport(ERROR,
978 : (errcode_for_file_access(),
979 : errmsg("could not stat file \"%s\": %m",
980 : cstate->filename)));
981 :
982 62 : if (S_ISDIR(st.st_mode))
983 0 : ereport(ERROR,
984 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
985 : errmsg("\"%s\" is a directory", cstate->filename)));
986 : }
987 : }
988 :
989 : /* initialize progress */
990 13068 : pgstat_progress_start_command(PROGRESS_COMMAND_COPY,
991 13068 : cstate->rel ? RelationGetRelid(cstate->rel) : InvalidOid);
992 13068 : pgstat_progress_update_multi_param(2, progress_cols, progress_vals);
993 :
994 13068 : cstate->bytes_processed = 0;
995 :
996 13068 : MemoryContextSwitchTo(oldcontext);
997 :
998 13068 : return cstate;
999 : }
1000 :
1001 : /*
1002 : * Clean up storage and release resources for COPY TO.
1003 : */
1004 : void
1005 13066 : EndCopyTo(CopyToState cstate)
1006 : {
1007 13066 : if (cstate->queryDesc != NULL)
1008 : {
1009 : /* Close down the query and free resources. */
1010 372 : ExecutorFinish(cstate->queryDesc);
1011 372 : ExecutorEnd(cstate->queryDesc);
1012 372 : FreeQueryDesc(cstate->queryDesc);
1013 372 : PopActiveSnapshot();
1014 : }
1015 :
1016 : /* Clean up storage */
1017 13066 : EndCopy(cstate);
1018 13066 : }
1019 :
1020 : /*
1021 : * Copy from relation or query TO file.
1022 : *
1023 : * Returns the number of rows processed.
1024 : */
1025 : uint64
1026 13068 : DoCopyTo(CopyToState cstate)
1027 : {
1028 13068 : bool pipe = (cstate->filename == NULL && cstate->data_dest_cb == NULL);
1029 13068 : bool fe_copy = (pipe && whereToSendOutput == DestRemote);
1030 : TupleDesc tupDesc;
1031 : int num_phys_attrs;
1032 : ListCell *cur;
1033 : uint64 processed;
1034 :
1035 13068 : if (fe_copy)
1036 13004 : SendCopyBegin(cstate);
1037 :
1038 13068 : if (cstate->rel)
1039 12696 : tupDesc = RelationGetDescr(cstate->rel);
1040 : else
1041 372 : tupDesc = cstate->queryDesc->tupDesc;
1042 13068 : num_phys_attrs = tupDesc->natts;
1043 13068 : cstate->opts.null_print_client = cstate->opts.null_print; /* default */
1044 :
1045 : /* We use fe_msgbuf as a per-row buffer regardless of copy_dest */
1046 13068 : cstate->fe_msgbuf = makeStringInfo();
1047 :
1048 : /* Get info about the columns we need to process. */
1049 13068 : cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
1050 60496 : foreach(cur, cstate->attnumlist)
1051 : {
1052 47430 : int attnum = lfirst_int(cur);
1053 47430 : Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
1054 :
1055 47430 : cstate->routine->CopyToOutFunc(cstate, attr->atttypid,
1056 47430 : &cstate->out_functions[attnum - 1]);
1057 : }
1058 :
1059 : /*
1060 : * Create a temporary memory context that we can reset once per row to
1061 : * recover palloc'd memory. This avoids any problems with leaks inside
1062 : * datatype output routines, and should be faster than retail pfree's
1063 : * anyway. (We don't need a whole econtext as CopyFrom does.)
1064 : */
1065 13066 : cstate->rowcontext = AllocSetContextCreate(CurrentMemoryContext,
1066 : "COPY TO",
1067 : ALLOCSET_DEFAULT_SIZES);
1068 :
1069 13066 : cstate->routine->CopyToStart(cstate, tupDesc);
1070 :
1071 13066 : if (cstate->rel)
1072 : {
1073 : TupleTableSlot *slot;
1074 : TableScanDesc scandesc;
1075 :
1076 12694 : scandesc = table_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
1077 12694 : slot = table_slot_create(cstate->rel, NULL);
1078 :
1079 12694 : processed = 0;
1080 6170820 : while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot))
1081 : {
1082 6158126 : CHECK_FOR_INTERRUPTS();
1083 :
1084 : /* Deconstruct the tuple ... */
1085 6158126 : slot_getallattrs(slot);
1086 :
1087 : /* Format and send the data */
1088 6158126 : CopyOneRowTo(cstate, slot);
1089 :
1090 : /*
1091 : * Increment the number of processed tuples, and report the
1092 : * progress.
1093 : */
1094 6158126 : pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
1095 : ++processed);
1096 : }
1097 :
1098 12694 : ExecDropSingleTupleTableSlot(slot);
1099 12694 : table_endscan(scandesc);
1100 : }
1101 : else
1102 : {
1103 : /* run the plan --- the dest receiver will send tuples */
1104 372 : ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0);
1105 372 : processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
1106 : }
1107 :
1108 13066 : cstate->routine->CopyToEnd(cstate);
1109 :
1110 13066 : MemoryContextDelete(cstate->rowcontext);
1111 :
1112 13066 : if (fe_copy)
1113 13002 : SendCopyEnd(cstate);
1114 :
1115 13066 : return processed;
1116 : }
1117 :
1118 : /*
1119 : * Emit one row during DoCopyTo().
1120 : */
1121 : static inline void
1122 6165128 : CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
1123 : {
1124 : MemoryContext oldcontext;
1125 :
1126 6165128 : MemoryContextReset(cstate->rowcontext);
1127 6165128 : oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
1128 :
1129 : /* Make sure the tuple is fully deconstructed */
1130 6165128 : slot_getallattrs(slot);
1131 :
1132 6165128 : cstate->routine->CopyToOneRow(cstate, slot);
1133 :
1134 6165128 : MemoryContextSwitchTo(oldcontext);
1135 6165128 : }
1136 :
1137 : /*
1138 : * Send text representation of one attribute, with conversion and escaping
1139 : */
1140 : #define DUMPSOFAR() \
1141 : do { \
1142 : if (ptr > start) \
1143 : CopySendData(cstate, start, ptr - start); \
1144 : } while (0)
1145 :
1146 : static void
1147 19914296 : CopyAttributeOutText(CopyToState cstate, const char *string)
1148 : {
1149 : const char *ptr;
1150 : const char *start;
1151 : char c;
1152 19914296 : char delimc = cstate->opts.delim[0];
1153 :
1154 19914296 : if (cstate->need_transcoding)
1155 0 : ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
1156 : else
1157 19914296 : ptr = string;
1158 :
1159 : /*
1160 : * We have to grovel through the string searching for control characters
1161 : * and instances of the delimiter character. In most cases, though, these
1162 : * are infrequent. To avoid overhead from calling CopySendData once per
1163 : * character, we dump out all characters between escaped characters in a
1164 : * single call. The loop invariant is that the data from "start" to "ptr"
1165 : * can be sent literally, but hasn't yet been.
1166 : *
1167 : * We can skip pg_encoding_mblen() overhead when encoding is safe, because
1168 : * in valid backend encodings, extra bytes of a multibyte character never
1169 : * look like ASCII. This loop is sufficiently performance-critical that
1170 : * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
1171 : * of the normal safe-encoding path.
1172 : */
1173 19914296 : if (cstate->encoding_embeds_ascii)
1174 : {
1175 0 : start = ptr;
1176 0 : while ((c = *ptr) != '\0')
1177 : {
1178 0 : if ((unsigned char) c < (unsigned char) 0x20)
1179 : {
1180 : /*
1181 : * \r and \n must be escaped, the others are traditional. We
1182 : * prefer to dump these using the C-like notation, rather than
1183 : * a backslash and the literal character, because it makes the
1184 : * dump file a bit more proof against Microsoftish data
1185 : * mangling.
1186 : */
1187 0 : switch (c)
1188 : {
1189 0 : case '\b':
1190 0 : c = 'b';
1191 0 : break;
1192 0 : case '\f':
1193 0 : c = 'f';
1194 0 : break;
1195 0 : case '\n':
1196 0 : c = 'n';
1197 0 : break;
1198 0 : case '\r':
1199 0 : c = 'r';
1200 0 : break;
1201 0 : case '\t':
1202 0 : c = 't';
1203 0 : break;
1204 0 : case '\v':
1205 0 : c = 'v';
1206 0 : break;
1207 0 : default:
1208 : /* If it's the delimiter, must backslash it */
1209 0 : if (c == delimc)
1210 0 : break;
1211 : /* All ASCII control chars are length 1 */
1212 0 : ptr++;
1213 0 : continue; /* fall to end of loop */
1214 : }
1215 : /* if we get here, we need to convert the control char */
1216 0 : DUMPSOFAR();
1217 0 : CopySendChar(cstate, '\\');
1218 0 : CopySendChar(cstate, c);
1219 0 : start = ++ptr; /* do not include char in next run */
1220 : }
1221 0 : else if (c == '\\' || c == delimc)
1222 : {
1223 0 : DUMPSOFAR();
1224 0 : CopySendChar(cstate, '\\');
1225 0 : start = ptr++; /* we include char in next run */
1226 : }
1227 0 : else if (IS_HIGHBIT_SET(c))
1228 0 : ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
1229 : else
1230 0 : ptr++;
1231 : }
1232 : }
1233 : else
1234 : {
1235 19914296 : start = ptr;
1236 208540132 : while ((c = *ptr) != '\0')
1237 : {
1238 188625836 : if ((unsigned char) c < (unsigned char) 0x20)
1239 : {
1240 : /*
1241 : * \r and \n must be escaped, the others are traditional. We
1242 : * prefer to dump these using the C-like notation, rather than
1243 : * a backslash and the literal character, because it makes the
1244 : * dump file a bit more proof against Microsoftish data
1245 : * mangling.
1246 : */
1247 17294 : switch (c)
1248 : {
1249 0 : case '\b':
1250 0 : c = 'b';
1251 0 : break;
1252 0 : case '\f':
1253 0 : c = 'f';
1254 0 : break;
1255 15180 : case '\n':
1256 15180 : c = 'n';
1257 15180 : break;
1258 0 : case '\r':
1259 0 : c = 'r';
1260 0 : break;
1261 2114 : case '\t':
1262 2114 : c = 't';
1263 2114 : break;
1264 0 : case '\v':
1265 0 : c = 'v';
1266 0 : break;
1267 0 : default:
1268 : /* If it's the delimiter, must backslash it */
1269 0 : if (c == delimc)
1270 0 : break;
1271 : /* All ASCII control chars are length 1 */
1272 0 : ptr++;
1273 0 : continue; /* fall to end of loop */
1274 : }
1275 : /* if we get here, we need to convert the control char */
1276 17294 : DUMPSOFAR();
1277 17294 : CopySendChar(cstate, '\\');
1278 17294 : CopySendChar(cstate, c);
1279 17294 : start = ++ptr; /* do not include char in next run */
1280 : }
1281 188608542 : else if (c == '\\' || c == delimc)
1282 : {
1283 6984 : DUMPSOFAR();
1284 6984 : CopySendChar(cstate, '\\');
1285 6984 : start = ptr++; /* we include char in next run */
1286 : }
1287 : else
1288 188601558 : ptr++;
1289 : }
1290 : }
1291 :
1292 19914296 : DUMPSOFAR();
1293 19914296 : }
1294 :
1295 : /*
1296 : * Send text representation of one attribute, with conversion and
1297 : * CSV-style escaping
1298 : */
1299 : static void
1300 618 : CopyAttributeOutCSV(CopyToState cstate, const char *string,
1301 : bool use_quote)
1302 : {
1303 : const char *ptr;
1304 : const char *start;
1305 : char c;
1306 618 : char delimc = cstate->opts.delim[0];
1307 618 : char quotec = cstate->opts.quote[0];
1308 618 : char escapec = cstate->opts.escape[0];
1309 618 : bool single_attr = (list_length(cstate->attnumlist) == 1);
1310 :
1311 : /* force quoting if it matches null_print (before conversion!) */
1312 618 : if (!use_quote && strcmp(string, cstate->opts.null_print) == 0)
1313 54 : use_quote = true;
1314 :
1315 618 : if (cstate->need_transcoding)
1316 0 : ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
1317 : else
1318 618 : ptr = string;
1319 :
1320 : /*
1321 : * Make a preliminary pass to discover if it needs quoting
1322 : */
1323 618 : if (!use_quote)
1324 : {
1325 : /*
1326 : * Quote '\.' if it appears alone on a line, so that it will not be
1327 : * interpreted as an end-of-data marker. (PG 18 and up will not
1328 : * interpret '\.' in CSV that way, except in embedded-in-SQL data; but
1329 : * we want the data to be loadable by older versions too. Also, this
1330 : * avoids breaking clients that are still using PQgetline().)
1331 : */
1332 432 : if (single_attr && strcmp(ptr, "\\.") == 0)
1333 6 : use_quote = true;
1334 : else
1335 : {
1336 426 : const char *tptr = ptr;
1337 :
1338 2208 : while ((c = *tptr) != '\0')
1339 : {
1340 1914 : if (c == delimc || c == quotec || c == '\n' || c == '\r')
1341 : {
1342 132 : use_quote = true;
1343 132 : break;
1344 : }
1345 1782 : if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
1346 0 : tptr += pg_encoding_mblen(cstate->file_encoding, tptr);
1347 : else
1348 1782 : tptr++;
1349 : }
1350 : }
1351 : }
1352 :
1353 618 : if (use_quote)
1354 : {
1355 324 : CopySendChar(cstate, quotec);
1356 :
1357 : /*
1358 : * We adopt the same optimization strategy as in CopyAttributeOutText
1359 : */
1360 324 : start = ptr;
1361 2538 : while ((c = *ptr) != '\0')
1362 : {
1363 2214 : if (c == quotec || c == escapec)
1364 : {
1365 156 : DUMPSOFAR();
1366 156 : CopySendChar(cstate, escapec);
1367 156 : start = ptr; /* we include char in next run */
1368 : }
1369 2214 : if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
1370 0 : ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
1371 : else
1372 2214 : ptr++;
1373 : }
1374 324 : DUMPSOFAR();
1375 :
1376 324 : CopySendChar(cstate, quotec);
1377 : }
1378 : else
1379 : {
1380 : /* If it doesn't need quoting, we can just dump it as-is */
1381 294 : CopySendString(cstate, ptr);
1382 : }
1383 618 : }
1384 :
1385 : /*
1386 : * copy_dest_startup --- executor startup
1387 : */
1388 : static void
1389 372 : copy_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
1390 : {
1391 : /* no-op */
1392 372 : }
1393 :
1394 : /*
1395 : * copy_dest_receive --- receive one tuple
1396 : */
1397 : static bool
1398 7002 : copy_dest_receive(TupleTableSlot *slot, DestReceiver *self)
1399 : {
1400 7002 : DR_copy *myState = (DR_copy *) self;
1401 7002 : CopyToState cstate = myState->cstate;
1402 :
1403 : /* Send the data */
1404 7002 : CopyOneRowTo(cstate, slot);
1405 :
1406 : /* Increment the number of processed tuples, and report the progress */
1407 7002 : pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
1408 7002 : ++myState->processed);
1409 :
1410 7002 : return true;
1411 : }
1412 :
1413 : /*
1414 : * copy_dest_shutdown --- executor end
1415 : */
1416 : static void
1417 372 : copy_dest_shutdown(DestReceiver *self)
1418 : {
1419 : /* no-op */
1420 372 : }
1421 :
1422 : /*
1423 : * copy_dest_destroy --- release DestReceiver object
1424 : */
1425 : static void
1426 0 : copy_dest_destroy(DestReceiver *self)
1427 : {
1428 0 : pfree(self);
1429 0 : }
1430 :
1431 : /*
1432 : * CreateCopyDestReceiver -- create a suitable DestReceiver object
1433 : */
1434 : DestReceiver *
1435 378 : CreateCopyDestReceiver(void)
1436 : {
1437 378 : DR_copy *self = (DR_copy *) palloc(sizeof(DR_copy));
1438 :
1439 378 : self->pub.receiveSlot = copy_dest_receive;
1440 378 : self->pub.rStartup = copy_dest_startup;
1441 378 : self->pub.rShutdown = copy_dest_shutdown;
1442 378 : self->pub.rDestroy = copy_dest_destroy;
1443 378 : self->pub.mydest = DestCopyOut;
1444 :
1445 378 : self->cstate = NULL; /* will be set later */
1446 378 : self->processed = 0;
1447 :
1448 378 : return (DestReceiver *) self;
1449 : }
|