Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * copyto.c
4 : * COPY <table> TO file/program/client
5 : *
6 : * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
7 : * Portions Copyright (c) 1994, Regents of the University of California
8 : *
9 : *
10 : * IDENTIFICATION
11 : * src/backend/commands/copyto.c
12 : *
13 : *-------------------------------------------------------------------------
14 : */
15 : #include "postgres.h"
16 :
17 : #include <ctype.h>
18 : #include <unistd.h>
19 : #include <sys/stat.h>
20 :
21 : #include "access/tableam.h"
22 : #include "commands/copy.h"
23 : #include "commands/progress.h"
24 : #include "executor/execdesc.h"
25 : #include "executor/executor.h"
26 : #include "executor/tuptable.h"
27 : #include "libpq/libpq.h"
28 : #include "libpq/pqformat.h"
29 : #include "mb/pg_wchar.h"
30 : #include "miscadmin.h"
31 : #include "pgstat.h"
32 : #include "storage/fd.h"
33 : #include "tcop/tcopprot.h"
34 : #include "utils/lsyscache.h"
35 : #include "utils/memutils.h"
36 : #include "utils/rel.h"
37 : #include "utils/snapmgr.h"
38 :
/*
 * CopyDest enumerates the kinds of output sink that the low-level send
 * routines have to tell apart.
 */
typedef enum CopyDest
{
	COPY_FILE,					/* a file, or a pipe to a program */
	COPY_FRONTEND,				/* the frontend (client) */
	COPY_CALLBACK,				/* a caller-supplied callback function */
} CopyDest;
49 :
50 : /*
51 : * This struct contains all the state variables used throughout a COPY TO
52 : * operation.
53 : *
54 : * Multi-byte encodings: all supported client-side encodings encode multi-byte
55 : * characters by having the first byte's high bit set. Subsequent bytes of the
56 : * character can have the high bit not set. When scanning data in such an
57 : * encoding to look for a match to a single-byte (ie ASCII) character, we must
58 : * use the full pg_encoding_mblen() machinery to skip over multibyte
59 : * characters, else we might find a false match to a trailing byte. In
60 : * supported server encodings, there is no possibility of a false match, and
61 : * it's faster to make useless comparisons to trailing bytes than it is to
62 : * invoke pg_encoding_mblen() to skip over them. encoding_embeds_ascii is true
63 : * when we have to do it the hard way.
64 : */
65 : typedef struct CopyToStateData
66 : {
67 : /* low-level state data */
68 : CopyDest copy_dest; /* type of copy source/destination */
69 : FILE *copy_file; /* used if copy_dest == COPY_FILE */
70 : StringInfo fe_msgbuf; /* used for all dests during COPY TO */
71 :
72 : int file_encoding; /* file or remote side's character encoding */
73 : bool need_transcoding; /* file encoding diff from server? */
74 : bool encoding_embeds_ascii; /* ASCII can be non-first byte? */
75 :
76 : /* parameters from the COPY command */
77 : Relation rel; /* relation to copy to */
78 : QueryDesc *queryDesc; /* executable query to copy from */
79 : List *attnumlist; /* integer list of attnums to copy */
80 : char *filename; /* filename, or NULL for STDOUT */
81 : bool is_program; /* is 'filename' a program to popen? */
82 : copy_data_dest_cb data_dest_cb; /* function for writing data */
83 :
84 : CopyFormatOptions opts;
85 : Node *whereClause; /* WHERE condition (or NULL) */
86 :
87 : /*
88 : * Working state
89 : */
90 : MemoryContext copycontext; /* per-copy execution context */
91 :
92 : FmgrInfo *out_functions; /* lookup info for output functions */
93 : MemoryContext rowcontext; /* per-row evaluation context */
94 : uint64 bytes_processed; /* number of bytes processed so far */
95 : } CopyToStateData;
96 :
97 : /* DestReceiver for COPY (query) TO */
98 : typedef struct
99 : {
100 : DestReceiver pub; /* publicly-known function pointers */
101 : CopyToState cstate; /* CopyToStateData for the command */
102 : uint64 processed; /* # of tuples processed */
103 : } DR_copy;
104 :
/*
 * Eleven-byte signature that opens a binary-format COPY stream.
 *
 * NOTE: copyfromparse.c carries its own copy of this array.
 */
static const char BinarySignature[11] = {
	'P', 'G', 'C', 'O', 'P', 'Y', '\n', '\377', '\r', '\n', '\0'
};
107 :
108 :
109 : /* non-export function prototypes */
110 : static void EndCopy(CopyToState cstate);
111 : static void ClosePipeToProgram(CopyToState cstate);
112 : static void CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot);
113 : static void CopyAttributeOutText(CopyToState cstate, const char *string);
114 : static void CopyAttributeOutCSV(CopyToState cstate, const char *string,
115 : bool use_quote);
116 :
117 : /* Low-level communications functions */
118 : static void SendCopyBegin(CopyToState cstate);
119 : static void SendCopyEnd(CopyToState cstate);
120 : static void CopySendData(CopyToState cstate, const void *databuf, int datasize);
121 : static void CopySendString(CopyToState cstate, const char *str);
122 : static void CopySendChar(CopyToState cstate, char c);
123 : static void CopySendEndOfRow(CopyToState cstate);
124 : static void CopySendInt32(CopyToState cstate, int32 val);
125 : static void CopySendInt16(CopyToState cstate, int16 val);
126 :
127 :
128 : /*
129 : * Send copy start/stop messages for frontend copies. These have changed
130 : * in past protocol redesigns.
131 : */
132 : static void
133 7614 : SendCopyBegin(CopyToState cstate)
134 : {
135 : StringInfoData buf;
136 7614 : int natts = list_length(cstate->attnumlist);
137 7614 : int16 format = (cstate->opts.binary ? 1 : 0);
138 : int i;
139 :
140 7614 : pq_beginmessage(&buf, PqMsg_CopyOutResponse);
141 7614 : pq_sendbyte(&buf, format); /* overall format */
142 7614 : pq_sendint16(&buf, natts);
143 34628 : for (i = 0; i < natts; i++)
144 27014 : pq_sendint16(&buf, format); /* per-column formats */
145 7614 : pq_endmessage(&buf);
146 7614 : cstate->copy_dest = COPY_FRONTEND;
147 7614 : }
148 :
149 : static void
150 7612 : SendCopyEnd(CopyToState cstate)
151 : {
152 : /* Shouldn't have any unsent data */
153 : Assert(cstate->fe_msgbuf->len == 0);
154 : /* Send Copy Done message */
155 7612 : pq_putemptymessage(PqMsg_CopyDone);
156 7612 : }
157 :
158 : /*----------
159 : * CopySendData sends output data to the destination (file or frontend)
160 : * CopySendString does the same for null-terminated strings
161 : * CopySendChar does the same for single characters
162 : * CopySendEndOfRow does the appropriate thing at end of each data row
163 : * (data is not actually flushed except by CopySendEndOfRow)
164 : *
165 : * NB: no data conversion is applied by these functions
166 : *----------
167 : */
168 : static void
169 10071580 : CopySendData(CopyToState cstate, const void *databuf, int datasize)
170 : {
171 10071580 : appendBinaryStringInfo(cstate->fe_msgbuf, databuf, datasize);
172 10071580 : }
173 :
174 : static void
175 632724 : CopySendString(CopyToState cstate, const char *str)
176 : {
177 632724 : appendBinaryStringInfo(cstate->fe_msgbuf, str, strlen(str));
178 632724 : }
179 :
180 : static void
181 10744160 : CopySendChar(CopyToState cstate, char c)
182 : {
183 10744160 : appendStringInfoCharMacro(cstate->fe_msgbuf, c);
184 10744160 : }
185 :
186 : static void
187 3460796 : CopySendEndOfRow(CopyToState cstate)
188 : {
189 3460796 : StringInfo fe_msgbuf = cstate->fe_msgbuf;
190 :
191 3460796 : switch (cstate->copy_dest)
192 : {
193 12258 : case COPY_FILE:
194 12258 : if (!cstate->opts.binary)
195 : {
196 : /* Default line termination depends on platform */
197 : #ifndef WIN32
198 12234 : CopySendChar(cstate, '\n');
199 : #else
200 : CopySendString(cstate, "\r\n");
201 : #endif
202 : }
203 :
204 12258 : if (fwrite(fe_msgbuf->data, fe_msgbuf->len, 1,
205 12258 : cstate->copy_file) != 1 ||
206 12258 : ferror(cstate->copy_file))
207 : {
208 0 : if (cstate->is_program)
209 : {
210 0 : if (errno == EPIPE)
211 : {
212 : /*
213 : * The pipe will be closed automatically on error at
214 : * the end of transaction, but we might get a better
215 : * error message from the subprocess' exit code than
216 : * just "Broken Pipe"
217 : */
218 0 : ClosePipeToProgram(cstate);
219 :
220 : /*
221 : * If ClosePipeToProgram() didn't throw an error, the
222 : * program terminated normally, but closed the pipe
223 : * first. Restore errno, and throw an error.
224 : */
225 0 : errno = EPIPE;
226 : }
227 0 : ereport(ERROR,
228 : (errcode_for_file_access(),
229 : errmsg("could not write to COPY program: %m")));
230 : }
231 : else
232 0 : ereport(ERROR,
233 : (errcode_for_file_access(),
234 : errmsg("could not write to COPY file: %m")));
235 : }
236 12258 : break;
237 3448532 : case COPY_FRONTEND:
238 : /* The FE/BE protocol uses \n as newline for all platforms */
239 3448532 : if (!cstate->opts.binary)
240 3448510 : CopySendChar(cstate, '\n');
241 :
242 : /* Dump the accumulated row as one CopyData message */
243 3448532 : (void) pq_putmessage(PqMsg_CopyData, fe_msgbuf->data, fe_msgbuf->len);
244 3448532 : break;
245 6 : case COPY_CALLBACK:
246 6 : cstate->data_dest_cb(fe_msgbuf->data, fe_msgbuf->len);
247 6 : break;
248 : }
249 :
250 : /* Update the progress */
251 3460796 : cstate->bytes_processed += fe_msgbuf->len;
252 3460796 : pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
253 :
254 3460796 : resetStringInfo(fe_msgbuf);
255 3460796 : }
256 :
257 : /*
258 : * These functions do apply some data conversion
259 : */
260 :
261 : /*
262 : * CopySendInt32 sends an int32 in network byte order
263 : */
264 : static inline void
265 188 : CopySendInt32(CopyToState cstate, int32 val)
266 : {
267 : uint32 buf;
268 :
269 188 : buf = pg_hton32((uint32) val);
270 188 : CopySendData(cstate, &buf, sizeof(buf));
271 188 : }
272 :
273 : /*
274 : * CopySendInt16 sends an int16 in network byte order
275 : */
276 : static inline void
277 46 : CopySendInt16(CopyToState cstate, int16 val)
278 : {
279 : uint16 buf;
280 :
281 46 : buf = pg_hton16((uint16) val);
282 46 : CopySendData(cstate, &buf, sizeof(buf));
283 46 : }
284 :
285 : /*
286 : * Closes the pipe to an external program, checking the pclose() return code.
287 : */
288 : static void
289 0 : ClosePipeToProgram(CopyToState cstate)
290 : {
291 : int pclose_rc;
292 :
293 : Assert(cstate->is_program);
294 :
295 0 : pclose_rc = ClosePipeStream(cstate->copy_file);
296 0 : if (pclose_rc == -1)
297 0 : ereport(ERROR,
298 : (errcode_for_file_access(),
299 : errmsg("could not close pipe to external command: %m")));
300 0 : else if (pclose_rc != 0)
301 : {
302 0 : ereport(ERROR,
303 : (errcode(ERRCODE_EXTERNAL_ROUTINE_EXCEPTION),
304 : errmsg("program \"%s\" failed",
305 : cstate->filename),
306 : errdetail_internal("%s", wait_result_to_str(pclose_rc))));
307 : }
308 0 : }
309 :
310 : /*
311 : * Release resources allocated in a cstate for COPY TO/FROM.
312 : */
313 : static void
314 7652 : EndCopy(CopyToState cstate)
315 : {
316 7652 : if (cstate->is_program)
317 : {
318 0 : ClosePipeToProgram(cstate);
319 : }
320 : else
321 : {
322 7652 : if (cstate->filename != NULL && FreeFile(cstate->copy_file))
323 0 : ereport(ERROR,
324 : (errcode_for_file_access(),
325 : errmsg("could not close file \"%s\": %m",
326 : cstate->filename)));
327 : }
328 :
329 7652 : pgstat_progress_end_command();
330 :
331 7652 : MemoryContextDelete(cstate->copycontext);
332 7652 : pfree(cstate);
333 7652 : }
334 :
335 : /*
336 : * Setup CopyToState to read tuples from a table or a query for COPY TO.
337 : *
338 : * 'rel': Relation to be copied
339 : * 'raw_query': Query whose results are to be copied
340 : * 'queryRelId': OID of base relation to convert to a query (for RLS)
341 : * 'filename': Name of server-local file to write, NULL for STDOUT
342 : * 'is_program': true if 'filename' is program to execute
343 : * 'data_dest_cb': Callback that processes the output data
344 : * 'attnamelist': List of char *, columns to include. NIL selects all cols.
345 : * 'options': List of DefElem. See copy_opt_item in gram.y for selections.
346 : *
347 : * Returns a CopyToState, to be passed to DoCopyTo() and related functions.
348 : */
349 : CopyToState
350 7854 : BeginCopyTo(ParseState *pstate,
351 : Relation rel,
352 : RawStmt *raw_query,
353 : Oid queryRelId,
354 : const char *filename,
355 : bool is_program,
356 : copy_data_dest_cb data_dest_cb,
357 : List *attnamelist,
358 : List *options)
359 : {
360 : CopyToState cstate;
361 7854 : bool pipe = (filename == NULL && data_dest_cb == NULL);
362 : TupleDesc tupDesc;
363 : int num_phys_attrs;
364 : MemoryContext oldcontext;
365 7854 : const int progress_cols[] = {
366 : PROGRESS_COPY_COMMAND,
367 : PROGRESS_COPY_TYPE
368 : };
369 7854 : int64 progress_vals[] = {
370 : PROGRESS_COPY_COMMAND_TO,
371 : 0
372 : };
373 :
374 7854 : if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION)
375 : {
376 12 : if (rel->rd_rel->relkind == RELKIND_VIEW)
377 12 : ereport(ERROR,
378 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
379 : errmsg("cannot copy from view \"%s\"",
380 : RelationGetRelationName(rel)),
381 : errhint("Try the COPY (SELECT ...) TO variant.")));
382 0 : else if (rel->rd_rel->relkind == RELKIND_MATVIEW)
383 0 : ereport(ERROR,
384 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
385 : errmsg("cannot copy from materialized view \"%s\"",
386 : RelationGetRelationName(rel)),
387 : errhint("Try the COPY (SELECT ...) TO variant.")));
388 0 : else if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
389 0 : ereport(ERROR,
390 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
391 : errmsg("cannot copy from foreign table \"%s\"",
392 : RelationGetRelationName(rel)),
393 : errhint("Try the COPY (SELECT ...) TO variant.")));
394 0 : else if (rel->rd_rel->relkind == RELKIND_SEQUENCE)
395 0 : ereport(ERROR,
396 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
397 : errmsg("cannot copy from sequence \"%s\"",
398 : RelationGetRelationName(rel))));
399 0 : else if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
400 0 : ereport(ERROR,
401 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
402 : errmsg("cannot copy from partitioned table \"%s\"",
403 : RelationGetRelationName(rel)),
404 : errhint("Try the COPY (SELECT ...) TO variant.")));
405 : else
406 0 : ereport(ERROR,
407 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
408 : errmsg("cannot copy from non-table relation \"%s\"",
409 : RelationGetRelationName(rel))));
410 : }
411 :
412 :
413 : /* Allocate workspace and zero all fields */
414 7842 : cstate = (CopyToStateData *) palloc0(sizeof(CopyToStateData));
415 :
416 : /*
417 : * We allocate everything used by a cstate in a new memory context. This
418 : * avoids memory leaks during repeated use of COPY in a query.
419 : */
420 7842 : cstate->copycontext = AllocSetContextCreate(CurrentMemoryContext,
421 : "COPY",
422 : ALLOCSET_DEFAULT_SIZES);
423 :
424 7842 : oldcontext = MemoryContextSwitchTo(cstate->copycontext);
425 :
426 : /* Extract options from the statement node tree */
427 7842 : ProcessCopyOptions(pstate, &cstate->opts, false /* is_from */ , options);
428 :
429 : /* Process the source/target relation or query */
430 7782 : if (rel)
431 : {
432 : Assert(!raw_query);
433 :
434 7338 : cstate->rel = rel;
435 :
436 7338 : tupDesc = RelationGetDescr(cstate->rel);
437 : }
438 : else
439 : {
440 : List *rewritten;
441 : Query *query;
442 : PlannedStmt *plan;
443 : DestReceiver *dest;
444 :
445 444 : cstate->rel = NULL;
446 :
447 : /*
448 : * Run parse analysis and rewrite. Note this also acquires sufficient
449 : * locks on the source table(s).
450 : */
451 444 : rewritten = pg_analyze_and_rewrite_fixedparams(raw_query,
452 : pstate->p_sourcetext, NULL, 0,
453 : NULL);
454 :
455 : /* check that we got back something we can work with */
456 432 : if (rewritten == NIL)
457 : {
458 18 : ereport(ERROR,
459 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
460 : errmsg("DO INSTEAD NOTHING rules are not supported for COPY")));
461 : }
462 414 : else if (list_length(rewritten) > 1)
463 : {
464 : ListCell *lc;
465 :
466 : /* examine queries to determine which error message to issue */
467 102 : foreach(lc, rewritten)
468 : {
469 84 : Query *q = lfirst_node(Query, lc);
470 :
471 84 : if (q->querySource == QSRC_QUAL_INSTEAD_RULE)
472 18 : ereport(ERROR,
473 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
474 : errmsg("conditional DO INSTEAD rules are not supported for COPY")));
475 66 : if (q->querySource == QSRC_NON_INSTEAD_RULE)
476 18 : ereport(ERROR,
477 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
478 : errmsg("DO ALSO rules are not supported for the COPY")));
479 : }
480 :
481 18 : ereport(ERROR,
482 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
483 : errmsg("multi-statement DO INSTEAD rules are not supported for COPY")));
484 : }
485 :
486 360 : query = linitial_node(Query, rewritten);
487 :
488 : /* The grammar allows SELECT INTO, but we don't support that */
489 360 : if (query->utilityStmt != NULL &&
490 12 : IsA(query->utilityStmt, CreateTableAsStmt))
491 12 : ereport(ERROR,
492 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
493 : errmsg("COPY (SELECT INTO) is not supported")));
494 :
495 : Assert(query->utilityStmt == NULL);
496 :
497 : /*
498 : * Similarly the grammar doesn't enforce the presence of a RETURNING
499 : * clause, but this is required here.
500 : */
501 348 : if (query->commandType != CMD_SELECT &&
502 94 : query->returningList == NIL)
503 : {
504 : Assert(query->commandType == CMD_INSERT ||
505 : query->commandType == CMD_UPDATE ||
506 : query->commandType == CMD_DELETE ||
507 : query->commandType == CMD_MERGE);
508 :
509 24 : ereport(ERROR,
510 : (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
511 : errmsg("COPY query must have a RETURNING clause")));
512 : }
513 :
514 : /* plan the query */
515 324 : plan = pg_plan_query(query, pstate->p_sourcetext,
516 : CURSOR_OPT_PARALLEL_OK, NULL);
517 :
518 : /*
519 : * With row-level security and a user using "COPY relation TO", we
520 : * have to convert the "COPY relation TO" to a query-based COPY (eg:
521 : * "COPY (SELECT * FROM ONLY relation) TO"), to allow the rewriter to
522 : * add in any RLS clauses.
523 : *
524 : * When this happens, we are passed in the relid of the originally
525 : * found relation (which we have locked). As the planner will look up
526 : * the relation again, we double-check here to make sure it found the
527 : * same one that we have locked.
528 : */
529 322 : if (queryRelId != InvalidOid)
530 : {
531 : /*
532 : * Note that with RLS involved there may be multiple relations,
533 : * and while the one we need is almost certainly first, we don't
534 : * make any guarantees of that in the planner, so check the whole
535 : * list and make sure we find the original relation.
536 : */
537 54 : if (!list_member_oid(plan->relationOids, queryRelId))
538 0 : ereport(ERROR,
539 : (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
540 : errmsg("relation referenced by COPY statement has changed")));
541 : }
542 :
543 : /*
544 : * Use a snapshot with an updated command ID to ensure this query sees
545 : * results of any previously executed queries.
546 : */
547 322 : PushCopiedSnapshot(GetActiveSnapshot());
548 322 : UpdateActiveSnapshotCommandId();
549 :
550 : /* Create dest receiver for COPY OUT */
551 322 : dest = CreateDestReceiver(DestCopyOut);
552 322 : ((DR_copy *) dest)->cstate = cstate;
553 :
554 : /* Create a QueryDesc requesting no output */
555 322 : cstate->queryDesc = CreateQueryDesc(plan, pstate->p_sourcetext,
556 : GetActiveSnapshot(),
557 : InvalidSnapshot,
558 : dest, NULL, NULL, 0);
559 :
560 : /*
561 : * Call ExecutorStart to prepare the plan for execution.
562 : *
563 : * ExecutorStart computes a result tupdesc for us
564 : */
565 322 : ExecutorStart(cstate->queryDesc, 0);
566 :
567 316 : tupDesc = cstate->queryDesc->tupDesc;
568 : }
569 :
570 : /* Generate or convert list of attributes to process */
571 7654 : cstate->attnumlist = CopyGetAttnums(tupDesc, cstate->rel, attnamelist);
572 :
573 7654 : num_phys_attrs = tupDesc->natts;
574 :
575 : /* Convert FORCE_QUOTE name list to per-column flags, check validity */
576 7654 : cstate->opts.force_quote_flags = (bool *) palloc0(num_phys_attrs * sizeof(bool));
577 7654 : if (cstate->opts.force_quote_all)
578 : {
579 18 : MemSet(cstate->opts.force_quote_flags, true, num_phys_attrs * sizeof(bool));
580 : }
581 7636 : else if (cstate->opts.force_quote)
582 : {
583 : List *attnums;
584 : ListCell *cur;
585 :
586 24 : attnums = CopyGetAttnums(tupDesc, cstate->rel, cstate->opts.force_quote);
587 :
588 48 : foreach(cur, attnums)
589 : {
590 24 : int attnum = lfirst_int(cur);
591 24 : Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
592 :
593 24 : if (!list_member_int(cstate->attnumlist, attnum))
594 0 : ereport(ERROR,
595 : (errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
596 : errmsg("FORCE_QUOTE column \"%s\" not referenced by COPY",
597 : NameStr(attr->attname))));
598 24 : cstate->opts.force_quote_flags[attnum - 1] = true;
599 : }
600 : }
601 :
602 : /* Use client encoding when ENCODING option is not specified. */
603 7654 : if (cstate->opts.file_encoding < 0)
604 7648 : cstate->file_encoding = pg_get_client_encoding();
605 : else
606 6 : cstate->file_encoding = cstate->opts.file_encoding;
607 :
608 : /*
609 : * Set up encoding conversion info if the file and server encodings differ
610 : * (see also pg_server_to_any).
611 : */
612 7654 : if (cstate->file_encoding == GetDatabaseEncoding() ||
613 8 : cstate->file_encoding == PG_SQL_ASCII)
614 7652 : cstate->need_transcoding = false;
615 : else
616 2 : cstate->need_transcoding = true;
617 :
618 : /* See Multibyte encoding comment above */
619 7654 : cstate->encoding_embeds_ascii = PG_ENCODING_IS_CLIENT_ONLY(cstate->file_encoding);
620 :
621 7654 : cstate->copy_dest = COPY_FILE; /* default */
622 :
623 7654 : if (data_dest_cb)
624 : {
625 2 : progress_vals[1] = PROGRESS_COPY_TYPE_CALLBACK;
626 2 : cstate->copy_dest = COPY_CALLBACK;
627 2 : cstate->data_dest_cb = data_dest_cb;
628 : }
629 7652 : else if (pipe)
630 : {
631 7614 : progress_vals[1] = PROGRESS_COPY_TYPE_PIPE;
632 :
633 : Assert(!is_program); /* the grammar does not allow this */
634 7614 : if (whereToSendOutput != DestRemote)
635 0 : cstate->copy_file = stdout;
636 : }
637 : else
638 : {
639 38 : cstate->filename = pstrdup(filename);
640 38 : cstate->is_program = is_program;
641 :
642 38 : if (is_program)
643 : {
644 0 : progress_vals[1] = PROGRESS_COPY_TYPE_PROGRAM;
645 0 : cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_W);
646 0 : if (cstate->copy_file == NULL)
647 0 : ereport(ERROR,
648 : (errcode_for_file_access(),
649 : errmsg("could not execute command \"%s\": %m",
650 : cstate->filename)));
651 : }
652 : else
653 : {
654 : mode_t oumask; /* Pre-existing umask value */
655 : struct stat st;
656 :
657 38 : progress_vals[1] = PROGRESS_COPY_TYPE_FILE;
658 :
659 : /*
660 : * Prevent write to relative path ... too easy to shoot oneself in
661 : * the foot by overwriting a database file ...
662 : */
663 38 : if (!is_absolute_path(filename))
664 0 : ereport(ERROR,
665 : (errcode(ERRCODE_INVALID_NAME),
666 : errmsg("relative path not allowed for COPY to file")));
667 :
668 38 : oumask = umask(S_IWGRP | S_IWOTH);
669 38 : PG_TRY();
670 : {
671 38 : cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_W);
672 : }
673 0 : PG_FINALLY();
674 : {
675 38 : umask(oumask);
676 : }
677 38 : PG_END_TRY();
678 38 : if (cstate->copy_file == NULL)
679 : {
680 : /* copy errno because ereport subfunctions might change it */
681 0 : int save_errno = errno;
682 :
683 0 : ereport(ERROR,
684 : (errcode_for_file_access(),
685 : errmsg("could not open file \"%s\" for writing: %m",
686 : cstate->filename),
687 : (save_errno == ENOENT || save_errno == EACCES) ?
688 : errhint("COPY TO instructs the PostgreSQL server process to write a file. "
689 : "You may want a client-side facility such as psql's \\copy.") : 0));
690 : }
691 :
692 38 : if (fstat(fileno(cstate->copy_file), &st))
693 0 : ereport(ERROR,
694 : (errcode_for_file_access(),
695 : errmsg("could not stat file \"%s\": %m",
696 : cstate->filename)));
697 :
698 38 : if (S_ISDIR(st.st_mode))
699 0 : ereport(ERROR,
700 : (errcode(ERRCODE_WRONG_OBJECT_TYPE),
701 : errmsg("\"%s\" is a directory", cstate->filename)));
702 : }
703 : }
704 :
705 : /* initialize progress */
706 7654 : pgstat_progress_start_command(PROGRESS_COMMAND_COPY,
707 7654 : cstate->rel ? RelationGetRelid(cstate->rel) : InvalidOid);
708 7654 : pgstat_progress_update_multi_param(2, progress_cols, progress_vals);
709 :
710 7654 : cstate->bytes_processed = 0;
711 :
712 7654 : MemoryContextSwitchTo(oldcontext);
713 :
714 7654 : return cstate;
715 : }
716 :
717 : /*
718 : * Clean up storage and release resources for COPY TO.
719 : */
720 : void
721 7652 : EndCopyTo(CopyToState cstate)
722 : {
723 7652 : if (cstate->queryDesc != NULL)
724 : {
725 : /* Close down the query and free resources. */
726 316 : ExecutorFinish(cstate->queryDesc);
727 316 : ExecutorEnd(cstate->queryDesc);
728 316 : FreeQueryDesc(cstate->queryDesc);
729 316 : PopActiveSnapshot();
730 : }
731 :
732 : /* Clean up storage */
733 7652 : EndCopy(cstate);
734 7652 : }
735 :
736 : /*
737 : * Copy from relation or query TO file.
738 : *
739 : * Returns the number of rows processed.
740 : */
741 : uint64
742 7654 : DoCopyTo(CopyToState cstate)
743 : {
744 7654 : bool pipe = (cstate->filename == NULL && cstate->data_dest_cb == NULL);
745 7654 : bool fe_copy = (pipe && whereToSendOutput == DestRemote);
746 : TupleDesc tupDesc;
747 : int num_phys_attrs;
748 : ListCell *cur;
749 : uint64 processed;
750 :
751 7654 : if (fe_copy)
752 7614 : SendCopyBegin(cstate);
753 :
754 7654 : if (cstate->rel)
755 7338 : tupDesc = RelationGetDescr(cstate->rel);
756 : else
757 316 : tupDesc = cstate->queryDesc->tupDesc;
758 7654 : num_phys_attrs = tupDesc->natts;
759 7654 : cstate->opts.null_print_client = cstate->opts.null_print; /* default */
760 :
761 : /* We use fe_msgbuf as a per-row buffer regardless of copy_dest */
762 7654 : cstate->fe_msgbuf = makeStringInfo();
763 :
764 : /* Get info about the columns we need to process. */
765 7654 : cstate->out_functions = (FmgrInfo *) palloc(num_phys_attrs * sizeof(FmgrInfo));
766 34874 : foreach(cur, cstate->attnumlist)
767 : {
768 27222 : int attnum = lfirst_int(cur);
769 : Oid out_func_oid;
770 : bool isvarlena;
771 27222 : Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
772 :
773 27222 : if (cstate->opts.binary)
774 62 : getTypeBinaryOutputInfo(attr->atttypid,
775 : &out_func_oid,
776 : &isvarlena);
777 : else
778 27160 : getTypeOutputInfo(attr->atttypid,
779 : &out_func_oid,
780 : &isvarlena);
781 27220 : fmgr_info(out_func_oid, &cstate->out_functions[attnum - 1]);
782 : }
783 :
784 : /*
785 : * Create a temporary memory context that we can reset once per row to
786 : * recover palloc'd memory. This avoids any problems with leaks inside
787 : * datatype output routines, and should be faster than retail pfree's
788 : * anyway. (We don't need a whole econtext as CopyFrom does.)
789 : */
790 7652 : cstate->rowcontext = AllocSetContextCreate(CurrentMemoryContext,
791 : "COPY TO",
792 : ALLOCSET_DEFAULT_SIZES);
793 :
794 7652 : if (cstate->opts.binary)
795 : {
796 : /* Generate header for a binary copy */
797 : int32 tmp;
798 :
799 : /* Signature */
800 14 : CopySendData(cstate, BinarySignature, 11);
801 : /* Flags field */
802 14 : tmp = 0;
803 14 : CopySendInt32(cstate, tmp);
804 : /* No header extension */
805 14 : tmp = 0;
806 14 : CopySendInt32(cstate, tmp);
807 : }
808 : else
809 : {
810 : /*
811 : * For non-binary copy, we need to convert null_print to file
812 : * encoding, because it will be sent directly with CopySendString.
813 : */
814 7638 : if (cstate->need_transcoding)
815 2 : cstate->opts.null_print_client = pg_server_to_any(cstate->opts.null_print,
816 : cstate->opts.null_print_len,
817 : cstate->file_encoding);
818 :
819 : /* if a header has been requested send the line */
820 7638 : if (cstate->opts.header_line)
821 : {
822 18 : bool hdr_delim = false;
823 :
824 54 : foreach(cur, cstate->attnumlist)
825 : {
826 36 : int attnum = lfirst_int(cur);
827 : char *colname;
828 :
829 36 : if (hdr_delim)
830 18 : CopySendChar(cstate, cstate->opts.delim[0]);
831 36 : hdr_delim = true;
832 :
833 36 : colname = NameStr(TupleDescAttr(tupDesc, attnum - 1)->attname);
834 :
835 36 : if (cstate->opts.csv_mode)
836 24 : CopyAttributeOutCSV(cstate, colname, false);
837 : else
838 12 : CopyAttributeOutText(cstate, colname);
839 : }
840 :
841 18 : CopySendEndOfRow(cstate);
842 : }
843 : }
844 :
845 7652 : if (cstate->rel)
846 : {
847 : TupleTableSlot *slot;
848 : TableScanDesc scandesc;
849 :
850 7336 : scandesc = table_beginscan(cstate->rel, GetActiveSnapshot(), 0, NULL);
851 7336 : slot = table_slot_create(cstate->rel, NULL);
852 :
853 7336 : processed = 0;
854 3461174 : while (table_scan_getnextslot(scandesc, ForwardScanDirection, slot))
855 : {
856 3453838 : CHECK_FOR_INTERRUPTS();
857 :
858 : /* Deconstruct the tuple ... */
859 3453838 : slot_getallattrs(slot);
860 :
861 : /* Format and send the data */
862 3453838 : CopyOneRowTo(cstate, slot);
863 :
864 : /*
865 : * Increment the number of processed tuples, and report the
866 : * progress.
867 : */
868 3453838 : pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
869 : ++processed);
870 : }
871 :
872 7336 : ExecDropSingleTupleTableSlot(slot);
873 7336 : table_endscan(scandesc);
874 : }
875 : else
876 : {
877 : /* run the plan --- the dest receiver will send tuples */
878 316 : ExecutorRun(cstate->queryDesc, ForwardScanDirection, 0, true);
879 316 : processed = ((DR_copy *) cstate->queryDesc->dest)->processed;
880 : }
881 :
882 7652 : if (cstate->opts.binary)
883 : {
884 : /* Generate trailer for a binary copy */
885 14 : CopySendInt16(cstate, -1);
886 : /* Need to flush out the trailer */
887 14 : CopySendEndOfRow(cstate);
888 : }
889 :
890 7652 : MemoryContextDelete(cstate->rowcontext);
891 :
892 7652 : if (fe_copy)
893 7612 : SendCopyEnd(cstate);
894 :
895 7652 : return processed;
896 : }
897 :
898 : /*
899 : * Emit one row during DoCopyTo().
900 : */
901 : static void
902 3460764 : CopyOneRowTo(CopyToState cstate, TupleTableSlot *slot)
903 : {
904 3460764 : bool need_delim = false;
905 3460764 : FmgrInfo *out_functions = cstate->out_functions;
906 : MemoryContext oldcontext;
907 : ListCell *cur;
908 : char *string;
909 :
910 3460764 : MemoryContextReset(cstate->rowcontext);
911 3460764 : oldcontext = MemoryContextSwitchTo(cstate->rowcontext);
912 :
913 3460764 : if (cstate->opts.binary)
914 : {
915 : /* Binary per-tuple header */
916 32 : CopySendInt16(cstate, list_length(cstate->attnumlist));
917 : }
918 :
919 : /* Make sure the tuple is fully deconstructed */
920 3460764 : slot_getallattrs(slot);
921 :
922 14192344 : foreach(cur, cstate->attnumlist)
923 : {
924 10731580 : int attnum = lfirst_int(cur);
925 10731580 : Datum value = slot->tts_values[attnum - 1];
926 10731580 : bool isnull = slot->tts_isnull[attnum - 1];
927 :
928 10731580 : if (!cstate->opts.binary)
929 : {
930 10731420 : if (need_delim)
931 7270818 : CopySendChar(cstate, cstate->opts.delim[0]);
932 10731420 : need_delim = true;
933 : }
934 :
935 10731580 : if (isnull)
936 : {
937 632484 : if (!cstate->opts.binary)
938 632454 : CopySendString(cstate, cstate->opts.null_print_client);
939 : else
940 30 : CopySendInt32(cstate, -1);
941 : }
942 : else
943 : {
944 10099096 : if (!cstate->opts.binary)
945 : {
946 10098966 : string = OutputFunctionCall(&out_functions[attnum - 1],
947 : value);
948 10098966 : if (cstate->opts.csv_mode)
949 570 : CopyAttributeOutCSV(cstate, string,
950 570 : cstate->opts.force_quote_flags[attnum - 1]);
951 : else
952 10098396 : CopyAttributeOutText(cstate, string);
953 : }
954 : else
955 : {
956 : bytea *outputbytes;
957 :
958 130 : outputbytes = SendFunctionCall(&out_functions[attnum - 1],
959 : value);
960 130 : CopySendInt32(cstate, VARSIZE(outputbytes) - VARHDRSZ);
961 130 : CopySendData(cstate, VARDATA(outputbytes),
962 130 : VARSIZE(outputbytes) - VARHDRSZ);
963 : }
964 : }
965 : }
966 :
967 3460764 : CopySendEndOfRow(cstate);
968 :
969 3460764 : MemoryContextSwitchTo(oldcontext);
970 3460764 : }
971 :
972 : /*
973 : * Send text representation of one attribute, with conversion and escaping
974 : */
/*
 * DUMPSOFAR --- send the literal run accumulated so far.
 *
 * Expands in a scope that must provide the locals "cstate", "start" and
 * "ptr".  If "ptr" is past "start", the bytes from "start" up to (but not
 * including) "ptr" are handed to CopySendData().  The macro does not move
 * "start"; the caller resets it afterwards.
 */
#define DUMPSOFAR() \
	do { \
		if (start < ptr) \
			CopySendData(cstate, start, ptr - start); \
	} while (0)
980 :
981 : static void
982 10098408 : CopyAttributeOutText(CopyToState cstate, const char *string)
983 : {
984 : const char *ptr;
985 : const char *start;
986 : char c;
987 10098408 : char delimc = cstate->opts.delim[0];
988 :
989 10098408 : if (cstate->need_transcoding)
990 0 : ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
991 : else
992 10098408 : ptr = string;
993 :
994 : /*
995 : * We have to grovel through the string searching for control characters
996 : * and instances of the delimiter character. In most cases, though, these
997 : * are infrequent. To avoid overhead from calling CopySendData once per
998 : * character, we dump out all characters between escaped characters in a
999 : * single call. The loop invariant is that the data from "start" to "ptr"
1000 : * can be sent literally, but hasn't yet been.
1001 : *
1002 : * We can skip pg_encoding_mblen() overhead when encoding is safe, because
1003 : * in valid backend encodings, extra bytes of a multibyte character never
1004 : * look like ASCII. This loop is sufficiently performance-critical that
1005 : * it's worth making two copies of it to get the IS_HIGHBIT_SET() test out
1006 : * of the normal safe-encoding path.
1007 : */
1008 10098408 : if (cstate->encoding_embeds_ascii)
1009 : {
1010 0 : start = ptr;
1011 0 : while ((c = *ptr) != '\0')
1012 : {
1013 0 : if ((unsigned char) c < (unsigned char) 0x20)
1014 : {
1015 : /*
1016 : * \r and \n must be escaped, the others are traditional. We
1017 : * prefer to dump these using the C-like notation, rather than
1018 : * a backslash and the literal character, because it makes the
1019 : * dump file a bit more proof against Microsoftish data
1020 : * mangling.
1021 : */
1022 0 : switch (c)
1023 : {
1024 0 : case '\b':
1025 0 : c = 'b';
1026 0 : break;
1027 0 : case '\f':
1028 0 : c = 'f';
1029 0 : break;
1030 0 : case '\n':
1031 0 : c = 'n';
1032 0 : break;
1033 0 : case '\r':
1034 0 : c = 'r';
1035 0 : break;
1036 0 : case '\t':
1037 0 : c = 't';
1038 0 : break;
1039 0 : case '\v':
1040 0 : c = 'v';
1041 0 : break;
1042 0 : default:
1043 : /* If it's the delimiter, must backslash it */
1044 0 : if (c == delimc)
1045 0 : break;
1046 : /* All ASCII control chars are length 1 */
1047 0 : ptr++;
1048 0 : continue; /* fall to end of loop */
1049 : }
1050 : /* if we get here, we need to convert the control char */
1051 0 : DUMPSOFAR();
1052 0 : CopySendChar(cstate, '\\');
1053 0 : CopySendChar(cstate, c);
1054 0 : start = ++ptr; /* do not include char in next run */
1055 : }
1056 0 : else if (c == '\\' || c == delimc)
1057 : {
1058 0 : DUMPSOFAR();
1059 0 : CopySendChar(cstate, '\\');
1060 0 : start = ptr++; /* we include char in next run */
1061 : }
1062 0 : else if (IS_HIGHBIT_SET(c))
1063 0 : ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
1064 : else
1065 0 : ptr++;
1066 : }
1067 : }
1068 : else
1069 : {
1070 10098408 : start = ptr;
1071 107154736 : while ((c = *ptr) != '\0')
1072 : {
1073 97056328 : if ((unsigned char) c < (unsigned char) 0x20)
1074 : {
1075 : /*
1076 : * \r and \n must be escaped, the others are traditional. We
1077 : * prefer to dump these using the C-like notation, rather than
1078 : * a backslash and the literal character, because it makes the
1079 : * dump file a bit more proof against Microsoftish data
1080 : * mangling.
1081 : */
1082 4534 : switch (c)
1083 : {
1084 0 : case '\b':
1085 0 : c = 'b';
1086 0 : break;
1087 0 : case '\f':
1088 0 : c = 'f';
1089 0 : break;
1090 4528 : case '\n':
1091 4528 : c = 'n';
1092 4528 : break;
1093 0 : case '\r':
1094 0 : c = 'r';
1095 0 : break;
1096 6 : case '\t':
1097 6 : c = 't';
1098 6 : break;
1099 0 : case '\v':
1100 0 : c = 'v';
1101 0 : break;
1102 0 : default:
1103 : /* If it's the delimiter, must backslash it */
1104 0 : if (c == delimc)
1105 0 : break;
1106 : /* All ASCII control chars are length 1 */
1107 0 : ptr++;
1108 0 : continue; /* fall to end of loop */
1109 : }
1110 : /* if we get here, we need to convert the control char */
1111 4534 : DUMPSOFAR();
1112 4534 : CopySendChar(cstate, '\\');
1113 4534 : CopySendChar(cstate, c);
1114 4534 : start = ++ptr; /* do not include char in next run */
1115 : }
1116 97051794 : else if (c == '\\' || c == delimc)
1117 : {
1118 2708 : DUMPSOFAR();
1119 2708 : CopySendChar(cstate, '\\');
1120 2708 : start = ptr++; /* we include char in next run */
1121 : }
1122 : else
1123 97049086 : ptr++;
1124 : }
1125 : }
1126 :
1127 10098408 : DUMPSOFAR();
1128 10098408 : }
1129 :
1130 : /*
1131 : * Send text representation of one attribute, with conversion and
1132 : * CSV-style escaping
1133 : */
1134 : static void
1135 594 : CopyAttributeOutCSV(CopyToState cstate, const char *string,
1136 : bool use_quote)
1137 : {
1138 : const char *ptr;
1139 : const char *start;
1140 : char c;
1141 594 : char delimc = cstate->opts.delim[0];
1142 594 : char quotec = cstate->opts.quote[0];
1143 594 : char escapec = cstate->opts.escape[0];
1144 594 : bool single_attr = (list_length(cstate->attnumlist) == 1);
1145 :
1146 : /* force quoting if it matches null_print (before conversion!) */
1147 594 : if (!use_quote && strcmp(string, cstate->opts.null_print) == 0)
1148 54 : use_quote = true;
1149 :
1150 594 : if (cstate->need_transcoding)
1151 0 : ptr = pg_server_to_any(string, strlen(string), cstate->file_encoding);
1152 : else
1153 594 : ptr = string;
1154 :
1155 : /*
1156 : * Make a preliminary pass to discover if it needs quoting
1157 : */
1158 594 : if (!use_quote)
1159 : {
1160 : /*
1161 : * Because '\.' can be a data value, quote it if it appears alone on a
1162 : * line so it is not interpreted as the end-of-data marker.
1163 : */
1164 408 : if (single_attr && strcmp(ptr, "\\.") == 0)
1165 6 : use_quote = true;
1166 : else
1167 : {
1168 402 : const char *tptr = ptr;
1169 :
1170 2112 : while ((c = *tptr) != '\0')
1171 : {
1172 1842 : if (c == delimc || c == quotec || c == '\n' || c == '\r')
1173 : {
1174 132 : use_quote = true;
1175 132 : break;
1176 : }
1177 1710 : if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
1178 0 : tptr += pg_encoding_mblen(cstate->file_encoding, tptr);
1179 : else
1180 1710 : tptr++;
1181 : }
1182 : }
1183 : }
1184 :
1185 594 : if (use_quote)
1186 : {
1187 324 : CopySendChar(cstate, quotec);
1188 :
1189 : /*
1190 : * We adopt the same optimization strategy as in CopyAttributeOutText
1191 : */
1192 324 : start = ptr;
1193 2538 : while ((c = *ptr) != '\0')
1194 : {
1195 2214 : if (c == quotec || c == escapec)
1196 : {
1197 156 : DUMPSOFAR();
1198 156 : CopySendChar(cstate, escapec);
1199 156 : start = ptr; /* we include char in next run */
1200 : }
1201 2214 : if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii)
1202 0 : ptr += pg_encoding_mblen(cstate->file_encoding, ptr);
1203 : else
1204 2214 : ptr++;
1205 : }
1206 324 : DUMPSOFAR();
1207 :
1208 324 : CopySendChar(cstate, quotec);
1209 : }
1210 : else
1211 : {
1212 : /* If it doesn't need quoting, we can just dump it as-is */
1213 270 : CopySendString(cstate, ptr);
1214 : }
1215 594 : }
1216 :
/*
 * copy_dest_startup --- executor startup
 *
 * Installed as the rStartup callback by CreateCopyDestReceiver().  It does
 * nothing: none of "self", "operation" or "typeinfo" is examined.
 */
static void
copy_dest_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
{
	/* no-op */
}
1225 :
1226 : /*
1227 : * copy_dest_receive --- receive one tuple
1228 : */
1229 : static bool
1230 6926 : copy_dest_receive(TupleTableSlot *slot, DestReceiver *self)
1231 : {
1232 6926 : DR_copy *myState = (DR_copy *) self;
1233 6926 : CopyToState cstate = myState->cstate;
1234 :
1235 : /* Send the data */
1236 6926 : CopyOneRowTo(cstate, slot);
1237 :
1238 : /* Increment the number of processed tuples, and report the progress */
1239 6926 : pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED,
1240 6926 : ++myState->processed);
1241 :
1242 6926 : return true;
1243 : }
1244 :
/*
 * copy_dest_shutdown --- executor end
 *
 * Installed as the rShutdown callback by CreateCopyDestReceiver().  It does
 * nothing; "self" is not examined.
 */
static void
copy_dest_shutdown(DestReceiver *self)
{
	/* no-op */
}
1253 :
/*
 * copy_dest_destroy --- release DestReceiver object
 *
 * Installed as the rDestroy callback by CreateCopyDestReceiver(), which
 * obtains the object with palloc(); this hands it back with pfree().  Only
 * the receiver struct itself is freed here.
 */
static void
copy_dest_destroy(DestReceiver *self)
{
	pfree(self);
}
1262 :
1263 : /*
1264 : * CreateCopyDestReceiver -- create a suitable DestReceiver object
1265 : */
1266 : DestReceiver *
1267 322 : CreateCopyDestReceiver(void)
1268 : {
1269 322 : DR_copy *self = (DR_copy *) palloc(sizeof(DR_copy));
1270 :
1271 322 : self->pub.receiveSlot = copy_dest_receive;
1272 322 : self->pub.rStartup = copy_dest_startup;
1273 322 : self->pub.rShutdown = copy_dest_shutdown;
1274 322 : self->pub.rDestroy = copy_dest_destroy;
1275 322 : self->pub.mydest = DestCopyOut;
1276 :
1277 322 : self->cstate = NULL; /* will be set later */
1278 322 : self->processed = 0;
1279 :
1280 322 : return (DestReceiver *) self;
1281 : }
|