Line data Source code
1 : /*-------------------------------------------------------------------------
2 : *
3 : * copyfromparse.c
4 : * Parse CSV/text/binary format for COPY FROM.
5 : *
6 : * This file contains routines to parse the text, CSV and binary input
7 : * formats. The main entry point is NextCopyFrom(), which parses the
8 : * next input line and returns it as Datums.
9 : *
10 : * In text/CSV mode, the parsing happens in multiple stages:
11 : *
12 : * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf
13 : *                1.          2.            3.           4.
14 : *
15 : * 1. CopyLoadRawBuf() reads raw data from the input file or client, and
16 : * places it into 'raw_buf'.
17 : *
18 : * 2. CopyConvertBuf() calls the encoding conversion function to convert
19 : * the data in 'raw_buf' from client to server encoding, placing the
20 : * converted result in 'input_buf'.
21 : *
22 : * 3. CopyReadLine() parses the data in 'input_buf', one line at a time.
23 : * It is responsible for finding the next newline marker, taking quote and
24 : * escape characters into account according to the COPY options. The line
25 : * is copied into 'line_buf', with quotes and escape characters still
26 : * intact.
27 : *
28 : * 4. CopyReadAttributesText/CSV() function takes the input line from
29 : * 'line_buf', and splits it into fields, unescaping the data as required.
30 : * The fields are stored in 'attribute_buf', and 'raw_fields' array holds
31 : * pointers to each field.
32 : *
33 : * If encoding conversion is not required, a shortcut is taken in step 2 to
34 : * avoid copying the data unnecessarily. The 'input_buf' pointer is set to
35 : * point directly to 'raw_buf', so that CopyLoadRawBuf() loads the raw data
36 : * directly into 'input_buf'. CopyConvertBuf() then merely validates that
37 : * the data is valid in the current encoding.
38 : *
39 : * In binary mode, the pipeline is much simpler. Input is loaded into
40 : * 'raw_buf', and encoding conversion is done in the datatype-specific
41 : * receive functions, if required. 'input_buf' and 'line_buf' are not used,
42 : * but 'attribute_buf' is used as a temporary buffer to hold one attribute's
43 : * data when it's passed the receive function.
44 : *
45 : * 'raw_buf' is always 64 kB in size (RAW_BUF_SIZE). 'input_buf' is also
46 : * 64 kB (INPUT_BUF_SIZE), if encoding conversion is required. 'line_buf'
47 : * and 'attribute_buf' are expanded on demand, to hold the longest line
48 : * encountered so far.
49 : *
50 : * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
51 : * Portions Copyright (c) 1994, Regents of the University of California
52 : *
53 : *
54 : * IDENTIFICATION
55 : * src/backend/commands/copyfromparse.c
56 : *
57 : *-------------------------------------------------------------------------
58 : */
59 : #include "postgres.h"
60 :
61 : #include <ctype.h>
62 : #include <unistd.h>
63 : #include <sys/stat.h>
64 :
65 : #include "commands/copy.h"
66 : #include "commands/copyfrom_internal.h"
67 : #include "commands/progress.h"
68 : #include "executor/executor.h"
69 : #include "libpq/libpq.h"
70 : #include "libpq/pqformat.h"
71 : #include "mb/pg_wchar.h"
72 : #include "miscadmin.h"
73 : #include "pgstat.h"
74 : #include "port/pg_bswap.h"
75 : #include "utils/builtins.h"
76 : #include "utils/rel.h"
77 :
/*
 * ISOCTAL(c) is true (1) when 'c' is one of the characters '0'..'7', and
 * false (0) otherwise.
 *
 * The range test is written as a single unsigned subtraction so that the
 * argument is evaluated exactly once, which makes the macro safe to use
 * with expressions that have side effects.  Any value below '0' wraps
 * around to a very large unsigned number and so fails the "< 8" check;
 * because the arithmetic is unsigned there is no overflow hazard.
 *
 * OCTVALUE(c) maps an octal digit character to its numeric value.  It is
 * only meaningful for characters that satisfy ISOCTAL().
 */
#define ISOCTAL(c) (((unsigned int) (c)) - ((unsigned int) '0') < 8U)
#define OCTVALUE(c) ((c) - '0')
80 :
81 : /*
82 : * These macros centralize code used to process line_buf and input_buf buffers.
83 : * They are macros because they often do continue/break control and to avoid
84 : * function call overhead in tight COPY loops.
85 : *
86 : * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
87 : * prevent the continue/break processing from working. We end the "if (1)"
88 : * with "else ((void) 0)" to ensure the "if" does not unintentionally match
89 : * any "else" in the calling code, and to avoid any compiler warnings about
90 : * empty statements. See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
91 : */
92 :
93 : /*
94 : * This keeps the character read at the top of the loop in the buffer
95 : * even if there is more than one read-ahead.
 *
 * The expansion uses the following names from the scope in which the macro
 * is invoked: input_buf_ptr, copy_buf_len, hit_eof, prev_raw_ptr and
 * need_data.  If the byte at offset (input_buf_ptr + extralen) lies at or
 * beyond copy_buf_len and hit_eof is false, input_buf_ptr is rewound to
 * prev_raw_ptr, need_data is set, and "continue" is executed on whatever
 * loop encloses the macro invocation.  Otherwise the macro does nothing.
96 : */
97 : #define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
98 : if (1) \
99 : { \
100 : if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
101 : { \
102 : input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
103 : need_data = true; \
104 : continue; \
105 : } \
106 : } else ((void) 0)
107 :
/*
 * Counterpart of the macro above for the case where hit_eof is true.
 *
 * The expansion uses input_buf_ptr, copy_buf_len, hit_eof and result from
 * the invoking scope.  When the byte at offset (input_buf_ptr + extralen)
 * lies at or beyond copy_buf_len and hit_eof is true, "result" is set to
 * true and "break" is executed on the enclosing loop (or switch); if
 * extralen is nonzero, input_buf_ptr is first advanced to copy_buf_len.
 * Otherwise the macro does nothing.
 */
108 : /* This consumes the remainder of the buffer and breaks */
109 : #define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
110 : if (1) \
111 : { \
112 : if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
113 : { \
114 : if (extralen) \
115 : input_buf_ptr = copy_buf_len; /* consume the partial character */ \
116 : /* backslash just before EOF, treat as data char */ \
117 : result = true; \
118 : break; \
119 : } \
120 : } else ((void) 0)
121 :
122 : /*
123 : * Transfer any approved data to line_buf; must do this to be sure
124 : * there is some room in input_buf.
 *
 * The expansion uses "cstate" and "input_buf_ptr" from the invoking scope.
 * If input_buf_ptr is ahead of cstate->input_buf_index, the bytes of
 * cstate->input_buf in the range [input_buf_index, input_buf_ptr) are
 * appended to cstate->line_buf and input_buf_index is advanced to
 * input_buf_ptr.  Otherwise the macro does nothing.
125 : */
126 : #define REFILL_LINEBUF \
127 : if (1) \
128 : { \
129 : if (input_buf_ptr > cstate->input_buf_index) \
130 : { \
131 : appendBinaryStringInfo(&cstate->line_buf, \
132 : cstate->input_buf + cstate->input_buf_index, \
133 : input_buf_ptr - cstate->input_buf_index); \
134 : cstate->input_buf_index = input_buf_ptr; \
135 : } \
136 : } else ((void) 0)
137 :
138 : /* NOTE: there's a copy of this in copyto.c */
/*
 * Signature expected at the start of binary-format input.  The literal
 * spells out exactly 11 bytes (the last one being an explicit \0), and the
 * array is declared with 11 elements, so the literal's implicit terminating
 * NUL is not stored.  ReceiveCopyBinaryHeader() compares all 11 bytes with
 * memcmp(), so this must not be treated as a C string.
 */
139 : static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
140 :
141 :
142 : /* non-export function prototypes */
/*
 * Line- and attribute-level readers.  Their definitions are not part of
 * the portion of the file documented here; NextCopyFromRawFields() calls
 * CopyReadLine() and the two CopyReadAttributes*() functions.
 */
143 : static bool CopyReadLine(CopyFromState cstate);
144 : static bool CopyReadLineText(CopyFromState cstate);
145 : static int CopyReadAttributesText(CopyFromState cstate);
146 : static int CopyReadAttributesCSV(CopyFromState cstate);
147 : static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
148 : Oid typioparam, int32 typmod,
149 : bool *isnull);
150 :
151 :
152 : /* Low-level communications functions */
/*
 * CopyGetData() pulls bytes from the data source; CopyGetInt32() and
 * CopyGetInt16() read network-byte-order integers via CopyReadBinaryData();
 * CopyLoadInputBuf() refills input_buf for the text/CSV path.
 */
153 : static int CopyGetData(CopyFromState cstate, void *databuf,
154 : int minread, int maxread);
155 : static inline bool CopyGetInt32(CopyFromState cstate, int32 *val);
156 : static inline bool CopyGetInt16(CopyFromState cstate, int16 *val);
157 : static void CopyLoadInputBuf(CopyFromState cstate);
158 : static int CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes);
159 :
160 : void
161 920 : ReceiveCopyBegin(CopyFromState cstate)
162 : {
163 : StringInfoData buf;
164 920 : int natts = list_length(cstate->attnumlist);
165 920 : int16 format = (cstate->opts.binary ? 1 : 0);
166 : int i;
167 :
168 920 : pq_beginmessage(&buf, PqMsg_CopyInResponse);
169 920 : pq_sendbyte(&buf, format); /* overall format */
170 920 : pq_sendint16(&buf, natts);
171 3102 : for (i = 0; i < natts; i++)
172 2182 : pq_sendint16(&buf, format); /* per-column formats */
173 920 : pq_endmessage(&buf);
174 920 : cstate->copy_src = COPY_FRONTEND;
175 920 : cstate->fe_msgbuf = makeStringInfo();
176 : /* We *must* flush here to ensure FE knows it can send. */
177 920 : pq_flush();
178 920 : }
179 :
180 : void
181 14 : ReceiveCopyBinaryHeader(CopyFromState cstate)
182 : {
183 : char readSig[11];
184 : int32 tmp;
185 :
186 : /* Signature */
187 14 : if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
188 14 : memcmp(readSig, BinarySignature, 11) != 0)
189 0 : ereport(ERROR,
190 : (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
191 : errmsg("COPY file signature not recognized")));
192 : /* Flags field */
193 14 : if (!CopyGetInt32(cstate, &tmp))
194 0 : ereport(ERROR,
195 : (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
196 : errmsg("invalid COPY file header (missing flags)")));
197 14 : if ((tmp & (1 << 16)) != 0)
198 0 : ereport(ERROR,
199 : (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
200 : errmsg("invalid COPY file header (WITH OIDS)")));
201 14 : tmp &= ~(1 << 16);
202 14 : if ((tmp >> 16) != 0)
203 0 : ereport(ERROR,
204 : (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
205 : errmsg("unrecognized critical flags in COPY file header")));
206 : /* Header extension length */
207 14 : if (!CopyGetInt32(cstate, &tmp) ||
208 14 : tmp < 0)
209 0 : ereport(ERROR,
210 : (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
211 : errmsg("invalid COPY file header (missing length)")));
212 : /* Skip extension header, if present */
213 14 : while (tmp-- > 0)
214 : {
215 0 : if (CopyReadBinaryData(cstate, readSig, 1) != 1)
216 0 : ereport(ERROR,
217 : (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
218 : errmsg("invalid COPY file header (wrong length)")));
219 : }
220 14 : }
221 :
222 : /*
223 : * CopyGetData reads data from the source (file, frontend, or callback)
224 : *
225 : * We attempt to read at least minread, and at most maxread, bytes from
226 : * the source. The actual number of bytes read is returned; if this is
227 : * less than minread, EOF was detected.
 *
 * The bytes are stored at 'databuf'.  How the request is satisfied depends
 * on cstate->copy_src:
 *
 * COPY_FILE: a single fread() of up to maxread bytes is issued; minread is
 * not consulted.  raw_reached_eof is set when fread() returns zero bytes.
 *
 * COPY_FRONTEND: protocol messages are collected into cstate->fe_msgbuf
 * and their payload is copied out until minread bytes have been gathered,
 * maxread is exhausted, or raw_reached_eof is set.  A CopyDone message
 * sets raw_reached_eof and returns immediately with whatever has been
 * gathered so far; CopyFail raises an error; Flush and Sync are ignored.
 *
 * COPY_CALLBACK: the request is passed on to cstate->data_source_cb and
 * its result is returned as-is.
228 : *
229 : * Note: when copying from the frontend, we expect a proper EOF mark per
230 : * protocol; if the frontend simply drops the connection, we raise error.
231 : * It seems unwise to allow the COPY IN to complete normally in that case.
232 : *
233 : * NB: no data conversion is applied here.
234 : */
235 : static int
236 431510 : CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
237 : {
238 431510 : int bytesread = 0;
239 :
240 431510 : switch (cstate->copy_src)
241 : {
242 1068 : case COPY_FILE:
243 1068 : bytesread = fread(databuf, 1, maxread, cstate->copy_file);
244 1068 : if (ferror(cstate->copy_file))
245 0 : ereport(ERROR,
246 : (errcode_for_file_access(),
247 : errmsg("could not read from COPY file: %m")));
248 1068 : if (bytesread == 0)
249 416 : cstate->raw_reached_eof = true;
250 1068 : break;
251 402500 : case COPY_FRONTEND:
252 803582 : while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
253 : {
254 : int avail;
255 :
/* Fetch messages until fe_msgbuf holds unread payload bytes. */
256 401806 : while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
257 : {
258 : /* Try to receive another message */
259 : int mtype;
260 : int maxmsglen;
261 :
/*
 * Cancel interrupts are held from here until the message body has been
 * collected.  NOTE(review): presumably this is so that a cancel cannot
 * strike in the middle of a message and desynchronize the protocol
 * stream -- confirm against HOLD_CANCEL_INTERRUPTS() documentation.
 */
262 401806 : readmessage:
263 401806 : HOLD_CANCEL_INTERRUPTS();
264 401806 : pq_startmsgread();
265 401806 : mtype = pq_getbyte();
266 401806 : if (mtype == EOF)
267 0 : ereport(ERROR,
268 : (errcode(ERRCODE_CONNECTION_FAILURE),
269 : errmsg("unexpected EOF on client connection with an open transaction")));
270 : /* Validate message type and set packet size limit */
271 : switch (mtype)
272 : {
273 401082 : case PqMsg_CopyData:
274 401082 : maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
275 401082 : break;
276 724 : case PqMsg_CopyDone:
277 : case PqMsg_CopyFail:
278 : case PqMsg_Flush:
279 : case PqMsg_Sync:
280 724 : maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
281 724 : break;
282 0 : default:
283 0 : ereport(ERROR,
284 : (errcode(ERRCODE_PROTOCOL_VIOLATION),
285 : errmsg("unexpected message type 0x%02X during COPY from stdin",
286 : mtype)));
287 : maxmsglen = 0; /* keep compiler quiet */
288 : break;
289 : }
290 : /* Now collect the message body */
291 401806 : if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
292 0 : ereport(ERROR,
293 : (errcode(ERRCODE_CONNECTION_FAILURE),
294 : errmsg("unexpected EOF on client connection with an open transaction")));
295 401806 : RESUME_CANCEL_INTERRUPTS();
296 : /* ... and process it */
297 : switch (mtype)
298 : {
299 401082 : case PqMsg_CopyData:
300 401082 : break;
301 724 : case PqMsg_CopyDone:
302 : /* COPY IN correctly terminated by frontend */
303 724 : cstate->raw_reached_eof = true;
304 724 : return bytesread;
305 0 : case PqMsg_CopyFail:
306 0 : ereport(ERROR,
307 : (errcode(ERRCODE_QUERY_CANCELED),
308 : errmsg("COPY from stdin failed: %s",
309 : pq_getmsgstring(cstate->fe_msgbuf))));
310 : break;
311 0 : case PqMsg_Flush:
312 : case PqMsg_Sync:
313 :
314 : /*
315 : * Ignore Flush/Sync for the convenience of client
316 : * libraries (such as libpq) that may send those
317 : * without noticing that the command they just
318 : * sent was COPY.
319 : */
320 0 : goto readmessage;
321 802888 : default:
322 : Assert(false); /* NOT REACHED */
323 : }
324 : }
/*
 * Hand over as much of the current message as the caller still has room
 * for, then advance the destination pointer and the running counters.
 */
325 401082 : avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
326 401082 : if (avail > maxread)
327 0 : avail = maxread;
328 401082 : pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
329 401082 : databuf = (void *) ((char *) databuf + avail);
330 401082 : maxread -= avail;
331 401082 : bytesread += avail;
332 : }
333 401776 : break;
334 27942 : case COPY_CALLBACK:
335 27942 : bytesread = cstate->data_source_cb(databuf, minread, maxread);
336 27942 : break;
337 : }
338 :
339 430786 : return bytesread;
340 : }
341 :
342 :
343 : /*
344 : * These functions do apply some data conversion
345 : */
346 :
347 : /*
348 : * CopyGetInt32 reads an int32 that appears in network byte order
349 : *
350 : * Returns true if OK, false if EOF
351 : */
352 : static inline bool
353 186 : CopyGetInt32(CopyFromState cstate, int32 *val)
354 : {
355 : uint32 buf;
356 :
357 186 : if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
358 : {
359 0 : *val = 0; /* suppress compiler warning */
360 0 : return false;
361 : }
362 186 : *val = (int32) pg_ntoh32(buf);
363 186 : return true;
364 : }
365 :
366 : /*
367 : * CopyGetInt16 reads an int16 that appears in network byte order
368 : */
369 : static inline bool
370 42 : CopyGetInt16(CopyFromState cstate, int16 *val)
371 : {
372 : uint16 buf;
373 :
374 42 : if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
375 : {
376 0 : *val = 0; /* suppress compiler warning */
377 0 : return false;
378 : }
379 42 : *val = (int16) pg_ntoh16(buf);
380 42 : return true;
381 : }
382 :
383 :
384 : /*
385 : * Perform encoding conversion on data in 'raw_buf', writing the converted
386 : * data into 'input_buf'.
387 : *
388 : * On entry, there must be some data to convert in 'raw_buf'.
389 : */
390 : static void
391 861488 : CopyConvertBuf(CopyFromState cstate)
392 : {
393 : /*
394 : * If the file and server encoding are the same, no encoding conversion is
395 : * required. However, we still need to verify that the input is valid for
396 : * the encoding.
397 : */
398 861488 : if (!cstate->need_transcoding)
399 : {
400 : /*
401 : * When conversion is not required, input_buf and raw_buf are the
402 : * same. raw_buf_len is the total number of bytes in the buffer, and
403 : * input_buf_len tracks how many of those bytes have already been
404 : * verified.
405 : */
406 861404 : int preverifiedlen = cstate->input_buf_len;
407 861404 : int unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
408 : int nverified;
409 :
410 861404 : if (unverifiedlen == 0)
411 : {
412 : /*
413 : * If no more raw data is coming, report the EOF to the caller.
414 : */
415 432136 : if (cstate->raw_reached_eof)
416 1434 : cstate->input_reached_eof = true;
417 432136 : return;
418 : }
419 :
420 : /*
421 : * Verify the new data, including any residual unverified bytes from
422 : * previous round.
423 : */
424 429268 : nverified = pg_encoding_verifymbstr(cstate->file_encoding,
425 429268 : cstate->raw_buf + preverifiedlen,
426 : unverifiedlen);
427 429268 : if (nverified == 0)
428 : {
429 : /*
430 : * Could not verify anything.
431 : *
432 : * If there is no more raw input data coming, it means that there
433 : * was an incomplete multi-byte sequence at the end. Also, if
434 : * there's "enough" input left, we should be able to verify at
435 : * least one character, and a failure to do so means that we've
436 : * hit an invalid byte sequence.
437 : */
438 0 : if (cstate->raw_reached_eof || unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))
439 0 : cstate->input_reached_error = true;
440 0 : return;
441 : }
442 429268 : cstate->input_buf_len += nverified;
443 : }
444 : else
445 : {
446 : /*
447 : * Encoding conversion is needed.
448 : */
449 : int nbytes;
450 : unsigned char *src;
451 : int srclen;
452 : unsigned char *dst;
453 : int dstlen;
454 : int convertedlen;
455 :
456 84 : if (RAW_BUF_BYTES(cstate) == 0)
457 : {
458 : /*
459 : * If no more raw data is coming, report the EOF to the caller.
460 : */
461 48 : if (cstate->raw_reached_eof)
462 12 : cstate->input_reached_eof = true;
463 48 : return;
464 : }
465 :
466 : /*
467 : * First, copy down any unprocessed data.
468 : */
469 36 : nbytes = INPUT_BUF_BYTES(cstate);
470 36 : if (nbytes > 0 && cstate->input_buf_index > 0)
471 0 : memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
472 : nbytes);
473 36 : cstate->input_buf_index = 0;
474 36 : cstate->input_buf_len = nbytes;
475 36 : cstate->input_buf[nbytes] = '\0';
476 :
477 36 : src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
478 36 : srclen = cstate->raw_buf_len - cstate->raw_buf_index;
479 36 : dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
480 36 : dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
481 :
482 : /*
483 : * Do the conversion. This might stop short, if there is an invalid
484 : * byte sequence in the input. We'll convert as much as we can in
485 : * that case.
486 : *
487 : * Note: Even if we hit an invalid byte sequence, we don't report the
488 : * error until all the valid bytes have been consumed. The input
489 : * might contain an end-of-input marker (\.), and we don't want to
490 : * report an error if the invalid byte sequence is after the
491 : * end-of-input marker. We might unnecessarily convert some data
492 : * after the end-of-input marker as long as it's valid for the
493 : * encoding, but that's harmless.
494 : */
495 36 : convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
496 : cstate->file_encoding,
497 : GetDatabaseEncoding(),
498 : src, srclen,
499 : dst, dstlen,
500 : true);
501 36 : if (convertedlen == 0)
502 : {
503 : /*
504 : * Could not convert anything. If there is no more raw input data
505 : * coming, it means that there was an incomplete multi-byte
506 : * sequence at the end. Also, if there is plenty of input left,
507 : * we should be able to convert at least one character, so a
508 : * failure to do so must mean that we've hit a byte sequence
509 : * that's invalid.
510 : */
511 24 : if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
512 12 : cstate->input_reached_error = true;
513 24 : return;
514 : }
515 12 : cstate->raw_buf_index += convertedlen;
516 12 : cstate->input_buf_len += strlen((char *) dst);
517 : }
518 : }
519 :
520 : /*
521 : * Report an encoding or conversion error.
522 : */
523 : static void
524 12 : CopyConversionError(CopyFromState cstate)
525 : {
526 : Assert(cstate->raw_buf_len > 0);
527 : Assert(cstate->input_reached_error);
528 :
529 12 : if (!cstate->need_transcoding)
530 : {
531 : /*
532 : * Everything up to input_buf_len was successfully verified, and
533 : * input_buf_len points to the invalid or incomplete character.
534 : */
535 0 : report_invalid_encoding(cstate->file_encoding,
536 0 : cstate->raw_buf + cstate->input_buf_len,
537 0 : cstate->raw_buf_len - cstate->input_buf_len);
538 : }
539 : else
540 : {
541 : /*
542 : * raw_buf_index points to the invalid or untranslatable character. We
543 : * let the conversion routine report the error, because it can provide
544 : * a more specific error message than we could here. An earlier call
545 : * to the conversion routine in CopyConvertBuf() detected that there
546 : * is an error, now we call the conversion routine again with
547 : * noError=false, to have it throw the error.
548 : */
549 : unsigned char *src;
550 : int srclen;
551 : unsigned char *dst;
552 : int dstlen;
553 :
554 12 : src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
555 12 : srclen = cstate->raw_buf_len - cstate->raw_buf_index;
556 12 : dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
557 12 : dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
558 :
559 12 : (void) pg_do_encoding_conversion_buf(cstate->conversion_proc,
560 : cstate->file_encoding,
561 : GetDatabaseEncoding(),
562 : src, srclen,
563 : dst, dstlen,
564 : false);
565 :
566 : /*
567 : * The conversion routine should have reported an error, so this
568 : * should not be reached.
569 : */
570 0 : elog(ERROR, "encoding conversion failed without error");
571 : }
572 : }
573 :
574 : /*
575 : * Load more data from data source to raw_buf.
576 : *
577 : * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the
578 : * beginning of the buffer, and we load new data after that.
579 : */
580 : static void
581 430786 : CopyLoadRawBuf(CopyFromState cstate)
582 : {
583 : int nbytes;
584 : int inbytes;
585 :
586 : /*
587 : * In text mode, if encoding conversion is not required, raw_buf and
588 : * input_buf point to the same buffer. Their len/index better agree, too.
589 : */
590 430786 : if (cstate->raw_buf == cstate->input_buf)
591 : {
592 : Assert(!cstate->need_transcoding);
593 : Assert(cstate->raw_buf_index == cstate->input_buf_index);
594 : Assert(cstate->input_buf_len <= cstate->raw_buf_len);
595 : }
596 :
597 : /*
598 : * Copy down the unprocessed data if any.
599 : */
600 430786 : nbytes = RAW_BUF_BYTES(cstate);
601 430786 : if (nbytes > 0 && cstate->raw_buf_index > 0)
602 0 : memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
603 : nbytes);
604 430786 : cstate->raw_buf_len -= cstate->raw_buf_index;
605 430786 : cstate->raw_buf_index = 0;
606 :
607 : /*
608 : * If raw_buf and input_buf are in fact the same buffer, adjust the
609 : * input_buf variables, too.
610 : */
611 430786 : if (cstate->raw_buf == cstate->input_buf)
612 : {
613 430702 : cstate->input_buf_len -= cstate->input_buf_index;
614 430702 : cstate->input_buf_index = 0;
615 : }
616 :
617 : /* Load more data */
618 430786 : inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
619 430786 : 1, RAW_BUF_SIZE - cstate->raw_buf_len);
620 430786 : nbytes += inbytes;
621 430786 : cstate->raw_buf[nbytes] = '\0';
622 430786 : cstate->raw_buf_len = nbytes;
623 :
624 430786 : cstate->bytes_processed += inbytes;
625 430786 : pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
626 :
627 430786 : if (inbytes == 0)
628 1470 : cstate->raw_reached_eof = true;
629 430786 : }
630 :
631 : /*
632 : * CopyLoadInputBuf loads some more data into input_buf
633 : *
634 : * On return, at least one more input character is loaded into
635 : * input_buf, or input_reached_eof is set.
636 : *
637 : * If INPUT_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
638 : * of the buffer and then we load more data after that.
639 : */
640 : static void
641 430738 : CopyLoadInputBuf(CopyFromState cstate)
642 : {
643 430738 : int nbytes = INPUT_BUF_BYTES(cstate);
644 :
645 : /*
646 : * The caller has updated input_buf_index to indicate how much of the
647 : * input has been consumed and isn't needed anymore. If input_buf is the
648 : * same physical area as raw_buf, update raw_buf_index accordingly.
649 : */
650 430738 : if (cstate->raw_buf == cstate->input_buf)
651 : {
652 : Assert(!cstate->need_transcoding);
653 : Assert(cstate->input_buf_index >= cstate->raw_buf_index);
654 430702 : cstate->raw_buf_index = cstate->input_buf_index;
655 : }
656 :
657 : for (;;)
658 : {
659 : /* If we now have some unconverted data, try to convert it */
660 861488 : CopyConvertBuf(cstate);
661 :
662 : /* If we now have some more input bytes ready, return them */
663 861488 : if (INPUT_BUF_BYTES(cstate) > nbytes)
664 429280 : return;
665 :
666 : /*
667 : * If we reached an invalid byte sequence, or we're at an incomplete
668 : * multi-byte character but there is no more raw input data, report
669 : * conversion error.
670 : */
671 432208 : if (cstate->input_reached_error)
672 12 : CopyConversionError(cstate);
673 :
674 : /* no more input, and everything has been converted */
675 432196 : if (cstate->input_reached_eof)
676 1446 : break;
677 :
678 : /* Try to load more raw data */
679 : Assert(!cstate->raw_reached_eof);
680 430750 : CopyLoadRawBuf(cstate);
681 : }
682 : }
683 :
684 : /*
685 : * CopyReadBinaryData
686 : *
687 : * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
688 : * and writes them to 'dest'. Returns the number of bytes read (which
689 : * would be less than 'nbytes' only if we reach EOF).
690 : */
691 : static int
692 382 : CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
693 : {
694 382 : int copied_bytes = 0;
695 :
696 382 : if (RAW_BUF_BYTES(cstate) >= nbytes)
697 : {
698 : /* Enough bytes are present in the buffer. */
699 346 : memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
700 346 : cstate->raw_buf_index += nbytes;
701 346 : copied_bytes = nbytes;
702 : }
703 : else
704 : {
705 : /*
706 : * Not enough bytes in the buffer, so must read from the file. Need
707 : * to loop since 'nbytes' could be larger than the buffer size.
708 : */
709 : do
710 : {
711 : int copy_bytes;
712 :
713 : /* Load more data if buffer is empty. */
714 36 : if (RAW_BUF_BYTES(cstate) == 0)
715 : {
716 36 : CopyLoadRawBuf(cstate);
717 36 : if (cstate->raw_reached_eof)
718 12 : break; /* EOF */
719 : }
720 :
721 : /* Transfer some bytes. */
722 24 : copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
723 24 : memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
724 24 : cstate->raw_buf_index += copy_bytes;
725 24 : dest += copy_bytes;
726 24 : copied_bytes += copy_bytes;
727 24 : } while (copied_bytes < nbytes);
728 : }
729 :
730 382 : return copied_bytes;
731 : }
732 :
733 : /*
734 : * Read raw fields in the next line for COPY FROM in text or csv mode.
735 : * Return false if no more lines.
736 : *
737 : * An internal temporary buffer is returned via 'fields'. It is valid until
738 : * the next call of the function. Since the function returns all raw fields
739 : * in the input file, 'nfields' could be different from the number of columns
740 : * in the relation.
741 : *
742 : * NOTE: force_not_null option are not applied to the returned fields.
743 : */
744 : bool
745 1257952 : NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
746 : {
747 : int fldct;
748 : bool done;
749 :
750 : /* only available for text or csv input */
751 : Assert(!cstate->opts.binary);
752 :
753 : /* on input check that the header line is correct if needed */
754 1257952 : if (cstate->cur_lineno == 0 && cstate->opts.header_line)
755 : {
756 : ListCell *cur;
757 : TupleDesc tupDesc;
758 :
759 120 : tupDesc = RelationGetDescr(cstate->rel);
760 :
761 120 : cstate->cur_lineno++;
762 120 : done = CopyReadLine(cstate);
763 :
764 120 : if (cstate->opts.header_line == COPY_HEADER_MATCH)
765 : {
766 : int fldnum;
767 :
768 76 : if (cstate->opts.csv_mode)
769 10 : fldct = CopyReadAttributesCSV(cstate);
770 : else
771 66 : fldct = CopyReadAttributesText(cstate);
772 :
773 76 : if (fldct != list_length(cstate->attnumlist))
774 24 : ereport(ERROR,
775 : (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
776 : errmsg("wrong number of fields in header line: got %d, expected %d",
777 : fldct, list_length(cstate->attnumlist))));
778 :
779 52 : fldnum = 0;
780 158 : foreach(cur, cstate->attnumlist)
781 : {
782 126 : int attnum = lfirst_int(cur);
783 : char *colName;
784 126 : Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
785 :
786 : Assert(fldnum < cstate->max_fields);
787 :
788 126 : colName = cstate->raw_fields[fldnum++];
789 126 : if (colName == NULL)
790 6 : ereport(ERROR,
791 : (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
792 : errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
793 : fldnum, cstate->opts.null_print, NameStr(attr->attname))));
794 :
795 120 : if (namestrcmp(&attr->attname, colName) != 0)
796 : {
797 14 : ereport(ERROR,
798 : (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
799 : errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
800 : fldnum, colName, NameStr(attr->attname))));
801 : }
802 : }
803 : }
804 :
805 76 : if (done)
806 0 : return false;
807 : }
808 :
809 1257908 : cstate->cur_lineno++;
810 :
811 : /* Actually read the line into memory here */
812 1257908 : done = CopyReadLine(cstate);
813 :
814 : /*
815 : * EOF at start of line means we're done. If we see EOF after some
816 : * characters, we act as though it was newline followed by EOF, ie,
817 : * process the line and then exit loop on next iteration.
818 : */
819 1257884 : if (done && cstate->line_buf.len == 0)
820 1476 : return false;
821 :
822 : /* Parse the line into de-escaped field values */
823 1256408 : if (cstate->opts.csv_mode)
824 464 : fldct = CopyReadAttributesCSV(cstate);
825 : else
826 1255944 : fldct = CopyReadAttributesText(cstate);
827 :
828 1256396 : *fields = cstate->raw_fields;
829 1256396 : *nfields = fldct;
830 1256396 : return true;
831 : }
832 :
833 : /*
834 : * Read next tuple from file for COPY FROM. Return false if no more tuples.
835 : *
836 : * 'econtext' is used to evaluate default expression for each column that is
837 : * either not read from the file or is using the DEFAULT option of COPY FROM.
838 : * It can be NULL when no default values are used, i.e. when all columns are
839 : * read from the file, and DEFAULT option is unset.
840 : *
841 : * 'values' and 'nulls' arrays must be the same length as columns of the
842 : * relation passed to BeginCopyFrom. This function fills the arrays.
843 : */
844 : bool
845 1257994 : NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
846 : Datum *values, bool *nulls)
847 : {
848 : TupleDesc tupDesc;
849 : AttrNumber num_phys_attrs,
850 : attr_count,
851 1257994 : num_defaults = cstate->num_defaults;
852 1257994 : FmgrInfo *in_functions = cstate->in_functions;
853 1257994 : Oid *typioparams = cstate->typioparams;
854 : int i;
855 1257994 : int *defmap = cstate->defmap;
856 1257994 : ExprState **defexprs = cstate->defexprs;
857 :
858 1257994 : tupDesc = RelationGetDescr(cstate->rel);
859 1257994 : num_phys_attrs = tupDesc->natts;
860 1257994 : attr_count = list_length(cstate->attnumlist);
861 :
862 : /* Initialize all values for row to NULL */
863 5863012 : MemSet(values, 0, num_phys_attrs * sizeof(Datum));
864 1257994 : MemSet(nulls, true, num_phys_attrs * sizeof(bool));
865 1402102 : MemSet(cstate->defaults, false, num_phys_attrs * sizeof(bool));
866 :
867 1257994 : if (!cstate->opts.binary)
868 : {
869 : char **field_strings;
870 : ListCell *cur;
871 : int fldct;
872 : int fieldno;
873 : char *string;
874 :
875 : /* read raw fields in the next line */
876 1257952 : if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
877 1592 : return false;
878 :
879 : /* check for overflowing fields */
880 1256396 : if (attr_count > 0 && fldct > attr_count)
881 18 : ereport(ERROR,
882 : (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
883 : errmsg("extra data after last expected column")));
884 :
885 1256378 : fieldno = 0;
886 :
887 : /* Loop to read the user attributes on the line. */
888 5730288 : foreach(cur, cstate->attnumlist)
889 : {
890 4474082 : int attnum = lfirst_int(cur);
891 4474082 : int m = attnum - 1;
892 4474082 : Form_pg_attribute att = TupleDescAttr(tupDesc, m);
893 :
894 4474082 : if (fieldno >= fldct)
895 18 : ereport(ERROR,
896 : (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
897 : errmsg("missing data for column \"%s\"",
898 : NameStr(att->attname))));
899 4474064 : string = field_strings[fieldno++];
900 :
901 4474064 : if (cstate->convert_select_flags &&
902 20 : !cstate->convert_select_flags[m])
903 : {
904 : /* ignore input field, leaving column as NULL */
905 10 : continue;
906 : }
907 :
908 4474054 : if (cstate->opts.csv_mode)
909 : {
910 962 : if (string == NULL &&
911 44 : cstate->opts.force_notnull_flags[m])
912 : {
913 : /*
914 : * FORCE_NOT_NULL option is set and column is NULL -
915 : * convert it to the NULL string.
916 : */
917 28 : string = cstate->opts.null_print;
918 : }
919 934 : else if (string != NULL && cstate->opts.force_null_flags[m]
920 50 : && strcmp(string, cstate->opts.null_print) == 0)
921 : {
922 : /*
923 : * FORCE_NULL option is set and column matches the NULL
924 : * string. It must have been quoted, or otherwise the
925 : * string would already have been set to NULL. Convert it
926 : * to NULL as specified.
927 : */
928 26 : string = NULL;
929 : }
930 : }
931 :
932 4474054 : cstate->cur_attname = NameStr(att->attname);
933 4474054 : cstate->cur_attval = string;
934 :
935 4474054 : if (string != NULL)
936 4469210 : nulls[m] = false;
937 :
938 4474054 : if (cstate->defaults[m])
939 : {
940 : /*
941 : * The caller must supply econtext and have switched into the
942 : * per-tuple memory context in it.
943 : */
944 : Assert(econtext != NULL);
945 : Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
946 :
947 60 : values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
948 : }
949 :
950 : /*
951 : * If ON_ERROR is specified with IGNORE, skip rows with soft
952 : * errors
953 : */
954 4473956 : else if (!InputFunctionCallSafe(&in_functions[m],
955 : string,
956 4473994 : typioparams[m],
957 : att->atttypmod,
958 4473994 : (Node *) cstate->escontext,
959 4473994 : &values[m]))
960 : {
961 : Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
962 :
963 116 : cstate->num_errors++;
964 :
965 116 : if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
966 : {
967 : /*
968 : * Since we emit line number and column info in the below
969 : * notice message, we suppress error context information
970 : * other than the relation name.
971 : */
972 : Assert(!cstate->relname_only);
973 42 : cstate->relname_only = true;
974 :
975 42 : if (cstate->cur_attval)
976 : {
977 : char *attval;
978 :
979 36 : attval = CopyLimitPrintoutLength(cstate->cur_attval);
980 36 : ereport(NOTICE,
981 : errmsg("skipping row due to data type incompatibility at line %llu for column \"%s\": \"%s\"",
982 : (unsigned long long) cstate->cur_lineno,
983 : cstate->cur_attname,
984 : attval));
985 36 : pfree(attval);
986 : }
987 : else
988 6 : ereport(NOTICE,
989 : errmsg("skipping row due to data type incompatibility at line %llu for column \"%s\": null input",
990 : (unsigned long long) cstate->cur_lineno,
991 : cstate->cur_attname));
992 :
993 : /* reset relname_only */
994 42 : cstate->relname_only = false;
995 : }
996 :
997 116 : return true;
998 : }
999 :
1000 4473900 : cstate->cur_attname = NULL;
1001 4473900 : cstate->cur_attval = NULL;
1002 : }
1003 :
1004 : Assert(fieldno == attr_count);
1005 : }
1006 : else
1007 : {
1008 : /* binary */
1009 : int16 fld_count;
1010 : ListCell *cur;
1011 :
1012 42 : cstate->cur_lineno++;
1013 :
1014 42 : if (!CopyGetInt16(cstate, &fld_count))
1015 : {
1016 : /* EOF detected (end of file, or protocol-level EOF) */
1017 12 : return false;
1018 : }
1019 :
1020 42 : if (fld_count == -1)
1021 : {
1022 : /*
1023 : * Received EOF marker. Wait for the protocol-level EOF, and
1024 : * complain if it doesn't come immediately. In COPY FROM STDIN,
1025 : * this ensures that we correctly handle CopyFail, if client
1026 : * chooses to send that now. When copying from file, we could
1027 : * ignore the rest of the file like in text mode, but we choose to
1028 : * be consistent with the COPY FROM STDIN case.
1029 : */
1030 : char dummy;
1031 :
1032 12 : if (CopyReadBinaryData(cstate, &dummy, 1) > 0)
1033 0 : ereport(ERROR,
1034 : (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1035 : errmsg("received copy data after EOF marker")));
1036 12 : return false;
1037 : }
1038 :
1039 30 : if (fld_count != attr_count)
1040 0 : ereport(ERROR,
1041 : (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1042 : errmsg("row field count is %d, expected %d",
1043 : (int) fld_count, attr_count)));
1044 :
1045 186 : foreach(cur, cstate->attnumlist)
1046 : {
1047 158 : int attnum = lfirst_int(cur);
1048 158 : int m = attnum - 1;
1049 158 : Form_pg_attribute att = TupleDescAttr(tupDesc, m);
1050 :
1051 158 : cstate->cur_attname = NameStr(att->attname);
1052 314 : values[m] = CopyReadBinaryAttribute(cstate,
1053 158 : &in_functions[m],
1054 158 : typioparams[m],
1055 : att->atttypmod,
1056 : &nulls[m]);
1057 156 : cstate->cur_attname = NULL;
1058 : }
1059 : }
1060 :
1061 : /*
1062 : * Now compute and insert any defaults available for the columns not
1063 : * provided by the input data. Anything not processed here or above will
1064 : * remain NULL.
1065 : */
1066 1316764 : for (i = 0; i < num_defaults; i++)
1067 : {
1068 : /*
1069 : * The caller must supply econtext and have switched into the
1070 : * per-tuple memory context in it.
1071 : */
1072 : Assert(econtext != NULL);
1073 : Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
1074 :
1075 60530 : values[defmap[i]] = ExecEvalExpr(defexprs[defmap[i]], econtext,
1076 60530 : &nulls[defmap[i]]);
1077 : }
1078 :
1079 1256234 : return true;
1080 : }
1081 :
1082 : /*
1083 : * Read the next input line and stash it in line_buf.
1084 : *
1085 : * Result is true if read was terminated by EOF, false if terminated
1086 : * by newline. The terminating newline or EOF marker is not included
1087 : * in the final value of line_buf.
1088 : */
1089 : static bool
1090 1258028 : CopyReadLine(CopyFromState cstate)
1091 : {
1092 : bool result;
1093 :
1094 1258028 : resetStringInfo(&cstate->line_buf);
1095 1258028 : cstate->line_buf_valid = false;
1096 :
1097 : /* Parse data and transfer into line_buf */
1098 1258028 : result = CopyReadLineText(cstate);
1099 :
1100 1258004 : if (result)
1101 : {
1102 : /*
1103 : * Reached EOF. In protocol version 3, we should ignore anything
1104 : * after \. up to the protocol end of copy data. (XXX maybe better
1105 : * not to treat \. as special?)
1106 : */
1107 1476 : if (cstate->copy_src == COPY_FRONTEND)
1108 : {
1109 : int inbytes;
1110 :
1111 : do
1112 : {
1113 724 : inbytes = CopyGetData(cstate, cstate->input_buf,
1114 : 1, INPUT_BUF_SIZE);
1115 724 : } while (inbytes > 0);
1116 724 : cstate->input_buf_index = 0;
1117 724 : cstate->input_buf_len = 0;
1118 724 : cstate->raw_buf_index = 0;
1119 724 : cstate->raw_buf_len = 0;
1120 : }
1121 : }
1122 : else
1123 : {
1124 : /*
1125 : * If we didn't hit EOF, then we must have transferred the EOL marker
1126 : * to line_buf along with the data. Get rid of it.
1127 : */
1128 1256528 : switch (cstate->eol_type)
1129 : {
1130 1256528 : case EOL_NL:
1131 : Assert(cstate->line_buf.len >= 1);
1132 : Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
1133 1256528 : cstate->line_buf.len--;
1134 1256528 : cstate->line_buf.data[cstate->line_buf.len] = '\0';
1135 1256528 : break;
1136 0 : case EOL_CR:
1137 : Assert(cstate->line_buf.len >= 1);
1138 : Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
1139 0 : cstate->line_buf.len--;
1140 0 : cstate->line_buf.data[cstate->line_buf.len] = '\0';
1141 0 : break;
1142 0 : case EOL_CRNL:
1143 : Assert(cstate->line_buf.len >= 2);
1144 : Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
1145 : Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
1146 0 : cstate->line_buf.len -= 2;
1147 0 : cstate->line_buf.data[cstate->line_buf.len] = '\0';
1148 0 : break;
1149 0 : case EOL_UNKNOWN:
1150 : /* shouldn't get here */
1151 : Assert(false);
1152 0 : break;
1153 : }
1154 1258004 : }
1155 :
1156 : /* Now it's safe to use the buffer in error messages */
1157 1258004 : cstate->line_buf_valid = true;
1158 :
1159 1258004 : return result;
1160 : }
1161 :
/*
 * CopyReadLineText - inner loop of CopyReadLine
 *
 * Scans cstate->input_buf, starting at input_buf_index, for the end of the
 * next input line.  Despite the name, this routine handles both text and CSV
 * input: in CSV mode it additionally tracks whether we are inside a quoted
 * field, and it does not give backslash any special meaning.
 *
 * Returns true if the scan ended because the input ran out or because an
 * end-of-copy marker (\.) was recognized; returns false if it ended at a
 * line terminator.  As a side effect, cstate->eol_type is set the first
 * time a line terminator is seen, and later terminators are checked against
 * it.
 *
 * NOTE(review): REFILL_LINEBUF, IF_NEED_REFILL_AND_NOT_EOF_CONTINUE and
 * IF_NEED_REFILL_AND_EOF_BREAK are macros defined outside this excerpt.
 * They operate on the local variables declared below, so those names must
 * not be changed without checking the macro definitions.
 */
static bool
CopyReadLineText(CopyFromState cstate)
{
	char	   *copy_input_buf;
	int			input_buf_ptr;
	int			copy_buf_len;
	bool		need_data = false;
	bool		hit_eof = false;
	bool		result = false;

	/* CSV variables */
	bool		in_quote = false,
				last_was_esc = false;
	char		quotec = '\0';
	char		escapec = '\0';

	if (cstate->opts.csv_mode)
	{
		quotec = cstate->opts.quote[0];
		escapec = cstate->opts.escape[0];
		/* ignore special escape processing if it's the same as quotec */
		if (quotec == escapec)
			escapec = '\0';
	}

	/*
	 * The objective of this loop is to transfer the entire next input line
	 * into line_buf.  Hence, we only care for detecting newlines (\r and/or
	 * \n) and the end-of-copy marker (\.).
	 *
	 * In CSV mode, \r and \n inside a quoted field are just part of the data
	 * value and are put in line_buf.  We keep just enough state to know if we
	 * are currently in a quoted field or not.
	 *
	 * The input has already been converted to the database encoding.  All
	 * supported server encodings have the property that all bytes in a
	 * multi-byte sequence have the high bit set, so a multibyte character
	 * cannot contain any newline or escape characters embedded in the
	 * multibyte sequence.  Therefore, we can process the input byte-by-byte,
	 * regardless of the encoding.
	 *
	 * For speed, we try to move data from input_buf to line_buf in chunks
	 * rather than one character at a time.  input_buf_ptr points to the next
	 * character to examine; any characters from input_buf_index to
	 * input_buf_ptr have been determined to be part of the line, but not yet
	 * transferred to line_buf.
	 *
	 * For a little extra speed within the loop, we copy input_buf and
	 * input_buf_len into local variables.
	 */
	copy_input_buf = cstate->input_buf;
	input_buf_ptr = cstate->input_buf_index;
	copy_buf_len = cstate->input_buf_len;

	for (;;)
	{
		int			prev_raw_ptr;
		char		c;

		/*
		 * Load more data if needed.
		 *
		 * TODO: We could just force four bytes of read-ahead and avoid the
		 * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE().  That was
		 * unsafe with the old v2 COPY protocol, but we don't support that
		 * anymore.
		 */
		if (input_buf_ptr >= copy_buf_len || need_data)
		{
			REFILL_LINEBUF;

			CopyLoadInputBuf(cstate);
			/* update our local variables */
			hit_eof = cstate->input_reached_eof;
			input_buf_ptr = cstate->input_buf_index;
			copy_buf_len = cstate->input_buf_len;

			/*
			 * If we are completely out of data, break out of the loop,
			 * reporting EOF.
			 */
			if (INPUT_BUF_BYTES(cstate) <= 0)
			{
				result = true;
				break;
			}
			need_data = false;
		}

		/*
		 * OK to fetch a character.  Remember where it came from, so that the
		 * end-of-copy check below can tell whether anything preceded it on
		 * this line.
		 */
		prev_raw_ptr = input_buf_ptr;
		c = copy_input_buf[input_buf_ptr++];

		if (cstate->opts.csv_mode)
		{
			/*
			 * If character is '\r', we may need to look ahead below.  Force
			 * fetch of the next character if we don't already have it.  We
			 * need to do this before changing CSV state, in case '\r' is also
			 * the quote or escape character.
			 */
			if (c == '\r')
			{
				IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
			}

			/*
			 * Dealing with quotes and escapes here is mildly tricky.  If the
			 * quote char is also the escape char, there's no problem - we
			 * just use the char as a toggle.  If they are different, we need
			 * to ensure that we only take account of an escape inside a
			 * quoted field and immediately preceding a quote char, and not
			 * the second in an escape-escape sequence.
			 */
			if (in_quote && c == escapec)
				last_was_esc = !last_was_esc;
			if (c == quotec && !last_was_esc)
				in_quote = !in_quote;
			if (c != escapec)
				last_was_esc = false;

			/*
			 * Updating the line count for embedded CR and/or LF chars is
			 * necessarily a little fragile - this test is probably about the
			 * best we can do.  (XXX it's arguable whether we should do this
			 * at all --- is cur_lineno a physical or logical count?)
			 */
			if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
				cstate->cur_lineno++;
		}

		/* Process \r (in CSV mode, only when outside a quoted field) */
		if (c == '\r' && (!cstate->opts.csv_mode || !in_quote))
		{
			/* Check for \r\n on first line, _and_ handle \r\n. */
			if (cstate->eol_type == EOL_UNKNOWN ||
				cstate->eol_type == EOL_CRNL)
			{
				/*
				 * If need more data, go back to loop top to load it.
				 *
				 * Note that if we are at EOF, c will wind up as '\0' because
				 * of the guaranteed pad of input_buf.
				 */
				IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);

				/* get next char */
				c = copy_input_buf[input_buf_ptr];

				if (c == '\n')
				{
					input_buf_ptr++;	/* eat newline */
					cstate->eol_type = EOL_CRNL;	/* in case not set yet */
				}
				else
				{
					/* found \r, but no \n */
					if (cstate->eol_type == EOL_CRNL)
						ereport(ERROR,
								(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
								 !cstate->opts.csv_mode ?
								 errmsg("literal carriage return found in data") :
								 errmsg("unquoted carriage return found in data"),
								 !cstate->opts.csv_mode ?
								 errhint("Use \"\\r\" to represent carriage return.") :
								 errhint("Use quoted CSV field to represent carriage return.")));

					/*
					 * if we got here, it is the first line and we didn't find
					 * \n, so don't consume the peeked character
					 */
					cstate->eol_type = EOL_CR;
				}
			}
			else if (cstate->eol_type == EOL_NL)
				ereport(ERROR,
						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
						 !cstate->opts.csv_mode ?
						 errmsg("literal carriage return found in data") :
						 errmsg("unquoted carriage return found in data"),
						 !cstate->opts.csv_mode ?
						 errhint("Use \"\\r\" to represent carriage return.") :
						 errhint("Use quoted CSV field to represent carriage return.")));
			/* If we reach here, we have found the line terminator */
			break;
		}

		/* Process \n (in CSV mode, only when outside a quoted field) */
		if (c == '\n' && (!cstate->opts.csv_mode || !in_quote))
		{
			if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
				ereport(ERROR,
						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
						 !cstate->opts.csv_mode ?
						 errmsg("literal newline found in data") :
						 errmsg("unquoted newline found in data"),
						 !cstate->opts.csv_mode ?
						 errhint("Use \"\\n\" to represent newline.") :
						 errhint("Use quoted CSV field to represent newline.")));
			cstate->eol_type = EOL_NL;	/* in case not set yet */
			/* If we reach here, we have found the line terminator */
			break;
		}

		/*
		 * Process backslash, except in CSV mode where backslash is a normal
		 * character.
		 */
		if (c == '\\' && !cstate->opts.csv_mode)
		{
			char		c2;

			IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
			IF_NEED_REFILL_AND_EOF_BREAK(0);

			/* -----
			 * get next character
			 * Note: we do not change c so if it isn't \., we can fall
			 * through and continue processing.
			 * -----
			 */
			c2 = copy_input_buf[input_buf_ptr];

			if (c2 == '.')
			{
				input_buf_ptr++;	/* consume the '.' */
				if (cstate->eol_type == EOL_CRNL)
				{
					/* Get the next character */
					IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
					/* if hit_eof, c2 will become '\0' */
					c2 = copy_input_buf[input_buf_ptr++];

					/* With \r\n line endings, \. must be followed by \r */
					if (c2 == '\n')
						ereport(ERROR,
								(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
								 errmsg("end-of-copy marker does not match previous newline style")));
					else if (c2 != '\r')
						ereport(ERROR,
								(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
								 errmsg("end-of-copy marker is not alone on its line")));
				}

				/* Get the next character */
				IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
				/* if hit_eof, c2 will become '\0' */
				c2 = copy_input_buf[input_buf_ptr++];

				if (c2 != '\r' && c2 != '\n')
					ereport(ERROR,
							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
							 errmsg("end-of-copy marker is not alone on its line")));

				/* The terminator after \. must agree with the known EOL style */
				if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
					(cstate->eol_type == EOL_CRNL && c2 != '\n') ||
					(cstate->eol_type == EOL_CR && c2 != '\r'))
					ereport(ERROR,
							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
							 errmsg("end-of-copy marker does not match previous newline style")));

				/*
				 * If there is any data on this line before the \., complain.
				 * Such data is either already in line_buf, or still pending
				 * in input_buf between input_buf_index and the backslash.
				 */
				if (cstate->line_buf.len > 0 ||
					prev_raw_ptr > cstate->input_buf_index)
					ereport(ERROR,
							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
							 errmsg("end-of-copy marker is not alone on its line")));

				/*
				 * Discard the \. and newline, then report EOF.
				 */
				cstate->input_buf_index = input_buf_ptr;
				result = true;	/* report EOF */
				break;
			}
			else
			{
				/*
				 * If we are here, it means we found a backslash followed by
				 * something other than a period.  In non-CSV mode, anything
				 * after a backslash is special, so we skip over that second
				 * character too.  If we didn't do that, \\. would be
				 * considered an end-of-copy marker, while in non-CSV mode it
				 * is a literal backslash followed by a period.
				 */
				input_buf_ptr++;
			}
		}
	}							/* end of outer loop */

	/*
	 * Transfer any still-uncopied data to line_buf.
	 */
	REFILL_LINEBUF;

	return result;
}
1463 :
/*
 * Convert one hexadecimal digit character to its numeric value.
 *
 * Decimal digits map to 0..9; any other character is folded to lower case
 * and measured from 'a', so 'a'/'A' yield 10 and 'f'/'F' yield 15.  The
 * caller is expected to have checked the character with isxdigit() first;
 * no validation is done here.
 */
static int
GetDecimalFromHex(char hex)
{
	/* cast once so the <ctype.h> calls never see a negative value */
	unsigned char digit = (unsigned char) hex;

	return isdigit(digit) ? digit - '0' : tolower(digit) - 'a' + 10;
}
1475 :
/*
 * Parse the current line into separate attributes (fields),
 * performing de-escaping as needed.
 *
 * The input is in line_buf.  We use attribute_buf to hold the result
 * strings.  cstate->raw_fields[k] is set to point to the k'th attribute
 * string, or NULL when the input matches the null marker string.
 * This array is expanded as necessary.
 *
 * (Note that the caller cannot check for nulls since the returned
 * string would be the post-de-escaping equivalent, which may look
 * the same as some valid data string.)
 *
 * The column delimiter is cstate->opts.delim (must be just one byte for
 * now; only its first byte is used).  cstate->opts.null_print is the null
 * marker string.  Note that this is compared to the pre-de-escaped input
 * string.
 *
 * If cstate->opts.default_print is set and a field's raw input equals it,
 * cstate->defaults[] is flagged for the corresponding column; an error is
 * raised if that column has no default expression.
 *
 * The return value is the number of fields actually read.
 */
static int
CopyReadAttributesText(CopyFromState cstate)
{
	char		delimc = cstate->opts.delim[0];
	int			fieldno;
	char	   *output_ptr;
	char	   *cur_ptr;
	char	   *line_end_ptr;

	/*
	 * We need a special case for zero-column tables: check that the input
	 * line is empty, and return.
	 */
	if (cstate->max_fields <= 0)
	{
		if (cstate->line_buf.len != 0)
			ereport(ERROR,
					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
					 errmsg("extra data after last expected column")));
		return 0;
	}

	resetStringInfo(&cstate->attribute_buf);

	/*
	 * The de-escaped attributes will certainly not be longer than the input
	 * data line, so we can just force attribute_buf to be large enough and
	 * then transfer data without any checks for enough space.  We need to do
	 * it this way because enlarging attribute_buf mid-stream would invalidate
	 * pointers already stored into cstate->raw_fields[].
	 */
	if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
		enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
	output_ptr = cstate->attribute_buf.data;

	/* set pointer variables for loop */
	cur_ptr = cstate->line_buf.data;
	line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;

	/* Outer loop iterates over fields */
	fieldno = 0;
	for (;;)
	{
		bool		found_delim = false;
		char	   *start_ptr;
		char	   *end_ptr;
		int			input_len;
		bool		saw_non_ascii = false;

		/* Make sure there is enough space for the next value */
		if (fieldno >= cstate->max_fields)
		{
			cstate->max_fields *= 2;
			cstate->raw_fields =
				repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
		}

		/* Remember start of field on both input and output sides */
		start_ptr = cur_ptr;
		cstate->raw_fields[fieldno] = output_ptr;

		/*
		 * Scan data for field.
		 *
		 * Note that in this loop, we are scanning to locate the end of field
		 * and also speculatively performing de-escaping.  Once we find the
		 * end-of-field, we can match the raw field contents against the null
		 * marker string.  Only after that comparison fails do we know that
		 * de-escaping is actually the right thing to do; therefore we *must
		 * not* throw any syntax errors before we've done the null-marker
		 * check.
		 */
		for (;;)
		{
			char		c;

			/*
			 * end_ptr tracks the end of the raw field; it is updated before
			 * each character is consumed, so it excludes the delimiter.
			 */
			end_ptr = cur_ptr;
			if (cur_ptr >= line_end_ptr)
				break;
			c = *cur_ptr++;
			if (c == delimc)
			{
				found_delim = true;
				break;
			}
			if (c == '\\')
			{
				/* a backslash at the very end of the line ends the field */
				if (cur_ptr >= line_end_ptr)
					break;
				c = *cur_ptr++;
				switch (c)
				{
					case '0':
					case '1':
					case '2':
					case '3':
					case '4':
					case '5':
					case '6':
					case '7':
						{
							/* handle \013: one to three octal digits */
							int			val;

							val = OCTVALUE(c);
							if (cur_ptr < line_end_ptr)
							{
								c = *cur_ptr;
								if (ISOCTAL(c))
								{
									cur_ptr++;
									val = (val << 3) + OCTVALUE(c);
									if (cur_ptr < line_end_ptr)
									{
										c = *cur_ptr;
										if (ISOCTAL(c))
										{
											cur_ptr++;
											val = (val << 3) + OCTVALUE(c);
										}
									}
								}
							}
							/* keep only the low 8 bits of the octal value */
							c = val & 0377;
							if (c == '\0' || IS_HIGHBIT_SET(c))
								saw_non_ascii = true;
						}
						break;
					case 'x':
						/*
						 * Handle \x3F: one or two hex digits.  If no hex
						 * digit follows, c is left as 'x' and is emitted
						 * literally.
						 */
						if (cur_ptr < line_end_ptr)
						{
							char		hexchar = *cur_ptr;

							if (isxdigit((unsigned char) hexchar))
							{
								int			val = GetDecimalFromHex(hexchar);

								cur_ptr++;
								if (cur_ptr < line_end_ptr)
								{
									hexchar = *cur_ptr;
									if (isxdigit((unsigned char) hexchar))
									{
										cur_ptr++;
										val = (val << 4) + GetDecimalFromHex(hexchar);
									}
								}
								c = val & 0xff;
								if (c == '\0' || IS_HIGHBIT_SET(c))
									saw_non_ascii = true;
							}
						}
						break;
					case 'b':
						c = '\b';
						break;
					case 'f':
						c = '\f';
						break;
					case 'n':
						c = '\n';
						break;
					case 'r':
						c = '\r';
						break;
					case 't':
						c = '\t';
						break;
					case 'v':
						c = '\v';
						break;

						/*
						 * in all other cases, take the char after '\'
						 * literally
						 */
				}
			}

			/* Add c to output string */
			*output_ptr++ = c;
		}

		/* Check whether raw input matched null marker */
		input_len = end_ptr - start_ptr;
		if (input_len == cstate->opts.null_print_len &&
			strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
			cstate->raw_fields[fieldno] = NULL;
		/* Check whether raw input matched default marker */
		else if (fieldno < list_length(cstate->attnumlist) &&
				 cstate->opts.default_print &&
				 input_len == cstate->opts.default_print_len &&
				 strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
		{
			/* fieldno is 0-indexed and attnum is 1-indexed */
			int			m = list_nth_int(cstate->attnumlist, fieldno) - 1;

			if (cstate->defexprs[m] != NULL)
			{
				/* defaults contain entries for all physical attributes */
				cstate->defaults[m] = true;
			}
			else
			{
				TupleDesc	tupDesc = RelationGetDescr(cstate->rel);
				Form_pg_attribute att = TupleDescAttr(tupDesc, m);

				ereport(ERROR,
						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
						 errmsg("unexpected default marker in COPY data"),
						 errdetail("Column \"%s\" has no default value.",
								   NameStr(att->attname))));
			}
		}
		else
		{
			/*
			 * At this point we know the field is supposed to contain data.
			 *
			 * If we de-escaped any non-7-bit-ASCII chars, make sure the
			 * resulting string is valid data for the db encoding.
			 */
			if (saw_non_ascii)
			{
				char	   *fld = cstate->raw_fields[fieldno];

				pg_verifymbstr(fld, output_ptr - fld, false);
			}
		}

		/* Terminate attribute value in output area */
		*output_ptr++ = '\0';

		fieldno++;
		/* Done if we hit EOL instead of a delim */
		if (!found_delim)
			break;
	}

	/*
	 * Clean up state of attribute_buf: step back over the last field's
	 * terminator so that it is not counted in len.
	 */
	output_ptr--;
	Assert(*output_ptr == '\0');
	cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);

	return fieldno;
}
1742 :
/*
 * Parse the current line into separate attributes (fields),
 * performing de-escaping as needed.  This has exactly the same API as
 * CopyReadAttributesText, except we parse the fields according to
 * "standard" (i.e. common) CSV usage.
 *
 * The input is taken from line_buf and the unquoted field values are
 * written to attribute_buf, with cstate->raw_fields[k] pointing at the k'th
 * value.  A field is reported as NULL only if it contained no quote
 * character and its raw text equals the null marker.  The delimiter, quote
 * and escape characters are the first bytes of cstate->opts.delim, .quote
 * and .escape respectively.
 *
 * The return value is the number of fields actually read.
 */
static int
CopyReadAttributesCSV(CopyFromState cstate)
{
	char		delimc = cstate->opts.delim[0];
	char		quotec = cstate->opts.quote[0];
	char		escapec = cstate->opts.escape[0];
	int			fieldno;
	char	   *output_ptr;
	char	   *cur_ptr;
	char	   *line_end_ptr;

	/*
	 * We need a special case for zero-column tables: check that the input
	 * line is empty, and return.
	 */
	if (cstate->max_fields <= 0)
	{
		if (cstate->line_buf.len != 0)
			ereport(ERROR,
					(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
					 errmsg("extra data after last expected column")));
		return 0;
	}

	resetStringInfo(&cstate->attribute_buf);

	/*
	 * The de-escaped attributes will certainly not be longer than the input
	 * data line, so we can just force attribute_buf to be large enough and
	 * then transfer data without any checks for enough space.  We need to do
	 * it this way because enlarging attribute_buf mid-stream would invalidate
	 * pointers already stored into cstate->raw_fields[].
	 */
	if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
		enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
	output_ptr = cstate->attribute_buf.data;

	/* set pointer variables for loop */
	cur_ptr = cstate->line_buf.data;
	line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;

	/* Outer loop iterates over fields */
	fieldno = 0;
	for (;;)
	{
		bool		found_delim = false;
		bool		saw_quote = false;
		char	   *start_ptr;
		char	   *end_ptr;
		int			input_len;

		/* Make sure there is enough space for the next value */
		if (fieldno >= cstate->max_fields)
		{
			cstate->max_fields *= 2;
			cstate->raw_fields =
				repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
		}

		/* Remember start of field on both input and output sides */
		start_ptr = cur_ptr;
		cstate->raw_fields[fieldno] = output_ptr;

		/*
		 * Scan data for field,
		 *
		 * The loop starts in "not quote" mode and then toggles between that
		 * and "in quote" mode.  The loop exits normally if it is in "not
		 * quote" mode and a delimiter or line end is seen.
		 */
		for (;;)
		{
			char		c;

			/* Not in quote */
			for (;;)
			{
				/* end_ptr marks the end of the raw field, before any delimiter */
				end_ptr = cur_ptr;
				if (cur_ptr >= line_end_ptr)
					goto endfield;
				c = *cur_ptr++;
				/* unquoted field delimiter */
				if (c == delimc)
				{
					found_delim = true;
					goto endfield;
				}
				/* start of quoted field (or part of field) */
				if (c == quotec)
				{
					saw_quote = true;
					break;
				}
				/* Add c to output string */
				*output_ptr++ = c;
			}

			/* In quote */
			for (;;)
			{
				end_ptr = cur_ptr;
				/* running off the line while still inside quotes is an error */
				if (cur_ptr >= line_end_ptr)
					ereport(ERROR,
							(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
							 errmsg("unterminated CSV quoted field")));

				c = *cur_ptr++;

				/* escape within a quoted field */
				if (c == escapec)
				{
					/*
					 * peek at the next char if available, and escape it if it
					 * is an escape char or a quote char
					 */
					if (cur_ptr < line_end_ptr)
					{
						char		nextc = *cur_ptr;

						if (nextc == escapec || nextc == quotec)
						{
							*output_ptr++ = nextc;
							cur_ptr++;
							continue;
						}
					}
				}

				/*
				 * end of quoted field.  Must do this test after testing for
				 * escape in case quote char and escape char are the same
				 * (which is the common case).
				 */
				if (c == quotec)
					break;

				/* Add c to output string */
				*output_ptr++ = c;
			}
		}
endfield:

		/* Terminate attribute value in output area */
		*output_ptr++ = '\0';

		/*
		 * Check whether raw input matched null marker.  A field that
		 * contained any quote character is never treated as NULL.
		 */
		input_len = end_ptr - start_ptr;
		if (!saw_quote && input_len == cstate->opts.null_print_len &&
			strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
			cstate->raw_fields[fieldno] = NULL;
		/* Check whether raw input matched default marker */
		else if (fieldno < list_length(cstate->attnumlist) &&
				 cstate->opts.default_print &&
				 input_len == cstate->opts.default_print_len &&
				 strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
		{
			/* fieldno is 0-indexed and attnum is 1-indexed */
			int			m = list_nth_int(cstate->attnumlist, fieldno) - 1;

			if (cstate->defexprs[m] != NULL)
			{
				/* defaults contain entries for all physical attributes */
				cstate->defaults[m] = true;
			}
			else
			{
				TupleDesc	tupDesc = RelationGetDescr(cstate->rel);
				Form_pg_attribute att = TupleDescAttr(tupDesc, m);

				ereport(ERROR,
						(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
						 errmsg("unexpected default marker in COPY data"),
						 errdetail("Column \"%s\" has no default value.",
								   NameStr(att->attname))));
			}
		}

		fieldno++;
		/* Done if we hit EOL instead of a delim */
		if (!found_delim)
			break;
	}

	/*
	 * Clean up state of attribute_buf: step back over the last field's
	 * terminator so that it is not counted in len.
	 */
	output_ptr--;
	Assert(*output_ptr == '\0');
	cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);

	return fieldno;
}
1939 :
1940 :
1941 : /*
1942 : * Read a binary attribute
1943 : */
1944 : static Datum
1945 158 : CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
1946 : Oid typioparam, int32 typmod,
1947 : bool *isnull)
1948 : {
1949 : int32 fld_size;
1950 : Datum result;
1951 :
1952 158 : if (!CopyGetInt32(cstate, &fld_size))
1953 0 : ereport(ERROR,
1954 : (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1955 : errmsg("unexpected EOF in COPY data")));
1956 158 : if (fld_size == -1)
1957 : {
1958 30 : *isnull = true;
1959 30 : return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
1960 : }
1961 128 : if (fld_size < 0)
1962 0 : ereport(ERROR,
1963 : (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1964 : errmsg("invalid field size")));
1965 :
1966 : /* reset attribute_buf to empty, and load raw data in it */
1967 128 : resetStringInfo(&cstate->attribute_buf);
1968 :
1969 128 : enlargeStringInfo(&cstate->attribute_buf, fld_size);
1970 128 : if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
1971 128 : fld_size) != fld_size)
1972 0 : ereport(ERROR,
1973 : (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
1974 : errmsg("unexpected EOF in COPY data")));
1975 :
1976 128 : cstate->attribute_buf.len = fld_size;
1977 128 : cstate->attribute_buf.data[fld_size] = '\0';
1978 :
1979 : /* Call the column type's binary input converter */
1980 128 : result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
1981 : typioparam, typmod);
1982 :
1983 : /* Trouble if it didn't eat the whole buffer */
1984 128 : if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
1985 2 : ereport(ERROR,
1986 : (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
1987 : errmsg("incorrect binary data format")));
1988 :
1989 126 : *isnull = false;
1990 126 : return result;
1991 : }
|