LCOV - code coverage report
Current view: top level - src/backend/commands - copyfromparse.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 84.3 % 732 617
Test Date: 2026-03-27 17:16:09 Functions: 91.7 % 24 22
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * copyfromparse.c
       4              :  *      Parse CSV/text/binary format for COPY FROM.
       5              :  *
       6              :  * This file contains routines to parse the text, CSV and binary input
       7              :  * formats.  The main entry point is NextCopyFrom(), which parses the
       8              :  * next input line and returns it as Datums.
       9              :  *
      10              :  * In text/CSV mode, the parsing happens in multiple stages:
      11              :  *
      12              :  * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf
      13              :  *                1.          2.            3.           4.
      14              :  *
      15              :  * 1. CopyLoadRawBuf() reads raw data from the input file or client, and
      16              :  *    places it into 'raw_buf'.
      17              :  *
      18              :  * 2. CopyConvertBuf() calls the encoding conversion function to convert
      19              :  *    the data in 'raw_buf' from client to server encoding, placing the
      20              :  *    converted result in 'input_buf'.
      21              :  *
      22              :  * 3. CopyReadLine() parses the data in 'input_buf', one line at a time.
      23              :  *    It is responsible for finding the next newline marker, taking quote and
      24              :  *    escape characters into account according to the COPY options.  The line
      25              :  *    is copied into 'line_buf', with quotes and escape characters still
      26              :  *    intact.
      27              :  *
      28              :  * 4. CopyReadAttributesText/CSV() functions take the input line from
      29              :  *    'line_buf', and splits it into fields, unescaping the data as required.
      30              :  *    The fields are stored in 'attribute_buf', and 'raw_fields' array holds
      31              :  *    pointers to each field.
      32              :  *
      33              :  * If encoding conversion is not required, a shortcut is taken in step 2 to
      34              :  * avoid copying the data unnecessarily.  The 'input_buf' pointer is set to
      35              :  * point directly to 'raw_buf', so that CopyLoadRawBuf() loads the raw data
      36              :  * directly into 'input_buf'.  CopyConvertBuf() then merely validates that
      37              :  * the data is valid in the current encoding.
      38              :  *
      39              :  * In binary mode, the pipeline is much simpler.  Input is loaded into
      40              :  * 'raw_buf', and encoding conversion is done in the datatype-specific
      41              :  * receive functions, if required.  'input_buf' and 'line_buf' are not used,
      42              :  * but 'attribute_buf' is used as a temporary buffer to hold one attribute's
      43              :  * data when it's passed to the receive function.
      44              :  *
      45              :  * 'raw_buf' is always 64 kB in size (RAW_BUF_SIZE).  'input_buf' is also
      46              :  * 64 kB (INPUT_BUF_SIZE), if encoding conversion is required.  'line_buf'
      47              :  * and 'attribute_buf' are expanded on demand, to hold the longest line
      48              :  * encountered so far.
      49              :  *
      50              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      51              :  * Portions Copyright (c) 1994, Regents of the University of California
      52              :  *
      53              :  *
      54              :  * IDENTIFICATION
      55              :  *    src/backend/commands/copyfromparse.c
      56              :  *
      57              :  *-------------------------------------------------------------------------
      58              :  */
      59              : #include "postgres.h"
      60              : 
      61              : #include <ctype.h>
      62              : #include <unistd.h>
      63              : #include <sys/stat.h>
      64              : 
      65              : #include "commands/copyapi.h"
      66              : #include "commands/copyfrom_internal.h"
      67              : #include "commands/progress.h"
      68              : #include "executor/executor.h"
      69              : #include "libpq/libpq.h"
      70              : #include "libpq/pqformat.h"
      71              : #include "mb/pg_wchar.h"
      72              : #include "miscadmin.h"
      73              : #include "pgstat.h"
      74              : #include "port/pg_bitutils.h"
      75              : #include "port/pg_bswap.h"
      76              : #include "port/simd.h"
      77              : #include "utils/builtins.h"
      78              : #include "utils/rel.h"
      79              : #include "utils/wait_event.h"
      80              : 
      81              : #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
      82              : #define OCTVALUE(c) ((c) - '0')
      83              : 
      84              : /*
      85              :  * These macros centralize code used to process line_buf and input_buf buffers.
      86              :  * They are macros because they often do continue/break control and to avoid
      87              :  * function call overhead in tight COPY loops.
      88              :  *
      89              :  * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
      90              :  * prevent the continue/break processing from working.  We end the "if (1)"
      91              :  * with "else ((void) 0)" to ensure the "if" does not unintentionally match
      92              :  * any "else" in the calling code, and to avoid any compiler warnings about
      93              :  * empty statements.  See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
      94              :  */
      95              : 
      96              : /*
      97              :  * If input_buf_ptr + extralen is at or past copy_buf_len and EOF has not been hit, undo the fetch and loop again.
      98              :  * This keeps the character read at the top of the loop in the buffer even if there is more than one read-ahead.
      99              :  */
     100              : #define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
     101              : if (1) \
     102              : { \
     103              :     if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
     104              :     { \
     105              :         input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
     106              :         need_data = true; /* local of the code using the macro; presumably requests a refill - confirm at the use site */ \
     107              :         continue; /* binds to the loop enclosing the macro's use site */ \
     108              :     } \
     109              : } else ((void) 0)
     110              : 
     111              : /* At EOF with input_buf_ptr + extralen at or past copy_buf_len: this consumes the remainder of the buffer and breaks */
     112              : #define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
     113              : if (1) \
     114              : { \
     115              :     if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
     116              :     { \
     117              :         if (extralen) \
     118              :             input_buf_ptr = copy_buf_len; /* consume the partial character */ \
     119              :         /* backslash just before EOF, treat as data char */ \
     120              :         result = true; /* 'result' is a local of the code using the macro */ \
     121              :         break; /* exits the loop or switch enclosing the macro's use site */ \
     122              :     } \
     123              : } else ((void) 0)
     124              : 
     125              : /*
     126              :  * Transfer any approved data (input_buf bytes from input_buf_index up to input_buf_ptr) to line_buf; must do this to be sure
     127              :  * there is some room in input_buf.  Does nothing when input_buf_ptr has not moved past input_buf_index.
     128              :  */
     129              : #define REFILL_LINEBUF \
     130              : if (1) \
     131              : { \
     132              :     if (input_buf_ptr > cstate->input_buf_index) \
     133              :     { \
     134              :         appendBinaryStringInfo(&cstate->line_buf, \
     135              :                              cstate->input_buf + cstate->input_buf_index, \
     136              :                                input_buf_ptr - cstate->input_buf_index); \
     137              :         cstate->input_buf_index = input_buf_ptr; /* advance past the bytes just copied */ \
     138              :     } \
     139              : } else ((void) 0)
     140              : 
     141              : /* 11-byte signature that ReceiveCopyBinaryHeader() expects at the start of the input.  NOTE: there's a copy of this in copyto.c */
     142              : static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
     143              : 
     144              : 
     145              : /* non-export function prototypes (the text/CSV parsing stages are described in the file header comment) */
     146              : static bool CopyReadLine(CopyFromState cstate, bool is_csv);  /* parses input_buf one line at a time */
     147              : static pg_attribute_always_inline bool CopyReadLineText(CopyFromState cstate,
     148              :                                                         bool is_csv);
     149              : static int  CopyReadAttributesText(CopyFromState cstate);  /* splits line_buf into fields (text format) */
     150              : static int  CopyReadAttributesCSV(CopyFromState cstate);  /* splits line_buf into fields (CSV format) */
     151              : static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
     152              :                                      Oid typioparam, int32 typmod,
     153              :                                      bool *isnull);
     154              : static pg_attribute_always_inline bool CopyFromTextLikeOneRow(CopyFromState cstate,
     155              :                                                               ExprContext *econtext,
     156              :                                                               Datum *values,
     157              :                                                               bool *nulls,
     158              :                                                               bool is_csv);
     159              : static pg_attribute_always_inline bool NextCopyFromRawFieldsInternal(CopyFromState cstate,
     160              :                                                                      char ***fields,
     161              :                                                                      int *nfields,
     162              :                                                                      bool is_csv);
     163              : 
     164              : 
     165              : /* Low-level communications functions */
     166              : static int  CopyGetData(CopyFromState cstate, void *databuf,
     167              :                         int minread, int maxread);  /* returns bytes read; fewer than minread means EOF */
     168              : static inline bool CopyGetInt32(CopyFromState cstate, int32 *val);  /* returns false on EOF */
     169              : static inline bool CopyGetInt16(CopyFromState cstate, int16 *val);  /* returns false on EOF */
     170              : static void CopyLoadInputBuf(CopyFromState cstate);
     171              : static int  CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes);  /* callers below compare the result against nbytes */
     172              : /* ReceiveCopyBegin: send a CopyInResponse message, then mark cstate as reading from the frontend and allocate fe_msgbuf. */
     173              : void
     174          681 : ReceiveCopyBegin(CopyFromState cstate)
     175              : {
     176              :     StringInfoData buf;
     177          681 :     int         natts = list_length(cstate->attnumlist);  /* one per-column format code is sent per entry */
     178          681 :     int16       format = (cstate->opts.format == COPY_FORMAT_BINARY ? 1 : 0);  /* 1 for binary, 0 for any other format */
     179              :     int         i;
     180              : 
     181          681 :     pq_beginmessage(&buf, PqMsg_CopyInResponse);
     182          681 :     pq_sendbyte(&buf, format);  /* overall format */
     183          681 :     pq_sendint16(&buf, natts);  /* how many per-column format codes follow */
     184         2445 :     for (i = 0; i < natts; i++)
     185         1764 :         pq_sendint16(&buf, format); /* per-column formats */
     186          681 :     pq_endmessage(&buf);
     187          681 :     cstate->copy_src = COPY_FRONTEND;  /* selects the COPY_FRONTEND branch in CopyGetData() */
     188          681 :     cstate->fe_msgbuf = makeStringInfo();  /* buffer that CopyGetData() fills with each received message */
     189              :     /* We *must* flush here to ensure FE knows it can send. */
     190          681 :     pq_flush();
     191          681 : }
     192              : /* ReceiveCopyBinaryHeader: read and validate the binary header (signature, flags, extension length); reports ERROR on any problem. */
     193              : void
     194            8 : ReceiveCopyBinaryHeader(CopyFromState cstate)
     195              : {
     196              :     char        readSig[11];  /* holds the signature; reused below as a one-byte scratch buffer */
     197              :     int32       tmp;  /* holds the flags field first, then the header extension length */
     198              : 
     199              :     /* Signature */
     200            8 :     if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
     201            8 :         memcmp(readSig, BinarySignature, 11) != 0)
     202            0 :         ereport(ERROR,
     203              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     204              :                  errmsg("COPY file signature not recognized")));
     205              :     /* Flags field */
     206            8 :     if (!CopyGetInt32(cstate, &tmp))
     207            0 :         ereport(ERROR,
     208              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     209              :                  errmsg("invalid COPY file header (missing flags)")));
     210            8 :     if ((tmp & (1 << 16)) != 0)  /* bit 16 set: rejected with the WITH OIDS message */
     211            0 :         ereport(ERROR,
     212              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     213              :                  errmsg("invalid COPY file header (WITH OIDS)")));
     214            8 :     tmp &= ~(1 << 16);  /* NOTE(review): presumably a no-op here, assuming ereport(ERROR) above does not return - confirm */
     215            8 :     if ((tmp >> 16) != 0)  /* any remaining bit above the low 16 is rejected */
     216            0 :         ereport(ERROR,
     217              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     218              :                  errmsg("unrecognized critical flags in COPY file header")));
     219              :     /* Header extension length */
     220            8 :     if (!CopyGetInt32(cstate, &tmp) ||
     221            8 :         tmp < 0)  /* a negative length is reported with the same message as a missing one */
     222            0 :         ereport(ERROR,
     223              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     224              :                  errmsg("invalid COPY file header (missing length)")));
     225              :     /* Skip extension header, if present */
     226            8 :     while (tmp-- > 0)  /* reads and discards one byte per iteration */
     227              :     {
     228            0 :         if (CopyReadBinaryData(cstate, readSig, 1) != 1)
     229            0 :             ereport(ERROR,
     230              :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     231              :                      errmsg("invalid COPY file header (wrong length)")));
     232              :     }
     233            8 : }
     234              : 
     235              : /*
     236              :  * CopyGetData reads data from the source (file or frontend)
     237              :  * into databuf; a data_source_cb callback is the third possible source.
     238              :  * We attempt to read at least minread, and at most maxread, bytes from
     239              :  * the source.  The actual number of bytes read is returned; if this is
     240              :  * less than minread, EOF was detected.
     241              :  *
     242              :  * Note: when copying from the frontend, we expect a proper EOF mark per
     243              :  * protocol; if the frontend simply drops the connection, we raise error.
     244              :  * It seems unwise to allow the COPY IN to complete normally in that case.
     245              :  * cstate->raw_reached_eof is set when a file read returns 0 bytes or a CopyDone message arrives.
     246              :  * NB: no data conversion is applied here.
     247              :  */
     248              : static int
     249       217548 : CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
     250              : {
     251       217548 :     int         bytesread = 0;
     252              : 
     253       217548 :     switch (cstate->copy_src)
     254              :     {
     255          705 :         case COPY_FILE:
     256          705 :             pgstat_report_wait_start(WAIT_EVENT_COPY_FROM_READ);
     257          705 :             bytesread = fread(databuf, 1, maxread, cstate->copy_file);  /* single read of up to maxread bytes; minread is not used here */
     258          705 :             pgstat_report_wait_end();
     259          705 :             if (ferror(cstate->copy_file))
     260            0 :                 ereport(ERROR,
     261              :                         (errcode_for_file_access(),
     262              :                          errmsg("could not read from COPY file: %m")));
     263          705 :             if (bytesread == 0)
     264          277 :                 cstate->raw_reached_eof = true;
     265          705 :             break;
     266       201766 :         case COPY_FRONTEND:
     267       402515 :             while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
     268              :             {
     269              :                 int         avail;
     270              : 
     271       402035 :                 while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)  /* nothing left unread in the current message */
     272              :                 {
     273              :                     /* Try to receive another message */
     274              :                     int         mtype;
     275              :                     int         maxmsglen;
     276              : 
     277       201286 :             readmessage:  /* re-entered via goto when a Flush or Sync message is ignored */
     278       201286 :                     HOLD_CANCEL_INTERRUPTS();
     279       201286 :                     pq_startmsgread();
     280       201286 :                     mtype = pq_getbyte();
     281       201286 :                     if (mtype == EOF)
     282            0 :                         ereport(ERROR,
     283              :                                 (errcode(ERRCODE_CONNECTION_FAILURE),
     284              :                                  errmsg("unexpected EOF on client connection with an open transaction")));
     285              :                     /* Validate message type and set packet size limit */
     286       201286 :                     switch (mtype)
     287              :                     {
     288       200749 :                         case PqMsg_CopyData:
     289       200749 :                             maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
     290       200749 :                             break;
     291          535 :                         case PqMsg_CopyDone:
     292              :                         case PqMsg_CopyFail:
     293              :                         case PqMsg_Flush:
     294              :                         case PqMsg_Sync:
     295          535 :                             maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
     296          535 :                             break;
     297            2 :                         default:
     298            2 :                             ereport(ERROR,
     299              :                                     (errcode(ERRCODE_PROTOCOL_VIOLATION),
     300              :                                      errmsg("unexpected message type 0x%02X during COPY from stdin",
     301              :                                             mtype)));
     302              :                             maxmsglen = 0;  /* keep compiler quiet */
     303              :                             break;
     304              :                     }
     305              :                     /* Now collect the message body */
     306       201284 :                     if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
     307            0 :                         ereport(ERROR,
     308              :                                 (errcode(ERRCODE_CONNECTION_FAILURE),
     309              :                                  errmsg("unexpected EOF on client connection with an open transaction")));
     310       201284 :                     RESUME_CANCEL_INTERRUPTS();
     311              :                     /* ... and process it */
     312       201284 :                     switch (mtype)
     313              :                     {
     314       200749 :                         case PqMsg_CopyData:
     315       200749 :                             break;
     316          535 :                         case PqMsg_CopyDone:
     317              :                             /* COPY IN correctly terminated by frontend */
     318          535 :                             cstate->raw_reached_eof = true;
     319          535 :                             return bytesread;  /* returns whatever was copied before CopyDone arrived */
     320            0 :                         case PqMsg_CopyFail:
     321            0 :                             ereport(ERROR,
     322              :                                     (errcode(ERRCODE_QUERY_CANCELED),
     323              :                                      errmsg("COPY from stdin failed: %s",
     324              :                                             pq_getmsgstring(cstate->fe_msgbuf))));
     325              :                             break;
     326            0 :                         case PqMsg_Flush:
     327              :                         case PqMsg_Sync:
     328              : 
     329              :                             /*
     330              :                              * Ignore Flush/Sync for the convenience of client
     331              :                              * libraries (such as libpq) that may send those
     332              :                              * without noticing that the command they just
     333              :                              * sent was COPY.
     334              :                              */
     335            0 :                             goto readmessage;
     336       200749 :                         default:
     337              :                             Assert(false);  /* NOT REACHED */
     338              :                     }
     339              :                 }
     340       200749 :                 avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;  /* unread bytes in the current message */
     341       200749 :                 if (avail > maxread)
     342            0 :                     avail = maxread;  /* never copy more than the caller's remaining limit */
     343       200749 :                 pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
     344       200749 :                 databuf = (char *) databuf + avail;  /* the next copy lands after these bytes */
     345       200749 :                 maxread -= avail;
     346       200749 :                 bytesread += avail;
     347              :             }
     348       201229 :             break;
     349        15077 :         case COPY_CALLBACK:
     350        15077 :             bytesread = cstate->data_source_cb(databuf, minread, maxread);  /* raw_reached_eof is not set in this branch */
     351        15077 :             break;
     352              :     }
     353              : 
     354       217011 :     return bytesread;
     355              : }
     356              : 
     357              : 
     358              : /*
     359              :  * These functions do apply some data conversion
     360              :  */
     361              : 
     362              : /*
     363              :  * CopyGetInt32 reads an int32 that appears in network byte order
     364              :  * and stores it in *val in host byte order (*val is set to 0 on failure).
     365              :  * Returns true if OK, false if EOF
     366              :  */
     367              : static inline bool
     368          116 : CopyGetInt32(CopyFromState cstate, int32 *val)
     369              : {
     370              :     uint32      buf;
     371              : 
     372          116 :     if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))  /* any result other than 4 bytes is treated as EOF */
     373              :     {
     374            0 :         *val = 0;               /* suppress compiler warning */
     375            0 :         return false;
     376              :     }
     377          116 :     *val = (int32) pg_ntoh32(buf);
     378          116 :     return true;
     379              : }
     380              : 
     381              : /*
     382              :  * CopyGetInt16 reads an int16 that appears in network byte order; returns true if OK, false (with *val set to 0) if EOF
     383              :  */
     384              : static inline bool
     385           25 : CopyGetInt16(CopyFromState cstate, int16 *val)
     386              : {
     387              :     uint16      buf;
     388              : 
     389           25 :     if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))  /* any result other than 2 bytes is treated as EOF */
     390              :     {
     391            0 :         *val = 0;               /* suppress compiler warning */
     392            0 :         return false;
     393              :     }
     394           25 :     *val = (int16) pg_ntoh16(buf);
     395           25 :     return true;
     396              : }
     397              : 
     398              : 
     399              : /*
     400              :  * Perform encoding conversion on data in 'raw_buf', writing the converted
     401              :  * data into 'input_buf'.
     402              :  * Problems are recorded in cstate->input_reached_eof / input_reached_error rather than raised here.
     403              :  * On entry, there must be some data to convert in 'raw_buf'.
     404              :  */
     405              : static void
     406       434312 : CopyConvertBuf(CopyFromState cstate)
     407              : {
     408              :     /*
     409              :      * If the file and server encoding are the same, no encoding conversion is
     410              :      * required.  However, we still need to verify that the input is valid for
     411              :      * the encoding.
     412              :      */
     413       434312 :     if (!cstate->need_transcoding)
     414              :     {
     415              :         /*
     416              :          * When conversion is not required, input_buf and raw_buf are the
     417              :          * same.  raw_buf_len is the total number of bytes in the buffer, and
     418              :          * input_buf_len tracks how many of those bytes have already been
     419              :          * verified.
     420              :          */
     421       434224 :         int         preverifiedlen = cstate->input_buf_len;
     422       434224 :         int         unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
     423              :         int         nverified;
     424              : 
     425       434224 :         if (unverifiedlen == 0)
     426              :         {
     427              :             /*
     428              :              * If no more raw data is coming, report the EOF to the caller.
     429              :              */
     430       218216 :             if (cstate->raw_reached_eof)
     431         1259 :                 cstate->input_reached_eof = true;
     432       218216 :             return;
     433              :         }
     434              : 
     435              :         /*
     436              :          * Verify the new data, including any residual unverified bytes from
     437              :          * previous round.
     438              :          */
     439       216008 :         nverified = pg_encoding_verifymbstr(cstate->file_encoding,
     440       216008 :                                             cstate->raw_buf + preverifiedlen,
     441              :                                             unverifiedlen);
     442       216008 :         if (nverified == 0)
     443              :         {
     444              :             /*
     445              :              * Could not verify anything.
     446              :              *
     447              :              * If there is no more raw input data coming, it means that there
     448              :              * was an incomplete multi-byte sequence at the end.  Also, if
     449              :              * there's "enough" input left, we should be able to verify at
     450              :              * least one character, and a failure to do so means that we've
     451              :              * hit an invalid byte sequence.
     452              :              */
     453            0 :             if (cstate->raw_reached_eof || unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))
     454            0 :                 cstate->input_reached_error = true;
     455            0 :             return;  /* otherwise nothing is flagged and input_buf_len stays unchanged */
     456              :         }
     457       216008 :         cstate->input_buf_len += nverified;  /* count the newly verified bytes as verified input */
     458              :     }
     459              :     else
     460              :     {
     461              :         /*
     462              :          * Encoding conversion is needed.
     463              :          */
     464              :         int         nbytes;
     465              :         unsigned char *src;
     466              :         int         srclen;
     467              :         unsigned char *dst;
     468              :         int         dstlen;
     469              :         int         convertedlen;
     470              : 
     471           88 :         if (RAW_BUF_BYTES(cstate) == 0)
     472              :         {
     473              :             /*
     474              :              * If no more raw data is coming, report the EOF to the caller.
     475              :              */
     476           56 :             if (cstate->raw_reached_eof)
     477           16 :                 cstate->input_reached_eof = true;
     478           56 :             return;
     479              :         }
     480              : 
     481              :         /*
     482              :          * First, copy down any unprocessed data.
     483              :          */
     484           32 :         nbytes = INPUT_BUF_BYTES(cstate);
     485           32 :         if (nbytes > 0 && cstate->input_buf_index > 0)
     486            0 :             memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
     487              :                     nbytes);
     488           32 :         cstate->input_buf_index = 0;
     489           32 :         cstate->input_buf_len = nbytes;
     490           32 :         cstate->input_buf[nbytes] = '\0';
     491              : 
     492           32 :         src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
     493           32 :         srclen = cstate->raw_buf_len - cstate->raw_buf_index;
     494           32 :         dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;  /* new output goes after the data copied down above */
     495           32 :         dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;  /* NOTE(review): the +1 presumably covers a terminator byte beyond INPUT_BUF_SIZE - confirm against the allocation */
     496              : 
     497              :         /*
     498              :          * Do the conversion.  This might stop short, if there is an invalid
     499              :          * byte sequence in the input.  We'll convert as much as we can in
     500              :          * that case.
     501              :          *
     502              :          * Note: Even if we hit an invalid byte sequence, we don't report the
     503              :          * error until all the valid bytes have been consumed.  The input
     504              :          * might contain an end-of-input marker (\.), and we don't want to
     505              :          * report an error if the invalid byte sequence is after the
     506              :          * end-of-input marker.  We might unnecessarily convert some data
     507              :          * after the end-of-input marker as long as it's valid for the
     508              :          * encoding, but that's harmless.
     509              :          */
     510           32 :         convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
     511              :                                                      cstate->file_encoding,
     512              :                                                      GetDatabaseEncoding(),
     513              :                                                      src, srclen,
     514              :                                                      dst, dstlen,
     515              :                                                      true);  /* NOTE(review): presumably selects the stop-short-instead-of-error behavior described above - confirm */
     516           32 :         if (convertedlen == 0)
     517              :         {
     518              :             /*
     519              :              * Could not convert anything.  If there is no more raw input data
     520              :              * coming, it means that there was an incomplete multi-byte
     521              :              * sequence at the end.  Also, if there is plenty of input left,
     522              :              * we should be able to convert at least one character, so a
     523              :              * failure to do so must mean that we've hit a byte sequence
     524              :              * that's invalid.
     525              :              */
     526           16 :             if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
     527            8 :                 cstate->input_reached_error = true;
     528           16 :             return;
     529              :         }
     530           16 :         cstate->raw_buf_index += convertedlen;  /* convertedlen is applied to the source side (raw_buf) */
     531           16 :         cstate->input_buf_len += strlen((char *) dst);  /* output length is taken with strlen - assumes the conversion NUL-terminates dst, TODO confirm */
     532              :     }
     533              : }
     534              : 
     535              : /*
     536              :  * Report an encoding or conversion error.
                      :  *
                      :  * The caller must already have set input_reached_error, and raw_buf must
                      :  * hold some data; both conditions are asserted.  When no transcoding is
                      :  * needed, the offending bytes are handed to report_invalid_encoding().
                      :  * Otherwise the conversion routine is run again with noError=false so that
                      :  * it raises the error itself.
                      :  *
                      :  * The transcoding path ends in elog(ERROR), so it does not return normally.
                      :  * NOTE(review): report_invalid_encoding() presumably does not return
                      :  * either -- confirm against its definition.
     537              :  */
     538              : static void
     539            8 : CopyConversionError(CopyFromState cstate)
     540              : {
     541              :     Assert(cstate->raw_buf_len > 0);
     542              :     Assert(cstate->input_reached_error);
     543              : 
     544            8 :     if (!cstate->need_transcoding)
     545              :     {
     546              :         /*
     547              :          * Everything up to input_buf_len was successfully verified, and
     548              :          * input_buf_len points to the invalid or incomplete character.
                      :          * The bytes from that offset up to raw_buf_len are what gets
                      :          * reported.
     549              :          */
     550            0 :         report_invalid_encoding(cstate->file_encoding,
     551            0 :                                 cstate->raw_buf + cstate->input_buf_len,
     552            0 :                                 cstate->raw_buf_len - cstate->input_buf_len);
     553              :     }
     554              :     else
     555              :     {
     556              :         /*
     557              :          * raw_buf_index points to the invalid or untranslatable character. We
     558              :          * let the conversion routine report the error, because it can provide
     559              :          * a more specific error message than we could here.  An earlier call
     560              :          * to the conversion routine in CopyConvertBuf() detected that there
     561              :          * is an error, now we call the conversion routine again with
     562              :          * noError=false, to have it throw the error.
                      :          *
                      :          * The source and destination ranges are computed the same way as
                      :          * in CopyConvertBuf(): the unconverted tail of raw_buf is the
                      :          * source, and the free space after input_buf_len is the
                      :          * destination.
     563              :          */
     564              :         unsigned char *src;
     565              :         int         srclen;
     566              :         unsigned char *dst;
     567              :         int         dstlen;
     568              : 
     569            8 :         src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
     570            8 :         srclen = cstate->raw_buf_len - cstate->raw_buf_index;
     571            8 :         dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
     572            8 :         dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
     573              : 
     574            8 :         (void) pg_do_encoding_conversion_buf(cstate->conversion_proc,
     575              :                                              cstate->file_encoding,
     576              :                                              GetDatabaseEncoding(),
     577              :                                              src, srclen,
     578              :                                              dst, dstlen,
     579              :                                              false);
     580              : 
     581              :         /*
     582              :          * The conversion routine should have reported an error, so this
     583              :          * should not be reached.
     584              :          */
     585            0 :         elog(ERROR, "encoding conversion failed without error");
     586              :     }
     587              : }
     588              : 
     589              : /*
     590              :  * Load more data from data source to raw_buf.
     591              :  *
     592              :  * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the
     593              :  * beginning of the buffer, and we load new data after that.
                      :  *
                      :  * After loading, raw_buf is NUL-terminated at its new length,
                      :  * bytes_processed is advanced by the number of bytes just read and
                      :  * published via PROGRESS_COPY_BYTES_PROCESSED, and raw_reached_eof is set
                      :  * if CopyGetData() returned no bytes at all.
     594              :  */
     595              : static void
     596       217025 : CopyLoadRawBuf(CopyFromState cstate)
     597              : {
     598              :     int         nbytes;
     599              :     int         inbytes;
     600              : 
     601              :     /*
     602              :      * In text mode, if encoding conversion is not required, raw_buf and
     603              :      * input_buf point to the same buffer.  Their len/index better agree, too.
     604              :      */
     605       217025 :     if (cstate->raw_buf == cstate->input_buf)
     606              :     {
     607              :         Assert(!cstate->need_transcoding);
     608              :         Assert(cstate->raw_buf_index == cstate->input_buf_index);
     609              :         Assert(cstate->input_buf_len <= cstate->raw_buf_len);
     610              :     }
     611              : 
     612              :     /*
     613              :      * Copy down the unprocessed data if any.
                      :      *
                      :      * The memmove is skipped when nothing is left over or when the
                      :      * leftover data already starts at offset 0.  Either way, the length
                      :      * is reduced by the consumed prefix and the index is reset to 0.
     614              :      */
     615       217025 :     nbytes = RAW_BUF_BYTES(cstate);
     616       217025 :     if (nbytes > 0 && cstate->raw_buf_index > 0)
     617          612 :         memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
     618              :                 nbytes);
     619       217025 :     cstate->raw_buf_len -= cstate->raw_buf_index;
     620       217025 :     cstate->raw_buf_index = 0;
     621              : 
     622              :     /*
     623              :      * If raw_buf and input_buf are in fact the same buffer, adjust the
     624              :      * input_buf variables, too.
     625              :      */
     626       217025 :     if (cstate->raw_buf == cstate->input_buf)
     627              :     {
     628       216957 :         cstate->input_buf_len -= cstate->input_buf_index;
     629       216957 :         cstate->input_buf_index = 0;
     630              :     }
     631              : 
     632              :     /* Load more data */
     633       217025 :     inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
     634       217025 :                           1, RAW_BUF_SIZE - cstate->raw_buf_len);
     635       217023 :     nbytes += inbytes;
     636       217023 :     cstate->raw_buf[nbytes] = '\0';
     637       217023 :     cstate->raw_buf_len = nbytes;
     638              : 
     639       217023 :     cstate->bytes_processed += inbytes;
     640       217023 :     pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
     641              : 
     642       217023 :     if (inbytes == 0)
     643          978 :         cstate->raw_reached_eof = true;
     644       217023 : }
     645              : 
     646              : /*
     647              :  * CopyLoadInputBuf loads some more data into input_buf
     648              :  *
     649              :  * On return, at least one more input character is loaded into
     650              :  * input_buf, or input_reached_eof is set.
     651              :  *
     652              :  * If INPUT_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
     653              :  * of the buffer and then we load more data after that.
                      :  *
                      :  * The function loops: each iteration first calls CopyConvertBuf(), and
                      :  * returns as soon as INPUT_BUF_BYTES(cstate) exceeds the value it had on
                      :  * entry.  Failing that, it calls CopyConversionError() if
                      :  * input_reached_error is set, stops if input_reached_eof is set, and
                      :  * otherwise calls CopyLoadRawBuf() to fetch more raw data before trying
                      :  * again.
     654              :  */
     655              : static void
     656       217309 : CopyLoadInputBuf(CopyFromState cstate)
     657              : {
     658       217309 :     int         nbytes = INPUT_BUF_BYTES(cstate);
     659              : 
     660              :     /*
     661              :      * The caller has updated input_buf_index to indicate how much of the
     662              :      * input has been consumed and isn't needed anymore.  If input_buf is the
     663              :      * same physical area as raw_buf, update raw_buf_index accordingly.
     664              :      */
     665       217309 :     if (cstate->raw_buf == cstate->input_buf)
     666              :     {
     667              :         Assert(!cstate->need_transcoding);
     668              :         Assert(cstate->input_buf_index >= cstate->raw_buf_index);
     669       217269 :         cstate->raw_buf_index = cstate->input_buf_index;
     670              :     }
     671              : 
     672              :     for (;;)
     673              :     {
     674              :         /* If we now have some unconverted data, try to convert it */
     675       434312 :         CopyConvertBuf(cstate);
     676              : 
     677              :         /* If we now have some more input bytes ready, return them */
     678       434312 :         if (INPUT_BUF_BYTES(cstate) > nbytes)
     679       216024 :             return;
     680              : 
     681              :         /*
     682              :          * If we reached an invalid byte sequence, or we're at an incomplete
     683              :          * multi-byte character but there is no more raw input data, report
     684              :          * conversion error.
     685              :          */
     686       218288 :         if (cstate->input_reached_error)
     687            8 :             CopyConversionError(cstate);
     688              : 
     689              :         /* no more input, and everything has been converted */
     690       218280 :         if (cstate->input_reached_eof)
     691         1275 :             break;
     692              : 
     693              :         /* Try to load more raw data */
     694              :         Assert(!cstate->raw_reached_eof);
     695       217005 :         CopyLoadRawBuf(cstate);
     696              :     }
     697              : }
     698              : 
     699              : /*
     700              :  * CopyReadBinaryData
     701              :  *
     702              :  * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
     703              :  * and writes them to 'dest'.  Returns the number of bytes read (which
     704              :  * would be less than 'nbytes' only if we reach EOF).
                      :  *
                      :  * If raw_buf already holds at least 'nbytes' unread bytes, they are copied
                      :  * out in a single step.  Otherwise bytes are transferred piecewise,
                      :  * refilling raw_buf through CopyLoadRawBuf() each time it runs empty, until
                      :  * 'nbytes' have been copied or a refill leaves raw_reached_eof set.
                      :  * raw_buf_index is advanced past every byte handed out.
     705              :  */
     706              : static int
     707          236 : CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
     708              : {
     709          236 :     int         copied_bytes = 0;
     710              : 
     711          236 :     if (RAW_BUF_BYTES(cstate) >= nbytes)
     712              :     {
     713              :         /* Enough bytes are present in the buffer. */
     714          216 :         memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
     715          216 :         cstate->raw_buf_index += nbytes;
     716          216 :         copied_bytes = nbytes;
     717              :     }
     718              :     else
     719              :     {
     720              :         /*
     721              :          * Not enough bytes in the buffer, so must read from the file.  Need
     722              :          * to loop since 'nbytes' could be larger than the buffer size.
                      :          *
                      :          * Each pass copies the smaller of what is still wanted and what the
                      :          * buffer currently holds, then advances 'dest' by that amount.
     723              :          */
     724              :         do
     725              :         {
     726              :             int         copy_bytes;
     727              : 
     728              :             /* Load more data if buffer is empty. */
     729           20 :             if (RAW_BUF_BYTES(cstate) == 0)
     730              :             {
     731           20 :                 CopyLoadRawBuf(cstate);
     732           20 :                 if (cstate->raw_reached_eof)
     733            7 :                     break;      /* EOF */
     734              :             }
     735              : 
     736              :             /* Transfer some bytes. */
     737           13 :             copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
     738           13 :             memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
     739           13 :             cstate->raw_buf_index += copy_bytes;
     740           13 :             dest += copy_bytes;
     741           13 :             copied_bytes += copy_bytes;
     742           13 :         } while (copied_bytes < nbytes);
     743              :     }
     744              : 
     745          236 :     return copied_bytes;
     746              : }
     747              : 
     748              : /*
     749              :  * This function is exposed for use by extensions that read raw fields in the
     750              :  * next line. See NextCopyFromRawFieldsInternal() for details.
                      :  *
                      :  * It simply forwards to NextCopyFromRawFieldsInternal(), passing is_csv =
                      :  * true when cstate->opts.format is COPY_FORMAT_CSV and false otherwise, and
                      :  * returns that function's result.
     751              :  */
     752              : bool
     753            0 : NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
     754              : {
     755            0 :     return NextCopyFromRawFieldsInternal(cstate, fields, nfields,
     756            0 :                                          cstate->opts.format == COPY_FORMAT_CSV);
     757              : }
     758              : 
     759              : /*
     760              :  * Workhorse for NextCopyFromRawFields().
     761              :  *
     762              :  * Read raw fields in the next line for COPY FROM in text or csv mode. Return
     763              :  * false if no more lines.
     764              :  *
     765              :  * An internal temporary buffer is returned via 'fields'. It is valid until
     766              :  * the next call of the function. Since the function returns all raw fields
     767              :  * in the input file, 'nfields' could be different from the number of columns
     768              :  * in the relation.
     769              :  *
     770              :  * NOTE: The force_not_null option is not applied to the returned fields.
     771              :  *
                      :  * On the first call (cur_lineno == 0) with a header option other than
                      :  * COPY_HEADER_FALSE, the header lines are consumed before the data line is
                      :  * read: header_line lines are skipped, or exactly one when the option is
                      :  * COPY_HEADER_MATCH.  In the MATCH case the header's field count and each
                      :  * field name are compared against the columns in attnumlist, and any
                      :  * mismatch is reported with ERRCODE_BAD_COPY_FILE_FORMAT.  If CopyReadLine()
                      :  * reports end of input while the header is being read, false is returned
                      :  * once any MATCH checks have passed.
                      :  *
                      :  * When false is returned, 'fields' and 'nfields' are left untouched.
                      :  *
     772              :  * We use pg_attribute_always_inline to reduce function call overhead
     773              :  * and to help compilers to optimize away the 'is_csv' condition when called
     774              :  * by internal functions such as CopyFromTextLikeOneRow().
     775              :  */
     776              : static pg_attribute_always_inline bool
     777       754621 : NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
     778              : {
     779              :     int         fldct;
     780       754621 :     bool        done = false;
     781              : 
     782              :     /* only available for text or csv input */
     783              :     Assert(cstate->opts.format == COPY_FORMAT_TEXT ||
     784              :            cstate->opts.format == COPY_FORMAT_CSV);
     785              : 
     786              :     /* on input check that the header line is correct if needed */
     787       754621 :     if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
     788              :     {
     789              :         ListCell   *cur;
     790              :         TupleDesc   tupDesc;
     791           93 :         int         lines_to_skip = cstate->opts.header_line;
     792              : 
     793              :         /* If set to "match", one header line is skipped */
     794           93 :         if (cstate->opts.header_line == COPY_HEADER_MATCH)
     795           50 :             lines_to_skip = 1;
     796              : 
     797           93 :         tupDesc = RelationGetDescr(cstate->rel);
     798              : 
     799          218 :         for (int i = 0; i < lines_to_skip; i++)
     800              :         {
     801          130 :             cstate->cur_lineno++;
     802          130 :             if ((done = CopyReadLine(cstate, is_csv)))
     803            5 :                 break;
     804              :         }
     805              : 
     806           93 :         if (cstate->opts.header_line == COPY_HEADER_MATCH)
     807              :         {
     808              :             int         fldnum;
     809              : 
     810           50 :             if (is_csv)
     811            6 :                 fldct = CopyReadAttributesCSV(cstate);
     812              :             else
     813           44 :                 fldct = CopyReadAttributesText(cstate);
     814              : 
     815           50 :             if (fldct != list_length(cstate->attnumlist))
     816           16 :                 ereport(ERROR,
     817              :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     818              :                          errmsg("wrong number of fields in header line: got %d, expected %d",
     819              :                                 fldct, list_length(cstate->attnumlist))));
     820              : 
     821           34 :             fldnum = 0;
     822          104 :             foreach(cur, cstate->attnumlist)
     823              :             {
     824           83 :                 int         attnum = lfirst_int(cur);
     825              :                 char       *colName;
     826           83 :                 Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
     827              : 
     828              :                 Assert(fldnum < cstate->max_fields);
     829              : 
     830           83 :                 colName = cstate->raw_fields[fldnum++];
     831           83 :                 if (colName == NULL)
     832            4 :                     ereport(ERROR,
     833              :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     834              :                              errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
     835              :                                     fldnum, cstate->opts.null_print, NameStr(attr->attname))));
     836              : 
     837           79 :                 if (namestrcmp(&attr->attname, colName) != 0)
     838              :                 {
     839            9 :                     ereport(ERROR,
     840              :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     841              :                              errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
     842              :                                     fldnum, colName, NameStr(attr->attname))));
     843              :                 }
     844              :             }
     845              :         }
     846              : 
     847           64 :         if (done)
     848            5 :             return false;
     849              :     }
     850              : 
     851       754587 :     cstate->cur_lineno++;
     852              : 
     853              :     /* Actually read the line into memory here */
     854       754587 :     done = CopyReadLine(cstate, is_csv);
     855              : 
     856              :     /*
     857              :      * EOF at start of line means we're done.  If we see EOF after some
     858              :      * characters, we act as though it was newline followed by EOF, ie,
     859              :      * process the line and then exit loop on next iteration.
     860              :      */
     861       754569 :     if (done && cstate->line_buf.len == 0)
     862          984 :         return false;
     863              : 
     864              :     /* Parse the line into de-escaped field values */
     865       753585 :     if (is_csv)
     866          312 :         fldct = CopyReadAttributesCSV(cstate);
     867              :     else
     868       753273 :         fldct = CopyReadAttributesText(cstate);
     869              : 
     870       753577 :     *fields = cstate->raw_fields;
     871       753577 :     *nfields = fldct;
     872       753577 :     return true;
     873              : }
     874              : 
     875              : /*
     876              :  * Read next tuple from file for COPY FROM. Return false if no more tuples.
     877              :  *
     878              :  * 'econtext' is used to evaluate default expression for each column that is
     879              :  * either not read from the file or is using the DEFAULT option of COPY FROM.
     880              :  * It can be NULL when no default values are used, i.e. when all columns are
     881              :  * read from the file, and DEFAULT option is unset.
     882              :  *
     883              :  * 'values' and 'nulls' arrays must be the same length as columns of the
     884              :  * relation passed to BeginCopyFrom. This function fills the arrays.
                      :  *
                      :  * Before the row is fetched, 'values' is zeroed, 'nulls' is set to all-true
                      :  * and cstate->defaults is cleared.  The row itself is obtained through the
                      :  * cstate->routine->CopyFromOneRow callback; if that returns false, so does
                      :  * this function.  Afterwards the cstate->num_defaults default expressions
                      :  * listed in cstate->defmap are evaluated into the matching 'values' and
                      :  * 'nulls' slots.
     885              :  */
     886              : bool
     887       754646 : NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
     888              :              Datum *values, bool *nulls)
     889              : {
     890              :     TupleDesc   tupDesc;
     891              :     AttrNumber  num_phys_attrs,
     892       754646 :                 num_defaults = cstate->num_defaults;
     893              :     int         i;
     894       754646 :     int        *defmap = cstate->defmap;
     895       754646 :     ExprState **defexprs = cstate->defexprs;
     896              : 
     897       754646 :     tupDesc = RelationGetDescr(cstate->rel);
     898       754646 :     num_phys_attrs = tupDesc->natts;
     899              : 
     900              :     /* Initialize all values for row to NULL */
     901      3532192 :     MemSet(values, 0, num_phys_attrs * sizeof(Datum));
     902       754646 :     MemSet(nulls, true, num_phys_attrs * sizeof(bool));
     903       850718 :     MemSet(cstate->defaults, false, num_phys_attrs * sizeof(bool));
     904              : 
     905              :     /* Get one row from source */
     906       754646 :     if (!cstate->routine->CopyFromOneRow(cstate, econtext, values, nulls))
     907          996 :         return false;
     908              : 
     909              :     /*
     910              :      * Now compute and insert any defaults available for the columns not
     911              :      * provided by the input data.  Anything not processed here or above will
     912              :      * remain NULL.
                      :      *
                      :      * defmap[i] is used as the index into 'values', 'nulls' and defexprs
                      :      * alike.
     913              :      */
     914       793870 :     for (i = 0; i < num_defaults; i++)
     915              :     {
     916              :         /*
     917              :          * The caller must supply econtext and have switched into the
     918              :          * per-tuple memory context in it.
     919              :          */
     920              :         Assert(econtext != NULL);
     921              :         Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
     922              : 
     923        40345 :         values[defmap[i]] = ExecEvalExpr(defexprs[defmap[i]], econtext,
     924        40345 :                                          &nulls[defmap[i]]);
     925              :     }
     926              : 
     927       753525 :     return true;
     928              : }
     929              : 
     930              : /*
                      :  * Implementation of the per-row callback for text format.
                      :  *
                      :  * Thin wrapper: forwards all arguments to CopyFromTextLikeOneRow() with
                      :  * is_csv = false and returns its result.
                      :  */
     931              : bool
     932       754151 : CopyFromTextOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values,
     933              :                    bool *nulls)
     934              : {
     935       754151 :     return CopyFromTextLikeOneRow(cstate, econtext, values, nulls, false);
     936              : }
     937              : 
     938              : /*
                      :  * Implementation of the per-row callback for CSV format.
                      :  *
                      :  * Thin wrapper: forwards all arguments to CopyFromTextLikeOneRow() with
                      :  * is_csv = true and returns its result.
                      :  */
     939              : bool
     940          470 : CopyFromCSVOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values,
     941              :                   bool *nulls)
     942              : {
     943          470 :     return CopyFromTextLikeOneRow(cstate, econtext, values, nulls, true);
     944              : }
     945              : 
     946              : /*
     947              :  * Workhorse for CopyFromTextOneRow() and CopyFromCSVOneRow().
     948              :  *
     949              :  * We use pg_attribute_always_inline to reduce function call overhead
     950              :  * and to help compilers to optimize away the 'is_csv' condition.
     951              :  */
     952              : static pg_attribute_always_inline bool
     953       754621 : CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext,
     954              :                        Datum *values, bool *nulls, bool is_csv)
     955              : {
     956              :     TupleDesc   tupDesc;
     957              :     AttrNumber  attr_count;
     958       754621 :     FmgrInfo   *in_functions = cstate->in_functions;
     959       754621 :     Oid        *typioparams = cstate->typioparams;
     960       754621 :     ExprState **defexprs = cstate->defexprs;
     961              :     char      **field_strings;
     962              :     ListCell   *cur;
     963              :     int         fldct;
     964              :     int         fieldno;
     965              :     char       *string;
     966       754621 :     bool        current_row_erroneous = false;
     967              : 
     968       754621 :     tupDesc = RelationGetDescr(cstate->rel);
     969       754621 :     attr_count = list_length(cstate->attnumlist);
     970              : 
     971              :     /* read raw fields in the next line */
     972       754621 :     if (!NextCopyFromRawFieldsInternal(cstate, &field_strings, &fldct, is_csv))
     973          989 :         return false;
     974              : 
     975              :     /* check for overflowing fields */
     976       753577 :     if (attr_count > 0 && fldct > attr_count)
     977           16 :         ereport(ERROR,
     978              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     979              :                  errmsg("extra data after last expected column")));
     980              : 
     981       753561 :     fieldno = 0;
     982              : 
     983              :     /* Loop to read the user attributes on the line. */
     984      3444916 :     foreach(cur, cstate->attnumlist)
     985              :     {
     986      2691490 :         int         attnum = lfirst_int(cur);
     987      2691490 :         int         m = attnum - 1;
     988      2691490 :         Form_pg_attribute att = TupleDescAttr(tupDesc, m);
     989              : 
     990      2691490 :         if (fieldno >= fldct)
     991           16 :             ereport(ERROR,
     992              :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     993              :                      errmsg("missing data for column \"%s\"",
     994              :                             NameStr(att->attname))));
     995      2691474 :         string = field_strings[fieldno++];
     996              : 
     997      2691474 :         if (cstate->convert_select_flags &&
     998           10 :             !cstate->convert_select_flags[m])
     999              :         {
    1000              :             /* ignore input field, leaving column as NULL */
    1001            5 :             continue;
    1002              :         }
    1003              : 
    1004      2691469 :         if (is_csv)
    1005              :         {
    1006          619 :             if (string == NULL &&
    1007           27 :                 cstate->opts.force_notnull_flags[m])
    1008              :             {
    1009              :                 /*
    1010              :                  * FORCE_NOT_NULL option is set and column is NULL - convert
    1011              :                  * it to the NULL string.
    1012              :                  */
    1013           18 :                 string = cstate->opts.null_print;
    1014              :             }
    1015          601 :             else if (string != NULL && cstate->opts.force_null_flags[m]
    1016           32 :                      && strcmp(string, cstate->opts.null_print) == 0)
    1017              :             {
    1018              :                 /*
    1019              :                  * FORCE_NULL option is set and column matches the NULL
    1020              :                  * string. It must have been quoted, or otherwise the string
    1021              :                  * would already have been set to NULL. Convert it to NULL as
    1022              :                  * specified.
    1023              :                  */
    1024           17 :                 string = NULL;
    1025              :             }
    1026              :         }
    1027              : 
    1028      2691469 :         cstate->cur_attname = NameStr(att->attname);
    1029      2691469 :         cstate->cur_attval = string;
    1030              : 
    1031      2691469 :         if (string != NULL)
    1032      2688717 :             nulls[m] = false;
    1033              : 
    1034      2691469 :         if (cstate->defaults[m])
    1035              :         {
    1036              :             /* We must have switched into the per-tuple memory context */
    1037              :             Assert(econtext != NULL);
    1038              :             Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
    1039              : 
    1040           38 :             values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
    1041              :         }
    1042              : 
    1043              :         /*
    1044              :          * If ON_ERROR is specified, handle the different options
    1045              :          */
    1046      2691406 :         else if (!InputFunctionCallSafe(&in_functions[m],
    1047              :                                         string,
    1048      2691431 :                                         typioparams[m],
    1049              :                                         att->atttypmod,
    1050      2691431 :                                         (Node *) cstate->escontext,
    1051      2691431 :                                         &values[m]))
    1052              :         {
    1053              :             Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
    1054              : 
    1055          112 :             if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
    1056           82 :                 cstate->num_errors++;
    1057           30 :             else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL)
    1058              :             {
    1059              :                 /*
    1060              :                  * Reset error state so the subsequent InputFunctionCallSafe
    1061              :                  * call (for domain constraint check) can properly report
    1062              :                  * whether it succeeded or failed.
    1063              :                  */
    1064           30 :                 cstate->escontext->error_occurred = false;
    1065              : 
    1066              :                 Assert(cstate->domain_with_constraint != NULL);
    1067              : 
    1068              :                 /*
    1069              :                  * For constrained domains, we need an additional
    1070              :                  * InputFunctionCallSafe() to ensure that an error is thrown
    1071              :                  * if the domain constraint rejects null values.
    1072              :                  */
    1073           50 :                 if (!cstate->domain_with_constraint[m] ||
    1074           20 :                     InputFunctionCallSafe(&in_functions[m],
    1075              :                                           NULL,
    1076           20 :                                           typioparams[m],
    1077              :                                           att->atttypmod,
    1078           20 :                                           (Node *) cstate->escontext,
    1079           20 :                                           &values[m]))
    1080              :                 {
    1081           18 :                     nulls[m] = true;
    1082           18 :                     values[m] = (Datum) 0;
    1083              :                 }
    1084              :                 else
    1085           12 :                     ereport(ERROR,
    1086              :                             errcode(ERRCODE_NOT_NULL_VIOLATION),
    1087              :                             errmsg("domain %s does not allow null values",
    1088              :                                    format_type_be(typioparams[m])),
    1089              :                             errdetail("ON_ERROR SET_NULL cannot be applied because column \"%s\" (domain %s) does not accept null values.",
    1090              :                                       cstate->cur_attname,
    1091              :                                       format_type_be(typioparams[m])),
    1092              :                             errdatatype(typioparams[m]));
    1093              : 
    1094              :                 /*
    1095              :                  * We count only the number of rows (not fields) where
    1096              :                  * ON_ERROR SET_NULL was applied.
    1097              :                  */
    1098           18 :                 if (!current_row_erroneous)
    1099              :                 {
    1100           14 :                     current_row_erroneous = true;
    1101           14 :                     cstate->num_errors++;
    1102              :                 }
    1103              :             }
    1104              : 
    1105          100 :             if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
    1106              :             {
    1107              :                 /*
    1108              :                  * Since we emit line number and column info in the below
    1109              :                  * notice message, we suppress error context information other
    1110              :                  * than the relation name.
    1111              :                  */
    1112              :                 Assert(!cstate->relname_only);
    1113           44 :                 cstate->relname_only = true;
    1114              : 
    1115           44 :                 if (cstate->cur_attval)
    1116              :                 {
    1117              :                     char       *attval;
    1118              : 
    1119           40 :                     attval = CopyLimitPrintoutLength(cstate->cur_attval);
    1120              : 
    1121           40 :                     if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
    1122           24 :                         ereport(NOTICE,
    1123              :                                 errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": \"%s\"",
    1124              :                                        cstate->cur_lineno,
    1125              :                                        cstate->cur_attname,
    1126              :                                        attval));
    1127           16 :                     else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL)
    1128           16 :                         ereport(NOTICE,
    1129              :                                 errmsg("setting to null due to data type incompatibility at line %" PRIu64 " for column \"%s\": \"%s\"",
    1130              :                                        cstate->cur_lineno,
    1131              :                                        cstate->cur_attname,
    1132              :                                        attval));
    1133           40 :                     pfree(attval);
    1134              :                 }
    1135              :                 else
    1136              :                 {
    1137            4 :                     if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
    1138            4 :                         ereport(NOTICE,
    1139              :                                 errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": null input",
    1140              :                                        cstate->cur_lineno,
    1141              :                                        cstate->cur_attname));
    1142              :                 }
    1143              :                 /* reset relname_only */
    1144           44 :                 cstate->relname_only = false;
    1145              :             }
    1146              : 
    1147          100 :             if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
    1148           82 :                 return true;
    1149           18 :             else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL)
    1150           18 :                 continue;
    1151              :         }
    1152              : 
    1153      2691332 :         cstate->cur_attname = NULL;
    1154      2691332 :         cstate->cur_attval = NULL;
    1155              :     }
    1156              : 
    1157              :     Assert(fieldno == attr_count);
    1158              : 
    1159       753426 :     return true;
    1160              : }
    1161              : 
    1162              : /*
                      :  * Implementation of the per-row callback for binary format
                      :  *
                      :  * Reads a 16-bit field count and then, for each column listed in
                      :  * cstate->attnumlist, one attribute via CopyReadBinaryAttribute(), storing
                      :  * the result in values[attnum - 1] and nulls[attnum - 1].
                      :  *
                      :  * Returns true when a row was read.  Returns false when CopyGetInt16()
                      :  * reports EOF or when the field count is the -1 EOF marker.  Raises an
                      :  * error if data follows the EOF marker, or if the field count differs
                      :  * from the number of columns in attnumlist.
                      :  *
                      :  * econtext is not referenced by this function.
                      :  */
    1163              : bool
    1164           25 : CopyFromBinaryOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values,
    1165              :                      bool *nulls)
    1166              : {
    1167              :     TupleDesc   tupDesc;
    1168              :     AttrNumber  attr_count;
    1169           25 :     FmgrInfo   *in_functions = cstate->in_functions;
    1170           25 :     Oid        *typioparams = cstate->typioparams;
    1171              :     int16       fld_count;
    1172              :     ListCell   *cur;
    1173              : 
    1174           25 :     tupDesc = RelationGetDescr(cstate->rel);
    1175           25 :     attr_count = list_length(cstate->attnumlist);
    1176              : 
    1177           25 :     cstate->cur_lineno++;   /* counted before the read is attempted */
    1178              : 
    1179           25 :     if (!CopyGetInt16(cstate, &fld_count))
    1180              :     {
    1181              :         /* EOF detected (end of file, or protocol-level EOF) */
    1182            0 :         return false;
    1183              :     }
    1184              : 
    1185           25 :     if (fld_count == -1)
    1186              :     {
    1187              :         /*
    1188              :          * Received EOF marker.  Wait for the protocol-level EOF, and complain
    1189              :          * if it doesn't come immediately.  In COPY FROM STDIN, this ensures
    1190              :          * that we correctly handle CopyFail, if client chooses to send that
    1191              :          * now.  When copying from file, we could ignore the rest of the file
    1192              :          * like in text mode, but we choose to be consistent with the COPY
    1193              :          * FROM STDIN case.
    1194              :          */
    1195              :         char        dummy;
    1196              : 
    1197            7 :         if (CopyReadBinaryData(cstate, &dummy, 1) > 0)
    1198            0 :             ereport(ERROR,
    1199              :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1200              :                      errmsg("received copy data after EOF marker")));
    1201            7 :         return false;
    1202              :     }
    1203              : 
    1204           18 :     if (fld_count != attr_count)    /* row must supply exactly the listed columns */
    1205            0 :         ereport(ERROR,
    1206              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1207              :                  errmsg("row field count is %d, expected %d",
    1208              :                         fld_count, attr_count)));
    1209              : 
    1210          117 :     foreach(cur, cstate->attnumlist)
    1211              :     {
    1212          100 :         int         attnum = lfirst_int(cur);
    1213          100 :         int         m = attnum - 1; /* zero-based index into values[]/nulls[] */
    1214          100 :         Form_pg_attribute att = TupleDescAttr(tupDesc, m);
    1215              : 
    1216          100 :         cstate->cur_attname = NameStr(att->attname);    /* NOTE(review): set only while this attribute is read; presumably for error context -- confirm */
    1217          199 :         values[m] = CopyReadBinaryAttribute(cstate,
    1218          100 :                                             &in_functions[m],
    1219          100 :                                             typioparams[m],
    1220              :                                             att->atttypmod,
    1221              :                                             &nulls[m]);
    1222           99 :         cstate->cur_attname = NULL;
    1223              :     }
    1224              : 
    1225           17 :     return true;
    1226              : }
    1227              : 
    1228              : /*
    1229              :  * Read the next input line and stash it in line_buf.
    1230              :  *
    1231              :  * Result is true if read was terminated by EOF, false if terminated
    1232              :  * by newline.  The terminating newline or EOF marker is not included
    1233              :  * in the final value of line_buf.
                      :  *
                      :  * is_csv only selects which specialization of CopyReadLineText() is
                      :  * called.
                      :  *
                      :  * line_buf is reset and line_buf_valid is cleared on entry;
                      :  * line_buf_valid is set to true again just before returning.  On EOF
                      :  * with copy_src == COPY_FRONTEND, any remaining copy data is read and
                      :  * thrown away, and the input_buf/raw_buf index and length fields are
                      :  * reset to zero.
    1234              :  */
    1235              : static bool
    1236       754717 : CopyReadLine(CopyFromState cstate, bool is_csv)
    1237              : {
    1238              :     bool        result;
    1239              : 
    1240       754717 :     resetStringInfo(&cstate->line_buf);
    1241       754717 :     cstate->line_buf_valid = false;
    1242              : 
    1243              :     /*
    1244              :      * Parse data and transfer into line_buf.
    1245              :      *
    1246              :      * Because this is performance critical, we inline CopyReadLineText() and
    1247              :      * pass the boolean parameters as constants to allow the compiler to emit
    1248              :      * specialized code with fewer branches.
                      :      *
                      :      * That is why the two branches below look redundant: each call passes
                      :      * a literal true/false instead of forwarding is_csv.
    1249              :      */
    1250       754717 :     if (is_csv)
    1251          546 :         result = CopyReadLineText(cstate, true);
    1252              :     else
    1253       754171 :         result = CopyReadLineText(cstate, false);
    1254              : 
    1255       754699 :     if (result)
    1256              :     {
    1257              :         /*
    1258              :          * Reached EOF.  In protocol version 3, we should ignore anything
    1259              :          * after \. up to the protocol end of copy data.  (XXX maybe better
    1260              :          * not to treat \. as special?)
                      :          *
                      :          * The loop below keeps calling CopyGetData() until it returns no
                      :          * more bytes.  Whatever it placed in input_buf is discarded by
                      :          * zeroing the buffer bookkeeping afterwards.
    1261              :          */
    1262          993 :         if (cstate->copy_src == COPY_FRONTEND)
    1263              :         {
    1264              :             int         inbytes;
    1265              : 
    1266              :             do
    1267              :             {
    1268          523 :                 inbytes = CopyGetData(cstate, cstate->input_buf,
    1269              :                                       1, INPUT_BUF_SIZE);
    1270          523 :             } while (inbytes > 0);
    1271          523 :             cstate->input_buf_index = 0;
    1272          523 :             cstate->input_buf_len = 0;
    1273          523 :             cstate->raw_buf_index = 0;
    1274          523 :             cstate->raw_buf_len = 0;
    1275              :         }
    1276              :     }
    1277              :     else
    1278              :     {
    1279              :         /*
    1280              :          * If we didn't hit EOF, then we must have transferred the EOL marker
    1281              :          * to line_buf along with the data.  Get rid of it.
                      :          *
                      :          * The marker is dropped by shortening line_buf.len and writing a
                      :          * new terminating '\0': one byte for EOL_NL and EOL_CR, two bytes
                      :          * for EOL_CRNL.
    1282              :          */
    1283       753706 :         switch (cstate->eol_type)
    1284              :         {
    1285       753706 :             case EOL_NL:
    1286              :                 Assert(cstate->line_buf.len >= 1);
    1287              :                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
    1288       753706 :                 cstate->line_buf.len--;
    1289       753706 :                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
    1290       753706 :                 break;
    1291            0 :             case EOL_CR:
    1292              :                 Assert(cstate->line_buf.len >= 1);
    1293              :                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
    1294            0 :                 cstate->line_buf.len--;
    1295            0 :                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
    1296            0 :                 break;
    1297            0 :             case EOL_CRNL:
    1298              :                 Assert(cstate->line_buf.len >= 2);
    1299              :                 Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
    1300              :                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
    1301            0 :                 cstate->line_buf.len -= 2;
    1302            0 :                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
    1303            0 :                 break;
    1304            0 :             case EOL_UNKNOWN:
    1305              :                 /* shouldn't get here */
    1306              :                 Assert(false);
    1307            0 :                 break;
    1308              :         }
    1309              :     }
    1310              : 
    1311              :     /* Now it's safe to use the buffer in error messages */
    1312       754699 :     cstate->line_buf_valid = true;
    1313              : 
    1314       754699 :     return result;
    1315              : }
    1316              : 
    1317              : #ifndef USE_NO_SIMD
    1318              : /*
    1319              :  * Helper function for CopyReadLineText() that uses SIMD instructions to scan
    1320              :  * the input buffer for special characters.  This can be much faster.
    1321              :  *
    1322              :  * Note that we disable SIMD for the remainder of the COPY FROM command upon
    1323              :  * encountering a special character (except for end-of-line characters) or a
    1324              :  * short line.  This is perhaps too conservative, but it should help avoid
    1325              :  * regressions.  It could probably be made more lenient in the future via
    1326              :  * fine-tuned heuristics.
                      :  *
                      :  * Returns true only when the input is exhausted (no bytes remain after
                      :  * reloading the input buffer); the caller then reports EOF.  Otherwise
                      :  * returns false and the caller continues with its scalar loop.
                      :  *
                      :  * *hit_eof_p is updated from cstate->input_reached_eof each time the input
                      :  * buffer is reloaded.  *input_buf_ptr_p is set before returning to the
                      :  * position at which scanning stopped: the first special character found,
                      :  * or the first byte that was not examined.
                      :  *
                      :  * The only state member written directly here is cstate->simd_enabled,
                      :  * which may be cleared; other changes happen through REFILL_LINEBUF and
                      :  * CopyLoadInputBuf().
    1327              :  */
    1328              : static bool
    1329       332164 : CopyReadLineTextSIMDHelper(CopyFromState cstate, bool is_csv,
    1330              :                            bool *hit_eof_p, int *input_buf_ptr_p)
    1331              : {
    1332              :     char       *copy_input_buf;
    1333              :     int         input_buf_ptr;
    1334              :     int         copy_buf_len;
    1335              :     bool        unique_esc_char;    /* for csv, do quote/esc chars differ? */
    1336       332164 :     bool        first = true;   /* no clean chunk skipped yet in this call */
    1337       332164 :     bool        result = false;
    1338       332164 :     const Vector8 nl_vec = vector8_broadcast('\n');
    1339       332164 :     const Vector8 cr_vec = vector8_broadcast('\r');
    1340              :     Vector8     bs_or_quote_vec;    /* '\' for text, quote for csv */
    1341              :     Vector8     esc_vec;        /* only for csv */
    1342              : 
    1343       332164 :     if (is_csv)
    1344              :     {
    1345          392 :         char        quote = cstate->opts.quote[0];
    1346          392 :         char        esc = cstate->opts.escape[0];
    1347              : 
    1348          392 :         bs_or_quote_vec = vector8_broadcast(quote);
    1349          392 :         esc_vec = vector8_broadcast(esc);
    1350          392 :         unique_esc_char = (quote != esc);
    1351              :     }
    1352              :     else
    1353              :     {
    1354       331772 :         bs_or_quote_vec = vector8_broadcast('\\');
    1355       331772 :         unique_esc_char = false;    /* esc_vec stays unset and is never compared */
    1356              :     }
    1357              : 
    1358              :     /*
    1359              :      * For a little extra speed within the loop, we copy some state members
    1360              :      * into local variables. Note that we need to use a separate local
    1361              :      * variable for input_buf_ptr so that the REFILL_LINEBUF macro works.  We
    1362              :      * copy its value into the input_buf_ptr_p argument before returning.
    1363              :      */
    1364       332164 :     copy_input_buf = cstate->input_buf;
    1365       332164 :     input_buf_ptr = cstate->input_buf_index;
    1366       332164 :     copy_buf_len = cstate->input_buf_len;
    1367              : 
    1368              :     /*
    1369              :      * See the corresponding loop in CopyReadLineText() for more information
    1370              :      * about the purpose of this loop.  This one does the same thing using
    1371              :      * SIMD instructions, although we are quick to bail out to the scalar path
    1372              :      * if we encounter a special character.
                      :      *
                      :      * Every exit from the loop is a break, so the store to
                      :      * *input_buf_ptr_p after the loop runs on all normal return paths.
    1373              :      */
    1374              :     for (;;)
    1375       390136 :     {
    1376              :         Vector8     chunk;
    1377              :         Vector8     match;
    1378              : 
    1379              :         /* Load more data if needed. */
    1380       722300 :         if (copy_buf_len - input_buf_ptr < sizeof(Vector8))
    1381              :         {
    1382       216815 :             REFILL_LINEBUF;
    1383              : 
    1384       216815 :             CopyLoadInputBuf(cstate);
    1385              :             /* update our local variables */
    1386       216805 :             *hit_eof_p = cstate->input_reached_eof;
    1387       216805 :             input_buf_ptr = cstate->input_buf_index;
    1388       216805 :             copy_buf_len = cstate->input_buf_len;
    1389              : 
    1390              :             /*
    1391              :              * If we are completely out of data, break out of the loop,
    1392              :              * reporting EOF.
    1393              :              */
    1394       216805 :             if (INPUT_BUF_BYTES(cstate) <= 0)
    1395              :             {
    1396          586 :                 result = true;
    1397          586 :                 break;
    1398              :             }
    1399              :         }
    1400              : 
    1401              :         /*
    1402              :          * If we still don't have enough data for the SIMD path, fall back to
    1403              :          * the scalar code.  Note that this doesn't necessarily mean we
    1404              :          * encountered a short line, so we leave cstate->simd_enabled set to
    1405              :          * true.
    1406              :          */
    1407       721704 :         if (copy_buf_len - input_buf_ptr < sizeof(Vector8))
    1408       215334 :             break;
    1409              : 
    1410              :         /*
    1411              :          * If we made it here, we have at least enough data to fit in a
    1412              :          * Vector8, so we can use SIMD instructions to scan for special
    1413              :          * characters.
    1414              :          */
    1415       506370 :         vector8_load(&chunk, (const uint8 *) &copy_input_buf[input_buf_ptr]);
    1416              : 
    1417              :         /*
    1418              :          * Check for \n, \r, \\ (for text), quotes (for csv), and escapes (for
    1419              :          * csv, if different from quotes).
    1420              :          */
    1421       506370 :         match = vector8_eq(chunk, nl_vec);
    1422       506370 :         match = vector8_or(match, vector8_eq(chunk, cr_vec));
    1423       506370 :         match = vector8_or(match, vector8_eq(chunk, bs_or_quote_vec));
    1424       506370 :         if (unique_esc_char)
    1425           21 :             match = vector8_or(match, vector8_eq(chunk, esc_vec));
    1426              : 
    1427              :         /*
    1428              :          * If we found a special character, advance to it and hand off to the
    1429              :          * scalar path.  Except for end-of-line characters, we also disable
    1430              :          * SIMD processing for the remainder of the COPY FROM command.
                      :          *
                      :          * The offset added to input_buf_ptr is the position of the
                      :          * rightmost set bit in the mask, which is taken to be the first
                      :          * matching byte of the chunk.
    1431              :          */
    1432       506370 :         if (vector8_is_highbit_set(match))
    1433              :         {
    1434              :             uint32      mask;
    1435              :             char        c;
    1436              : 
    1437       116234 :             mask = vector8_highbit_mask(match);
    1438       116234 :             input_buf_ptr += pg_rightmost_one_pos32(mask);
    1439              : 
    1440              :             /*
    1441              :              * Don't disable SIMD if we found \n or \r, else we'd stop using
    1442              :              * SIMD instructions after the first line.  As an exception, we do
    1443              :              * disable it if this is the first vector we processed, as that
    1444              :              * means the line is too short for SIMD.
    1445              :              */
    1446       116234 :             c = copy_input_buf[input_buf_ptr];
    1447       116234 :             if (first || (c != '\n' && c != '\r'))
    1448          393 :                 cstate->simd_enabled = false;
    1449              : 
    1450       116234 :             break;
    1451              :         }
    1452              : 
    1453              :         /* That chunk was clear of special characters, so we can skip it. */
    1454       390136 :         input_buf_ptr += sizeof(Vector8);
    1455       390136 :         first = false;
    1456              :     }
    1457              : 
    1458       332154 :     *input_buf_ptr_p = input_buf_ptr;
    1459       332154 :     return result;
    1460              : }
    1461              : #endif                          /* ! USE_NO_SIMD */
    1462              : 
    1463              : /*
    1464              :  * CopyReadLineText - inner loop of CopyReadLine for text mode
    1465              :  */
    1466              : static pg_attribute_always_inline bool
    1467       754717 : CopyReadLineText(CopyFromState cstate, bool is_csv)
    1468              : {
    1469              :     char       *copy_input_buf;
    1470              :     int         input_buf_ptr;
    1471              :     int         copy_buf_len;
    1472       754717 :     bool        need_data = false;
    1473       754717 :     bool        hit_eof = false;
    1474       754717 :     bool        result = false;
    1475              : 
    1476              :     /* CSV variables */
    1477       754717 :     bool        in_quote = false,
    1478       754717 :                 last_was_esc = false;
    1479       754717 :     char        quotec = '\0';
    1480       754717 :     char        escapec = '\0';
    1481              : 
    1482       754717 :     if (is_csv)
    1483              :     {
    1484          546 :         quotec = cstate->opts.quote[0];
    1485          546 :         escapec = cstate->opts.escape[0];
    1486              :         /* ignore special escape processing if it's the same as quotec */
    1487          546 :         if (quotec == escapec)
    1488          438 :             escapec = '\0';
    1489              :     }
    1490              : 
    1491              :     /*
    1492              :      * The objective of this loop is to transfer the entire next input line
    1493              :      * into line_buf.  Hence, we only care for detecting newlines (\r and/or
    1494              :      * \n) and the end-of-copy marker (\.).
    1495              :      *
    1496              :      * In CSV mode, \r and \n inside a quoted field are just part of the data
    1497              :      * value and are put in line_buf.  We keep just enough state to know if we
    1498              :      * are currently in a quoted field or not.
    1499              :      *
    1500              :      * The input has already been converted to the database encoding.  All
    1501              :      * supported server encodings have the property that all bytes in a
    1502              :      * multi-byte sequence have the high bit set, so a multibyte character
    1503              :      * cannot contain any newline or escape characters embedded in the
    1504              :      * multibyte sequence.  Therefore, we can process the input byte-by-byte,
    1505              :      * regardless of the encoding.
    1506              :      *
    1507              :      * For speed, we try to move data from input_buf to line_buf in chunks
    1508              :      * rather than one character at a time.  input_buf_ptr points to the next
    1509              :      * character to examine; any characters from input_buf_index to
    1510              :      * input_buf_ptr have been determined to be part of the line, but not yet
    1511              :      * transferred to line_buf.
    1512              :      *
    1513              :      * For a little extra speed within the loop, we copy some state
    1514              :      * information into local variables.  input_buf_ptr could be changed in
    1515              :      * the SIMD path, so we must set that one before it.  The others are set
    1516              :      * afterwards.
    1517              :      */
    1518       754717 :     input_buf_ptr = cstate->input_buf_index;
    1519              : 
    1520              :     /*
    1521              :      * We first try to use SIMD for the task described above, falling back to
    1522              :      * the scalar path (i.e., the loop below) if needed.
    1523              :      */
    1524              : #ifndef USE_NO_SIMD
    1525       754717 :     if (cstate->simd_enabled)
    1526              :     {
    1527              :         /*
    1528              :          * Using temporary variables seems to encourage the compiler to keep
    1529              :          * them in a register, which is beneficial for performance.
    1530              :          */
    1531       332164 :         bool        tmp_hit_eof = false;
    1532       332164 :         int         tmp_input_buf_ptr = 0;  /* silence compiler warning */
    1533              : 
    1534       332164 :         result = CopyReadLineTextSIMDHelper(cstate, is_csv, &tmp_hit_eof,
    1535              :                                             &tmp_input_buf_ptr);
    1536       332154 :         hit_eof = tmp_hit_eof;
    1537       332154 :         input_buf_ptr = tmp_input_buf_ptr;
    1538              : 
    1539       332154 :         if (result)
    1540              :         {
    1541              :             /* Transfer any still-uncopied data to line_buf. */
    1542          586 :             REFILL_LINEBUF;
    1543              : 
    1544          586 :             return result;
    1545              :         }
    1546              :     }
    1547              : #endif                          /* ! USE_NO_SIMD */
    1548              : 
    1549       754121 :     copy_input_buf = cstate->input_buf;
    1550       754121 :     copy_buf_len = cstate->input_buf_len;
    1551              : 
    1552              :     for (;;)
    1553      8712837 :     {
    1554              :         int         prev_raw_ptr;
    1555              :         char        c;
    1556              : 
    1557              :         /*
    1558              :          * Load more data if needed.
    1559              :          *
    1560              :          * TODO: We could just force four bytes of read-ahead and avoid the
    1561              :          * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE().  That was
    1562              :          * unsafe with the old v2 COPY protocol, but we don't support that
    1563              :          * anymore.
    1564              :          */
    1565      9466958 :         if (input_buf_ptr >= copy_buf_len || need_data)
    1566              :         {
    1567          494 :             REFILL_LINEBUF;
    1568              : 
    1569          494 :             CopyLoadInputBuf(cstate);
    1570              :             /* update our local variables */
    1571          494 :             hit_eof = cstate->input_reached_eof;
    1572          494 :             input_buf_ptr = cstate->input_buf_index;
    1573          494 :             copy_buf_len = cstate->input_buf_len;
    1574              : 
    1575              :             /*
    1576              :              * If we are completely out of data, break out of the loop,
    1577              :              * reporting EOF.
    1578              :              */
    1579          494 :             if (INPUT_BUF_BYTES(cstate) <= 0)
    1580              :             {
    1581          360 :                 result = true;
    1582          360 :                 break;
    1583              :             }
    1584          134 :             need_data = false;
    1585              :         }
    1586              : 
    1587              :         /* OK to fetch a character */
    1588      9466598 :         prev_raw_ptr = input_buf_ptr;
    1589      9466598 :         c = copy_input_buf[input_buf_ptr++];
    1590              : 
    1591      9466598 :         if (is_csv)
    1592              :         {
    1593              :             /*
    1594              :              * If character is '\r', we may need to look ahead below.  Force
    1595              :              * fetch of the next character if we don't already have it.  We
    1596              :              * need to do this before changing CSV state, in case '\r' is also
    1597              :              * the quote or escape character.
    1598              :              */
    1599         2615 :             if (c == '\r')
    1600              :             {
    1601           24 :                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1602              :             }
    1603              : 
    1604              :             /*
    1605              :              * Dealing with quotes and escapes here is mildly tricky. If the
    1606              :              * quote char is also the escape char, there's no problem - we
    1607              :              * just use the char as a toggle. If they are different, we need
    1608              :              * to ensure that we only take account of an escape inside a
    1609              :              * quoted field and immediately preceding a quote char, and not
    1610              :              * the second in an escape-escape sequence.
    1611              :              */
    1612         2615 :             if (in_quote && c == escapec)
    1613           32 :                 last_was_esc = !last_was_esc;
    1614         2615 :             if (c == quotec && !last_was_esc)
    1615          308 :                 in_quote = !in_quote;
    1616         2615 :             if (c != escapec)
    1617         2579 :                 last_was_esc = false;
    1618              : 
    1619              :             /*
    1620              :              * Updating the line count for embedded CR and/or LF chars is
    1621              :              * necessarily a little fragile - this test is probably about the
    1622              :              * best we can do.  (XXX it's arguable whether we should do this
    1623              :              * at all --- is cur_lineno a physical or logical count?)
    1624              :              */
    1625         2615 :             if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
    1626           24 :                 cstate->cur_lineno++;
    1627              :         }
    1628              : 
    1629              :         /* Process \r */
    1630      9466598 :         if (c == '\r' && (!is_csv || !in_quote))
    1631              :         {
    1632              :             /* Check for \r\n on first line, _and_ handle \r\n. */
    1633            0 :             if (cstate->eol_type == EOL_UNKNOWN ||
    1634            0 :                 cstate->eol_type == EOL_CRNL)
    1635              :             {
    1636              :                 /*
    1637              :                  * If need more data, go back to loop top to load it.
    1638              :                  *
    1639              :                  * Note that if we are at EOF, c will wind up as '\0' because
    1640              :                  * of the guaranteed pad of input_buf.
    1641              :                  */
    1642            0 :                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1643              : 
    1644              :                 /* get next char */
    1645            0 :                 c = copy_input_buf[input_buf_ptr];
    1646              : 
    1647            0 :                 if (c == '\n')
    1648              :                 {
    1649            0 :                     input_buf_ptr++;    /* eat newline */
    1650            0 :                     cstate->eol_type = EOL_CRNL; /* in case not set yet */
    1651              :                 }
    1652              :                 else
    1653              :                 {
    1654              :                     /* found \r, but no \n */
    1655            0 :                     if (cstate->eol_type == EOL_CRNL)
    1656            0 :                         ereport(ERROR,
    1657              :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1658              :                                  !is_csv ?
    1659              :                                  errmsg("literal carriage return found in data") :
    1660              :                                  errmsg("unquoted carriage return found in data"),
    1661              :                                  !is_csv ?
    1662              :                                  errhint("Use \"\\r\" to represent carriage return.") :
    1663              :                                  errhint("Use quoted CSV field to represent carriage return.")));
    1664              : 
    1665              :                     /*
    1666              :                      * if we got here, it is the first line and we didn't find
    1667              :                      * \n, so don't consume the peeked character
    1668              :                      */
    1669            0 :                     cstate->eol_type = EOL_CR;
    1670              :                 }
    1671              :             }
    1672            0 :             else if (cstate->eol_type == EOL_NL)
    1673            0 :                 ereport(ERROR,
    1674              :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1675              :                          !is_csv ?
    1676              :                          errmsg("literal carriage return found in data") :
    1677              :                          errmsg("unquoted carriage return found in data"),
    1678              :                          !is_csv ?
    1679              :                          errhint("Use \"\\r\" to represent carriage return.") :
    1680              :                          errhint("Use quoted CSV field to represent carriage return.")));
    1681              :             /* If reach here, we have found the line terminator */
    1682            0 :             break;
    1683              :         }
    1684              : 
    1685              :         /* Process \n */
    1686      9466598 :         if (c == '\n' && (!is_csv || !in_quote))
    1687              :         {
    1688       753706 :             if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
    1689            0 :                 ereport(ERROR,
    1690              :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1691              :                          !is_csv ?
    1692              :                          errmsg("literal newline found in data") :
    1693              :                          errmsg("unquoted newline found in data"),
    1694              :                          !is_csv ?
    1695              :                          errhint("Use \"\\n\" to represent newline.") :
    1696              :                          errhint("Use quoted CSV field to represent newline.")));
    1697       753706 :             cstate->eol_type = EOL_NL;   /* in case not set yet */
    1698              :             /* If reach here, we have found the line terminator */
    1699       753706 :             break;
    1700              :         }
    1701              : 
    1702              :         /*
    1703              :          * Process backslash, except in CSV mode where backslash is a normal
    1704              :          * character.
    1705              :          */
    1706      8712892 :         if (c == '\\' && !is_csv)
    1707              :         {
    1708              :             char        c2;
    1709              : 
    1710         4904 :             IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1711         4904 :             IF_NEED_REFILL_AND_EOF_BREAK(0);
    1712              : 
    1713              :             /* -----
    1714              :              * get next character
    1715              :              * Note: we do not change c so if it isn't \., we can fall
    1716              :              * through and continue processing.
    1717              :              * -----
    1718              :              */
    1719         4904 :             c2 = copy_input_buf[input_buf_ptr];
    1720              : 
    1721         4904 :             if (c2 == '.')
    1722              :             {
    1723           55 :                 input_buf_ptr++;    /* consume the '.' */
    1724           55 :                 if (cstate->eol_type == EOL_CRNL)
    1725              :                 {
    1726              :                     /* Get the next character */
    1727            0 :                     IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1728              :                     /* if hit_eof, c2 will become '\0' */
    1729            0 :                     c2 = copy_input_buf[input_buf_ptr++];
    1730              : 
    1731            0 :                     if (c2 == '\n')
    1732            0 :                         ereport(ERROR,
    1733              :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1734              :                                  errmsg("end-of-copy marker does not match previous newline style")));
    1735            0 :                     else if (c2 != '\r')
    1736            0 :                         ereport(ERROR,
    1737              :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1738              :                                  errmsg("end-of-copy marker is not alone on its line")));
    1739              :                 }
    1740              : 
    1741              :                 /* Get the next character */
    1742           55 :                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1743              :                 /* if hit_eof, c2 will become '\0' */
    1744           55 :                 c2 = copy_input_buf[input_buf_ptr++];
    1745              : 
    1746           55 :                 if (c2 != '\r' && c2 != '\n')
    1747            4 :                     ereport(ERROR,
    1748              :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1749              :                              errmsg("end-of-copy marker is not alone on its line")));
    1750              : 
    1751           51 :                 if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
    1752           51 :                     (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
    1753           51 :                     (cstate->eol_type == EOL_CR && c2 != '\r'))
    1754            0 :                     ereport(ERROR,
    1755              :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1756              :                              errmsg("end-of-copy marker does not match previous newline style")));
    1757              : 
    1758              :                 /*
    1759              :                  * If there is any data on this line before the \., complain.
    1760              :                  */
    1761           51 :                 if (cstate->line_buf.len > 0 ||
    1762           51 :                     prev_raw_ptr > cstate->input_buf_index)
    1763            4 :                     ereport(ERROR,
    1764              :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1765              :                              errmsg("end-of-copy marker is not alone on its line")));
    1766              : 
    1767              :                 /*
    1768              :                  * Discard the \. and newline, then report EOF.
    1769              :                  */
    1770           47 :                 cstate->input_buf_index = input_buf_ptr;
    1771           47 :                 result = true;  /* report EOF */
    1772           47 :                 break;
    1773              :             }
    1774              :             else
    1775              :             {
    1776              :                 /*
    1777              :                  * If we are here, it means we found a backslash followed by
    1778              :                  * something other than a period.  In non-CSV mode, anything
    1779              :                  * after a backslash is special, so we skip over that second
    1780              :                  * character too.  If we didn't do that \\. would be
    1781              :                  * considered an end-of-copy marker, while in non-CSV mode it is a
    1782              :                  * literal backslash followed by a period.
    1783              :                  */
    1784         4849 :                 input_buf_ptr++;
    1785              :             }
    1786              :         }
    1787              :     }                           /* end of outer loop */
    1788              : 
    1789              :     /*
    1790              :      * Transfer any still-uncopied data to line_buf.
    1791              :      */
    1792       754113 :     REFILL_LINEBUF;
    1793              : 
    1794       754113 :     return result;
    1795              : }
    1796              : 
    1797              : /*
    1798              :  *  Return the value (0..15) of the hexadecimal digit 'hex'.  No validation is done here; callers in this file check isxdigit() first.
    1799              :  */
    1800              : static int
    1801            0 : GetDecimalFromHex(char hex)
    1802              : {
    1803            0 :     if (isdigit((unsigned char) hex))
    1804            0 :         return hex - '0';
    1805              :     else
    1806            0 :         return pg_ascii_tolower((unsigned char) hex) - 'a' + 10;    /* fold case, then map 'a'..'f' to 10..15 */
    1807              : }
    1808              : 
    1809              : /*
    1810              :  * Parse the current line into separate attributes (fields),
    1811              :  * performing de-escaping as needed.
    1812              :  *
    1813              :  * The input is in line_buf.  We use attribute_buf to hold the result
    1814              :  * strings.  cstate->raw_fields[k] is set to point to the k'th attribute
    1815              :  * string, or NULL when the input matches the null marker string.
    1816              :  * This array is expanded as necessary.
    1817              :  *
    1818              :  * (Note that the caller cannot check for nulls since the returned
    1819              :  * string would be the post-de-escaping equivalent, which may look
    1820              :  * the same as some valid data string.)
    1821              :  *
    1822              :  * cstate->opts.delim is the column delimiter string (only its first byte is used).
    1823              :  * cstate->opts.null_print is the null marker string.  Note that this is compared to
    1824              :  * the pre-de-escaped input string; the same holds for cstate->opts.default_print, if set.
    1825              :  *
    1826              :  * The return value is the number of fields actually read (0 when max_fields <= 0).
    1827              :  */
    1828              : static int
    1829       753317 : CopyReadAttributesText(CopyFromState cstate)
    1830              : {
    1831       753317 :     char        delimc = cstate->opts.delim[0];
    1832              :     int         fieldno;
    1833              :     char       *output_ptr;
    1834              :     char       *cur_ptr;
    1835              :     char       *line_end_ptr;
    1836              : 
    1837              :     /*
    1838              :      * We need a special case for zero-column tables: check that the input
    1839              :      * line is empty, and return.
    1840              :      */
    1841       753317 :     if (cstate->max_fields <= 0)
    1842              :     {
    1843            4 :         if (cstate->line_buf.len != 0)
    1844            0 :             ereport(ERROR,
    1845              :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1846              :                      errmsg("extra data after last expected column")));
    1847            4 :         return 0;
    1848              :     }
    1849              : 
    1850       753313 :     resetStringInfo(&cstate->attribute_buf);
    1851              : 
    1852              :     /*
    1853              :      * The de-escaped attributes will certainly not be longer than the input
    1854              :      * data line, so we can just force attribute_buf to be large enough and
    1855              :      * then transfer data without any checks for enough space.  We need to do
    1856              :      * it this way because enlarging attribute_buf mid-stream would invalidate
    1857              :      * pointers already stored into cstate->raw_fields[].
    1858              :      */
    1859       753313 :     if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
    1860            4 :         enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
    1861       753313 :     output_ptr = cstate->attribute_buf.data;
    1862              : 
    1863              :     /* set pointer variables for loop */
    1864       753313 :     cur_ptr = cstate->line_buf.data;
    1865       753313 :     line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
    1866              : 
    1867              :     /* Outer loop iterates over fields */
    1868       753313 :     fieldno = 0;
    1869              :     for (;;)
    1870      1937829 :     {
    1871      2691142 :         bool        found_delim = false;
    1872              :         char       *start_ptr;
    1873              :         char       *end_ptr;
    1874              :         int         input_len;
    1875      2691142 :         bool        saw_non_ascii = false;
    1876              : 
    1877              :         /* Make sure there is enough space for the next value */
    1878      2691142 :         if (fieldno >= cstate->max_fields)
    1879              :         {
    1880           28 :             cstate->max_fields *= 2;    /* grow the raw_fields array by doubling */
    1881           28 :             cstate->raw_fields =
    1882           28 :                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
    1883              :         }
    1884              : 
    1885              :         /* Remember start of field on both input and output sides */
    1886      2691142 :         start_ptr = cur_ptr;
    1887      2691142 :         cstate->raw_fields[fieldno] = output_ptr;
    1888              : 
    1889              :         /*
    1890              :          * Scan data for field.
    1891              :          *
    1892              :          * Note that in this loop, we are scanning to locate the end of field
    1893              :          * and also speculatively performing de-escaping.  Once we find the
    1894              :          * end-of-field, we can match the raw field contents against the null
    1895              :          * marker string.  Only after that comparison fails do we know that
    1896              :          * de-escaping is actually the right thing to do; therefore we *must
    1897              :          * not* throw any syntax errors before we've done the null-marker
    1898              :          * check.
    1899              :          */
    1900              :         for (;;)
    1901     13807040 :         {
    1902              :             char        c;
    1903              : 
    1904     16498182 :             end_ptr = cur_ptr;    /* end of raw field, excluding any delimiter consumed below */
    1905     16498182 :             if (cur_ptr >= line_end_ptr)
    1906       753309 :                 break;
    1907     15744873 :             c = *cur_ptr++;
    1908     15744873 :             if (c == delimc)
    1909              :             {
    1910      1937833 :                 found_delim = true;
    1911      1937833 :                 break;
    1912              :             }
    1913     13807040 :             if (c == '\\')
    1914              :             {
    1915         4849 :                 if (cur_ptr >= line_end_ptr)
    1916            0 :                     break;    /* lone backslash at end of line: stop without emitting it */
    1917         4849 :                 c = *cur_ptr++;
    1918         4849 :                 switch (c)
    1919              :                 {
    1920            8 :                     case '0':
    1921              :                     case '1':
    1922              :                     case '2':
    1923              :                     case '3':
    1924              :                     case '4':
    1925              :                     case '5':
    1926              :                     case '6':
    1927              :                     case '7':
    1928              :                         {
    1929              :                             /* handle an octal escape such as \013 (one to three octal digits) */
    1930              :                             int         val;
    1931              : 
    1932            8 :                             val = OCTVALUE(c);
    1933            8 :                             if (cur_ptr < line_end_ptr)
    1934              :                             {
    1935            4 :                                 c = *cur_ptr;
    1936            4 :                                 if (ISOCTAL(c))
    1937              :                                 {
    1938            0 :                                     cur_ptr++;
    1939            0 :                                     val = (val << 3) + OCTVALUE(c);
    1940            0 :                                     if (cur_ptr < line_end_ptr)
    1941              :                                     {
    1942            0 :                                         c = *cur_ptr;
    1943            0 :                                         if (ISOCTAL(c))
    1944              :                                         {
    1945            0 :                                             cur_ptr++;
    1946            0 :                                             val = (val << 3) + OCTVALUE(c);
    1947              :                                         }
    1948              :                                     }
    1949              :                                 }
    1950              :                             }
    1951            8 :                             c = val & 0377;    /* keep only the low 8 bits */
    1952            8 :                             if (c == '\0' || IS_HIGHBIT_SET(c))
    1953            8 :                                 saw_non_ascii = true;
    1954              :                         }
    1955            8 :                         break;
    1956            8 :                     case 'x':
    1957              :                         /* Handle a hex escape such as \x3F (one or two hex digits); with no hex digit, c stays 'x' */
    1958            8 :                         if (cur_ptr < line_end_ptr)
    1959              :                         {
    1960            4 :                             char        hexchar = *cur_ptr;
    1961              : 
    1962            4 :                             if (isxdigit((unsigned char) hexchar))
    1963              :                             {
    1964            0 :                                 int         val = GetDecimalFromHex(hexchar);
    1965              : 
    1966            0 :                                 cur_ptr++;
    1967            0 :                                 if (cur_ptr < line_end_ptr)
    1968              :                                 {
    1969            0 :                                     hexchar = *cur_ptr;
    1970            0 :                                     if (isxdigit((unsigned char) hexchar))
    1971              :                                     {
    1972            0 :                                         cur_ptr++;
    1973            0 :                                         val = (val << 4) + GetDecimalFromHex(hexchar);
    1974              :                                     }
    1975              :                                 }
    1976            0 :                                 c = val & 0xff;
    1977            0 :                                 if (c == '\0' || IS_HIGHBIT_SET(c))
    1978            0 :                                     saw_non_ascii = true;
    1979              :                             }
    1980              :                         }
    1981            8 :                         break;
    1982            0 :                     case 'b':
    1983            0 :                         c = '\b';
    1984            0 :                         break;
    1985            0 :                     case 'f':
    1986            0 :                         c = '\f';
    1987            0 :                         break;
    1988         2033 :                     case 'n':
    1989         2033 :                         c = '\n';
    1990         2033 :                         break;
    1991            0 :                     case 'r':
    1992            0 :                         c = '\r';
    1993            0 :                         break;
    1994            0 :                     case 't':
    1995            0 :                         c = '\t';
    1996            0 :                         break;
    1997            0 :                     case 'v':
    1998            0 :                         c = '\v';
    1999            0 :                         break;
    2000              : 
    2001              :                         /*
    2002              :                          * in all other cases, take the char after '\'
    2003              :                          * literally
    2004              :                          */
    2005              :                 }
    2006              :             }
    2007              : 
    2008              :             /* Add c to output string */
    2009     13807040 :             *output_ptr++ = c;
    2010              :         }
    2011              : 
    2012              :         /* Check whether raw input matched null marker */
    2013      2691142 :         input_len = end_ptr - start_ptr;
    2014      2691142 :         if (input_len == cstate->opts.null_print_len &&
    2015       163055 :             strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
    2016         2730 :             cstate->raw_fields[fieldno] = NULL;
    2017              :         /* Check whether raw input matched default marker */
    2018      2688412 :         else if (fieldno < list_length(cstate->attnumlist) &&
    2019      2688380 :                  cstate->opts.default_print &&
    2020           76 :                  input_len == cstate->opts.default_print_len &&
    2021           20 :                  strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
    2022           16 :         {
    2023              :             /* fieldno is 0-indexed and attnum is 1-indexed */
    2024           20 :             int         m = list_nth_int(cstate->attnumlist, fieldno) - 1;
    2025              : 
    2026           20 :             if (cstate->defexprs[m] != NULL)
    2027              :             {
    2028              :                 /* defaults contain entries for all physical attributes */
    2029           16 :                 cstate->defaults[m] = true;
    2030              :             }
    2031              :             else
    2032              :             {
    2033            4 :                 TupleDesc   tupDesc = RelationGetDescr(cstate->rel);
    2034            4 :                 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
    2035              : 
    2036            4 :                 ereport(ERROR,
    2037              :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2038              :                          errmsg("unexpected default marker in COPY data"),
    2039              :                          errdetail("Column \"%s\" has no default value.",
    2040              :                                    NameStr(att->attname))));
    2041              :             }
    2042              :         }
    2043              :         else
    2044              :         {
    2045              :             /*
    2046              :              * At this point we know the field is supposed to contain data.
    2047              :              *
    2048              :              * If we de-escaped any non-7-bit-ASCII chars, make sure the
    2049              :              * resulting string is valid data for the db encoding.
    2050              :              */
    2051      2688392 :             if (saw_non_ascii)
    2052              :             {
    2053            0 :                 char       *fld = cstate->raw_fields[fieldno];
    2054              : 
    2055            0 :                 pg_verifymbstr(fld, output_ptr - fld, false);
    2056              :             }
    2057              :         }
    2058              : 
    2059              :         /* Terminate attribute value in output area */
    2060      2691138 :         *output_ptr++ = '\0';
    2061              : 
    2062      2691138 :         fieldno++;
    2063              :         /* Done if we hit EOL instead of a delim */
    2064      2691138 :         if (!found_delim)
    2065       753309 :             break;
    2066              :     }
    2067              : 
    2068              :     /* Clean up state of attribute_buf */
    2069       753309 :     output_ptr--;    /* step back onto the last field's terminator so it is not counted in len */
    2070              :     Assert(*output_ptr == '\0');
    2071       753309 :     cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
    2072              : 
    2073       753309 :     return fieldno;
    2074              : }
    2075              : 
    2076              : /*
    2077              :  * Parse the current line into separate attributes (fields),
    2078              :  * performing de-escaping as needed.  This has exactly the same API as
    2079              :  * CopyReadAttributesText, except we parse the fields according to
    2080              :  * "standard" (i.e. common) CSV usage.
    2081              :  */
    2082              : static int
    2083          318 : CopyReadAttributesCSV(CopyFromState cstate)
    2084              : {
    2085          318 :     char        delimc = cstate->opts.delim[0];
    2086          318 :     char        quotec = cstate->opts.quote[0];
    2087          318 :     char        escapec = cstate->opts.escape[0];
    2088              :     int         fieldno;
    2089              :     char       *output_ptr;
    2090              :     char       *cur_ptr;
    2091              :     char       *line_end_ptr;
    2092              : 
    2093              :     /*
    2094              :      * We need a special case for zero-column tables: check that the input
    2095              :      * line is empty, and return.
    2096              :      */
    2097          318 :     if (cstate->max_fields <= 0)
    2098              :     {
    2099            0 :         if (cstate->line_buf.len != 0)
    2100            0 :             ereport(ERROR,
    2101              :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2102              :                      errmsg("extra data after last expected column")));
    2103            0 :         return 0;
    2104              :     }
    2105              : 
    2106          318 :     resetStringInfo(&cstate->attribute_buf);
    2107              : 
    2108              :     /*
    2109              :      * The de-escaped attributes will certainly not be longer than the input
    2110              :      * data line, so we can just force attribute_buf to be large enough and
    2111              :      * then transfer data without any checks for enough space.  We need to do
    2112              :      * it this way because enlarging attribute_buf mid-stream would invalidate
    2113              :      * pointers already stored into cstate->raw_fields[].
    2114              :      */
    2115          318 :     if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
    2116            0 :         enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
    2117          318 :     output_ptr = cstate->attribute_buf.data;
    2118              : 
    2119              :     /* Set up input scan pointers: cur_ptr walks line_buf up to line_end_ptr */
    2120          318 :     cur_ptr = cstate->line_buf.data;
    2121          318 :     line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
    2122              : 
    2123              :     /* Outer loop iterates over fields; fieldno counts the fields seen so far */
    2124          318 :     fieldno = 0;
    2125              :     for (;;)
    2126          326 :     {
    2127          644 :         bool        found_delim = false;
    2128          644 :         bool        saw_quote = false;
    2129              :         char       *start_ptr;
    2130              :         char       *end_ptr;
    2131              :         int         input_len;
    2132              : 
    2133              :         /* Make sure raw_fields[] has room for the next value; double it if not */
    2134          644 :         if (fieldno >= cstate->max_fields)
    2135              :         {
    2136            0 :             cstate->max_fields *= 2;
    2137            0 :             cstate->raw_fields =
    2138            0 :                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
    2139              :         }
    2140              : 
    2141              :         /* Remember start of field on both input and output sides */
    2142          644 :         start_ptr = cur_ptr;
    2143          644 :         cstate->raw_fields[fieldno] = output_ptr;
    2144              : 
    2145              :         /*
    2146              :          * Scan data for field.
    2147              :          *
    2148              :          * The loop starts in "not quote" mode and then toggles between that
    2149              :          * and "in quote" mode. The loop exits normally if it is in "not
    2150              :          * quote" mode and a delimiter or line end is seen.
    2151              :          */
    2152              :         for (;;)
    2153          137 :         {
    2154              :             char        c;
    2155              : 
    2156              :             /* Not in quote: copy bytes to output until delimiter, quote char, or line end */
    2157              :             for (;;)
    2158              :             {
    2159         2045 :                 end_ptr = cur_ptr;
    2160         2045 :                 if (cur_ptr >= line_end_ptr)
    2161          314 :                     goto endfield;
    2162         1731 :                 c = *cur_ptr++;
    2163              :                 /* unquoted field delimiter */
    2164         1731 :                 if (c == delimc)
    2165              :                 {
    2166          330 :                     found_delim = true;
    2167          330 :                     goto endfield;
    2168              :                 }
    2169              :                 /* start of quoted field (or part of field) */
    2170         1401 :                 if (c == quotec)
    2171              :                 {
    2172          137 :                     saw_quote = true;
    2173          137 :                     break;
    2174              :                 }
    2175              :                 /* Add c to output string */
    2176         1264 :                 *output_ptr++ = c;
    2177              :             }
    2178              : 
    2179              :             /* In quote: copy bytes to output until the closing quote; line end here is an error */
    2180              :             for (;;)
    2181              :             {
    2182          852 :                 end_ptr = cur_ptr;
    2183          852 :                 if (cur_ptr >= line_end_ptr)
    2184            0 :                     ereport(ERROR,
    2185              :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2186              :                              errmsg("unterminated CSV quoted field")));
    2187              : 
    2188          852 :                 c = *cur_ptr++;
    2189              : 
    2190              :                 /* escape within a quoted field */
    2191          852 :                 if (c == escapec)
    2192              :                 {
    2193              :                     /*
    2194              :                      * peek at the next char if available, and escape it if it
    2195              :                      * is an escape char or a quote char
    2196              :                      */
    2197           81 :                     if (cur_ptr < line_end_ptr)
    2198              :                     {
    2199           47 :                         char        nextc = *cur_ptr;
    2200              : 
    2201           47 :                         if (nextc == escapec || nextc == quotec)
    2202              :                         {
    2203           16 :                             *output_ptr++ = nextc;
    2204           16 :                             cur_ptr++;
    2205           16 :                             continue;
    2206              :                         }
    2207              :                     }
    2208              :                 }
    2209              : 
    2210              :                 /*
    2211              :                  * end of quoted field. Must do this test after testing for
    2212              :                  * escape in case quote char and escape char are the same
    2213              :                  * (which is the common case).
    2214              :                  */
    2215          836 :                 if (c == quotec)
    2216          137 :                     break;
    2217              : 
    2218              :                 /* Add c to output string */
    2219          699 :                 *output_ptr++ = c;
    2220              :             }
    2221              :         }
    2222          644 : endfield:
    2223              : 
    2224              :         /* Terminate attribute value in output area */
    2225          644 :         *output_ptr++ = '\0';
    2226              : 
    2227              :         /* Check whether raw input matched null marker (only if no quote was seen) */
    2228          644 :         input_len = end_ptr - start_ptr;
    2229          644 :         if (!saw_quote && input_len == cstate->opts.null_print_len &&
    2230           27 :             strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
    2231           27 :             cstate->raw_fields[fieldno] = NULL;
    2232              :         /* Check whether raw input matched default marker */
    2233          617 :         else if (fieldno < list_length(cstate->attnumlist) &&
    2234          617 :                  cstate->opts.default_print &&
    2235           94 :                  input_len == cstate->opts.default_print_len &&
    2236           26 :                  strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
    2237              :         {
    2238              :             /* fieldno is 0-based and attnum is 1-based, hence the "- 1" */
    2239           26 :             int         m = list_nth_int(cstate->attnumlist, fieldno) - 1;
    2240              : 
    2241           26 :             if (cstate->defexprs[m] != NULL)
    2242              :             {
    2243              :                 /* defaults contain entries for all physical attributes */
    2244           22 :                 cstate->defaults[m] = true;
    2245              :             }
    2246              :             else
    2247              :             {
    2248            4 :                 TupleDesc   tupDesc = RelationGetDescr(cstate->rel);
    2249            4 :                 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
    2250              : 
    2251            4 :                 ereport(ERROR,
    2252              :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2253              :                          errmsg("unexpected default marker in COPY data"),
    2254              :                          errdetail("Column \"%s\" has no default value.",
    2255              :                                    NameStr(att->attname))));
    2256              :             }
    2257              :         }
    2258              : 
    2259          640 :         fieldno++;
    2260              :         /* Done if we hit EOL instead of a delim */
    2261          640 :         if (!found_delim)
    2262          314 :             break;
    2263              :     }
    2264              : 
    2265              :     /* Clean up state of attribute_buf: back up over the last terminator to set len */
    2266          314 :     output_ptr--;
    2267              :     Assert(*output_ptr == '\0');
    2268          314 :     cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
    2269              : 
    2270          314 :     return fieldno;
    2271              : }
    2272              : 
    2273              : 
    2274              : /*
    2275              :  * Read a binary attribute: an int32 field size, then that many bytes of data handed to the receive function; sets *isnull
    2276              :  */
    2277              : static Datum
    2278          100 : CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
    2279              :                         Oid typioparam, int32 typmod,
    2280              :                         bool *isnull)
    2281              : {
    2282              :     int32       fld_size;
    2283              :     Datum       result;
    2284              : 
    2285          100 :     if (!CopyGetInt32(cstate, &fld_size))
    2286            0 :         ereport(ERROR,
    2287              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2288              :                  errmsg("unexpected EOF in COPY data")));
    2289          100 :     if (fld_size == -1)     /* a size of -1 marks a NULL field */
    2290              :     {
    2291           20 :         *isnull = true;
    2292           20 :         return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
    2293              :     }
    2294           80 :     if (fld_size < 0)       /* any other negative size is rejected */
    2295            0 :         ereport(ERROR,
    2296              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2297              :                  errmsg("invalid field size")));
    2298              : 
    2299              :     /* reset attribute_buf to empty, and load raw data in it */
    2300           80 :     resetStringInfo(&cstate->attribute_buf);
    2301              : 
    2302           80 :     enlargeStringInfo(&cstate->attribute_buf, fld_size);
    2303           80 :     if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
    2304           80 :                            fld_size) != fld_size)
    2305            0 :         ereport(ERROR,
    2306              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2307              :                  errmsg("unexpected EOF in COPY data")));
    2308              : 
    2309           80 :     cstate->attribute_buf.len = fld_size;
    2310           80 :     cstate->attribute_buf.data[fld_size] = '\0';   /* keep the buffer null-terminated */
    2311              : 
    2312              :     /* Call the column type's binary input converter */
    2313           80 :     result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
    2314              :                                  typioparam, typmod);
    2315              : 
    2316              :     /* Trouble if it didn't eat the whole buffer (cursor must have reached len) */
    2317           80 :     if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
    2318            1 :         ereport(ERROR,
    2319              :                 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
    2320              :                  errmsg("incorrect binary data format")));
    2321              : 
    2322           79 :     *isnull = false;
    2323           79 :     return result;
    2324              : }
        

Generated by: LCOV version 2.0-1