LCOV - code coverage report
Current view: top level - src/backend/commands - copyfromparse.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 82.9 % 678 562
Test Date: 2026-03-11 13:14:47 Functions: 91.3 % 23 21
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * copyfromparse.c
       4              :  *      Parse CSV/text/binary format for COPY FROM.
       5              :  *
       6              :  * This file contains routines to parse the text, CSV and binary input
       7              :  * formats.  The main entry point is NextCopyFrom(), which parses the
       8              :  * next input line and returns it as Datums.
       9              :  *
      10              :  * In text/CSV mode, the parsing happens in multiple stages:
      11              :  *
      12              :  * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf
      13              :  *                1.          2.            3.           4.
      14              :  *
      15              :  * 1. CopyLoadRawBuf() reads raw data from the input file or client, and
      16              :  *    places it into 'raw_buf'.
      17              :  *
      18              :  * 2. CopyConvertBuf() calls the encoding conversion function to convert
      19              :  *    the data in 'raw_buf' from client to server encoding, placing the
      20              :  *    converted result in 'input_buf'.
      21              :  *
      22              :  * 3. CopyReadLine() parses the data in 'input_buf', one line at a time.
      23              :  *    It is responsible for finding the next newline marker, taking quote and
      24              :  *    escape characters into account according to the COPY options.  The line
      25              :  *    is copied into 'line_buf', with quotes and escape characters still
      26              :  *    intact.
      27              :  *
      28              :  * 4. The CopyReadAttributesText/CSV() functions take the input line from
      29              :  *    'line_buf' and split it into fields, unescaping the data as required.
      30              :  *    The fields are stored in 'attribute_buf', and 'raw_fields' array holds
      31              :  *    pointers to each field.
      32              :  *
      33              :  * If encoding conversion is not required, a shortcut is taken in step 2 to
      34              :  * avoid copying the data unnecessarily.  The 'input_buf' pointer is set to
      35              :  * point directly to 'raw_buf', so that CopyLoadRawBuf() loads the raw data
      36              :  * directly into 'input_buf'.  CopyConvertBuf() then merely validates that
      37              :  * the data is valid in the current encoding.
      38              :  *
      39              :  * In binary mode, the pipeline is much simpler.  Input is loaded into
      40              :  * 'raw_buf', and encoding conversion is done in the datatype-specific
      41              :  * receive functions, if required.  'input_buf' and 'line_buf' are not used,
      42              :  * but 'attribute_buf' is used as a temporary buffer to hold one attribute's
      43              :  * data when it's passed to the receive function.
      44              :  *
      45              :  * 'raw_buf' is always 64 kB in size (RAW_BUF_SIZE).  'input_buf' is also
      46              :  * 64 kB (INPUT_BUF_SIZE), if encoding conversion is required.  'line_buf'
      47              :  * and 'attribute_buf' are expanded on demand, to hold the longest line
      48              :  * encountered so far.
      49              :  *
      50              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      51              :  * Portions Copyright (c) 1994, Regents of the University of California
      52              :  *
      53              :  *
      54              :  * IDENTIFICATION
      55              :  *    src/backend/commands/copyfromparse.c
      56              :  *
      57              :  *-------------------------------------------------------------------------
      58              :  */
      59              : #include "postgres.h"
      60              : 
      61              : #include <ctype.h>
      62              : #include <unistd.h>
      63              : #include <sys/stat.h>
      64              : 
      65              : #include "commands/copyapi.h"
      66              : #include "commands/copyfrom_internal.h"
      67              : #include "commands/progress.h"
      68              : #include "executor/executor.h"
      69              : #include "libpq/libpq.h"
      70              : #include "libpq/pqformat.h"
      71              : #include "mb/pg_wchar.h"
      72              : #include "miscadmin.h"
      73              : #include "pgstat.h"
      74              : #include "port/pg_bswap.h"
      75              : #include "utils/builtins.h"
      76              : #include "utils/rel.h"
      77              : #include "utils/wait_event.h"
      78              : 
      79              : #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
      80              : #define OCTVALUE(c) ((c) - '0')
      81              : 
      82              : /*
      83              :  * These macros centralize code used to process line_buf and input_buf buffers.
      84              :  * They are macros because they often do continue/break control and to avoid
      85              :  * function call overhead in tight COPY loops.
      86              :  *
      87              :  * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
      88              :  * prevent the continue/break processing from working.  We end the "if (1)"
      89              :  * with "else ((void) 0)" to ensure the "if" does not unintentionally match
      90              :  * any "else" in the calling code, and to avoid any compiler warnings about
      91              :  * empty statements.  See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
      92              :  */
      93              : 
/*
 * This keeps the character read at the top of the loop in the buffer
 * even if there is more than one read-ahead.
 *
 * If looking 'extralen' bytes past input_buf_ptr would reach or pass
 * copy_buf_len, and EOF has not been hit yet, rewind input_buf_ptr to
 * prev_raw_ptr, set need_data, and 'continue' the enclosing loop.
 *
 * The macro expands in the caller's scope and relies on these variables
 * being visible there: input_buf_ptr, copy_buf_len, hit_eof, prev_raw_ptr
 * and need_data.
 */
#define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
if (1) \
{ \
    if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
    { \
        input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
        need_data = true; \
        continue; \
    } \
} else ((void) 0)
     108              : 
/*
 * This consumes the remainder of the buffer and breaks
 *
 * Applies when looking 'extralen' bytes past input_buf_ptr would reach or
 * pass copy_buf_len and EOF has already been hit.  If 'extralen' is nonzero,
 * input_buf_ptr is advanced to copy_buf_len; then 'result' is set to true
 * and a 'break' is executed in the caller's context.
 *
 * The macro expands in the caller's scope and relies on these variables
 * being visible there: input_buf_ptr, copy_buf_len, hit_eof and result.
 */
#define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
if (1) \
{ \
    if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
    { \
        if (extralen) \
            input_buf_ptr = copy_buf_len; /* consume the partial character */ \
        /* backslash just before EOF, treat as data char */ \
        result = true; \
        break; \
    } \
} else ((void) 0)
     122              : 
/*
 * Transfer any approved data to line_buf; must do this to be sure
 * there is some room in input_buf.
 *
 * Appends the bytes of cstate->input_buf between cstate->input_buf_index and
 * input_buf_ptr to cstate->line_buf, then advances cstate->input_buf_index
 * to input_buf_ptr.  Does nothing if input_buf_ptr has not moved past
 * input_buf_index.
 *
 * The macro expands in the caller's scope and relies on 'cstate' and
 * 'input_buf_ptr' being visible there.
 */
#define REFILL_LINEBUF \
if (1) \
{ \
    if (input_buf_ptr > cstate->input_buf_index) \
    { \
        appendBinaryStringInfo(&cstate->line_buf, \
                             cstate->input_buf + cstate->input_buf_index, \
                               input_buf_ptr - cstate->input_buf_index); \
        cstate->input_buf_index = input_buf_ptr; \
    } \
} else ((void) 0)
     138              : 
/*
 * Signature expected at the start of binary-format COPY input.
 *
 * The array holds exactly the 11 bytes spelled out in the literal (the
 * literal's explicit trailing \0 is the 11th byte; there is no room for an
 * additional implicit terminator), so compare it with memcmp() over 11
 * bytes rather than with string functions.
 *
 * NOTE: there's a copy of this in copyto.c
 */
static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
     141              : 
     142              : 
/* non-export function prototypes */

/*
 * Text/CSV parsing: CopyReadLine() finds the next input line and the
 * CopyReadAttributes*() functions split a line into fields (see the
 * pipeline description at the top of this file).
 */
static bool CopyReadLine(CopyFromState cstate, bool is_csv);
static pg_attribute_always_inline bool CopyReadLineText(CopyFromState cstate,
                                                        bool is_csv);
static int  CopyReadAttributesText(CopyFromState cstate);
static int  CopyReadAttributesCSV(CopyFromState cstate);
static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
                                     Oid typioparam, int32 typmod,
                                     bool *isnull);
static pg_attribute_always_inline bool CopyFromTextLikeOneRow(CopyFromState cstate,
                                                              ExprContext *econtext,
                                                              Datum *values,
                                                              bool *nulls,
                                                              bool is_csv);
static pg_attribute_always_inline bool NextCopyFromRawFieldsInternal(CopyFromState cstate,
                                                                     char ***fields,
                                                                     int *nfields,
                                                                     bool is_csv);


/* Low-level communications functions */
static int  CopyGetData(CopyFromState cstate, void *databuf,
                        int minread, int maxread);
/* The CopyGetIntNN functions read integers sent in network byte order */
static inline bool CopyGetInt32(CopyFromState cstate, int32 *val);
static inline bool CopyGetInt16(CopyFromState cstate, int16 *val);
static void CopyLoadInputBuf(CopyFromState cstate);
static int  CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes);
     170              : 
     171              : void
     172          541 : ReceiveCopyBegin(CopyFromState cstate)
     173              : {
     174              :     StringInfoData buf;
     175          541 :     int         natts = list_length(cstate->attnumlist);
     176          541 :     int16       format = (cstate->opts.binary ? 1 : 0);
     177              :     int         i;
     178              : 
     179          541 :     pq_beginmessage(&buf, PqMsg_CopyInResponse);
     180          541 :     pq_sendbyte(&buf, format);  /* overall format */
     181          541 :     pq_sendint16(&buf, natts);
     182         1944 :     for (i = 0; i < natts; i++)
     183         1403 :         pq_sendint16(&buf, format); /* per-column formats */
     184          541 :     pq_endmessage(&buf);
     185          541 :     cstate->copy_src = COPY_FRONTEND;
     186          541 :     cstate->fe_msgbuf = makeStringInfo();
     187              :     /* We *must* flush here to ensure FE knows it can send. */
     188          541 :     pq_flush();
     189          541 : }
     190              : 
     191              : void
     192            7 : ReceiveCopyBinaryHeader(CopyFromState cstate)
     193              : {
     194              :     char        readSig[11];
     195              :     int32       tmp;
     196              : 
     197              :     /* Signature */
     198            7 :     if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
     199            7 :         memcmp(readSig, BinarySignature, 11) != 0)
     200            0 :         ereport(ERROR,
     201              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     202              :                  errmsg("COPY file signature not recognized")));
     203              :     /* Flags field */
     204            7 :     if (!CopyGetInt32(cstate, &tmp))
     205            0 :         ereport(ERROR,
     206              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     207              :                  errmsg("invalid COPY file header (missing flags)")));
     208            7 :     if ((tmp & (1 << 16)) != 0)
     209            0 :         ereport(ERROR,
     210              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     211              :                  errmsg("invalid COPY file header (WITH OIDS)")));
     212            7 :     tmp &= ~(1 << 16);
     213            7 :     if ((tmp >> 16) != 0)
     214            0 :         ereport(ERROR,
     215              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     216              :                  errmsg("unrecognized critical flags in COPY file header")));
     217              :     /* Header extension length */
     218            7 :     if (!CopyGetInt32(cstate, &tmp) ||
     219            7 :         tmp < 0)
     220            0 :         ereport(ERROR,
     221              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     222              :                  errmsg("invalid COPY file header (missing length)")));
     223              :     /* Skip extension header, if present */
     224            7 :     while (tmp-- > 0)
     225              :     {
     226            0 :         if (CopyReadBinaryData(cstate, readSig, 1) != 1)
     227            0 :             ereport(ERROR,
     228              :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     229              :                      errmsg("invalid COPY file header (wrong length)")));
     230              :     }
     231            7 : }
     232              : 
/*
 * CopyGetData reads data from the source (file or frontend)
 *
 * We attempt to read at least minread, and at most maxread, bytes from
 * the source.  The actual number of bytes read is returned; if this is
 * less than minread, EOF was detected.
 *
 * Behavior per cstate->copy_src:
 *
 *	COPY_FILE: a single fread() of up to maxread bytes into databuf.  A read
 *	error raises ERROR; a zero-byte read sets cstate->raw_reached_eof.
 *
 *	COPY_FRONTEND: copies bytes out of cstate->fe_msgbuf, receiving further
 *	protocol messages as needed, until at least minread bytes have been
 *	delivered, maxread is exhausted, or CopyDone arrives (which sets
 *	cstate->raw_reached_eof).  CopyFail, an unexpected message type, or loss
 *	of the connection raises ERROR.  Flush and Sync messages are ignored.
 *
 *	COPY_CALLBACK: delegates entirely to cstate->data_source_cb; this
 *	function does not touch raw_reached_eof in that case.
 *
 * Note: when copying from the frontend, we expect a proper EOF mark per
 * protocol; if the frontend simply drops the connection, we raise error.
 * It seems unwise to allow the COPY IN to complete normally in that case.
 *
 * NB: no data conversion is applied here.
 */
static int
CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
{
    int         bytesread = 0;

    switch (cstate->copy_src)
    {
        case COPY_FILE:
            pgstat_report_wait_start(WAIT_EVENT_COPY_FROM_READ);
            bytesread = fread(databuf, 1, maxread, cstate->copy_file);
            pgstat_report_wait_end();
            if (ferror(cstate->copy_file))
                ereport(ERROR,
                        (errcode_for_file_access(),
                         errmsg("could not read from COPY file: %m")));
            /* Only a completely empty read is treated as end of file */
            if (bytesread == 0)
                cstate->raw_reached_eof = true;
            break;
        case COPY_FRONTEND:
            while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
            {
                int         avail;

                /* Loop until the message buffer holds unread bytes */
                while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
                {
                    /* Try to receive another message */
                    int         mtype;
                    int         maxmsglen;

            readmessage:
                    /*
                     * NOTE(review): cancel interrupts are held from here
                     * until the whole message has been collected, presumably
                     * so that a cancel cannot leave a partially read message
                     * on the wire -- confirm.
                     */
                    HOLD_CANCEL_INTERRUPTS();
                    pq_startmsgread();
                    mtype = pq_getbyte();
                    if (mtype == EOF)
                        ereport(ERROR,
                                (errcode(ERRCODE_CONNECTION_FAILURE),
                                 errmsg("unexpected EOF on client connection with an open transaction")));
                    /* Validate message type and set packet size limit */
                    switch (mtype)
                    {
                        case PqMsg_CopyData:
                            maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
                            break;
                        case PqMsg_CopyDone:
                        case PqMsg_CopyFail:
                        case PqMsg_Flush:
                        case PqMsg_Sync:
                            maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
                            break;
                        default:
                            ereport(ERROR,
                                    (errcode(ERRCODE_PROTOCOL_VIOLATION),
                                     errmsg("unexpected message type 0x%02X during COPY from stdin",
                                            mtype)));
                            maxmsglen = 0;  /* keep compiler quiet */
                            break;
                    }
                    /* Now collect the message body */
                    if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
                        ereport(ERROR,
                                (errcode(ERRCODE_CONNECTION_FAILURE),
                                 errmsg("unexpected EOF on client connection with an open transaction")));
                    RESUME_CANCEL_INTERRUPTS();
                    /* ... and process it */
                    switch (mtype)
                    {
                        case PqMsg_CopyData:
                            /* payload is consumed below, after the loop */
                            break;
                        case PqMsg_CopyDone:
                            /* COPY IN correctly terminated by frontend */
                            cstate->raw_reached_eof = true;
                            /* may be less than minread, which tells the caller about EOF */
                            return bytesread;
                        case PqMsg_CopyFail:
                            ereport(ERROR,
                                    (errcode(ERRCODE_QUERY_CANCELED),
                                     errmsg("COPY from stdin failed: %s",
                                            pq_getmsgstring(cstate->fe_msgbuf))));
                            break;
                        case PqMsg_Flush:
                        case PqMsg_Sync:

                            /*
                             * Ignore Flush/Sync for the convenience of client
                             * libraries (such as libpq) that may send those
                             * without noticing that the command they just
                             * sent was COPY.
                             */
                            goto readmessage;
                        default:
                            Assert(false);  /* NOT REACHED */
                    }
                }
                /* Hand over as much of the current message as the caller can take */
                avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
                if (avail > maxread)
                    avail = maxread;
                pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
                databuf = (char *) databuf + avail;
                maxread -= avail;
                bytesread += avail;
            }
            break;
        case COPY_CALLBACK:
            bytesread = cstate->data_source_cb(databuf, minread, maxread);
            break;
    }

    return bytesread;
}
     354              : 
     355              : 
     356              : /*
     357              :  * These functions do apply some data conversion
     358              :  */
     359              : 
     360              : /*
     361              :  * CopyGetInt32 reads an int32 that appears in network byte order
     362              :  *
     363              :  * Returns true if OK, false if EOF
     364              :  */
     365              : static inline bool
     366           93 : CopyGetInt32(CopyFromState cstate, int32 *val)
     367              : {
     368              :     uint32      buf;
     369              : 
     370           93 :     if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
     371              :     {
     372            0 :         *val = 0;               /* suppress compiler warning */
     373            0 :         return false;
     374              :     }
     375           93 :     *val = (int32) pg_ntoh32(buf);
     376           93 :     return true;
     377              : }
     378              : 
     379              : /*
     380              :  * CopyGetInt16 reads an int16 that appears in network byte order
     381              :  */
     382              : static inline bool
     383           21 : CopyGetInt16(CopyFromState cstate, int16 *val)
     384              : {
     385              :     uint16      buf;
     386              : 
     387           21 :     if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
     388              :     {
     389            0 :         *val = 0;               /* suppress compiler warning */
     390            0 :         return false;
     391              :     }
     392           21 :     *val = (int16) pg_ntoh16(buf);
     393           21 :     return true;
     394              : }
     395              : 
     396              : 
/*
 * Perform encoding conversion on data in 'raw_buf', writing the converted
 * data into 'input_buf'.
 *
 * On entry, there must be some data to convert in 'raw_buf'.
 *
 * Results are reported through 'cstate':
 *
 *	- input_buf_len grows by the number of bytes newly verified (no
 *	  transcoding) or newly converted (transcoding); in the transcoding case
 *	  raw_buf_index also advances past the consumed raw bytes.
 *
 *	- input_reached_eof is set when there is nothing left to process and
 *	  raw_reached_eof is already set.
 *
 *	- input_reached_error is set when nothing at all could be verified or
 *	  converted, and either no more raw input is coming or enough input was
 *	  available that at least one character should have been processed.
 *
 * If the function returns without advancing and without setting either
 * flag, the available raw data was insufficient to make progress.
 */
static void
CopyConvertBuf(CopyFromState cstate)
{
    /*
     * If the file and server encoding are the same, no encoding conversion is
     * required.  However, we still need to verify that the input is valid for
     * the encoding.
     */
    if (!cstate->need_transcoding)
    {
        /*
         * When conversion is not required, input_buf and raw_buf are the
         * same.  raw_buf_len is the total number of bytes in the buffer, and
         * input_buf_len tracks how many of those bytes have already been
         * verified.
         */
        int         preverifiedlen = cstate->input_buf_len;
        int         unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
        int         nverified;

        if (unverifiedlen == 0)
        {
            /*
             * If no more raw data is coming, report the EOF to the caller.
             */
            if (cstate->raw_reached_eof)
                cstate->input_reached_eof = true;
            return;
        }

        /*
         * Verify the new data, including any residual unverified bytes from
         * previous round.
         */
        nverified = pg_encoding_verifymbstr(cstate->file_encoding,
                                            cstate->raw_buf + preverifiedlen,
                                            unverifiedlen);
        if (nverified == 0)
        {
            /*
             * Could not verify anything.
             *
             * If there is no more raw input data coming, it means that there
             * was an incomplete multi-byte sequence at the end.  Also, if
             * there's "enough" input left, we should be able to verify at
             * least one character, and a failure to do so means that we've
             * hit an invalid byte sequence.
             */
            if (cstate->raw_reached_eof || unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))
                cstate->input_reached_error = true;
            return;
        }
        /* Expose the newly verified bytes to the consumer of input_buf */
        cstate->input_buf_len += nverified;
    }
    else
    {
        /*
         * Encoding conversion is needed.
         */
        int         nbytes;
        unsigned char *src;
        int         srclen;
        unsigned char *dst;
        int         dstlen;
        int         convertedlen;

        if (RAW_BUF_BYTES(cstate) == 0)
        {
            /*
             * If no more raw data is coming, report the EOF to the caller.
             */
            if (cstate->raw_reached_eof)
                cstate->input_reached_eof = true;
            return;
        }

        /*
         * First, copy down any unprocessed data.
         */
        nbytes = INPUT_BUF_BYTES(cstate);
        if (nbytes > 0 && cstate->input_buf_index > 0)
            memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
                    nbytes);
        cstate->input_buf_index = 0;
        cstate->input_buf_len = nbytes;
        cstate->input_buf[nbytes] = '\0';

        /* Convert from the unconsumed raw bytes to the free tail of input_buf */
        src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
        srclen = cstate->raw_buf_len - cstate->raw_buf_index;
        dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;

        /*
         * NOTE(review): the "+ 1" presumably accounts for a terminator byte
         * that input_buf has room for beyond INPUT_BUF_SIZE -- confirm
         * against the place where input_buf is allocated.
         */
        dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;

        /*
         * Do the conversion.  This might stop short, if there is an invalid
         * byte sequence in the input.  We'll convert as much as we can in
         * that case.
         *
         * Note: Even if we hit an invalid byte sequence, we don't report the
         * error until all the valid bytes have been consumed.  The input
         * might contain an end-of-input marker (\.), and we don't want to
         * report an error if the invalid byte sequence is after the
         * end-of-input marker.  We might unnecessarily convert some data
         * after the end-of-input marker as long as it's valid for the
         * encoding, but that's harmless.
         */
        convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
                                                     cstate->file_encoding,
                                                     GetDatabaseEncoding(),
                                                     src, srclen,
                                                     dst, dstlen,
                                                     true);
        if (convertedlen == 0)
        {
            /*
             * Could not convert anything.  If there is no more raw input data
             * coming, it means that there was an incomplete multi-byte
             * sequence at the end.  Also, if there is plenty of input left,
             * we should be able to convert at least one character, so a
             * failure to do so must mean that we've hit a byte sequence
             * that's invalid.
             */
            if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
                cstate->input_reached_error = true;
            return;
        }

        /*
         * convertedlen counts consumed source bytes; the amount of output is
         * measured with strlen().  NOTE(review): this relies on the
         * conversion routine NUL-terminating what it wrote at 'dst' --
         * confirm.
         */
        cstate->raw_buf_index += convertedlen;
        cstate->input_buf_len += strlen((char *) dst);
    }
}
     532              : 
     533              : /*
     534              :  * Report an encoding or conversion error.
                      :  *
                      :  * Expects cstate->input_reached_error to be set and raw_buf to be
                      :  * non-empty (both are asserted).  Two cases are handled:
                      :  *
                      :  * - need_transcoding is false: the bytes of raw_buf starting at offset
                      :  *   input_buf_len are passed to report_invalid_encoding().
                      :  *
                      :  * - need_transcoding is true: the conversion of the bytes starting at
                      :  *   raw_buf_index is retried with the final argument set to false, so
                      :  *   that the conversion routine raises the error itself; an
                      :  *   elog(ERROR) follows as a backstop.
                      :  *
                      :  * NOTE(review): report_invalid_encoding() presumably does not return;
                      :  * confirm against its definition.
     535              :  */
     536              : static void
     537            6 : CopyConversionError(CopyFromState cstate)
     538              : {
     539              :     Assert(cstate->raw_buf_len > 0);
     540              :     Assert(cstate->input_reached_error);
     541              : 
     542            6 :     if (!cstate->need_transcoding)
     543              :     {
     544              :         /*
     545              :          * Everything up to input_buf_len was successfully verified, and
     546              :          * input_buf_len points to the invalid or incomplete character.
                      :          * The remaining raw_buf_len - input_buf_len bytes are handed to
                      :          * the reporting routine.
     547              :          */
     548            0 :         report_invalid_encoding(cstate->file_encoding,
     549            0 :                                 cstate->raw_buf + cstate->input_buf_len,
     550            0 :                                 cstate->raw_buf_len - cstate->input_buf_len);
     551              :     }
     552              :     else
     553              :     {
     554              :         /*
     555              :          * raw_buf_index points to the invalid or untranslatable character. We
     556              :          * let the conversion routine report the error, because it can provide
     557              :          * a more specific error message than we could here.  An earlier call
     558              :          * to the conversion routine in CopyConvertBuf() detected that there
     559              :          * is an error, now we call the conversion routine again with
     560              :          * noError=false, to have it throw the error.
                      :          *
                      :          * The source is the unconverted remainder of raw_buf; the
                      :          * destination is the part of input_buf that follows input_buf_len.
     561              :          */
     562              :         unsigned char *src;
     563              :         int         srclen;
     564              :         unsigned char *dst;
     565              :         int         dstlen;
     566              : 
     567            6 :         src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
     568            6 :         srclen = cstate->raw_buf_len - cstate->raw_buf_index;
     569            6 :         dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
     570            6 :         dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
     571              : 
     572            6 :         (void) pg_do_encoding_conversion_buf(cstate->conversion_proc,
     573              :                                              cstate->file_encoding,
     574              :                                              GetDatabaseEncoding(),
     575              :                                              src, srclen,
     576              :                                              dst, dstlen,
     577              :                                              false);
     578              : 
     579              :         /*
     580              :          * The conversion routine should have reported an error, so this
     581              :          * should not be reached.
     582              :          */
     583            0 :         elog(ERROR, "encoding conversion failed without error");
     584              :     }
     585              : }
     586              : 
     587              : /*
     588              :  * Load more data from data source to raw_buf.
     589              :  *
     590              :  * If RAW_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the
     591              :  * beginning of the buffer, and we load new data after that.
                      :  *
                      :  * After the buffer has been compacted, CopyGetData() is called to append
                      :  * data behind the bytes that were kept.  The buffer is then
                      :  * NUL-terminated at its new length, bytes_processed is advanced by the
                      :  * number of bytes obtained and reported through
                      :  * pgstat_progress_update_param(), and raw_reached_eof is set if
                      :  * CopyGetData() returned zero.
                      :  *
                      :  * NOTE(review): the arguments 1 and RAW_BUF_SIZE - raw_buf_len passed to
                      :  * CopyGetData() are presumably the minimum and maximum number of bytes
                      :  * to read; confirm against its definition.
     592              :  */
     593              : static void
     594       216588 : CopyLoadRawBuf(CopyFromState cstate)
     595              : {
     596              :     int         nbytes;
     597              :     int         inbytes;
     598              : 
     599              :     /*
     600              :      * In text mode, if encoding conversion is not required, raw_buf and
     601              :      * input_buf point to the same buffer.  Their len/index better agree, too.
     602              :      */
     603       216588 :     if (cstate->raw_buf == cstate->input_buf)
     604              :     {
     605              :         Assert(!cstate->need_transcoding);
     606              :         Assert(cstate->raw_buf_index == cstate->input_buf_index);
     607              :         Assert(cstate->input_buf_len <= cstate->raw_buf_len);
     608              :     }
     609              : 
     610              :     /*
     611              :      * Copy down the unprocessed data if any.
                      :      *
                      :      * The memmove is skipped when there is nothing to keep or when the
                      :      * unprocessed data already starts at offset zero; the length and index
                      :      * are rebased in either case.
     612              :      */
     613       216588 :     nbytes = RAW_BUF_BYTES(cstate);
     614       216588 :     if (nbytes > 0 && cstate->raw_buf_index > 0)
     615            0 :         memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
     616              :                 nbytes);
     617       216588 :     cstate->raw_buf_len -= cstate->raw_buf_index;
     618       216588 :     cstate->raw_buf_index = 0;
     619              : 
     620              :     /*
     621              :      * If raw_buf and input_buf are in fact the same buffer, adjust the
     622              :      * input_buf variables, too.
     623              :      */
     624       216588 :     if (cstate->raw_buf == cstate->input_buf)
     625              :     {
     626       216534 :         cstate->input_buf_len -= cstate->input_buf_index;
     627       216534 :         cstate->input_buf_index = 0;
     628              :     }
     629              : 
     630              :     /* Load more data, appending it right after the bytes kept above */
     631       216588 :     inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
     632       216588 :                           1, RAW_BUF_SIZE - cstate->raw_buf_len);
     633       216586 :     nbytes += inbytes;
     634       216586 :     cstate->raw_buf[nbytes] = '\0';
     635       216586 :     cstate->raw_buf_len = nbytes;
     636              : 
     637       216586 :     cstate->bytes_processed += inbytes;
     638       216586 :     pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
     639              : 
     640       216586 :     if (inbytes == 0)
     641          802 :         cstate->raw_reached_eof = true;
     642       216586 : }
     643              : 
     644              : /*
     645              :  * CopyLoadInputBuf loads some more data into input_buf
     646              :  *
     647              :  * On return, at least one more input character is loaded into
     648              :  * input_buf, or input_reached_eof is set.
     649              :  *
     650              :  * If INPUT_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
     651              :  * of the buffer and then we load more data after that.
                      :  *
                      :  * The function loops: it runs CopyConvertBuf(), returns as soon as
                      :  * INPUT_BUF_BYTES(cstate) exceeds the count observed on entry, calls
                      :  * CopyConversionError() when input_reached_error is set, stops when
                      :  * input_reached_eof is set, and otherwise calls CopyLoadRawBuf() before
                      :  * trying again.
     652              :  */
     653              : static void
     654       216565 : CopyLoadInputBuf(CopyFromState cstate)
     655              : {
     656       216565 :     int         nbytes = INPUT_BUF_BYTES(cstate);
     657              : 
     658              :     /*
     659              :      * The caller has updated input_buf_index to indicate how much of the
     660              :      * input has been consumed and isn't needed anymore.  If input_buf is the
     661              :      * same physical area as raw_buf, update raw_buf_index accordingly.
     662              :      */
     663       216565 :     if (cstate->raw_buf == cstate->input_buf)
     664              :     {
     665              :         Assert(!cstate->need_transcoding);
     666              :         Assert(cstate->input_buf_index >= cstate->raw_buf_index);
     667       216535 :         cstate->raw_buf_index = cstate->input_buf_index;
     668              :     }
     669              : 
     670              :     for (;;)
     671              :     {
     672              :         /* If we now have some unconverted data, try to convert it */
     673       433133 :         CopyConvertBuf(cstate);
     674              : 
     675              :         /* If we now have some more input bytes ready, return them */
     676       433133 :         if (INPUT_BUF_BYTES(cstate) > nbytes)
     677       215766 :             return;
     678              : 
     679              :         /*
     680              :          * If we reached an invalid byte sequence, or we're at an incomplete
     681              :          * multi-byte character but there is no more raw input data, report
     682              :          * conversion error.
     683              :          */
     684       217367 :         if (cstate->input_reached_error)
     685            6 :             CopyConversionError(cstate);
     686              : 
     687              :         /* no more input, and everything has been converted */
     688       217361 :         if (cstate->input_reached_eof)
     689          791 :             break;
     690              : 
     691              :         /* Try to load more raw data; raw EOF must not have been seen yet */
     692              :         Assert(!cstate->raw_reached_eof);
     693       216570 :         CopyLoadRawBuf(cstate);
     694              :     }
     695              : }
     696              : 
     697              : /*
     698              :  * CopyReadBinaryData
     699              :  *
     700              :  * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
     701              :  * and writes them to 'dest'.  Returns the number of bytes read (which
     702              :  * would be less than 'nbytes' only if we reach EOF).
                      :  *
                      :  * When raw_buf already holds at least 'nbytes' unread bytes, they are
                      :  * copied with a single memcpy.  Otherwise the request is served in
                      :  * pieces: whatever is buffered is copied first, and CopyLoadRawBuf() is
                      :  * called each time the buffer has been drained, until 'nbytes' bytes
                      :  * have been transferred or raw_reached_eof is set.  In both cases
                      :  * raw_buf_index is advanced past the bytes handed out.
     703              :  */
     704              : static int
     705          191 : CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
     706              : {
     707          191 :     int         copied_bytes = 0;
     708              : 
     709          191 :     if (RAW_BUF_BYTES(cstate) >= nbytes)
     710              :     {
     711              :         /* Enough bytes are present in the buffer. */
     712          173 :         memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
     713          173 :         cstate->raw_buf_index += nbytes;
     714          173 :         copied_bytes = nbytes;
     715              :     }
     716              :     else
     717              :     {
     718              :         /*
     719              :          * Not enough bytes in the buffer, so must read from the file.  Need
     720              :          * to loop since 'nbytes' could be larger than the buffer size.
     721              :          */
     722              :         do
     723              :         {
     724              :             int         copy_bytes;
     725              : 
     726              :             /* Load more data if buffer is empty. */
     727           18 :             if (RAW_BUF_BYTES(cstate) == 0)
     728              :             {
     729           18 :                 CopyLoadRawBuf(cstate);
     730           18 :                 if (cstate->raw_reached_eof)
     731            6 :                     break;      /* EOF */
     732              :             }
     733              : 
     734              :             /* Transfer some bytes: the lesser of what is still wanted and what is buffered. */
     735           12 :             copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
     736           12 :             memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
     737           12 :             cstate->raw_buf_index += copy_bytes;
     738           12 :             dest += copy_bytes;
     739           12 :             copied_bytes += copy_bytes;
     740           12 :         } while (copied_bytes < nbytes);
     741              :     }
     742              : 
     743          191 :     return copied_bytes;
     744              : }
     745              : 
     746              : /*
     747              :  * This function is exposed for use by extensions that read raw fields in the
     748              :  * next line. See NextCopyFromRawFieldsInternal() for details.
                      :  *
                      :  * It forwards 'cstate', 'fields' and 'nfields' unchanged, passes
                      :  * cstate->opts.csv_mode as the 'is_csv' argument, and returns the
                      :  * result of that call.
     749              :  */
     750              : bool
     751            0 : NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
     752              : {
     753            0 :     return NextCopyFromRawFieldsInternal(cstate, fields, nfields,
     754            0 :                                          cstate->opts.csv_mode);
     755              : }
     756              : 
     757              : /*
     758              :  * Workhorse for NextCopyFromRawFields().
     759              :  *
     760              :  * Read raw fields in the next line for COPY FROM in text or csv mode. Return
     761              :  * false if no more lines.
     762              :  *
     763              :  * An internal temporary buffer is returned via 'fields'. It is valid until
     764              :  * the next call of the function. Since the function returns all raw fields
     765              :  * in the input file, 'nfields' could be different from the number of columns
     766              :  * in the relation.
     767              :  *
     768              :  * NOTE: the force_not_null option is not applied to the returned fields.
     769              :  *
     770              :  * We use pg_attribute_always_inline to reduce function call overhead
     771              :  * and to help compilers to optimize away the 'is_csv' condition when called
     772              :  * by internal functions such as CopyFromTextLikeOneRow().
                      :  *
                      :  * On success, *fields is set to cstate->raw_fields and *nfields to the
                      :  * field count returned by CopyReadAttributesCSV() or
                      :  * CopyReadAttributesText(), depending on 'is_csv'.
                      :  *
                      :  * On the first call (cur_lineno == 0) with a header option other than
                      :  * COPY_HEADER_FALSE, header lines are read and skipped first: one line
                      :  * for COPY_HEADER_MATCH, otherwise opts.header_line lines.  With
                      :  * COPY_HEADER_MATCH the header fields are additionally compared against
                      :  * the names of the columns listed in cstate->attnumlist; a wrong field
                      :  * count, a null field or a name mismatch is reported as an ERROR with
                      :  * ERRCODE_BAD_COPY_FILE_FORMAT.
     773              :  */
     774              : static pg_attribute_always_inline bool
     775       634773 : NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
     776              : {
     777              :     int         fldct;
     778       634773 :     bool        done = false;
     779              : 
     780              :     /* only available for text or csv input */
     781              :     Assert(!cstate->opts.binary);
     782              : 
     783              :     /* on input check that the header line is correct if needed */
     784       634773 :     if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
     785              :     {
     786              :         ListCell   *cur;
     787              :         TupleDesc   tupDesc;
     788           74 :         int         lines_to_skip = cstate->opts.header_line;
     789              : 
     790              :         /* If set to "match", one header line is skipped */
     791           74 :         if (cstate->opts.header_line == COPY_HEADER_MATCH)
     792           38 :             lines_to_skip = 1;
     793              : 
     794           74 :         tupDesc = RelationGetDescr(cstate->rel);
     795              : 
     796          173 :         for (int i = 0; i < lines_to_skip; i++)
     797              :         {
     798          103 :             cstate->cur_lineno++;
     799          103 :             if ((done = CopyReadLine(cstate, is_csv)))
     800            4 :                 break;
     801              :         }
     802              : 
     803           74 :         if (cstate->opts.header_line == COPY_HEADER_MATCH)
     804              :         {
     805              :             int         fldnum;
     806              : 
     807           38 :             if (is_csv)
     808            5 :                 fldct = CopyReadAttributesCSV(cstate);
     809              :             else
     810           33 :                 fldct = CopyReadAttributesText(cstate);
     811              : 
     812           38 :             if (fldct != list_length(cstate->attnumlist))
     813           12 :                 ereport(ERROR,
     814              :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     815              :                          errmsg("wrong number of fields in header line: got %d, expected %d",
     816              :                                 fldct, list_length(cstate->attnumlist))));
     817              : 
     818           26 :             fldnum = 0;
     819           79 :             foreach(cur, cstate->attnumlist)
     820              :             {
     821           63 :                 int         attnum = lfirst_int(cur);
     822              :                 char       *colName;
     823           63 :                 Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
     824              : 
     825              :                 Assert(fldnum < cstate->max_fields);
     826              : 
     827           63 :                 colName = cstate->raw_fields[fldnum++];
     828           63 :                 if (colName == NULL)
     829            3 :                     ereport(ERROR,
     830              :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     831              :                              errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
     832              :                                     fldnum, cstate->opts.null_print, NameStr(attr->attname))));
     833              : 
     834           60 :                 if (namestrcmp(&attr->attname, colName) != 0)
     835              :                 {
     836            7 :                     ereport(ERROR,
     837              :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     838              :                              errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
     839              :                                     fldnum, colName, NameStr(attr->attname))));
     840              :                 }
     841              :             }
     842              :         }
     843              : 
     844           52 :         if (done)
     845            4 :             return false;
     846              :     }
     847              : 
     848       634747 :     cstate->cur_lineno++;
     849              : 
     850              :     /* Actually read the line into memory here */
     851       634747 :     done = CopyReadLine(cstate, is_csv);
     852              : 
     853              :     /*
     854              :      * EOF at start of line means we're done.  If we see EOF after some
     855              :      * characters, we act as though it was newline followed by EOF, ie,
     856              :      * process the line and then exit loop on next iteration.
     857              :      */
     858       634733 :     if (done && cstate->line_buf.len == 0)
     859          831 :         return false;
     860              : 
     861              :     /* Parse the line into de-escaped field values */
     862       633902 :     if (is_csv)
     863          252 :         fldct = CopyReadAttributesCSV(cstate);
     864              :     else
     865       633650 :         fldct = CopyReadAttributesText(cstate);
     866              : 
     867       633896 :     *fields = cstate->raw_fields;
     868       633896 :     *nfields = fldct;
     869       633896 :     return true;
     870              : }
     871              : 
     872              : /*
     873              :  * Read next tuple from file for COPY FROM. Return false if no more tuples.
     874              :  *
     875              :  * 'econtext' is used to evaluate default expression for each column that is
     876              :  * either not read from the file or is using the DEFAULT option of COPY FROM.
     877              :  * It can be NULL when no default values are used, i.e. when all columns are
     878              :  * read from the file, and DEFAULT option is unset.
     879              :  *
     880              :  * 'values' and 'nulls' arrays must be the same length as columns of the
     881              :  * relation passed to BeginCopyFrom. This function fills the arrays.
                      :  *
                      :  * Each call first resets 'values' to zero, 'nulls' to true and
                      :  * cstate->defaults to false for all tupDesc->natts columns, then asks
                      :  * cstate->routine->CopyFromOneRow() for one row; if that callback
                      :  * returns false, so does this function.  Afterwards the
                      :  * cstate->num_defaults expressions selected through cstate->defmap are
                      :  * evaluated into the matching 'values' and 'nulls' slots.
     882              :  */
     883              : bool
     884       634794 : NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
     885              :              Datum *values, bool *nulls)
     886              : {
     887              :     TupleDesc   tupDesc;
     888              :     AttrNumber  num_phys_attrs,
     889       634794 :                 num_defaults = cstate->num_defaults;
     890              :     int         i;
     891       634794 :     int        *defmap = cstate->defmap;
     892       634794 :     ExprState **defexprs = cstate->defexprs;
     893              : 
     894       634794 :     tupDesc = RelationGetDescr(cstate->rel);
     895       634794 :     num_phys_attrs = tupDesc->natts;
     896              : 
     897              :     /* Initialize all values for row to NULL */
     898      2971698 :     MemSet(values, 0, num_phys_attrs * sizeof(Datum));
     899       634794 :     MemSet(nulls, true, num_phys_attrs * sizeof(bool));
     900       706848 :     MemSet(cstate->defaults, false, num_phys_attrs * sizeof(bool));
     901              : 
     902              :     /* Get one row from source */
     903       634794 :     if (!cstate->routine->CopyFromOneRow(cstate, econtext, values, nulls))
     904          841 :         return false;
     905              : 
     906              :     /*
     907              :      * Now compute and insert any defaults available for the columns not
     908              :      * provided by the input data.  Anything not processed here or above will
     909              :      * remain NULL.
     910              :      */
     911       664123 :     for (i = 0; i < num_defaults; i++)
     912              :     {
     913              :         /*
     914              :          * The caller must supply econtext and have switched into the
     915              :          * per-tuple memory context in it.
     916              :          */
     917              :         Assert(econtext != NULL);
     918              :         Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
     919              : 
     920        30265 :         values[defmap[i]] = ExecEvalExpr(defexprs[defmap[i]], econtext,
     921        30265 :                                          &nulls[defmap[i]]);
     922              :     }
     923              : 
     924       633858 :     return true;
     925              : }
     926              : 
     927              : /*
                      :  * Implementation of the per-row callback for text format.
                      :  *
                      :  * Forwards all arguments to CopyFromTextLikeOneRow() with is_csv = false
                      :  * and returns its result.
                      :  */
     928              : bool
     929       634396 : CopyFromTextOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values,
     930              :                    bool *nulls)
     931              : {
     932       634396 :     return CopyFromTextLikeOneRow(cstate, econtext, values, nulls, false);
     933              : }
     934              : 
     935              : /*
                      :  * Implementation of the per-row callback for CSV format.
                      :  *
                      :  * Forwards all arguments to CopyFromTextLikeOneRow() with is_csv = true
                      :  * and returns its result.
                      :  */
     936              : bool
     937          377 : CopyFromCSVOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values,
     938              :                   bool *nulls)
     939              : {
     940          377 :     return CopyFromTextLikeOneRow(cstate, econtext, values, nulls, true);
     941              : }
     942              : 
     943              : /*
     944              :  * Workhorse for CopyFromTextOneRow() and CopyFromCSVOneRow().
     945              :  *
     946              :  * We use pg_attribute_always_inline to reduce function call overhead
     947              :  * and to help compilers to optimize away the 'is_csv' condition.
     948              :  */
     949              : static pg_attribute_always_inline bool
     950       634773 : CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext,
     951              :                        Datum *values, bool *nulls, bool is_csv)
     952              : {
     953              :     TupleDesc   tupDesc;
     954              :     AttrNumber  attr_count;
     955       634773 :     FmgrInfo   *in_functions = cstate->in_functions;
     956       634773 :     Oid        *typioparams = cstate->typioparams;
     957       634773 :     ExprState **defexprs = cstate->defexprs;
     958              :     char      **field_strings;
     959              :     ListCell   *cur;
     960              :     int         fldct;
     961              :     int         fieldno;
     962              :     char       *string;
     963       634773 :     bool        current_row_erroneous = false;
     964              : 
     965       634773 :     tupDesc = RelationGetDescr(cstate->rel);
     966       634773 :     attr_count = list_length(cstate->attnumlist);
     967              : 
     968              :     /* read raw fields in the next line */
     969       634773 :     if (!NextCopyFromRawFieldsInternal(cstate, &field_strings, &fldct, is_csv))
     970          835 :         return false;
     971              : 
     972              :     /* check for overflowing fields */
     973       633896 :     if (attr_count > 0 && fldct > attr_count)
     974           12 :         ereport(ERROR,
     975              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     976              :                  errmsg("extra data after last expected column")));
     977              : 
     978       633884 :     fieldno = 0;
     979              : 
     980              :     /* Loop to read the user attributes on the line. */
     981      2900266 :     foreach(cur, cstate->attnumlist)
     982              :     {
     983      2266486 :         int         attnum = lfirst_int(cur);
     984      2266486 :         int         m = attnum - 1;
     985      2266486 :         Form_pg_attribute att = TupleDescAttr(tupDesc, m);
     986              : 
     987      2266486 :         if (fieldno >= fldct)
     988           12 :             ereport(ERROR,
     989              :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     990              :                      errmsg("missing data for column \"%s\"",
     991              :                             NameStr(att->attname))));
     992      2266474 :         string = field_strings[fieldno++];
     993              : 
     994      2266474 :         if (cstate->convert_select_flags &&
     995           10 :             !cstate->convert_select_flags[m])
     996              :         {
     997              :             /* ignore input field, leaving column as NULL */
     998            5 :             continue;
     999              :         }
    1000              : 
    1001      2266469 :         if (is_csv)
    1002              :         {
    1003          503 :             if (string == NULL &&
    1004           22 :                 cstate->opts.force_notnull_flags[m])
    1005              :             {
    1006              :                 /*
    1007              :                  * FORCE_NOT_NULL option is set and column is NULL - convert
    1008              :                  * it to the NULL string.
    1009              :                  */
    1010           14 :                 string = cstate->opts.null_print;
    1011              :             }
    1012          489 :             else if (string != NULL && cstate->opts.force_null_flags[m]
    1013           25 :                      && strcmp(string, cstate->opts.null_print) == 0)
    1014              :             {
    1015              :                 /*
    1016              :                  * FORCE_NULL option is set and column matches the NULL
    1017              :                  * string. It must have been quoted, or otherwise the string
    1018              :                  * would already have been set to NULL. Convert it to NULL as
    1019              :                  * specified.
    1020              :                  */
    1021           13 :                 string = NULL;
    1022              :             }
    1023              :         }
    1024              : 
    1025      2266469 :         cstate->cur_attname = NameStr(att->attname);
    1026      2266469 :         cstate->cur_attval = string;
    1027              : 
    1028      2266469 :         if (string != NULL)
    1029      2264044 :             nulls[m] = false;
    1030              : 
    1031      2266469 :         if (cstate->defaults[m])
    1032              :         {
    1033              :             /* We must have switched into the per-tuple memory context */
    1034              :             Assert(econtext != NULL);
    1035              :             Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
    1036              : 
    1037           30 :             values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
    1038              :         }
    1039              : 
    1040              :         /*
    1041              :          * If ON_ERROR is specified, handle the different options
    1042              :          */
    1043      2266420 :         else if (!InputFunctionCallSafe(&in_functions[m],
    1044              :                                         string,
    1045      2266439 :                                         typioparams[m],
    1046              :                                         att->atttypmod,
    1047      2266439 :                                         (Node *) cstate->escontext,
    1048      2266439 :                                         &values[m]))
    1049              :         {
    1050              :             Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
    1051              : 
    1052           85 :             if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
    1053           64 :                 cstate->num_errors++;
    1054           21 :             else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL)
    1055              :             {
    1056              :                 /*
    1057              :                  * Reset error state so the subsequent InputFunctionCallSafe
    1058              :                  * call (for domain constraint check) can properly report
    1059              :                  * whether it succeeded or failed.
    1060              :                  */
    1061           21 :                 cstate->escontext->error_occurred = false;
    1062              : 
    1063              :                 Assert(cstate->domain_with_constraint != NULL);
    1064              : 
    1065              :                 /*
    1066              :                  * For constrained domains, we need an additional
    1067              :                  * InputFunctionCallSafe() to ensure that an error is thrown
    1068              :                  * if the domain constraint rejects null values.
    1069              :                  */
    1070           36 :                 if (!cstate->domain_with_constraint[m] ||
    1071           15 :                     InputFunctionCallSafe(&in_functions[m],
    1072              :                                           NULL,
    1073           15 :                                           typioparams[m],
    1074              :                                           att->atttypmod,
    1075           15 :                                           (Node *) cstate->escontext,
    1076           15 :                                           &values[m]))
    1077              :                 {
    1078           12 :                     nulls[m] = true;
    1079           12 :                     values[m] = (Datum) 0;
    1080              :                 }
    1081              :                 else
    1082            9 :                     ereport(ERROR,
    1083              :                             errcode(ERRCODE_NOT_NULL_VIOLATION),
    1084              :                             errmsg("domain %s does not allow null values",
    1085              :                                    format_type_be(typioparams[m])),
    1086              :                             errdetail("ON_ERROR SET_NULL cannot be applied because column \"%s\" (domain %s) does not accept null values.",
    1087              :                                       cstate->cur_attname,
    1088              :                                       format_type_be(typioparams[m])),
    1089              :                             errdatatype(typioparams[m]));
    1090              : 
    1091              :                 /*
    1092              :                  * We count only the number of rows (not fields) where
    1093              :                  * ON_ERROR SET_NULL was applied.
    1094              :                  */
    1095           12 :                 if (!current_row_erroneous)
    1096              :                 {
    1097            9 :                     current_row_erroneous = true;
    1098            9 :                     cstate->num_errors++;
    1099              :                 }
    1100              :             }
    1101              : 
    1102           76 :             if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
    1103              :             {
    1104              :                 /*
    1105              :                  * Since we emit line number and column info in the below
    1106              :                  * notice message, we suppress error context information other
    1107              :                  * than the relation name.
    1108              :                  */
    1109              :                 Assert(!cstate->relname_only);
    1110           33 :                 cstate->relname_only = true;
    1111              : 
    1112           33 :                 if (cstate->cur_attval)
    1113              :                 {
    1114              :                     char       *attval;
    1115              : 
    1116           30 :                     attval = CopyLimitPrintoutLength(cstate->cur_attval);
    1117              : 
    1118           30 :                     if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
    1119           18 :                         ereport(NOTICE,
    1120              :                                 errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": \"%s\"",
    1121              :                                        cstate->cur_lineno,
    1122              :                                        cstate->cur_attname,
    1123              :                                        attval));
    1124           12 :                     else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL)
    1125           12 :                         ereport(NOTICE,
    1126              :                                 errmsg("setting to null due to data type incompatibility at line %" PRIu64 " for column \"%s\": \"%s\"",
    1127              :                                        cstate->cur_lineno,
    1128              :                                        cstate->cur_attname,
    1129              :                                        attval));
    1130           30 :                     pfree(attval);
    1131              :                 }
    1132              :                 else
    1133              :                 {
    1134            3 :                     if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
    1135            3 :                         ereport(NOTICE,
    1136              :                                 errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": null input",
    1137              :                                        cstate->cur_lineno,
    1138              :                                        cstate->cur_attname));
    1139              :                 }
    1140              :                 /* reset relname_only */
    1141           33 :                 cstate->relname_only = false;
    1142              :             }
    1143              : 
    1144           76 :             if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
    1145           64 :                 return true;
    1146           12 :             else if (cstate->opts.on_error == COPY_ON_ERROR_SET_NULL)
    1147           12 :                 continue;
    1148              :         }
    1149              : 
    1150      2266365 :         cstate->cur_attname = NULL;
    1151      2266365 :         cstate->cur_attval = NULL;
    1152              :     }
    1153              : 
    1154              :     Assert(fieldno == attr_count);
    1155              : 
    1156       633780 :     return true;
    1157              : }
    1158              : 
    1159              : /*
                      :  * Implementation of the per-row callback for binary format.
                      :  *
                      :  * Reads one tuple from the input: first a 16-bit field count obtained with
                      :  * CopyGetInt16(), then one attribute per entry of cstate->attnumlist, each
                      :  * decoded by CopyReadBinaryAttribute().  The result for attribute number
                      :  * 'attnum' is stored in values[attnum - 1] and nulls[attnum - 1].
                      :  *
                      :  * Returns true if a row was read.  Returns false if CopyGetInt16() reports
                      :  * EOF, or if the field count is the -1 EOF marker and no further data
                      :  * follows it.
                      :  *
                      :  * Raises ERROR (ERRCODE_BAD_COPY_FILE_FORMAT) if any data follows the EOF
                      :  * marker, or if the field count differs from the number of attributes in
                      :  * cstate->attnumlist.
                      :  *
                      :  * 'econtext' is not referenced by this function.
                      :  */
    1160              : bool
    1161           21 : CopyFromBinaryOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values,
    1162              :                      bool *nulls)
    1163              : {
    1164              :     TupleDesc   tupDesc;
    1165              :     AttrNumber  attr_count;
    1166           21 :     FmgrInfo   *in_functions = cstate->in_functions;
    1167           21 :     Oid        *typioparams = cstate->typioparams;
    1168              :     int16       fld_count;
    1169              :     ListCell   *cur;
    1170              : 
    1171           21 :     tupDesc = RelationGetDescr(cstate->rel);
    1172           21 :     attr_count = list_length(cstate->attnumlist);
    1173              :     /* Counted before the field count is read, so the EOF marker advances it too */
    1174           21 :     cstate->cur_lineno++;
    1175              : 
    1176           21 :     if (!CopyGetInt16(cstate, &fld_count))
    1177              :     {
    1178              :         /* EOF detected (end of file, or protocol-level EOF) */
    1179            0 :         return false;
    1180              :     }
    1181              : 
    1182           21 :     if (fld_count == -1)
    1183              :     {
    1184              :         /*
    1185              :          * Received EOF marker.  Wait for the protocol-level EOF, and complain
    1186              :          * if it doesn't come immediately.  In COPY FROM STDIN, this ensures
    1187              :          * that we correctly handle CopyFail, if client chooses to send that
    1188              :          * now.  When copying from file, we could ignore the rest of the file
    1189              :          * like in text mode, but we choose to be consistent with the COPY
    1190              :          * FROM STDIN case.
    1191              :          */
    1192              :         char        dummy;
    1193              : 
    1194            6 :         if (CopyReadBinaryData(cstate, &dummy, 1) > 0)
    1195            0 :             ereport(ERROR,
    1196              :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1197              :                      errmsg("received copy data after EOF marker")));
    1198            6 :         return false;
    1199              :     }
    1200              :     /* Each row must carry exactly one field per attribute in attnumlist */
    1201           15 :     if (fld_count != attr_count)
    1202            0 :         ereport(ERROR,
    1203              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1204              :                  errmsg("row field count is %d, expected %d",
    1205              :                         fld_count, attr_count)));
    1206              : 
    1207           93 :     foreach(cur, cstate->attnumlist)
    1208              :     {
    1209           79 :         int         attnum = lfirst_int(cur);
    1210           79 :         int         m = attnum - 1;
    1211           79 :         Form_pg_attribute att = TupleDescAttr(tupDesc, m);
    1212              :         /* cur_attname is set only for the duration of the read (presumably for error context; verify) */
    1213           79 :         cstate->cur_attname = NameStr(att->attname);
    1214          157 :         values[m] = CopyReadBinaryAttribute(cstate,
    1215           79 :                                             &in_functions[m],
    1216           79 :                                             typioparams[m],
    1217              :                                             att->atttypmod,
    1218              :                                             &nulls[m]);
    1219           78 :         cstate->cur_attname = NULL;
    1220              :     }
    1221              : 
    1222           14 :     return true;
    1223              : }
    1224              : 
    1225              : /*
    1226              :  * Read the next input line and stash it in line_buf.
    1227              :  *
    1228              :  * Result is true if read was terminated by EOF, false if terminated
    1229              :  * by newline.  The terminating newline or EOF marker is not included
    1230              :  * in the final value of line_buf.
                      :  *
                      :  * 'is_csv' selects CSV parsing; it is forwarded to CopyReadLineText() as a
                      :  * literal true/false from separate call sites.
                      :  *
                      :  * line_buf is reset on entry.  line_buf_valid is cleared while the line is
                      :  * being assembled and set to true again just before returning.
    1231              :  */
    1232              : static bool
    1233       634850 : CopyReadLine(CopyFromState cstate, bool is_csv)
    1234              : {
    1235              :     bool        result;
    1236              : 
    1237       634850 :     resetStringInfo(&cstate->line_buf);
    1238       634850 :     cstate->line_buf_valid = false;
    1239              : 
    1240              :     /*
    1241              :      * Parse data and transfer into line_buf.
    1242              :      *
    1243              :      * Because this is performance critical, we inline CopyReadLineText() and
    1244              :      * pass the boolean parameters as constants to allow the compiler to emit
    1245              :      * specialized code with fewer branches.
    1246              :      */
    1247       634850 :     if (is_csv)
    1248          439 :         result = CopyReadLineText(cstate, true);
    1249              :     else
    1250       634411 :         result = CopyReadLineText(cstate, false);
    1251              : 
    1252       634836 :     if (result)
    1253              :     {
    1254              :         /*
    1255              :          * Reached EOF.  In protocol version 3, we should ignore anything
    1256              :          * after \. up to the protocol end of copy data.  (XXX maybe better
    1257              :          * not to treat \. as special?)
    1258              :          */
    1259          836 :         if (cstate->copy_src == COPY_FRONTEND)
    1260              :         {
    1261              :             int         inbytes;
    1262              :             /* Read and discard until CopyGetData() returns nothing, then mark input_buf and raw_buf empty */
    1263              :             do
    1264              :             {
    1265          421 :                 inbytes = CopyGetData(cstate, cstate->input_buf,
    1266              :                                       1, INPUT_BUF_SIZE);
    1267          421 :             } while (inbytes > 0);
    1268          421 :             cstate->input_buf_index = 0;
    1269          421 :             cstate->input_buf_len = 0;
    1270          421 :             cstate->raw_buf_index = 0;
    1271          421 :             cstate->raw_buf_len = 0;
    1272              :         }
    1273              :     }
    1274              :     else
    1275              :     {
    1276              :         /*
    1277              :          * If we didn't hit EOF, then we must have transferred the EOL marker
    1278              :          * to line_buf along with the data.  Get rid of it.
    1279              :          */
    1280       634000 :         switch (cstate->eol_type)
    1281              :         {
    1282       634000 :             case EOL_NL:
    1283              :                 Assert(cstate->line_buf.len >= 1);
    1284              :                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
    1285       634000 :                 cstate->line_buf.len--;
    1286       634000 :                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
    1287       634000 :                 break;
    1288            0 :             case EOL_CR:
    1289              :                 Assert(cstate->line_buf.len >= 1);
    1290              :                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
    1291            0 :                 cstate->line_buf.len--;
    1292            0 :                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
    1293            0 :                 break;
    1294            0 :             case EOL_CRNL:
    1295              :                 Assert(cstate->line_buf.len >= 2);
    1296              :                 Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
    1297              :                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
    1298            0 :                 cstate->line_buf.len -= 2;
    1299            0 :                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
    1300            0 :                 break;
    1301            0 :             case EOL_UNKNOWN:
    1302              :                 /* shouldn't get here */
    1303              :                 Assert(false);
    1304            0 :                 break;
    1305              :         }
    1306              :     }
    1307              : 
    1308              :     /* Now it's safe to use the buffer in error messages */
    1309       634836 :     cstate->line_buf_valid = true;
    1310              : 
    1311       634836 :     return result;
    1312              : }
    1313              : 
    1314              : /*
    1315              :  * CopyReadLineText - inner loop of CopyReadLine for text and CSV modes
    1316              :  */
    1317              : static pg_attribute_always_inline bool
    1318       634850 : CopyReadLineText(CopyFromState cstate, bool is_csv)
    1319              : {
    1320              :     char       *copy_input_buf;
    1321              :     int         input_buf_ptr;
    1322              :     int         copy_buf_len;
    1323       634850 :     bool        need_data = false;
    1324       634850 :     bool        hit_eof = false;
    1325       634850 :     bool        result = false;
    1326              : 
    1327              :     /* CSV variables */
    1328       634850 :     bool        in_quote = false,
    1329       634850 :                 last_was_esc = false;
    1330       634850 :     char        quotec = '\0';
    1331       634850 :     char        escapec = '\0';
    1332              : 
    1333       634850 :     if (is_csv)
    1334              :     {
    1335          439 :         quotec = cstate->opts.quote[0];
    1336          439 :         escapec = cstate->opts.escape[0];
    1337              :         /* ignore special escape processing if it's the same as quotec */
    1338          439 :         if (quotec == escapec)
    1339          342 :             escapec = '\0';
    1340              :     }
    1341              : 
    1342              :     /*
    1343              :      * The objective of this loop is to transfer the entire next input line
    1344              :      * into line_buf.  Hence, we only care for detecting newlines (\r and/or
    1345              :      * \n) and the end-of-copy marker (\.).
    1346              :      *
    1347              :      * In CSV mode, \r and \n inside a quoted field are just part of the data
    1348              :      * value and are put in line_buf.  We keep just enough state to know if we
    1349              :      * are currently in a quoted field or not.
    1350              :      *
    1351              :      * The input has already been converted to the database encoding.  All
    1352              :      * supported server encodings have the property that all bytes in a
    1353              :      * multi-byte sequence have the high bit set, so a multibyte character
    1354              :      * cannot contain any newline or escape characters embedded in the
    1355              :      * multibyte sequence.  Therefore, we can process the input byte-by-byte,
    1356              :      * regardless of the encoding.
    1357              :      *
    1358              :      * For speed, we try to move data from input_buf to line_buf in chunks
    1359              :      * rather than one character at a time.  input_buf_ptr points to the next
    1360              :      * character to examine; any characters from input_buf_index to
    1361              :      * input_buf_ptr have been determined to be part of the line, but not yet
    1362              :      * transferred to line_buf.
    1363              :      *
    1364              :      * For a little extra speed within the loop, we copy input_buf and
    1365              :      * input_buf_len into local variables.
    1366              :      */
    1367       634850 :     copy_input_buf = cstate->input_buf;
    1368       634850 :     input_buf_ptr = cstate->input_buf_index;
    1369       634850 :     copy_buf_len = cstate->input_buf_len;
    1370              : 
    1371              :     for (;;)
    1372     12772607 :     {
    1373              :         int         prev_raw_ptr;
    1374              :         char        c;
    1375              : 
    1376              :         /*
    1377              :          * Load more data if needed.
    1378              :          *
    1379              :          * TODO: We could just force four bytes of read-ahead and avoid the
    1380              :          * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE().  That was
    1381              :          * unsafe with the old v2 COPY protocol, but we don't support that
    1382              :          * anymore.
    1383              :          */
    1384     13407457 :         if (input_buf_ptr >= copy_buf_len || need_data)
    1385              :         {
    1386       216565 :             REFILL_LINEBUF;
    1387              : 
    1388       216565 :             CopyLoadInputBuf(cstate);
    1389              :             /* update our local variables */
    1390       216557 :             hit_eof = cstate->input_reached_eof;
    1391       216557 :             input_buf_ptr = cstate->input_buf_index;
    1392       216557 :             copy_buf_len = cstate->input_buf_len;
    1393              : 
    1394              :             /*
    1395              :              * If we are completely out of data, break out of the loop,
    1396              :              * reporting EOF.
    1397              :              */
    1398       216557 :             if (INPUT_BUF_BYTES(cstate) <= 0)
    1399              :             {
    1400          791 :                 result = true;
    1401          791 :                 break;
    1402              :             }
    1403       215766 :             need_data = false;
    1404              :         }
    1405              : 
    1406              :         /* OK to fetch a character */
    1407     13406658 :         prev_raw_ptr = input_buf_ptr;
    1408     13406658 :         c = copy_input_buf[input_buf_ptr++];
    1409              : 
    1410     13406658 :         if (is_csv)
    1411              :         {
    1412              :             /*
    1413              :              * If character is '\r', we may need to look ahead below.  Force
    1414              :              * fetch of the next character if we don't already have it.  We
    1415              :              * need to do this before changing CSV state, in case '\r' is also
    1416              :              * the quote or escape character.
    1417              :              */
    1418         3425 :             if (c == '\r')
    1419              :             {
    1420           18 :                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1421              :             }
    1422              : 
    1423              :             /*
    1424              :              * Dealing with quotes and escapes here is mildly tricky. If the
    1425              :              * quote char is also the escape char, there's no problem - we
    1426              :              * just use the char as a toggle. If they are different, we need
    1427              :              * to ensure that we only take account of an escape inside a
    1428              :              * quoted field and immediately preceding a quote char, and not
    1429              :              * the second in an escape-escape sequence.
    1430              :              */
    1431         3425 :             if (in_quote && c == escapec)
    1432           24 :                 last_was_esc = !last_was_esc;
    1433         3425 :             if (c == quotec && !last_was_esc)
    1434          260 :                 in_quote = !in_quote;
    1435         3425 :             if (c != escapec)
    1436         3398 :                 last_was_esc = false;
    1437              : 
    1438              :             /*
    1439              :              * Updating the line count for embedded CR and/or LF chars is
    1440              :              * necessarily a little fragile - this test is probably about the
    1441              :              * best we can do.  (XXX it's arguable whether we should do this
    1442              :              * at all --- is cur_lineno a physical or logical count?)
    1443              :              */
    1444         3425 :             if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
    1445           18 :                 cstate->cur_lineno++;
    1446              :         }
    1447              : 
    1448              :         /* Process \r */
    1449     13406658 :         if (c == '\r' && (!is_csv || !in_quote))
    1450              :         {
    1451              :             /* Check for \r\n on first line, _and_ handle \r\n. */
    1452            0 :             if (cstate->eol_type == EOL_UNKNOWN ||
    1453            0 :                 cstate->eol_type == EOL_CRNL)
    1454              :             {
    1455              :                 /*
    1456              :                  * If need more data, go back to loop top to load it.
    1457              :                  *
    1458              :                  * Note that if we are at EOF, c will wind up as '\0' because
    1459              :                  * of the guaranteed pad of input_buf.
    1460              :                  */
    1461            0 :                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1462              : 
    1463              :                 /* get next char */
    1464            0 :                 c = copy_input_buf[input_buf_ptr];
    1465              : 
    1466            0 :                 if (c == '\n')
    1467              :                 {
    1468            0 :                     input_buf_ptr++;    /* eat newline */
    1469            0 :                     cstate->eol_type = EOL_CRNL; /* in case not set yet */
    1470              :                 }
    1471              :                 else
    1472              :                 {
    1473              :                     /* found \r, but no \n */
    1474            0 :                     if (cstate->eol_type == EOL_CRNL)
    1475            0 :                         ereport(ERROR,
    1476              :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1477              :                                  !is_csv ?
    1478              :                                  errmsg("literal carriage return found in data") :
    1479              :                                  errmsg("unquoted carriage return found in data"),
    1480              :                                  !is_csv ?
    1481              :                                  errhint("Use \"\\r\" to represent carriage return.") :
    1482              :                                  errhint("Use quoted CSV field to represent carriage return.")));
    1483              : 
    1484              :                     /*
    1485              :                      * if we got here, it is the first line and we didn't find
    1486              :                      * \n, so don't consume the peeked character
    1487              :                      */
    1488            0 :                     cstate->eol_type = EOL_CR;
    1489              :                 }
    1490              :             }
    1491            0 :             else if (cstate->eol_type == EOL_NL)
    1492            0 :                 ereport(ERROR,
    1493              :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1494              :                          !is_csv ?
    1495              :                          errmsg("literal carriage return found in data") :
    1496              :                          errmsg("unquoted carriage return found in data"),
    1497              :                          !is_csv ?
    1498              :                          errhint("Use \"\\r\" to represent carriage return.") :
    1499              :                          errhint("Use quoted CSV field to represent carriage return.")));
    1500              :             /* If reach here, we have found the line terminator */
    1501            0 :             break;
    1502              :         }
    1503              : 
    1504              :         /* Process \n */
    1505     13406658 :         if (c == '\n' && (!is_csv || !in_quote))
    1506              :         {
    1507       634000 :             if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
    1508            0 :                 ereport(ERROR,
    1509              :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1510              :                          !is_csv ?
    1511              :                          errmsg("literal newline found in data") :
    1512              :                          errmsg("unquoted newline found in data"),
    1513              :                          !is_csv ?
    1514              :                          errhint("Use \"\\n\" to represent newline.") :
    1515              :                          errhint("Use quoted CSV field to represent newline.")));
    1516       634000 :             cstate->eol_type = EOL_NL;   /* in case not set yet */
    1517              :             /* If reach here, we have found the line terminator */
    1518       634000 :             break;
    1519              :         }
    1520              : 
    1521              :         /*
    1522              :          * Process backslash, except in CSV mode where backslash is a normal
    1523              :          * character.
    1524              :          */
    1525     12772658 :         if (c == '\\' && !is_csv)
    1526              :         {
    1527              :             char        c2;
    1528              : 
    1529         4051 :             IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1530         4051 :             IF_NEED_REFILL_AND_EOF_BREAK(0);
    1531              : 
    1532              :             /* -----
    1533              :              * get next character
    1534              :              * Note: we do not change c so if it isn't \., we can fall
    1535              :              * through and continue processing.
    1536              :              * -----
    1537              :              */
    1538         4051 :             c2 = copy_input_buf[input_buf_ptr];
    1539              : 
    1540         4051 :             if (c2 == '.')
    1541              :             {
    1542           51 :                 input_buf_ptr++;    /* consume the '.' */
    1543           51 :                 if (cstate->eol_type == EOL_CRNL)
    1544              :                 {
    1545              :                     /* Get the next character */
    1546            0 :                     IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1547              :                     /* if hit_eof, c2 will become '\0' */
    1548            0 :                     c2 = copy_input_buf[input_buf_ptr++];
    1549              : 
    1550            0 :                     if (c2 == '\n')
    1551            0 :                         ereport(ERROR,
    1552              :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1553              :                                  errmsg("end-of-copy marker does not match previous newline style")));
    1554            0 :                     else if (c2 != '\r')
    1555            0 :                         ereport(ERROR,
    1556              :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1557              :                                  errmsg("end-of-copy marker is not alone on its line")));
    1558              :                 }
    1559              : 
    1560              :                 /* Get the next character */
    1561           51 :                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1562              :                 /* if hit_eof, c2 will become '\0' */
    1563           51 :                 c2 = copy_input_buf[input_buf_ptr++];
    1564              : 
    1565           51 :                 if (c2 != '\r' && c2 != '\n')
    1566            3 :                     ereport(ERROR,
    1567              :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1568              :                              errmsg("end-of-copy marker is not alone on its line")));
    1569              : 
    1570           48 :                 if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
    1571           48 :                     (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
    1572           48 :                     (cstate->eol_type == EOL_CR && c2 != '\r'))
    1573            0 :                     ereport(ERROR,
    1574              :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1575              :                              errmsg("end-of-copy marker does not match previous newline style")));
    1576              : 
    1577              :                 /*
    1578              :                  * If there is any data on this line before the \., complain.
    1579              :                  */
    1580           48 :                 if (cstate->line_buf.len > 0 ||
    1581           48 :                     prev_raw_ptr > cstate->input_buf_index)
    1582            3 :                     ereport(ERROR,
    1583              :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1584              :                              errmsg("end-of-copy marker is not alone on its line")));
    1585              : 
    1586              :                 /*
    1587              :                  * Discard the \. and newline, then report EOF.
    1588              :                  */
    1589           45 :                 cstate->input_buf_index = input_buf_ptr;
    1590           45 :                 result = true;  /* report EOF */
    1591           45 :                 break;
    1592              :             }
    1593              :             else
    1594              :             {
    1595              :                 /*
    1596              :                  * If we are here, it means we found a backslash followed by
    1597              :                  * something other than a period.  In non-CSV mode, anything
    1598              :                  * after a backslash is special, so we skip over that second
    1599              :                  * character too.  If we didn't do that \\. would be
    1600              :                  * considered an end-of-copy marker, while in non-CSV mode it is a
    1601              :                  * literal backslash followed by a period.
    1602              :                  */
    1603         4000 :                 input_buf_ptr++;
    1604              :             }
    1605              :         }
    1606              :     }                           /* end of outer loop */
    1607              : 
    1608              :     /*
    1609              :      * Transfer any still-uncopied data to line_buf.
    1610              :      */
    1611       634836 :     REFILL_LINEBUF;
    1612              : 
    1613       634836 :     return result;
    1614              : }
    1615              : 
    1616              : /*
    1617              :  *  Return decimal value for a hexadecimal digit
    1618              :  */
    1619              : static int
    1620            0 : GetDecimalFromHex(char hex)
    1621              : {
    1622            0 :     if (isdigit((unsigned char) hex))
    1623            0 :         return hex - '0';
    1624              :     else
    1625            0 :         return pg_ascii_tolower((unsigned char) hex) - 'a' + 10;
    1626              : }
    1627              : 
    1628              : /*
    1629              :  * Parse the current line into separate attributes (fields),
    1630              :  * performing de-escaping as needed.
    1631              :  *
    1632              :  * The input is in line_buf.  We use attribute_buf to hold the result
    1633              :  * strings.  cstate->raw_fields[k] is set to point to the k'th attribute
    1634              :  * string, or NULL when the input matches the null marker string.
    1635              :  * This array is expanded as necessary.
    1636              :  *
    1637              :  * (Note that the caller cannot check for nulls since the returned
    1638              :  * string would be the post-de-escaping equivalent, which may look
    1639              :  * the same as some valid data string.)
    1640              :  *
    1641              :  * cstate->opts.delim is the column delimiter string (must be just one byte
    1642              :  * for now).  cstate->opts.null_print is the null marker string; note that
    1643              :  * it is compared to the pre-de-escaped input string.
    1644              :  *
    1645              :  * The return value is the number of fields actually read.
    1646              :  */
    1647              : static int
    1648       633683 : CopyReadAttributesText(CopyFromState cstate)
    1649              : {
    1650       633683 :     char        delimc = cstate->opts.delim[0];
    1651              :     int         fieldno;
    1652              :     char       *output_ptr;
    1653              :     char       *cur_ptr;
    1654              :     char       *line_end_ptr;
    1655              : 
    1656              :     /*
    1657              :      * We need a special case for zero-column tables: check that the input
    1658              :      * line is empty, and return.
    1659              :      */
    1660       633683 :     if (cstate->max_fields <= 0)
    1661              :     {
    1662            4 :         if (cstate->line_buf.len != 0)
    1663            0 :             ereport(ERROR,
    1664              :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1665              :                      errmsg("extra data after last expected column")));
    1666            4 :         return 0;
    1667              :     }
    1668              : 
    1669       633679 :     resetStringInfo(&cstate->attribute_buf);
    1670              : 
    1671              :     /*
    1672              :      * The de-escaped attributes will certainly not be longer than the input
    1673              :      * data line, so we can just force attribute_buf to be large enough and
    1674              :      * then transfer data without any checks for enough space.  We need to do
    1675              :      * it this way because enlarging attribute_buf mid-stream would invalidate
    1676              :      * pointers already stored into cstate->raw_fields[].
    1677              :      */
    1678       633679 :     if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
    1679            4 :         enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
    1680       633679 :     output_ptr = cstate->attribute_buf.data;
    1681              : 
    1682              :     /* set pointer variables for loop */
    1683       633679 :     cur_ptr = cstate->line_buf.data;
    1684       633679 :     line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
    1685              : 
    1686              :     /* Outer loop iterates over fields */
    1687       633679 :     fieldno = 0;
    1688              :     for (;;)
    1689      1632506 :     {
    1690      2266185 :         bool        found_delim = false;
    1691              :         char       *start_ptr;
    1692              :         char       *end_ptr;
    1693              :         int         input_len;
    1694      2266185 :         bool        saw_non_ascii = false;
    1695              : 
    1696              :         /* Make sure there is enough space for the next value */
    1697      2266185 :         if (fieldno >= cstate->max_fields)
    1698              :         {
    1699           21 :             cstate->max_fields *= 2;
    1700           21 :             cstate->raw_fields =
    1701           21 :                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
    1702              :         }
    1703              : 
    1704              :         /* Remember start of field on both input and output sides */
    1705      2266185 :         start_ptr = cur_ptr;
    1706      2266185 :         cstate->raw_fields[fieldno] = output_ptr;
    1707              : 
    1708              :         /*
    1709              :          * Scan data for field.
    1710              :          *
    1711              :          * Note that in this loop, we are scanning to locate the end of field
    1712              :          * and also speculatively performing de-escaping.  Once we find the
    1713              :          * end-of-field, we can match the raw field contents against the null
    1714              :          * marker string.  Only after that comparison fails do we know that
    1715              :          * de-escaping is actually the right thing to do; therefore we *must
    1716              :          * not* throw any syntax errors before we've done the null-marker
    1717              :          * check.
    1718              :          */
    1719              :         for (;;)
    1720     11136736 :         {
    1721              :             char        c;
    1722              : 
    1723     13402921 :             end_ptr = cur_ptr;
    1724     13402921 :             if (cur_ptr >= line_end_ptr)
    1725       633676 :                 break;
    1726     12769245 :             c = *cur_ptr++;
    1727     12769245 :             if (c == delimc)
    1728              :             {
    1729      1632509 :                 found_delim = true;
    1730      1632509 :                 break;
    1731              :             }
    1732     11136736 :             if (c == '\\')
    1733              :             {
    1734         4000 :                 if (cur_ptr >= line_end_ptr)
    1735            0 :                     break;
    1736         4000 :                 c = *cur_ptr++;
    1737         4000 :                 switch (c)
    1738              :                 {
    1739            6 :                     case '0':
    1740              :                     case '1':
    1741              :                     case '2':
    1742              :                     case '3':
    1743              :                     case '4':
    1744              :                     case '5':
    1745              :                     case '6':
    1746              :                     case '7':
    1747              :                         {
    1748              :                             /* handle \013 */
    1749              :                             int         val;
    1750              : 
    1751            6 :                             val = OCTVALUE(c);
    1752            6 :                             if (cur_ptr < line_end_ptr)
    1753              :                             {
    1754            3 :                                 c = *cur_ptr;
    1755            3 :                                 if (ISOCTAL(c))
    1756              :                                 {
    1757            0 :                                     cur_ptr++;
    1758            0 :                                     val = (val << 3) + OCTVALUE(c);
    1759            0 :                                     if (cur_ptr < line_end_ptr)
    1760              :                                     {
    1761            0 :                                         c = *cur_ptr;
    1762            0 :                                         if (ISOCTAL(c))
    1763              :                                         {
    1764            0 :                                             cur_ptr++;
    1765            0 :                                             val = (val << 3) + OCTVALUE(c);
    1766              :                                         }
    1767              :                                     }
    1768              :                                 }
    1769              :                             }
    1770            6 :                             c = val & 0377;
    1771            6 :                             if (c == '\0' || IS_HIGHBIT_SET(c))
    1772            6 :                                 saw_non_ascii = true;
    1773              :                         }
    1774            6 :                         break;
    1775            6 :                     case 'x':
    1776              :                         /* Handle \x3F */
    1777            6 :                         if (cur_ptr < line_end_ptr)
    1778              :                         {
    1779            3 :                             char        hexchar = *cur_ptr;
    1780              : 
    1781            3 :                             if (isxdigit((unsigned char) hexchar))
    1782              :                             {
    1783            0 :                                 int         val = GetDecimalFromHex(hexchar);
    1784              : 
    1785            0 :                                 cur_ptr++;
    1786            0 :                                 if (cur_ptr < line_end_ptr)
    1787              :                                 {
    1788            0 :                                     hexchar = *cur_ptr;
    1789            0 :                                     if (isxdigit((unsigned char) hexchar))
    1790              :                                     {
    1791            0 :                                         cur_ptr++;
    1792            0 :                                         val = (val << 4) + GetDecimalFromHex(hexchar);
    1793              :                                     }
    1794              :                                 }
    1795            0 :                                 c = val & 0xff;
    1796            0 :                                 if (c == '\0' || IS_HIGHBIT_SET(c))
    1797            0 :                                     saw_non_ascii = true;
    1798              :                             }
    1799              :                         }
    1800            6 :                         break;
    1801            0 :                     case 'b':
    1802            0 :                         c = '\b';
    1803            0 :                         break;
    1804            0 :                     case 'f':
    1805            0 :                         c = '\f';
    1806            0 :                         break;
    1807         1525 :                     case 'n':
    1808         1525 :                         c = '\n';
    1809         1525 :                         break;
    1810            0 :                     case 'r':
    1811            0 :                         c = '\r';
    1812            0 :                         break;
    1813            0 :                     case 't':
    1814            0 :                         c = '\t';
    1815            0 :                         break;
    1816            0 :                     case 'v':
    1817            0 :                         c = '\v';
    1818            0 :                         break;
    1819              : 
    1820              :                         /*
    1821              :                          * in all other cases, take the char after '\'
    1822              :                          * literally
    1823              :                          */
    1824              :                 }
    1825              :             }
    1826              : 
    1827              :             /* Add c to output string */
    1828     11136736 :             *output_ptr++ = c;
    1829              :         }
    1830              : 
    1831              :         /* Check whether raw input matched null marker */
    1832      2266185 :         input_len = end_ptr - start_ptr;
    1833      2266185 :         if (input_len == cstate->opts.null_print_len &&
    1834       125656 :             strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
    1835         2407 :             cstate->raw_fields[fieldno] = NULL;
    1836              :         /* Check whether raw input matched default marker */
    1837      2263778 :         else if (fieldno < list_length(cstate->attnumlist) &&
    1838      2263754 :                  cstate->opts.default_print &&
    1839           57 :                  input_len == cstate->opts.default_print_len &&
    1840           15 :                  strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
    1841           12 :         {
    1842              :             /* fieldno is 0-indexed and attnum is 1-indexed */
    1843           15 :             int         m = list_nth_int(cstate->attnumlist, fieldno) - 1;
    1844              : 
    1845           15 :             if (cstate->defexprs[m] != NULL)
    1846              :             {
    1847              :                 /* defaults contain entries for all physical attributes */
    1848           12 :                 cstate->defaults[m] = true;
    1849              :             }
    1850              :             else
    1851              :             {
    1852            3 :                 TupleDesc   tupDesc = RelationGetDescr(cstate->rel);
    1853            3 :                 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
    1854              : 
    1855            3 :                 ereport(ERROR,
    1856              :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1857              :                          errmsg("unexpected default marker in COPY data"),
    1858              :                          errdetail("Column \"%s\" has no default value.",
    1859              :                                    NameStr(att->attname))));
    1860              :             }
    1861              :         }
    1862              :         else
    1863              :         {
    1864              :             /*
    1865              :              * At this point we know the field is supposed to contain data.
    1866              :              *
    1867              :              * If we de-escaped any non-7-bit-ASCII chars, make sure the
    1868              :              * resulting string is valid data for the db encoding.
    1869              :              */
    1870      2263763 :             if (saw_non_ascii)
    1871              :             {
    1872            0 :                 char       *fld = cstate->raw_fields[fieldno];
    1873              : 
    1874            0 :                 pg_verifymbstr(fld, output_ptr - fld, false);
    1875              :             }
    1876              :         }
    1877              : 
    1878              :         /* Terminate attribute value in output area */
    1879      2266182 :         *output_ptr++ = '\0';
    1880              : 
    1881      2266182 :         fieldno++;
    1882              :         /* Done if we hit EOL instead of a delim */
    1883      2266182 :         if (!found_delim)
    1884       633676 :             break;
    1885              :     }
    1886              : 
    1887              :     /* Clean up state of attribute_buf */
    1888       633676 :     output_ptr--;
    1889              :     Assert(*output_ptr == '\0');
    1890       633676 :     cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
    1891              : 
    1892       633676 :     return fieldno;
    1893              : }
    1894              : 
    1895              : /*
    1896              :  * Parse the current line into separate attributes (fields),
    1897              :  * performing de-escaping as needed.  This has exactly the same API as
    1898              :  * CopyReadAttributesText, except we parse the fields according to
    1899              :  * "standard" (i.e. common) CSV usage.
    1900              :  */
    1901              : static int
    1902          257 : CopyReadAttributesCSV(CopyFromState cstate)
    1903              : {
    1904          257 :     char        delimc = cstate->opts.delim[0];
    1905          257 :     char        quotec = cstate->opts.quote[0];
    1906          257 :     char        escapec = cstate->opts.escape[0];
    1907              :     int         fieldno;
    1908              :     char       *output_ptr;
    1909              :     char       *cur_ptr;
    1910              :     char       *line_end_ptr;
    1911              : 
    1912              :     /*
    1913              :      * We need a special case for zero-column tables: check that the input
    1914              :      * line is empty, and return.
    1915              :      */
    1916          257 :     if (cstate->max_fields <= 0)
    1917              :     {
    1918            0 :         if (cstate->line_buf.len != 0)
    1919            0 :             ereport(ERROR,
    1920              :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1921              :                      errmsg("extra data after last expected column")));
    1922            0 :         return 0;
    1923              :     }
    1924              : 
    1925          257 :     resetStringInfo(&cstate->attribute_buf);
    1926              : 
    1927              :     /*
    1928              :      * The de-escaped attributes will certainly not be longer than the input
    1929              :      * data line, so we can just force attribute_buf to be large enough and
    1930              :      * then transfer data without any checks for enough space.  We need to do
    1931              :      * it this way because enlarging attribute_buf mid-stream would invalidate
    1932              :      * pointers already stored into cstate->raw_fields[].
    1933              :      */
    1934          257 :     if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
    1935            0 :         enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
    1936          257 :     output_ptr = cstate->attribute_buf.data;
    1937              : 
    1938              :     /* set pointer variables for loop */
    1939          257 :     cur_ptr = cstate->line_buf.data;
    1940          257 :     line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
    1941              : 
    1942              :     /* Outer loop iterates over fields */
    1943          257 :     fieldno = 0;
    1944              :     for (;;)
    1945          267 :     {
    1946          524 :         bool        found_delim = false;
    1947          524 :         bool        saw_quote = false;
    1948              :         char       *start_ptr;
    1949              :         char       *end_ptr;
    1950              :         int         input_len;
    1951              : 
    1952              :         /* Make sure there is enough space for the next value */
    1953          524 :         if (fieldno >= cstate->max_fields)
    1954              :         {
    1955            0 :             cstate->max_fields *= 2;
    1956            0 :             cstate->raw_fields =
    1957            0 :                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
    1958              :         }
    1959              : 
    1960              :         /* Remember start of field on both input and output sides */
    1961          524 :         start_ptr = cur_ptr;
    1962          524 :         cstate->raw_fields[fieldno] = output_ptr;
    1963              : 
    1964              :         /*
    1965              :          * Scan data for field.
    1966              :          *
    1967              :          * The loop starts in "not quote" mode and then toggles between that
    1968              :          * and "in quote" mode. The loop exits normally if it is in "not
    1969              :          * quote" mode and a delimiter or line end is seen.
    1970              :          */
    1971              :         for (;;)
    1972          114 :         {
    1973              :             char        c;
    1974              : 
    1975              :             /* Not in quote */
    1976              :             for (;;)
    1977              :             {
    1978         1666 :                 end_ptr = cur_ptr;
    1979         1666 :                 if (cur_ptr >= line_end_ptr)
    1980          254 :                     goto endfield;
    1981         1412 :                 c = *cur_ptr++;
    1982              :                 /* unquoted field delimiter */
    1983         1412 :                 if (c == delimc)
    1984              :                 {
    1985          270 :                     found_delim = true;
    1986          270 :                     goto endfield;
    1987              :                 }
    1988              :                 /* start of quoted field (or part of field) */
    1989         1142 :                 if (c == quotec)
    1990              :                 {
    1991          114 :                     saw_quote = true;
    1992          114 :                     break;
    1993              :                 }
    1994              :                 /* Add c to output string */
    1995         1028 :                 *output_ptr++ = c;
    1996              :             }
    1997              : 
    1998              :             /* In quote */
    1999              :             for (;;)
    2000              :             {
    2001          710 :                 end_ptr = cur_ptr;
    2002          710 :                 if (cur_ptr >= line_end_ptr)
    2003            0 :                     ereport(ERROR,
    2004              :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2005              :                              errmsg("unterminated CSV quoted field")));
    2006              : 
    2007          710 :                 c = *cur_ptr++;
    2008              : 
    2009              :                 /* escape within a quoted field */
    2010          710 :                 if (c == escapec)
    2011              :                 {
    2012              :                     /*
    2013              :                      * peek at the next char if available, and escape it if it
    2014              :                      * is an escape char or a quote char
    2015              :                      */
    2016           62 :                     if (cur_ptr < line_end_ptr)
    2017              :                     {
    2018           36 :                         char        nextc = *cur_ptr;
    2019              : 
    2020           36 :                         if (nextc == escapec || nextc == quotec)
    2021              :                         {
    2022           12 :                             *output_ptr++ = nextc;
    2023           12 :                             cur_ptr++;
    2024           12 :                             continue;
    2025              :                         }
    2026              :                     }
    2027              :                 }
    2028              : 
    2029              :                 /*
    2030              :                  * end of quoted field. Must do this test after testing for
    2031              :                  * escape in case quote char and escape char are the same
    2032              :                  * (which is the common case).
    2033              :                  */
    2034          698 :                 if (c == quotec)
    2035          114 :                     break;
    2036              : 
    2037              :                 /* Add c to output string */
    2038          584 :                 *output_ptr++ = c;
    2039              :             }
    2040              :         }
    2041          524 : endfield:
    2042              : 
    2043              :         /* Terminate attribute value in output area */
    2044          524 :         *output_ptr++ = '\0';
    2045              : 
    2046              :         /* Check whether raw input matched null marker */
    2047          524 :         input_len = end_ptr - start_ptr;
    2048          524 :         if (!saw_quote && input_len == cstate->opts.null_print_len &&
    2049           22 :             strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
    2050           22 :             cstate->raw_fields[fieldno] = NULL;
    2051              :         /* Check whether raw input matched default marker */
    2052          502 :         else if (fieldno < list_length(cstate->attnumlist) &&
    2053          502 :                  cstate->opts.default_print &&
    2054           75 :                  input_len == cstate->opts.default_print_len &&
    2055           21 :                  strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
    2056              :         {
    2057              :             /* fieldno is 0-indexed and attnum is 1-indexed */
    2058           21 :             int         m = list_nth_int(cstate->attnumlist, fieldno) - 1;
    2059              : 
    2060           21 :             if (cstate->defexprs[m] != NULL)
    2061              :             {
    2062              :                 /* defaults contain entries for all physical attributes */
    2063           18 :                 cstate->defaults[m] = true;
    2064              :             }
    2065              :             else
    2066              :             {
    2067            3 :                 TupleDesc   tupDesc = RelationGetDescr(cstate->rel);
    2068            3 :                 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
    2069              : 
    2070            3 :                 ereport(ERROR,
    2071              :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2072              :                          errmsg("unexpected default marker in COPY data"),
    2073              :                          errdetail("Column \"%s\" has no default value.",
    2074              :                                    NameStr(att->attname))));
    2075              :             }
    2076              :         }
    2077              : 
    2078          521 :         fieldno++;
    2079              :         /* Done if we hit EOL instead of a delim */
    2080          521 :         if (!found_delim)
    2081          254 :             break;
    2082              :     }
    2083              : 
    2084              :     /* Clean up state of attribute_buf */
    2085          254 :     output_ptr--;
    2086              :     Assert(*output_ptr == '\0');
    2087          254 :     cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
    2088              : 
    2089          254 :     return fieldno;
    2090              : }
    2091              : 
    2092              : 
    2093              : /*
    2094              :  * Read a binary attribute
    2095              :  */
    2096              : static Datum
    2097           79 : CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
    2098              :                         Oid typioparam, int32 typmod,
    2099              :                         bool *isnull)
    2100              : {
    2101              :     int32       fld_size;
    2102              :     Datum       result;
    2103              : 
    2104           79 :     if (!CopyGetInt32(cstate, &fld_size))
    2105            0 :         ereport(ERROR,
    2106              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2107              :                  errmsg("unexpected EOF in COPY data")));
    2108           79 :     if (fld_size == -1)
    2109              :     {
    2110           15 :         *isnull = true;
    2111           15 :         return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
    2112              :     }
    2113           64 :     if (fld_size < 0)
    2114            0 :         ereport(ERROR,
    2115              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2116              :                  errmsg("invalid field size")));
    2117              : 
    2118              :     /* reset attribute_buf to empty, and load raw data in it */
    2119           64 :     resetStringInfo(&cstate->attribute_buf);
    2120              : 
    2121           64 :     enlargeStringInfo(&cstate->attribute_buf, fld_size);
    2122           64 :     if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
    2123           64 :                            fld_size) != fld_size)
    2124            0 :         ereport(ERROR,
    2125              :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2126              :                  errmsg("unexpected EOF in COPY data")));
    2127              : 
    2128           64 :     cstate->attribute_buf.len = fld_size;
    2129           64 :     cstate->attribute_buf.data[fld_size] = '\0';
    2130              : 
    2131              :     /* Call the column type's binary input converter */
    2132           64 :     result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
    2133              :                                  typioparam, typmod);
    2134              : 
    2135              :     /* Trouble if it didn't eat the whole buffer */
    2136           64 :     if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
    2137            1 :         ereport(ERROR,
    2138              :                 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
    2139              :                  errmsg("incorrect binary data format")));
    2140              : 
    2141           63 :     *isnull = false;
    2142           63 :     return result;
    2143              : }
        

Generated by: LCOV version 2.0-1