LCOV - code coverage report
Current view: top level - src/backend/commands - copyfromparse.c (source / functions) Hit Total Coverage
Test: PostgreSQL 19devel Lines: 536 652 82.2 %
Date: 2026-02-07 09:18:21 Functions: 21 23 91.3 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * copyfromparse.c
       4             :  *      Parse CSV/text/binary format for COPY FROM.
       5             :  *
       6             :  * This file contains routines to parse the text, CSV and binary input
       7             :  * formats.  The main entry point is NextCopyFrom(), which parses the
       8             :  * next input line and returns it as Datums.
       9             :  *
      10             :  * In text/CSV mode, the parsing happens in multiple stages:
      11             :  *
      12             :  * [data source] --> raw_buf --> input_buf --> line_buf --> attribute_buf
      13             :  *                1.          2.            3.           4.
      14             :  *
      15             :  * 1. CopyLoadRawBuf() reads raw data from the input file or client, and
      16             :  *    places it into 'raw_buf'.
      17             :  *
      18             :  * 2. CopyConvertBuf() calls the encoding conversion function to convert
      19             :  *    the data in 'raw_buf' from client to server encoding, placing the
      20             :  *    converted result in 'input_buf'.
      21             :  *
      22             :  * 3. CopyReadLine() parses the data in 'input_buf', one line at a time.
      23             :  *    It is responsible for finding the next newline marker, taking quote and
      24             :  *    escape characters into account according to the COPY options.  The line
      25             :  *    is copied into 'line_buf', with quotes and escape characters still
      26             :  *    intact.
      27             :  *
      28             :  * 4. CopyReadAttributesText/CSV() function takes the input line from
      29             :  *    'line_buf', and splits it into fields, unescaping the data as required.
      30             :  *    The fields are stored in 'attribute_buf', and 'raw_fields' array holds
      31             :  *    pointers to each field.
      32             :  *
      33             :  * If encoding conversion is not required, a shortcut is taken in step 2 to
      34             :  * avoid copying the data unnecessarily.  The 'input_buf' pointer is set to
      35             :  * point directly to 'raw_buf', so that CopyLoadRawBuf() loads the raw data
      36             :  * directly into 'input_buf'.  CopyConvertBuf() then merely validates that
      37             :  * the data is valid in the current encoding.
      38             :  *
      39             :  * In binary mode, the pipeline is much simpler.  Input is loaded into
      40             :  * 'raw_buf', and encoding conversion is done in the datatype-specific
      41             :  * receive functions, if required.  'input_buf' and 'line_buf' are not used,
      42             :  * but 'attribute_buf' is used as a temporary buffer to hold one attribute's
      43             :  * data when it's passed to the receive function.
      44             :  *
      45             :  * 'raw_buf' is always 64 kB in size (RAW_BUF_SIZE).  'input_buf' is also
      46             :  * 64 kB (INPUT_BUF_SIZE), if encoding conversion is required.  'line_buf'
      47             :  * and 'attribute_buf' are expanded on demand, to hold the longest line
      48             :  * encountered so far.
      49             :  *
      50             :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      51             :  * Portions Copyright (c) 1994, Regents of the University of California
      52             :  *
      53             :  *
      54             :  * IDENTIFICATION
      55             :  *    src/backend/commands/copyfromparse.c
      56             :  *
      57             :  *-------------------------------------------------------------------------
      58             :  */
      59             : #include "postgres.h"
      60             : 
      61             : #include <ctype.h>
      62             : #include <unistd.h>
      63             : #include <sys/stat.h>
      64             : 
      65             : #include "commands/copyapi.h"
      66             : #include "commands/copyfrom_internal.h"
      67             : #include "commands/progress.h"
      68             : #include "executor/executor.h"
      69             : #include "libpq/libpq.h"
      70             : #include "libpq/pqformat.h"
      71             : #include "mb/pg_wchar.h"
      72             : #include "miscadmin.h"
      73             : #include "pgstat.h"
      74             : #include "port/pg_bswap.h"
      75             : #include "utils/builtins.h"
      76             : #include "utils/rel.h"
      77             : 
      78             : #define ISOCTAL(c) (((c) >= '0') && ((c) <= '7'))
      79             : #define OCTVALUE(c) ((c) - '0')
      80             : 
      81             : /*
      82             :  * These macros centralize code used to process line_buf and input_buf buffers.
      83             :  * They are macros because they often do continue/break control and to avoid
      84             :  * function call overhead in tight COPY loops.
      85             :  *
      86             :  * We must use "if (1)" because the usual "do {...} while(0)" wrapper would
      87             :  * prevent the continue/break processing from working.  We end the "if (1)"
      88             :  * with "else ((void) 0)" to ensure the "if" does not unintentionally match
      89             :  * any "else" in the calling code, and to avoid any compiler warnings about
      90             :  * empty statements.  See http://www.cit.gu.edu.au/~anthony/info/C/C.macros.
      91             :  */
      92             : 
      93             : /*
      94             :  * This keeps the character read at the top of the loop in the buffer
      95             :  * even if there is more than one read-ahead.
      96             :  */
      97             : #define IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(extralen) \
      98             : if (1) \
      99             : { \
     100             :     if (input_buf_ptr + (extralen) >= copy_buf_len && !hit_eof) \
     101             :     { \
     102             :         input_buf_ptr = prev_raw_ptr; /* undo fetch */ \
     103             :         need_data = true; \
     104             :         continue; \
     105             :     } \
     106             : } else ((void) 0)
     107             : 
     108             : /* This consumes the remainder of the buffer and breaks */
     109             : #define IF_NEED_REFILL_AND_EOF_BREAK(extralen) \
     110             : if (1) \
     111             : { \
     112             :     if (input_buf_ptr + (extralen) >= copy_buf_len && hit_eof) \
     113             :     { \
     114             :         if (extralen) \
     115             :             input_buf_ptr = copy_buf_len; /* consume the partial character */ \
     116             :         /* backslash just before EOF, treat as data char */ \
     117             :         result = true; \
     118             :         break; \
     119             :     } \
     120             : } else ((void) 0)
     121             : 
     122             : /*
     123             :  * Transfer any approved data to line_buf; must do this to be sure
     124             :  * there is some room in input_buf.
     125             :  */
     126             : #define REFILL_LINEBUF \
     127             : if (1) \
     128             : { \
     129             :     if (input_buf_ptr > cstate->input_buf_index) \
     130             :     { \
     131             :         appendBinaryStringInfo(&cstate->line_buf, \
     132             :                              cstate->input_buf + cstate->input_buf_index, \
     133             :                                input_buf_ptr - cstate->input_buf_index); \
     134             :         cstate->input_buf_index = input_buf_ptr; \
     135             :     } \
     136             : } else ((void) 0)
     137             : 
     138             : /* NOTE: there's a copy of this in copyto.c */
     139             : static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
     140             : 
     141             : 
     142             : /* non-export function prototypes */
     143             : static bool CopyReadLine(CopyFromState cstate, bool is_csv);
     144             : static bool CopyReadLineText(CopyFromState cstate, bool is_csv);
     145             : static int  CopyReadAttributesText(CopyFromState cstate);
     146             : static int  CopyReadAttributesCSV(CopyFromState cstate);
     147             : static Datum CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
     148             :                                      Oid typioparam, int32 typmod,
     149             :                                      bool *isnull);
     150             : static pg_attribute_always_inline bool CopyFromTextLikeOneRow(CopyFromState cstate,
     151             :                                                               ExprContext *econtext,
     152             :                                                               Datum *values,
     153             :                                                               bool *nulls,
     154             :                                                               bool is_csv);
     155             : static pg_attribute_always_inline bool NextCopyFromRawFieldsInternal(CopyFromState cstate,
     156             :                                                                      char ***fields,
     157             :                                                                      int *nfields,
     158             :                                                                      bool is_csv);
     159             : 
     160             : 
     161             : /* Low-level communications functions */
     162             : static int  CopyGetData(CopyFromState cstate, void *databuf,
     163             :                         int minread, int maxread);
     164             : static inline bool CopyGetInt32(CopyFromState cstate, int32 *val);
     165             : static inline bool CopyGetInt16(CopyFromState cstate, int16 *val);
     166             : static void CopyLoadInputBuf(CopyFromState cstate);
     167             : static int  CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes);
     168             : 
     169             : void
     170        1104 : ReceiveCopyBegin(CopyFromState cstate)
     171             : {
     172             :     StringInfoData buf;
     173        1104 :     int         natts = list_length(cstate->attnumlist);
     174        1104 :     int16       format = (cstate->opts.binary ? 1 : 0);
     175             :     int         i;
     176             : 
     177        1104 :     pq_beginmessage(&buf, PqMsg_CopyInResponse);
     178        1104 :     pq_sendbyte(&buf, format);  /* overall format */
     179        1104 :     pq_sendint16(&buf, natts);
     180        3980 :     for (i = 0; i < natts; i++)
     181        2876 :         pq_sendint16(&buf, format); /* per-column formats */
     182        1104 :     pq_endmessage(&buf);
     183        1104 :     cstate->copy_src = COPY_FRONTEND;
     184        1104 :     cstate->fe_msgbuf = makeStringInfo();
     185             :     /* We *must* flush here to ensure FE knows it can send. */
     186        1104 :     pq_flush();
     187        1104 : }
     188             : 
     189             : void
     190          14 : ReceiveCopyBinaryHeader(CopyFromState cstate)
     191             : {
     192             :     char        readSig[11];
     193             :     int32       tmp;
     194             : 
     195             :     /* Signature */
     196          14 :     if (CopyReadBinaryData(cstate, readSig, 11) != 11 ||
     197          14 :         memcmp(readSig, BinarySignature, 11) != 0)
     198           0 :         ereport(ERROR,
     199             :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     200             :                  errmsg("COPY file signature not recognized")));
     201             :     /* Flags field */
     202          14 :     if (!CopyGetInt32(cstate, &tmp))
     203           0 :         ereport(ERROR,
     204             :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     205             :                  errmsg("invalid COPY file header (missing flags)")));
     206          14 :     if ((tmp & (1 << 16)) != 0)
     207           0 :         ereport(ERROR,
     208             :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     209             :                  errmsg("invalid COPY file header (WITH OIDS)")));
     210          14 :     tmp &= ~(1 << 16);
     211          14 :     if ((tmp >> 16) != 0)
     212           0 :         ereport(ERROR,
     213             :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     214             :                  errmsg("unrecognized critical flags in COPY file header")));
     215             :     /* Header extension length */
     216          14 :     if (!CopyGetInt32(cstate, &tmp) ||
     217          14 :         tmp < 0)
     218           0 :         ereport(ERROR,
     219             :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     220             :                  errmsg("invalid COPY file header (missing length)")));
     221             :     /* Skip extension header, if present */
     222          14 :     while (tmp-- > 0)
     223             :     {
     224           0 :         if (CopyReadBinaryData(cstate, readSig, 1) != 1)
     225           0 :             ereport(ERROR,
     226             :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     227             :                      errmsg("invalid COPY file header (wrong length)")));
     228             :     }
     229          14 : }
     230             : 
     231             : /*
     232             :  * CopyGetData reads data from the source (file or frontend)
     233             :  *
     234             :  * We attempt to read at least minread, and at most maxread, bytes from
     235             :  * the source.  The actual number of bytes read is returned; if this is
     236             :  * less than minread, EOF was detected.
     237             :  *
     238             :  * Note: when copying from the frontend, we expect a proper EOF mark per
     239             :  * protocol; if the frontend simply drops the connection, we raise error.
     240             :  * It seems unwise to allow the COPY IN to complete normally in that case.
     241             :  *
     242             :  * NB: no data conversion is applied here.
     243             :  */
     244             : static int
     245      431994 : CopyGetData(CopyFromState cstate, void *databuf, int minread, int maxread)
     246             : {
     247      431994 :     int         bytesread = 0;
     248             : 
     249      431994 :     switch (cstate->copy_src)
     250             :     {
     251        1100 :         case COPY_FILE:
     252        1100 :             pgstat_report_wait_start(WAIT_EVENT_COPY_FROM_READ);
     253        1100 :             bytesread = fread(databuf, 1, maxread, cstate->copy_file);
     254        1100 :             pgstat_report_wait_end();
     255        1100 :             if (ferror(cstate->copy_file))
     256           0 :                 ereport(ERROR,
     257             :                         (errcode_for_file_access(),
     258             :                          errmsg("could not read from COPY file: %m")));
     259        1100 :             if (bytesread == 0)
     260         432 :                 cstate->raw_reached_eof = true;
     261        1100 :             break;
     262      402894 :         case COPY_FRONTEND:
     263      804146 :             while (maxread > 0 && bytesread < minread && !cstate->raw_reached_eof)
     264             :             {
     265             :                 int         avail;
     266             : 
     267      803402 :                 while (cstate->fe_msgbuf->cursor >= cstate->fe_msgbuf->len)
     268             :                 {
     269             :                     /* Try to receive another message */
     270             :                     int         mtype;
     271             :                     int         maxmsglen;
     272             : 
     273      402150 :             readmessage:
     274      402150 :                     HOLD_CANCEL_INTERRUPTS();
     275      402150 :                     pq_startmsgread();
     276      402150 :                     mtype = pq_getbyte();
     277      402150 :                     if (mtype == EOF)
     278           0 :                         ereport(ERROR,
     279             :                                 (errcode(ERRCODE_CONNECTION_FAILURE),
     280             :                                  errmsg("unexpected EOF on client connection with an open transaction")));
     281             :                     /* Validate message type and set packet size limit */
     282             :                     switch (mtype)
     283             :                     {
     284      401252 :                         case PqMsg_CopyData:
     285      401252 :                             maxmsglen = PQ_LARGE_MESSAGE_LIMIT;
     286      401252 :                             break;
     287         894 :                         case PqMsg_CopyDone:
     288             :                         case PqMsg_CopyFail:
     289             :                         case PqMsg_Flush:
     290             :                         case PqMsg_Sync:
     291         894 :                             maxmsglen = PQ_SMALL_MESSAGE_LIMIT;
     292         894 :                             break;
     293           4 :                         default:
     294           4 :                             ereport(ERROR,
     295             :                                     (errcode(ERRCODE_PROTOCOL_VIOLATION),
     296             :                                      errmsg("unexpected message type 0x%02X during COPY from stdin",
     297             :                                             mtype)));
     298             :                             maxmsglen = 0;  /* keep compiler quiet */
     299             :                             break;
     300             :                     }
     301             :                     /* Now collect the message body */
     302      402146 :                     if (pq_getmessage(cstate->fe_msgbuf, maxmsglen))
     303           0 :                         ereport(ERROR,
     304             :                                 (errcode(ERRCODE_CONNECTION_FAILURE),
     305             :                                  errmsg("unexpected EOF on client connection with an open transaction")));
     306      402146 :                     RESUME_CANCEL_INTERRUPTS();
     307             :                     /* ... and process it */
     308             :                     switch (mtype)
     309             :                     {
     310      401252 :                         case PqMsg_CopyData:
     311      401252 :                             break;
     312         894 :                         case PqMsg_CopyDone:
     313             :                             /* COPY IN correctly terminated by frontend */
     314         894 :                             cstate->raw_reached_eof = true;
     315         894 :                             return bytesread;
     316           0 :                         case PqMsg_CopyFail:
     317           0 :                             ereport(ERROR,
     318             :                                     (errcode(ERRCODE_QUERY_CANCELED),
     319             :                                      errmsg("COPY from stdin failed: %s",
     320             :                                             pq_getmsgstring(cstate->fe_msgbuf))));
     321             :                             break;
     322           0 :                         case PqMsg_Flush:
     323             :                         case PqMsg_Sync:
     324             : 
     325             :                             /*
     326             :                              * Ignore Flush/Sync for the convenience of client
     327             :                              * libraries (such as libpq) that may send those
     328             :                              * without noticing that the command they just
     329             :                              * sent was COPY.
     330             :                              */
     331           0 :                             goto readmessage;
     332      401252 :                         default:
     333             :                             Assert(false);  /* NOT REACHED */
     334             :                     }
     335             :                 }
     336      401252 :                 avail = cstate->fe_msgbuf->len - cstate->fe_msgbuf->cursor;
     337      401252 :                 if (avail > maxread)
     338           0 :                     avail = maxread;
     339      401252 :                 pq_copymsgbytes(cstate->fe_msgbuf, databuf, avail);
     340      401252 :                 databuf = (char *) databuf + avail;
     341      401252 :                 maxread -= avail;
     342      401252 :                 bytesread += avail;
     343             :             }
     344      401996 :             break;
     345       28000 :         case COPY_CALLBACK:
     346       28000 :             bytesread = cstate->data_source_cb(databuf, minread, maxread);
     347       28000 :             break;
     348             :     }
     349             : 
     350      431096 :     return bytesread;
     351             : }
     352             : 
     353             : 
     354             : /*
     355             :  * These functions do apply some data conversion
     356             :  */
     357             : 
     358             : /*
     359             :  * CopyGetInt32 reads an int32 that appears in network byte order
     360             :  *
     361             :  * Returns true if OK, false if EOF
     362             :  */
     363             : static inline bool
     364         186 : CopyGetInt32(CopyFromState cstate, int32 *val)
     365             : {
     366             :     uint32      buf;
     367             : 
     368         186 :     if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
     369             :     {
     370           0 :         *val = 0;               /* suppress compiler warning */
     371           0 :         return false;
     372             :     }
     373         186 :     *val = (int32) pg_ntoh32(buf);
     374         186 :     return true;
     375             : }
     376             : 
     377             : /*
     378             :  * CopyGetInt16 reads an int16 that appears in network byte order
     379             :  */
     380             : static inline bool
     381          42 : CopyGetInt16(CopyFromState cstate, int16 *val)
     382             : {
     383             :     uint16      buf;
     384             : 
     385          42 :     if (CopyReadBinaryData(cstate, (char *) &buf, sizeof(buf)) != sizeof(buf))
     386             :     {
     387           0 :         *val = 0;               /* suppress compiler warning */
     388           0 :         return false;
     389             :     }
     390          42 :     *val = (int16) pg_ntoh16(buf);
     391          42 :     return true;
     392             : }
     393             : 
     394             : 
     395             : /*
     396             :  * Perform encoding conversion on data in 'raw_buf', writing the converted
     397             :  * data into 'input_buf'.
     398             :  *
     399             :  * On entry, there must be some data to convert in 'raw_buf'.
     400             :  */
     401             : static void
     402      862112 : CopyConvertBuf(CopyFromState cstate)
     403             : {
     404             :     /*
     405             :      * If the file and server encoding are the same, no encoding conversion is
     406             :      * required.  However, we still need to verify that the input is valid for
     407             :      * the encoding.
     408             :      */
     409      862112 :     if (!cstate->need_transcoding)
     410             :     {
     411             :         /*
     412             :          * When conversion is not required, input_buf and raw_buf are the
     413             :          * same.  raw_buf_len is the total number of bytes in the buffer, and
     414             :          * input_buf_len tracks how many of those bytes have already been
     415             :          * verified.
     416             :          */
     417      862028 :         int         preverifiedlen = cstate->input_buf_len;
     418      862028 :         int         unverifiedlen = cstate->raw_buf_len - cstate->input_buf_len;
     419             :         int         nverified;
     420             : 
     421      862028 :         if (unverifiedlen == 0)
     422             :         {
     423             :             /*
     424             :              * If no more raw data is coming, report the EOF to the caller.
     425             :              */
     426      432544 :             if (cstate->raw_reached_eof)
     427        1528 :                 cstate->input_reached_eof = true;
     428      432544 :             return;
     429             :         }
     430             : 
     431             :         /*
     432             :          * Verify the new data, including any residual unverified bytes from
     433             :          * previous round.
     434             :          */
     435      429484 :         nverified = pg_encoding_verifymbstr(cstate->file_encoding,
     436      429484 :                                             cstate->raw_buf + preverifiedlen,
     437             :                                             unverifiedlen);
     438      429484 :         if (nverified == 0)
     439             :         {
     440             :             /*
     441             :              * Could not verify anything.
     442             :              *
     443             :              * If there is no more raw input data coming, it means that there
     444             :              * was an incomplete multi-byte sequence at the end.  Also, if
     445             :              * there's "enough" input left, we should be able to verify at
     446             :              * least one character, and a failure to do so means that we've
     447             :              * hit an invalid byte sequence.
     448             :              */
     449           0 :             if (cstate->raw_reached_eof || unverifiedlen >= pg_encoding_max_length(cstate->file_encoding))
     450           0 :                 cstate->input_reached_error = true;
     451           0 :             return;
     452             :         }
     453      429484 :         cstate->input_buf_len += nverified;
     454             :     }
     455             :     else
     456             :     {
     457             :         /*
     458             :          * Encoding conversion is needed.
     459             :          */
     460             :         int         nbytes;
     461             :         unsigned char *src;
     462             :         int         srclen;
     463             :         unsigned char *dst;
     464             :         int         dstlen;
     465             :         int         convertedlen;
     466             : 
     467          84 :         if (RAW_BUF_BYTES(cstate) == 0)
     468             :         {
     469             :             /*
     470             :              * If no more raw data is coming, report the EOF to the caller.
     471             :              */
     472          48 :             if (cstate->raw_reached_eof)
     473          12 :                 cstate->input_reached_eof = true;
     474          48 :             return;
     475             :         }
     476             : 
     477             :         /*
     478             :          * First, copy down any unprocessed data.
     479             :          */
     480          36 :         nbytes = INPUT_BUF_BYTES(cstate);
     481          36 :         if (nbytes > 0 && cstate->input_buf_index > 0)
     482           0 :             memmove(cstate->input_buf, cstate->input_buf + cstate->input_buf_index,
     483             :                     nbytes);
     484          36 :         cstate->input_buf_index = 0;
     485          36 :         cstate->input_buf_len = nbytes;
     486          36 :         cstate->input_buf[nbytes] = '\0';
     487             : 
     488          36 :         src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
     489          36 :         srclen = cstate->raw_buf_len - cstate->raw_buf_index;
     490          36 :         dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
     491          36 :         dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
     492             : 
     493             :         /*
     494             :          * Do the conversion.  This might stop short, if there is an invalid
     495             :          * byte sequence in the input.  We'll convert as much as we can in
     496             :          * that case.
     497             :          *
     498             :          * Note: Even if we hit an invalid byte sequence, we don't report the
     499             :          * error until all the valid bytes have been consumed.  The input
     500             :          * might contain an end-of-input marker (\.), and we don't want to
     501             :          * report an error if the invalid byte sequence is after the
     502             :          * end-of-input marker.  We might unnecessarily convert some data
     503             :          * after the end-of-input marker as long as it's valid for the
     504             :          * encoding, but that's harmless.
     505             :          */
     506          36 :         convertedlen = pg_do_encoding_conversion_buf(cstate->conversion_proc,
     507             :                                                      cstate->file_encoding,
     508             :                                                      GetDatabaseEncoding(),
     509             :                                                      src, srclen,
     510             :                                                      dst, dstlen,
     511             :                                                      true);
     512          36 :         if (convertedlen == 0)
     513             :         {
     514             :             /*
     515             :              * Could not convert anything.  If there is no more raw input data
     516             :              * coming, it means that there was an incomplete multi-byte
     517             :              * sequence at the end.  Also, if there is plenty of input left,
     518             :              * we should be able to convert at least one character, so a
     519             :              * failure to do so must mean that we've hit a byte sequence
     520             :              * that's invalid.
     521             :              */
     522          24 :             if (cstate->raw_reached_eof || srclen >= MAX_CONVERSION_INPUT_LENGTH)
     523          12 :                 cstate->input_reached_error = true;
     524          24 :             return;
     525             :         }
     526          12 :         cstate->raw_buf_index += convertedlen;
     527          12 :         cstate->input_buf_len += strlen((char *) dst);
     528             :     }
     529             : }
     530             : 
     531             : /*
     532             :  * Report the encoding or conversion error flagged earlier via input_reached_error.
     533             :  */
     534             : static void
     535          12 : CopyConversionError(CopyFromState cstate)
     536             : {
     537             :     Assert(cstate->raw_buf_len > 0);
     538             :     Assert(cstate->input_reached_error);
     539             : 
     540          12 :     if (!cstate->need_transcoding)
     541             :     {
     542             :         /*
     543             :          * Everything up to input_buf_len was successfully verified, so the
     544             :          * byte at that offset starts the invalid or incomplete character.
     545             :          */
     546           0 :         report_invalid_encoding(cstate->file_encoding,
     547           0 :                                 cstate->raw_buf + cstate->input_buf_len,
     548           0 :                                 cstate->raw_buf_len - cstate->input_buf_len);
     549             :     }
     550             :     else
     551             :     {
     552             :         /*
     553             :          * raw_buf_index points to the invalid or untranslatable character. We
     554             :          * let the conversion routine report the error, because it can provide
     555             :          * a more specific error message than we could here.  An earlier call
     556             :          * to the conversion routine in CopyConvertBuf() detected that there
     557             :          * is an error; now we call the conversion routine again with
     558             :          * noError=false, to have it throw the error.
     559             :          */
     560             :         unsigned char *src;
     561             :         int         srclen;
     562             :         unsigned char *dst;
     563             :         int         dstlen;
     564             : 
     565          12 :         src = (unsigned char *) cstate->raw_buf + cstate->raw_buf_index;
     566          12 :         srclen = cstate->raw_buf_len - cstate->raw_buf_index;
     567          12 :         dst = (unsigned char *) cstate->input_buf + cstate->input_buf_len;
     568          12 :         dstlen = INPUT_BUF_SIZE - cstate->input_buf_len + 1;
     569             : 
     570          12 :         (void) pg_do_encoding_conversion_buf(cstate->conversion_proc,
     571             :                                              cstate->file_encoding,
     572             :                                              GetDatabaseEncoding(),
     573             :                                              src, srclen,
     574             :                                              dst, dstlen,
     575             :                                              false);
     576             : 
     577             :         /*
     578             :          * The conversion routine should have reported an error, so this
     579             :          * point should not be reached; raise a generic error if it is.
     580             :          */
     581           0 :         elog(ERROR, "encoding conversion failed without error");
     582             :     }
     583             : }
     584             : 
     585             : /*
     586             :  * Load more data from the data source into raw_buf.
     587             :  *
     588             :  * Unprocessed bytes, if any, are first moved to the beginning of the buffer and
     589             :  * new data is loaded after them; raw_reached_eof is set if nothing was read.
     590             :  */
     591             : static void
     592      431100 : CopyLoadRawBuf(CopyFromState cstate)
     593             : {
     594             :     int         nbytes;
     595             :     int         inbytes;
     596             : 
     597             :     /*
     598             :      * In text mode, if encoding conversion is not required, raw_buf and
     599             :      * input_buf point to the same buffer.  Their len/index had better agree.
     600             :      */
     601      431100 :     if (cstate->raw_buf == cstate->input_buf)
     602             :     {
     603             :         Assert(!cstate->need_transcoding);
     604             :         Assert(cstate->raw_buf_index == cstate->input_buf_index);
     605             :         Assert(cstate->input_buf_len <= cstate->raw_buf_len);
     606             :     }
     607             : 
     608             :     /*
     609             :      * Copy down the unprocessed data, if any, and rebase len/index to it.
     610             :      */
     611      431100 :     nbytes = RAW_BUF_BYTES(cstate);
     612      431100 :     if (nbytes > 0 && cstate->raw_buf_index > 0)
     613           0 :         memmove(cstate->raw_buf, cstate->raw_buf + cstate->raw_buf_index,
     614             :                 nbytes);
     615      431100 :     cstate->raw_buf_len -= cstate->raw_buf_index;
     616      431100 :     cstate->raw_buf_index = 0;
     617             : 
     618             :     /*
     619             :      * If raw_buf and input_buf are in fact the same buffer, adjust the
     620             :      * input_buf variables, too.
     621             :      */
     622      431100 :     if (cstate->raw_buf == cstate->input_buf)
     623             :     {
     624      431016 :         cstate->input_buf_len -= cstate->input_buf_index;
     625      431016 :         cstate->input_buf_index = 0;
     626             :     }
     627             : 
     628             :     /* Load more data into the free space after the retained bytes */
     629      431100 :     inbytes = CopyGetData(cstate, cstate->raw_buf + cstate->raw_buf_len,
     630      431100 :                           1, RAW_BUF_SIZE - cstate->raw_buf_len);
     631      431096 :     nbytes += inbytes;
     632      431096 :     cstate->raw_buf[nbytes] = '\0';
     633      431096 :     cstate->raw_buf_len = nbytes;
     634             : 
     635      431096 :     cstate->bytes_processed += inbytes;
     636      431096 :     pgstat_progress_update_param(PROGRESS_COPY_BYTES_PROCESSED, cstate->bytes_processed);
     637             : 
     638      431096 :     if (inbytes == 0)
     639        1564 :         cstate->raw_reached_eof = true;
     640      431096 : }
     641             : 
     642             : /*
     643             :  * CopyLoadInputBuf loads some more data into input_buf.
     644             :  *
     645             :  * On return, at least one more input character is loaded into
     646             :  * input_buf, or input_reached_eof is set.
     647             :  *
     648             :  * If INPUT_BUF_BYTES(cstate) > 0, the unprocessed bytes are moved to the start
     649             :  * of the buffer and then we load more data after that.
     650             :  */
     651             : static void
     652      431052 : CopyLoadInputBuf(CopyFromState cstate)
     653             : {
     654      431052 :     int         nbytes = INPUT_BUF_BYTES(cstate);
     655             : 
     656             :     /*
     657             :      * The caller has updated input_buf_index to indicate how much of the
     658             :      * input has been consumed and isn't needed anymore.  If input_buf is the
     659             :      * same physical area as raw_buf, update raw_buf_index accordingly.
     660             :      */
     661      431052 :     if (cstate->raw_buf == cstate->input_buf)
     662             :     {
     663             :         Assert(!cstate->need_transcoding);
     664             :         Assert(cstate->input_buf_index >= cstate->raw_buf_index);
     665      431016 :         cstate->raw_buf_index = cstate->input_buf_index;
     666             :     }
     667             : 
     668             :     for (;;)
     669             :     {
     670             :         /* If we now have some unconverted data, try to convert it */
     671      862112 :         CopyConvertBuf(cstate);
     672             : 
     673             :         /* If more input bytes are ready than we started with, return them */
     674      862112 :         if (INPUT_BUF_BYTES(cstate) > nbytes)
     675      429496 :             return;
     676             : 
     677             :         /*
     678             :          * If we reached an invalid byte sequence, or we're at an incomplete
     679             :          * multi-byte character but there is no more raw input data, report
     680             :          * conversion error.
     681             :          */
     682      432616 :         if (cstate->input_reached_error)
     683          12 :             CopyConversionError(cstate);
     684             : 
     685             :         /* No more input, and everything has been converted */
     686      432604 :         if (cstate->input_reached_eof)
     687        1540 :             break;
     688             : 
     689             :         /* Try to load more raw data, then loop around to convert it */
     690             :         Assert(!cstate->raw_reached_eof);
     691      431064 :         CopyLoadRawBuf(cstate);
     692             :     }
     693             : }
     694             : 
     695             : /*
     696             :  * CopyReadBinaryData
     697             :  *
     698             :  * Reads up to 'nbytes' bytes from cstate->copy_file via cstate->raw_buf
     699             :  * and writes them to 'dest'.  Returns the number of bytes read (which
     700             :  * would be less than 'nbytes' only if we reach EOF).
     701             :  */
     702             : static int
     703         382 : CopyReadBinaryData(CopyFromState cstate, char *dest, int nbytes)
     704             : {
     705         382 :     int         copied_bytes = 0;
     706             : 
     707         382 :     if (RAW_BUF_BYTES(cstate) >= nbytes)
     708             :     {
     709             :         /* Enough bytes are present in the buffer; copy them in one step. */
     710         346 :         memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, nbytes);
     711         346 :         cstate->raw_buf_index += nbytes;
     712         346 :         copied_bytes = nbytes;
     713             :     }
     714             :     else
     715             :     {
     716             :         /*
     717             :          * Not enough bytes in the buffer, so must read from the file.  Need
     718             :          * to loop since 'nbytes' could be larger than the buffer size.
     719             :          */
     720             :         do
     721             :         {
     722             :             int         copy_bytes;
     723             : 
     724             :             /* Load more data if buffer is empty; stop if that hits EOF. */
     725          36 :             if (RAW_BUF_BYTES(cstate) == 0)
     726             :             {
     727          36 :                 CopyLoadRawBuf(cstate);
     728          36 :                 if (cstate->raw_reached_eof)
     729          12 :                     break;      /* EOF */
     730             :             }
     731             : 
     732             :             /* Transfer as many bytes as are both still wanted and buffered. */
     733          24 :             copy_bytes = Min(nbytes - copied_bytes, RAW_BUF_BYTES(cstate));
     734          24 :             memcpy(dest, cstate->raw_buf + cstate->raw_buf_index, copy_bytes);
     735          24 :             cstate->raw_buf_index += copy_bytes;
     736          24 :             dest += copy_bytes;
     737          24 :             copied_bytes += copy_bytes;
     738          24 :         } while (copied_bytes < nbytes);
     739             :     }
     740             : 
     741         382 :     return copied_bytes;
     742             : }
     743             : 
     744             : /*
     745             :  * This function is exposed for use by extensions that read raw fields in the
     746             :  * next line.  It passes opts.csv_mode as 'is_csv'; see NextCopyFromRawFieldsInternal() for details.
     747             :  */
     748             : bool
     749           0 : NextCopyFromRawFields(CopyFromState cstate, char ***fields, int *nfields)
     750             : {
     751           0 :     return NextCopyFromRawFieldsInternal(cstate, fields, nfields,
     752           0 :                                          cstate->opts.csv_mode);
     753             : }
     754             : 
     755             : /*
     756             :  * Workhorse for NextCopyFromRawFields().
     757             :  *
     758             :  * Read raw fields in the next line for COPY FROM in text or csv mode. Return
     759             :  * false if no more lines.
     760             :  *
     761             :  * An internal temporary buffer is returned via 'fields'. It is valid until
     762             :  * the next call of the function. Since the function returns all raw fields
     763             :  * in the input file, 'nfields' could be different from the number of columns
     764             :  * in the relation.
     765             :  *
     766             :  * NOTE: the force_not_null option is not applied to the returned fields.
     767             :  *
     768             :  * We use pg_attribute_always_inline to reduce function call overhead
     769             :  * and to help compilers to optimize away the 'is_csv' condition when called
     770             :  * by internal functions such as CopyFromTextLikeOneRow().
     771             :  */
     772             : static pg_attribute_always_inline bool
     773     1267376 : NextCopyFromRawFieldsInternal(CopyFromState cstate, char ***fields, int *nfields, bool is_csv)
     774             : {
     775             :     int         fldct;
     776     1267376 :     bool        done = false;
     777             : 
     778             :     /* only available for text or csv input */
     779             :     Assert(!cstate->opts.binary);
     780             : 
     781             :     /* before the first data line, skip the header and (for "match") verify it */
     782     1267376 :     if (cstate->cur_lineno == 0 && cstate->opts.header_line != COPY_HEADER_FALSE)
     783             :     {
     784             :         ListCell   *cur;
     785             :         TupleDesc   tupDesc;
     786         148 :         int         lines_to_skip = cstate->opts.header_line;
     787             : 
     788             :         /* If set to "match", exactly one header line is skipped */
     789         148 :         if (cstate->opts.header_line == COPY_HEADER_MATCH)
     790          76 :             lines_to_skip = 1;
     791             : 
     792         148 :         tupDesc = RelationGetDescr(cstate->rel);
     793             : 
     794         346 :         for (int i = 0; i < lines_to_skip; i++)
     795             :         {
     796         206 :             cstate->cur_lineno++;
     797         206 :             if ((done = CopyReadLine(cstate, is_csv)))
     798           8 :                 break;
     799             :         }
     800             : 
     801         148 :         if (cstate->opts.header_line == COPY_HEADER_MATCH)
     802             :         {
     803             :             int         fldnum;
     804             : 
     805          76 :             if (is_csv)
     806          10 :                 fldct = CopyReadAttributesCSV(cstate);
     807             :             else
     808          66 :                 fldct = CopyReadAttributesText(cstate);
     809             : 
     810          76 :             if (fldct != list_length(cstate->attnumlist))
     811          24 :                 ereport(ERROR,
     812             :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     813             :                          errmsg("wrong number of fields in header line: got %d, expected %d",
     814             :                                 fldct, list_length(cstate->attnumlist))));
     815             : 
     816          52 :             fldnum = 0;
     817         158 :             foreach(cur, cstate->attnumlist)
     818             :             {
     819         126 :                 int         attnum = lfirst_int(cur);
     820             :                 char       *colName;
     821         126 :                 Form_pg_attribute attr = TupleDescAttr(tupDesc, attnum - 1);
     822             : 
     823             :                 Assert(fldnum < cstate->max_fields);
     824             : 
     825         126 :                 colName = cstate->raw_fields[fldnum++];
     826         126 :                 if (colName == NULL)
     827           6 :                     ereport(ERROR,
     828             :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     829             :                              errmsg("column name mismatch in header line field %d: got null value (\"%s\"), expected \"%s\"",
     830             :                                     fldnum, cstate->opts.null_print, NameStr(attr->attname))));
     831             : 
     832         120 :                 if (namestrcmp(&attr->attname, colName) != 0)
     833             :                 {
     834          14 :                     ereport(ERROR,
     835             :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     836             :                              errmsg("column name mismatch in header line field %d: got \"%s\", expected \"%s\"",
     837             :                                     fldnum, colName, NameStr(attr->attname))));
     838             :                 }
     839             :             }
     840             :         }
     841             : 
     842         104 :         if (done)
     843           8 :             return false;
     844             :     }
     845             : 
     846     1267324 :     cstate->cur_lineno++;
     847             : 
     848             :     /* Actually read the line into memory here */
     849     1267324 :     done = CopyReadLine(cstate, is_csv);
     850             : 
     851             :     /*
     852             :      * EOF at start of line means we're done.  If we see EOF after some
     853             :      * characters, we act as though it was newline followed by EOF, i.e.,
     854             :      * process the line now and report "no more lines" on the next call.
     855             :      */
     856     1267296 :     if (done && cstate->line_buf.len == 0)
     857        1682 :         return false;
     858             : 
     859             :     /* Parse the line into de-escaped field values */
     860     1265614 :     if (is_csv)
     861         498 :         fldct = CopyReadAttributesCSV(cstate);
     862             :     else
     863     1265116 :         fldct = CopyReadAttributesText(cstate);
     864             : 
     865     1265602 :     *fields = cstate->raw_fields;
     866     1265602 :     *nfields = fldct;
     867     1265602 :     return true;
     868             : }
     869             : 
     870             : /*
     871             :  * Read next tuple from file for COPY FROM. Return false if no more tuples.
     872             :  *
     873             :  * 'econtext' is used to evaluate default expression for each column that is
     874             :  * either not read from the file or is using the DEFAULT option of COPY FROM.
     875             :  * It can be NULL when no default values are used, i.e. when all columns are
     876             :  * read from the file, and DEFAULT option is unset.
     877             :  *
     878             :  * 'values' and 'nulls' arrays must be the same length as columns of the
     879             :  * relation passed to BeginCopyFrom. This function fills the arrays.
     880             :  */
     881             : bool
     882     1267418 : NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
     883             :              Datum *values, bool *nulls)
     884             : {
     885             :     TupleDesc   tupDesc;
     886             :     AttrNumber  num_phys_attrs,
     887     1267418 :                 num_defaults = cstate->num_defaults;
     888             :     int         i;
     889     1267418 :     int        *defmap = cstate->defmap;
     890     1267418 :     ExprState **defexprs = cstate->defexprs;
     891             : 
     892     1267418 :     tupDesc = RelationGetDescr(cstate->rel);
     893     1267418 :     num_phys_attrs = tupDesc->natts;
     894             : 
     895             :     /* Initialize all values for row to NULL, and clear the per-column defaults flags */
     896     5937084 :     MemSet(values, 0, num_phys_attrs * sizeof(Datum));
     897     1267418 :     MemSet(nulls, true, num_phys_attrs * sizeof(bool));
     898     1411526 :     MemSet(cstate->defaults, false, num_phys_attrs * sizeof(bool));
     899             : 
     900             :     /* Get one row from source via the format-specific callback */
     901     1267418 :     if (!cstate->routine->CopyFromOneRow(cstate, econtext, values, nulls))
     902        1702 :         return false;
     903             : 
     904             :     /*
     905             :      * Now compute and insert any defaults available for the columns not
     906             :      * provided by the input data.  Anything not processed here or above will
     907             :      * remain NULL.
     908             :      */
     909     1326086 :     for (i = 0; i < num_defaults; i++)
     910             :     {
     911             :         /*
     912             :          * The caller must supply econtext and have switched into the
     913             :          * per-tuple memory context in it.
     914             :          */
     915             :         Assert(econtext != NULL);
     916             :         Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
     917             : 
     918       60530 :         values[defmap[i]] = ExecEvalExpr(defexprs[defmap[i]], econtext,
     919       60530 :                                          &nulls[defmap[i]]);
     920             :     }
     921             : 
     922     1265556 :     return true;
     923             : }
     924             : 
     925             : /* Per-row callback for text format: delegates to CopyFromTextLikeOneRow() with is_csv = false */
     926             : bool
     927     1266634 : CopyFromTextOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values,
     928             :                    bool *nulls)
     929             : {
     930     1266634 :     return CopyFromTextLikeOneRow(cstate, econtext, values, nulls, false);
     931             : }
     932             : 
     933             : /* Per-row callback for CSV format: delegates to CopyFromTextLikeOneRow() with is_csv = true */
     934             : bool
     935         742 : CopyFromCSVOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values,
     936             :                   bool *nulls)
     937             : {
     938         742 :     return CopyFromTextLikeOneRow(cstate, econtext, values, nulls, true);
     939             : }
     940             : 
     941             : /*
     942             :  * Workhorse for CopyFromTextOneRow() and CopyFromCSVOneRow().
     943             :  *
     944             :  * We use pg_attribute_always_inline to reduce function call overhead
     945             :  * and to help compilers to optimize away the 'is_csv' condition.
     946             :  */
     947             : static pg_attribute_always_inline bool
     948     1267376 : CopyFromTextLikeOneRow(CopyFromState cstate, ExprContext *econtext,
     949             :                        Datum *values, bool *nulls, bool is_csv)
     950             : {
     951             :     TupleDesc   tupDesc;
     952             :     AttrNumber  attr_count;
     953     1267376 :     FmgrInfo   *in_functions = cstate->in_functions;
     954     1267376 :     Oid        *typioparams = cstate->typioparams;
     955     1267376 :     ExprState **defexprs = cstate->defexprs;
     956             :     char      **field_strings;
     957             :     ListCell   *cur;
     958             :     int         fldct;
     959             :     int         fieldno;
     960             :     char       *string;
     961             : 
     962     1267376 :     tupDesc = RelationGetDescr(cstate->rel);
     963     1267376 :     attr_count = list_length(cstate->attnumlist);
     964             : 
     965             :     /* read raw fields in the next line; false means no more lines */
     966     1267376 :     if (!NextCopyFromRawFieldsInternal(cstate, &field_strings, &fldct, is_csv))
     967        1690 :         return false;
     968             : 
     969             :     /* reject a line that has more fields than target columns */
     970     1265602 :     if (attr_count > 0 && fldct > attr_count)
     971          18 :         ereport(ERROR,
     972             :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     973             :                  errmsg("extra data after last expected column")));
     974             : 
     975     1265584 :     fieldno = 0;
     976             : 
     977             :     /* Loop to read the user attributes on the line. */
     978     5794162 :     foreach(cur, cstate->attnumlist)
     979             :     {
     980     4528762 :         int         attnum = lfirst_int(cur);
     981     4528762 :         int         m = attnum - 1;
     982     4528762 :         Form_pg_attribute att = TupleDescAttr(tupDesc, m);
     983             : 
     984     4528762 :         if (fieldno >= fldct)
     985          18 :             ereport(ERROR,
     986             :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
     987             :                      errmsg("missing data for column \"%s\"",
     988             :                             NameStr(att->attname))));
     989     4528744 :         string = field_strings[fieldno++];
     990             : 
     991     4528744 :         if (cstate->convert_select_flags &&
     992          20 :             !cstate->convert_select_flags[m])
     993             :         {
     994             :             /* ignore input field, leaving column as NULL */
     995          10 :             continue;
     996             :         }
     997             : 
     998     4528734 :         if (is_csv)
     999             :         {
    1000        1000 :             if (string == NULL &&
    1001          44 :                 cstate->opts.force_notnull_flags[m])
    1002             :             {
    1003             :                 /*
    1004             :                  * FORCE_NOT_NULL option is set and column is NULL - convert
    1005             :                  * it to the NULL string.
    1006             :                  */
    1007          28 :                 string = cstate->opts.null_print;
    1008             :             }
    1009         972 :             else if (string != NULL && cstate->opts.force_null_flags[m]
    1010          50 :                      && strcmp(string, cstate->opts.null_print) == 0)
    1011             :             {
    1012             :                 /*
    1013             :                  * FORCE_NULL option is set and column matches the NULL
    1014             :                  * string. It must have been quoted, or otherwise the string
    1015             :                  * would already have been set to NULL. Convert it to NULL as
    1016             :                  * specified.
    1017             :                  */
    1018          26 :                 string = NULL;
    1019             :             }
    1020             :         }
    1021             : 
    1022     4528734 :         cstate->cur_attname = NameStr(att->attname);
    1023     4528734 :         cstate->cur_attval = string;
    1024             : 
    1025     4528734 :         if (string != NULL)
    1026     4523870 :             nulls[m] = false;
    1027             : 
    1028     4528734 :         if (cstate->defaults[m])
    1029             :         {
    1030             :             /* We must have switched into the per-tuple memory context */
    1031             :             Assert(econtext != NULL);
    1032             :             Assert(CurrentMemoryContext == econtext->ecxt_per_tuple_memory);
    1033             : 
    1034          60 :             values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
    1035             :         }
    1036             : 
    1037             :         /*
    1038             :          * On a soft input error (ON_ERROR is not STOP), count it in num_errors and end the row early
    1039             :          */
    1040     4528636 :         else if (!InputFunctionCallSafe(&in_functions[m],
    1041             :                                         string,
    1042     4528674 :                                         typioparams[m],
    1043             :                                         att->atttypmod,
    1044     4528674 :                                         (Node *) cstate->escontext,
    1045     4528674 :                                         &values[m]))
    1046             :         {
    1047             :             Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
    1048             : 
    1049         128 :             cstate->num_errors++;
    1050             : 
    1051         128 :             if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
    1052             :             {
    1053             :                 /*
    1054             :                  * Since we emit line number and column info in the below
    1055             :                  * notice message, we suppress error context information other
    1056             :                  * than the relation name.
    1057             :                  */
    1058             :                 Assert(!cstate->relname_only);
    1059          42 :                 cstate->relname_only = true;
    1060             : 
    1061          42 :                 if (cstate->cur_attval)
    1062             :                 {
    1063             :                     char       *attval;
    1064             : 
    1065          36 :                     attval = CopyLimitPrintoutLength(cstate->cur_attval);
    1066          36 :                     ereport(NOTICE,
    1067             :                             errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": \"%s\"",
    1068             :                                    cstate->cur_lineno,
    1069             :                                    cstate->cur_attname,
    1070             :                                    attval));
    1071          36 :                     pfree(attval);
    1072             :                 }
    1073             :                 else
    1074           6 :                     ereport(NOTICE,
    1075             :                             errmsg("skipping row due to data type incompatibility at line %" PRIu64 " for column \"%s\": null input",
    1076             :                                    cstate->cur_lineno,
    1077             :                                    cstate->cur_attname));
    1078             : 
    1079             :                 /* reset relname_only now that the notice has been emitted */
    1080          42 :                 cstate->relname_only = false;
    1081             :             }
    1082             : 
    1083         128 :             return true;
    1084             :         }
    1085             : 
    1086     4528568 :         cstate->cur_attname = NULL;
    1087     4528568 :         cstate->cur_attval = NULL;
    1088             :     }
    1089             : 
    1090             :     Assert(fieldno == attr_count);
    1091             : 
    1092     1265400 :     return true;
    1093             : }
    1094             : 
    1095             : /*
                        * Implementation of the per-row callback for binary format.
                        *
                        * Advances cstate->cur_lineno, reads the tuple's 16-bit field count, and
                        * then reads one attribute for every entry of cstate->attnumlist, storing
                        * the results in values[] and nulls[] at index (attnum - 1).
                        *
                        * Returns true if a row was read.  Returns false if the field count could
                        * not be read (EOF) or if it is -1, the EOF marker.  Raises an error if
                        * any data follows the EOF marker, or if the field count differs from the
                        * number of columns being copied.
                        *
                        * econtext is not referenced by this function.
                        */
    1096             : bool
    1097          42 : CopyFromBinaryOneRow(CopyFromState cstate, ExprContext *econtext, Datum *values,
    1098             :                      bool *nulls)
    1099             : {
    1100             :     TupleDesc   tupDesc;
    1101             :     AttrNumber  attr_count;
    1102          42 :     FmgrInfo   *in_functions = cstate->in_functions;
    1103          42 :     Oid        *typioparams = cstate->typioparams;
    1104             :     int16       fld_count;
    1105             :     ListCell   *cur;
    1106             : 
    1107          42 :     tupDesc = RelationGetDescr(cstate->rel);
    1108          42 :     attr_count = list_length(cstate->attnumlist);
    1109             : 
                           /* Advanced once per call, before we know whether a tuple follows */
    1110          42 :     cstate->cur_lineno++;
    1111             : 
    1112          42 :     if (!CopyGetInt16(cstate, &fld_count))
    1113             :     {
    1114             :         /* EOF detected (end of file, or protocol-level EOF) */
    1115           0 :         return false;
    1116             :     }
    1117             : 
    1118          42 :     if (fld_count == -1)
    1119             :     {
    1120             :         /*
    1121             :          * Received EOF marker.  Wait for the protocol-level EOF, and complain
    1122             :          * if it doesn't come immediately.  In COPY FROM STDIN, this ensures
    1123             :          * that we correctly handle CopyFail, if client chooses to send that
    1124             :          * now.  When copying from file, we could ignore the rest of the file
    1125             :          * like in text mode, but we choose to be consistent with the COPY
    1126             :          * FROM STDIN case.
    1127             :          */
    1128             :         char        dummy;
    1129             : 
                               /* Try to read one more byte; getting any is an error */
    1130          12 :         if (CopyReadBinaryData(cstate, &dummy, 1) > 0)
    1131           0 :             ereport(ERROR,
    1132             :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1133             :                      errmsg("received copy data after EOF marker")));
    1134          12 :         return false;
    1135             :     }
    1136             : 
                           /* The tuple must supply exactly one field per column being copied */
    1137          30 :     if (fld_count != attr_count)
    1138           0 :         ereport(ERROR,
    1139             :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1140             :                  errmsg("row field count is %d, expected %d",
    1141             :                         fld_count, attr_count)));
    1142             : 
    1143         186 :     foreach(cur, cstate->attnumlist)
    1144             :     {
    1145         158 :         int         attnum = lfirst_int(cur);
                               /* per-attribute arrays are indexed by attnum - 1 */
    1146         158 :         int         m = attnum - 1;
    1147         158 :         Form_pg_attribute att = TupleDescAttr(tupDesc, m);
    1148             : 
                               /*
                                * Publish the column name while its value is being read, and clear
                                * it again once the read has returned.  (Presumably this is for
                                * error context reporting; confirm against the error callback.)
                                */
    1149         158 :         cstate->cur_attname = NameStr(att->attname);
    1150         314 :         values[m] = CopyReadBinaryAttribute(cstate,
    1151         158 :                                             &in_functions[m],
    1152         158 :                                             typioparams[m],
    1153             :                                             att->atttypmod,
    1154             :                                             &nulls[m]);
    1155         156 :         cstate->cur_attname = NULL;
    1156             :     }
    1157             : 
    1158          28 :     return true;
    1159             : }
    1160             : 
    1161             : /*
    1162             :  * Read the next input line and stash it in line_buf.
    1163             :  *
    1164             :  * Result is true if read was terminated by EOF, false if terminated
    1165             :  * by newline.  The terminating newline or EOF marker is not included
    1166             :  * in the final value of line_buf.
                        *
                        * is_csv is passed straight through to CopyReadLineText().
                        *
                        * cstate->line_buf_valid is cleared while line_buf is being refilled and
                        * set again just before returning.
    1167             :  */
    1168             : static bool
    1169     1267530 : CopyReadLine(CopyFromState cstate, bool is_csv)
    1170             : {
    1171             :     bool        result;
    1172             : 
    1173     1267530 :     resetStringInfo(&cstate->line_buf);
    1174     1267530 :     cstate->line_buf_valid = false;
    1175             : 
    1176             :     /* Parse data and transfer into line_buf */
    1177     1267530 :     result = CopyReadLineText(cstate, is_csv);
    1178             : 
    1179     1267502 :     if (result)
    1180             :     {
    1181             :         /*
    1182             :          * Reached EOF.  In protocol version 3, we should ignore anything
    1183             :          * after \. up to the protocol end of copy data.  (XXX maybe better
    1184             :          * not to treat \. as special?)
    1185             :          */
    1186        1690 :         if (cstate->copy_src == COPY_FRONTEND)
    1187             :         {
    1188             :             int         inbytes;
    1189             : 
                                   /* Keep reading into input_buf until no more data arrives */
    1190             :             do
    1191             :             {
    1192         894 :                 inbytes = CopyGetData(cstate, cstate->input_buf,
    1193             :                                       1, INPUT_BUF_SIZE);
    1194         894 :             } while (inbytes > 0);
                                   /* None of that data is wanted: mark both buffers as empty */
    1195         894 :             cstate->input_buf_index = 0;
    1196         894 :             cstate->input_buf_len = 0;
    1197         894 :             cstate->raw_buf_index = 0;
    1198         894 :             cstate->raw_buf_len = 0;
    1199             :         }
    1200             :     }
    1201             :     else
    1202             :     {
    1203             :         /*
    1204             :          * If we didn't hit EOF, then we must have transferred the EOL marker
    1205             :          * to line_buf along with the data.  Get rid of it.
                                *
                                * The marker is one byte for EOL_NL and EOL_CR and two bytes for
                                * EOL_CRNL; after shortening, the string is re-terminated with '\0'.
    1206             :          */
    1207     1265812 :         switch (cstate->eol_type)
    1208             :         {
    1209     1265812 :             case EOL_NL:
    1210             :                 Assert(cstate->line_buf.len >= 1);
    1211             :                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
    1212     1265812 :                 cstate->line_buf.len--;
    1213     1265812 :                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
    1214     1265812 :                 break;
    1215           0 :             case EOL_CR:
    1216             :                 Assert(cstate->line_buf.len >= 1);
    1217             :                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\r');
    1218           0 :                 cstate->line_buf.len--;
    1219           0 :                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
    1220           0 :                 break;
    1221           0 :             case EOL_CRNL:
    1222             :                 Assert(cstate->line_buf.len >= 2);
    1223             :                 Assert(cstate->line_buf.data[cstate->line_buf.len - 2] == '\r');
    1224             :                 Assert(cstate->line_buf.data[cstate->line_buf.len - 1] == '\n');
    1225           0 :                 cstate->line_buf.len -= 2;
    1226           0 :                 cstate->line_buf.data[cstate->line_buf.len] = '\0';
    1227           0 :                 break;
    1228           0 :             case EOL_UNKNOWN:
    1229             :                 /* shouldn't get here */
    1230             :                 Assert(false);
    1231           0 :                 break;
    1232             :         }
    1233             :     }
    1234             : 
    1235             :     /* Now it's safe to use the buffer in error messages */
    1236     1267502 :     cstate->line_buf_valid = true;
    1237             : 
    1238     1267502 :     return result;
    1239             : }
    1240             : 
    1241             : /*
    1242             :  * CopyReadLineText - inner loop of CopyReadLine for text mode
                        *
                        * Despite the name, this also handles CSV input when is_csv is true.  In
                        * that case the quote and escape characters are tracked so that \r and \n
                        * inside a quoted field do not end the line, and backslash is treated as
                        * an ordinary character.
                        *
                        * Returns true if the line was ended by EOF (the input is exhausted, or a
                        * \. end-of-copy marker was found in non-CSV mode) and false if it was
                        * ended by a newline sequence.  When a newline sequence is found,
                        * cstate->eol_type is set if it was not known yet, and a terminator that
                        * conflicts with the established style raises an error.
                        *
                        * NOTE(review): REFILL_LINEBUF and the IF_NEED_REFILL_* macros are defined
                        * outside this function.  Judging by their names and call sites they
                        * presumably use this function's local variables and may "continue" or
                        * "break" the main loop; confirm against their definitions before
                        * renaming locals or restructuring the loop.
    1243             :  */
    1244             : static bool
    1245     1267530 : CopyReadLineText(CopyFromState cstate, bool is_csv)
    1246             : {
    1247             :     char       *copy_input_buf;
    1248             :     int         input_buf_ptr;
    1249             :     int         copy_buf_len;
    1250     1267530 :     bool        need_data = false;
    1251     1267530 :     bool        hit_eof = false;
    1252     1267530 :     bool        result = false;
    1253             : 
    1254             :     /* CSV variables */
    1255     1267530 :     bool        in_quote = false,
    1256     1267530 :                 last_was_esc = false;
    1257     1267530 :     char        quotec = '\0';
    1258     1267530 :     char        escapec = '\0';
    1259             : 
    1260     1267530 :     if (is_csv)
    1261             :     {
    1262         866 :         quotec = cstate->opts.quote[0];
    1263         866 :         escapec = cstate->opts.escape[0];
    1264             :         /* ignore special escape processing if it's the same as quotec */
    1265         866 :         if (quotec == escapec)
    1266         672 :             escapec = '\0';
    1267             :     }
    1268             : 
    1269             :     /*
    1270             :      * The objective of this loop is to transfer the entire next input line
    1271             :      * into line_buf.  Hence, we only care for detecting newlines (\r and/or
    1272             :      * \n) and the end-of-copy marker (\.).
    1273             :      *
    1274             :      * In CSV mode, \r and \n inside a quoted field are just part of the data
    1275             :      * value and are put in line_buf.  We keep just enough state to know if we
    1276             :      * are currently in a quoted field or not.
    1277             :      *
    1278             :      * The input has already been converted to the database encoding.  All
    1279             :      * supported server encodings have the property that all bytes in a
    1280             :      * multi-byte sequence have the high bit set, so a multibyte character
    1281             :      * cannot contain any newline or escape characters embedded in the
    1282             :      * multibyte sequence.  Therefore, we can process the input byte-by-byte,
    1283             :      * regardless of the encoding.
    1284             :      *
    1285             :      * For speed, we try to move data from input_buf to line_buf in chunks
    1286             :      * rather than one character at a time.  input_buf_ptr points to the next
    1287             :      * character to examine; any characters from input_buf_index to
    1288             :      * input_buf_ptr have been determined to be part of the line, but not yet
    1289             :      * transferred to line_buf.
    1290             :      *
    1291             :      * For a little extra speed within the loop, we copy input_buf and
    1292             :      * input_buf_len into local variables.
    1293             :      */
    1294     1267530 :     copy_input_buf = cstate->input_buf;
    1295     1267530 :     input_buf_ptr = cstate->input_buf_index;
    1296     1267530 :     copy_buf_len = cstate->input_buf_len;
    1297             : 
    1298             :     for (;;)
    1299    25529972 :     {
    1300             :         int         prev_raw_ptr;
    1301             :         char        c;
    1302             : 
    1303             :         /*
    1304             :          * Load more data if needed.
    1305             :          *
    1306             :          * TODO: We could just force four bytes of read-ahead and avoid the
    1307             :          * many calls to IF_NEED_REFILL_AND_NOT_EOF_CONTINUE().  That was
    1308             :          * unsafe with the old v2 COPY protocol, but we don't support that
    1309             :          * anymore.
    1310             :          */
    1311    26797502 :         if (input_buf_ptr >= copy_buf_len || need_data)
    1312             :         {
    1313      431052 :             REFILL_LINEBUF;
    1314             : 
    1315      431052 :             CopyLoadInputBuf(cstate);
    1316             :             /* update our local variables */
    1317      431036 :             hit_eof = cstate->input_reached_eof;
    1318      431036 :             input_buf_ptr = cstate->input_buf_index;
    1319      431036 :             copy_buf_len = cstate->input_buf_len;
    1320             : 
    1321             :             /*
    1322             :              * If we are completely out of data, break out of the loop,
    1323             :              * reporting EOF.
    1324             :              */
    1325      431036 :             if (INPUT_BUF_BYTES(cstate) <= 0)
    1326             :             {
    1327        1540 :                 result = true;
    1328        1540 :                 break;
    1329             :             }
    1330      429496 :             need_data = false;
    1331             :         }
    1332             : 
    1333             :         /* OK to fetch a character */
                               /* prev_raw_ptr remembers the position of the character fetched */
    1334    26795946 :         prev_raw_ptr = input_buf_ptr;
    1335    26795946 :         c = copy_input_buf[input_buf_ptr++];
    1336             : 
    1337    26795946 :         if (is_csv)
    1338             :         {
    1339             :             /*
    1340             :              * If character is '\r', we may need to look ahead below.  Force
    1341             :              * fetch of the next character if we don't already have it.  We
    1342             :              * need to do this before changing CSV state, in case '\r' is also
    1343             :              * the quote or escape character.
    1344             :              */
    1345        6808 :             if (c == '\r')
    1346             :             {
    1347          36 :                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1348             :             }
    1349             : 
    1350             :             /*
    1351             :              * Dealing with quotes and escapes here is mildly tricky. If the
    1352             :              * quote char is also the escape char, there's no problem - we
    1353             :              * just use the char as a toggle. If they are different, we need
    1354             :              * to ensure that we only take account of an escape inside a
    1355             :              * quoted field and immediately preceding a quote char, and not
    1356             :              * the second in an escape-escape sequence.
    1357             :              */
    1358        6808 :             if (in_quote && c == escapec)
    1359          48 :                 last_was_esc = !last_was_esc;
    1360        6808 :             if (c == quotec && !last_was_esc)
    1361         508 :                 in_quote = !in_quote;
    1362        6808 :             if (c != escapec)
    1363        6754 :                 last_was_esc = false;
    1364             : 
    1365             :             /*
    1366             :              * Updating the line count for embedded CR and/or LF chars is
    1367             :              * necessarily a little fragile - this test is probably about the
    1368             :              * best we can do.  (XXX it's arguable whether we should do this
    1369             :              * at all --- is cur_lineno a physical or logical count?)
    1370             :              */
    1371        6808 :             if (in_quote && c == (cstate->eol_type == EOL_NL ? '\n' : '\r'))
    1372          36 :                 cstate->cur_lineno++;
    1373             :         }
    1374             : 
    1375             :         /* Process \r */
    1376    26795946 :         if (c == '\r' && (!is_csv || !in_quote))
    1377             :         {
    1378             :             /* Check for \r\n on first line, _and_ handle \r\n. */
    1379           0 :             if (cstate->eol_type == EOL_UNKNOWN ||
    1380           0 :                 cstate->eol_type == EOL_CRNL)
    1381             :             {
    1382             :                 /*
    1383             :                  * If we need more data, go back to loop top to load it.
    1384             :                  *
    1385             :                  * Note that if we are at EOF, c will wind up as '\0' because
    1386             :                  * of the guaranteed pad of input_buf.
    1387             :                  */
    1388           0 :                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1389             : 
    1390             :                 /* get next char */
    1391           0 :                 c = copy_input_buf[input_buf_ptr];
    1392             : 
    1393           0 :                 if (c == '\n')
    1394             :                 {
    1395           0 :                     input_buf_ptr++;    /* eat newline */
    1396           0 :                     cstate->eol_type = EOL_CRNL; /* in case not set yet */
    1397             :                 }
    1398             :                 else
    1399             :                 {
    1400             :                     /* found \r, but no \n */
    1401           0 :                     if (cstate->eol_type == EOL_CRNL)
    1402           0 :                         ereport(ERROR,
    1403             :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1404             :                                  !is_csv ?
    1405             :                                  errmsg("literal carriage return found in data") :
    1406             :                                  errmsg("unquoted carriage return found in data"),
    1407             :                                  !is_csv ?
    1408             :                                  errhint("Use \"\\r\" to represent carriage return.") :
    1409             :                                  errhint("Use quoted CSV field to represent carriage return.")));
    1410             : 
    1411             :                     /*
    1412             :                      * if we got here, it is the first line and we didn't find
    1413             :                      * \n, so don't consume the peeked character
    1414             :                      */
    1415           0 :                     cstate->eol_type = EOL_CR;
    1416             :                 }
    1417             :             }
    1418           0 :             else if (cstate->eol_type == EOL_NL)
    1419           0 :                 ereport(ERROR,
    1420             :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1421             :                          !is_csv ?
    1422             :                          errmsg("literal carriage return found in data") :
    1423             :                          errmsg("unquoted carriage return found in data"),
    1424             :                          !is_csv ?
    1425             :                          errhint("Use \"\\r\" to represent carriage return.") :
    1426             :                          errhint("Use quoted CSV field to represent carriage return.")));
    1427             :             /* If we reach here, we have found the line terminator */
    1428           0 :             break;
    1429             :         }
    1430             : 
    1431             :         /* Process \n */
    1432    26795946 :         if (c == '\n' && (!is_csv || !in_quote))
    1433             :         {
    1434     1265812 :             if (cstate->eol_type == EOL_CR || cstate->eol_type == EOL_CRNL)
    1435           0 :                 ereport(ERROR,
    1436             :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1437             :                          !is_csv ?
    1438             :                          errmsg("literal newline found in data") :
    1439             :                          errmsg("unquoted newline found in data"),
    1440             :                          !is_csv ?
    1441             :                          errhint("Use \"\\n\" to represent newline.") :
    1442             :                          errhint("Use quoted CSV field to represent newline.")));
    1443     1265812 :             cstate->eol_type = EOL_NL;   /* in case not set yet */
    1444             :             /* If we reach here, we have found the line terminator */
    1445     1265812 :             break;
    1446             :         }
    1447             : 
    1448             :         /*
    1449             :          * Process backslash, except in CSV mode where backslash is a normal
    1450             :          * character.
    1451             :          */
    1452    25530134 :         if (c == '\\' && !is_csv)
    1453             :         {
    1454             :             char        c2;
    1455             : 
    1456        8172 :             IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1457        8172 :             IF_NEED_REFILL_AND_EOF_BREAK(0);
    1458             : 
    1459             :             /* -----
    1460             :              * get next character
    1461             :              * Note: we do not change c so if it isn't \., we can fall
    1462             :              * through and continue processing.
    1463             :              * -----
    1464             :              */
    1465        8172 :             c2 = copy_input_buf[input_buf_ptr];
    1466             : 
    1467        8172 :             if (c2 == '.')
    1468             :             {
    1469         162 :                 input_buf_ptr++;    /* consume the '.' */
                                       /*
                                        * With \r\n line endings, the marker must be followed by \r
                                        * first; the \n is then checked by the common code below.
                                        */
    1470         162 :                 if (cstate->eol_type == EOL_CRNL)
    1471             :                 {
    1472             :                     /* Get the next character */
    1473           0 :                     IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1474             :                     /* if hit_eof, c2 will become '\0' */
    1475           0 :                     c2 = copy_input_buf[input_buf_ptr++];
    1476             : 
    1477           0 :                     if (c2 == '\n')
    1478           0 :                         ereport(ERROR,
    1479             :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1480             :                                  errmsg("end-of-copy marker does not match previous newline style")));
    1481           0 :                     else if (c2 != '\r')
    1482           0 :                         ereport(ERROR,
    1483             :                                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1484             :                                  errmsg("end-of-copy marker is not alone on its line")));
    1485             :                 }
    1486             : 
    1487             :                 /* Get the next character */
    1488         162 :                 IF_NEED_REFILL_AND_NOT_EOF_CONTINUE(0);
    1489             :                 /* if hit_eof, c2 will become '\0' */
    1490         162 :                 c2 = copy_input_buf[input_buf_ptr++];
    1491             : 
    1492         162 :                 if (c2 != '\r' && c2 != '\n')
    1493           6 :                     ereport(ERROR,
    1494             :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1495             :                              errmsg("end-of-copy marker is not alone on its line")));
    1496             : 
    1497         156 :                 if ((cstate->eol_type == EOL_NL && c2 != '\n') ||
    1498         156 :                     (cstate->eol_type == EOL_CRNL && c2 != '\n') ||
    1499         156 :                     (cstate->eol_type == EOL_CR && c2 != '\r'))
    1500           0 :                     ereport(ERROR,
    1501             :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1502             :                              errmsg("end-of-copy marker does not match previous newline style")));
    1503             : 
    1504             :                 /*
    1505             :                  * If there is any data on this line before the \., complain.
                                        * Such data is either already in line_buf, or still pending
                                        * in input_buf between input_buf_index and the backslash,
                                        * whose position was saved in prev_raw_ptr.
    1506             :                  */
    1507         156 :                 if (cstate->line_buf.len > 0 ||
    1508         156 :                     prev_raw_ptr > cstate->input_buf_index)
    1509           6 :                     ereport(ERROR,
    1510             :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1511             :                              errmsg("end-of-copy marker is not alone on its line")));
    1512             : 
    1513             :                 /*
    1514             :                  * Discard the \. and newline, then report EOF.
    1515             :                  */
    1516         150 :                 cstate->input_buf_index = input_buf_ptr;
    1517         150 :                 result = true;  /* report EOF */
    1518         150 :                 break;
    1519             :             }
    1520             :             else
    1521             :             {
    1522             :                 /*
    1523             :                  * If we are here, it means we found a backslash followed by
    1524             :                  * something other than a period.  In non-CSV mode, anything
    1525             :                  * after a backslash is special, so we skip over that second
    1526             :                  * character too.  If we didn't do that \\. would be
    1527             :                  * considered an end-of-copy marker, while in non-CSV mode it is a
    1528             :                  * literal backslash followed by a period.
    1529             :                  */
    1530        8010 :                 input_buf_ptr++;
    1531             :             }
    1532             :         }
    1533             :     }                           /* end of outer loop */
    1534             : 
    1535             :     /*
    1536             :      * Transfer any still-uncopied data to line_buf.
    1537             :      */
    1538     1267502 :     REFILL_LINEBUF;
    1539             : 
    1540     1267502 :     return result;
    1541             : }
    1542             : 
    1543             : /*
    1544             :  *  Return decimal value for a hexadecimal digit
                        *
                        * Decimal digits map to 0..9.  Any other character is folded with
                        * pg_ascii_tolower() and measured from 'a', so 'a' or 'A' yields 10.
                        *
                        * The input is not validated here: for a character that is not a
                        * hexadecimal digit the result is meaningless, so callers must check
                        * before calling.
    1545             :  */
    1546             : static int
    1547           0 : GetDecimalFromHex(char hex)
    1548             : {
                           /* cast to unsigned char before passing to isdigit() */
    1549           0 :     if (isdigit((unsigned char) hex))
    1550           0 :         return hex - '0';
    1551             :     else
                               /* not a decimal digit: treat it as a letter, ignoring case */
    1552           0 :         return pg_ascii_tolower((unsigned char) hex) - 'a' + 10;
    1553             : }
    1554             : 
    1555             : /*
    1556             :  * Parse the current line into separate attributes (fields),
    1557             :  * performing de-escaping as needed.
    1558             :  *
    1559             :  * The input is in line_buf.  We use attribute_buf to hold the result
    1560             :  * strings.  cstate->raw_fields[k] is set to point to the k'th attribute
    1561             :  * string, or NULL when the input matches the null marker string.
    1562             :  * This array is expanded as necessary.
    1563             :  *
    1564             :  * (Note that the caller cannot check for nulls since the returned
    1565             :  * string would be the post-de-escaping equivalent, which may look
    1566             :  * the same as some valid data string.)
    1567             :  *
    1568             :  * delim is the column delimiter string (must be just one byte for now).
    1569             :  * null_print is the null marker string.  Note that this is compared to
    1570             :  * the pre-de-escaped input string.
    1571             :  *
    1572             :  * The return value is the number of fields actually read.
    1573             :  */
    1574             : static int
    1575     1265182 : CopyReadAttributesText(CopyFromState cstate)
    1576             : {
    1577     1265182 :     char        delimc = cstate->opts.delim[0];
    1578             :     int         fieldno;
    1579             :     char       *output_ptr;
    1580             :     char       *cur_ptr;
    1581             :     char       *line_end_ptr;
    1582             : 
    1583             :     /*
    1584             :      * We need a special case for zero-column tables: check that the input
    1585             :      * line is empty, and return.
    1586             :      */
    1587     1265182 :     if (cstate->max_fields <= 0)
    1588             :     {
    1589           8 :         if (cstate->line_buf.len != 0)
    1590           0 :             ereport(ERROR,
    1591             :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1592             :                      errmsg("extra data after last expected column")));
    1593           8 :         return 0;
    1594             :     }
    1595             : 
    1596     1265174 :     resetStringInfo(&cstate->attribute_buf);
    1597             : 
    1598             :     /*
    1599             :      * The de-escaped attributes will certainly not be longer than the input
    1600             :      * data line, so we can just force attribute_buf to be large enough and
    1601             :      * then transfer data without any checks for enough space.  We need to do
    1602             :      * it this way because enlarging attribute_buf mid-stream would invalidate
    1603             :      * pointers already stored into cstate->raw_fields[].
    1604             :      */
    1605     1265174 :     if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
    1606           8 :         enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
    1607     1265174 :     output_ptr = cstate->attribute_buf.data;
    1608             : 
    1609             :     /* set pointer variables for loop */
    1610     1265174 :     cur_ptr = cstate->line_buf.data;
    1611     1265174 :     line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
    1612             : 
    1613             :     /* Outer loop iterates over fields */
    1614     1265174 :     fieldno = 0;
    1615             :     for (;;)
    1616     3262938 :     {
    1617     4528112 :         bool        found_delim = false;
    1618             :         char       *start_ptr;
    1619             :         char       *end_ptr;
    1620             :         int         input_len;
    1621     4528112 :         bool        saw_non_ascii = false;
    1622             : 
    1623             :         /* Make sure there is enough space for the next value */
    1624     4528112 :         if (fieldno >= cstate->max_fields)
    1625             :         {
    1626          36 :             cstate->max_fields *= 2;
    1627          36 :             cstate->raw_fields =
    1628          36 :                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
    1629             :         }
    1630             : 
    1631             :         /* Remember start of field on both input and output sides */
    1632     4528112 :         start_ptr = cur_ptr;
    1633     4528112 :         cstate->raw_fields[fieldno] = output_ptr;
    1634             : 
    1635             :         /*
    1636             :          * Scan data for field.
    1637             :          *
    1638             :          * Note that in this loop, we are scanning to locate the end of field
    1639             :          * and also speculatively performing de-escaping.  Once we find the
    1640             :          * end-of-field, we can match the raw field contents against the null
    1641             :          * marker string.  Only after that comparison fails do we know that
    1642             :          * de-escaping is actually the right thing to do; therefore we *must
    1643             :          * not* throw any syntax errors before we've done the null-marker
    1644             :          * check.
    1645             :          */
    1646             :         for (;;)
    1647    22260340 :         {
    1648             :             char        c;
    1649             : 
    1650    26788452 :             end_ptr = cur_ptr;
    1651    26788452 :             if (cur_ptr >= line_end_ptr)
    1652     1265168 :                 break;
    1653    25523284 :             c = *cur_ptr++;
    1654    25523284 :             if (c == delimc)
    1655             :             {
    1656     3262944 :                 found_delim = true;
    1657     3262944 :                 break;
    1658             :             }
    1659    22260340 :             if (c == '\\')
    1660             :             {
    1661        8010 :                 if (cur_ptr >= line_end_ptr)
    1662           0 :                     break;
    1663        8010 :                 c = *cur_ptr++;
    1664        8010 :                 switch (c)
    1665             :                 {
    1666          12 :                     case '0':
    1667             :                     case '1':
    1668             :                     case '2':
    1669             :                     case '3':
    1670             :                     case '4':
    1671             :                     case '5':
    1672             :                     case '6':
    1673             :                     case '7':
    1674             :                         {
    1675             :                             /* handle \013 */
    1676             :                             int         val;
    1677             : 
    1678          12 :                             val = OCTVALUE(c);
    1679          12 :                             if (cur_ptr < line_end_ptr)
    1680             :                             {
    1681           6 :                                 c = *cur_ptr;
    1682           6 :                                 if (ISOCTAL(c))
    1683             :                                 {
    1684           0 :                                     cur_ptr++;
    1685           0 :                                     val = (val << 3) + OCTVALUE(c);
    1686           0 :                                     if (cur_ptr < line_end_ptr)
    1687             :                                     {
    1688           0 :                                         c = *cur_ptr;
    1689           0 :                                         if (ISOCTAL(c))
    1690             :                                         {
    1691           0 :                                             cur_ptr++;
    1692           0 :                                             val = (val << 3) + OCTVALUE(c);
    1693             :                                         }
    1694             :                                     }
    1695             :                                 }
    1696             :                             }
    1697          12 :                             c = val & 0377;
    1698          12 :                             if (c == '\0' || IS_HIGHBIT_SET(c))
    1699          12 :                                 saw_non_ascii = true;
    1700             :                         }
    1701          12 :                         break;
    1702          12 :                     case 'x':
    1703             :                         /* Handle \x3F */
    1704          12 :                         if (cur_ptr < line_end_ptr)
    1705             :                         {
    1706           6 :                             char        hexchar = *cur_ptr;
    1707             : 
    1708           6 :                             if (isxdigit((unsigned char) hexchar))
    1709             :                             {
    1710           0 :                                 int         val = GetDecimalFromHex(hexchar);
    1711             : 
    1712           0 :                                 cur_ptr++;
    1713           0 :                                 if (cur_ptr < line_end_ptr)
    1714             :                                 {
    1715           0 :                                     hexchar = *cur_ptr;
    1716           0 :                                     if (isxdigit((unsigned char) hexchar))
    1717             :                                     {
    1718           0 :                                         cur_ptr++;
    1719           0 :                                         val = (val << 4) + GetDecimalFromHex(hexchar);
    1720             :                                     }
    1721             :                                 }
    1722           0 :                                 c = val & 0xff;
    1723           0 :                                 if (c == '\0' || IS_HIGHBIT_SET(c))
    1724           0 :                                     saw_non_ascii = true;
    1725             :                             }
    1726             :                         }
    1727          12 :                         break;
    1728           0 :                     case 'b':
    1729           0 :                         c = '\b';
    1730           0 :                         break;
    1731           0 :                     case 'f':
    1732           0 :                         c = '\f';
    1733           0 :                         break;
    1734        3050 :                     case 'n':
    1735        3050 :                         c = '\n';
    1736        3050 :                         break;
    1737           0 :                     case 'r':
    1738           0 :                         c = '\r';
    1739           0 :                         break;
    1740           0 :                     case 't':
    1741           0 :                         c = '\t';
    1742           0 :                         break;
    1743           0 :                     case 'v':
    1744           0 :                         c = '\v';
    1745           0 :                         break;
    1746             : 
    1747             :                         /*
    1748             :                          * in all other cases, take the char after '\'
    1749             :                          * literally
    1750             :                          */
    1751             :                 }
    1752             :             }
    1753             : 
    1754             :             /* Add c to output string */
    1755    22260340 :             *output_ptr++ = c;
    1756             :         }
    1757             : 
    1758             :         /* Check whether raw input matched null marker */
    1759     4528112 :         input_len = end_ptr - start_ptr;
    1760     4528112 :         if (input_len == cstate->opts.null_print_len &&
    1761      250864 :             strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
    1762        4828 :             cstate->raw_fields[fieldno] = NULL;
    1763             :         /* Check whether raw input matched default marker */
    1764     4523284 :         else if (fieldno < list_length(cstate->attnumlist) &&
    1765     4523242 :                  cstate->opts.default_print &&
    1766         114 :                  input_len == cstate->opts.default_print_len &&
    1767          30 :                  strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
    1768          24 :         {
    1769             :             /* fieldno is 0-indexed and attnum is 1-indexed */
    1770          30 :             int         m = list_nth_int(cstate->attnumlist, fieldno) - 1;
    1771             : 
    1772          30 :             if (cstate->defexprs[m] != NULL)
    1773             :             {
    1774             :                 /* defaults contain entries for all physical attributes */
    1775          24 :                 cstate->defaults[m] = true;
    1776             :             }
    1777             :             else
    1778             :             {
    1779           6 :                 TupleDesc   tupDesc = RelationGetDescr(cstate->rel);
    1780           6 :                 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
    1781             : 
    1782           6 :                 ereport(ERROR,
    1783             :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1784             :                          errmsg("unexpected default marker in COPY data"),
    1785             :                          errdetail("Column \"%s\" has no default value.",
    1786             :                                    NameStr(att->attname))));
    1787             :             }
    1788             :         }
    1789             :         else
    1790             :         {
    1791             :             /*
    1792             :              * At this point we know the field is supposed to contain data.
    1793             :              *
    1794             :              * If we de-escaped any non-7-bit-ASCII chars, make sure the
    1795             :              * resulting string is valid data for the db encoding.
    1796             :              */
    1797     4523254 :             if (saw_non_ascii)
    1798             :             {
    1799           0 :                 char       *fld = cstate->raw_fields[fieldno];
    1800             : 
    1801           0 :                 pg_verifymbstr(fld, output_ptr - fld, false);
    1802             :             }
    1803             :         }
    1804             : 
    1805             :         /* Terminate attribute value in output area */
    1806     4528106 :         *output_ptr++ = '\0';
    1807             : 
    1808     4528106 :         fieldno++;
    1809             :         /* Done if we hit EOL instead of a delim */
    1810     4528106 :         if (!found_delim)
    1811     1265168 :             break;
    1812             :     }
    1813             : 
    1814             :     /* Clean up state of attribute_buf */
    1815     1265168 :     output_ptr--;
    1816             :     Assert(*output_ptr == '\0');
    1817     1265168 :     cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
    1818             : 
    1819     1265168 :     return fieldno;
    1820             : }
    1821             : 
    1822             : /*
    1823             :  * Split the current input line (cstate->line_buf) into fields following
    1824             :  * "standard" (i.e. common) CSV usage, de-quoting into attribute_buf.  Field
    1825             :  * pointers go into cstate->raw_fields[] (NULL for a null-marker match) and
    1826             :  * the field count is returned; same API as CopyReadAttributesText.
    1827             :  */
    1828             : static int
    1829         508 : CopyReadAttributesCSV(CopyFromState cstate)
    1830             : {
    1831         508 :     char        delimc = cstate->opts.delim[0];
    1832         508 :     char        quotec = cstate->opts.quote[0];
    1833         508 :     char        escapec = cstate->opts.escape[0];
    1834             :     int         fieldno;
    1835             :     char       *output_ptr;
    1836             :     char       *cur_ptr;
    1837             :     char       *line_end_ptr;
    1838             : 
    1839             :     /*
    1840             :      * We need a special case for zero-column tables: check that the input
    1841             :      * line is empty, and return zero fields.
    1842             :      */
    1843         508 :     if (cstate->max_fields <= 0)
    1844             :     {
    1845           0 :         if (cstate->line_buf.len != 0)
    1846           0 :             ereport(ERROR,
    1847             :                     (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1848             :                      errmsg("extra data after last expected column")));
    1849           0 :         return 0;
    1850             :     }
    1851             : 
    1852         508 :     resetStringInfo(&cstate->attribute_buf);
    1853             : 
    1854             :     /*
    1855             :      * The de-escaped attributes will certainly not be longer than the input
    1856             :      * data line, so we can just force attribute_buf to be large enough and
    1857             :      * then transfer data without any checks for enough space.  We need to do
    1858             :      * it this way because enlarging attribute_buf mid-stream would invalidate
    1859             :      * pointers already stored into cstate->raw_fields[].
    1860             :      */
    1861         508 :     if (cstate->attribute_buf.maxlen <= cstate->line_buf.len)
    1862           0 :         enlargeStringInfo(&cstate->attribute_buf, cstate->line_buf.len);
    1863         508 :     output_ptr = cstate->attribute_buf.data;
    1864             : 
    1865             :     /* set pointer variables for loop */
    1866         508 :     cur_ptr = cstate->line_buf.data;
    1867         508 :     line_end_ptr = cstate->line_buf.data + cstate->line_buf.len;
    1868             : 
    1869             :     /* Outer loop iterates over fields */
    1870         508 :     fieldno = 0;
    1871             :     for (;;)
    1872         534 :     {
    1873        1042 :         bool        found_delim = false;
    1874        1042 :         bool        saw_quote = false;
    1875             :         char       *start_ptr;
    1876             :         char       *end_ptr;
    1877             :         int         input_len;
    1878             : 
    1879             :         /* Make sure there is enough space for the next value */
    1880        1042 :         if (fieldno >= cstate->max_fields)
    1881             :         {
    1882           0 :             cstate->max_fields *= 2;
    1883           0 :             cstate->raw_fields =
    1884           0 :                 repalloc(cstate->raw_fields, cstate->max_fields * sizeof(char *));
    1885             :         }
    1886             : 
    1887             :         /* Remember start of field on both input and output sides */
    1888        1042 :         start_ptr = cur_ptr;
    1889        1042 :         cstate->raw_fields[fieldno] = output_ptr;
    1890             : 
    1891             :         /*
    1892             :          * Scan data for field.
    1893             :          *
    1894             :          * The loop starts in "not quote" mode and then toggles between that
    1895             :          * and "in quote" mode. The loop exits normally if it is in "not
    1896             :          * quote" mode and a delimiter or line end is seen.
    1897             :          */
    1898             :         for (;;)
    1899         222 :         {
    1900             :             char        c;
    1901             : 
    1902             :             /* Not in quote: copy chars until delimiter, quote or end of line */
    1903             :             for (;;)
    1904             :             {
    1905        3320 :                 end_ptr = cur_ptr;
    1906        3320 :                 if (cur_ptr >= line_end_ptr)
    1907         502 :                     goto endfield;
    1908        2818 :                 c = *cur_ptr++;
    1909             :                 /* unquoted field delimiter */
    1910        2818 :                 if (c == delimc)
    1911             :                 {
    1912         540 :                     found_delim = true;
    1913         540 :                     goto endfield;
    1914             :                 }
    1915             :                 /* start of quoted field (or part of field) */
    1916        2278 :                 if (c == quotec)
    1917             :                 {
    1918         222 :                     saw_quote = true;
    1919         222 :                     break;
    1920             :                 }
    1921             :                 /* Add c to output string */
    1922        2056 :                 *output_ptr++ = c;
    1923             :             }
    1924             : 
    1925             :             /* In quote: reaching end of line here is an error */
    1926             :             for (;;)
    1927             :             {
    1928        1390 :                 end_ptr = cur_ptr;
    1929        1390 :                 if (cur_ptr >= line_end_ptr)
    1930           0 :                     ereport(ERROR,
    1931             :                             (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1932             :                              errmsg("unterminated CSV quoted field")));
    1933             : 
    1934        1390 :                 c = *cur_ptr++;
    1935             : 
    1936             :                 /* escape within a quoted field */
    1937        1390 :                 if (c == escapec)
    1938             :                 {
    1939             :                     /*
    1940             :                      * peek at the next char if available; if it is an escape
    1941             :                      * char or a quote char, output it literally and consume it
    1942             :                      */
    1943         118 :                     if (cur_ptr < line_end_ptr)
    1944             :                     {
    1945          72 :                         char        nextc = *cur_ptr;
    1946             : 
    1947          72 :                         if (nextc == escapec || nextc == quotec)
    1948             :                         {
    1949          24 :                             *output_ptr++ = nextc;
    1950          24 :                             cur_ptr++;
    1951          24 :                             continue;
    1952             :                         }
    1953             :                     }
    1954             :                 }
    1955             : 
    1956             :                 /*
    1957             :                  * end of quoted field. Must do this test after testing for
    1958             :                  * escape in case quote char and escape char are the same
    1959             :                  * (which is the common case).
    1960             :                  */
    1961        1366 :                 if (c == quotec)
    1962         222 :                     break;
    1963             : 
    1964             :                 /* Add c to output string */
    1965        1144 :                 *output_ptr++ = c;
    1966             :             }
    1967             :         }
    1968        1042 : endfield:
    1969             : 
    1970             :         /* Terminate attribute value in output area */
    1971        1042 :         *output_ptr++ = '\0';
    1972             : 
    1973             :         /* Check whether raw input matched null marker (never if quoted) */
    1974        1042 :         input_len = end_ptr - start_ptr;
    1975        1042 :         if (!saw_quote && input_len == cstate->opts.null_print_len &&
    1976          44 :             strncmp(start_ptr, cstate->opts.null_print, input_len) == 0)
    1977          44 :             cstate->raw_fields[fieldno] = NULL;
    1978             :         /* Check whether raw input matched default marker (saw_quote not tested) */
    1979         998 :         else if (fieldno < list_length(cstate->attnumlist) &&
    1980         998 :                  cstate->opts.default_print &&
    1981         150 :                  input_len == cstate->opts.default_print_len &&
    1982          42 :                  strncmp(start_ptr, cstate->opts.default_print, input_len) == 0)
    1983             :         {
    1984             :             /* fieldno is 0-indexed and attnum is 1-indexed */
    1985          42 :             int         m = list_nth_int(cstate->attnumlist, fieldno) - 1;
    1986             : 
    1987          42 :             if (cstate->defexprs[m] != NULL)
    1988             :             {
    1989             :                 /* defaults contain entries for all physical attributes */
    1990          36 :                 cstate->defaults[m] = true;
    1991             :             }
    1992             :             else
    1993             :             {
    1994           6 :                 TupleDesc   tupDesc = RelationGetDescr(cstate->rel);
    1995           6 :                 Form_pg_attribute att = TupleDescAttr(tupDesc, m);
    1996             : 
    1997           6 :                 ereport(ERROR,
    1998             :                         (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    1999             :                          errmsg("unexpected default marker in COPY data"),
    2000             :                          errdetail("Column \"%s\" has no default value.",
    2001             :                                    NameStr(att->attname))));
    2002             :             }
    2003             :         }
    2004             : 
    2005        1036 :         fieldno++;
    2006             :         /* Done if we hit EOL instead of a delim */
    2007        1036 :         if (!found_delim)
    2008         502 :             break;
    2009             :     }
    2010             : 
    2011             :     /* Clean up state of attribute_buf; len excludes the last terminator */
    2012         502 :     output_ptr--;
    2013             :     Assert(*output_ptr == '\0');
    2014         502 :     cstate->attribute_buf.len = (output_ptr - cstate->attribute_buf.data);
    2015             : 
    2016         502 :     return fieldno;
    2017             : }
    2018             : 
    2019             : 
    2020             : /*
    2021             :  * Read a binary attribute: an int32 length word (-1 means NULL, reported via *isnull), then that many data bytes
    2022             :  */
    2023             : static Datum
    2024         158 : CopyReadBinaryAttribute(CopyFromState cstate, FmgrInfo *flinfo,
    2025             :                         Oid typioparam, int32 typmod,
    2026             :                         bool *isnull)
    2027             : {
    2028             :     int32       fld_size;
    2029             :     Datum       result;
    2030             : 
    2031         158 :     if (!CopyGetInt32(cstate, &fld_size))
    2032           0 :         ereport(ERROR,
    2033             :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2034             :                  errmsg("unexpected EOF in COPY data")));
    2035         158 :     if (fld_size == -1)     /* NULL field: no data bytes are read */
    2036             :     {
    2037          30 :         *isnull = true;
    2038          30 :         return ReceiveFunctionCall(flinfo, NULL, typioparam, typmod);
    2039             :     }
    2040         128 :     if (fld_size < 0)       /* any other negative length is rejected */
    2041           0 :         ereport(ERROR,
    2042             :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2043             :                  errmsg("invalid field size")));
    2044             : 
    2045             :     /* reset attribute_buf to empty, and load raw data in it */
    2046         128 :     resetStringInfo(&cstate->attribute_buf);
    2047             : 
    2048         128 :     enlargeStringInfo(&cstate->attribute_buf, fld_size);
    2049         128 :     if (CopyReadBinaryData(cstate, cstate->attribute_buf.data,
    2050         128 :                            fld_size) != fld_size)
    2051           0 :         ereport(ERROR,
    2052             :                 (errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
    2053             :                  errmsg("unexpected EOF in COPY data")));
    2054             : 
    2055         128 :     cstate->attribute_buf.len = fld_size;
    2056         128 :     cstate->attribute_buf.data[fld_size] = '\0';
    2057             : 
    2058             :     /* Call the column type's binary input converter */
    2059         128 :     result = ReceiveFunctionCall(flinfo, &cstate->attribute_buf,
    2060             :                                  typioparam, typmod);
    2061             : 
    2062             :     /* Trouble if the receive function didn't consume the whole buffer */
    2063         128 :     if (cstate->attribute_buf.cursor != cstate->attribute_buf.len)
    2064           2 :         ereport(ERROR,
    2065             :                 (errcode(ERRCODE_INVALID_BINARY_REPRESENTATION),
    2066             :                  errmsg("incorrect binary data format")));
    2067             : 
    2068         126 :     *isnull = false;
    2069         126 :     return result;
    2070             : }

Generated by: LCOV version 1.16