LCOV - code coverage report
Current view: top level - src/bin/psql - stringutils.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 86.0 % 107 92
Test Date: 2026-03-01 16:14:42 Functions: 100.0 % 3 3
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*
       2              :  * psql - the PostgreSQL interactive terminal
       3              :  *
       4              :  * Copyright (c) 2000-2026, PostgreSQL Global Development Group
       5              :  *
       6              :  * src/bin/psql/stringutils.c
       7              :  */
       8              : #include "postgres_fe.h"
       9              : 
      10              : #include <ctype.h>
      11              : 
      12              : #include "common.h"
      13              : #include "stringutils.h"
      14              : 
      15              : 
      16              : /*
      17              :  * Replacement for strtok() (a.k.a. poor man's flex)
      18              :  *
      19              :  * Splits a string into tokens, returning one token per call, then NULL
      20              :  * when no more tokens exist in the given string.
      21              :  *
      22              :  * The calling convention is similar to that of strtok, but with more
      23              :  * frammishes.
      24              :  *
      25              :  * s -          string to parse, if NULL continue parsing the last string
      26              :  * whitespace - set of whitespace characters that separate tokens
      27              :  * delim -      set of non-whitespace separator characters (or NULL)
      28              :  * quote -      set of characters that can quote a token (NULL if none)
      29              :  * escape -     character that can quote quotes (0 if none)
      30              :  * e_strings -  if true, treat E'...' syntax as a valid token
      31              :  * del_quotes - if true, strip quotes from the returned token, else return
      32              :  *              it exactly as found in the string
      33              :  * encoding -   the active character-set encoding
      34              :  *
      35              :  * Characters in 'delim', if any, will be returned as single-character
      36              :  * tokens unless part of a quoted token.
      37              :  *
      38              :  * Double occurrences of the quoting character are always taken to represent
      39              :  * a single quote character in the data.  If escape isn't 0, then escape
      40              :  * followed by anything (except \0) is a data character too.
      41              :  *
      42              :  * The combination of e_strings and del_quotes both true is not currently
      43              :  * handled.  This could be fixed but it's not needed anywhere at the moment.
      44              :  *
      45              :  * Note that the string s is _not_ overwritten in this implementation.
      46              :  *
      47              :  * NB: it's okay to vary delim, quote, and escape from one call to the
      48              :  * next on a single source string, but changing whitespace is a bad idea
      49              :  * since you might lose data.
      50              :  */
      51              : char *
      52          519 : strtokx(const char *s,
      53              :         const char *whitespace,
      54              :         const char *delim,
      55              :         const char *quote,
      56              :         char escape,
      57              :         bool e_strings,
      58              :         bool del_quotes,
      59              :         int encoding)
      60              : {
      61              :     static char *storage = NULL;    /* store the local copy of the users
      62              :                                      * string here */
      63              :     static char *string = NULL; /* pointer into storage where to continue on
      64              :                                  * next call */
      65              : 
      66              :     /* variously abused variables: */
      67              :     unsigned int offset;
      68              :     char       *start;
      69              :     char       *p;
      70              : 
      71          519 :     if (s)
      72              :     {
      73           96 :         free(storage);
      74              : 
      75              :         /*
      76              :          * We may need extra space to insert delimiter nulls for adjacent
      77              :          * tokens.  2X the space is a gross overestimate, but it's unlikely
      78              :          * that this code will be used on huge strings anyway.
      79              :          */
      80           96 :         storage = pg_malloc(2 * strlen(s) + 1);
      81           96 :         strcpy(storage, s);
      82           96 :         string = storage;
      83              :     }
      84              : 
      85          519 :     if (!storage)
      86            0 :         return NULL;
      87              : 
      88              :     /* skip leading whitespace */
      89          519 :     offset = strspn(string, whitespace);
      90          519 :     start = &string[offset];
      91              : 
      92              :     /* end of string reached? */
      93          519 :     if (*start == '\0')
      94              :     {
      95              :         /* technically we don't need to free here, but we're nice */
      96           57 :         free(storage);
      97           57 :         storage = NULL;
      98           57 :         string = NULL;
      99           57 :         return NULL;
     100              :     }
     101              : 
     102              :     /* test if delimiter character */
     103          462 :     if (delim && strchr(delim, *start))
     104              :     {
     105              :         /*
     106              :          * If not at end of string, we need to insert a null to terminate the
     107              :          * returned token.  We can just overwrite the next character if it
     108              :          * happens to be in the whitespace set ... otherwise move over the
     109              :          * rest of the string to make room.  (This is why we allocated extra
     110              :          * space above).
     111              :          */
     112           42 :         p = start + 1;
     113           42 :         if (*p != '\0')
     114              :         {
     115           42 :             if (!strchr(whitespace, *p))
     116           24 :                 memmove(p + 1, p, strlen(p) + 1);
     117           42 :             *p = '\0';
     118           42 :             string = p + 1;
     119              :         }
     120              :         else
     121              :         {
     122              :             /* at end of string, so no extra work */
     123            0 :             string = p;
     124              :         }
     125              : 
     126           42 :         return start;
     127              :     }
     128              : 
     129              :     /* check for E string */
     130          420 :     p = start;
     131          420 :     if (e_strings &&
     132          141 :         (*p == 'E' || *p == 'e') &&
     133            0 :         p[1] == '\'')
     134              :     {
     135            0 :         quote = "'";
     136            0 :         escape = '\\';          /* if std strings before, not any more */
     137            0 :         p++;
     138              :     }
     139              : 
     140              :     /* test if quoting character */
     141          420 :     if (quote && strchr(quote, *p))
     142              :     {
     143              :         /* okay, we have a quoted token, now scan for the closer */
     144           79 :         char        thisquote = *p++;
     145              : 
     146         1231 :         for (; *p; p += PQmblenBounded(p, encoding))
     147              :         {
     148         1225 :             if (*p == escape && p[1] != '\0')
     149            0 :                 p++;            /* process escaped anything */
     150         1225 :             else if (*p == thisquote && p[1] == thisquote)
     151            0 :                 p++;            /* process doubled quote */
     152         1225 :             else if (*p == thisquote)
     153              :             {
     154           73 :                 p++;            /* skip trailing quote */
     155           73 :                 break;
     156              :             }
     157              :         }
     158              : 
     159              :         /*
     160              :          * If not at end of string, we need to insert a null to terminate the
     161              :          * returned token.  See notes above.
     162              :          */
     163           79 :         if (*p != '\0')
     164              :         {
     165           30 :             if (!strchr(whitespace, *p))
     166           18 :                 memmove(p + 1, p, strlen(p) + 1);
     167           30 :             *p = '\0';
     168           30 :             string = p + 1;
     169              :         }
     170              :         else
     171              :         {
     172              :             /* at end of string, so no extra work */
     173           49 :             string = p;
     174              :         }
     175              : 
     176              :         /* Clean up the token if caller wants that */
     177           79 :         if (del_quotes)
     178            6 :             strip_quotes(start, thisquote, escape, encoding);
     179              : 
     180           79 :         return start;
     181              :     }
     182              : 
     183              :     /*
     184              :      * Otherwise no quoting character.  Scan till next whitespace, delimiter
     185              :      * or quote.  NB: at this point, *start is known not to be '\0',
     186              :      * whitespace, delim, or quote, so we will consume at least one character.
     187              :      */
     188          341 :     offset = strcspn(start, whitespace);
     189              : 
     190          341 :     if (delim)
     191              :     {
     192          308 :         unsigned int offset2 = strcspn(start, delim);
     193              : 
     194          308 :         if (offset > offset2)
     195           38 :             offset = offset2;
     196              :     }
     197              : 
     198          341 :     if (quote)
     199              :     {
     200          314 :         unsigned int offset2 = strcspn(start, quote);
     201              : 
     202          314 :         if (offset > offset2)
     203           12 :             offset = offset2;
     204              :     }
     205              : 
     206          341 :     p = start + offset;
     207              : 
     208              :     /*
     209              :      * If not at end of string, we need to insert a null to terminate the
     210              :      * returned token.  See notes above.
     211              :      */
     212          341 :     if (*p != '\0')
     213              :     {
     214          294 :         if (!strchr(whitespace, *p))
     215           44 :             memmove(p + 1, p, strlen(p) + 1);
     216          294 :         *p = '\0';
     217          294 :         string = p + 1;
     218              :     }
     219              :     else
     220              :     {
     221              :         /* at end of string, so no extra work */
     222           47 :         string = p;
     223              :     }
     224              : 
     225          341 :     return start;
     226              : }
     227              : 
     228              : 
     229              : /*
     230              :  * strip_quotes
     231              :  *
     232              :  * Remove quotes from the string at *source.  Leading and trailing occurrences
     233              :  * of 'quote' are removed; embedded double occurrences of 'quote' are reduced
     234              :  * to single occurrences; if 'escape' is not 0 then 'escape' removes special
     235              :  * significance of next character.
     236              :  *
     237              :  * Note that the source string is overwritten in-place.
     238              :  */
     239              : void
     240           53 : strip_quotes(char *source, char quote, char escape, int encoding)
     241              : {
     242              :     char       *src;
     243              :     char       *dst;
     244              : 
     245              :     Assert(source != NULL);
     246              :     Assert(quote != '\0');
     247              : 
     248           53 :     src = dst = source;
     249              : 
     250           53 :     if (*src && *src == quote)
     251           52 :         src++;                  /* skip leading quote */
     252              : 
     253         1231 :     while (*src)
     254              :     {
     255         1224 :         char        c = *src;
     256              :         int         i;
     257              : 
     258         1224 :         if (c == quote && src[1] == '\0')
     259              :             break;              /* skip trailing quote */
     260         1178 :         else if (c == quote && src[1] == quote)
     261            0 :             src++;              /* process doubled quote */
     262         1178 :         else if (c == escape && src[1] != '\0')
     263            0 :             src++;              /* process escaped character */
     264              : 
     265         1178 :         i = PQmblenBounded(src, encoding);
     266         2356 :         while (i--)
     267         1178 :             *dst++ = *src++;
     268              :     }
     269              : 
     270           53 :     *dst = '\0';
     271           53 : }
     272              : 
     273              : 
     274              : /*
     275              :  * quote_if_needed
     276              :  *
     277              :  * Opposite of strip_quotes().  If "source" denotes itself literally without
     278              :  * quoting or escaping, returns NULL.  Otherwise, returns a malloc'd copy with
     279              :  * quoting and escaping applied:
     280              :  *
     281              :  * source -         string to parse
     282              :  * entails_quote -  any of these present?  need outer quotes
     283              :  * quote -          doubled within string, affixed to both ends
     284              :  * escape -         doubled within string
     285              :  * force_quote -    if true, quote the output even if it doesn't "need" it
     286              :  * encoding -       the active character-set encoding
     287              :  *
     288              :  * Do not use this as a substitute for PQescapeStringConn().  Use it for
     289              :  * strings to be parsed by strtokx() or psql_scan_slash_option().
     290              :  */
     291              : char *
     292            5 : quote_if_needed(const char *source, const char *entails_quote,
     293              :                 char quote, char escape, bool force_quote,
     294              :                 int encoding)
     295              : {
     296              :     const char *src;
     297              :     char       *ret;
     298              :     char       *dst;
     299            5 :     bool        need_quotes = force_quote;
     300              : 
     301              :     Assert(source != NULL);
     302              :     Assert(quote != '\0');
     303              : 
     304            5 :     src = source;
     305            5 :     dst = ret = pg_malloc(2 * strlen(src) + 3); /* excess */
     306              : 
     307            5 :     *dst++ = quote;
     308              : 
     309          101 :     while (*src)
     310              :     {
     311           96 :         char        c = *src;
     312              :         int         i;
     313              : 
     314           96 :         if (c == quote)
     315              :         {
     316            0 :             need_quotes = true;
     317            0 :             *dst++ = quote;
     318              :         }
     319           96 :         else if (c == escape)
     320              :         {
     321            0 :             need_quotes = true;
     322            0 :             *dst++ = escape;
     323              :         }
     324           96 :         else if (strchr(entails_quote, c))
     325            0 :             need_quotes = true;
     326              : 
     327           96 :         i = PQmblenBounded(src, encoding);
     328          192 :         while (i--)
     329           96 :             *dst++ = *src++;
     330              :     }
     331              : 
     332            5 :     *dst++ = quote;
     333            5 :     *dst = '\0';
     334              : 
     335            5 :     if (!need_quotes)
     336              :     {
     337            2 :         free(ret);
     338            2 :         ret = NULL;
     339              :     }
     340              : 
     341            5 :     return ret;
     342              : }
        

Generated by: LCOV version 2.0-1