LCOV - code coverage report
Current view: top level - src/bin/psql - stringutils.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 92 107 86.0 %
Date: 2024-12-12 19:15:15 Functions: 3 3 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * psql - the PostgreSQL interactive terminal
       3             :  *
       4             :  * Copyright (c) 2000-2024, PostgreSQL Global Development Group
       5             :  *
       6             :  * src/bin/psql/stringutils.c
       7             :  */
       8             : #include "postgres_fe.h"
       9             : 
      10             : #include <ctype.h>
      11             : 
      12             : #include "common.h"
      13             : #include "stringutils.h"
      14             : 
      15             : 
      16             : /*
      17             :  * Replacement for strtok() (a.k.a. poor man's flex)
      18             :  *
      19             :  * Splits a string into tokens, returning one token per call, then NULL
      20             :  * when no more tokens exist in the given string.
      21             :  *
      22             :  * The calling convention is similar to that of strtok, but with more
      23             :  * frammishes.
      24             :  *
      25             :  * s -          string to parse, if NULL continue parsing the last string
      26             :  * whitespace - set of whitespace characters that separate tokens
      27             :  * delim -      set of non-whitespace separator characters (or NULL)
      28             :  * quote -      set of characters that can quote a token (NULL if none)
      29             :  * escape -     character that can quote quotes (0 if none)
      30             :  * e_strings -  if true, treat E'...' syntax as a valid token
      31             :  * del_quotes - if true, strip quotes from the returned token, else return
      32             :  *              it exactly as found in the string
      33             :  * encoding -   the active character-set encoding
      34             :  *
      35             :  * Characters in 'delim', if any, will be returned as single-character
      36             :  * tokens unless part of a quoted token.
      37             :  *
      38             :  * Double occurrences of the quoting character are always taken to represent
      39             :  * a single quote character in the data.  If escape isn't 0, then escape
      40             :  * followed by anything (except \0) is a data character too.
      41             :  *
      42             :  * The combination of e_strings and del_quotes both true is not currently
      43             :  * handled.  This could be fixed but it's not needed anywhere at the moment.
      44             :  *
      45             :  * Note that the string s is _not_ overwritten in this implementation.
      46             :  *
      47             :  * NB: it's okay to vary delim, quote, and escape from one call to the
      48             :  * next on a single source string, but changing whitespace is a bad idea
      49             :  * since you might lose data.
      50             :  */
      51             : char *
      52        1014 : strtokx(const char *s,
      53             :         const char *whitespace,
      54             :         const char *delim,
      55             :         const char *quote,
      56             :         char escape,
      57             :         bool e_strings,
      58             :         bool del_quotes,
      59             :         int encoding)
      60             : {
      61             :     static char *storage = NULL;    /* store the local copy of the user's
      62             :                                      * string here */
      63             :     static char *string = NULL; /* pointer into storage where to continue on
      64             :                                  * next call */
      65             : 
      66             :     /* variously abused variables: */
      67             :     unsigned int offset;
      68             :     char       *start;
      69             :     char       *p;
      70             : 
      71        1014 :     if (s)
      72             :     {
      73         186 :         free(storage);
      74             : 
      75             :         /*
      76             :          * We may need extra space to insert delimiter nulls for adjacent
      77             :          * tokens.  2X the space is a gross overestimate, but it's unlikely
      78             :          * that this code will be used on huge strings anyway.
      79             :          */
      80         186 :         storage = pg_malloc(2 * strlen(s) + 1);
      81         186 :         strcpy(storage, s);
      82         186 :         string = storage;
      83             :     }
      84             : 
      85        1014 :     if (!storage)
      86           0 :         return NULL;
      87             : 
      88             :     /* skip leading whitespace */
      89        1014 :     offset = strspn(string, whitespace);
      90        1014 :     start = &string[offset];
      91             : 
      92             :     /* end of string reached? */
      93        1014 :     if (*start == '\0')
      94             :     {
      95             :         /* technically we don't need to free here, but we're nice */
      96         112 :         free(storage);
      97         112 :         storage = NULL;
      98         112 :         string = NULL;
      99         112 :         return NULL;
     100             :     }
     101             : 
     102             :     /* test if delimiter character */
     103         902 :     if (delim && strchr(delim, *start))
     104             :     {
     105             :         /*
     106             :          * If not at end of string, we need to insert a null to terminate the
     107             :          * returned token.  We can just overwrite the next character if it
     108             :          * happens to be in the whitespace set ... otherwise move over the
     109             :          * rest of the string to make room.  (This is why we allocated extra
     110             :          * space above).
     111             :          */
     112          84 :         p = start + 1;
     113          84 :         if (*p != '\0')
     114             :         {
     115          84 :             if (!strchr(whitespace, *p))
     116          48 :                 memmove(p + 1, p, strlen(p) + 1);
     117          84 :             *p = '\0';
     118          84 :             string = p + 1;
     119             :         }
     120             :         else
     121             :         {
     122             :             /* at end of string, so no extra work */
     123           0 :             string = p;
     124             :         }
     125             : 
     126          84 :         return start;
     127             :     }
     128             : 
     129             :     /* check for E string */
     130         818 :     p = start;
     131         818 :     if (e_strings &&
     132         282 :         (*p == 'E' || *p == 'e') &&
     133           0 :         p[1] == '\'')
     134             :     {
     135           0 :         quote = "'";
     136           0 :         escape = '\\';          /* if std strings before, not any more */
     137           0 :         p++;
     138             :     }
     139             : 
     140             :     /* test if quoting character */
     141         818 :     if (quote && strchr(quote, *p))
     142             :     {
     143             :         /* okay, we have a quoted token, now scan for the closer */
     144         156 :         char        thisquote = *p++;
     145             : 
     146        2422 :         for (; *p; p += PQmblenBounded(p, encoding))
     147             :         {
     148        2410 :             if (*p == escape && p[1] != '\0')
     149           0 :                 p++;            /* process escaped anything */
     150        2410 :             else if (*p == thisquote && p[1] == thisquote)
     151           0 :                 p++;            /* process doubled quote */
     152        2410 :             else if (*p == thisquote)
     153             :             {
     154         144 :                 p++;            /* skip trailing quote */
     155         144 :                 break;
     156             :             }
     157             :         }
     158             : 
     159             :         /*
     160             :          * If not at end of string, we need to insert a null to terminate the
     161             :          * returned token.  See notes above.
     162             :          */
     163         156 :         if (*p != '\0')
     164             :         {
     165          60 :             if (!strchr(whitespace, *p))
     166          36 :                 memmove(p + 1, p, strlen(p) + 1);
     167          60 :             *p = '\0';
     168          60 :             string = p + 1;
     169             :         }
     170             :         else
     171             :         {
     172             :             /* at end of string, so no extra work */
     173          96 :             string = p;
     174             :         }
     175             : 
     176             :         /* Clean up the token if caller wants that */
     177         156 :         if (del_quotes)
     178          12 :             strip_quotes(start, thisquote, escape, encoding);
     179             : 
     180         156 :         return start;
     181             :     }
     182             : 
     183             :     /*
     184             :      * Otherwise no quoting character.  Scan till next whitespace, delimiter
     185             :      * or quote.  NB: at this point, *start is known not to be '\0',
     186             :      * whitespace, delim, or quote, so we will consume at least one character.
     187             :      */
     188         662 :     offset = strcspn(start, whitespace);
     189             : 
     190         662 :     if (delim)
     191             :     {
     192         600 :         unsigned int offset2 = strcspn(start, delim);
     193             : 
     194         600 :         if (offset > offset2)
     195          72 :             offset = offset2;
     196             :     }
     197             : 
     198         662 :     if (quote)
     199             :     {
     200         612 :         unsigned int offset2 = strcspn(start, quote);
     201             : 
     202         612 :         if (offset > offset2)
     203          24 :             offset = offset2;
     204             :     }
     205             : 
     206         662 :     p = start + offset;
     207             : 
     208             :     /*
     209             :      * If not at end of string, we need to insert a null to terminate the
     210             :      * returned token.  See notes above.
     211             :      */
     212         662 :     if (*p != '\0')
     213             :     {
     214         572 :         if (!strchr(whitespace, *p))
     215          84 :             memmove(p + 1, p, strlen(p) + 1);
     216         572 :         *p = '\0';
     217         572 :         string = p + 1;
     218             :     }
     219             :     else
     220             :     {
     221             :         /* at end of string, so no extra work */
     222          90 :         string = p;
     223             :     }
     224             : 
     225         662 :     return start;
     226             : }
     227             : 
     228             : 
     229             : /*
     230             :  * strip_quotes
     231             :  *
     232             :  * Remove quotes from the string at *source.  Leading and trailing occurrences
     233             :  * of 'quote' are removed; embedded double occurrences of 'quote' are reduced
     234             :  * to single occurrences; if 'escape' is not 0 then 'escape' removes special
     235             :  * significance of next character.
     236             :  *
     237             :  * Note that the source string is overwritten in-place.
     238             :  */
     239             : void
     240         104 : strip_quotes(char *source, char quote, char escape, int encoding)
     241             : {
     242             :     char       *src;
     243             :     char       *dst;
     244             : 
     245             :     Assert(source != NULL);
     246             :     Assert(quote != '\0');
     247             : 
     248         104 :     src = dst = source;
     249             : 
     250         104 :     if (*src && *src == quote)
     251         102 :         src++;                  /* skip leading quote */
     252             : 
     253        2422 :     while (*src)
     254             :     {
     255        2408 :         char        c = *src;
     256             :         int         i;
     257             : 
     258        2408 :         if (c == quote && src[1] == '\0')
     259             :             break;              /* skip trailing quote */
     260        2318 :         else if (c == quote && src[1] == quote)
     261           0 :             src++;              /* process doubled quote */
     262        2318 :         else if (c == escape && src[1] != '\0')
     263           0 :             src++;              /* process escaped character */
     264             : 
     265        2318 :         i = PQmblenBounded(src, encoding);
     266        4636 :         while (i--)
     267        2318 :             *dst++ = *src++;
     268             :     }
     269             : 
     270         104 :     *dst = '\0';
     271         104 : }
     272             : 
     273             : 
     274             : /*
     275             :  * quote_if_needed
     276             :  *
     277             :  * Opposite of strip_quotes().  If "source" denotes itself literally without
     278             :  * quoting or escaping, returns NULL.  Otherwise, returns a malloc'd copy with
     279             :  * quoting and escaping applied:
     280             :  *
     281             :  * source -         string to examine and, if needed, quote
     282             :  * entails_quote -  any of these present?  need outer quotes
     283             :  * quote -          doubled within string, affixed to both ends
     284             :  * escape -         doubled within string
     285             :  * force_quote -    if true, quote the output even if it doesn't "need" it
     286             :  * encoding -       the active character-set encoding
     287             :  *
     288             :  * Do not use this as a substitute for PQescapeStringConn().  Use it for
     289             :  * strings to be parsed by strtokx() or psql_scan_slash_option().
     290             :  */
     291             : char *
     292          10 : quote_if_needed(const char *source, const char *entails_quote,
     293             :                 char quote, char escape, bool force_quote,
     294             :                 int encoding)
     295             : {
     296             :     const char *src;
     297             :     char       *ret;
     298             :     char       *dst;
     299          10 :     bool        need_quotes = force_quote;
     300             : 
     301             :     Assert(source != NULL);
     302             :     Assert(quote != '\0');
     303             : 
     304          10 :     src = source;
     305          10 :     dst = ret = pg_malloc(2 * strlen(src) + 3); /* excess */
     306             : 
     307          10 :     *dst++ = quote;
     308             : 
     309         202 :     while (*src)
     310             :     {
     311         192 :         char        c = *src;
     312             :         int         i;
     313             : 
     314         192 :         if (c == quote)
     315             :         {
     316           0 :             need_quotes = true;
     317           0 :             *dst++ = quote;
     318             :         }
     319         192 :         else if (c == escape)
     320             :         {
     321           0 :             need_quotes = true;
     322           0 :             *dst++ = escape;
     323             :         }
     324         192 :         else if (strchr(entails_quote, c))
     325           0 :             need_quotes = true;
     326             : 
     327         192 :         i = PQmblenBounded(src, encoding);
     328         384 :         while (i--)
     329         192 :             *dst++ = *src++;
     330             :     }
     331             : 
     332          10 :     *dst++ = quote;
     333          10 :     *dst = '\0';
     334             : 
     335          10 :     if (!need_quotes)
     336             :     {
     337           4 :         free(ret);
     338           4 :         ret = NULL;
     339             :     }
     340             : 
     341          10 :     return ret;
     342             : }

Generated by: LCOV version 1.14