LCOV - code coverage report
Current view: top level - src/bin/psql - stringutils.c (source / functions) Hit Total Coverage
Test: PostgreSQL 12beta2 Lines: 72 107 67.3 %
Date: 2019-06-19 16:07:09 Functions: 2 3 66.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*
       2             :  * psql - the PostgreSQL interactive terminal
       3             :  *
       4             :  * Copyright (c) 2000-2019, PostgreSQL Global Development Group
       5             :  *
       6             :  * src/bin/psql/stringutils.c
       7             :  */
       8             : #include "postgres_fe.h"
       9             : 
      10             : #include <ctype.h>
      11             : 
      12             : #include "common.h"
      13             : #include "stringutils.h"
      14             : 
      15             : 
      16             : /*
      17             :  * Replacement for strtok() (a.k.a. poor man's flex)
      18             :  *
      19             :  * Splits a string into tokens, returning one token per call, then NULL
      20             :  * when no more tokens exist in the given string.
      21             :  *
      22             :  * The calling convention is similar to that of strtok, but with more
      23             :  * options.
      24             :  *
      25             :  * s -          string to parse, if NULL continue parsing the last string
      26             :  * whitespace - set of whitespace characters that separate tokens
      27             :  * delim -      set of non-whitespace separator characters (or NULL)
      28             :  * quote -      set of characters that can quote a token (NULL if none)
      29             :  * escape -     character that can quote quotes (0 if none)
      30             :  * e_strings -  if true, treat E'...' syntax as a valid token
      31             :  * del_quotes - if true, strip quotes from the returned token, else return
      32             :  *              it exactly as found in the string
      33             :  * encoding -   the active character-set encoding
      34             :  *
      35             :  * Characters in 'delim', if any, will be returned as single-character
      36             :  * tokens unless part of a quoted token.
      37             :  *
      38             :  * Double occurrences of the quoting character are always taken to represent
      39             :  * a single quote character in the data.  If escape isn't 0, then escape
      40             :  * followed by anything (except \0) is a data character too.
      41             :  *
      42             :  * The combination of e_strings and del_quotes both true is not currently
      43             :  * handled.  This could be fixed but it's not needed anywhere at the moment.
      44             :  *
      45             :  * Note that the string s is _not_ overwritten; tokens point into a private copy.
      46             :  *
      47             :  * NB: it's okay to vary delim, quote, and escape from one call to the
      48             :  * next on a single source string, but changing whitespace is a bad idea
      49             :  * since you might lose data.
      50             :  */
      51             : char *
      52         732 : strtokx(const char *s,
      53             :         const char *whitespace,
      54             :         const char *delim,
      55             :         const char *quote,
      56             :         char escape,
      57             :         bool e_strings,
      58             :         bool del_quotes,
      59             :         int encoding)
      60             : {
      61             :     static char *storage = NULL;    /* store the local copy of the user's
      62             :                                      * string here */
      63             :     static char *string = NULL; /* pointer into storage where to continue on
      64             :                                  * next call */
      65             : 
      66             :     /* scratch variables, each reused for several purposes below: */
      67             :     unsigned int offset;    /* NOTE(review): receives strspn/strcspn results */
      68             :     char       *start;
      69             :     char       *p;
      70             : 
      71         732 :     if (s)
      72             :     {
      73         126 :         free(storage);
      74             : 
      75             :         /*
      76             :          * We may need extra space to insert delimiter nulls for adjacent
      77             :          * tokens.  2X the space is a gross overestimate, but it's unlikely
      78             :          * that this code will be used on huge strings anyway.
      79             :          */
      80         126 :         storage = pg_malloc(2 * strlen(s) + 1);
      81         126 :         strcpy(storage, s);
      82         126 :         string = storage;
      83             :     }
      84             : 
      85         732 :     if (!storage)
      86           0 :         return NULL;
      87             : 
      88             :     /* skip leading whitespace */
      89         732 :     offset = strspn(string, whitespace);
      90         732 :     start = &string[offset];
      91             : 
      92             :     /* end of string reached? */
      93         732 :     if (*start == '\0')
      94             :     {
      95             :         /* technically we don't need to free here, but we're nice */
      96          98 :         free(storage);
      97          98 :         storage = NULL;
      98          98 :         string = NULL;
      99          98 :         return NULL;
     100             :     }
     101             : 
     102             :     /* test if delimiter character */
     103         634 :     if (delim && strchr(delim, *start))
     104             :     {
     105             :         /*
     106             :          * If not at end of string, we need to insert a null to terminate the
     107             :          * returned token.  We can just overwrite the next character if it
     108             :          * happens to be in the whitespace set ... otherwise move over the
     109             :          * rest of the string to make room.  (This is why we allocated extra
     110             :          * space above).
     111             :          */
     112          56 :         p = start + 1;
     113          56 :         if (*p != '\0')
     114             :         {
     115          56 :             if (!strchr(whitespace, *p))
     116          32 :                 memmove(p + 1, p, strlen(p) + 1);
     117          56 :             *p = '\0';
     118          56 :             string = p + 1;
     119             :         }
     120             :         else
     121             :         {
     122             :             /* at end of string, so no extra work */
     123           0 :             string = p;
     124             :         }
     125             : 
     126          56 :         return start;
     127             :     }
     128             : 
     129             :     /* check for E string */
     130         578 :     p = start;
     131         766 :     if (e_strings &&
     132         376 :         (*p == 'E' || *p == 'e') &&
     133           0 :         p[1] == '\'')
     134             :     {
     135           0 :         quote = "'";
     136           0 :         escape = '\\';          /* E strings always use backslash escapes */
     137           0 :         p++;
     138             :     }
     139             : 
     140             :     /* test if quoting character */
     141         578 :     if (quote && strchr(quote, *p))
     142             :     {
     143             :         /* quoted token: scan for the closing quote, stepping by PQmblen() */
     144         116 :         char        thisquote = *p++;
     145             : 
     146        1452 :         for (; *p; p += PQmblen(p, encoding))
     147             :         {
     148        1452 :             if (*p == escape && p[1] != '\0')
     149           0 :                 p++;            /* process escaped anything */
     150        1452 :             else if (*p == thisquote && p[1] == thisquote)
     151           0 :                 p++;            /* process doubled quote */
     152        1452 :             else if (*p == thisquote)
     153             :             {
     154         116 :                 p++;            /* skip trailing quote */
     155         116 :                 break;
     156             :             }
     157             :         }
     158             : 
     159             :         /*
     160             :          * If not at end of string, we need to insert a null to terminate the
     161             :          * returned token.  See notes above.
     162             :          */
     163         116 :         if (*p != '\0')
     164             :         {
     165          36 :             if (!strchr(whitespace, *p))
     166          20 :                 memmove(p + 1, p, strlen(p) + 1);
     167          36 :             *p = '\0';
     168          36 :             string = p + 1;
     169             :         }
     170             :         else
     171             :         {
     172             :             /* at end of string, so no extra work */
     173          80 :             string = p;
     174             :         }
     175             : 
     176             :         /* Clean up the token if caller wants that */
     177         116 :         if (del_quotes)
     178           0 :             strip_quotes(start, thisquote, escape, encoding);
     179             : 
     180         116 :         return start;
     181             :     }
     182             : 
     183             :     /*
     184             :      * Otherwise no quoting character.  Scan till next whitespace, delimiter
     185             :      * or quote.  NB: at this point, *start is known not to be '\0',
     186             :      * whitespace, delim, or quote, so we will consume at least one character.
     187             :      */
     188         462 :     offset = strcspn(start, whitespace);
     189             : 
     190         462 :     if (delim)
     191             :     {
     192         434 :         unsigned int offset2 = strcspn(start, delim);
     193             : 
     194         434 :         if (offset > offset2)
     195          48 :             offset = offset2;
     196             :     }
     197             : 
     198         462 :     if (quote)
     199             :     {
     200         434 :         unsigned int offset2 = strcspn(start, quote);
     201             : 
     202         434 :         if (offset > offset2)
     203          16 :             offset = offset2;
     204             :     }
     205             : 
     206         462 :     p = start + offset;
     207             : 
     208             :     /*
     209             :      * If not at end of string, we need to insert a null to terminate the
     210             :      * returned token.  See notes above.
     211             :      */
     212         462 :     if (*p != '\0')
     213             :     {
     214         416 :         if (!strchr(whitespace, *p))
     215          56 :             memmove(p + 1, p, strlen(p) + 1);
     216         416 :         *p = '\0';
     217         416 :         string = p + 1;
     218             :     }
     219             :     else
     220             :     {
     221             :         /* at end of string, so no extra work */
     222          46 :         string = p;
     223             :     }
     224             : 
     225         462 :     return start;
     226             : }
     227             : 
     228             : 
     229             : /*
     230             :  * strip_quotes
     231             :  *
     232             :  * Remove quotes from the string at *source.  Leading and trailing occurrences
     233             :  * of 'quote' are removed; embedded double occurrences of 'quote' are reduced
     234             :  * to single occurrences; if 'escape' is not 0 then 'escape' removes special
     235             :  * significance of next character.
     236             :  *
     237             :  * Note that the source string is overwritten in-place.
     238             :  */
     239             : void
     240          80 : strip_quotes(char *source, char quote, char escape, int encoding)
     241             : {
     242             :     char       *src;
     243             :     char       *dst;
     244             : 
     245             :     Assert(source != NULL);
     246             :     Assert(quote != '\0');
     247             : 
     248          80 :     src = dst = source;
     249             : 
     250          80 :     if (*src && *src == quote)
     251          80 :         src++;                  /* skip leading quote */
     252             : 
     253        1424 :     while (*src)
     254             :     {
     255        1344 :         char        c = *src;
     256             :         int         i;
     257             : 
     258        1344 :         if (c == quote && src[1] == '\0')
     259             :             break;              /* skip trailing quote */
     260        1264 :         else if (c == quote && src[1] == quote)
     261           0 :             src++;              /* process doubled quote */
     262        1264 :         else if (c == escape && src[1] != '\0')
     263           0 :             src++;              /* process escaped character */
     264             : 
     265        1264 :         i = PQmblen(src, encoding); /* bytes in the character at src */
     266        3792 :         while (i--)             /* NOTE(review): no NUL check inside these i bytes */
     267        1264 :             *dst++ = *src++;    /* dst never passes src, so in-place is safe */
     268             :     }
     269             : 
     270          80 :     *dst = '\0';
     271          80 : }
     272             : 
     273             : 
     274             : /*
     275             :  * quote_if_needed
     276             :  *
     277             :  * Opposite of strip_quotes().  If "source" denotes itself literally without
     278             :  * quoting or escaping, returns NULL.  Otherwise, returns a malloc'd copy with
     279             :  * quoting and escaping applied:
     280             :  *
     281             :  * source -         string to parse
     282             :  * entails_quote -  any of these present?  need outer quotes
     283             :  * quote -          doubled within string, affixed to both ends
     284             :  * escape -         doubled within string
     285             :  * encoding -       the active character-set encoding
     286             :  *
     287             :  * Do not use this as a substitute for PQescapeStringConn().  Use it for
     288             :  * strings to be parsed by strtokx() or psql_scan_slash_option().
     289             :  */
     290             : char *
     291           0 : quote_if_needed(const char *source, const char *entails_quote,
     292             :                 char quote, char escape, int encoding)
     293             : {
     294             :     const char *src;
     295             :     char       *ret;
     296             :     char       *dst;
     297           0 :     bool        need_quotes = false;
     298             : 
     299             :     Assert(source != NULL);
     300             :     Assert(quote != '\0');
     301             : 
     302           0 :     src = source;
     303           0 :     dst = ret = pg_malloc(2 * strlen(src) + 3); /* every byte doubled + 2 quotes + NUL */
     304             : 
     305           0 :     *dst++ = quote;
     306             : 
     307           0 :     while (*src)
     308             :     {
     309           0 :         char        c = *src;
     310             :         int         i;
     311             : 
     312           0 :         if (c == quote)
     313             :         {
     314           0 :             need_quotes = true;
     315           0 :             *dst++ = quote;
     316             :         }
     317           0 :         else if (c == escape)
     318             :         {
     319           0 :             need_quotes = true;
     320           0 :             *dst++ = escape;
     321             :         }
     322           0 :         else if (strchr(entails_quote, c))
     323           0 :             need_quotes = true;
     324             : 
     325           0 :         i = PQmblen(src, encoding); /* bytes in the character at src */
     326           0 :         while (i--)             /* NOTE(review): no NUL check inside these i bytes */
     327           0 :             *dst++ = *src++;
     328             :     }
     329             : 
     330           0 :     *dst++ = quote;
     331           0 :     *dst = '\0';
     332             : 
     333           0 :     if (!need_quotes)
     334             :     {
     335             :         /* nothing required quoting: discard the copy and report NULL */
     336           0 :         free(ret);
     337           0 :         ret = NULL;
     338             :     }
     339             : 
     340           0 :     return ret;
     341             : }

Generated by: LCOV version 1.13