LCOV - code coverage report
Current view: top level - src/backend/parser - scansup.c (source / functions) Hit Total Coverage
Test: PostgreSQL 13devel Lines: 45 68 66.2 %
Date: 2019-11-13 22:07:24 Functions: 5 5 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * scansup.c
       4             :  *    support routines for the lex/flex scanner, used by both the normal
       5             :  * backend as well as the bootstrap backend
       6             :  *
       7             :  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
       8             :  * Portions Copyright (c) 1994, Regents of the University of California
       9             :  *
      10             :  *
      11             :  * IDENTIFICATION
      12             :  *    src/backend/parser/scansup.c
      13             :  *
      14             :  *-------------------------------------------------------------------------
      15             :  */
      16             : #include "postgres.h"
      17             : 
      18             : #include <ctype.h>
      19             : 
      20             : #include "mb/pg_wchar.h"
      21             : #include "parser/scansup.h"
      22             : 
      23             : /* ----------------
      24             :  *      scanstr
      25             :  *
      26             :  * if the string passed in has escaped codes, map the escape codes to actual
      27             :  * chars
      28             :  *
      29             :  * the string returned is palloc'd and should eventually be pfree'd by the
      30             :  * caller!
      31             :  * ----------------
      32             :  */
      33             : 
      34             : char *
      35    32394810 : scanstr(const char *s)
      36             : {
      37             :     char       *newStr;
      38             :     int         len,
      39             :                 i,
      40             :                 j;
      41             : 
      42    32394810 :     if (s == NULL || s[0] == '\0')
      43      133630 :         return pstrdup("");
      44             : 
      45    32261180 :     len = strlen(s);
      46             : 
      47    32261180 :     newStr = palloc(len + 1);   /* string cannot get longer */
      48             : 
      49   127942838 :     for (i = 0, j = 0; i < len; i++)
      50             :     {
      51    95681658 :         if (s[i] == '\'')
      52             :         {
      53             :             /*
      54             :              * Note: if scanner is working right, unescaped quotes can only
      55             :              * appear in pairs, so there should be another character.
      56             :              */
      57        3864 :             i++;
      58             :             /* The bootstrap parser is not as smart, so check here. */
      59             :             Assert(s[i] == '\'');
      60        3864 :             newStr[j] = s[i];
      61             :         }
      62    95677794 :         else if (s[i] == '\\')
      63             :         {
      64           0 :             i++;
      65           0 :             switch (s[i])
      66             :             {
      67             :                 case 'b':
      68           0 :                     newStr[j] = '\b';
      69           0 :                     break;
      70             :                 case 'f':
      71           0 :                     newStr[j] = '\f';
      72           0 :                     break;
      73             :                 case 'n':
      74           0 :                     newStr[j] = '\n';
      75           0 :                     break;
      76             :                 case 'r':
      77           0 :                     newStr[j] = '\r';
      78           0 :                     break;
      79             :                 case 't':
      80           0 :                     newStr[j] = '\t';
      81           0 :                     break;
      82             :                 case '0':
      83             :                 case '1':
      84             :                 case '2':
      85             :                 case '3':
      86             :                 case '4':
      87             :                 case '5':
      88             :                 case '6':
      89             :                 case '7':
      90             :                     {
      91             :                         int         k;
      92           0 :                         long        octVal = 0;
      93             : 
      94           0 :                         for (k = 0;
      95           0 :                              s[i + k] >= '0' && s[i + k] <= '7' && k < 3;
      96           0 :                              k++)
      97           0 :                             octVal = (octVal << 3) + (s[i + k] - '0');
      98           0 :                         i += k - 1;
      99           0 :                         newStr[j] = ((char) octVal);
     100             :                     }
     101           0 :                     break;
     102             :                 default:
     103           0 :                     newStr[j] = s[i];
     104           0 :                     break;
     105             :             }                   /* switch */
     106             :         }                       /* s[i] == '\\' */
     107             :         else
     108    95677794 :             newStr[j] = s[i];
     109    95681658 :         j++;
     110             :     }
     111    32261180 :     newStr[j] = '\0';
     112    32261180 :     return newStr;
     113             : }
     114             : 
     115             : 
     116             : /*
     117             :  * downcase_truncate_identifier() --- do appropriate downcasing and
     118             :  * truncation of an unquoted identifier.  Optionally warn of truncation.
     119             :  *
     120             :  * Returns a palloc'd string containing the adjusted identifier.
     121             :  *
     122             :  * Note: in some usages the passed string is not null-terminated.
     123             :  *
     124             :  * Note: the API of this function is designed to allow for downcasing
     125             :  * transformations that increase the string length, but we don't yet
     126             :  * support that.  If you want to implement it, you'll need to fix
     127             :  * SplitIdentifierString() in utils/adt/varlena.c.
     128             :  */
     129             : char *
     130     5902664 : downcase_truncate_identifier(const char *ident, int len, bool warn)
     131             : {
     132     5902664 :     return downcase_identifier(ident, len, warn, true);
     133             : }
     134             : 
     135             : /*
     136             :  * a workhorse for downcase_truncate_identifier
     137             :  */
     138             : char *
     139     5902732 : downcase_identifier(const char *ident, int len, bool warn, bool truncate)
     140             : {
     141             :     char       *result;
     142             :     int         i;
     143             :     bool        enc_is_single_byte;
     144             : 
     145     5902732 :     result = palloc(len + 1);
     146     5902732 :     enc_is_single_byte = pg_database_encoding_max_length() == 1;
     147             : 
     148             :     /*
     149             :      * SQL99 specifies Unicode-aware case normalization, which we don't yet
     150             :      * have the infrastructure for.  Instead we use tolower() to provide a
     151             :      * locale-aware translation.  However, there are some locales where this
     152             :      * is not right either (eg, Turkish may do strange things with 'i' and
     153             :      * 'I').  Our current compromise is to use tolower() for characters with
     154             :      * the high bit set, as long as they aren't part of a multi-byte
     155             :      * character, and use an ASCII-only downcasing for 7-bit characters.
     156             :      */
     157    49788760 :     for (i = 0; i < len; i++)
     158             :     {
     159    43886028 :         unsigned char ch = (unsigned char) ident[i];
     160             : 
     161    43886028 :         if (ch >= 'A' && ch <= 'Z')
     162      847342 :             ch += 'a' - 'A';
     163    43038686 :         else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch))
     164           0 :             ch = tolower(ch);
     165    43886028 :         result[i] = (char) ch;
     166             :     }
     167     5902732 :     result[i] = '\0';
     168             : 
     169     5902732 :     if (i >= NAMEDATALEN && truncate)
     170           8 :         truncate_identifier(result, i, warn);
     171             : 
     172     5902732 :     return result;
     173             : }
     174             : 
     175             : 
     176             : /*
     177             :  * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
     178             :  *
     179             :  * The given string is modified in-place, if necessary.  A warning is
     180             :  * issued if requested.
     181             :  *
     182             :  * We require the caller to pass in the string length since this saves a
     183             :  * strlen() call in some common usages.
     184             :  */
     185             : void
     186      140290 : truncate_identifier(char *ident, int len, bool warn)
     187             : {
     188      140290 :     if (len >= NAMEDATALEN)
     189             :     {
     190           8 :         len = pg_mbcliplen(ident, len, NAMEDATALEN - 1);
     191           8 :         if (warn)
     192             :         {
     193             :             /*
     194             :              * We avoid using %.*s here because it can misbehave if the data
     195             :              * is not valid in what libc thinks is the prevailing encoding.
     196             :              */
     197             :             char        buf[NAMEDATALEN];
     198             : 
     199           8 :             memcpy(buf, ident, len);
     200           8 :             buf[len] = '\0';
     201           8 :             ereport(NOTICE,
     202             :                     (errcode(ERRCODE_NAME_TOO_LONG),
     203             :                      errmsg("identifier \"%s\" will be truncated to \"%s\"",
     204             :                             ident, buf)));
     205             :         }
     206           8 :         ident[len] = '\0';
     207             :     }
     208      140290 : }
     209             : 
     210             : /*
     211             :  * scanner_isspace() --- return true if flex scanner considers char whitespace
     212             :  *
     213             :  * This should be used instead of the potentially locale-dependent isspace()
     214             :  * function when it's important to match the lexer's behavior.
     215             :  *
     216             :  * In principle we might need similar functions for isalnum etc, but for the
     217             :  * moment only isspace seems needed.
     218             :  */
     219             : bool
     220      931180 : scanner_isspace(char ch)
     221             : {
     222             :     /* This must match scan.l's list of {space} characters */
     223      931180 :     if (ch == ' ' ||
     224      903614 :         ch == '\t' ||
     225      903260 :         ch == '\n' ||
     226      903256 :         ch == '\r' ||
     227             :         ch == '\f')
     228       27924 :         return true;
     229      903256 :     return false;
     230             : }

Generated by: LCOV version 1.13