LCOV - code coverage report
Current view: top level - src/interfaces/ecpg/preproc - parser.c (source / functions) Hit Total Coverage
Test: PostgreSQL 18devel Lines: 95 102 93.1 %
Date: 2024-11-21 09:14:53 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * parser.c
       4             :  *      Main entry point/driver for PostgreSQL grammar
       5             :  *
       6             :  * This should match src/backend/parser/parser.c, except that we do not
       7             :  * need to bother with re-entrant interfaces.
       8             :  *
       9             :  * Note: ECPG doesn't report error location like the backend does.
      10             :  * This file will need work if we ever want it to.
      11             :  *
      12             :  *
      13             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      14             :  * Portions Copyright (c) 1994, Regents of the University of California
      15             :  *
      16             :  * IDENTIFICATION
      17             :  *    src/interfaces/ecpg/preproc/parser.c
      18             :  *
      19             :  *-------------------------------------------------------------------------
      20             :  */
      21             : 
      22             : #include "postgres_fe.h"
      23             : 
      24             : #include "preproc_extern.h"
      25             : #include "preproc.h"
      26             : 
      27             : 
      28             : static bool have_lookahead;     /* is lookahead info valid? */
      29             : static int  lookahead_token;    /* one-token lookahead */
      30             : static YYSTYPE lookahead_yylval;    /* yylval for lookahead token */
      31             : static YYLTYPE lookahead_yylloc;    /* yylloc for lookahead token */
      32             : static char *lookahead_yytext;  /* yytext for lookahead token */
      33             : 
      34             : static int  base_yylex_location(void);  /* base_yylex() plus base_yylloc setup */
      35             : static bool check_uescapechar(unsigned char escape);    /* usable as UESCAPE character? */
      36             : static bool ecpg_isspace(char ch);  /* whitespace as far as the flex scanner is concerned? */
      37             : 
      38             : 
      39             : /*
      40             :  * Intermediate filter between parser and base lexer (base_yylex in scan.l).
      41             :  *
      42             :  * This filter is needed because in some cases the standard SQL grammar
      43             :  * requires more than one token lookahead.  We reduce these cases to one-token
      44             :  * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
      45             :  *
      46             :  * Using a filter is simpler than trying to recognize multiword tokens
      47             :  * directly in scan.l, because we'd have to allow for comments between the
      48             :  * words.  Furthermore it's not clear how to do that without re-introducing
      49             :  * scanner backtrack, which would cost more performance than this filter
      50             :  * layer does.
      51             :  *
      52             :  * We also use this filter to convert UIDENT and USCONST sequences into
      53             :  * plain IDENT and SCONST tokens.  While that could be handled by additional
      54             :  * productions in the main grammar, it's more efficient to do it like this.
      55             :  */
      56             : int
      57       71150 : filtered_base_yylex(void)
      58             : {
      59             :     int         cur_token;
      60             :     int         next_token;
      61             :     YYSTYPE     cur_yylval;
      62             :     YYLTYPE     cur_yylloc;
      63             :     char       *cur_yytext;
      64             : 
      65             :     /* Get next token --- we might already have it */
      66       71150 :     if (have_lookahead)
      67             :     {
      68         114 :         cur_token = lookahead_token;
      69         114 :         base_yylval = lookahead_yylval;
      70         114 :         base_yylloc = lookahead_yylloc;
      71         114 :         base_yytext = lookahead_yytext;
      72         114 :         have_lookahead = false;
      73             :     }
      74             :     else
      75       71036 :         cur_token = base_yylex_location();
      76             : 
      77             :     /*
      78             :      * If this token isn't one that requires lookahead, just return it.
      79             :      */
      80       71150 :     switch (cur_token)
      81             :     {
      82         116 :         case FORMAT:
      83             :         case NOT:
      84             :         case NULLS_P:
      85             :         case WITH:
      86             :         case WITHOUT:
      87             :         case UIDENT:
      88             :         case USCONST:
      89         116 :             break;
      90       71034 :         default:
      91       71034 :             return cur_token;
      92             :     }
      93             : 
      94             :     /* Save and restore lexer output variables around the call */
      95         116 :     cur_yylval = base_yylval;
      96         116 :     cur_yylloc = base_yylloc;
      97         116 :     cur_yytext = base_yytext;
      98             : 
      99             :     /* Get next token, saving outputs into lookahead variables */
     100         116 :     next_token = base_yylex_location();
     101             : 
     102         116 :     lookahead_token = next_token;
     103         116 :     lookahead_yylval = base_yylval;
     104         116 :     lookahead_yylloc = base_yylloc;
     105         116 :     lookahead_yytext = base_yytext;
     106             : 
     107         116 :     base_yylval = cur_yylval;
     108         116 :     base_yylloc = cur_yylloc;
     109         116 :     base_yytext = cur_yytext;
     110             : 
     111         116 :     have_lookahead = true;
     112             : 
     113             :     /* Replace cur_token if needed, based on lookahead */
     114         116 :     switch (cur_token)
     115             :     {
     116          10 :         case FORMAT:
     117             :             /* Replace FORMAT by FORMAT_LA if it's followed by JSON */
     118             :             switch (next_token)
     119             :             {
     120          10 :                 case JSON:
     121          10 :                     cur_token = FORMAT_LA;
     122          10 :                     break;
     123             :             }
     124          10 :             break;
     125             : 
     126          72 :         case NOT:
     127             :             /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
     128             :             switch (next_token)
     129             :             {
     130           0 :                 case BETWEEN:
     131             :                 case IN_P:
     132             :                 case LIKE:
     133             :                 case ILIKE:
     134             :                 case SIMILAR:
     135           0 :                     cur_token = NOT_LA;
     136           0 :                     break;
     137             :             }
     138          72 :             break;
     139             : 
     140           4 :         case NULLS_P:
     141             :             /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
     142             :             switch (next_token)
     143             :             {
     144           4 :                 case FIRST_P:
     145             :                 case LAST_P:
     146           4 :                     cur_token = NULLS_LA;
     147           4 :                     break;
     148             :             }
     149           4 :             break;
     150             : 
     151          16 :         case WITH:
     152             :             /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
     153             :             switch (next_token)
     154             :             {
     155           2 :                 case TIME:
     156             :                 case ORDINALITY:
     157           2 :                     cur_token = WITH_LA;
     158           2 :                     break;
     159             :             }
     160          16 :             break;
     161             : 
     162           8 :         case WITHOUT:
     163             :             /* Replace WITHOUT by WITHOUT_LA if it's followed by TIME */
     164             :             switch (next_token)
     165             :             {
     166           2 :                 case TIME:
     167           2 :                     cur_token = WITHOUT_LA;
     168           2 :                     break;
     169             :             }
     170           8 :             break;
     171           6 :         case UIDENT:
     172             :         case USCONST:
     173             :             /* Look ahead for UESCAPE */
     174           6 :             if (next_token == UESCAPE)
     175             :             {
     176             :                 /* Yup, so get third token, which had better be SCONST */
     177             :                 const char *escstr;
     178             : 
     179             :                 /*
     180             :                  * Again save and restore lexer output variables around the
     181             :                  * call
     182             :                  */
     183           2 :                 cur_yylval = base_yylval;
     184           2 :                 cur_yylloc = base_yylloc;
     185           2 :                 cur_yytext = base_yytext;
     186             : 
     187             :                 /* Get third token */
     188           2 :                 next_token = base_yylex_location();
     189             : 
     190           2 :                 if (next_token != SCONST)
     191           0 :                     mmerror(PARSE_ERROR, ET_ERROR, "UESCAPE must be followed by a simple string literal");
     192             : 
     193             :                 /*
     194             :                  * Save and check escape string, which the scanner returns
     195             :                  * with quotes
     196             :                  */
     197           2 :                 escstr = base_yylval.str;
     198           2 :                 if (strlen(escstr) != 3 || !check_uescapechar(escstr[1]))
     199           0 :                     mmerror(PARSE_ERROR, ET_ERROR, "invalid Unicode escape character");
     200             : 
     201           2 :                 base_yylval = cur_yylval;
     202           2 :                 base_yylloc = cur_yylloc;
     203           2 :                 base_yytext = cur_yytext;
     204             : 
     205             :                 /* Combine 3 tokens into 1 */
     206           2 :                 base_yylval.str = psprintf("%s UESCAPE %s", base_yylval.str, escstr);
     207           2 :                 base_yylloc = loc_strdup(base_yylval.str);
     208             : 
     209             :                 /* Clear have_lookahead, thereby consuming all three tokens */
     210           2 :                 have_lookahead = false;
     211             :             }
     212             : 
     213           6 :             if (cur_token == UIDENT)
     214           2 :                 cur_token = IDENT;
     215           4 :             else if (cur_token == USCONST)
     216           4 :                 cur_token = SCONST;
     217           6 :             break;
     218             :     }
     219             : 
     220         116 :     return cur_token;
     221             : }
     222             : 
     223             : /*
     224             :  * Call base_yylex() and fill in base_yylloc.
     225             :  *
     226             :  * pgc.l does not worry about setting yylloc, and given what we want for
     227             :  * that, trying to set it there would be pretty inconvenient.  What we
     228             :  * want is: if the returned token has type <str>, then duplicate its
     229             :  * string value as yylloc; otherwise, make a downcased copy of yytext.
     230             :  * The downcasing is ASCII-only because all that we care about there
     231             :  * is producing uniformly-cased output of keywords.  (That's mostly
     232             :  * cosmetic, but there are places in ecpglib that expect to receive
     233             :  * downcased keywords, plus it keeps us regression-test-compatible
     234             :  * with the pre-v18 implementation of ecpg.)
     235             :  */
     236             : static int
     237       71154 : base_yylex_location(void)
     238             : {
     239       71154 :     int         token = base_yylex();
     240             : 
     241       71154 :     switch (token)
     242             :     {
     243             :             /* List a token here if pgc.l assigns to base_yylval.str for it */
     244       23784 :         case Op:
     245             :         case CSTRING:
     246             :         case CPP_LINE:
     247             :         case CVARIABLE:
     248             :         case BCONST:
     249             :         case SCONST:
     250             :         case USCONST:
     251             :         case XCONST:
     252             :         case FCONST:
     253             :         case IDENT:
     254             :         case UIDENT:
     255             :         case IP:
     256             :             /* Duplicate the <str> value */
     257       23784 :             base_yylloc = loc_strdup(base_yylval.str);
     258       23784 :             break;
     259       47370 :         default:
     260             :             /* Else just use the input, i.e., yytext */
     261       47370 :             base_yylloc = loc_strdup(base_yytext);
     262             :             /* Apply an ASCII-only downcasing */
     263      158738 :             for (unsigned char *ptr = (unsigned char *) base_yylloc; *ptr; ptr++)
     264             :             {
     265      111368 :                 if (*ptr >= 'A' && *ptr <= 'Z')
     266       22074 :                     *ptr += 'a' - 'A';
     267             :             }
     268       47370 :             break;
     269             :     }
     270       71154 :     return token;
     271             : }
     272             : 
     273             : /*
     274             :  * check_uescapechar() and ecpg_isspace() should match their equivalents
     275             :  * in pgc.l.
     276             :  */
     277             : 
     278             : /* is 'escape' acceptable as Unicode escape character (UESCAPE syntax) ? */
     279             : static bool
     280           2 : check_uescapechar(unsigned char escape)
     281             : {
     282           2 :     if (isxdigit(escape)
     283           2 :         || escape == '+'
     284           2 :         || escape == '\''
     285           2 :         || escape == '"'
     286           2 :         || ecpg_isspace(escape))
     287           0 :         return false;
     288             :     else
     289           2 :         return true;
     290             : }
     291             : 
     292             : /*
     293             :  * ecpg_isspace() --- return true if flex scanner considers char whitespace
     294             :  */
     295             : static bool
     296           2 : ecpg_isspace(char ch)
     297             : {
     298           2 :     if (ch == ' ' ||
     299           2 :         ch == '\t' ||
     300           2 :         ch == '\n' ||
     301           2 :         ch == '\r' ||
     302             :         ch == '\f')
     303           0 :         return true;
     304           2 :     return false;
     305             : }

Generated by: LCOV version 1.14