LCOV - code coverage report
Current view: top level - src/interfaces/ecpg/preproc - parser.c (source / functions) Hit Total Coverage
Test: PostgreSQL 16devel Lines: 76 86 88.4 %
Date: 2022-08-17 03:10:30 Functions: 3 3 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * parser.c
       4             :  *      Main entry point/driver for PostgreSQL grammar
       5             :  *
       6             :  * This should match src/backend/parser/parser.c, except that we do not
       7             :  * need to bother with re-entrant interfaces.
       8             :  *
       9             :  * Note: ECPG doesn't report error location like the backend does.
      10             :  * This file will need work if we ever want it to.
      11             :  *
      12             :  *
      13             :  * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
      14             :  * Portions Copyright (c) 1994, Regents of the University of California
      15             :  *
      16             :  * IDENTIFICATION
      17             :  *    src/interfaces/ecpg/preproc/parser.c
      18             :  *
      19             :  *-------------------------------------------------------------------------
      20             :  */
      21             : 
      22             : #include "postgres_fe.h"
      23             : 
      24             : #include "preproc_extern.h"
      25             : #include "preproc.h"
      26             : 
      27             : 
      28             : static bool have_lookahead;     /* true while the lookahead_* variables hold a saved token */
      29             : static int  lookahead_token;    /* token code of the saved lookahead token */
      30             : static YYSTYPE lookahead_yylval;    /* yylval for lookahead token */
      31             : static YYLTYPE lookahead_yylloc;    /* yylloc for lookahead token */
      32             : static char *lookahead_yytext;  /* yytext for lookahead token */
      33             : /* Helpers used to validate the UESCAPE character; defined below */
      34             : static bool check_uescapechar(unsigned char escape);
      35             : static bool ecpg_isspace(char ch);
      36             : 
      37             : 
      38             : /*
      39             :  * Intermediate filter between parser and base lexer (base_yylex in scan.l).
      40             :  *
      41             :  * This filter is needed because in some cases the standard SQL grammar
      42             :  * requires more than one token lookahead.  We reduce these cases to one-token
      43             :  * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
      44             :  *
      45             :  * Using a filter is simpler than trying to recognize multiword tokens
      46             :  * directly in scan.l, because we'd have to allow for comments between the
      47             :  * words.  Furthermore it's not clear how to do that without re-introducing
      48             :  * scanner backtrack, which would cost more performance than this filter
      49             :  * layer does.
      50             :  *
      51             :  * We also use this filter to convert UIDENT and USCONST sequences into
      52             :  * plain IDENT and SCONST tokens.  While that could be handled by additional
      53             :  * productions in the main grammar, it's more efficient to do it like this.
      54             :  */
      55             : int
      56       68292 : filtered_base_yylex(void)   /* returns the token code, possibly replaced */
      57             : {
      58             :     int         cur_token;      /* token this call will return */
      59             :     int         next_token;     /* token lexed ahead of cur_token */
      60             :     YYSTYPE     cur_yylval;     /* saved base_yylval of cur_token */
      61             :     YYLTYPE     cur_yylloc;     /* saved base_yylloc of cur_token */
      62             :     char       *cur_yytext;     /* saved base_yytext of cur_token */
      63             : 
      64             :     /* Get next token --- reuse the saved lookahead token if there is one */
      65       68292 :     if (have_lookahead)
      66             :     {
      67          84 :         cur_token = lookahead_token;
      68          84 :         base_yylval = lookahead_yylval;
      69          84 :         base_yylloc = lookahead_yylloc;
      70          84 :         base_yytext = lookahead_yytext;
      71          84 :         have_lookahead = false;
      72             :     }
      73             :     else
      74       68208 :         cur_token = base_yylex();
      75             : 
      76             :     /*
      77             :      * Only the tokens listed below need lookahead; return any other as-is.
      78             :      */
      79       68292 :     switch (cur_token)
      80             :     {
      81          86 :         case NOT:
      82             :         case NULLS_P:
      83             :         case WITH:
      84             :         case UIDENT:
      85             :         case USCONST:
      86             :         case WITHOUT:
      87          86 :             break;
      88       68206 :         default:
      89       68206 :             return cur_token;
      90             :     }
      91             : 
      92             :     /* Save the lexer outputs of cur_token; base_yylex() below overwrites them */
      93          86 :     cur_yylval = base_yylval;
      94          86 :     cur_yylloc = base_yylloc;
      95          86 :     cur_yytext = base_yytext;
      96             : 
      97             :     /* Get next token, saving outputs into lookahead variables */
      98          86 :     next_token = base_yylex();
      99             : 
     100          86 :     lookahead_token = next_token;
     101          86 :     lookahead_yylval = base_yylval;
     102          86 :     lookahead_yylloc = base_yylloc;
     103          86 :     lookahead_yytext = base_yytext;
     104             : /* Restore the lexer outputs that belong to cur_token */
     105          86 :     base_yylval = cur_yylval;
     106          86 :     base_yylloc = cur_yylloc;
     107          86 :     base_yytext = cur_yytext;
     108             : 
     109          86 :     have_lookahead = true;
     110             : 
     111             :     /* Replace cur_token if needed, based on lookahead */
     112          86 :     switch (cur_token)
     113             :     {
     114          68 :         case NOT:
     115             :             /* Replace NOT by NOT_LA when followed by BETWEEN, IN, LIKE, ILIKE or SIMILAR */
     116             :             switch (next_token)
     117             :             {
     118           0 :                 case BETWEEN:
     119             :                 case IN_P:
     120             :                 case LIKE:
     121             :                 case ILIKE:
     122             :                 case SIMILAR:
     123           0 :                     cur_token = NOT_LA;
     124           0 :                     break;
     125             :             }
     126          68 :             break;
     127             : 
     128           4 :         case NULLS_P:
     129             :             /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
     130             :             switch (next_token)
     131             :             {
     132           4 :                 case FIRST_P:
     133             :                 case LAST_P:
     134           4 :                     cur_token = NULLS_LA;
     135           4 :                     break;
     136             :             }
     137           4 :             break;
     138             : 
     139           6 :         case WITH:
     140             :             /* Replace WITH by WITH_LA before TIME or ORDINALITY, by WITH_LA_UNIQUE before UNIQUE */
     141             :             switch (next_token)
     142             :             {
     143           2 :                 case TIME:
     144             :                 case ORDINALITY:
     145           2 :                     cur_token = WITH_LA;
     146           2 :                     break;
     147           0 :                 case UNIQUE:
     148           0 :                     cur_token = WITH_LA_UNIQUE;
     149           0 :                     break;
     150             :             }
     151           6 :             break;
     152             : 
     153           2 :         case WITHOUT:
     154             :             /* Replace WITHOUT by WITHOUT_LA if it's followed by TIME */
     155             :             switch (next_token)
     156             :             {
     157           2 :                 case TIME:
     158           2 :                     cur_token = WITHOUT_LA;
     159           2 :                     break;
     160             :             }
     161           2 :             break;
     162           6 :         case UIDENT:
     163             :         case USCONST:
     164             :             /* UIDENT/USCONST may be followed by a UESCAPE clause; fold it in if so */
     165           6 :             if (next_token == UESCAPE)
     166             :             {
     167             :                 /* Yup, so get third token, which had better be SCONST */
     168             :                 const char *escstr;
     169             : 
     170             :                 /*
     171             :                  * Again save and restore lexer output variables around the
     172             :                  * call
     173             :                  */
     174           2 :                 cur_yylval = base_yylval;
     175           2 :                 cur_yylloc = base_yylloc;
     176           2 :                 cur_yytext = base_yytext;
     177             : 
     178             :                 /* Get third token */
     179           2 :                 next_token = base_yylex();
     180             : 
     181           2 :                 if (next_token != SCONST)
     182           0 :                     mmerror(PARSE_ERROR, ET_ERROR, "UESCAPE must be followed by a simple string literal");
     183             : 
     184             :                 /*
     185             :                  * Save and check escape string.  The scanner returns it with
     186             :                  * quotes, so a valid one is 3 chars with the escape at index 1.
     187             :                  */
     188           2 :                 escstr = base_yylval.str;
     189           2 :                 if (strlen(escstr) != 3 || !check_uescapechar(escstr[1]))
     190           0 :                     mmerror(PARSE_ERROR, ET_ERROR, "invalid Unicode escape character");
     191             : 
     192           2 :                 base_yylval = cur_yylval;
     193           2 :                 base_yylloc = cur_yylloc;
     194           2 :                 base_yytext = cur_yytext;
     195             : 
     196             :                 /* Combine 3 tokens into 1 by appending the UESCAPE clause to the string */
     197           2 :                 base_yylval.str = psprintf("%s UESCAPE %s", base_yylval.str, escstr);
     198             : 
     199             :                 /* Clear have_lookahead, thereby consuming all three tokens */
     200           2 :                 have_lookahead = false;
     201             :             }
     202             : /* With or without UESCAPE, hand back a plain IDENT or SCONST */
     203           6 :             if (cur_token == UIDENT)
     204           2 :                 cur_token = IDENT;
     205           4 :             else if (cur_token == USCONST)
     206           4 :                 cur_token = SCONST;
     207           6 :             break;
     208             :     }
     209             : 
     210          86 :     return cur_token;
     211             : }
     212             : 
     213             : /*
     214             :  * check_uescapechar() and ecpg_isspace() should match their equivalents
     215             :  * in pgc.l.
     216             :  */
     217             : 
     218             : /* Is 'escape' acceptable as the Unicode escape character (UESCAPE syntax)? */
     219             : static bool
     220           2 : check_uescapechar(unsigned char escape)
     221             : {
     222           2 :     if (isxdigit(escape)
     223           2 :         || escape == '+'
     224           2 :         || escape == '\''
     225           2 :         || escape == '"'
     226           2 :         || ecpg_isspace(escape))
     227           0 :         return false;       /* hex digit, plus sign, quote or whitespace: rejected */
     228             :     else
     229           2 :         return true;        /* any other character is accepted */
     230             : }
     231             : 
     232             : /*
     233             :  * ecpg_isspace() --- return true if flex scanner considers char whitespace,
     234             :  * i.e. ch is a space, tab, newline, carriage return or form feed.  */
     235             : static bool
     236           2 : ecpg_isspace(char ch)
     237             : {
     238           2 :     if (ch == ' ' ||
     239           2 :         ch == '\t' ||
     240           2 :         ch == '\n' ||
     241           2 :         ch == '\r' ||
     242             :         ch == '\f')
     243           0 :         return true;
     244           2 :     return false;       /* every other character is not whitespace here */
     245             : }

Generated by: LCOV version 1.14