LCOV - code coverage report
Current view: top level - src/interfaces/ecpg/preproc - parser.c (source / functions) Coverage Total Hit
Test: PostgreSQL 19devel Lines: 93.5 % 108 101
Test Date: 2026-03-03 13:15:30 Functions: 100.0 % 4 4
Legend: Lines:     hit not hit

            Line data    Source code
       1              : /*-------------------------------------------------------------------------
       2              :  *
       3              :  * parser.c
       4              :  *      Main entry point/driver for PostgreSQL grammar
       5              :  *
       6              :  * This should match src/backend/parser/parser.c, except that we do not
       7              :  * need to bother with re-entrant interfaces.
       8              :  *
       9              :  * Note: ECPG doesn't report error location like the backend does.
      10              :  * This file will need work if we ever want it to.
      11              :  *
      12              :  *
      13              :  * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
      14              :  * Portions Copyright (c) 1994, Regents of the University of California
      15              :  *
      16              :  * IDENTIFICATION
      17              :  *    src/interfaces/ecpg/preproc/parser.c
      18              :  *
      19              :  *-------------------------------------------------------------------------
      20              :  */
      21              : 
      22              : #include "postgres_fe.h"
      23              : 
      24              : #include "preproc_extern.h"
      25              : #include "preproc.h"
      26              : 
      27              : 
      28              : static bool have_lookahead;     /* true while the lookahead_* variables below hold a buffered token */
      29              : static int  lookahead_token;    /* token code of the buffered lookahead token */
      30              : static YYSTYPE lookahead_yylval;    /* yylval for lookahead token */
      31              : static YYLTYPE lookahead_yylloc;    /* yylloc for lookahead token */
      32              : static char *lookahead_yytext;  /* yytext for lookahead token */
      33              : 
      34              : static int  base_yylex_location(void);
      35              : static bool check_uescapechar(unsigned char escape);
      36              : static bool ecpg_isspace(char ch);
      37              : 
      38              : 
      39              : /*
      40              :  * Intermediate filter between parser and base lexer (base_yylex in scan.l).
      41              :  *
      42              :  * This filter is needed because in some cases the standard SQL grammar
      43              :  * requires more than one token lookahead.  We reduce these cases to one-token
      44              :  * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
      45              :  *
      46              :  * Using a filter is simpler than trying to recognize multiword tokens
      47              :  * directly in scan.l, because we'd have to allow for comments between the
      48              :  * words.  Furthermore it's not clear how to do that without re-introducing
      49              :  * scanner backtrack, which would cost more performance than this filter
      50              :  * layer does.
      51              :  *
      52              :  * We also use this filter to convert UIDENT and USCONST sequences into
      53              :  * plain IDENT and SCONST tokens.  While that could be handled by additional
      54              :  * productions in the main grammar, it's more efficient to do it like this.
      55              :  */
      56              : int
      57        36044 : filtered_base_yylex(void)
      58              : {
      59              :     int         cur_token;
      60              :     int         next_token;
      61              :     YYSTYPE     cur_yylval;
      62              :     YYLTYPE     cur_yylloc;
      63              :     char       *cur_yytext;
      64              : 
      65              :     /* Get next token --- we might already have it */
      66        36044 :     if (have_lookahead)
      67              :     {
      68           58 :         cur_token = lookahead_token;
      69           58 :         base_yylval = lookahead_yylval;
      70           58 :         base_yylloc = lookahead_yylloc;
      71           58 :         base_yytext = lookahead_yytext;
      72           58 :         have_lookahead = false;
      73              :     }
      74              :     else
      75        35986 :         cur_token = base_yylex_location();
      76              : 
      77              :     /*
      78              :      * If this token isn't one that requires lookahead, just return it.
      79              :      */
      80        36044 :     switch (cur_token)
      81              :     {
      82           59 :         case FORMAT:
      83              :         case NOT:
      84              :         case NULLS_P:
      85              :         case WITH:
      86              :         case WITHOUT:
      87              :         case UIDENT:
      88              :         case USCONST:
      89           59 :             break;
      90        35985 :         default:
      91        35985 :             return cur_token;
      92              :     }
      93              : 
      94              :     /* Save and restore lexer output variables around the call */
      95           59 :     cur_yylval = base_yylval;
      96           59 :     cur_yylloc = base_yylloc;
      97           59 :     cur_yytext = base_yytext;
      98              : 
      99              :     /* Get next token, saving outputs into lookahead variables */
     100           59 :     next_token = base_yylex_location();
     101              : 
     102           59 :     lookahead_token = next_token;
     103           59 :     lookahead_yylval = base_yylval;
     104           59 :     lookahead_yylloc = base_yylloc;
     105           59 :     lookahead_yytext = base_yytext;
     106              : 
     107           59 :     base_yylval = cur_yylval;
     108           59 :     base_yylloc = cur_yylloc;
     109           59 :     base_yytext = cur_yytext;
     110              : 
     111           59 :     have_lookahead = true;
     112              : 
     113              :     /* Replace cur_token if needed, based on lookahead */
     114           59 :     switch (cur_token)
     115              :     {
     116            5 :         case FORMAT:
     117              :             /* Replace FORMAT by FORMAT_LA if it's followed by JSON */
     118            5 :             switch (next_token)
     119              :             {
     120            5 :                 case JSON:
     121            5 :                     cur_token = FORMAT_LA;
     122            5 :                     break;
     123              :             }
     124            5 :             break;
     125              : 
     126           37 :         case NOT:
     127              :             /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
     128           37 :             switch (next_token)
     129              :             {
     130            0 :                 case BETWEEN:
     131              :                 case IN_P:
     132              :                 case LIKE:
     133              :                 case ILIKE:
     134              :                 case SIMILAR:
     135            0 :                     cur_token = NOT_LA;
     136            0 :                     break;
     137              :             }
     138           37 :             break;
     139              : 
     140            2 :         case NULLS_P:
     141              :             /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
     142            2 :             switch (next_token)
     143              :             {
     144            2 :                 case FIRST_P:
     145              :                 case LAST_P:
     146            2 :                     cur_token = NULLS_LA;
     147            2 :                     break;
     148              :             }
     149            2 :             break;
     150              : 
     151            8 :         case WITH:
     152              :             /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
     153            8 :             switch (next_token)
     154              :             {
     155            1 :                 case TIME:
     156              :                 case ORDINALITY:
     157            1 :                     cur_token = WITH_LA;
     158            1 :                     break;
     159              :             }
     160            8 :             break;
     161              : 
     162            4 :         case WITHOUT:
     163              :             /* Replace WITHOUT by WITHOUT_LA if it's followed by TIME */
     164            4 :             switch (next_token)
     165              :             {
     166            1 :                 case TIME:
     167            1 :                     cur_token = WITHOUT_LA;
     168            1 :                     break;
     169              :             }
     170            4 :             break;
     171            3 :         case UIDENT:
     172              :         case USCONST:
     173              :             /* Look ahead for UESCAPE */
     174            3 :             if (next_token == UESCAPE)
     175              :             {
     176              :                 /* Yup, so get third token, which had better be SCONST */
     177              :                 const char *escstr;
     178              : 
     179              :                 /*
     180              :                  * Again save and restore lexer output variables around the
     181              :                  * call
     182              :                  */
     183            1 :                 cur_yylval = base_yylval;
     184            1 :                 cur_yylloc = base_yylloc;
     185            1 :                 cur_yytext = base_yytext;
     186              : 
     187              :                 /* Get third token */
     188            1 :                 next_token = base_yylex_location();
     189              : 
     190            1 :                 if (next_token != SCONST)
     191            0 :                     mmerror(PARSE_ERROR, ET_ERROR, "UESCAPE must be followed by a simple string literal");
     192              : 
     193              :                 /*
     194              :                  * Save and check escape string, which the scanner returns
     195              :                  * with quotes
     196              :                  */
     197            1 :                 escstr = base_yylval.str;
     198            1 :                 if (strlen(escstr) != 3 || !check_uescapechar(escstr[1]))
     199            0 :                     mmerror(PARSE_ERROR, ET_ERROR, "invalid Unicode escape character");
     200              : 
     201            1 :                 base_yylval = cur_yylval;
     202            1 :                 base_yylloc = cur_yylloc;
     203            1 :                 base_yytext = cur_yytext;
     204              : 
     205              :                 /* Combine 3 tokens into 1 */
     206            1 :                 base_yylval.str = make3_str(base_yylval.str,
     207              :                                             " UESCAPE ",
     208              :                                             escstr);
     209            1 :                 base_yylloc = loc_strdup(base_yylval.str);
     210              : 
     211              :                 /* Clear have_lookahead, thereby consuming all three tokens */
     212            1 :                 have_lookahead = false;
     213              :             }
     214              : 
     215            3 :             if (cur_token == UIDENT)
     216            1 :                 cur_token = IDENT;
     217            2 :             else if (cur_token == USCONST)
     218            2 :                 cur_token = SCONST;
     219            3 :             break;
     220              :     }
     221              : 
     222           59 :     return cur_token;
     223              : }
     224              : 
     225              : /*
     226              :  * Call base_yylex() and fill in base_yylloc.
     227              :  *
     228              :  * pgc.l does not worry about setting yylloc, and given what we want for
     229              :  * that, trying to set it there would be pretty inconvenient.  What we
     230              :  * want is: if the returned token has type <str>, then duplicate its
     231              :  * string value as yylloc; otherwise, make a downcased copy of yytext.
     232              :  * The downcasing is ASCII-only because all that we care about there
     233              :  * is producing uniformly-cased output of keywords.  (That's mostly
     234              :  * cosmetic, but there are places in ecpglib that expect to receive
     235              :  * downcased keywords, plus it keeps us regression-test-compatible
     236              :  * with the pre-v18 implementation of ecpg.)
     237              :  */
     238              : static int
     239        36046 : base_yylex_location(void)
     240              : {
     241        36046 :     int         token = base_yylex();
     242              : 
     243        36046 :     switch (token)
     244              :     {
     245              :             /* List a token here if pgc.l assigns to base_yylval.str for it */
     246        12023 :         case Op:
     247              :         case CSTRING:
     248              :         case CPP_LINE:
     249              :         case CVARIABLE:
     250              :         case BCONST:
     251              :         case SCONST:
     252              :         case USCONST:
     253              :         case XCONST:
     254              :         case FCONST:
     255              :         case IDENT:
     256              :         case UIDENT:
     257              :         case IP:
     258              :             /* Duplicate the <str> value */
     259        12023 :             base_yylloc = loc_strdup(base_yylval.str);
     260        12023 :             break;
     261        24023 :         default:
     262              :             /* Else just use the input, i.e., yytext */
     263              :             {
     264              :                 char       *tmp;
     265              : 
     266        24023 :                 tmp = loc_strdup(base_yytext);
     267              :                 /* Apply an ASCII-only downcasing */
     268        80469 :                 for (unsigned char *ptr = (unsigned char *) tmp; *ptr; ptr++)
     269              :                 {
     270        56446 :                     if (*ptr >= 'A' && *ptr <= 'Z')
     271        11416 :                         *ptr += 'a' - 'A';
     272              :                 }
     273        24023 :                 base_yylloc = tmp;
     274        24023 :                 break;
     275              :             }
     276              :     }
     277        36046 :     return token;
     278              : }
     279              : 
     280              : /*
     281              :  * check_uescapechar() and ecpg_isspace() should match their equivalents
     282              :  * in pgc.l.
     283              :  */
     284              : 
     /*
      * check_uescapechar() --- may 'escape' serve as the Unicode escape
      * character in UESCAPE syntax?
      *
      * Hexadecimal digits, '+', single and double quotes, and whitespace (as
      * judged by ecpg_isspace) are rejected; any other character is accepted.
      */
     static bool
     check_uescapechar(unsigned char escape)
     {
         if (isxdigit(escape))
             return false;

         switch (escape)
         {
             case '+':
             case '\'':
             case '"':
                 return false;
             default:
                 break;
         }

         return !ecpg_isspace(escape);
     }
     298              : 
     /*
      * ecpg_isspace() --- does the flex scanner treat this character as
      * whitespace?
      *
      * True for space, tab, newline, carriage return and form feed; false for
      * every other character.
      */
     static bool
     ecpg_isspace(char ch)
     {
         switch (ch)
         {
             case ' ':
             case '\t':
             case '\n':
             case '\r':
             case '\f':
                 return true;
             default:
                 return false;
         }
     }
        

Generated by: LCOV version 2.0-1