LCOV - code coverage report
Current view: top level - src/interfaces/ecpg/preproc - parser.c (source / functions) Hit Total Coverage
Test: PostgreSQL 17beta1 Lines: 81 88 92.0 %
Date: 2024-06-17 22:11:45 Functions: 3 3 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /*-------------------------------------------------------------------------
       2             :  *
       3             :  * parser.c
       4             :  *      Main entry point/driver for PostgreSQL grammar
       5             :  *
       6             :  * This should match src/backend/parser/parser.c, except that we do not
       7             :  * need to bother with re-entrant interfaces.
       8             :  *
       9             :  * Note: ECPG doesn't report error location like the backend does.
      10             :  * This file will need work if we ever want it to.
      11             :  *
      12             :  *
      13             :  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
      14             :  * Portions Copyright (c) 1994, Regents of the University of California
      15             :  *
      16             :  * IDENTIFICATION
      17             :  *    src/interfaces/ecpg/preproc/parser.c
      18             :  *
      19             :  *-------------------------------------------------------------------------
      20             :  */
      21             : 
      22             : #include "postgres_fe.h"
      23             : 
      24             : #include "preproc_extern.h"
      25             : #include "preproc.h"
      26             : 
      27             : 
      28             : static bool have_lookahead;     /* is lookahead info valid? */
      29             : static int  lookahead_token;    /* one-token lookahead */
      30             : static YYSTYPE lookahead_yylval;    /* yylval for lookahead token */
      31             : static YYLTYPE lookahead_yylloc;    /* yylloc for lookahead token */
      32             : static char *lookahead_yytext;  /* yytext for lookahead token */
      33             : 
      34             : static bool check_uescapechar(unsigned char escape);
      35             : static bool ecpg_isspace(char ch);
      36             : 
      37             : 
      38             : /*
      39             :  * Intermediate filter between parser and base lexer (base_yylex in scan.l).
      40             :  *
      41             :  * This filter is needed because in some cases the standard SQL grammar
      42             :  * requires more than one token lookahead.  We reduce these cases to one-token
      43             :  * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
      44             :  *
      45             :  * Using a filter is simpler than trying to recognize multiword tokens
      46             :  * directly in scan.l, because we'd have to allow for comments between the
      47             :  * words.  Furthermore it's not clear how to do that without re-introducing
      48             :  * scanner backtrack, which would cost more performance than this filter
      49             :  * layer does.
      50             :  *
      51             :  * We also use this filter to convert UIDENT and USCONST sequences into
      52             :  * plain IDENT and SCONST tokens.  While that could be handled by additional
      53             :  * productions in the main grammar, it's more efficient to do it like this.
      54             :  */
      55             : int
      56       70942 : filtered_base_yylex(void)
      57             : {
      58             :     int         cur_token;
      59             :     int         next_token;
      60             :     YYSTYPE     cur_yylval;
      61             :     YYLTYPE     cur_yylloc;
      62             :     char       *cur_yytext;
      63             : 
      64             :     /* Get next token --- we might already have it */
      65       70942 :     if (have_lookahead)
      66             :     {
      67         114 :         cur_token = lookahead_token;
      68         114 :         base_yylval = lookahead_yylval;
      69         114 :         base_yylloc = lookahead_yylloc;
      70         114 :         base_yytext = lookahead_yytext;
      71         114 :         have_lookahead = false;
      72             :     }
      73             :     else
      74       70828 :         cur_token = base_yylex();
      75             : 
      76             :     /*
      77             :      * If this token isn't one that requires lookahead, just return it.
      78             :      */
      79       70942 :     switch (cur_token)
      80             :     {
      81         116 :         case FORMAT:
      82             :         case NOT:
      83             :         case NULLS_P:
      84             :         case WITH:
      85             :         case WITHOUT:
      86             :         case UIDENT:
      87             :         case USCONST:
      88         116 :             break;
      89       70826 :         default:
      90       70826 :             return cur_token;
      91             :     }
      92             : 
      93             :     /* Save and restore lexer output variables around the call */
      94         116 :     cur_yylval = base_yylval;
      95         116 :     cur_yylloc = base_yylloc;
      96         116 :     cur_yytext = base_yytext;
      97             : 
      98             :     /* Get next token, saving outputs into lookahead variables */
      99         116 :     next_token = base_yylex();
     100             : 
     101         116 :     lookahead_token = next_token;
     102         116 :     lookahead_yylval = base_yylval;
     103         116 :     lookahead_yylloc = base_yylloc;
     104         116 :     lookahead_yytext = base_yytext;
     105             : 
     106         116 :     base_yylval = cur_yylval;
     107         116 :     base_yylloc = cur_yylloc;
     108         116 :     base_yytext = cur_yytext;
     109             : 
     110         116 :     have_lookahead = true;
     111             : 
     112             :     /* Replace cur_token if needed, based on lookahead */
     113         116 :     switch (cur_token)
     114             :     {
     115          10 :         case FORMAT:
     116             :             /* Replace FORMAT by FORMAT_LA if it's followed by JSON */
     117             :             switch (next_token)
     118             :             {
     119          10 :                 case JSON:
     120          10 :                     cur_token = FORMAT_LA;
     121          10 :                     break;
     122             :             }
     123          10 :             break;
     124             : 
     125          72 :         case NOT:
     126             :             /* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
     127             :             switch (next_token)
     128             :             {
     129           0 :                 case BETWEEN:
     130             :                 case IN_P:
     131             :                 case LIKE:
     132             :                 case ILIKE:
     133             :                 case SIMILAR:
     134           0 :                     cur_token = NOT_LA;
     135           0 :                     break;
     136             :             }
     137          72 :             break;
     138             : 
     139           4 :         case NULLS_P:
     140             :             /* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
     141             :             switch (next_token)
     142             :             {
     143           4 :                 case FIRST_P:
     144             :                 case LAST_P:
     145           4 :                     cur_token = NULLS_LA;
     146           4 :                     break;
     147             :             }
     148           4 :             break;
     149             : 
     150          16 :         case WITH:
     151             :             /* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
     152             :             switch (next_token)
     153             :             {
     154           2 :                 case TIME:
     155             :                 case ORDINALITY:
     156           2 :                     cur_token = WITH_LA;
     157           2 :                     break;
     158             :             }
     159          16 :             break;
     160             : 
     161           8 :         case WITHOUT:
     162             :             /* Replace WITHOUT by WITHOUT_LA if it's followed by TIME */
     163             :             switch (next_token)
     164             :             {
     165           2 :                 case TIME:
     166           2 :                     cur_token = WITHOUT_LA;
     167           2 :                     break;
     168             :             }
     169           8 :             break;
     170           6 :         case UIDENT:
     171             :         case USCONST:
     172             :             /* Look ahead for UESCAPE */
     173           6 :             if (next_token == UESCAPE)
     174             :             {
     175             :                 /* Yup, so get third token, which had better be SCONST */
     176             :                 const char *escstr;
     177             : 
     178             :                 /*
     179             :                  * Again save and restore lexer output variables around the
     180             :                  * call
     181             :                  */
     182           2 :                 cur_yylval = base_yylval;
     183           2 :                 cur_yylloc = base_yylloc;
     184           2 :                 cur_yytext = base_yytext;
     185             : 
     186             :                 /* Get third token */
     187           2 :                 next_token = base_yylex();
     188             : 
     189           2 :                 if (next_token != SCONST)
     190           0 :                     mmerror(PARSE_ERROR, ET_ERROR, "UESCAPE must be followed by a simple string literal");
     191             : 
     192             :                 /*
     193             :                  * Save and check escape string.  The scanner returns it with
     194             :                  * its quotes, so a valid one-character escape has length 3.
     195             :                  */
     196           2 :                 escstr = base_yylval.str;
     197           2 :                 if (strlen(escstr) != 3 || !check_uescapechar(escstr[1]))
     198           0 :                     mmerror(PARSE_ERROR, ET_ERROR, "invalid Unicode escape character");
     199             : 
     200           2 :                 base_yylval = cur_yylval;
     201           2 :                 base_yylloc = cur_yylloc;
     202           2 :                 base_yytext = cur_yytext;
     203             : 
     204             :                 /* Combine 3 tokens into 1 */
     205           2 :                 base_yylval.str = psprintf("%s UESCAPE %s", base_yylval.str, escstr);
     206             : 
     207             :                 /* Clear have_lookahead, thereby consuming all three tokens */
     208           2 :                 have_lookahead = false;
     209             :             }
     210             : 
     211           6 :             if (cur_token == UIDENT)
     212           2 :                 cur_token = IDENT;
     213           4 :             else if (cur_token == USCONST)
     214           4 :                 cur_token = SCONST;
     215           6 :             break;
     216             :     }
     217             : 
     218         116 :     return cur_token;
     219             : }
     220             : 
     221             : /*
     222             :  * check_uescapechar() and ecpg_isspace() should match their equivalents
     223             :  * in pgc.l.
     224             :  */
     225             : 
     226             : /* Is 'escape' acceptable as a Unicode escape character (UESCAPE syntax)? */
     227             : static bool
     228           2 : check_uescapechar(unsigned char escape)
     229             : {
     230           2 :     if (isxdigit(escape)
     231           2 :         || escape == '+'
     232           2 :         || escape == '\''
     233           2 :         || escape == '"'
     234           2 :         || ecpg_isspace(escape))
     235           0 :         return false;
     236             :     else
     237           2 :         return true;
     238             : }
     239             : 
     240             : /*
     241             :  * ecpg_isspace() --- return true if flex scanner considers char whitespace
     242             :  */
     243             : static bool
     244           2 : ecpg_isspace(char ch)
     245             : {
     246           2 :     if (ch == ' ' ||
     247           2 :         ch == '\t' ||
     248           2 :         ch == '\n' ||
     249           2 :         ch == '\r' ||
     250             :         ch == '\f')
     251           0 :         return true;
     252           2 :     return false;
     253             : }

Generated by: LCOV version 1.14