LCOV - code coverage report
Current view: top level - src/fe_utils - psqlscan.l (source / functions) Hit Total Coverage
Test: PostgreSQL 15devel Lines: 169 226 74.8 %
Date: 2021-09-17 16:07:28 Functions: 18 18 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : %top{
       2             : /*-------------------------------------------------------------------------
       3             :  *
       4             :  * psqlscan.l
       5             :  *    lexical scanner for SQL commands
       6             :  *
       7             :  * This lexer used to be part of psql, and that heritage is reflected in
       8             :  * the file name as well as function and typedef names, though it can now
       9             :  * be used by other frontend programs as well.  It's also possible to extend
      10             :  * this lexer with a compatible add-on lexer to handle program-specific
      11             :  * backslash commands.
      12             :  *
      13             :  * This code is mainly concerned with determining where the end of a SQL
      14             :  * statement is: we are looking for semicolons that are not within quotes,
      15             :  * comments, or parentheses.  The most reliable way to handle this is to
      16             :  * borrow the backend's flex lexer rules, lock, stock, and barrel.  The rules
      17             :  * below are (except for a few) the same as the backend's, but their actions
      18             :  * are just ECHO whereas the backend's actions generally do other things.
      19             :  *
      20             :  * XXX The rules in this file must be kept in sync with the backend lexer!!!
      21             :  *
      22             :  * XXX Avoid creating backtracking cases --- see the backend lexer for info.
      23             :  *
      24             :  * See psqlscan_int.h for additional commentary.
      25             :  *
      26             :  *
      27             :  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
      28             :  * Portions Copyright (c) 1994, Regents of the University of California
      29             :  *
      30             :  * IDENTIFICATION
      31             :  *    src/fe_utils/psqlscan.l
      32             :  *
      33             :  *-------------------------------------------------------------------------
      34             :  */
      35             : #include "postgres_fe.h"
      36             : 
      37             : #include "common/logging.h"
      38             : #include "fe_utils/psqlscan.h"
      39             : 
      40             : #include "libpq-fe.h"
      41             : }
      42             : 
      43             : %{
      44             : 
      45             : /* LCOV_EXCL_START */
      46             : 
      47             : #include "fe_utils/psqlscan_int.h"
      48             : 
      49             : /*
      50             :  * We must have a typedef YYSTYPE for yylex's first argument, but this lexer
      51             :  * doesn't presently make use of that argument, so just declare it as int.
      52             :  */
      53             : typedef int YYSTYPE;
      54             : 
      55             : /*
      56             :  * Set the type of yyextra; we use it as a pointer back to the containing
      57             :  * PsqlScanState.
      58             :  */
      59             : #define YY_EXTRA_TYPE PsqlScanState
      60             : 
      61             : 
      62             : /* Return values from yylex() */
      63             : #define LEXRES_EOL          0   /* end of input */
      64             : #define LEXRES_SEMI         1   /* command-terminating semicolon found */
      65             : #define LEXRES_BACKSLASH    2   /* backslash command start */
      66             : 
      67             : /* ECHO hands the current token (yytext, yyleng) to psqlscan_emit(); cur_state is a local set up inside yylex() */
      68             : #define ECHO psqlscan_emit(cur_state, yytext, yyleng)
      69             : 
      70             : /*
      71             :  * Work around a bug in flex 2.5.35: it emits a couple of functions that
      72             :  * it forgets to emit declarations for.  Since we use -Wmissing-prototypes,
      73             :  * this would cause warnings.  Providing our own declarations should be
      74             :  * harmless even when the bug gets fixed.
      75             :  */
      76             : extern int  psql_yyget_column(yyscan_t yyscanner);
      77             : extern void psql_yyset_column(int column_no, yyscan_t yyscanner);
      78             : 
      79             : %}
      80             : 
      81             : %option reentrant
      82             : %option bison-bridge
      83             : %option 8bit
      84             : %option never-interactive
      85             : %option nodefault
      86             : %option noinput
      87             : %option nounput
      88             : %option noyywrap
      89             : %option warn
      90             : %option prefix="psql_yy"
      91             : 
      92             : /*
      93             :  * All of the following definitions and rules should exactly match
      94             :  * src/backend/parser/scan.l so far as the flex patterns are concerned.
      95             :  * The rule bodies are just ECHO as opposed to what the backend does,
      96             :  * however.  (But be sure to duplicate code that affects the lexing process,
      97             :  * such as BEGIN() and yyless().)  Also, psqlscan uses a single <<EOF>> rule
      98             :  * whereas scan.l has a separate one for each exclusive state.
      99             :  */
     100             : 
     101             : /*
     102             :  * OK, here is a short description of lex/flex rules behavior.
     103             :  * The longest pattern which matches an input string is always chosen.
     104             :  * For equal-length patterns, the first occurring in the rules list is chosen.
     105             :  * INITIAL is the starting state, to which all non-conditional rules apply.
     106             :  * Exclusive states change parsing rules while the state is active.  When in
     107             :  * an exclusive state, only those rules defined for that state apply.
     108             :  *
     109             :  * We use exclusive states for quoted strings, extended comments,
     110             :  * and to eliminate parsing troubles for numeric strings.
     111             :  * Exclusive states:
     112             :  *  <xb> bit string literal
     113             :  *  <xc> extended C-style comments
     114             :  *  <xd> delimited identifiers (double-quoted identifiers)
     115             :  *  <xh> hexadecimal numeric string
     116             :  *  <xq> standard quoted strings
     117             :  *  <xqs> quote stop (detect continued strings)
     118             :  *  <xe> extended quoted strings (support backslash escape sequences)
     119             :  *  <xdolq> $foo$ quoted strings
     120             :  *  <xui> quoted identifier with Unicode escapes
     121             :  *  <xus> quoted string with Unicode escapes
     122             :  *
     123             :  * Note: we intentionally don't mimic the backend's <xeu> state; we have
     124             :  * no need to distinguish it from <xe> state, and no good way to get out
     125             :  * of it in error cases.  The backend just throws yyerror() in those
     126             :  * cases, but that's not an option here.
     127             :  */
     128             : 
     129             : %x xb
     130             : %x xc
     131             : %x xd
     132             : %x xh
     133             : %x xq
     134             : %x xqs
     135             : %x xe
     136             : %x xdolq
     137             : %x xui
     138             : %x xus
     139             : 
     140             : /*
     141             :  * In order to make the world safe for Windows and Mac clients as well as
     142             :  * Unix ones, we accept either \n or \r as a newline.  A DOS-style \r\n
     143             :  * sequence will be seen as two successive newlines, but that doesn't cause
     144             :  * any problems.  Comments that start with -- and extend to the next
     145             :  * newline are treated as equivalent to a single whitespace character.
     146             :  *
     147             :  * NOTE a fine point: if there is no newline following --, we will absorb
     148             :  * everything to the end of the input as a comment.  This is correct.  Older
     149             :  * versions of Postgres failed to recognize -- as a comment if the input
     150             :  * did not end with a newline.
     151             :  *
     152             :  * XXX perhaps \f (formfeed) should be treated as a newline as well?
     153             :  *
     154             :  * XXX if you change the set of whitespace characters, fix scanner_isspace()
     155             :  * to agree.
     156             :  */
     157             : 
     158             : space           [ \t\n\r\f]
     159             : horiz_space     [ \t\f]
     160             : newline         [\n\r]
     161             : non_newline     [^\n\r]
     162             : 
     163             : comment         ("--"{non_newline}*)
     164             : 
     165             : whitespace      ({space}+|{comment})
     166             : 
     167             : /*
     168             :  * SQL requires at least one newline in the whitespace separating
     169             :  * string literals that are to be concatenated.  Silly, but who are we
     170             :  * to argue?  Note that {whitespace_with_newline} should not have * after
     171             :  * it, whereas {whitespace} should generally have a * after it...
     172             :  */
     173             : 
     174             : special_whitespace      ({space}+|{comment}{newline})
     175             : horiz_whitespace        ({horiz_space}|{comment})
     176             : whitespace_with_newline ({horiz_whitespace}*{newline}{special_whitespace}*)
     177             : 
     178             : quote           '
     179             : /* If we see {quote} then {quotecontinue}, the quoted string continues */
     180             : quotecontinue   {whitespace_with_newline}{quote}
     181             : 
     182             : /*
     183             :  * {quotecontinuefail} is needed to avoid lexer backup when we fail to match
     184             :  * {quotecontinue}.  It might seem that this could just be {whitespace}*,
     185             :  * but if there's a dash after {whitespace_with_newline}, it must be consumed
     186             :  * to see if there's another dash --- which would start a {comment} and thus
     187             :  * allow continuation of the {quotecontinue} token.
     188             :  */
     189             : quotecontinuefail   {whitespace}*"-"?
     190             : 
     191             : /* Bit string
     192             :  * It is tempting to scan the string for only those characters
     193             :  * which are allowed. However, this leads to silently swallowed
     194             :  * characters if illegal characters are included in the string.
     195             :  * For example, if xbinside is [01] then B'ABCD' is interpreted
     196             :  * as a zero-length string, and the ABCD' is lost!
     197             :  * Better to pass the string forward and let the input routines
     198             :  * validate the contents.
     199             :  */
     200             : xbstart         [bB]{quote}
     201             : xbinside        [^']*
     202             : 
     203             : /* Hexadecimal number */
     204             : xhstart         [xX]{quote}
     205             : xhinside        [^']*
     206             : 
     207             : /* National character */
     208             : xnstart         [nN]{quote}
     209             : 
     210             : /* Quoted string that allows backslash escapes */
     211             : xestart         [eE]{quote}
     212             : xeinside        [^\\']+
     213             : xeescape        [\\][^0-7]
     214             : xeoctesc        [\\][0-7]{1,3}
     215             : xehexesc        [\\]x[0-9A-Fa-f]{1,2}
     216             : xeunicode       [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
     217             : xeunicodefail   [\\](u[0-9A-Fa-f]{0,3}|U[0-9A-Fa-f]{0,7})
     218             : 
     219             : /* Extended quote
     220             :  * xqdouble implements embedded quote, ''''
     221             :  */
     222             : xqstart         {quote}
     223             : xqdouble        {quote}{quote}
     224             : xqinside        [^']+
     225             : 
     226             : /* $foo$ style quotes ("dollar quoting")
     227             :  * The quoted string starts with $foo$ where "foo" is an optional string
     228             :  * in the form of an identifier, except that it may not contain "$",
     229             :  * and extends to the first occurrence of an identical string.
     230             :  * There is *no* processing of the quoted text.
     231             :  *
     232             :  * {dolqfailed} is an error rule to avoid scanner backup when {dolqdelim}
     233             :  * fails to match its trailing "$".
     234             :  */
     235             : dolq_start      [A-Za-z\200-\377_]
     236             : dolq_cont       [A-Za-z\200-\377_0-9]
     237             : dolqdelim       \$({dolq_start}{dolq_cont}*)?\$
     238             : dolqfailed      \${dolq_start}{dolq_cont}*
     239             : dolqinside      [^$]+
     240             : 
     241             : /* Double quote
     242             :  * Allows embedded spaces and other special characters into identifiers.
     243             :  */
     244             : dquote          \"
     245             : xdstart         {dquote}
     246             : xdstop          {dquote}
     247             : xddouble        {dquote}{dquote}
     248             : xdinside        [^"]+
     249             : 
     250             : /* Quoted identifier with Unicode escapes */
     251             : xuistart        [uU]&{dquote}
     252             : 
     253             : /* Quoted string with Unicode escapes */
     254             : xusstart        [uU]&{quote}
     255             : 
     256             : /* error rule to avoid backup */
     257             : xufailed        [uU]&
     258             : 
     259             : 
     260             : /* C-style comments
     261             :  *
     262             :  * The "extended comment" syntax closely resembles allowable operator syntax.
     263             :  * The tricky part here is to get lex to recognize a string starting with
     264             :  * slash-star as a comment, when interpreting it as an operator would produce
     265             :  * a longer match --- remember lex will prefer a longer match!  Also, if we
     266             :  * have something like plus-slash-star, lex will think this is a 3-character
     267             :  * operator whereas we want to see it as a + operator and a comment start.
     268             :  * The solution is two-fold:
     269             :  * 1. append {op_chars}* to xcstart so that it matches as much text as
     270             :  *    {operator} would. Then the tie-breaker (first matching rule of same
     271             :  *    length) ensures xcstart wins.  We put back the extra stuff with yyless()
     272             :  *    in case it contains a star-slash that should terminate the comment.
     273             :  * 2. In the operator rule, check for slash-star within the operator, and
     274             :  *    if found throw it back with yyless().  This handles the plus-slash-star
     275             :  *    problem.
     276             :  * Dash-dash comments have similar interactions with the operator rule.
     277             :  */
     278             : xcstart         \/\*{op_chars}*
     279             : xcstop          \*+\/
     280             : xcinside        [^*/]+
     281             : 
     282             : digit           [0-9]
     283             : ident_start     [A-Za-z\200-\377_]
     284             : ident_cont      [A-Za-z\200-\377_0-9\$]
     285             : 
     286             : identifier      {ident_start}{ident_cont}*
     287             : 
     288             : /* Assorted special-case operators and operator-like tokens */
     289             : typecast        "::"
     290             : dot_dot         \.\.
     291             : colon_equals    ":="
     292             : 
     293             : /*
     294             :  * These operator-like tokens (unlike the above ones) also match the {operator}
     295             :  * rule, which means that they might be overridden by a longer match if they
     296             :  * are followed by a comment start or a + or - character. Accordingly, if you
     297             :  * add to this list, you must also add corresponding code to the {operator}
     298             :  * block to return the correct token in such cases. (This is not needed in
     299             :  * psqlscan.l since the token value is ignored there.)
     300             :  */
     301             : equals_greater  "=>"
     302             : less_equals     "<="
     303             : greater_equals  ">="
     304             : less_greater    "<>"
     305             : not_equals      "!="
     306             : 
     307             : /*
     308             :  * "self" is the set of chars that should be returned as single-character
     309             :  * tokens.  "op_chars" is the set of chars that can make up "Op" tokens,
     310             :  * which can be one or more characters long (but if a single-char token
     311             :  * appears in the "self" set, it is not to be returned as an Op).  Note
     312             :  * that the sets overlap, but each has some chars that are not in the other.
     313             :  *
     314             :  * If you change either set, adjust the character lists appearing in the
     315             :  * rule for "operator"!
     316             :  */
     317             : self            [,()\[\].;\:\+\-\*\/\%\^\<\>\=]
     318             : op_chars        [\~\!\@\#\^\&\|\`\?\+\-\*\/\%\<\>\=]
     319             : operator        {op_chars}+
     320             : 
     321             : /* We no longer allow unary minus in numbers.
     322             :  * Instead we pass it separately to the parser, where it gets
     323             :  * coerced via doNegate() -- Leon, Aug 20 1999
     324             :  *
     325             :  * {decimalfail} is used because we would like "1..10" to lex as 1, dot_dot, 10.
     326             :  *
     327             :  * {realfail1} and {realfail2} are added to prevent the need for scanner
     328             :  * backup when the {real} rule fails to match completely.
     329             :  */
     330             : 
     331             : integer         {digit}+
     332             : decimal         (({digit}*\.{digit}+)|({digit}+\.{digit}*))
     333             : decimalfail     {digit}+\.\.
     334             : real            ({integer}|{decimal})[Ee][-+]?{digit}+
     335             : realfail1       ({integer}|{decimal})[Ee]
     336             : realfail2       ({integer}|{decimal})[Ee][-+]
     337             : 
     338             : param           \${integer}
     339             : 
     340             : /* psql-specific: characters allowed in variable names */
     341             : variable_char   [A-Za-z\200-\377_0-9]
     342             : 
     343             : other           .
     344             : 
     345             : /*
     346             :  * Dollar quoted strings are totally opaque, and no escaping is done on them.
     347             :  * Other quoted strings must allow some special characters such as single-quote
     348             :  *  and newline.
     349             :  * Embedded single-quotes are implemented both in the SQL standard
     350             :  *  style of two adjacent single quotes "''" and in the Postgres/Java style
     351             :  *  of escaped-quote "\'".
     352             :  * Other embedded escaped characters are matched explicitly (the backend
     353             :  *  drops the leading backslash; this lexer echoes the text unchanged).
     354             :  * Note that xcstart must appear before operator, as explained above!
     355             :  *  Also whitespace (comment) must appear before operator.
     356             :  */
     357             : 
     358             : %%
     359             : 
     360             : %{
     361             :         /* Locals for yylex(): cur_state is what the ECHO macro and the rule actions use */
     362             :         PsqlScanState cur_state = yyextra;
     363             :         PQExpBuffer output_buf = cur_state->output_buf;     /* inspected by the {whitespace} rule */
     364             : 
     365             :         /*
     366             :          * Force flex into the state indicated by start_state.  This has a
     367             :          * couple of purposes: it lets some of the functions below set a new
     368             :          * starting state without ugly direct access to flex variables, and it
     369             :          * allows us to transition from one flex lexer to another so that we
     370             :          * can lex different parts of the source string using separate lexers.
     371             :          */
     372             :         BEGIN(cur_state->start_state);
     373             : %}
     374             : 
     375             : {whitespace}    {
     376             :                     /*
     377             :                      * Note that the whitespace rule includes both true
     378             :                      * whitespace and single-line ("--" style) comments; a
     379             :                      * token starting with '-' can only be such a comment.
     380             :                      * We suppress whitespace at the start of the query
     381             :                      * buffer.  We also suppress all single-line comments,
     382             :                      * which is pretty dubious but is the historical behavior.
     383             :                      */
     384             :                     if (!(output_buf->len == 0 || yytext[0] == '-'))
     385             :                         ECHO;
     386             :                 }
     387             : 
     388             : {xcstart}       {
     389             :                     cur_state->xcdepth = 0;     /* outermost comment: no nesting yet */
     390             :                     BEGIN(xc);
     391             :                     /* Put back any characters past slash-star; see above */
     392             :                     yyless(2);                  /* done before ECHO so only slash-star is emitted */
     393             :                     ECHO;
     394             :                 }
     395             : 
     396             : <xc>{
     397             : {xcstart}       {
     398             :                     cur_state->xcdepth++;       /* nested comment start */
     399             :                     /* Put back any characters past slash-star; see above */
     400             :                     yyless(2);
     401             :                     ECHO;
     402             :                 }
     403             : 
     404             : {xcstop}        {
     405             :                     if (cur_state->xcdepth <= 0)
     406             :                         BEGIN(INITIAL);         /* closed the outermost comment */
     407             :                     else
     408             :                         cur_state->xcdepth--;   /* closed a nested comment */
     409             :                     ECHO;
     410             :                 }
     411             : 
     412             : {xcinside}      {
     413             :                     ECHO;   /* comment text containing neither star nor slash */
     414             :                 }
     415             : 
     416             : {op_chars}      {
     417             :                     ECHO;   /* in effect a lone slash or star; other chars go to {xcinside} */
     418             :                 }
     419             : 
     420             : \*+             {
     421             :                     ECHO;   /* run of stars that is not the start of a comment stop */
     422             :                 }
     423             : } /* <xc> */
     424             : 
     425             : {xbstart}       {
     426             :                     BEGIN(xb);
     427             :                     ECHO;
     428             :                 }
     429             : <xh>{xhinside}    |
     430             : <xb>{xbinside}    {
     431             :                     ECHO;   /* contents are not validated here: anything up to the next quote */
     432             :                 }
     433             : 
     434             : {xhstart}       {
     435             :                     /* Hexadecimal bit type.
     436             :                      * At some point we should simply pass the string forward to
     437             :                      * the parser and label it there.  In the meantime, place a
     438             :                      * leading "x" on the string to mark it for the input routine as a
     439             :                      * hex string.  (That wording is the backend's; here we just ECHO.)
     440             :                      */
     441             :                     BEGIN(xh);
     442             :                     ECHO;
     443             :                 }
     444             : 
     445             : {xnstart}       {
     446             :                     yyless(1);  /* eat only 'n' this time */
     447             :                     ECHO;       /* the put-back quote is rescanned as an ordinary string start */
     448             :                 }
     449             : 
     450             : {xqstart}       {
     451             :                     if (cur_state->std_strings)
     452             :                         BEGIN(xq);  /* <xq> has no backslash-escape rules */
     453             :                     else
     454             :                         BEGIN(xe);  /* plain quotes are lexed with backslash escapes */
     455             :                     ECHO;
     456             :                 }
     457             : {xestart}       {
     458             :                     BEGIN(xe);
     459             :                     ECHO;
     460             :                 }
     461             : {xusstart}      {
     462             :                     BEGIN(xus);
     463             :                     ECHO;
     464             :                 }
     465             : 
     466             : <xb,xh,xq,xe,xus>{quote} {
     467             :                     /*
     468             :                      * When we are scanning a quoted string and see an end
     469             :                      * quote, we must look ahead for a possible continuation.
     470             :                      * If we don't see one, we know the end quote was in fact
     471             :                      * the end of the string.  To reduce the lexer table size,
     472             :                      * we use a single "xqs" state to do the lookahead for all
     473             :                      * types of strings.
     474             :                      */
     475             :                     cur_state->state_before_str_stop = YYSTATE;     /* so a continuation can resume it */
     476             :                     BEGIN(xqs);
     477             :                     ECHO;
     478             :                 }
     479             : <xqs>{quotecontinue} {
     480             :                     /*
     481             :                      * Found a quote continuation, so return to the in-quote
     482             :                      * state and continue scanning the literal.  The matched
     483             :                      * whitespace and quote are echoed like any other text.
     484             :                      */
     485             :                     BEGIN(cur_state->state_before_str_stop);
     486             :                     ECHO;
     487             :                 }
     488             : <xqs>{quotecontinuefail} |
     489             : <xqs>{other}  {
     490             :                     /*
     491             :                      * Failed to see a quote continuation.  Throw back
     492             :                      * everything after the end quote; the literal is complete,
     493             :                      * so just return to INITIAL state.
     494             :                      */
     495             :                     yyless(0);
     496             :                     BEGIN(INITIAL);
     497             :                     /* There's nothing to echo ... */
     498             :                 }
     499             : 
     500             : <xq,xe,xus>{xqdouble} {
     501             :                     ECHO;   /* doubled quote: stays inside the string */
     502             :                 }
     503             : <xq,xus>{xqinside}  {
     504             :                     ECHO;
     505             :                 }
     506             : <xe>{xeinside}  {
     507             :                     ECHO;
     508             :                 }
     509             : <xe>{xeunicode} {
     510             :                     ECHO;
     511             :                 }
     512             : <xe>{xeunicodefail}   {
     513             :                     ECHO;   /* Unicode escape with too few hex digits: echoed, no error raised here */
     514             :                 }
     515             : <xe>{xeescape}  {
     516             :                     ECHO;   /* backslash plus one non-octal-digit char, so an escaped quote does not end the string */
     517             :                 }
     518             : <xe>{xeoctesc}  {
     519             :                     ECHO;
     520             :                 }
     521             : <xe>{xehexesc}  {
     522             :                     ECHO;
     523             :                 }
     524             : <xe>.         {
     525             :                     /* This is only needed for \ just before EOF */
     526             :                     ECHO;
     527             :                 }
     528             : 
     529             : {dolqdelim}     {
     530             :                     cur_state->dolqstart = pg_strdup(yytext);   /* remember the opening delimiter */
     531             :                     BEGIN(xdolq);
     532             :                     ECHO;
     533             :                 }
     534             : {dolqfailed}    {
     535             :                     /* throw back all but the initial "$" */
     536             :                     yyless(1);
     537             :                     ECHO;
     538             :                 }
     539             : <xdolq>{dolqdelim} {
     540             :                     if (strcmp(yytext, cur_state->dolqstart) == 0)
     541             :                     {   /* same delimiter as the opener: the dollar-quoted string ends */
     542             :                         free(cur_state->dolqstart);
     543             :                         cur_state->dolqstart = NULL;
     544             :                         BEGIN(INITIAL);
     545             :                     }
     546             :                     else
     547             :                     {
     548             :                         /*
     549             :                          * When we fail to match $...$ to dolqstart, transfer
     550             :                          * the $... part to the output, but put back the final
     551             :                          * $ for rescanning.  Consider $delim$...$junk$delim$
     552             :                          */
     553             :                         yyless(yyleng - 1);
     554             :                     }
     555             :                     ECHO;   /* emits the whole delimiter, or all but its final "$" after yyless() */
     556             :                 }
     557             : <xdolq>{dolqinside} {
     558             :                     ECHO;
     559             :                 }
     560             : <xdolq>{dolqfailed} {
     561             :                     ECHO;   /* "$name" with no closing "$": just more quoted text */
     562             :                 }
     563             : <xdolq>.      {
     564             :                     /* This is only needed for $ inside the quoted text */
     565             :                     ECHO;
     566             :                 }
     567             : 
     568             : {xdstart}       {   /* enter xd; left again by the <xd>{xdstop} rule */
     569             :                     BEGIN(xd);
     570             :                     ECHO;
     571             :                 }
     572             : {xuistart}      {   /* enter xui; left again by the <xui>{dquote} rule */
     573             :                     BEGIN(xui);
     574             :                     ECHO;
     575             :                 }
     576             : <xd>{xdstop}  {   /* terminator seen in xd: back to INITIAL */
     577             :                     BEGIN(INITIAL);
     578             :                     ECHO;
     579             :                 }
     580             : <xui>{dquote} {   /* terminator seen in xui: back to INITIAL */
     581             :                     BEGIN(INITIAL);
     582             :                     ECHO;
     583             :                 }
     584             : <xd,xui>{xddouble}    {   /* shared by both states: echo, no state change */
     585             :                     ECHO;
     586             :                 }
     587             : <xd,xui>{xdinside}    {   /* shared by both states: echo, no state change */
     588             :                     ECHO;
     589             :                 }
     590             : 
     591             : {xufailed}  {
     592             :                     /*
                     :                      * throw back all but the initial u/U: only that one
                     :                      * character is echoed, the rest is rescanned
                     :                      */
     593             :                     yyless(1);
     594             :                     ECHO;
     595             :                 }
     596             : 
     597             : {typecast}      {   /*
                     :                      * This rule and the seven after it, through {not_equals},
                     :                      * have no psql-specific behavior: each one just echoes the
                     :                      * token it matched.
                     :                      */
     598             :                     ECHO;
     599             :                 }
     600             : 
     601             : {dot_dot}       {
     602             :                     ECHO;
     603             :                 }
     604             : 
     605             : {colon_equals}  {
     606             :                     ECHO;
     607             :                 }
     608             : 
     609             : {equals_greater} {
     610             :                     ECHO;
     611             :                 }
     612             : 
     613             : {less_equals}   {
     614             :                     ECHO;
     615             :                 }
     616             : 
     617             : {greater_equals} {
     618             :                     ECHO;
     619             :                 }
     620             : 
     621             : {less_greater}  {
     622             :                     ECHO;
     623             :                 }
     624             : 
     625             : {not_equals}    {
     626             :                     ECHO;
     627             :                 }
     628             : 
     629             :     /*
     630             :      * These rules are specific to psql --- they implement parenthesis
     631             :      * counting and detection of command-ending semicolon.  These must
     632             :      * appear before the {self} rule so that they take precedence over it.
     633             :      */
     634             : 
     635             : "("               {   /* track nesting; ";" ends a command only at depth 0 */
     636             :                     cur_state->paren_depth++;
     637             :                     ECHO;
     638             :                 }
     639             : 
     640             : ")"               {   /* an unmatched ")" is echoed but never drives the depth negative */
     641             :                     if (cur_state->paren_depth > 0)
     642             :                         cur_state->paren_depth--;
     643             :                     ECHO;
     644             :                 }
     645             : 
     646             : ";"               {   /*
                     :                      * The semicolon is always echoed.  It ends the command
                     :                      * only when we are outside all parentheses and outside
                     :                      * any tracked BEGIN ... END block; otherwise lexing
                     :                      * simply continues.
                     :                      */
     647             :                     ECHO;
     648             :                     if (cur_state->paren_depth == 0 && cur_state->begin_depth == 0)
     649             :                     {
     650             :                         /* Terminate lexing temporarily */
     651             :                         cur_state->start_state = YY_START;
     652             :                         cur_state->identifier_count = 0;    /* next statement starts a fresh keyword history */
     653             :                         return LEXRES_SEMI;
     654             :                     }
     655             :                 }
     656             : 
     657             :     /*
     658             :      * psql-specific rules to handle backslash commands and variable
     659             :      * substitution.  We want these before {self}, also.
     660             :      */
     661             : 
     662             : "\\"[;:]      {
     663             :                     /*
                     :                      * Force a semicolon or colon into the query buffer: only
                     :                      * the character after the backslash is emitted, and lexing
                     :                      * continues without returning to the caller.  A forced
                     :                      * semicolon also resets identifier_count, just as the
                     :                      * plain ";" rule does when it ends a command.
                     :                      */
     664             :                     psqlscan_emit(cur_state, yytext + 1, 1);
     665             :                     if (yytext[1] == ';')
     666             :                         cur_state->identifier_count = 0;
     667             :                 }
     668             : 
     669             : "\\"          {
     670             :                     /*
                     :                      * Terminate lexing temporarily: record the current start
                     :                      * condition in start_state and hand control back to the
                     :                      * caller.  The backslash itself is not echoed.
                     :                      */
     671             :                     cur_state->start_state = YY_START;
     672             :                     return LEXRES_BACKSLASH;
     673             :                 }
     674             : 
     675             : :{variable_char}+   {
     676             :                     /*
                     :                      * Possible psql variable substitution.
                     :                      *
                     :                      * The name is the matched text minus the leading colon.
                     :                      * It is looked up through the get_variable callback, if
                     :                      * the application supplied one.  Three outcomes:
                     :                      *   - no value: echo the text unchanged;
                     :                      *   - value found but this variable is already being
                     :                      *     expanded: warn and echo the text unchanged;
                     :                      *   - otherwise: push the value as a new input buffer so
                     :                      *     that it gets lexed in place of the reference.
                     :                      * Both varname and value are released with free() here.
                     :                      */
     677             :                     char       *varname;
     678             :                     char       *value;
     679             : 
     680             :                     varname = psqlscan_extract_substring(cur_state,
     681             :                                                          yytext + 1,
     682             :                                                          yyleng - 1);
     683             :                     if (cur_state->callbacks->get_variable)
     684             :                         value = cur_state->callbacks->get_variable(varname,
     685             :                                                                    PQUOTE_PLAIN,
     686             :                                                                    cur_state->cb_passthrough);
     687             :                     else
     688             :                         value = NULL;
     689             : 
     690             :                     if (value)
     691             :                     {
     692             :                         /* It is a variable, check for recursion */
     693             :                         if (psqlscan_var_is_current_source(cur_state, varname))
     694             :                         {
     695             :                             /* Recursive expansion --- don't go there */
     696             :                             pg_log_warning("skipping recursive expansion of variable \"%s\"",
     697             :                                                               varname);
     698             :                             /* Instead copy the string as is */
     699             :                             ECHO;
     700             :                         }
     701             :                         else
     702             :                         {
     703             :                             /* OK, perform substitution */
     704             :                             psqlscan_push_new_buffer(cur_state, value, varname);
     705             :                             /* yy_scan_string already made buffer active */
     706             :                         }
     707             :                         free(value);
     708             :                     }
     709             :                     else
     710             :                     {
     711             :                         /*
     712             :                          * if the variable doesn't exist we'll copy the string
     713             :                          * as is
     714             :                          */
     715             :                         ECHO;
     716             :                     }
     717             : 
     718             :                     free(varname);
     719             :                 }
     720             : 
     721             : :'{variable_char}+' {   /* whole token, colon and quotes included, is handed over */
     722             :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     723             :                                              PQUOTE_SQL_LITERAL);
     724             :                 }
     725             : 
     726             : :\"{variable_char}+\" {   /* same as above, but requesting PQUOTE_SQL_IDENT quoting */
     727             :                     psqlscan_escape_variable(cur_state, yytext, yyleng,
     728             :                                              PQUOTE_SQL_IDENT);
     729             :                 }
     730             : 
     731             : :\{\?{variable_char}+\} {   /* the :{?name} form is delegated to psqlscan_test_variable() */
     732             :                     psqlscan_test_variable(cur_state, yytext, yyleng);
     733             :                 }
     734             : 
     735             :     /*
     736             :      * These rules just avoid the need for scanner backup if one of the
     737             :      * three rules above fails to match completely.
     738             :      */
     739             : 
     740             : :'{variable_char}*  {
     741             :                     /*
                     :                      * Throw back everything but the colon: the colon alone is
                     :                      * echoed and the rest of the match is rescanned
                     :                      */
     742             :                     yyless(1);
     743             :                     ECHO;
     744             :                 }
     745             : 
     746             : :\"{variable_char}*    {
     747             :                     /* Throw back everything but the colon */
     748             :                     yyless(1);
     749             :                     ECHO;
     750             :                 }
     751             : 
     752             : :\{\?{variable_char}*   {
     753             :                     /* Throw back everything but the colon */
     754             :                     yyless(1);
     755             :                     ECHO;
     756             :                 }
     757             : :\{ {
     758             :                     /* Throw back everything but the colon */
     759             :                     yyless(1);
     760             :                     ECHO;
     761             :                 }
     762             : 
     763             :     /*
     764             :      * Back to backend-compatible rules.
     765             :      */
     766             : 
     767             : {self}          {   /* reached only for text not taken by the psql-specific rules above */
     768             :                     ECHO;
     769             :                 }
     770             : 
     771             : {operator}      {
     772             :                     /*
     773             :                      * Check for embedded slash-star or dash-dash; those
     774             :                      * are comment starts, so operator must stop there.
     775             :                      * Note that slash-star or dash-dash at the first
     776             :                      * character will match a prior rule, not this one.
     777             :                      */
     778             :                     int         nchars = yyleng;    /* how many leading characters to keep */
     779             :                     char       *slashstar = strstr(yytext, "/*");
     780             :                     char       *dashdash = strstr(yytext, "--");
     781             : 
     782             :                     if (slashstar && dashdash)
     783             :                     {
     784             :                         /* if both appear, take the first one */
     785             :                         if (slashstar > dashdash)
     786             :                             slashstar = dashdash;
     787             :                     }
     788             :                     else if (!slashstar)
     789             :                         slashstar = dashdash;
     790             :                     if (slashstar)  /* by now: the earliest comment start, if there is one */
     791             :                         nchars = slashstar - yytext;
     792             : 
     793             :                     /*
     794             :                      * For SQL compatibility, '+' and '-' cannot be the
     795             :                      * last char of a multi-char operator unless the operator
     796             :                      * contains chars that are not in SQL operators.
     797             :                      * The idea is to lex '=-' as two operators, but not
     798             :                      * to forbid operator names like '?-' that could not be
     799             :                      * sequences of SQL operators.
     800             :                      */
     801             :                     if (nchars > 1 &&
     802             :                         (yytext[nchars - 1] == '+' ||
     803             :                          yytext[nchars - 1] == '-'))
     804             :                     {
     805             :                         int         ic;
     806             : 
     807             :                         for (ic = nchars - 2; ic >= 0; ic--)    /* scan the chars before the last one */
     808             :                         {
     809             :                             char c = yytext[ic];
     810             :                             if (c == '~' || c == '!' || c == '@' ||
     811             :                                 c == '#' || c == '^' || c == '&' ||
     812             :                                 c == '|' || c == '`' || c == '?' ||
     813             :                                 c == '%')
     814             :                                 break;
     815             :                         }
     816             :                         if (ic < 0)
     817             :                         {
     818             :                             /*
     819             :                              * didn't find a qualifying character, so remove
     820             :                              * all trailing [+-]
                     :                              * (the loop condition always leaves at least one
                     :                              * character in the token)
     821             :                              */
     822             :                             do {
     823             :                                 nchars--;
     824             :                             } while (nchars > 1 &&
     825             :                                  (yytext[nchars - 1] == '+' ||
     826             :                                   yytext[nchars - 1] == '-'));
     827             :                         }
     828             :                     }
     829             : 
     830             :                     if (nchars < yyleng)
     831             :                     {
     832             :                         /* Strip the unwanted chars from the token */
     833             :                         yyless(nchars);
     834             :                     }
     835             :                     ECHO;
     836             :                 }
     837             : 
     838             : {param}         {
     839             :                     ECHO;
     840             :                 }
     841             : 
     842             : {integer}       {
     843             :                     ECHO;
     844             :                 }
     845             : {decimal}       {
     846             :                     ECHO;
     847             :                 }
     848             : {decimalfail}   {
     849             :                     /* throw back the .., and treat as integer */
     850             :                     yyless(yyleng - 2);
     851             :                     ECHO;
     852             :                 }
     853             : {real}          {
     854             :                     ECHO;
     855             :                 }
     856             : {realfail1}     {
     857             :                     /*
     858             :                      * throw back the [Ee], and figure out whether what
     859             :                      * remains is an {integer} or {decimal}.
     860             :                      * (in psql, we don't actually care...)
                     :                      * so the remaining text is simply echoed.
     861             :                      */
     862             :                     yyless(yyleng - 1);
     863             :                     ECHO;
     864             :                 }
     865             : {realfail2}     {
     866             :                     /* throw back the [Ee][+-], and proceed as above */
     867             :                     yyless(yyleng - 2);
     868             :                     ECHO;
     869             :                 }
     870             : 
     871             : 
     872             : {identifier}    {
     873             :                     /*
     874             :                      * We need to track if we are inside a BEGIN .. END block
     875             :                      * in a function definition, so that semicolons contained
     876             :                      * therein don't terminate the whole statement.  Short of
     877             :                      * writing a full parser here, the following heuristic
     878             :                      * should work.  First, we track whether the beginning of
     879             :                      * the statement matches CREATE [OR REPLACE]
     880             :                      * {FUNCTION|PROCEDURE}
                     :                      *
                     :                      * To do that, identifiers[] holds, at the position of each
                     :                      * identifier in the statement, the lower-cased first
                     :                      * letter of that identifier if it is one of the five
                     :                      * keywords below, and zero otherwise.
     881             :                      */
     882             : 
     883             :                     if (cur_state->identifier_count == 0)   /* first identifier: clear old history */
     884             :                         memset(cur_state->identifiers, 0, sizeof(cur_state->identifiers));
     885             : 
     886             :                     if (pg_strcasecmp(yytext, "create") == 0 ||
     887             :                         pg_strcasecmp(yytext, "function") == 0 ||
     888             :                         pg_strcasecmp(yytext, "procedure") == 0 ||
     889             :                         pg_strcasecmp(yytext, "or") == 0 ||
     890             :                         pg_strcasecmp(yytext, "replace") == 0)
     891             :                     {
     892             :                         if (cur_state->identifier_count < sizeof(cur_state->identifiers))   /* ignore positions past the array */
     893             :                             cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned char) yytext[0]);
     894             :                     }
     895             : 
     896             :                     cur_state->identifier_count++;  /* counts every identifier, keyword or not */
     897             :                     /*
                     :                      * The letters tested below are c,f / c,p / c,o,r,f /
                     :                      * c,o,r,p, i.e. the statement starts with CREATE
                     :                      * [OR REPLACE] {FUNCTION|PROCEDURE}.  Only then, and only
                     :                      * outside parentheses, are BEGIN, CASE and END counted.
                     :                      */
     898             :                     if (cur_state->identifiers[0] == 'c' &&
     899             :                         (cur_state->identifiers[1] == 'f' || cur_state->identifiers[1] == 'p' ||
     900             :                          (cur_state->identifiers[1] == 'o' && cur_state->identifiers[2] == 'r' &&
     901             :                           (cur_state->identifiers[3] == 'f' || cur_state->identifiers[3] == 'p'))) &&
     902             :                         cur_state->paren_depth == 0)
     903             :                     {
     904             :                         if (pg_strcasecmp(yytext, "begin") == 0)
     905             :                             cur_state->begin_depth++;
     906             :                         else if (pg_strcasecmp(yytext, "case") == 0)
     907             :                         {
     908             :                             /*
     909             :                              * CASE also ends with END.  We only need to track
     910             :                              * this if we are already inside a BEGIN.
     911             :                              */
     912             :                             if (cur_state->begin_depth >= 1)
     913             :                                 cur_state->begin_depth++;
     914             :                         }
     915             :                         else if (pg_strcasecmp(yytext, "end") == 0)
     916             :                         {
     917             :                             if (cur_state->begin_depth > 0) /* never go below zero */
     918             :                                 cur_state->begin_depth--;
     919             :                         }
     920             :                     }
     921             : 
     922             :                     ECHO;
     923             :                 }
     924             : 
     925             : {other}         {   /* last pattern rule in the file: echo whatever it matched */
     926             :                     ECHO;
     927             :                 }
     928             : 
     929             : <<EOF>>         {   /*
                     :                      * An empty buffer_stack means the input that just ran out
                     :                      * is the original line rather than a variable expansion.
                     :                      */
     930             :                     if (cur_state->buffer_stack == NULL)
     931             :                     {
     932             :                         cur_state->start_state = YY_START;
     933             :                         return LEXRES_EOL;      /* end of input reached */
     934             :                     }
     935             : 
     936             :                     /*
     937             :                      * We were expanding a variable, so pop the inclusion
     938             :                      * stack and keep lexing
                     :                      * from whichever buffer is now on top.
     939             :                      */
     940             :                     psqlscan_pop_buffer_stack(cur_state);
     941             :                     psqlscan_select_top_buffer(cur_state);
     942             :                 }
     943             : 
     944             : %%
     945             : 
     946             : /* LCOV_EXCL_STOP */
     947             : 
     948             : /*
     949             :  * Create a lexer working state struct.
     950             :  *
     951             :  * callbacks is a struct of function pointers that encapsulate some
     952             :  * behavior we need from the surrounding program.  This struct must
     953             :  * remain valid for the lifespan of the PsqlScanState.
                     :  * (Only the pointer is stored here; the struct is not copied.)
                     :  *
                     :  * Returns the new state, zero-initialized by pg_malloc0() and then
                     :  * passed through psql_scan_reset().  Release it with
                     :  * psql_scan_destroy().
     954             :  */
     955             : PsqlScanState
     956             : psql_scan_create(const PsqlScanCallbacks *callbacks)
     957        6856 : {
     958             :     PsqlScanState state;
     959             : 
     960             :     state = (PsqlScanStateData *) pg_malloc0(sizeof(PsqlScanStateData));
     961        6856 : 
     962             :     state->callbacks = callbacks;
     963        6856 : 
     964             :     yylex_init(&state->scanner);
     965        6856 : 
     966             :     yyset_extra(state, state->scanner);     /* attach the state to the scanner as its extra data */
     967        6856 : 
     968             :     psql_scan_reset(state);
     969        6856 : 
     970             :     return state;
     971        6856 : }
     972             : 
     973             : /*
     974             :  * Destroy a lexer working state struct, releasing all resources.
                     :  *
                     :  * Calls psql_scan_finish() and psql_scan_reset() on the state first,
                     :  * then destroys the flex scanner and frees the struct itself.  The
                     :  * state pointer must not be used afterwards.
     975             :  */
     976             : void
     977             : psql_scan_destroy(PsqlScanState state)
     978        6762 : {
     979             :     psql_scan_finish(state);
     980        6762 : 
     981             :     psql_scan_reset(state);
     982        6762 : 
     983             :     yylex_destroy(state->scanner);
     984        6762 : 
     985             :     free(state);
     986        6762 : }
     987        6762 : 
     988             : /*
     989             :  * Set the callback passthrough pointer for the lexer.
     990             :  *
     991             :  * This could have been integrated into psql_scan_create, but keeping it
     992             :  * separate allows the application to change the pointer later, which might
     993             :  * be useful.
                     :  *
                     :  * The pointer is stored as-is in state->cb_passthrough; the lexer hands
                     :  * it to the get_variable callback as that callback's last argument.
     994             :  */
     995             : void
     996             : psql_scan_set_passthrough(PsqlScanState state, void *passthrough)
     997        6356 : {
     998             :     state->cb_passthrough = passthrough;
     999        6356 : }
    1000        6356 : 
    1001             : /*
    1002             :  * Set up to perform lexing of the given input line.
    1003             :  *
    1004             :  * The text at *line, extending for line_len bytes, will be scanned by
    1005             :  * subsequent calls to the psql_scan routines.  psql_scan_finish should
    1006             :  * be called when scanning is complete.  Note that the lexer retains
    1007             :  * a pointer to the storage at *line --- this string must not be altered
    1008             :  * or freed until after psql_scan_finish is called.
    1009             :  *
    1010             :  * encoding is the libpq identifier for the character encoding in use,
    1011             :  * and std_strings says whether standard_conforming_strings is on.
                     :  *
                     :  * It is an error (checked by assertions) to call this while a previous
                     :  * scan is still set up, i.e. while scanbufhandle or buffer_stack is
                     :  * non-NULL.
    1012             :  */
    1013             : void
    1014             : psql_scan_setup(PsqlScanState state,
    1015      338884 :                 const char *line, int line_len,
    1016             :                 int encoding, bool std_strings)
    1017             : {
    1018             :     /* Mustn't be scanning already */
    1019             :     Assert(state->scanbufhandle == NULL);
    1020             :     Assert(state->buffer_stack == NULL);
    1021             : 
    1022             :     /*
                     :      * Do we need to hack the character set encoding?  Remember the
                     :      * encoding, and in safe_encoding what
                     :      * pg_valid_server_encoding_id() reports for it.
                     :      */
    1023             :     state->encoding = encoding;
    1024      338884 :     state->safe_encoding = pg_valid_server_encoding_id(encoding);
    1025      338884 : 
    1026             :     /* Save standard-strings flag as well */
    1027             :     state->std_strings = std_strings;
    1028      338884 : 
    1029             :     /* Set up flex input buffer with appropriate translation and padding */
    1030             :     state->scanbufhandle = psqlscan_prepare_buffer(state, line, line_len,
    1031      338884 :                                                    &state->scanbuf);
    1032             :     state->scanline = line;     /* caller's storage, not a copy: see header comment */
    1033      338884 : 
    1034             :     /*
                     :      * Set lookaside data in case we have to map unsafe encoding:
                     :      * curline is the prepared scan buffer, refline the original line.
                     :      */
    1035             :     state->curline = state->scanbuf;
    1036      338884 :     state->refline = state->scanline;
    1037      338884 : }
    1038      338884 : 
    1039             : /*
    1040             :  * Do lexical analysis of SQL command text.
    1041             :  *
    1042             :  * The text previously passed to psql_scan_setup is scanned, and appended
    1043             :  * (possibly with transformation) to query_buf.
    1044             :  *
    1045             :  * The return value indicates the condition that stopped scanning:
    1046             :  *
    1047             :  * PSCAN_SEMICOLON: found a command-ending semicolon.  (The semicolon is
    1048             :  * transferred to query_buf.)  The command accumulated in query_buf should
    1049             :  * be executed, then clear query_buf and call again to scan the remainder
    1050             :  * of the line.
    1051             :  *
    1052             :  * PSCAN_BACKSLASH: found a backslash that starts a special command.
    1053             :  * Any previous data on the line has been transferred to query_buf.
    1054             :  * The caller will typically next apply a separate flex lexer to scan
    1055             :  * the special command.
    1056             :  *
    1057             :  * PSCAN_INCOMPLETE: the end of the line was reached, but we have an
    1058             :  * incomplete SQL command.  *prompt is set to the appropriate prompt type.
    1059             :  *
    1060             :  * PSCAN_EOL: the end of the line was reached, and there is no lexical
    1061             :  * reason to consider the command incomplete.  The caller may or may not
    1062             :  * choose to send it.  *prompt is set to the appropriate prompt type if
    1063             :  * the caller chooses to collect more input.
    1064             :  *
    1065             :  * In the PSCAN_INCOMPLETE and PSCAN_EOL cases, psql_scan_finish() should
    1066             :  * be called next, then the cycle may be repeated with a fresh input line.
    1067             :  *
    1068             :  * In all cases, *prompt is set to an appropriate prompt type code for the
    1069             :  * next line-input operation.
    1070             :  */
    1071             : PsqlScanResult
    1072             : psql_scan(PsqlScanState state,
    1073      521484 :           PQExpBuffer query_buf,
    1074             :           promptStatus_t *prompt)
    1075             : {
    1076             :     PsqlScanResult result;
    1077             :     int         lexresult;
    1078             : 
    1079             :     /* Must be scanning already */
    1080             :     Assert(state->scanbufhandle != NULL);
    1081             : 
    1082             :     /* Set current output target */
    1083             :     state->output_buf = query_buf;
    1084      521484 : 
    1085             :     /* Set input source */
    1086             :     if (state->buffer_stack != NULL)
    1087      521484 :         yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
    1088          60 :     else
    1089             :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1090      521424 : 
    1091             :     /* And lex. */
    1092             :     lexresult = yylex(NULL, state->scanner);
    1093      521484 : 
    1094             :     /*
    1095             :      * Check termination state and return appropriate result info.
    1096             :      */
    1097             :     switch (lexresult)
    1098      521484 :     {
    1099             :         case LEXRES_EOL:        /* end of input */
    1100      338772 :             switch (state->start_state)
    1101      338772 :             {
    1102             :                 case INITIAL:
    1103      307078 :                 case xqs:       /* we treat this like INITIAL */
    1104             :                     if (state->paren_depth > 0)
    1105      307078 :                     {
    1106             :                         result = PSCAN_INCOMPLETE;
    1107       24516 :                         *prompt = PROMPT_PAREN;
    1108       24516 :                     }
    1109             :                     else if (state->begin_depth > 0)
    1110      282562 :                     {
    1111             :                         result = PSCAN_INCOMPLETE;
    1112          96 :                         *prompt = PROMPT_CONTINUE;
    1113          96 :                     }
    1114             :                     else if (query_buf->len > 0)
    1115      282466 :                     {
    1116             :                         result = PSCAN_EOL;
    1117       48930 :                         *prompt = PROMPT_CONTINUE;
    1118       48930 :                     }
    1119             :                     else
    1120             :                     {
    1121             :                         /* never bother to send an empty buffer */
    1122             :                         result = PSCAN_INCOMPLETE;
    1123      233536 :                         *prompt = PROMPT_READY;
    1124      233536 :                     }
    1125             :                     break;
    1126      307078 :                 case xb:
    1127           0 :                     result = PSCAN_INCOMPLETE;
    1128           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1129           0 :                     break;
    1130           0 :                 case xc:
    1131         432 :                     result = PSCAN_INCOMPLETE;
    1132         432 :                     *prompt = PROMPT_COMMENT;
    1133         432 :                     break;
    1134         432 :                 case xd:
    1135          12 :                     result = PSCAN_INCOMPLETE;
    1136          12 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1137          12 :                     break;
    1138          12 :                 case xh:
    1139           0 :                     result = PSCAN_INCOMPLETE;
    1140           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1141           0 :                     break;
    1142           0 :                 case xe:
    1143         602 :                     result = PSCAN_INCOMPLETE;
    1144         602 :                     *prompt = PROMPT_SINGLEQUOTE;
    1145         602 :                     break;
    1146         602 :                 case xq:
    1147        6640 :                     result = PSCAN_INCOMPLETE;
    1148        6640 :                     *prompt = PROMPT_SINGLEQUOTE;
    1149        6640 :                     break;
    1150        6640 :                 case xdolq:
    1151       24008 :                     result = PSCAN_INCOMPLETE;
    1152       24008 :                     *prompt = PROMPT_DOLLARQUOTE;
    1153       24008 :                     break;
    1154       24008 :                 case xui:
    1155           0 :                     result = PSCAN_INCOMPLETE;
    1156           0 :                     *prompt = PROMPT_DOUBLEQUOTE;
    1157           0 :                     break;
    1158           0 :                 case xus:
    1159           0 :                     result = PSCAN_INCOMPLETE;
    1160           0 :                     *prompt = PROMPT_SINGLEQUOTE;
    1161           0 :                     break;
    1162           0 :                 default:
    1163           0 :                     /* can't get here */
    1164             :                     fprintf(stderr, "invalid YY_START\n");
    1165           0 :                     exit(1);
    1166           0 :             }
    1167             :             break;
    1168      338772 :         case LEXRES_SEMI:       /* semicolon */
    1169      175984 :             result = PSCAN_SEMICOLON;
    1170      175984 :             *prompt = PROMPT_READY;
    1171      175984 :             break;
    1172      175984 :         case LEXRES_BACKSLASH:  /* backslash */
    1173        6728 :             result = PSCAN_BACKSLASH;
    1174        6728 :             *prompt = PROMPT_READY;
    1175        6728 :             break;
    1176        6728 :         default:
    1177           0 :             /* can't get here */
    1178             :             fprintf(stderr, "invalid yylex result\n");
    1179           0 :             exit(1);
    1180           0 :     }
    1181             : 
    1182             :     return result;
    1183      521484 : }
    1184             : 
    1185             : /*
    1186             :  * Clean up after scanning a string.  This flushes any unread input and
    1187             :  * releases resources (but not the PsqlScanState itself).  Note however
    1188             :  * that this does not reset the lexer scan state; that can be done by
    1189             :  * psql_scan_reset(), which is an orthogonal operation.
    1190             :  *
    1191             :  * It is legal to call this when not scanning anything (makes it easier
    1192             :  * to deal with error recovery).
    1193             :  */
    1194             : void
    1195             : psql_scan_finish(PsqlScanState state)
    1196      345552 : {
    1197             :     /* Drop any incomplete variable expansions. */
    1198             :     while (state->buffer_stack != NULL)
    1199      345552 :         psqlscan_pop_buffer_stack(state);
    1200           0 : 
    1201             :     /* Done with the outer scan buffer, too */
    1202             :     if (state->scanbufhandle)
    1203      345552 :         yy_delete_buffer(state->scanbufhandle, state->scanner);
    1204      338790 :     state->scanbufhandle = NULL;
    1205      345552 :     if (state->scanbuf)
    1206      345552 :         free(state->scanbuf);
    1207      338790 :     state->scanbuf = NULL;
    1208      345552 : }
    1209      345552 : 
    1210             : /*
    1211             :  * Reset lexer scanning state to start conditions.  This is appropriate
    1212             :  * for executing \r psql commands (or any other time that we discard the
    1213             :  * prior contents of query_buf).  It is not, however, necessary to do this
    1214             :  * when we execute and clear the buffer after getting a PSCAN_SEMICOLON or
    1215             :  * PSCAN_EOL scan result, because the scan state must be INITIAL when those
    1216             :  * conditions are returned.
    1217             :  *
    1218             :  * Note that this is unrelated to flushing unread input; that task is
    1219             :  * done by psql_scan_finish().
    1220             :  */
    1221             : void
    1222             : psql_scan_reset(PsqlScanState state)
    1223       14034 : {
    1224             :     state->start_state = INITIAL;
    1225       14034 :     state->paren_depth = 0;
    1226       14034 :     state->xcdepth = 0;          /* not really necessary */
    1227       14034 :     if (state->dolqstart)
    1228       14034 :         free(state->dolqstart);
    1229           0 :     state->dolqstart = NULL;
    1230       14034 :     state->identifier_count = 0;
    1231       14034 :     state->begin_depth = 0;
    1232       14034 : }
    1233       14034 : 
    1234             : /*
    1235             :  * Reselect this lexer (psqlscan.l) after using another one.
    1236             :  *
    1237             :  * Currently and for foreseeable uses, it's sufficient to reset to INITIAL
    1238             :  * state, because we'd never switch to another lexer in a different state.
    1239             :  * However, we don't want to reset e.g. paren_depth, so this can't be
    1240             :  * the same as psql_scan_reset().
    1241             :  *
    1242             :  * Note: psql setjmp error recovery just calls psql_scan_reset(), so that
    1243             :  * must be a superset of this.
    1244             :  *
    1245             :  * Note: it seems likely that other lexers could just assign INITIAL for
    1246             :  * themselves, since that probably has the value zero in every flex-generated
    1247             :  * lexer.  But let's not assume that.
    1248             :  */
    1249             : void
    1250             : psql_scan_reselect_sql_lexer(PsqlScanState state)
    1251       29762 : {
    1252             :     state->start_state = INITIAL;
    1253       29762 : }
    1254       29762 : 
    1255             : /*
    1256             :  * Return true if lexer is currently in an "inside quotes" state.
    1257             :  *
    1258             :  * This is pretty grotty but is needed to preserve the old behavior
    1259             :  * that mainloop.c drops blank lines not inside quotes without even
    1260             :  * echoing them.
    1261             :  */
    1262             : bool
    1263             : psql_scan_in_quote(PsqlScanState state)
    1264       75560 : {
    1265             :     return state->start_state != INITIAL &&
    1266       76344 :             state->start_state != xqs;
    1267         784 : }
    1268             : 
    1269             : /*
    1270             :  * Push the given string onto the stack of stuff to scan.
    1271             :  *
    1272             :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1273             :  */
    1274             : void
    1275             : psqlscan_push_new_buffer(PsqlScanState state, const char *newstr,
    1276         288 :                          const char *varname)
    1277             : {
    1278             :     StackElem  *stackelem;
    1279             : 
    1280             :     stackelem = (StackElem *) pg_malloc(sizeof(StackElem));
    1281         288 : 
    1282             :     /*
    1283             :      * In current usage, the passed varname points at the current flex input
    1284             :      * buffer; we must copy it before calling psqlscan_prepare_buffer()
    1285             :      * because that will change the buffer state.
    1286             :      */
    1287             :     stackelem->varname = varname ? pg_strdup(varname) : NULL;
    1288         288 : 
    1289             :     stackelem->buf = psqlscan_prepare_buffer(state, newstr, strlen(newstr),
    1290         288 :                                              &stackelem->bufstring);
    1291             :     state->curline = stackelem->bufstring;
    1292         288 :     if (state->safe_encoding)
    1293         288 :     {
    1294             :         stackelem->origstring = NULL;
    1295         288 :         state->refline = stackelem->bufstring;
    1296         288 :     }
    1297             :     else
    1298             :     {
    1299             :         stackelem->origstring = pg_strdup(newstr);
    1300           0 :         state->refline = stackelem->origstring;
    1301           0 :     }
    1302             :     stackelem->next = state->buffer_stack;
    1303         288 :     state->buffer_stack = stackelem;
    1304         288 : }
    1305         288 : 
    1306             : /*
    1307             :  * Pop the topmost buffer stack item (there must be one!)
    1308             :  *
    1309             :  * NB: after this, the flex input state is unspecified; caller must
    1310             :  * switch to an appropriate buffer to continue lexing.
    1311             :  * See psqlscan_select_top_buffer().
    1312             :  */
    1313             : void
    1314             : psqlscan_pop_buffer_stack(PsqlScanState state)
    1315         288 : {
    1316             :     StackElem  *stackelem = state->buffer_stack;
    1317         288 : 
    1318             :     state->buffer_stack = stackelem->next;
    1319         288 :     yy_delete_buffer(stackelem->buf, state->scanner);
    1320         288 :     free(stackelem->bufstring);
    1321         288 :     if (stackelem->origstring)
    1322         288 :         free(stackelem->origstring);
    1323           0 :     if (stackelem->varname)
    1324         288 :         free(stackelem->varname);
    1325         288 :     free(stackelem);
    1326         288 : }
    1327         288 : 
    1328             : /*
    1329             :  * Select the topmost surviving buffer as the active input.
    1330             :  */
    1331             : void
    1332             : psqlscan_select_top_buffer(PsqlScanState state)
    1333         288 : {
    1334             :     StackElem  *stackelem = state->buffer_stack;
    1335         288 : 
    1336             :     if (stackelem != NULL)
    1337         288 :     {
    1338             :         yy_switch_to_buffer(stackelem->buf, state->scanner);
    1339           0 :         state->curline = stackelem->bufstring;
    1340           0 :         state->refline = stackelem->origstring ? stackelem->origstring : stackelem->bufstring;
    1341           0 :     }
    1342             :     else
    1343             :     {
    1344             :         yy_switch_to_buffer(state->scanbufhandle, state->scanner);
    1345         288 :         state->curline = state->scanbuf;
    1346         288 :         state->refline = state->scanline;
    1347         288 :     }
    1348             : }
    1349         288 : 
    1350             : /*
    1351             :  * Check if specified variable name is the source for any string
    1352             :  * currently being scanned
    1353             :  */
    1354             : bool
    1355             : psqlscan_var_is_current_source(PsqlScanState state, const char *varname)
    1356         288 : {
    1357             :     StackElem  *stackelem;
    1358             : 
    1359             :     for (stackelem = state->buffer_stack;
    1360         288 :          stackelem != NULL;
    1361             :          stackelem = stackelem->next)
    1362           0 :     {
    1363             :         if (stackelem->varname && strcmp(stackelem->varname, varname) == 0)
    1364           0 :             return true;
    1365           0 :     }
    1366             :     return false;
    1367         288 : }
    1368             : 
    1369             : /*
    1370             :  * Set up a flex input buffer to scan the given data.  We always make a
    1371             :  * copy of the data.  If working in an unsafe encoding, the copy has
    1372             :  * multibyte sequences replaced by FFs to avoid fooling the lexer rules.
    1373             :  *
    1374             :  * NOTE SIDE EFFECT: the new buffer is made the active flex input buffer.
    1375             :  */
    1376             : YY_BUFFER_STATE
    1377             : psqlscan_prepare_buffer(PsqlScanState state, const char *txt, int len,
    1378      339172 :                         char **txtcopy)
    1379             : {
    1380             :     char       *newtxt;
    1381             : 
    1382             :     /* Flex wants two \0 characters after the actual data */
    1383             :     newtxt = pg_malloc(len + 2);
    1384      339172 :     *txtcopy = newtxt;
    1385      339172 :     newtxt[len] = newtxt[len + 1] = YY_END_OF_BUFFER_CHAR;
    1386      339172 : 
    1387             :     if (state->safe_encoding)
    1388      339172 :         memcpy(newtxt, txt, len);
    1389      339172 :     else
    1390             :     {
    1391             :         /* Gotta do it the hard way */
    1392             :         int         i = 0;
    1393           0 : 
    1394             :         while (i < len)
    1395           0 :         {
    1396             :             int         thislen = PQmblen(txt + i, state->encoding);
    1397           0 : 
    1398             :             /* first byte should always be okay... */
    1399             :             newtxt[i] = txt[i];
    1400           0 :             i++;
    1401           0 :             while (--thislen > 0 && i < len)
    1402           0 :                 newtxt[i++] = (char) 0xFF;
    1403           0 :         }
    1404             :     }
    1405             : 
    1406             :     return yy_scan_buffer(newtxt, len + 2, state->scanner);
    1407      339172 : }
    1408             : 
    1409             : /*
    1410             :  * psqlscan_emit() --- body for ECHO macro
    1411             :  *
    1412             :  * NB: this must be used for ALL and ONLY the text copied from the flex
    1413             :  * input data.  If you pass it something that is not part of the yytext
    1414             :  * string, you are making a mistake.  Internally generated text can be
    1415             :  * appended directly to state->output_buf.
    1416             :  */
    1417             : void
    1418             : psqlscan_emit(PsqlScanState state, const char *txt, int len)
    1419     4019708 : {
    1420             :     PQExpBuffer output_buf = state->output_buf;
    1421     4019708 : 
    1422             :     if (state->safe_encoding)
    1423     4019708 :         appendBinaryPQExpBuffer(output_buf, txt, len);
    1424     4019708 :     else
    1425             :     {
    1426             :         /* Gotta do it the hard way */
    1427             :         const char *reference = state->refline;
    1428           0 :         int         i;
    1429             : 
    1430             :         reference += (txt - state->curline);
    1431           0 : 
    1432             :         for (i = 0; i < len; i++)
    1433           0 :         {
    1434             :             char        ch = txt[i];
    1435           0 : 
    1436             :             if (ch == (char) 0xFF)
    1437           0 :                 ch = reference[i];
    1438           0 :             appendPQExpBufferChar(output_buf, ch);
    1439           0 :         }
    1440             :     }
    1441             : }
    1442     4019708 : 
    1443             : /*
    1444             :  * psqlscan_extract_substring --- fetch value of (part of) the current token
    1445             :  *
    1446             :  * This is like psqlscan_emit(), except that the data is returned as a
    1447             :  * malloc'd string rather than being pushed directly to state->output_buf.
    1448             :  */
    1449             : char *
    1450             : psqlscan_extract_substring(PsqlScanState state, const char *txt, int len)
    1451        2144 : {
    1452             :     char       *result = (char *) pg_malloc(len + 1);
    1453        2144 : 
    1454             :     if (state->safe_encoding)
    1455        2144 :         memcpy(result, txt, len);
    1456        2144 :     else
    1457             :     {
    1458             :         /* Gotta do it the hard way */
    1459             :         const char *reference = state->refline;
    1460           0 :         int         i;
    1461             : 
    1462             :         reference += (txt - state->curline);
    1463           0 : 
    1464             :         for (i = 0; i < len; i++)
    1465           0 :         {
    1466             :             char        ch = txt[i];
    1467           0 : 
    1468             :             if (ch == (char) 0xFF)
    1469           0 :                 ch = reference[i];
    1470           0 :             result[i] = ch;
    1471           0 :         }
    1472             :     }
    1473             :     result[len] = '\0';
    1474        2144 :     return result;
    1475        2144 : }
    1476             : 
    1477             : /*
    1478             :  * psqlscan_escape_variable --- process :'VARIABLE' or :"VARIABLE"
    1479             :  *
    1480             :  * If the variable name is found, escape its value using the appropriate
    1481             :  * quoting method and emit the value to output_buf.  (Since the result is
    1482             :  * surely quoted, there is never any reason to rescan it.)  If we don't
    1483             :  * find the variable or escaping fails, emit the token as-is.
    1484             :  */
    1485             : void
    1486             : psqlscan_escape_variable(PsqlScanState state, const char *txt, int len,
    1487          62 :                          PsqlScanQuoteType quote)
    1488             : {
    1489             :     char       *varname;
    1490             :     char       *value;
    1491             : 
    1492             :     /* Variable lookup. */
    1493             :     varname = psqlscan_extract_substring(state, txt + 2, len - 3);
    1494          62 :     if (state->callbacks->get_variable)
    1495          62 :         value = state->callbacks->get_variable(varname, quote,
    1496          62 :                                                state->cb_passthrough);
    1497             :     else
    1498             :         value = NULL;
    1499           0 :     free(varname);
    1500          62 : 
    1501             :     if (value)
    1502          62 :     {
    1503             :         /* Emit the suitably-escaped value */
    1504             :         appendPQExpBufferStr(state->output_buf, value);
    1505          32 :         free(value);
    1506          32 :     }
    1507             :     else
    1508             :     {
    1509             :         /* Emit original token as-is */
    1510             :         psqlscan_emit(state, txt, len);
    1511          30 :     }
    1512             : }
    1513          62 : 
    1514             : void
    1515             : psqlscan_test_variable(PsqlScanState state, const char *txt, int len)
    1516          16 : {
    1517             :     char    *varname;
    1518             :     char    *value;
    1519             : 
    1520             :     varname = psqlscan_extract_substring(state, txt + 3, len - 4);
    1521          16 :     if (state->callbacks->get_variable)
    1522          16 :         value = state->callbacks->get_variable(varname, PQUOTE_PLAIN,
    1523          16 :                                                state->cb_passthrough);
    1524             :     else
    1525             :         value = NULL;
    1526           0 :     free(varname);
    1527          16 : 
    1528             :     if (value != NULL)
    1529          16 :     {
    1530             :         psqlscan_emit(state, "TRUE", 4);
    1531           8 :         free(value);
    1532           8 :     }
    1533             :     else
    1534             :     {
    1535             :         psqlscan_emit(state, "FALSE", 5);
    1536           8 :     }
    1537             : }

Generated by: LCOV version 1.13